├── requirements.txt ├── bibtexparser ├── tests │ ├── data │ │ ├── fieldname.bib │ │ ├── comments_only.bib │ │ ├── wrong.bib │ │ ├── comments_only_output.bib │ │ ├── website.bib │ │ ├── crossref_missing_entries.bib │ │ ├── book.bib │ │ ├── book_bom.bib │ │ ├── book_output.bib │ │ ├── book_comma_first.bib │ │ ├── xref_missing_entries.bib │ │ ├── string.bib │ │ ├── crossref_cascading_cycle.bib │ │ ├── article_field_name_with_underscore.bib │ │ ├── article_start_with_whitespace.bib │ │ ├── article_oneline.bib │ │ ├── article_with_strings.bib │ │ ├── article.bib │ │ ├── article_output.bib │ │ ├── article_with_strings_output.bib │ │ ├── common_strings.bib │ │ ├── encoding.bib │ │ ├── article_no_braces.bib │ │ ├── article_start_with_bom.bib │ │ ├── article_trailing_comma_output.bib │ │ ├── article_with_protection_braces.bib │ │ ├── traps.bib │ │ ├── article_with_special_characters.bib │ │ ├── article_comma_first_and_trailing_comma_output.bib │ │ ├── article_with_annotation.bib │ │ ├── article_with_annotation_output.bib │ │ ├── crossref_cascading.bib │ │ ├── features.bib │ │ ├── comments_percentage.bib │ │ ├── comments_percentage_nolastcoma.bib │ │ ├── article_homogenize.bib │ │ ├── article_comma_first.bib │ │ ├── features_output.bib │ │ ├── comments_spaces_and_declarations.bib │ │ ├── multiline_comments.bib │ │ ├── multiple_entries_output.bib │ │ ├── crossref_cascading_aliases.bib │ │ ├── multiple_entries_and_comments_output.bib │ │ ├── features2.bib │ │ ├── multiple_entries.bib │ │ ├── multiple_entries_and_comments.bib │ │ ├── article_missing_coma.bib │ │ ├── xref_entries.bib │ │ └── crossref_entries.bib │ ├── test_preambles.py │ ├── test_homogenise_fields.py │ ├── test_latexenc.py │ ├── test_bibdatabase.py │ ├── test_bibtexexpression.py │ ├── test_bwriter.py │ ├── test_bibtexparser.py │ ├── test_bibtex_strings.py │ ├── test_customization.py │ ├── test_comments.py │ ├── test_bibtexwriter.py │ └── test_crossref_resolving.py ├── __init__.py ├── bwriter.py ├── bibdatabase.py ├── bibtexexpression.py ├── bparser.py └── customization.py ├── MANIFEST.in ├── tox.ini ├── .coveragerc ├── RELEASE ├── .gitignore ├── docs ├── source │ ├── who.rst │ ├── bibtex_conv.rst │ ├── bibtexparser.rst │ ├── index.rst │ ├── install.rst │ ├── logging.rst │ ├── conf.py │ └── tutorial.rst └── Makefile ├── CONTRIBUTORS.txt ├── setup.py ├── .travis.yml ├── README.rst ├── CHANGELOG └── COPYING /requirements.txt: -------------------------------------------------------------------------------- 1 | future>=0.16.0 2 | pyparsing>=2.0.3 3 | unittest2>=1.1.0 4 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/fieldname.bib: -------------------------------------------------------------------------------- 1 | @BOOK{Bird1987, 2 | Dc.Date = {2004-01}, 3 | } 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include *.md 3 | include docs/Makefile 4 | include docs/source/* 5 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27,py35 3 | [testenv] 4 | deps = nose 5 | pyparsing 6 | commands = nosetests 7 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = bibtexparser 4 | 5 | [report] 6 | exclude_lines = 7 | if __name__ == .__main__.: 8 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/comments_only.bib: -------------------------------------------------------------------------------- 1 | @comment{ignore this line!} 2 | @Comment{ignore this line too!} 3 | @COMMENT{and ignore this line too!} 4 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/wrong.bib: -------------------------------------------------------------------------------- 1 | 2 | 3 | @wrong{foo, 4 | author = {wrong} 5 | } 6 | 7 | @article{bar, 8 | author = {correct} 9 | } 10 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/comments_only_output.bib: -------------------------------------------------------------------------------- 1 | @comment{ignore this line!} 2 | 3 | @comment{ignore this line too!} 4 | 5 | @comment{and ignore this line too!} 6 | 7 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/website.bib: -------------------------------------------------------------------------------- 1 | @misc{feder2006, 2 | title = {BibTeX}, 3 | author = {Alexander Feder}, 4 | link = {http://bibtex.org}, 5 | year = {2006} 6 | } 7 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/crossref_missing_entries.bib: -------------------------------------------------------------------------------- 1 | % Testing missing crossref 2 | @INBOOK{mcr, 3 | AUTHOR = {Megan Mistrel}, 4 | TITLE = {Lumbering Lunatics}, 5 | ORIGDATE = {1933}, 6 | CROSSREF = {missing1} 7 | } 8 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/book.bib: -------------------------------------------------------------------------------- 1 | @BOOK{Bird1987, 2 | title = {Dynamics of Polymeric Liquid}, 3 | publisher = {Wiley Edition}, 4 | year = {1987}, 5 | author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.}, 6 | volume = {1}, 7 | edition = {2}, 8 | } 9 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/book_bom.bib: -------------------------------------------------------------------------------- 1 | @BOOK{Bird1987, 2 | title = {Dynamics of Polymeric Liquid}, 3 | publisher = {Wiley Edition}, 4 | year = {1987}, 5 | author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.}, 6 | volume = {1}, 7 | edition = {2}, 8 | } 9 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/book_output.bib: -------------------------------------------------------------------------------- 1 | @book{Bird1987, 2 | author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.}, 3 | edition = {2}, 4 | publisher = {Wiley Edition}, 5 | title = {Dynamics of Polymeric Liquid}, 6 | volume = {1}, 7 | year = {1987} 8 | } 9 | 10 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/book_comma_first.bib: -------------------------------------------------------------------------------- 1 | @book{Bird1987 2 | , author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.} 3 | , edition = {2} 4 | , publisher = {Wiley Edition} 5 | , title = {Dynamics of Polymeric Liquid} 6 | , volume = {1} 7 | , year = {1987} 8 | } 9 | 10 | -------------------------------------------------------------------------------- /RELEASE: -------------------------------------------------------------------------------- 1 | How to release 2 | ============== 3 | 4 | * Update CHANGELOG 5 | * Update version in __init__.py 6 | * git tag -a 'vX' 7 | * merge in branch latest 8 | * Send the package on pypi 9 | python setup.py sdist upload 10 | * tick the doc version on readthedocs 11 | * Update version in __init__.py 12 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/xref_missing_entries.bib: -------------------------------------------------------------------------------- 1 | % From biber test data : t/tdata/crossrefs.bib 2 | % Kept initial comment but not for our purpose 3 | 4 | % Testing missing xref 5 | @INBOOK{mxr, 6 | AUTHOR = {Megan Mistrel}, 7 | TITLE = {Lumbering Lunatics}, 8 | ORIGDATE = {1933}, 9 | XREF = {missing1} 10 | } 11 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/string.bib: -------------------------------------------------------------------------------- 1 | @STRING{oakland = {Proceedings of the {IEEE} Symposium on Security and Privacy}} 2 | @INPROCEEDINGS{cha:oakland15, 3 | author = {Sang Kil Cha and Maverick Woo and David Brumley}, 4 | title = {{Program-Adaptive Mutational Fuzzing}}, 5 | booktitle = oakland, 6 | year = {2015}, 7 | pages = {725--741} 8 | } 9 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/crossref_cascading_cycle.bib: -------------------------------------------------------------------------------- 1 | % From biber test data : t/tdata/crossrefs.bib 2 | % Kept initial comment but not for our purpose 3 | 4 | % Testing circular refs detection 5 | 6 | @BOOK{circ1, 7 | DATE = {1911}, 8 | CROSSREF = {circ2} 9 | } 10 | 11 | @BOOK{circ2, 12 | DATE = {1911}, 13 | CROSSREF = {circ1} 14 | } 15 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_field_name_with_underscore.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean César}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | pages = {12-23}, 7 | journal = {Nice Journal}, 8 | comments = {A comment}, 9 | keyword = {keyword1, keyword2}, 10 | strange-field-name2 = {val2}, 11 | strange_field_name = {val}, 12 | } 13 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_start_with_whitespace.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean Cesar}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | journal = {Nice Journal} 7 | } 8 | 9 | @ARTICLE{Cesar2014, 10 | author = {Jean Cesar}, 11 | title = {An amazing title}, 12 | year = {2014}, 13 | volume = {12}, 14 | journal = {Nice Journal} 15 | } -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_oneline.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, author = {Jean Cesar}, title = {An amazing title}, year = {2013}, volume = {12}, journal = {Nice Journal}, comments = {A comment}, keyword = {keyword1, keyword2}} 2 | 3 | @ARTICLE{ Baltazar2013,author = {Jean Baltazar},title = {An amazing title},year = {2013},volume = {12},journal = {Nice Journal},comments = {A comment},keyword = {keyword1, keyword2}} 4 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_with_strings.bib: -------------------------------------------------------------------------------- 1 | @STRING{ nice_journal = "Nice Journal" } 2 | @STRING ( jean={Jean} ) 3 | @STRING{cesar = {César}} 4 | 5 | @ARTICLE{Cesar2013, 6 | author = jean # " " # cesar, 7 | title = {An amazing title}, 8 | year = {2013}, 9 | month = jan, 10 | volume = {12}, 11 | pages = {12-23}, 12 | journal = nice_journal, 13 | comments = {A comment}, 14 | keyword = {keyword1, keyword2}, 15 | } 16 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean César}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | month = "jan", 6 | volume = {12}, 7 | pages = {12-23}, 8 | journal = {Nice Journal}, 9 | abstract = {This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word}, 11 | comments = {A comment}, 12 | keyword = {keyword1, keyword2}, 13 | } 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_output.bib: -------------------------------------------------------------------------------- 1 | @article{Cesar2013, 2 | abstract = {This is an abstract. This line should be long enough to test 3 | multilines... and with a french érudit word}, 4 | author = {Jean César}, 5 | comments = {A comment}, 6 | journal = {Nice Journal}, 7 | keyword = {keyword1, keyword2}, 8 | month = {jan}, 9 | pages = {12-23}, 10 | title = {An amazing title}, 11 | volume = {12}, 12 | year = {2013} 13 | } 14 | 15 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_with_strings_output.bib: -------------------------------------------------------------------------------- 1 | @string{nice_journal = {Nice Journal}} 2 | 3 | @string{jean = {Jean}} 4 | 5 | @string{cesar = {César}} 6 | 7 | @article{Cesar2013, 8 | author = jean # { } # cesar, 9 | comments = {A comment}, 10 | journal = nice_journal, 11 | keyword = {keyword1, keyword2}, 12 | month = jan, 13 | pages = {12-23}, 14 | title = {An amazing title}, 15 | volume = {12}, 16 | year = {2013} 17 | } 18 | 19 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/common_strings.bib: -------------------------------------------------------------------------------- 1 | @string{jan = {January}} 2 | 3 | @string{feb = {February}} 4 | 5 | @string{mar = {March}} 6 | 7 | @string{apr = {April}} 8 | 9 | @string{may = {May}} 10 | 11 | @string{jun = {June}} 12 | 13 | @string{jul = {July}} 14 | 15 | @string{aug = {August}} 16 | 17 | @string{sep = {September}} 18 | 19 | @string{oct = {October}} 20 | 21 | @string{nov = {November}} 22 | 23 | @string{dec = {December}} 24 | 25 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/encoding.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar_2013, 2 | author = {Jean César}, 3 | title = {An amazing title: à}, 4 | year = {2013}, 5 | month = "jan", 6 | volume = {12}, 7 | pages = {12-23}, 8 | journal = {Elémentaire}, 9 | abstract = {This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word}, 11 | comments = {A comment}, 12 | keywords = {keyword1, keyword2}, 13 | } 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_no_braces.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = "Jean C{\'e}sar{\"u}", 3 | title = "An amazing title", 4 | year = "2013", 5 | month = "jan", 6 | volume = "12", 7 | pages = "12-23", 8 | journal = "Nice Journal", 9 | abstract = "This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word", 11 | comments = "A comment", 12 | keyword = "keyword1, keyword2", 13 | } 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_start_with_bom.bib: -------------------------------------------------------------------------------- 1 | 2 | @ARTICLE{Cesar2013, 3 | author = {Jean César}, 4 | title = {An amazing title}, 5 | year = {2013}, 6 | month = "jan", 7 | volume = {12}, 8 | pages = {12-23}, 9 | journal = {Nice Journal}, 10 | abstract = {This is an abstract. This line should be long enough to test 11 | multilines... and with a french érudit word}, 12 | comments = {A comment}, 13 | keyword = {keyword1, keyword2}, 14 | } 15 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_trailing_comma_output.bib: -------------------------------------------------------------------------------- 1 | @article{Cesar2013, 2 | abstract = {This is an abstract. This line should be long enough to test 3 | multilines... and with a french érudit word}, 4 | author = {Jean César}, 5 | comments = {A comment}, 6 | journal = {Nice Journal}, 7 | keyword = {keyword1, keyword2}, 8 | month = {jan}, 9 | pages = {12-23}, 10 | title = {An amazing title}, 11 | volume = {12}, 12 | year = {2013}, 13 | } 14 | 15 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_with_protection_braces.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean César}, 3 | title = {{An amazing title}}, 4 | year = {2013}, 5 | month = "jan", 6 | volume = {12}, 7 | pages = {12-23}, 8 | journal = {{Nice Journal}}, 9 | abstract = {This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word}, 11 | comments = {A comment}, 12 | keyword = {keyword1, keyword2}, 13 | } 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/traps.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Laide2013, 2 | author = {Jean Laid{\'e}, 3 | Ben Loaeb}, 4 | title = {{An} amazing {title}}, 5 | year = {2013}, 6 | month = "jan", 7 | volume = {n.s.~2}, 8 | pages = {12-23}, 9 | journal = {Nice Journal}, 10 | abstract = {This is an abstract. This line should be long enough to test 11 | multilines... and with a french érudit word}, 12 | comments = {A comment}, 13 | keywords = {keyword1, keyword2}, 14 | } 15 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_with_special_characters.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean C{\'e}sar{\"u}}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | month = "jan", 6 | volume = {12}, 7 | pages = {12-23}, 8 | journal = {Nice Journal}, 9 | abstract = {This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word}, 11 | comments = {A comment}, 12 | keyword = {keyword1, keyword2}, 13 | } 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_comma_first_and_trailing_comma_output.bib: -------------------------------------------------------------------------------- 1 | @article{Cesar2013 2 | , abstract = {This is an abstract. This line should be long enough to test 3 | multilines... and with a french érudit word} 4 | , author = {Jean César} 5 | , comments = {A comment} 6 | , journal = {Nice Journal} 7 | , keyword = {keyword1, keyword2} 8 | , month = {jan} 9 | , pages = {12-23} 10 | , title = {An amazing title} 11 | , volume = {12} 12 | , year = {2013} 13 | , 14 | } 15 | 16 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_with_annotation.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean César}, 3 | author+an = {1=highlight}, 4 | title = {An amazing title}, 5 | year = {2013}, 6 | month = "jan", 7 | volume = {12}, 8 | pages = {12-23}, 9 | journal = {Nice Journal}, 10 | abstract = {This is an abstract. This line should be long enough to test 11 | multilines... and with a french érudit word}, 12 | comments = {A comment}, 13 | keyword = {keyword1, keyword2}, 14 | } 15 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_with_annotation_output.bib: -------------------------------------------------------------------------------- 1 | @article{Cesar2013, 2 | abstract = {This is an abstract. This line should be long enough to test 3 | multilines... and with a french érudit word}, 4 | author = {Jean César}, 5 | author+an = {1=highlight}, 6 | comments = {A comment}, 7 | journal = {Nice Journal}, 8 | keyword = {keyword1, keyword2}, 9 | month = {jan}, 10 | pages = {12-23}, 11 | title = {An amazing title}, 12 | volume = {12}, 13 | year = {2013} 14 | } 15 | 16 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/crossref_cascading.bib: -------------------------------------------------------------------------------- 1 | % From biber test data : t/tdata/crossrefs.bib 2 | % Kept initial comment but not for our purpose 3 | 4 | % Test of dependency calculations for non-cited entries 5 | 6 | @BOOK{r1, 7 | DATE = {1911}, 8 | CROSSREF = {r2} 9 | } 10 | 11 | @BOOK{r2, 12 | DATE = {1911}, 13 | CROSSREF = {r3} 14 | } 15 | 16 | @BOOK{r3, 17 | DATE = {1911}, 18 | CROSSREF = {r4} 19 | } 20 | 21 | @BOOK{r4, 22 | DATE = {1911}, 23 | } 24 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/features.bib: -------------------------------------------------------------------------------- 1 | @comment{ignore this line!} 2 | @Comment{ignore this line too!} 3 | @COMMENT{and ignore this line too!} 4 | 5 | @preamble{ "\makeatletter" } 6 | @preamble{ "\@ifundefined{url}{\def\url#1{\texttt{#1}}}{}" } 7 | @preamble{ "\makeatother" } 8 | 9 | @string{mystring = "Hello"} 10 | @string{myconf = "My International Conference"} 11 | @string{myname = "Doe"} 12 | 13 | @inproceedings{mykey, 14 | author = "John", 15 | title = {Cool Stuff}, 16 | booktitle = myconf, 17 | year = 2014, 18 | } 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | 37 | # Pycharm 38 | .idea 39 | 40 | # Vim. 41 | *.swp 42 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/comments_percentage.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean Cesar}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | journal = {Nice Journal}, 7 | comments = {A comment}, 8 | keyword = {keyword1, keyword2}, 9 | } 10 | % comment. 11 | @ARTICLE{Baltazar2013, 12 | author = {Jean Baltazar}, 13 | title = {An amazing title}, 14 | year = {2013}, 15 | volume = {12}, 16 | journal = {Nice Journal}, 17 | comments = {A comment}, 18 | keyword = {keyword1, keyword2}, 19 | } 20 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/comments_percentage_nolastcoma.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean Cesar}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | journal = {Nice Journal}, 7 | comments = {A comment}, 8 | keyword = {keyword1, keyword2} 9 | } 10 | % comment. 11 | @ARTICLE{Baltazar2013, 12 | author = {Jean Baltazar}, 13 | title = {An amazing title}, 14 | year = {2013}, 15 | volume = {12}, 16 | journal = {Nice Journal}, 17 | comments = {A comment}, 18 | keyword = {keyword1, keyword2} 19 | } 20 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_homogenize.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | authors = {Jean César}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | month = "jan", 6 | volume = {12}, 7 | pages = {12-23}, 8 | journal = {Nice Journal}, 9 | abstract = {This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word}, 11 | comments = {A comment}, 12 | editors = {Edith Or}, 13 | keywords = {keyword1, keyword2}, 14 | links = {http://my.link/to-content}, 15 | subjects = "Some topic of interest", 16 | } 17 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_comma_first.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013 2 | , author = {Jean Cesar} 3 | , title = {An amazing title} 4 | , year = {2013} 5 | , volume = {12} 6 | , journal = {Nice Journal} 7 | , comments = {A comment} 8 | , keyword = {keyword1, keyword2} 9 | } 10 | 11 | @ARTICLE{ Baltazar2013 12 | , author = {Jean Baltazar} 13 | , title = {An amazing title} 14 | , year = {2013} 15 | , volume = {12} 16 | , journal = {Nice Journal} 17 | , comments = {A comment} 18 | , keyword = {keyword1, keyword2}} 19 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/features_output.bib: -------------------------------------------------------------------------------- 1 | @comment{ignore this line!} 2 | 3 | @comment{ignore this line too!} 4 | 5 | @comment{and ignore this line too!} 6 | 7 | @preamble{ "\makeatletter" } 8 | 9 | @preamble{ "\@ifundefined{url}{\def\url#1{\texttt{#1}}}{}" } 10 | 11 | @preamble{ "\makeatother" } 12 | 13 | @string{mystring = "Hello"} 14 | 15 | @string{myconf = "My International Conference"} 16 | 17 | @string{myname = "Doe"} 18 | 19 | @inproceedings{mykey, 20 | author = {John}, 21 | booktitle = {My International Conference}, 22 | title = {Cool Stuff}, 23 | year = {2014} 24 | } 25 | 26 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/comments_spaces_and_declarations.bib: -------------------------------------------------------------------------------- 1 | % a comment 2 | @preamble{ "Blah blah" } 3 | 4 | Another comment 5 | @string{title = {A great title} } 6 | 7 | and one more comment 8 | 9 | @ARTICLE{Cesar2013, 10 | author = {Jean César}, 11 | title = title, 12 | year = {2013}, 13 | month = "jan", 14 | volume = {12}, 15 | pages = {12-23}, 16 | journal = {Nice Journal}, 17 | abstract = {This is an abstract. This line should be long enough to test 18 | multilines... and with a french érudit word}, 19 | comments = {A comment}, 20 | keyword = {keyword1, keyword2}, 21 | } 22 | -------------------------------------------------------------------------------- /docs/source/who.rst: -------------------------------------------------------------------------------- 1 | Who uses BibtexParser? 2 | ====================== 3 | 4 | If your project uses BibtexParser, you can ask for the addition of a link in this list. 5 | 6 | * https://pypi.org/project/vitae/ 7 | * http://timotheepoisot.fr/2013/11/10/shared-bibtex-file-markdown/ 8 | * https://github.com/Phyks/BMC 9 | * http://aurelien.naldi.info/research/publications.html 10 | * http://robot.kut.ac.kr/publications 11 | * https://git.atelo.org/etlapale/bibgen 12 | * https://onmenwhostareongraphs.wordpress.com/2015/06/09/graph-display-software-for-author-relationships-with-bibtex-files/ 13 | * https://github.com/vitorfs/parsifal 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiline_comments.bib: -------------------------------------------------------------------------------- 1 | @comment{Lorem ipsum dolor sit amet, 2 | consectetur adipisicing elit} 3 | 4 | @comment{ 5 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 6 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 7 | 8 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 9 | Excepteur sint occaecat cupidatat non proident. 10 | , 11 | } 12 | 13 | @comment{ 14 | 15 | 16 | Sunt in culpa qui officia deserunt mollit anim id est laborum. 17 | 18 | 19 | } 20 | 21 | @comment{} 22 | 23 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiple_entries_output.bib: -------------------------------------------------------------------------------- 1 | @book{Toto3000, 2 | author = {Toto, A and Titi, B}, 3 | title = {A title} 4 | } 5 | 6 | @article{Wigner1938, 7 | author = {Wigner, E.}, 8 | doi = {10.1039/TF9383400029}, 9 | issn = {0014-7672}, 10 | journal = {Trans. Faraday Soc.}, 11 | owner = {fr}, 12 | pages = {29--41}, 13 | publisher = {The Royal Society of Chemistry}, 14 | title = {The transition state method}, 15 | volume = {34}, 16 | year = {1938} 17 | } 18 | 19 | @book{Yablon2005, 20 | author = {Yablon, A.D.}, 21 | publisher = {Springer}, 22 | title = {Optical fiber fusion slicing}, 23 | year = {2005} 24 | } 25 | 26 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/crossref_cascading_aliases.bib: -------------------------------------------------------------------------------- 1 | % From biber test data : t/tdata/crossrefs.bib 2 | % Kept initial comment but not for our purpose 3 | 4 | % Testing cascading crossrefs 5 | @MVBOOK{ccr1, 6 | IDS = {ccr1alias}, 7 | AUTHOR = {Vince Various}, 8 | EDITOR = {Edward Editor}, 9 | TITLE = {Stuff Concerning Varia}, 10 | DATE = {1934} 11 | } 12 | 13 | % using alias 14 | @BOOK{ccr2, 15 | TITLE = {Misc etc.}, 16 | DATE = {1923}, 17 | CROSSREF = {ccr1alias} 18 | } 19 | 20 | @INBOOK{ccr3, 21 | TITLE = {Perhaps, Perchance, Possibilities?}, 22 | DATE = {1911}, 23 | CROSSREF = {ccr2} 24 | } 25 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiple_entries_and_comments_output.bib: -------------------------------------------------------------------------------- 1 | @comment{} 2 | 3 | @comment{A comment} 4 | 5 | @book{Toto3000, 6 | author = {Toto, A and Titi, B}, 7 | title = {A title} 8 | } 9 | 10 | @article{Wigner1938, 11 | author = {Wigner, E.}, 12 | doi = {10.1039/TF9383400029}, 13 | issn = {0014-7672}, 14 | journal = {Trans. Faraday Soc.}, 15 | owner = {fr}, 16 | pages = {29--41}, 17 | publisher = {The Royal Society of Chemistry}, 18 | title = {The transition state method}, 19 | volume = {34}, 20 | year = {1938} 21 | } 22 | 23 | @book{Yablon2005, 24 | author = {Yablon, A.D.}, 25 | publisher = {Springer}, 26 | title = {Optical fiber fusion slicing}, 27 | year = {2005} 28 | } 29 | 30 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/features2.bib: -------------------------------------------------------------------------------- 1 | @string{CoOl = "Cool"} 2 | @string{stuff = "Stuff"} 3 | @string{myTitle = cool # " " # stuff} 4 | 5 | @string{int = "International"} 6 | @string{myconf = "My "#int#" Conference"} 7 | 8 | @string{myname = "Doe"} 9 | 10 | @String {firstname = "John"} 11 | @String {lastname = myname} 12 | @String {domain = "example"} 13 | @String {tld = "com"} 14 | 15 | @String {foo = "1--10"} 16 | @String {BaR = FOO} 17 | @String {pages = baR} 18 | 19 | @inproceedings{mykey, 20 | author = "John " # mynamE, 21 | title = mytitle, 22 | booktitle = myconf, 23 | pages = pages, 24 | year = 2014, 25 | note = "Email: " # firstname # "." # lastname # 26 | "@" # domain # "." # tld, 27 | } 28 | -------------------------------------------------------------------------------- /CONTRIBUTORS.txt: -------------------------------------------------------------------------------- 1 | - François Boulogne 2 | Project coordinator 3 | 4 | - bibserver's contributors 5 | for the parser's core and the permission to release this project under LGPLv3 and BSD 6 | 7 | - Shuen-Huei (Drake) Guan 8 | Python 2.7 porting 9 | 10 | - Sebastien Diemer 11 | Bugfix 12 | 13 | - Georg C. Brückmann 14 | Support for non-standard entry types 15 | 16 | - Uwe Schmidt 17 | String replacement 18 | 19 | - faph 20 | coma fixes, optional keys sanitising, refactoring and other improvements 21 | 22 | - Steven M. Bellovin 23 | Fix braces detection 24 | 25 | - Sven Goossens 26 | Support for bibtex with leading spaces 27 | 28 | - Michal Grochmal 29 | Comma first syntax support 30 | 31 | - Cschaffner 32 | New features in bwriter 33 | 34 | - Olivier Mangin 35 | Pyparsing implementation of the parser. 36 | 37 | - Blair Bonnett 38 | customization.splitname() function 39 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiple_entries.bib: -------------------------------------------------------------------------------- 1 | @Book{Yablon2005, 2 | Title = {Optical fiber fusion slicing}, 3 | Author = {Yablon, A.D.}, 4 | Publisher = {Springer}, 5 | Year = {2005}, 6 | } 7 | 8 | @Article{Wigner1938, 9 | Title = {The transition state method}, 10 | Author = {Wigner, E.}, 11 | Journal = {Trans. Faraday Soc.}, 12 | Year = {1938}, 13 | Pages = {29--41}, 14 | Volume = {34}, 15 | Doi = {10.1039/TF9383400029}, 16 | ISSN = {0014-7672}, 17 | Owner = {fr}, 18 | Publisher = {The Royal Society of Chemistry}, 19 | } 20 | 21 | @Book{Toto3000, 22 | Title = {A title}, 23 | Author = {Toto, A and Titi, B}, 24 | } 25 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiple_entries_and_comments.bib: -------------------------------------------------------------------------------- 1 | @Book{Yablon2005, 2 | Title = {Optical fiber fusion slicing}, 3 | Author = {Yablon, A.D.}, 4 | Publisher = {Springer}, 5 | Year = {2005}, 6 | } 7 | 8 | @Article{Wigner1938, 9 | Title = {The transition state method}, 10 | Author = {Wigner, E.}, 11 | Journal = {Trans. Faraday Soc.}, 12 | Year = {1938}, 13 | Pages = {29--41}, 14 | Volume = {34}, 15 | Doi = {10.1039/TF9383400029}, 16 | ISSN = {0014-7672}, 17 | Owner = {fr}, 18 | Publisher = {The Royal Society of Chemistry}, 19 | } 20 | 21 | @Book{Toto3000, 22 | Title = {A title}, 23 | Author = {Toto, A and Titi, B}, 24 | } 25 | 26 | @Comment{} 27 | 28 | @Comment{A comment} 29 | 30 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_missing_coma.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean Cesar}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | journal = {Nice Journal}, 7 | comments = {A comment}, 8 | keyword = {keyword1, keyword2} 9 | } 10 | 11 | @ARTICLE{Baltazar2013, 12 | author = {Jean Baltazar}, 13 | title = {An amazing title}, 14 | year = {2013}, 15 | volume = {12}, 16 | journal = {Nice Journal}, 17 | comments = {A comment}, 18 | keyword = {keyword1, keyword2}} 19 | 20 | @ARTICLE{Aimar2013, 21 | author = {Jean Aimar}, 22 | title = {An amazing title}, 23 | year = {2013}, 24 | volume = {12}, 25 | journal = {Nice Journal}, 26 | comments = {A comment}, 27 | keyword = {keyword1, keyword2}, 28 | month = "january" 29 | } 30 | 31 | @ARTICLE{Doute2013, 32 | author = {Jean Doute}, 33 | title = {An amazing title}, 34 | volume = {12}, 35 | journal = {Nice Journal}, 36 | comments = {A comment}, 37 | keyword = {keyword1, keyword2}, 38 | year = "2013" 39 | } 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | try: 4 | from setuptools import setup 5 | except ImportError as ex: 6 | print('[python-bibtexparser] setuptools not found. Falling back to distutils.core') 7 | from distutils.core import setup 8 | 9 | with open('bibtexparser/__init__.py') as fh: 10 | for line in fh: 11 | if line.startswith('__version__'): 12 | version = line.strip().split()[-1][1:-1] 13 | break 14 | 15 | setup( 16 | name = 'bibtexparser', 17 | version = version, 18 | url = "https://github.com/sciunto-org/python-bibtexparser", 19 | author = "Francois Boulogne and other contributors", 20 | license = "LGPLv3 or BSD", 21 | author_email = "devel@sciunto.org", 22 | description = "Bibtex parser for python 2.7 and 3.3 and newer", 23 | packages = ['bibtexparser'], 24 | install_requires = ['pyparsing>=2.0.3', 25 | 'future>=0.16.0'], 26 | extra_requires = {'unittest': 'unittest2>=1.1.0'} 27 | ) 28 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | language: python 3 | matrix: 4 | include: 5 | - python: "2.7" 6 | env: TEST_SUITE=suite_2_7 7 | - python: "3.3" 8 | env: TEST_SUITE=suite_3_3 9 | - python: "3.4" 10 | env: TEST_SUITE=suite_3_4 11 | - python: "3.5" 12 | env: TEST_SUITE=suite_3_5 13 | - python: "3.6" 14 | env: TEST_SUITE=suite_3_6 15 | - python: "3.7" 16 | dist: xenial # Required for Python 3.7 17 | sudo: true # travis-ci/travis-ci#9069 18 | env: TEST_SUITE=suite_3_7 19 | - python: "pypy-5.7.1" 20 | env: TEST_SUITE=suite_pypy 21 | - python: "pypy3.5-5.8.0" 22 | env: TEST_SUITE=suite_pypy3 23 | install: 24 | - if [[ $TEST_SUITE == suite_3_6 ]]; then 25 | pip install sphinx; 26 | fi; 27 | - pip install coverage 28 | - pip install -r requirements.txt 29 | - python setup.py install 30 | script: 31 | - nosetests --with-coverage --cover-erase --cover-package=bibtexparser 32 | - if [[ $TEST_SUITE == suite_3_6 ]]; then 33 | cd docs; 34 | make html; 35 | fi; 36 | 37 | after_success: 38 | - pip install coveralls 39 | - coveralls 40 | -------------------------------------------------------------------------------- /docs/source/bibtex_conv.rst: -------------------------------------------------------------------------------- 1 | =============================================== 2 | Bibtex tips, conventions and unrelated projects 3 | =============================================== 4 | 5 | This page presents various resources about bibtex in general. 6 | 7 | Format 8 | ====== 9 | 10 | http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html 11 | 12 | * Comments 13 | * Variable 14 | * @preamble 15 | * Name convention 16 | 17 | Upper case letters in titles 18 | ---------------------------- 19 | 20 | Put the letter/word in curly braces like {this}. 21 | 22 | 23 | General references 24 | ------------------ 25 | 26 | * http://tug.ctan.org/tex-archive/info/bibtex/tamethebeast/ttb_en.pdf 27 | * http://ctan.mirrors.hoobly.com/macros/latex/contrib/biblatex/doc/biblatex.pdf 28 | 29 | IEEE citation reference 30 | ----------------------- 31 | 32 | * https://origin.www.ieee.org/documents/ieeecitationref.pdf 33 | 34 | 35 | Common Errors in Bibliographies John Owens 36 | ------------------------------------------ 37 | 38 | * http://www.ece.ucdavis.edu/~jowens/biberrors.html 39 | 40 | Common abbreviations for journals 41 | --------------------------------- 42 | 43 | * Jabref list http://jabref.sourceforge.net/resources.php#downloadlists 44 | 45 | 46 | Projects 47 | ======== 48 | 49 | Here are some interesting projects using bibtex but not necessarily this parser. 50 | 51 | Display your bibliography in html pages 52 | --------------------------------------- 53 | 54 | * http://www.monperrus.net/martin/bibtexbrowser/ 55 | 56 | -------------------------------------------------------------------------------- /docs/source/bibtexparser.rst: -------------------------------------------------------------------------------- 1 | .. _bibtexparser_api: 2 | 3 | .. contents:: 4 | 5 | bibtexparser: API 6 | ================= 7 | 8 | :mod:`bibtexparser` --- Parsing and writing BibTeX files 9 | -------------------------------------------------------- 10 | 11 | .. automodule:: bibtexparser 12 | :members: load, loads, dumps, dump 13 | 14 | :mod:`bibtexparser.bibdatabase` --- The bibliographic database object 15 | --------------------------------------------------------------------- 16 | 17 | .. autoclass:: bibtexparser.bibdatabase.BibDatabase 18 | :members: entries, entries_dict, comments, strings, preambles 19 | 20 | :mod:`bibtexparser.bparser` --- Tune the default parser 21 | -------------------------------------------------------- 22 | 23 | .. automodule:: bibtexparser.bparser 24 | :members: 25 | 26 | :mod:`bibtexparser.customization` --- Functions to customize records 27 | -------------------------------------------------------------------- 28 | 29 | .. automodule:: bibtexparser.customization 30 | :members: 31 | 32 | Exception classes 33 | ^^^^^^^^^^^^^^^^^ 34 | .. autoclass:: bibtexparser.customization.InvalidName 35 | 36 | :mod:`bibtexparser.bwriter` --- Tune the default writer 37 | ------------------------------------------------------- 38 | 39 | .. autoclass:: bibtexparser.bwriter.BibTexWriter 40 | :members: 41 | 42 | :mod:`bibtexparser.bibtexexpression` --- Parser's core relying on pyparsing 43 | --------------------------------------------------------------------------- 44 | 45 | .. automodule:: bibtexparser.bibtexexpression 46 | :members: 47 | 48 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. BibtexParser documentation master file, created by 2 | sphinx-quickstart on Thu Aug 1 13:30:23 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to BibtexParser's documentation! 7 | ======================================== 8 | 9 | 10 | :Author: François Boulogne, Olivier Mangin, Lucas Verney, and other contributors. 11 | :Devel: `github.com project `_ 12 | :Mirror: `git.sciunto.org `_ 13 | :Bugs: `github.com `_ 14 | :Generated: |today| 15 | :License: LGPL v3 or BSD 16 | :Version: |release| 17 | 18 | BibtexParser is a python library to parse bibtex files. The code relies on `pyparsing `_ and is tested with unittests. 19 | 20 | If you use BibtexParser for your project, feel free to send me an email. I would be happy to hear that and to mention your project in the documentation. 21 | 22 | Contents: 23 | 24 | .. toctree:: 25 | :maxdepth: 2 26 | 27 | install.rst 28 | tutorial.rst 29 | bibtexparser.rst 30 | logging.rst 31 | bibtex_conv.rst 32 | who.rst 33 | 34 | 35 | Other projects 36 | ============== 37 | 38 | * http://pybtex.sourceforge.net/ 39 | * http://pybliographer.org/ 40 | * https://github.com/matthew-brett/babybib 41 | 42 | Indices and tables 43 | ================== 44 | 45 | * :ref:`genindex` 46 | * :ref:`modindex` 47 | * :ref:`search` 48 | 49 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | python-bibtexparser 2 | =================== 3 | 4 | Python library to parse `bibtex `_ files. 5 | 6 | 7 | IMPORTANT: the library is looking for new maintainers. Please, manifest yourself if you are interested. 8 | 9 | .. contents:: 10 | 11 | 12 | Bibtexparser relies on `pyparsing `_ and is compatible with Python 2.7 and 3.3 or newer. 13 | 14 | Documentation 15 | ------------- 16 | 17 | Our documentation includes the installation procedure, a tutorial, the API and advices to report a bug. 18 | References, related projects and softwares based on bibtexparser are also listed. If you would like to appear on this list, feel free to open a ticket or send an email. 19 | 20 | `Documentation on readthedocs.io `_ 21 | 22 | Upgrading 23 | --------- 24 | 25 | Please, read the changelog before upgrading regarding API modifications. 26 | Prior version 1.0, we do not hesitate to modify the API to get the best API from your feedbacks. 27 | 28 | License 29 | ------- 30 | 31 | Dual license (at your choice): 32 | 33 | * LGPLv3. 34 | * BSD 35 | 36 | See COPYING for details. 37 | 38 | History and evolutions 39 | ---------------------- 40 | 41 | The original source code was part of bibserver from `OKFN `_. This project is released under the AGPLv3. OKFN and the original authors kindly provided the permission to use a subpart of their project (ie the bibtex parser) under LGPLv3. Many thanks to them! 42 | 43 | The parser evolved to a new core based on pyparsing. 44 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/xref_entries.bib: -------------------------------------------------------------------------------- 1 | % From biber test data : t/tdata/crossrefs.bib 2 | % Kept initial comment but not for our purpose 3 | 4 | % Testing mincrossrefs. xr1 and xr2 xrefs should trigger inclusion of xrm and also 5 | % the xreffields in both of them 6 | @INBOOK{xr1, 7 | AUTHOR = {Zoe Zentrum}, 8 | TITLE = {Moods Mildly Modified}, 9 | ORIGDATE = {1921}, 10 | XREF = {xrm} 11 | } 12 | 13 | @INBOOK{xr2, 14 | AUTHOR = {Ian Instant}, 15 | TITLE = {Migraines Multiplying Madly}, 16 | ORIGDATE = {1926}, 17 | XREF = {xrm} 18 | } 19 | 20 | @BOOK{xrm, 21 | EDITOR = {Peter Prendergast}, 22 | TITLE = {Calligraphy, Calisthenics, Culture}, 23 | PUBLISHER = {Mainstream}, 24 | YEAR = {1970} 25 | } 26 | 27 | % Testing explicit cite of xref parent. Should trigger inclusion of child xref field 28 | @INBOOK{xr3, 29 | AUTHOR = {Norman Normal}, 30 | TITLE = {Russian Regalia Revisited}, 31 | ORIGDATE = {1923}, 32 | XREF = {xrt} 33 | } 34 | 35 | @BOOK{xrt, 36 | EDITOR = {Lucy Lunders}, 37 | TITLE = {Kings, Cork and Calculation}, 38 | PUBLISHER = {Middling}, 39 | YEAR = {1977} 40 | } 41 | 42 | % Testing mincrossrefs not reached. cr4 is cited, cr5 isn't, therefore mincrossrefs (2) for 43 | % crn not reached 44 | @INBOOK{xr4, 45 | AUTHOR = {Megan Mistrel}, 46 | TITLE = {Lumbering Lunatics}, 47 | ORIGDATE = {1933}, 48 | XREF = {xrn} 49 | } 50 | 51 | @INBOOK{xr5, 52 | AUTHOR = {Kenneth Kunrath}, 53 | TITLE = {Dreadful Dreary Days}, 54 | ORIGDATE = {1900}, 55 | XREF = {xrn} 56 | } 57 | 58 | @BOOK{xrn, 59 | EDITOR = {Victor Vivacious}, 60 | TITLE = {Examples of Excellent Exaggerations}, 61 | PUBLISHER = {Oxford}, 62 | YEAR = {1935} 63 | } 64 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_preambles.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import bibtexparser 3 | from bibtexparser.bibdatabase import BibDatabase 4 | from collections import OrderedDict 5 | 6 | 7 | class TestPreambleParse(unittest.TestCase): 8 | def test_single_preamble_parse_count(self): 9 | bibtex_str = '@preamble{" a "}\n\n' 10 | bib_database = bibtexparser.loads(bibtex_str) 11 | self.assertEqual(len(bib_database.preambles), 1) 12 | 13 | def test_multiple_preamble_parse_count(self): 14 | bibtex_str = '@preamble{" a "}\n\n@preamble{"b"}\n\n' 15 | bib_database = bibtexparser.loads(bibtex_str) 16 | self.assertEqual(len(bib_database.preambles), 2) 17 | 18 | def test_single_preamble_parse(self): 19 | bibtex_str = '@preamble{" a "}\n\n' 20 | bib_database = bibtexparser.loads(bibtex_str) 21 | expected = [' a '] 22 | self.assertEqual(bib_database.preambles, expected) 23 | 24 | def test_multiple_preamble_parse(self): 25 | bibtex_str = '@preamble{" a "}\n\n@preamble{"b"}\n\n' 26 | bib_database = bibtexparser.loads(bibtex_str) 27 | expected = [' a ', 'b'] 28 | self.assertEqual(bib_database.preambles, expected) 29 | 30 | 31 | class TestPreambleWrite(unittest.TestCase): 32 | def test_single_preamble_write(self): 33 | bib_database = BibDatabase() 34 | bib_database.preambles = [' a '] 35 | result = bibtexparser.dumps(bib_database) 36 | expected = '@preamble{" a "}\n\n' 37 | self.assertEqual(result, expected) 38 | 39 | def test_multiple_string_write(self): 40 | bib_database = BibDatabase() 41 | bib_database.preambles = [' a ', 'b'] 42 | result = bibtexparser.dumps(bib_database) 43 | expected = '@preamble{" a "}\n\n@preamble{"b"}\n\n' 44 | self.assertEqual(result, expected) 45 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | How to install and test? 3 | ======================== 4 | 5 | How to install? 6 | =============== 7 | 8 | Requirements 9 | ------------ 10 | 11 | * python **2.7** or python **3.3** or newer 12 | * pyparsing **2.0.3** or newer 13 | 14 | Package manager (recommended for those OS users) 15 | ------------------------------------------------ 16 | 17 | * `Archlinux `_ 18 | * `Debian `_ 19 | 20 | pip (recommended to other users) 21 | --------------------------------- 22 | 23 | To install with pip: 24 | 25 | .. code-block:: sh 26 | 27 | pip install bibtexparser 28 | 29 | 30 | Manual installation (recommended for packagers) 31 | ----------------------------------------------- 32 | 33 | Download the archive on `Pypi `_. 34 | 35 | .. code-block:: sh 36 | 37 | python setup.py install 38 | 39 | 40 | How to run the test suite? 41 | ========================== 42 | 43 | This paragraph briefly describes how to run the test suite. 44 | This is useful for contributors, for packagers but also for users who wants to check their environment. 45 | 46 | 47 | Virtualenv 48 | ---------- 49 | 50 | You can make a virtualenv. I like `pew `_ for that because the API is easier. 51 | 52 | The first time, you need to make a virtualenv 53 | 54 | .. code-block:: sh 55 | 56 | pew mkproject bibtexparser 57 | pip install -r requirements.txt 58 | python setup.py install 59 | nosetest 60 | 61 | 62 | If you already have a virtualenv, you can use workon 63 | 64 | .. code-block:: sh 65 | 66 | pew workon bibtexparser 67 | 68 | 69 | Tox 70 | --- 71 | 72 | The advantage of `Tox `_ is that you can build and test the code against several versions of python. 73 | Of course, you need tox to be installed on your system. 74 | The configuration file is tox.ini, in the root of the project. There, you can change the python versions. 75 | 76 | .. code-block:: sh 77 | 78 | tox # and nothing more :) 79 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_homogenise_fields.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | import io 5 | import unittest 6 | from bibtexparser.bparser import BibTexParser 7 | 8 | 9 | class TestHomogenizeFields(unittest.TestCase): 10 | 11 | def test_homogenize_default(self): 12 | with open('bibtexparser/tests/data/website.bib', 'r') as bibfile: 13 | bib = BibTexParser(bibfile.read()) 14 | entries = bib.get_entry_list() 15 | self.assertNotIn('url', entries[0]) 16 | self.assertIn('link', entries[0]) 17 | 18 | def test_homogenize_on(self): 19 | with open('bibtexparser/tests/data/website.bib', 'r') as bibfile: 20 | bib = BibTexParser(bibfile.read(), homogenize_fields=True) 21 | entries = bib.get_entry_list() 22 | self.assertIn('url', entries[0]) 23 | self.assertNotIn('link', entries[0]) 24 | 25 | def test_homogenize_off(self): 26 | with open('bibtexparser/tests/data/website.bib', 'r') as bibfile: 27 | bib = BibTexParser(bibfile.read(), homogenize_fields=False) 28 | entries = bib.get_entry_list() 29 | self.assertNotIn('url', entries[0]) 30 | self.assertIn('link', entries[0]) 31 | 32 | def test_homogenizes_fields(self): 33 | self.maxDiff = None 34 | with io.open('bibtexparser/tests/data/article_homogenize.bib', 35 | 'r', encoding='utf-8') as bibfile: 36 | bib = BibTexParser(bibfile.read(), homogenize_fields=True) 37 | expected_dict = { 38 | 'Cesar2013': { 39 | 'keyword': 'keyword1, keyword2', 40 | 'ENTRYTYPE': 'article', 41 | 'abstract': 'This is an abstract. This line should be ' 42 | 'long enough to test\nmultilines... and with ' 43 | 'a french érudit word', 44 | 'year': '2013', 45 | 'journal': 'Nice Journal', 46 | 'ID': 'Cesar2013', 47 | 'pages': '12-23', 48 | 'title': 'An amazing title', 49 | 'comments': 'A comment', 50 | 'author': 'Jean César', 51 | 'volume': '12', 52 | 'month': 'jan', 53 | 'url': "http://my.link/to-content", 54 | 'subject': "Some topic of interest", 55 | 'editor': "Edith Or", 56 | } 57 | } 58 | self.assertEqual(bib.get_entry_dict(), expected_dict) 59 | -------------------------------------------------------------------------------- /docs/source/logging.rst: -------------------------------------------------------------------------------- 1 | How to report a bug? 2 | ==================== 3 | 4 | Bugs can be reported on github or via private communications. 5 | 6 | Steps 7 | ----- 8 | 9 | 1. Make a minimal code, which reproduces the problem. 10 | 2. Provide the code, the bibtex (if necessary), the output. 11 | 3. For a parsing error, provide the expected output. 12 | 4. For a crash, set the logger to the debug level (see below). 13 | 14 | If you want to provide a patch (that's wonderful! thank you), please, take few minutes to write a unit test that fails without your contribution. 15 | 16 | Logging module to understand failures 17 | ------------------------------------- 18 | 19 | Syntax of bibtex files is simple but there are many possible variations. This library probably fails for some of them. 20 | 21 | Bibtexparser includes a large quantity of debug messages which helps to understand why and where the parser fails. 22 | The example below can be used to print these messages in the console. 23 | 24 | .. code-block:: python 25 | 26 | import logging 27 | import logging.config 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | logging.config.dictConfig({ 32 | 'version': 1, 33 | 'disable_existing_loggers': False, 34 | 'formatters': { 35 | 'standard': { 36 | 'format': '%(asctime)s [%(levelname)s] %(name)s %(funcName)s:%(lineno)d: %(message)s' 37 | }, 38 | }, 39 | 'handlers': { 40 | 'default': { 41 | 'level':'DEBUG', 42 | 'formatter': 'standard', 43 | 'class':'logging.StreamHandler', 44 | }, 45 | }, 46 | 'loggers': { 47 | '': { 48 | 'handlers': ['default'], 49 | 'level': 'DEBUG', 50 | 'formatter': 'standard', 51 | 'propagate': True 52 | } 53 | } 54 | }) 55 | 56 | 57 | if __name__ == '__main__': 58 | bibtex = """@ARTICLE{Cesar2013, 59 | author = {Jean César}, 60 | title = {An amazing title}, 61 | year = {2013}, 62 | month = jan, 63 | volume = {12}, 64 | pages = {12--23}, 65 | journal = {Nice Journal}, 66 | abstract = {This is an abstract. This line should be long enough to test 67 | multilines...}, 68 | comments = {A comment}, 69 | keywords = {keyword1, keyword2}, 70 | } 71 | """ 72 | 73 | with open('/tmp/bibtex.bib', 'w') as bibfile: 74 | bibfile.write(bibtex) 75 | 76 | from bibtexparser.bparser import BibTexParser 77 | 78 | with open('/tmp/bibtex.bib', 'r') as bibfile: 79 | bp = BibTexParser(bibfile.read()) 80 | print(bp.get_entry_list()) 81 | 82 | I recommend you to use this output if you would like to report a bug. 83 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | v1.xxx 2 | ====== 3 | 4 | v1.1.0 5 | ====== 6 | 7 | * BUGFIX: Fix for pyparsing 2.3.1 (#226) 8 | * NEW: Add support for BibLaTeX annotations (#208) 9 | * NEW: Feature: crossref Support (#216) 10 | * ENH: Handles declarations on lines starting with spaces after comments (#199) 11 | * ENH: Checks for empty citekeys and whitespaces (#213) 12 | 13 | v1.0.1 14 | ====== 15 | 16 | * BUGFIX: future missing in setup.py 17 | 18 | v1.0 19 | ==== 20 | 21 | * ENH: we use pyparsing (#64) by Olivier Magin. 22 | * DOC: Refactoring of the tutorial 23 | * DOC: include docs/ in manifest 24 | * API: fix spelling "homogenize". Affects customization and bparser 25 | * API: BibtexParser: homogenize_fields is now False by default (#94) 26 | 27 | v0.6.2 28 | ====== 29 | 30 | * ENH: customization: handle various hyphens (#76). 31 | * ENH: writer: all values according to this maximal key width (#83). 32 | * END: writer: display_order allows to have custom ordering of the fields of 33 | each entry instead of just alphabetical (#83) by cschaffner. 34 | * FIX: bad support of braces in string (#90) by sangkilc. 35 | 36 | v0.6.1 37 | ====== 38 | 39 | * API: Previous type and id keywords which are automatically added to 40 | the dictionnary are now ENTRYTYPE and ID, respectively (#42). 41 | * ENH: comma first syntax support (#49) by Michal Grochmal. 42 | 43 | v0.6.0 44 | ====== 45 | 46 | * DOC: clarify version number 47 | * ENH: support for bibtex with leading spaces (#34) 48 | * FIX: if title contained multiples words in braces 49 | * ENH: code refactoring (#33) 50 | * ENH: support for comment blocks (#32) 51 | * ENH: Removed comma after last key-value pair by faph (#28) 52 | * ENH: optional keys sanitising by faph (#29) 53 | * FIX: missing coma at the end of a record (#24) 54 | * DOC: clarify the usecase of to_bibtex 55 | * FIX: raise exception for TypeError in to_bibtex (#22) 56 | 57 | v0.5.5 58 | ====== 59 | 60 | * ENH: json output 61 | * ENH: Add (optional) support for non-standard entry types by Georg C. Brückmann 62 | * FIX: protect uppercase only on unprotected characters. #18 63 | * ENH: string replacement by Uwe Schmidt (#13 #20) 64 | 65 | v0.5.4 66 | ====== 67 | 68 | * ENH: json output 69 | * API: enhance the naming choice for bwriter 70 | 71 | v0.5.3 72 | ====== 73 | 74 | * ENH: add writer (#16), thanks to Lucas Verney 75 | * MAINT: Remove non-standard --BREAK-- command detection 76 | * FIX: missing strip() (#14) by Sebastien Diemer 77 | * API breakage: the parser takes data instead of a filehandler 78 | 79 | v0.5.2 80 | ====== 81 | 82 | * ENH: fix tests latex encoding 83 | * ENH: support @comment @preambule (escaped) 84 | * ENH: check that bibtype belongs to a known type 85 | 86 | v0.5.1 87 | ====== 88 | 89 | * ENH: split keywords with various separators 90 | * ENH: get_entry_dict make the dict once 91 | * ENH: add messages with logging 92 | * FIX: fix unittest related to braces detection 93 | 94 | v0.5 95 | ==== 96 | 97 | * Permission from original authors and OKFN to use LGPLv3 98 | * ENH: Python 2.7 support 99 | * FIX: issue related to accents 100 | 101 | v0.4 102 | ==== 103 | 104 | * ENH: Transformations on characters are now considered as a customization 105 | * ENH: New customization: clean latex style 106 | * FIX: issue related to name processing 107 | 108 | v0.3 109 | ==== 110 | 111 | * DOC: moved to readsthedoc 112 | * DOC: several improvements 113 | * MAINT: separate customizations 114 | 115 | v0.2 116 | ==== 117 | 118 | * TEST: initialized 119 | * DOC: initialized 120 | 121 | v0.1 122 | ==== 123 | 124 | * First preliminary release 125 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/crossref_entries.bib: -------------------------------------------------------------------------------- 1 | % From biber test data : t/tdata/crossrefs.bib 2 | % Kept initial comment but not for our purpose 3 | 4 | % Testing mincrossrefs. cr1 and cr2 crossrefs should trigger inclusion of cr_m and also 5 | % the crossref fields in both of them 6 | % Also a test of some aliases 7 | @INBOOK{cr1, 8 | AUTHOR = {Graham Gullam}, 9 | TITLE = {Great and Good Graphs}, 10 | ORIGDATE = {1955}, 11 | ARCHIVEPREFIX = {SomEPrFiX}, 12 | PRIMARYCLASS = {SOMECLASS}, 13 | CROSSREF = {cr_m} 14 | } 15 | 16 | @INBOOK{cr2, 17 | AUTHOR = {Frederick Fumble}, 18 | TITLE = {Fabulous Fourier Forms}, 19 | SCHOOL = {School}, 20 | INSTITUTION = {Institution}, 21 | ORIGDATE = {1943}, 22 | CROSSREF = {cr_m} 23 | } 24 | 25 | @BOOK{cr_m, 26 | EDITOR = {Edgar Erbriss}, 27 | TITLE = {Graphs of the Continent}, 28 | PUBLISHER = {Grimble}, 29 | YEAR = {1974} 30 | } 31 | 32 | % Testing explicit cite of crossref parent. Should trigger inclusion of child crossref field 33 | @INBOOK{cr3, 34 | AUTHOR = {Arthur Aptitude}, 35 | TITLE = {Arrangements of All Articles}, 36 | ORIGDATE = {1934}, 37 | ARCHIVEPREFIX = {SomEPrFiX}, 38 | EPRINTTYPE = {sometype}, 39 | CROSSREF = {crt} 40 | } 41 | 42 | @BOOK{crt, 43 | EDITOR = {Mark Monkley}, 44 | TITLE = {Beasts of the Burbling Burns}, 45 | PUBLISHER = {Rancour}, 46 | YEAR = {1996} 47 | } 48 | 49 | % Testing mincrossrefs not reached. cr4 is cited, cr5 isn't, therefore mincrossrefs (2) for 50 | % crn not reached 51 | @INBOOK{cr4, 52 | AUTHOR = {Morris Mumble}, 53 | TITLE = {Enterprising Entities}, 54 | ORIGDATE = {1911}, 55 | CROSSREF = {crn} 56 | } 57 | 58 | @INBOOK{cr5, 59 | AUTHOR = {Oliver Ordinary}, 60 | TITLE = {Questionable Quidities}, 61 | ORIGDATE = {1919}, 62 | CROSSREF = {crn} 63 | } 64 | 65 | @BOOK{crn, 66 | EDITOR = {Jeremy Jermain}, 67 | TITLE = {Vanquished, Victor, Vandal}, 68 | PUBLISHER = {Pillsbury}, 69 | YEAR = {1945} 70 | } 71 | 72 | % Testing inheritance of event information 73 | @PROCEEDINGS{cr6i, 74 | AUTHOR = {Spurious Author}, 75 | ADDRESS = {Address}, 76 | TITLE = {Title of proceeding}, 77 | EDITOR = {Editor}, 78 | PUBLISHER = {Publisher of proceeding}, 79 | EVENTDATE = {2009-08-21/2009-08-24}, 80 | EVENTTITLE = {Title of the event}, 81 | VENUE = {Location of event}, 82 | YEAR = {2009} 83 | } 84 | 85 | @INPROCEEDINGS{cr6, 86 | AUTHOR = {Author, Firstname}, 87 | CROSSREF = {cr6i}, 88 | PAGES = {123--}, 89 | TITLE = {Title of inproceeding}, 90 | BOOKTITLE = {Manual booktitle}, 91 | YEAR = {2009}, 92 | } 93 | 94 | % Testing inheritance of special fields (booktitle, bookauthor etc.) 95 | @BOOK{cr7i, 96 | AUTHOR = {Brian Bookauthor}, 97 | TITLE = {Book Title}, 98 | SUBTITLE = {Book Subtitle}, 99 | TITLEADDON = {Book Titleaddon}, 100 | PUBLISHER = {Publisher of proceeding}, 101 | YEAR = {2009}, 102 | VERBA = {String}, 103 | } 104 | 105 | @INBOOK{cr7, 106 | AUTHOR = {Author, Firstname}, 107 | CROSSREF = {cr7i}, 108 | PAGES = {123--126}, 109 | TITLE = {Title of Book bit}, 110 | YEAR = {2010} 111 | } 112 | 113 | % Testing supression of default inheritance 114 | @COLLECTION{cr8i, 115 | EDITOR = {Brian Editor}, 116 | TITLE = {Book Title}, 117 | SUBTITLE = {Book Subtitle}, 118 | TITLEADDON = {Book Titleaddon}, 119 | PUBLISHER = {Publisher of Collection}, 120 | YEAR = {2009} 121 | } 122 | 123 | @INCOLLECTION{cr8, 124 | AUTHOR = {Smith, Firstname}, 125 | CROSSREF = {cr8i}, 126 | PAGES = {1--12}, 127 | TITLE = {Title of Collection bit}, 128 | YEAR = {2010} 129 | } 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /bibtexparser/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | `BibTeX `_ is a bibliographic data file format. 3 | 4 | The :mod:`bibtexparser` module can parse BibTeX files and write them. The API is similar to the 5 | :mod:`json` module. The parsed data is returned as a simple :class:`BibDatabase` object with the main attribute being 6 | :attr:`entries` representing bibliographic sources such as books and journal articles. 7 | 8 | The following functions provide a quick and basic way to manipulate a BibTeX file. 9 | More advanced features are also available in this module. 10 | 11 | Parsing a file is as simple as:: 12 | 13 | import bibtexparser 14 | with open('bibtex.bib') as bibtex_file: 15 | bibtex_database = bibtexparser.load(bibtex_file) 16 | 17 | And writing:: 18 | 19 | import bibtexparser 20 | with open('bibtex.bib', 'w') as bibtex_file: 21 | bibtexparser.dump(bibtex_database, bibtex_file) 22 | 23 | """ 24 | __all__ = [ 25 | 'loads', 'load', 'dumps', 'dump', 'bibdatabase', 26 | 'bparser', 'bwriter', 'bibtexexpression', 'latexenc', 'customization', 27 | ] 28 | __version__ = '1.1.0' 29 | 30 | import sys 31 | 32 | from . import bibdatabase, bibtexexpression, bparser, bwriter, latexenc, customization 33 | 34 | 35 | def loads(bibtex_str, parser=None): 36 | """ 37 | Load :class:`BibDatabase` object from a string 38 | 39 | :param bibtex_str: input BibTeX string to be parsed 40 | :type bibtex_str: str or unicode 41 | :param parser: custom parser to use (optional) 42 | :type parser: BibTexParser 43 | :returns: bibliographic database object 44 | :rtype: BibDatabase 45 | """ 46 | if parser is None: 47 | parser = bparser.BibTexParser() 48 | return parser.parse(bibtex_str) 49 | 50 | 51 | def load(bibtex_file, parser=None): 52 | """ 53 | Load :class:`BibDatabase` object from a file 54 | 55 | :param bibtex_file: input file to be parsed 56 | :type bibtex_file: file 57 | :param parser: custom parser to use (optional) 58 | :type parser: BibTexParser 59 | :returns: bibliographic database object 60 | :rtype: BibDatabase 61 | 62 | Example:: 63 | 64 | import bibtexparser 65 | with open('bibtex.bib') as bibtex_file: 66 | bibtex_database = bibtexparser.load(bibtex_file) 67 | 68 | """ 69 | if parser is None: 70 | parser = bparser.BibTexParser() 71 | return parser.parse_file(bibtex_file) 72 | 73 | 74 | def dumps(bib_database, writer=None): 75 | """ 76 | Dump :class:`BibDatabase` object to a BibTeX string 77 | 78 | :param bib_database: bibliographic database object 79 | :type bib_database: BibDatabase 80 | :param writer: custom writer to use (optional) (not yet implemented) 81 | :type writer: BibTexWriter 82 | :returns: BibTeX string 83 | :rtype: unicode 84 | """ 85 | if writer is None: 86 | writer = bwriter.BibTexWriter() 87 | return writer.write(bib_database) 88 | 89 | 90 | def dump(bib_database, bibtex_file, writer=None): 91 | """ 92 | Dump :class:`BibDatabase` object as a BibTeX text file 93 | 94 | :param bib_database: bibliographic database object 95 | :type bib_database: BibDatabase 96 | :param bibtex_file: file to write to 97 | :type bibtex_file: file 98 | :param writer: custom writer to use (optional) (not yet implemented) 99 | :type writer: BibTexWriter 100 | 101 | Example:: 102 | 103 | import bibtexparser 104 | with open('bibtex.bib', 'w') as bibtex_file: 105 | bibtexparser.dump(bibtex_database, bibtex_file) 106 | 107 | """ 108 | if writer is None: 109 | writer = bwriter.BibTexWriter() 110 | if sys.version_info >= (3, 0): 111 | bibtex_file.write(writer.write(bib_database)) 112 | else: 113 | # Encode to UTF-8 114 | bibtex_file.write(writer.write(bib_database).encode("utf-8")) 115 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_latexenc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | #This program is free software: you can redistribute it and/or modify 4 | #it under the terms of the GNU General Public License as published by 5 | #the Free Software Foundation, either version 3 of the License, or 6 | #(at your option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, 9 | #but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | #GNU General Public License for more details. 12 | # 13 | #You should have received a copy of the GNU General Public License 14 | #along with this program. If not, see 15 | # 16 | # Author: Francois Boulogne , 2012 17 | 18 | from __future__ import unicode_literals 19 | import unittest 20 | 21 | from bibtexparser.latexenc import (string_to_latex, latex_to_unicode, 22 | protect_uppercase) 23 | 24 | 25 | class TestLatexConverter(unittest.TestCase): 26 | 27 | def test_accent(self): 28 | string = 'à é è ö' 29 | result = string_to_latex(string) 30 | expected = "{\`a} {\\\'e} {\`e} {\\\"o}" 31 | self.assertEqual(result, expected) 32 | 33 | def test_special_caracter(self): 34 | string = 'ç' 35 | result = string_to_latex(string) 36 | expected = '{\c c}' 37 | self.assertEqual(result, expected) 38 | 39 | 40 | class TestUppercaseProtection(unittest.TestCase): 41 | 42 | def test_uppercase(self): 43 | string = 'An upPer Case A' 44 | result = protect_uppercase(string) 45 | expected = '{A}n up{P}er {C}ase {A}' 46 | self.assertEqual(result, expected) 47 | 48 | def test_lowercase(self): 49 | string = 'a' 50 | result = protect_uppercase(string) 51 | expected = 'a' 52 | self.assertEqual(result, expected) 53 | 54 | def test_alreadyprotected(self): 55 | string = '{A}, m{A}gnificient, it is a {A}...' 56 | result = protect_uppercase(string) 57 | expected = '{A}, m{A}gnificient, it is a {A}...' 58 | self.assertEqual(result, expected) 59 | 60 | def test_traps(self): 61 | string = '{A, m{Agnificient, it is a {A' 62 | result = protect_uppercase(string) 63 | expected = '{A, m{Agnificient, it is a {A' 64 | self.assertEqual(result, expected) 65 | 66 | def test_traps2(self): 67 | string = 'A}, mA}gnificient, it is a A}' 68 | result = protect_uppercase(string) 69 | expected = 'A}, mA}gnificient, it is a A}' 70 | self.assertEqual(result, expected) 71 | 72 | 73 | class TestUnicodeConversion(unittest.TestCase): 74 | 75 | def test_accents(self): 76 | string = "{\`a} {\\\'e} {\`e} {\\\"o}" 77 | result = latex_to_unicode(string) 78 | expected = 'à é è ö' 79 | self.assertEqual(result, expected) 80 | 81 | def test_ignores_trailing_modifier(self): 82 | string = "a\\\'" 83 | result = latex_to_unicode(string) 84 | expected = 'a' 85 | self.assertEqual(result, expected) 86 | 87 | def test_special_caracter(self): 88 | string = '{\c c}' 89 | result = latex_to_unicode(string) 90 | expected = 'ç' 91 | self.assertEqual(result, expected) 92 | 93 | def test_does_not_modify_existing_combining(self): 94 | string = b'ph\xc6\xa1\xcc\x89'.decode('utf8') 95 | result = latex_to_unicode(string) 96 | expected = 'phở' # normalized 97 | self.assertEqual(result, expected) 98 | 99 | def test_does_not_modify_two_existing_combining(self): 100 | string = b'pho\xcc\x9b\xcc\x89'.decode('utf8') 101 | result = latex_to_unicode(string) 102 | expected = 'phở' # normalized 103 | self.assertEqual(result, expected) 104 | 105 | 106 | if __name__ == '__main__': 107 | unittest.main() 108 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibdatabase.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from bibtexparser.bibdatabase import (BibDatabase, BibDataString, 3 | BibDataStringExpression) 4 | 5 | 6 | class TestBibDatabase(unittest.TestCase): 7 | entries = [{'ENTRYTYPE': 'book', 8 | 'year': '1987', 9 | 'edition': '2', 10 | 'publisher': 'Wiley Edition', 11 | 'ID': 'Bird1987', 12 | 'volume': '1', 13 | 'title': 'Dynamics of Polymeric Liquid', 14 | 'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.' 15 | }] 16 | 17 | def test_entries_list_method(self): 18 | bib_db = BibDatabase() 19 | bib_db.entries = self.entries 20 | self.assertEqual(bib_db.entries, bib_db.get_entry_list()) 21 | 22 | def test_entries_dict_prop(self): 23 | bib_db = BibDatabase() 24 | bib_db.entries = self.entries 25 | self.assertEqual(bib_db.entries_dict, bib_db.get_entry_dict()) 26 | 27 | 28 | class TestBibDataString(unittest.TestCase): 29 | 30 | def setUp(self): 31 | self.bd = BibDatabase() 32 | 33 | def test_name_is_lower(self): 34 | bds = BibDataString(self.bd, 'nAmE') 35 | self.assertTrue(bds.name.islower()) 36 | 37 | def test_raises_KeyError(self): 38 | bds = BibDataString(self.bd, 'name') 39 | with self.assertRaises(KeyError): 40 | bds.get_value() 41 | 42 | def test_get_value(self): 43 | bds = BibDataString(self.bd, 'name') 44 | self.bd.strings['name'] = 'value' 45 | self.assertEqual(bds.get_value(), 'value') 46 | 47 | def test_expand_string(self): 48 | bds = BibDataString(self.bd, 'name') 49 | self.bd.strings['name'] = 'value' 50 | self.assertEqual(BibDataString.expand_string('name'), 'name') 51 | self.assertEqual(BibDataString.expand_string(bds), 'value') 52 | 53 | def test_get_value_string_is_defined_by_expression(self): 54 | self.bd.strings['name'] = 'string' 55 | exp = BibDataStringExpression(['this is a ', 56 | BibDataString(self.bd, 'name')]) 57 | self.bd.strings['exp'] = exp 58 | bds = BibDataString(self.bd, 'exp') 59 | self.assertEqual(bds.get_value(), 'this is a string') 60 | 61 | def test_strings_are_equal_iif_name_is_equal(self): 62 | self.bd.strings['a'] = 'foo' 63 | self.bd.strings['b'] = 'foo' 64 | a1 = BibDataString(self.bd, 'a') 65 | a2 = BibDataString(self.bd, 'a') 66 | b = BibDataString(self.bd, 'b') 67 | self.assertEqual(a1, a2) 68 | self.assertNotEqual(a1, b) 69 | self.assertNotEqual(a1, b) 70 | self.assertNotEqual(a1, "foo") 71 | 72 | 73 | class TestBibDataStringExpression(unittest.TestCase): 74 | 75 | def setUp(self): 76 | self.bd = BibDatabase() 77 | self.bd.strings['name'] = 'value' 78 | self.bds = BibDataString(self.bd, 'name') 79 | 80 | def test_get_value(self): 81 | exp = BibDataStringExpression( 82 | ["The string has value: ", self.bds, '.']) 83 | self.assertEqual(exp.get_value(), 'The string has value: value.') 84 | 85 | def test_raises_KeyError(self): 86 | bds = BibDataString(self.bd, 'unknown') 87 | exp = BibDataStringExpression([bds, self.bds, 'text']) 88 | with self.assertRaises(KeyError): 89 | exp.get_value() 90 | 91 | def test_equations_are_equal_iif_same(self): 92 | a1 = BibDataString(self.bd, 'a') 93 | a2 = BibDataString(self.bd, 'a') 94 | exp = BibDataStringExpression([a1, self.bds, 'text']) 95 | self.assertEqual(exp, BibDataStringExpression([a2, self.bds, 'text'])) 96 | self.assertNotEqual(exp, BibDataStringExpression(['foo', self.bds, 'text'])) 97 | self.assertNotEqual(exp, 'foovaluetext') 98 | 99 | 100 | if __name__ == '__main__': 101 | unittest.main() 102 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibtexexpression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | from __future__ import unicode_literals 6 | import unittest 7 | 8 | from bibtexparser.bibtexexpression import BibtexExpression 9 | 10 | 11 | class TestBibtexExpression(unittest.TestCase): 12 | 13 | def setUp(self): 14 | self.expr = BibtexExpression() 15 | 16 | def test_minimal(self): 17 | result = self.expr.entry.parseString('@journal{key, name = 123 }') 18 | self.assertEqual(result.get('EntryType'), 'journal') 19 | self.assertEqual(result.get('Key'), 'key') 20 | self.assertEqual(result.get('Fields'), {'name': '123'}) 21 | 22 | def test_capital_type(self): 23 | result = self.expr.entry.parseString('@JOURNAL{key, name = 123 }') 24 | self.assertEqual(result.get('EntryType'), 'JOURNAL') 25 | 26 | def test_capital_key(self): 27 | result = self.expr.entry.parseString('@journal{KEY, name = 123 }') 28 | self.assertEqual(result.get('Key'), 'KEY') 29 | 30 | def test_braced(self): 31 | result = self.expr.entry.parseString('@journal{key, name = {abc} }') 32 | self.assertEqual(result.get('Fields'), {'name': 'abc'}) 33 | 34 | def test_braced_with_new_line(self): 35 | result = self.expr.entry.parseString( 36 | '@journal{key, name = {abc\ndef} }') 37 | self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'}) 38 | 39 | def test_braced_unicode(self): 40 | result = self.expr.entry.parseString( 41 | '@journal{key, name = {àbcđéf} }') 42 | self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'}) 43 | 44 | def test_quoted(self): 45 | result = self.expr.entry.parseString('@journal{key, name = "abc" }') 46 | self.assertEqual(result.get('Fields'), {'name': 'abc'}) 47 | 48 | def test_quoted_with_new_line(self): 49 | result = self.expr.entry.parseString( 50 | '@journal{key, name = "abc\ndef" }') 51 | self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'}) 52 | 53 | def test_quoted_with_unicode(self): 54 | result = self.expr.entry.parseString( 55 | '@journal{key, name = "àbcđéf" }') 56 | self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'}) 57 | 58 | def test_entry_declaration_after_space(self): 59 | self.expr.entry.parseString(' @journal{key, name = {abcd}}') 60 | 61 | def test_entry_declaration_no_key(self): 62 | with self.assertRaises(self.expr.ParseException): 63 | self.expr.entry.parseString('@misc{name = {abcd}}') 64 | 65 | def test_entry_declaration_no_key_new_line(self): 66 | with self.assertRaises(self.expr.ParseException): 67 | self.expr.entry.parseString('@misc{\n name = {abcd}}') 68 | 69 | def test_entry_declaration_no_key_comma(self): 70 | with self.assertRaises(self.expr.ParseException): 71 | self.expr.entry.parseString('@misc{, \nname = {abcd}}') 72 | 73 | def test_entry_declaration_no_key_keyvalue_without_space(self): 74 | with self.assertRaises(self.expr.ParseException): 75 | self.expr.entry.parseString('@misc{\nname=aaa}') 76 | 77 | def test_entry_declaration_key_with_whitespace(self): 78 | with self.assertRaises(self.expr.ParseException): 79 | self.expr.entry.parseString('@misc{ xx yy, \n name = aaa}') 80 | 81 | def test_string_declaration_after_space(self): 82 | self.expr.string_def.parseString(' @string{ name = {abcd}}') 83 | 84 | def test_preamble_declaration_after_space(self): 85 | self.expr.preamble_decl.parseString(' @preamble{ "blah blah " }') 86 | 87 | def test_declaration_after_space(self): 88 | keys = [] 89 | self.expr.entry.addParseAction( 90 | lambda s, l, t: keys.append(t.get('Key')) 91 | ) 92 | self.expr.main_expression.parseString(' @journal{key, name = {abcd}}') 93 | self.assertEqual(keys, ['key']) 94 | 95 | def test_declaration_after_space_and_comment(self): 96 | keys = [] 97 | self.expr.entry.addParseAction( 98 | lambda s, l, t: keys.append(t.get('Key')) 99 | ) 100 | self.expr.main_expression.parseString( 101 | '% Implicit comment\n @article{key, name={abcd}}' 102 | ) 103 | self.assertEqual(keys, ['key']) 104 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bwriter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Author: Francois Boulogne 4 | # License: 5 | 6 | from __future__ import unicode_literals 7 | 8 | import unittest 9 | import os 10 | import io 11 | import sys 12 | 13 | from bibtexparser.bparser import BibTexParser 14 | from bibtexparser.bwriter import BibTexWriter, to_bibtex 15 | from bibtexparser.customization import author 16 | 17 | 18 | def _data_path(filename): 19 | return os.path.join('bibtexparser/tests/data', filename) 20 | 21 | 22 | class TestBibtexWriterList(unittest.TestCase): 23 | 24 | def test_article(self): 25 | with io.open(_data_path('article.bib'), 'r') as bibfile: 26 | bib = BibTexParser(bibfile.read()) 27 | 28 | with io.open(_data_path('article_output.bib'), 'r') as bibfile: 29 | expected = bibfile.read() 30 | result = to_bibtex(bib) 31 | self.maxDiff = None 32 | self.assertEqual(expected, result) 33 | 34 | def test_article_with_annotation(self): 35 | with io.open(_data_path('article_with_annotation.bib'), 'r') as bibfile: 36 | bib = BibTexParser(bibfile.read()) 37 | 38 | with io.open(_data_path('article_with_annotation_output.bib'), 'r') \ 39 | as bibfile: 40 | expected = bibfile.read() 41 | result = to_bibtex(bib) 42 | self.maxDiff = None 43 | self.assertEqual(expected, result) 44 | 45 | def test_book(self): 46 | with io.open(_data_path('book.bib'), 'r') as bibfile: 47 | bib = BibTexParser(bibfile.read()) 48 | 49 | with io.open(_data_path('book_output.bib'), 'r') as bibfile: 50 | expected = bibfile.read() 51 | result = to_bibtex(bib) 52 | self.maxDiff = None 53 | self.assertEqual(expected, result) 54 | 55 | def test_comma_first(self): 56 | with io.open(_data_path('book.bib'), 'r') as bibfile: 57 | bib = BibTexParser(bibfile.read()) 58 | 59 | with io.open(_data_path('book_comma_first.bib'), 'r') as bibfile: 60 | expected = bibfile.read() 61 | writer = BibTexWriter() 62 | writer.indent = ' ' 63 | writer.comma_first = True 64 | result = writer.write(bib) 65 | self.maxDiff = None 66 | self.assertEqual(expected, result) 67 | 68 | def test_multiple(self): 69 | with io.open(_data_path('multiple_entries.bib'), 'r') as bibfile: 70 | bib = BibTexParser(bibfile.read()) 71 | 72 | with io.open(_data_path('multiple_entries_output.bib'), 'r') as bibfile: 73 | expected = bibfile.read() 74 | result = to_bibtex(bib) 75 | self.maxDiff = None 76 | self.assertEqual(expected, result) 77 | 78 | def test_exception_typeerror(self): 79 | with io.open(_data_path('article.bib'), 'r') as bibfile: 80 | bib = BibTexParser(bibfile.read(), customization=author) 81 | self.assertRaises(TypeError, to_bibtex, bib) 82 | 83 | def test_with_strings(self): 84 | with io.open(_data_path('article_with_strings.bib'), 'r') as bibfile: 85 | bib = BibTexParser(bibfile.read(), common_strings=True, 86 | interpolate_strings=False) 87 | with io.open(_data_path( 88 | 'article_with_strings_output.bib'), 'r') as bibfile: 89 | expected = bibfile.read() 90 | result = to_bibtex(bib) 91 | self.maxDiff = None 92 | self.assertEqual(expected, result) 93 | 94 | def test_trailing_comma(self): 95 | with io.open(_data_path('article.bib'), 'r') as bibfile: 96 | bib = BibTexParser(bibfile.read()) 97 | 98 | with io.open(_data_path('article_trailing_comma_output.bib'), 'r') as bibfile: 99 | expected = bibfile.read() 100 | writer = BibTexWriter() 101 | writer.add_trailing_comma = True 102 | result = writer.write(bib) 103 | self.maxDiff = None 104 | self.assertEqual(expected, result) 105 | 106 | def test_comma_first_and_trailing_comma(self): 107 | with io.open(_data_path('article.bib'), 'r') as bibfile: 108 | bib = BibTexParser(bibfile.read()) 109 | 110 | with io.open(_data_path('article_comma_first_and_trailing_comma_output.bib'), 'r') as bibfile: 111 | expected = bibfile.read() 112 | writer = BibTexWriter() 113 | writer.add_trailing_comma = True 114 | writer.comma_first = True 115 | result = writer.write(bib) 116 | self.maxDiff = None 117 | self.assertEqual(expected, result) 118 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibtexparser.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import bibtexparser 3 | from bibtexparser.bparser import BibTexParser 4 | from tempfile import TemporaryFile 5 | 6 | 7 | class TestBibtexParserParserMethods(unittest.TestCase): 8 | input_file_path = 'bibtexparser/tests/data/book.bib' 9 | input_bom_file_path = 'bibtexparser/tests/data/book_bom.bib' 10 | entries_expected = [{'ENTRYTYPE': 'book', 11 | 'year': '1987', 12 | 'edition': '2', 13 | 'publisher': 'Wiley Edition', 14 | 'ID': 'Bird1987', 15 | 'volume': '1', 16 | 'title': 'Dynamics of Polymeric Liquid', 17 | 'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.', 18 | }] 19 | 20 | def test_parse_immediately(self): 21 | with open(self.input_file_path) as bibtex_file: 22 | bibtex_str = bibtex_file.read() 23 | bibtex_database = BibTexParser(bibtex_str) 24 | self.assertEqual(bibtex_database.entries, self.entries_expected) 25 | 26 | def test_parse_str(self): 27 | parser = BibTexParser() 28 | with open(self.input_file_path) as bibtex_file: 29 | bibtex_str = bibtex_file.read() 30 | bibtex_database = parser.parse(bibtex_str) 31 | self.assertEqual(bibtex_database.entries, self.entries_expected) 32 | 33 | def test_parse_bom_str(self): 34 | parser = BibTexParser() 35 | with open(self.input_bom_file_path) as bibtex_file: 36 | bibtex_str = bibtex_file.read() 37 | bibtex_database = parser.parse(bibtex_str) 38 | self.assertEqual(bibtex_database.entries, self.entries_expected) 39 | 40 | def test_parse_bom_bytes(self): 41 | parser = BibTexParser() 42 | with open(self.input_bom_file_path, 'rb') as bibtex_file: 43 | bibtex_str = bibtex_file.read() 44 | bibtex_database = parser.parse(bibtex_str) 45 | self.assertEqual(bibtex_database.entries, self.entries_expected) 46 | 47 | def test_parse_file(self): 48 | parser = BibTexParser() 49 | with open(self.input_file_path) as bibtex_file: 50 | bibtex_database = parser.parse_file(bibtex_file) 51 | self.assertEqual(bibtex_database.entries, self.entries_expected) 52 | 53 | def test_parse_str_module(self): 54 | with open(self.input_file_path) as bibtex_file: 55 | bibtex_str = bibtex_file.read() 56 | bibtex_database = bibtexparser.loads(bibtex_str) 57 | self.assertEqual(bibtex_database.entries, self.entries_expected) 58 | 59 | def test_parse_file_module(self): 60 | with open(self.input_file_path) as bibtex_file: 61 | bibtex_database = bibtexparser.load(bibtex_file) 62 | self.assertEqual(bibtex_database.entries, self.entries_expected) 63 | 64 | 65 | class TestBibtexparserWriteMethods(unittest.TestCase): 66 | input_file_path = 'bibtexparser/tests/data/book.bib' 67 | expected = \ 68 | """@book{Bird1987, 69 | author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.}, 70 | edition = {2}, 71 | publisher = {Wiley Edition}, 72 | title = {Dynamics of Polymeric Liquid}, 73 | volume = {1}, 74 | year = {1987} 75 | } 76 | 77 | """ 78 | 79 | def test_write_str(self): 80 | with open(self.input_file_path) as bibtex_file: 81 | bibtex_database = bibtexparser.load(bibtex_file) 82 | result = bibtexparser.dumps(bibtex_database) 83 | self.assertEqual(result, self.expected) 84 | 85 | def test_write_file(self): 86 | with open(self.input_file_path) as bibtex_file: 87 | bibtex_database = bibtexparser.load(bibtex_file) 88 | 89 | with TemporaryFile(mode='w+') as bibtex_out_file: 90 | bibtexparser.dump(bibtex_database, bibtex_out_file) 91 | bibtex_out_file.seek(0) 92 | bibtex_out_str = bibtex_out_file.read() 93 | 94 | self.assertEqual(bibtex_out_str, self.expected) 95 | 96 | class TestBibtexparserFieldNames(unittest.TestCase): 97 | input_file_path = 'bibtexparser/tests/data/fieldname.bib' 98 | entries_expected = [{'ENTRYTYPE': 'book', 99 | 'ID': 'Bird1987', 100 | 'dc.date': '2004-01' 101 | }] 102 | 103 | def test_parse_immediately(self): 104 | with open(self.input_file_path) as bibtex_file: 105 | bibtex_str = bibtex_file.read() 106 | bibtex_database = BibTexParser(bibtex_str) 107 | self.assertEqual(bibtex_database.entries, self.entries_expected) 108 | 109 | if __name__ == '__main__': 110 | unittest.main() 111 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibtex_strings.py: -------------------------------------------------------------------------------- 1 | import io 2 | import unittest 3 | import codecs 4 | import bibtexparser 5 | from bibtexparser.bibdatabase import (BibDatabase, BibDataString, 6 | BibDataStringExpression) 7 | from bibtexparser.bparser import BibTexParser 8 | from bibtexparser.bwriter import BibTexWriter 9 | from collections import OrderedDict 10 | 11 | 12 | class TestStringParse(unittest.TestCase): 13 | 14 | def test_single_string_parse_count(self): 15 | bibtex_str = '@string{name1 = "value1"}\n\n' 16 | bib_database = bibtexparser.loads(bibtex_str) 17 | self.assertEqual(len(bib_database.strings), 1) 18 | 19 | def test_multiple_string_parse_count(self): 20 | bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n' 21 | bib_database = bibtexparser.loads(bibtex_str) 22 | self.assertEqual(len(bib_database.strings), 2) 23 | 24 | def test_single_string_parse(self): 25 | bibtex_str = '@string{name1 = "value1"}\n\n' 26 | bib_database = bibtexparser.loads(bibtex_str) 27 | expected = {'name1': 'value1'} 28 | self.assertEqual(bib_database.strings, expected) 29 | 30 | def test_multiple_string_parse(self): 31 | bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n' 32 | bib_database = bibtexparser.loads(bibtex_str) 33 | expected = OrderedDict() 34 | expected['name1'] = 'value1' 35 | expected['name2'] = 'value2' 36 | self.assertEqual(bib_database.strings, expected) 37 | 38 | def test_string_braces(self): 39 | with codecs.open('bibtexparser/tests/data/string.bib', 'r', 'utf-8') as bibfile: 40 | bib = BibTexParser(bibfile.read()) 41 | res = bib.get_entry_list() 42 | expected = [{'author': 'Sang Kil Cha and Maverick Woo and David Brumley', 43 | 'ID': 'cha:oakland15', 44 | 'year': '2015', 45 | 'booktitle': 'Proceedings of the {IEEE} Symposium on Security and Privacy', 46 | 'title': '{Program-Adaptive Mutational Fuzzing}', 47 | 'ENTRYTYPE': 'inproceedings', 48 | 'pages': '725--741' 49 | }] 50 | self.assertEqual(res, expected) 51 | 52 | def test_string_parse_accept_chars(self): 53 | bibtex_str = '@string{pub-ieee-std = {IEEE}}\n\n@string{pub-ieee-std:adr = {New York, NY, USA}}' 54 | bib_database = bibtexparser.loads(bibtex_str) 55 | self.assertEqual(len(bib_database.strings), 2) 56 | expected = OrderedDict() 57 | expected['pub-ieee-std'] = 'IEEE' 58 | expected['pub-ieee-std:adr'] = 'New York, NY, USA' 59 | self.assertEqual(bib_database.strings, expected) 60 | 61 | 62 | class TestStringWrite(unittest.TestCase): 63 | 64 | def test_single_string_write(self): 65 | bib_database = BibDatabase() 66 | bib_database.strings['name1'] = 'value1' 67 | result = bibtexparser.dumps(bib_database) 68 | expected = '@string{name1 = {value1}}\n\n' 69 | self.assertEqual(result, expected) 70 | 71 | def test_multiple_string_write(self): 72 | bib_database = BibDatabase() 73 | bib_database.strings['name1'] = 'value1' 74 | bib_database.strings['name2'] = 'value2' # Order is important! 75 | result = bibtexparser.dumps(bib_database) 76 | expected = '@string{name1 = {value1}}\n\n@string{name2 = {value2}}\n\n' 77 | self.assertEqual(result, expected) 78 | 79 | def test_ignore_common_strings(self): 80 | bib_database = BibDatabase() 81 | bib_database.load_common_strings() 82 | result = bibtexparser.dumps(bib_database) 83 | self.assertEqual(result, '') 84 | 85 | def test_ignore_common_strings_only_if_not_overloaded(self): 86 | bib_database = BibDatabase() 87 | bib_database.load_common_strings() 88 | bib_database.strings['jan'] = 'Janvier' 89 | result = bibtexparser.dumps(bib_database) 90 | self.assertEqual(result, '@string{jan = {Janvier}}\n\n') 91 | 92 | def test_write_common_strings(self): 93 | bib_database = BibDatabase() 94 | bib_database.load_common_strings() 95 | writer = BibTexWriter(write_common_strings=True) 96 | result = bibtexparser.dumps(bib_database, writer=writer) 97 | with io.open('bibtexparser/tests/data/common_strings.bib') as f: 98 | expected = f.read() 99 | self.assertEqual(result, expected) 100 | 101 | def test_write_dependent_strings(self): 102 | bib_database = BibDatabase() 103 | bib_database.strings['title'] = 'Mr' 104 | expr = BibDataStringExpression([BibDataString(bib_database, 'title'), 'Smith']) 105 | bib_database.strings['name'] = expr 106 | result = bibtexparser.dumps(bib_database) 107 | expected = '@string{title = {Mr}}\n\n@string{name = title # {Smith}}\n\n' 108 | self.assertEqual(result, expected) 109 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_customization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import unicode_literals 5 | import unittest 6 | 7 | from bibtexparser.customization import getnames, convert_to_unicode, homogenize_latex_encoding, page_double_hyphen, keyword, add_plaintext_fields 8 | 9 | 10 | class TestBibtexParserMethod(unittest.TestCase): 11 | 12 | ########### 13 | # getnames 14 | ########### 15 | def test_getnames(self): 16 | names = ['Foo Bar', 17 | 'Foo B. Bar', 18 | 'F. B. Bar', 19 | 'F.B. Bar', 20 | 'F. Bar', 21 | 'Jean de Savigny', 22 | 'Jean la Tour', 23 | 'Jean le Tour', 24 | 'Mike ben Akar', 25 | #'Jean de la Tour', 26 | #'Johannes Diderik van der Waals', 27 | ] 28 | result = getnames(names) 29 | expected = ['Bar, Foo', 30 | 'Bar, Foo B.', 31 | 'Bar, F. B.', 32 | 'Bar, F. B.', 33 | 'Bar, F.', 34 | 'de Savigny, Jean', 35 | 'la Tour, Jean', 36 | 'le Tour, Jean', 37 | 'ben Akar, Mike', 38 | #'de la Tour, Jean', 39 | #'van der Waals, Johannes Diderik', 40 | ] 41 | self.assertEqual(result, expected) 42 | 43 | @unittest.skip('Bug #9') 44 | def test_getnames_braces(self): 45 | names = ['A. {Delgado de Molina}', 'M. Vign{\\\'e}'] 46 | result = getnames(names) 47 | expected = ['Delgado de Molina, A.', 'Vigné, M.'] 48 | self.assertEqual(result, expected) 49 | 50 | ########### 51 | # page_double_hyphen 52 | ########### 53 | def test_page_double_hyphen_alreadyOK(self): 54 | record = {'pages': '12--24'} 55 | result = page_double_hyphen(record) 56 | expected = record 57 | self.assertEqual(result, expected) 58 | 59 | def test_page_double_hyphen_simple(self): 60 | record = {'pages': '12-24'} 61 | result = page_double_hyphen(record) 62 | expected = {'pages': '12--24'} 63 | self.assertEqual(result, expected) 64 | 65 | def test_page_double_hyphen_space(self): 66 | record = {'pages': '12 - 24'} 67 | result = page_double_hyphen(record) 68 | expected = {'pages': '12--24'} 69 | self.assertEqual(result, expected) 70 | 71 | def test_page_double_hyphen_nothing(self): 72 | record = {'pages': '12 24'} 73 | result = page_double_hyphen(record) 74 | expected = {'pages': '12 24'} 75 | self.assertEqual(result, expected) 76 | 77 | ########### 78 | # convert to unicode 79 | ########### 80 | def test_convert_to_unicode(self): 81 | record = {'toto': '{\`a} \`{a}'} 82 | result = convert_to_unicode(record) 83 | expected = {'toto': 'à à'} 84 | self.assertEqual(result, expected) 85 | record = {'toto': '{\\"u} \\"{u}'} 86 | result = convert_to_unicode(record) 87 | expected = {'toto': 'ü ü'} 88 | self.assertEqual(result, expected) 89 | # From issue 121 90 | record = {'title': '{Two Gedenk\\"uberlieferung der Angelsachsen}'} 91 | result = convert_to_unicode(record) 92 | expected = {'title': 'Two Gedenküberlieferung der Angelsachsen'} 93 | self.assertEqual(result, expected) 94 | # From issue 161 95 | record = {'title': r"p\^{a}t\'{e}"} 96 | result = convert_to_unicode(record) 97 | expected = {'title': "pâté"} 98 | self.assertEqual(result, expected) 99 | record = {'title': r"\^{i}le"} 100 | result = convert_to_unicode(record) 101 | expected = {'title': "île"} 102 | self.assertEqual(result, expected) 103 | record = {'title': r"\texttimes{}{\texttimes}\texttimes"} 104 | result = convert_to_unicode(record) 105 | expected = {'title': "×××"} 106 | self.assertEqual(result, expected) 107 | 108 | ########### 109 | # homogenize 110 | ########### 111 | def test_homogenize(self): 112 | record = {'toto': 'à {\`a} \`{a}'} 113 | result = homogenize_latex_encoding(record) 114 | expected = {'toto': '{\`a} {\`a} {\`a}'} 115 | self.assertEqual(result, expected) 116 | 117 | ########### 118 | # add_plaintext_fields 119 | ########### 120 | def test_add_plaintext_fields(self): 121 | record = { 122 | 'title': 'On-line {Recognition} of {Handwritten} {Mathematical} {Symbols}', 123 | 'foobar': ['{FFT} {Foobar}', '{foobar}'], 124 | 'foobar2': {'item1': '{FFT} {Foobar}', 'item2': '{foobar}'} 125 | } 126 | result = add_plaintext_fields(record) 127 | expected = { 128 | 'title': 'On-line {Recognition} of {Handwritten} {Mathematical} {Symbols}', 129 | 'plain_title': 'On-line Recognition of Handwritten Mathematical Symbols', 130 | 'foobar': ['{FFT} {Foobar}', '{foobar}'], 131 | 'plain_foobar': ['FFT Foobar', 'foobar'], 132 | 'foobar2': {'item1': '{FFT} {Foobar}', 'item2': '{foobar}'}, 133 | 'plain_foobar2': {'item1': 'FFT Foobar', 'item2': 'foobar'} 134 | } 135 | self.assertEqual(result, expected) 136 | 137 | ########### 138 | # keywords 139 | ########### 140 | def test_keywords(self): 141 | record = {'keyword': "a b, a b , a b;a b ; a b, a b\n"} 142 | result = keyword(record) 143 | expected = {'keyword': ['a b'] * 6} 144 | self.assertEqual(result, expected) 145 | 146 | if __name__ == '__main__': 147 | unittest.main() 148 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BibtexParser.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BibtexParser.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/BibtexParser" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BibtexParser" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_comments.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from bibtexparser.bparser import BibTexParser 3 | from bibtexparser.bwriter import to_bibtex 4 | 5 | 6 | """ The code is supposed to treat comments the following way: 7 | Each @Comment opens a comment that ends when something 8 | that is not a comment is encountered. More precisely 9 | this means a line starting with an @. Lines that are not 10 | parsed as anything else are also considered comments. 11 | If the comment starts and ends with braces, they are removed. 12 | 13 | Current issues: 14 | - a comment followed by a line starting with @smthing 15 | that is not a valid bibtex element are parsed separately, 16 | that is as two comments. 17 | - braces are either ignored or removed which is not easily 18 | predictable. 19 | """ 20 | 21 | 22 | class TestParseComment(unittest.TestCase): 23 | 24 | def test_comment_count(self): 25 | with open('bibtexparser/tests/data/features.bib') as bibfile: 26 | bib = BibTexParser(bibfile.read()) 27 | self.assertEqual(len(bib.comments), 3) 28 | 29 | def test_comment_list(self): 30 | with open('bibtexparser/tests/data/features.bib') as bibfile: 31 | bib = BibTexParser(bibfile.read()) 32 | expected = ["ignore this line!", 33 | "ignore this line too!", 34 | "and ignore this line too!"] 35 | self.assertEqual(bib.comments, expected) 36 | 37 | def test_multiline_comments(self): 38 | with open('bibtexparser/tests/data/multiline_comments.bib') as bibfile: 39 | bib = BibTexParser(bibfile.read()) 40 | expected = [ 41 | """Lorem ipsum dolor sit amet, 42 | consectetur adipisicing elit""", 43 | """ 44 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 45 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 46 | 47 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 48 | Excepteur sint occaecat cupidatat non proident. 49 | , 50 | """, 51 | """ 52 | 53 | 54 | Sunt in culpa qui officia deserunt mollit anim id est laborum. 55 | 56 | 57 | """, 58 | "" 59 | ] 60 | self.maxDiff = None 61 | self.assertEqual(bib.comments, expected) 62 | 63 | def test_multiple_entries(self): 64 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibfile: 65 | bparser = BibTexParser() 66 | bib = bparser.parse_file(bibfile) 67 | expected = ["", 68 | "A comment"] 69 | self.assertEqual(bib.comments, expected) 70 | 71 | def test_comments_percentage(self): 72 | with open('bibtexparser/tests/data/comments_percentage.bib', 'r') as bibfile: 73 | bib = BibTexParser(bibfile.read()) 74 | res = bib.get_entry_list() 75 | expected = [{'ENTRYTYPE': 'article', 76 | 'journal': 'Nice Journal', 77 | 'volume': '12', 78 | 'ID': 'Cesar2013', 79 | 'year': '2013', 80 | 'author': 'Jean Cesar', 81 | 'comments': 'A comment', 82 | 'keyword': 'keyword1, keyword2', 83 | 'title': 'An amazing title' 84 | }, 85 | {'ENTRYTYPE': 'article', 86 | 'journal': 'Nice Journal', 87 | 'volume': '12', 88 | 'ID': 'Baltazar2013', 89 | 'year': '2013', 90 | 'author': 'Jean Baltazar', 91 | 'comments': 'A comment', 92 | 'keyword': 'keyword1, keyword2', 93 | 'title': 'An amazing title' 94 | }] 95 | self.assertEqual(res, expected) 96 | 97 | def test_comments_percentage_nocoma(self): 98 | with open('bibtexparser/tests/data/comments_percentage_nolastcoma.bib', 'r') as bibfile: 99 | bib = BibTexParser(bibfile.read()) 100 | res = bib.get_entry_list() 101 | expected = [{'ENTRYTYPE': 'article', 102 | 'journal': 'Nice Journal', 103 | 'volume': '12', 104 | 'ID': 'Cesar2013', 105 | 'year': '2013', 106 | 'author': 'Jean Cesar', 107 | 'comments': 'A comment', 108 | 'keyword': 'keyword1, keyword2', 109 | 'title': 'An amazing title' 110 | }, 111 | {'ENTRYTYPE': 'article', 112 | 'journal': 'Nice Journal', 113 | 'volume': '12', 114 | 'ID': 'Baltazar2013', 115 | 'year': '2013', 116 | 'author': 'Jean Baltazar', 117 | 'comments': 'A comment', 118 | 'keyword': 'keyword1, keyword2', 119 | 'title': 'An amazing title' 120 | }] 121 | self.assertEqual(res, expected) 122 | 123 | def test_no_newline(self): 124 | comments = """This is a comment.""" 125 | expected = ["This is a comment."] 126 | bib = BibTexParser(comments) 127 | self.assertEqual(bib.comments, expected) 128 | 129 | def test_43(self): 130 | comment = "@STRING{foo = \"bar\"}\n" \ 131 | "This is a comment\n" \ 132 | "This is a second comment." 133 | expected = "This is a comment\nThis is a second comment." 134 | bib = BibTexParser(comment) 135 | self.assertEqual(bib.comments, [expected]) 136 | self.assertEqual(bib.strings, {'foo': 'bar'}) 137 | 138 | def test_43_bis(self): 139 | comment = "@STRING{foo = \"bar\"}\n" \ 140 | "This is a comment\n" \ 141 | "STRING{Baz = \"This should be interpreted as comment.\"}" 142 | expected = "This is a comment\n" \ 143 | "STRING{Baz = \"This should be interpreted as comment.\"}" 144 | bib = BibTexParser(comment) 145 | self.assertEqual(bib.comments, [expected]) 146 | self.assertEqual(bib.strings, {'foo': 'bar'}) 147 | 148 | 149 | class TestWriteComment(unittest.TestCase): 150 | def test_comment_write(self): 151 | with open('bibtexparser/tests/data/comments_only.bib') as bibfile: 152 | bib = BibTexParser(bibfile.read()) 153 | 154 | with open('bibtexparser/tests/data/comments_only_output.bib') as bibfile: 155 | expected = bibfile.read() 156 | result = to_bibtex(bib) 157 | self.assertEqual(result, expected) 158 | 159 | def test_multiline_comment_write(self): 160 | with open('bibtexparser/tests/data/multiline_comments.bib') as bibfile: 161 | expected = bibfile.read() 162 | 163 | bib = BibTexParser(expected) 164 | result = to_bibtex(bib) 165 | self.assertEqual(result, expected) 166 | 167 | def test_multiple_entries(self): 168 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibfile: 169 | bib = BibTexParser(bibfile.read()) 170 | with open('bibtexparser/tests/data/multiple_entries_and_comments_output.bib') as bibfile: 171 | expected = bibfile.read() 172 | result = to_bibtex(bib) 173 | self.assertEqual(result, expected) 174 | -------------------------------------------------------------------------------- /bibtexparser/bwriter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Author: Francois Boulogne 4 | # License: 5 | 6 | 7 | import logging 8 | from bibtexparser.bibdatabase import (BibDatabase, COMMON_STRINGS, 9 | BibDataString, 10 | BibDataStringExpression) 11 | 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | __all__ = ['BibTexWriter'] 16 | 17 | 18 | def to_bibtex(parsed): 19 | """ 20 | Convenience function for backwards compatibility. 21 | """ 22 | return BibTexWriter().write(parsed) 23 | 24 | 25 | def _str_or_expr_to_bibtex(e): 26 | if isinstance(e, BibDataStringExpression): 27 | return ' # '.join([_str_or_expr_to_bibtex(s) for s in e.expr]) 28 | elif isinstance(e, BibDataString): 29 | return e.name 30 | else: 31 | return '{' + e + '}' 32 | 33 | 34 | class BibTexWriter(object): 35 | """ 36 | Writer to convert a :class:`BibDatabase` object to a string or file formatted as a BibTeX file. 37 | 38 | Example:: 39 | 40 | from bibtexparser.bwriter import BibTexWriter 41 | 42 | bib_database = ... 43 | 44 | writer = BibTexWriter() 45 | writer.contents = ['comments', 'entries'] 46 | writer.indent = ' ' 47 | writer.order_entries_by = ('ENTRYTYPE', 'author', 'year') 48 | bibtex_str = bibtexparser.dumps(bib_database, writer) 49 | 50 | """ 51 | 52 | _valid_contents = ['entries', 'comments', 'preambles', 'strings'] 53 | 54 | def __init__(self, write_common_strings=False): 55 | #: List of BibTeX elements to write, valid values are `entries`, `comments`, `preambles`, `strings`. 56 | self.contents = ['comments', 'preambles', 'strings', 'entries'] 57 | #: Character(s) for indenting BibTeX field-value pairs. Default: single space. 58 | self.indent = ' ' 59 | #: Align values. Determines the maximal number of characters used in any fieldname and aligns all values 60 | # according to that by filling up with single spaces. Default: False 61 | self.align_values = False 62 | #: Characters(s) for separating BibTeX entries. Default: new line. 63 | self.entry_separator = '\n' 64 | #: Tuple of fields for ordering BibTeX entries. Set to `None` to disable sorting. Default: BibTeX key `('ID', )`. 65 | self.order_entries_by = ('ID', ) 66 | #: Tuple of fields for display order in a single BibTeX entry. Fields not listed here will be displayed 67 | #: alphabetically at the end. Set to '[]' for alphabetical order. Default: '[]' 68 | self.display_order = [] 69 | #: BibTeX syntax allows comma first syntax 70 | #: (common in functional languages), use this to enable 71 | #: comma first syntax as the bwriter output 72 | self.comma_first = False 73 | #: BibTeX syntax allows the comma to be optional at the end of the last field in an entry. 74 | #: Use this to enable writing this last comma in the bwriter output. Defaults: False. 75 | self.add_trailing_comma = False 76 | #: internal variable used if self.align_values = True 77 | self._max_field_width = 0 78 | #: Whether common strings are written 79 | self.common_strings = write_common_strings 80 | 81 | def write(self, bib_database): 82 | """ 83 | Converts a bibliographic database to a BibTeX-formatted string. 84 | 85 | :param bib_database: bibliographic database to be converted to a BibTeX string 86 | :type bib_database: BibDatabase 87 | :return: BibTeX-formatted string 88 | :rtype: str or unicode 89 | """ 90 | bibtex = '' 91 | for content in self.contents: 92 | try: 93 | # Add each element set (entries, comments) 94 | bibtex += getattr(self, '_' + content + '_to_bibtex')(bib_database) 95 | except AttributeError: 96 | logger.warning("BibTeX item '{}' does not exist and will not be written. Valid items are {}." 97 | .format(content, self._valid_contents)) 98 | return bibtex 99 | 100 | def _entries_to_bibtex(self, bib_database): 101 | bibtex = '' 102 | if self.order_entries_by: 103 | # TODO: allow sort field does not exist for entry 104 | entries = sorted(bib_database.entries, key=lambda x: BibDatabase.entry_sort_key(x, self.order_entries_by)) 105 | else: 106 | entries = bib_database.entries 107 | 108 | if self.align_values: 109 | # determine maximum field width to be used 110 | widths = [max(map(len, entry.keys())) for entry in entries] 111 | self._max_field_width = max(widths) 112 | 113 | for entry in entries: 114 | bibtex += self._entry_to_bibtex(entry) 115 | return bibtex 116 | 117 | def _entry_to_bibtex(self, entry): 118 | bibtex = '' 119 | # Write BibTeX key 120 | bibtex += '@' + entry['ENTRYTYPE'] + '{' + entry['ID'] 121 | 122 | # create display_order of fields for this entry 123 | # first those keys which are both in self.display_order and in entry.keys 124 | display_order = [i for i in self.display_order if i in entry] 125 | # then all the other fields sorted alphabetically 126 | display_order += [i for i in sorted(entry) if i not in self.display_order] 127 | if self.comma_first: 128 | field_fmt = u"\n{indent}, {field:<{field_max_w}} = {value}" 129 | else: 130 | field_fmt = u",\n{indent}{field:<{field_max_w}} = {value}" 131 | # Write field = value lines 132 | for field in [i for i in display_order if i not in ['ENTRYTYPE', 'ID']]: 133 | try: 134 | bibtex += field_fmt.format( 135 | indent=self.indent, 136 | field=field, 137 | field_max_w=self._max_field_width, 138 | value=_str_or_expr_to_bibtex(entry[field])) 139 | except TypeError: 140 | raise TypeError(u"The field %s in entry %s must be a string" 141 | % (field, entry['ID'])) 142 | if self.add_trailing_comma: 143 | if self.comma_first: 144 | bibtex += '\n'+self.indent+',' 145 | else: 146 | bibtex += ',' 147 | bibtex += "\n}\n" + self.entry_separator 148 | return bibtex 149 | 150 | def _comments_to_bibtex(self, bib_database): 151 | return ''.join(['@comment{{{0}}}\n{1}'.format(comment, self.entry_separator) 152 | for comment in bib_database.comments]) 153 | 154 | def _preambles_to_bibtex(self, bib_database): 155 | return ''.join(['@preamble{{"{0}"}}\n{1}'.format(preamble, self.entry_separator) 156 | for preamble in bib_database.preambles]) 157 | 158 | def _strings_to_bibtex(self, bib_database): 159 | return ''.join([ 160 | u'@string{{{name} = {value}}}\n{sep}'.format( 161 | name=name, 162 | value=_str_or_expr_to_bibtex(value), 163 | sep=self.entry_separator) 164 | for name, value in bib_database.strings.items() 165 | if (self.common_strings or 166 | name not in COMMON_STRINGS or # user defined string 167 | value != COMMON_STRINGS[name] # string has been updated 168 | )]) 169 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # BibtexParser documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Aug 1 13:30:23 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys, os 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | sys.path.insert(0, os.path.abspath('../..')) 21 | 22 | # -- General configuration ----------------------------------------------------- 23 | 24 | # If your documentation needs a minimal Sphinx version, state it here. 25 | #needs_sphinx = '1.0' 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 29 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.viewcode'] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # The suffix of source filenames. 35 | source_suffix = '.rst' 36 | 37 | # The encoding of source files. 38 | #source_encoding = 'utf-8-sig' 39 | 40 | # The master toctree document. 41 | master_doc = 'index' 42 | 43 | # General information about the project. 44 | project = 'BibtexParser' 45 | copyright = '2013-2016, F. Boulogne and other contributors' 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | try: 52 | import bibtexparser as bp 53 | # The short X.Y version. 54 | version = bp.__version__ 55 | # The full version, including alpha/beta/rc tags. 56 | release = bp.__version__ 57 | except ImportError: 58 | version = 'latest' 59 | release = 'latest' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | #language = None 64 | 65 | # There are two options for replacing |today|: either, you set today to some 66 | # non-false value, then it is used: 67 | #today = '' 68 | # Else, today_fmt is used as the format for a strftime call. 69 | #today_fmt = '%B %d, %Y' 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | exclude_patterns = [] 74 | 75 | # The reST default role (used for this markup: `text`) to use for all documents. 76 | #default_role = None 77 | 78 | # If true, '()' will be appended to :func: etc. cross-reference text. 79 | #add_function_parentheses = True 80 | 81 | # If true, the current module name will be prepended to all description 82 | # unit titles (such as .. function::). 83 | #add_module_names = True 84 | 85 | # If true, sectionauthor and moduleauthor directives will be shown in the 86 | # output. They are ignored by default. 87 | #show_authors = False 88 | 89 | # The name of the Pygments (syntax highlighting) style to use. 90 | pygments_style = 'sphinx' 91 | 92 | # A list of ignored prefixes for module index sorting. 93 | #modindex_common_prefix = [] 94 | 95 | # -- Options for HTML output --------------------------------------------------- 96 | 97 | # The theme to use for HTML and HTML Help pages. See the documentation for 98 | # a list of builtin themes. 99 | html_theme = 'default' 100 | 101 | # Theme options are theme-specific and customize the look and feel of a theme 102 | # further. For a list of options available for each theme, see the 103 | # documentation. 104 | #html_theme_options = {} 105 | 106 | # Add any paths that contain custom themes here, relative to this directory. 107 | #html_theme_path = [] 108 | 109 | # The name for this set of Sphinx documents. If None, it defaults to 110 | # " v documentation". 111 | #html_title = None 112 | 113 | # A shorter title for the navigation bar. Default is the same as html_title. 114 | #html_short_title = None 115 | 116 | # The name of an image file (relative to this directory) to place at the top 117 | # of the sidebar. 118 | #html_logo = None 119 | 120 | # The name of an image file (within the static path) to use as favicon of the 121 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 122 | # pixels large. 123 | #html_favicon = None 124 | 125 | # Add any paths that contain custom static files (such as style sheets) here, 126 | # relative to this directory. They are copied after the builtin static files, 127 | # so a file named "default.css" will overwrite the builtin "default.css". 128 | #html_static_path = ['_static'] 129 | 130 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 131 | # using the given strftime format. 132 | #html_last_updated_fmt = '%b %d, %Y' 133 | 134 | # If true, SmartyPants will be used to convert quotes and dashes to 135 | # typographically correct entities. 136 | #html_use_smartypants = True 137 | 138 | # Custom sidebar templates, maps document names to template names. 139 | #html_sidebars = {} 140 | 141 | # Additional templates that should be rendered to pages, maps page names to 142 | # template names. 143 | #html_additional_pages = {} 144 | 145 | # If false, no module index is generated. 146 | #html_domain_indices = True 147 | 148 | # If false, no index is generated. 149 | #html_use_index = True 150 | 151 | # If true, the index is split into individual pages for each letter. 152 | #html_split_index = False 153 | 154 | # If true, links to the reST sources are added to the pages. 155 | #html_show_sourcelink = True 156 | 157 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 158 | #html_show_sphinx = True 159 | 160 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 161 | #html_show_copyright = True 162 | 163 | # If true, an OpenSearch description file will be output, and all pages will 164 | # contain a tag referring to it. The value of this option must be the 165 | # base URL from which the finished HTML is served. 166 | #html_use_opensearch = '' 167 | 168 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 169 | #html_file_suffix = None 170 | 171 | # Output file base name for HTML help builder. 172 | htmlhelp_basename = 'BibtexParserdoc' 173 | 174 | 175 | # -- Options for LaTeX output -------------------------------------------------- 176 | 177 | latex_elements = { 178 | # The paper size ('letterpaper' or 'a4paper'). 179 | #'papersize': 'letterpaper', 180 | 181 | # The font size ('10pt', '11pt' or '12pt'). 182 | #'pointsize': '10pt', 183 | 184 | # Additional stuff for the LaTeX preamble. 185 | #'preamble': '', 186 | } 187 | 188 | # Grouping the document tree into LaTeX files. List of tuples 189 | # (source start file, target name, title, author, documentclass [howto/manual]). 190 | latex_documents = [ 191 | ('index', 'BibtexParser.tex', 'BibtexParser Documentation', 192 | 'F. Boulogne', 'manual'), 193 | ] 194 | 195 | # The name of an image file (relative to this directory) to place at the top of 196 | # the title page. 197 | #latex_logo = None 198 | 199 | # For "manual" documents, if this is true, then toplevel headings are parts, 200 | # not chapters. 201 | #latex_use_parts = False 202 | 203 | # If true, show page references after internal links. 204 | #latex_show_pagerefs = False 205 | 206 | # If true, show URL addresses after external links. 207 | #latex_show_urls = False 208 | 209 | # Documents to append as an appendix to all manuals. 210 | #latex_appendices = [] 211 | 212 | # If false, no module index is generated. 213 | #latex_domain_indices = True 214 | 215 | 216 | # -- Options for manual page output -------------------------------------------- 217 | 218 | # One entry per manual page. List of tuples 219 | # (source start file, name, description, authors, manual section). 220 | man_pages = [ 221 | ('index', 'bibtexparser', 'BibtexParser Documentation', 222 | ['F. Boulogne'], 1) 223 | ] 224 | 225 | # If true, show URL addresses after external links. 226 | #man_show_urls = False 227 | 228 | 229 | # -- Options for Texinfo output ------------------------------------------------ 230 | 231 | # Grouping the document tree into Texinfo files. List of tuples 232 | # (source start file, target name, title, author, 233 | # dir menu entry, description, category) 234 | texinfo_documents = [ 235 | ('index', 'BibtexParser', 'BibtexParser Documentation', 236 | 'F. Boulogne', 'BibtexParser', 'One line description of project.', 237 | 'Miscellaneous'), 238 | ] 239 | 240 | # Documents to append as an appendix to all manuals. 241 | #texinfo_appendices = [] 242 | 243 | # If false, no module index is generated. 244 | #texinfo_domain_indices = True 245 | 246 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 247 | #texinfo_show_urls = 'footnote' 248 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibtexwriter.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import tempfile 3 | import unittest 4 | import bibtexparser 5 | from bibtexparser.bwriter import BibTexWriter 6 | from bibtexparser.bibdatabase import BibDatabase 7 | 8 | 9 | class TestBibTexWriter(unittest.TestCase): 10 | def test_content_entries_only(self): 11 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file: 12 | bib_database = bibtexparser.load(bibtex_file) 13 | writer = BibTexWriter() 14 | writer.contents = ['entries'] 15 | result = bibtexparser.dumps(bib_database, writer) 16 | expected = \ 17 | """@book{Toto3000, 18 | author = {Toto, A and Titi, B}, 19 | title = {A title} 20 | } 21 | 22 | @article{Wigner1938, 23 | author = {Wigner, E.}, 24 | doi = {10.1039/TF9383400029}, 25 | issn = {0014-7672}, 26 | journal = {Trans. Faraday Soc.}, 27 | owner = {fr}, 28 | pages = {29--41}, 29 | publisher = {The Royal Society of Chemistry}, 30 | title = {The transition state method}, 31 | volume = {34}, 32 | year = {1938} 33 | } 34 | 35 | @book{Yablon2005, 36 | author = {Yablon, A.D.}, 37 | publisher = {Springer}, 38 | title = {Optical fiber fusion slicing}, 39 | year = {2005} 40 | } 41 | 42 | """ 43 | self.assertEqual(result, expected) 44 | 45 | def test_content_comment_only(self): 46 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file: 47 | bib_database = bibtexparser.load(bibtex_file) 48 | writer = BibTexWriter() 49 | writer.contents = ['comments'] 50 | result = bibtexparser.dumps(bib_database, writer) 51 | expected = \ 52 | """@comment{} 53 | 54 | @comment{A comment} 55 | 56 | """ 57 | self.assertEqual(result, expected) 58 | 59 | def test_indent(self): 60 | bib_database = BibDatabase() 61 | bib_database.entries = [{'ID': 'abc123', 62 | 'ENTRYTYPE': 'book', 63 | 'author': 'test'}] 64 | writer = BibTexWriter() 65 | writer.indent = ' ' 66 | result = bibtexparser.dumps(bib_database, writer) 67 | expected = \ 68 | """@book{abc123, 69 | author = {test} 70 | } 71 | 72 | """ 73 | self.assertEqual(result, expected) 74 | 75 | def test_align(self): 76 | bib_database = BibDatabase() 77 | bib_database.entries = [{'ID': 'abc123', 78 | 'ENTRYTYPE': 'book', 79 | 'author': 'test', 80 | 'thisisaverylongkey': 'longvalue'}] 81 | writer = BibTexWriter() 82 | writer.align_values = True 83 | result = bibtexparser.dumps(bib_database, writer) 84 | expected = \ 85 | """@book{abc123, 86 | author = {test}, 87 | thisisaverylongkey = {longvalue} 88 | } 89 | 90 | """ 91 | self.assertEqual(result, expected) 92 | 93 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file: 94 | bib_database = bibtexparser.load(bibtex_file) 95 | writer = BibTexWriter() 96 | writer.contents = ['entries'] 97 | writer.align_values = True 98 | result = bibtexparser.dumps(bib_database, writer) 99 | expected = \ 100 | """@book{Toto3000, 101 | author = {Toto, A and Titi, B}, 102 | title = {A title} 103 | } 104 | 105 | @article{Wigner1938, 106 | author = {Wigner, E.}, 107 | doi = {10.1039/TF9383400029}, 108 | issn = {0014-7672}, 109 | journal = {Trans. Faraday Soc.}, 110 | owner = {fr}, 111 | pages = {29--41}, 112 | publisher = {The Royal Society of Chemistry}, 113 | title = {The transition state method}, 114 | volume = {34}, 115 | year = {1938} 116 | } 117 | 118 | @book{Yablon2005, 119 | author = {Yablon, A.D.}, 120 | publisher = {Springer}, 121 | title = {Optical fiber fusion slicing}, 122 | year = {2005} 123 | } 124 | 125 | """ 126 | self.assertEqual(result, expected) 127 | 128 | 129 | def test_entry_separator(self): 130 | bib_database = BibDatabase() 131 | bib_database.entries = [{'ID': 'abc123', 132 | 'ENTRYTYPE': 'book', 133 | 'author': 'test'}] 134 | writer = BibTexWriter() 135 | writer.entry_separator = '' 136 | result = bibtexparser.dumps(bib_database, writer) 137 | expected = \ 138 | """@book{abc123, 139 | author = {test} 140 | } 141 | """ 142 | self.assertEqual(result, expected) 143 | 144 | def test_display_order(self): 145 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file: 146 | bib_database = bibtexparser.load(bibtex_file) 147 | writer = BibTexWriter() 148 | writer.contents = ['entries'] 149 | writer.display_order = ['year', 'publisher', 'title'] 150 | result = bibtexparser.dumps(bib_database, writer) 151 | expected = \ 152 | """@book{Toto3000, 153 | title = {A title}, 154 | author = {Toto, A and Titi, B} 155 | } 156 | 157 | @article{Wigner1938, 158 | year = {1938}, 159 | publisher = {The Royal Society of Chemistry}, 160 | title = {The transition state method}, 161 | author = {Wigner, E.}, 162 | doi = {10.1039/TF9383400029}, 163 | issn = {0014-7672}, 164 | journal = {Trans. Faraday Soc.}, 165 | owner = {fr}, 166 | pages = {29--41}, 167 | volume = {34} 168 | } 169 | 170 | @book{Yablon2005, 171 | year = {2005}, 172 | publisher = {Springer}, 173 | title = {Optical fiber fusion slicing}, 174 | author = {Yablon, A.D.} 175 | } 176 | 177 | """ 178 | self.assertEqual(result, expected) 179 | 180 | 181 | class TestEntrySorting(unittest.TestCase): 182 | bib_database = BibDatabase() 183 | bib_database.entries = [{'ID': 'b', 184 | 'ENTRYTYPE': 'article'}, 185 | {'ID': 'c', 186 | 'ENTRYTYPE': 'book'}, 187 | {'ID': 'a', 188 | 'ENTRYTYPE': 'book'}] 189 | 190 | def test_sort_default(self): 191 | result = bibtexparser.dumps(self.bib_database) 192 | expected = "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n" 193 | self.assertEqual(result, expected) 194 | 195 | def test_sort_none(self): 196 | writer = BibTexWriter() 197 | writer.order_entries_by = None 198 | result = bibtexparser.dumps(self.bib_database, writer) 199 | expected = "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n" 200 | self.assertEqual(result, expected) 201 | 202 | def test_sort_id(self): 203 | writer = BibTexWriter() 204 | writer.order_entries_by = ('ID', ) 205 | result = bibtexparser.dumps(self.bib_database, writer) 206 | expected = "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n" 207 | self.assertEqual(result, expected) 208 | 209 | def test_sort_type(self): 210 | writer = BibTexWriter() 211 | writer.order_entries_by = ('ENTRYTYPE', ) 212 | result = bibtexparser.dumps(self.bib_database, writer) 213 | expected = "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n" 214 | self.assertEqual(result, expected) 215 | 216 | def test_sort_type_id(self): 217 | writer = BibTexWriter() 218 | writer.order_entries_by = ('ENTRYTYPE', 'ID') 219 | result = bibtexparser.dumps(self.bib_database, writer) 220 | expected = "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n" 221 | self.assertEqual(result, expected) 222 | 223 | def test_sort_missing_field(self): 224 | bib_database = BibDatabase() 225 | bib_database.entries = [{'ID': 'b', 226 | 'ENTRYTYPE': 'article', 227 | 'year': '2000'}, 228 | {'ID': 'c', 229 | 'ENTRYTYPE': 'book', 230 | 'year': '2010'}, 231 | {'ID': 'a', 232 | 'ENTRYTYPE': 'book'}] 233 | writer = BibTexWriter() 234 | writer.order_entries_by = ('year', ) 235 | result = bibtexparser.dumps(bib_database, writer) 236 | expected = "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n@book{c,\n year = {2010}\n}\n\n" 237 | self.assertEqual(result, expected) 238 | 239 | def test_unicode_problems(self): 240 | # See #51 241 | bibtex = """ 242 | @article{Mesa-Gresa2013, 243 | abstract = {During a 4-week period half the mice (n = 16) were exposed to EE and the other half (n = 16) remained in a standard environment (SE). Aggr. Behav. 9999:XX-XX, 2013. © 2013 Wiley Periodicals, Inc.}, 244 | author = {Mesa-Gresa, Patricia and P\'{e}rez-Martinez, Asunci\'{o}n and Redolat, Rosa}, 245 | doi = {10.1002/ab.21481}, 246 | file = {:Users/jscholz/Documents/mendeley/Mesa-Gresa, P\'{e}rez-Martinez, Redolat - 2013 - Environmental Enrichment Improves Novel Object Recognition and Enhances Agonistic Behavior.pdf:pdf}, 247 | issn = {1098-2337}, 248 | journal = {Aggressive behavior}, 249 | month = "apr", 250 | number = {April}, 251 | pages = {269--279}, 252 | pmid = {23588702}, 253 | title = {{Environmental Enrichment Improves Novel Object Recognition and Enhances Agonistic Behavior in Male Mice.}}, 254 | url = {http://www.ncbi.nlm.nih.gov/pubmed/23588702}, 255 | volume = {39}, 256 | year = {2013} 257 | } 258 | """ 259 | bibdb = bibtexparser.loads(bibtex) 260 | with tempfile.TemporaryFile(mode='w+') as bibtex_file: 261 | bibtexparser.dump(bibdb, bibtex_file) 262 | # No exception should be raised 263 | 264 | -------------------------------------------------------------------------------- /bibtexparser/bibdatabase.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from collections import OrderedDict 5 | import sys 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | if sys.version_info >= (3, 0): 11 | ustr = str 12 | else: 13 | ustr = unicode 14 | 15 | 16 | STANDARD_TYPES = set([ 17 | 'article', 18 | 'book', 19 | 'booklet', 20 | 'conference', 21 | 'inbook', 22 | 'incollection', 23 | 'inproceedings', 24 | 'manual', 25 | 'mastersthesis', 26 | 'misc', 27 | 'phdthesis', 28 | 'proceedings', 29 | 'techreport', 30 | 'unpublished']) 31 | 32 | COMMON_STRINGS = OrderedDict([ 33 | ('jan', 'January'), 34 | ('feb', 'February'), 35 | ('mar', 'March'), 36 | ('apr', 'April'), 37 | ('may', 'May'), 38 | ('jun', 'June'), 39 | ('jul', 'July'), 40 | ('aug', 'August'), 41 | ('sep', 'September'), 42 | ('oct', 'October'), 43 | ('nov', 'November'), 44 | ('dec', 'December'), 45 | ]) 46 | 47 | 48 | class UndefinedString(KeyError): 49 | pass 50 | 51 | 52 | class BibDatabase(object): 53 | """ 54 | Bibliographic database object that follows the data structure of a BibTeX file. 55 | """ 56 | 57 | def __init__(self): 58 | #: List of BibTeX entries, for example `@book{...}`, `@article{...}`, etc. Each entry is a simple dict with 59 | #: BibTeX field-value pairs, for example `'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.'` Each 60 | #: entry will always have the following dict keys (in addition to other BibTeX fields): 61 | #: 62 | #: * `ID` (BibTeX key) 63 | #: * `ENTRYTYPE` (entry type in lowercase, e.g. `book`, `article` etc.) 64 | self.entries = [] 65 | self._entries_dict = {} 66 | #: List of BibTeX comment (`@comment{...}`) blocks. 67 | self.comments = [] 68 | #: OrderedDict of BibTeX string definitions (`@string{...}`). In order of definition. 69 | self.strings = OrderedDict() # Not sure if order is import, keep order just in case 70 | #: List of BibTeX preamble (`@preamble{...}`) blocks. 71 | self.preambles = [] 72 | 73 | #: List of fields that should not be updated when resolving crossrefs 74 | self._not_updated_by_crossref = ['_FROM_CROSSREF'] 75 | 76 | def load_common_strings(self): 77 | self.strings.update(COMMON_STRINGS) 78 | 79 | def get_entry_list(self): 80 | """Get a list of bibtex entries. 81 | 82 | :returns: BibTeX entries 83 | :rtype: list 84 | .. deprecated:: 0.5.6 85 | Use :attr:`entries` instead. 86 | """ 87 | return self.entries 88 | 89 | @staticmethod 90 | def entry_sort_key(entry, fields): 91 | result = [] 92 | for field in fields: 93 | result.append(ustr(entry.get(field, '')).lower()) # Sorting always as string 94 | return tuple(result) 95 | 96 | def _make_entries_dict(self): 97 | for entry in self.entries: 98 | self._entries_dict[entry['ID']] = entry 99 | 100 | def get_entry_dict(self): 101 | """Return a dictionary of BibTeX entries. 102 | The dict key is the BibTeX entry key 103 | """ 104 | # If the hash has never been made, make it 105 | if not self._entries_dict: 106 | self._make_entries_dict() 107 | return self._entries_dict 108 | 109 | entries_dict = property(get_entry_dict) 110 | 111 | def expand_string(self, name): 112 | try: 113 | return BibDataStringExpression.expand_if_expression( 114 | self.strings[name]) 115 | except KeyError: 116 | raise(UndefinedString(name)) 117 | 118 | def _add_missing_from_crossref_entry(self, entry, dependencies=set()): 119 | if entry['ID'] in self._crossref_updated: 120 | return 121 | 122 | if entry['_crossref'] not in self.entries_dict: 123 | logger.error("Crossref reference %s for %s is missing.", 124 | entry['_crossref'], 125 | entry['ID']) 126 | return 127 | 128 | if entry['_crossref'] in dependencies: 129 | logger.error("Circular crossref dependency: %s->%s->%s.", 130 | "->".join(dependencies), 131 | entry['ID'], 132 | entry['_crossref']) 133 | return 134 | 135 | crossref_entry = self.entries_dict[entry['_crossref']] 136 | if '_crossref' in crossref_entry: 137 | dependencies.add(entry['ID']) 138 | self._add_missing_from_crossref_entry(crossref_entry, dependencies) 139 | dependencies.remove(entry['ID']) 140 | 141 | from_crossref = {bibfield: bibvalue 142 | for (bibfield, bibvalue) in crossref_entry.items() 143 | if bibfield not in entry.keys() and 144 | bibfield not in self._not_updated_by_crossref} 145 | 146 | entry.update(from_crossref) 147 | 148 | self._crossref_updated.append(entry['ID']) 149 | entry['_FROM_CROSSREF'] = sorted(from_crossref.keys()) 150 | del entry['_crossref'] 151 | 152 | def add_missing_from_crossref(self): 153 | """Resolve crossrefs and update entries accordingly. 154 | """ 155 | self._crossref_updated = [] 156 | for entry in self.entries: 157 | if "_crossref" in entry: 158 | self._add_missing_from_crossref_entry(entry) 159 | 160 | 161 | class BibDataString(object): 162 | """ 163 | Represents a bibtex string. 164 | 165 | This object enables maintaining string expressions as list of strings 166 | and BibDataString. Can be interpolated from Bibdatabase. 167 | """ 168 | 169 | def __init__(self, bibdatabase, name): 170 | self._bibdatabase = bibdatabase 171 | self.name = name.lower() 172 | 173 | def __eq__(self, other): 174 | return isinstance(other, BibDataString) and self.name == other.name 175 | 176 | def __repr__(self): 177 | return "BibDataString({})".format(self.name.__repr__()) 178 | 179 | def get_value(self): 180 | """ 181 | Query value from string name. 182 | 183 | :returns: string 184 | """ 185 | return self._bibdatabase.expand_string(self.name) 186 | 187 | def get_dependencies(self, known_dependencies=set()): 188 | """Recursively tracks strings on which the expression depends. 189 | 190 | :param kown_dependencies: dependencies to ignore 191 | """ 192 | raise NotImplementedError 193 | 194 | @staticmethod 195 | def expand_string(string_or_bibdatastring): 196 | """ 197 | Eventually replaces a bibdatastring by its value. 198 | 199 | :param string_or_bibdatastring: the parsed token 200 | :type string_expr: string or BibDataString 201 | :returns: string 202 | """ 203 | if isinstance(string_or_bibdatastring, BibDataString): 204 | return string_or_bibdatastring.get_value() 205 | else: 206 | return string_or_bibdatastring 207 | 208 | 209 | class BibDataStringExpression(object): 210 | """ 211 | Represents a bibtex string expression. 212 | 213 | String expressions are sequences of regular strings and bibtex strings. 214 | This object enables maintaining string expressions as list of strings. 215 | The expression are represented as lists of regular strings and 216 | BibDataStrings. They can be interpolated from Bibdatabase. 217 | 218 | BibDataStringExpression(e) 219 | 220 | :param e: list of strings and BibDataStrings 221 | """ 222 | 223 | def __init__(self, expression): 224 | self.expr = expression 225 | 226 | def __eq__(self, other): 227 | return isinstance(other, BibDataStringExpression) and self.expr == other.expr 228 | 229 | def __repr__(self): 230 | return "BibDataStringExpression({})".format(self.expr.__repr__()) 231 | 232 | def get_value(self): 233 | """ 234 | Replaces bibdatastrings by their values in the expression. 235 | 236 | :returns: string 237 | """ 238 | return ''.join([BibDataString.expand_string(s) for s in self.expr]) 239 | 240 | def apply_on_strings(self, fun): 241 | """ 242 | Maps a function on strings in expression, keeping unchanged 243 | BibDataStrings. 244 | 245 | :param fun: function from strings to strings 246 | """ 247 | self.expr = [s if isinstance(s, BibDataString) else fun(s) 248 | for s in self.expr] 249 | 250 | @staticmethod 251 | def expand_if_expression(string_or_expression): 252 | """ 253 | Eventually replaces a BibDataStringExpression by its value. 254 | 255 | :param string_or_expression: the object to expand 256 | :type string_expr: string or BibDataStringExpression 257 | :returns: string 258 | """ 259 | if isinstance(string_or_expression, BibDataStringExpression): 260 | return string_or_expression.get_value() 261 | else: 262 | return string_or_expression 263 | 264 | @staticmethod 265 | def expression_if_needed(tokens): 266 | """Build expression only if tokens are not a regular value. 267 | """ 268 | if len(tokens) == 1 and not isinstance(tokens[0], BibDataString): 269 | return tokens[0] 270 | else: 271 | return BibDataStringExpression(tokens) 272 | 273 | 274 | def as_text(text_string_or_expression): 275 | if isinstance(text_string_or_expression, 276 | (BibDataString, BibDataStringExpression)): 277 | return text_string_or_expression.get_value() 278 | else: 279 | return ustr(text_string_or_expression) 280 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2018, François Boulogne and the python-bibtexparser contributors 2 | All rights reserved. 3 | 4 | 5 | The code is distributed under a dual license (at your choice). 6 | 7 | ##################################################################### 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are 10 | met: 11 | 12 | (1) Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | 15 | (2) Redistributions in binary form must reproduce the above copyright 16 | notice, this list of conditions and the following disclaimer in 17 | the documentation and/or other materials provided with the 18 | distribution. 19 | 20 | (3)The name of the author may not be used to 21 | endorse or promote products derived from this software without 22 | specific prior written permission. 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 27 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 | POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ##################################################################### 37 | GNU LESSER GENERAL PUBLIC LICENSE 38 | Version 3, 29 June 2007 39 | 40 | Copyright (C) 2007 Free Software Foundation, Inc. 41 | Everyone is permitted to copy and distribute verbatim copies 42 | of this license document, but changing it is not allowed. 43 | 44 | 45 | This version of the GNU Lesser General Public License incorporates 46 | the terms and conditions of version 3 of the GNU General Public 47 | License, supplemented by the additional permissions listed below. 48 | 49 | 0. Additional Definitions. 50 | 51 | As used herein, "this License" refers to version 3 of the GNU Lesser 52 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 53 | General Public License. 54 | 55 | "The Library" refers to a covered work governed by this License, 56 | other than an Application or a Combined Work as defined below. 57 | 58 | An "Application" is any work that makes use of an interface provided 59 | by the Library, but which is not otherwise based on the Library. 60 | Defining a subclass of a class defined by the Library is deemed a mode 61 | of using an interface provided by the Library. 62 | 63 | A "Combined Work" is a work produced by combining or linking an 64 | Application with the Library. The particular version of the Library 65 | with which the Combined Work was made is also called the "Linked 66 | Version". 67 | 68 | The "Minimal Corresponding Source" for a Combined Work means the 69 | Corresponding Source for the Combined Work, excluding any source code 70 | for portions of the Combined Work that, considered in isolation, are 71 | based on the Application, and not on the Linked Version. 72 | 73 | The "Corresponding Application Code" for a Combined Work means the 74 | object code and/or source code for the Application, including any data 75 | and utility programs needed for reproducing the Combined Work from the 76 | Application, but excluding the System Libraries of the Combined Work. 77 | 78 | 1. Exception to Section 3 of the GNU GPL. 79 | 80 | You may convey a covered work under sections 3 and 4 of this License 81 | without being bound by section 3 of the GNU GPL. 82 | 83 | 2. Conveying Modified Versions. 84 | 85 | If you modify a copy of the Library, and, in your modifications, a 86 | facility refers to a function or data to be supplied by an Application 87 | that uses the facility (other than as an argument passed when the 88 | facility is invoked), then you may convey a copy of the modified 89 | version: 90 | 91 | a) under this License, provided that you make a good faith effort to 92 | ensure that, in the event an Application does not supply the 93 | function or data, the facility still operates, and performs 94 | whatever part of its purpose remains meaningful, or 95 | 96 | b) under the GNU GPL, with none of the additional permissions of 97 | this License applicable to that copy. 98 | 99 | 3. Object Code Incorporating Material from Library Header Files. 100 | 101 | The object code form of an Application may incorporate material from 102 | a header file that is part of the Library. You may convey such object 103 | code under terms of your choice, provided that, if the incorporated 104 | material is not limited to numerical parameters, data structure 105 | layouts and accessors, or small macros, inline functions and templates 106 | (ten or fewer lines in length), you do both of the following: 107 | 108 | a) Give prominent notice with each copy of the object code that the 109 | Library is used in it and that the Library and its use are 110 | covered by this License. 111 | 112 | b) Accompany the object code with a copy of the GNU GPL and this license 113 | document. 114 | 115 | 4. Combined Works. 116 | 117 | You may convey a Combined Work under terms of your choice that, 118 | taken together, effectively do not restrict modification of the 119 | portions of the Library contained in the Combined Work and reverse 120 | engineering for debugging such modifications, if you also do each of 121 | the following: 122 | 123 | a) Give prominent notice with each copy of the Combined Work that 124 | the Library is used in it and that the Library and its use are 125 | covered by this License. 126 | 127 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 128 | document. 129 | 130 | c) For a Combined Work that displays copyright notices during 131 | execution, include the copyright notice for the Library among 132 | these notices, as well as a reference directing the user to the 133 | copies of the GNU GPL and this license document. 134 | 135 | d) Do one of the following: 136 | 137 | 0) Convey the Minimal Corresponding Source under the terms of this 138 | License, and the Corresponding Application Code in a form 139 | suitable for, and under terms that permit, the user to 140 | recombine or relink the Application with a modified version of 141 | the Linked Version to produce a modified Combined Work, in the 142 | manner specified by section 6 of the GNU GPL for conveying 143 | Corresponding Source. 144 | 145 | 1) Use a suitable shared library mechanism for linking with the 146 | Library. A suitable mechanism is one that (a) uses at run time 147 | a copy of the Library already present on the user's computer 148 | system, and (b) will operate properly with a modified version 149 | of the Library that is interface-compatible with the Linked 150 | Version. 151 | 152 | e) Provide Installation Information, but only if you would otherwise 153 | be required to provide such information under section 6 of the 154 | GNU GPL, and only to the extent that such information is 155 | necessary to install and execute a modified version of the 156 | Combined Work produced by recombining or relinking the 157 | Application with a modified version of the Linked Version. (If 158 | you use option 4d0, the Installation Information must accompany 159 | the Minimal Corresponding Source and Corresponding Application 160 | Code. If you use option 4d1, you must provide the Installation 161 | Information in the manner specified by section 6 of the GNU GPL 162 | for conveying Corresponding Source.) 163 | 164 | 5. Combined Libraries. 165 | 166 | You may place library facilities that are a work based on the 167 | Library side by side in a single library together with other library 168 | facilities that are not Applications and are not covered by this 169 | License, and convey such a combined library under terms of your 170 | choice, if you do both of the following: 171 | 172 | a) Accompany the combined library with a copy of the same work based 173 | on the Library, uncombined with any other library facilities, 174 | conveyed under the terms of this License. 175 | 176 | b) Give prominent notice with the combined library that part of it 177 | is a work based on the Library, and explaining where to find the 178 | accompanying uncombined form of the same work. 179 | 180 | 6. Revised Versions of the GNU Lesser General Public License. 181 | 182 | The Free Software Foundation may publish revised and/or new versions 183 | of the GNU Lesser General Public License from time to time. Such new 184 | versions will be similar in spirit to the present version, but may 185 | differ in detail to address new problems or concerns. 186 | 187 | Each version is given a distinguishing version number. If the 188 | Library as you received it specifies that a certain numbered version 189 | of the GNU Lesser General Public License "or any later version" 190 | applies to it, you have the option of following the terms and 191 | conditions either of that published version or of any later version 192 | published by the Free Software Foundation. If the Library as you 193 | received it does not specify a version number of the GNU Lesser 194 | General Public License, you may choose any version of the GNU Lesser 195 | General Public License ever published by the Free Software Foundation. 196 | 197 | If the Library as you received it specifies that a proxy can decide 198 | whether future versions of the GNU Lesser General Public License shall 199 | apply, that proxy's public statement of acceptance of any version is 200 | permanent authorization for you to choose that version for the 201 | Library. 202 | -------------------------------------------------------------------------------- /bibtexparser/bibtexexpression.py: -------------------------------------------------------------------------------- 1 | import pyparsing as pp 2 | 3 | from .bibdatabase import BibDataStringExpression 4 | 5 | 6 | # General helpers 7 | 8 | def _strip_after_new_lines(s): 9 | """Removes leading and trailing whitespaces in all but first line.""" 10 | lines = s.splitlines() 11 | if len(lines) > 1: 12 | lines = [lines[0]] + [l.lstrip() for l in lines[1:]] 13 | return '\n'.join(lines) 14 | 15 | 16 | def strip_after_new_lines(s): 17 | """Removes leading and trailing whitespaces in all but first line. 18 | 19 | :param s: string or BibDataStringExpression 20 | """ 21 | if isinstance(s, BibDataStringExpression): 22 | s.apply_on_strings(_strip_after_new_lines) 23 | return s 24 | else: 25 | return _strip_after_new_lines(s) 26 | 27 | 28 | def add_logger_parse_action(expr, log_func): 29 | """Register a callback on expression parsing with the adequate message.""" 30 | def action(s, l, t): 31 | log_func("Found {}: {}".format(expr.resultsName, t)) 32 | expr.addParseAction(action) 33 | 34 | 35 | # Parse action helpers 36 | # Helpers for returning values from the parsed tokens. Shaped as pyparsing's 37 | # parse actions. See pyparsing documentation for the arguments. 38 | 39 | def first_token(string_, location, token): 40 | # TODO Handle this case correctly! 41 | assert(len(token) == 1) 42 | return token[0] 43 | 44 | 45 | def remove_trailing_newlines(string_, location, token): 46 | if token[0]: 47 | return token[0].rstrip('\n') 48 | 49 | 50 | def remove_braces(string_, location, token): 51 | if len(token[0]) < 1: 52 | return '' 53 | else: 54 | start = 1 if token[0][0] == '{' else 0 55 | end = -1 if token[0][-1] == '}' else None 56 | return token[0][start:end] 57 | 58 | 59 | def field_to_pair(string_, location, token): 60 | """ 61 | Looks for parsed element named 'Field'. 62 | 63 | :returns: (name, value). 64 | """ 65 | field = token.get('Field') 66 | value = field.get('Value') 67 | if isinstance(value, pp.ParseResults): 68 | # For pyparsing >= 2.3.1 (see #225 and API change note in pyparsing's 69 | # Changelog). 70 | value = value[0] 71 | return (field.get('FieldName'), 72 | strip_after_new_lines(value)) 73 | 74 | 75 | # Expressions helpers 76 | 77 | def in_braces_or_pars(exp): 78 | """ 79 | exp -> (exp)|{exp} 80 | """ 81 | return ((pp.Suppress('{') + exp + pp.Suppress('}')) | 82 | (pp.Suppress('(') + exp + pp.Suppress(')'))) 83 | 84 | 85 | class BibtexExpression(object): 86 | """Gives access to pyparsing expressions. 87 | 88 | Attributes are pyparsing expressions for the following elements: 89 | 90 | * main_expression: the bibtex file 91 | * string_def: a string definition 92 | * preamble_decl: a preamble declaration 93 | * explicit_comment: an explicit comment 94 | * entry: an entry definition 95 | * implicit_comment: an implicit comment 96 | 97 | """ 98 | 99 | ParseException = pp.ParseException 100 | 101 | def __init__(self): 102 | 103 | # Bibtex keywords 104 | 105 | string_def_start = pp.CaselessKeyword("@string") 106 | preamble_start = pp.CaselessKeyword("@preamble") 107 | comment_line_start = pp.CaselessKeyword('@comment') 108 | 109 | # String names 110 | string_name = pp.Word(pp.alphanums + '_-:')('StringName') 111 | self.set_string_name_parse_action(lambda s, l, t: None) 112 | string_name.addParseAction(self._string_name_parse_action) 113 | 114 | # Values inside bibtex fields 115 | # Values can be integer or string expressions. The latter may use 116 | # quoted or braced values. 117 | 118 | # Integer values 119 | integer = pp.Word(pp.nums)('Integer') 120 | 121 | # Braced values: braced values can contain nested (but balanced) braces 122 | braced_value_content = pp.CharsNotIn('{}') 123 | braced_value = pp.Forward() # Recursive definition for nested braces 124 | braced_value <<= pp.originalTextFor( 125 | '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}' 126 | )('BracedValue') 127 | braced_value.setParseAction(remove_braces) 128 | # TODO add ignore for "\}" and "\{" ? 129 | # TODO @ are not parsed by bibtex in braces 130 | 131 | # Quoted values: may contain braced content with balanced braces 132 | brace_in_quoted = pp.nestedExpr('{', '}', ignoreExpr=None) 133 | text_in_quoted = pp.CharsNotIn('"{}') 134 | # (quotes should be escaped by braces in quoted value) 135 | quoted_value = pp.originalTextFor( 136 | '"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"' 137 | )('QuotedValue') 138 | quoted_value.addParseAction(pp.removeQuotes) 139 | 140 | # String expressions 141 | string_expr = pp.delimitedList( 142 | (quoted_value | braced_value | string_name), delim='#' 143 | )('StringExpression') 144 | self.set_string_expression_parse_action(lambda s, l, t: None) 145 | string_expr.addParseAction(self._string_expr_parse_action) 146 | 147 | value = (integer | string_expr)('Value') 148 | 149 | # Entries 150 | 151 | # @EntryType { ... 152 | entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType') 153 | entry_type.setParseAction(first_token) 154 | 155 | # Entry key: any character up to a ',' without leading and trailing 156 | # spaces. Also exclude spaces and prevent it from being empty. 157 | key = pp.SkipTo(',')('Key') # TODO Maybe also exclude @',\#}{~% 158 | 159 | def citekeyParseAction(string_, location, token): 160 | """Parse action for validating citekeys. 161 | 162 | It ensures citekey is not empty and has no space. 163 | 164 | :args: see pyparsing documentation. 165 | """ 166 | key = first_token(string_, location, token).strip() 167 | if len(key) < 1: 168 | raise self.ParseException( 169 | string_, loc=location, msg="Empty citekeys are not allowed.") 170 | for i, c in enumerate(key): 171 | if c.isspace(): 172 | raise self.ParseException( 173 | string_, loc=(location + i), 174 | msg="Whitespace not allowed in citekeys.") 175 | return key 176 | 177 | key.setParseAction(citekeyParseAction) 178 | 179 | # Field name: word of letters, digits, dashes and underscores 180 | field_name = pp.Word(pp.alphanums + '_-().+')('FieldName') 181 | field_name.setParseAction(first_token) 182 | 183 | # Field: field_name = value 184 | field = pp.Group(field_name + pp.Suppress('=') + value)('Field') 185 | field.setParseAction(field_to_pair) 186 | 187 | # List of fields: comma separeted fields 188 | field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(',')) 189 | )('Fields') 190 | field_list.setParseAction( 191 | lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))}) 192 | 193 | # Entry: type, key, and fields 194 | self.entry = (entry_type + 195 | in_braces_or_pars(key + pp.Suppress(',') + field_list) 196 | )('Entry') 197 | 198 | # Other stuff: comments, string definitions, and preamble declarations 199 | 200 | # Explicit comments: @comment + everything up to next valid declaration 201 | # starting on new line. 202 | not_an_implicit_comment = (pp.LineEnd() + pp.Literal('@') 203 | ) | pp.StringEnd() 204 | self.explicit_comment = ( 205 | pp.Suppress(comment_line_start) + 206 | pp.originalTextFor(pp.SkipTo(not_an_implicit_comment), 207 | asString=True))('ExplicitComment') 208 | self.explicit_comment.addParseAction(remove_trailing_newlines) 209 | self.explicit_comment.addParseAction(remove_braces) 210 | # Previous implementation included comment until next '}'. 211 | # This is however not inline with bibtex behavior that is to only 212 | # ignore until EOL. Brace stipping is arbitrary here but avoids 213 | # duplication on bibtex write. 214 | 215 | # Empty implicit_comments lead to infinite loop of zeroOrMore 216 | def mustNotBeEmpty(t): 217 | if not t[0]: 218 | raise pp.ParseException("Match must not be empty.") 219 | 220 | # Implicit comments: not anything else 221 | self.implicit_comment = pp.originalTextFor( 222 | pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty), 223 | asString=True)('ImplicitComment') 224 | self.implicit_comment.addParseAction(remove_trailing_newlines) 225 | 226 | # String definition 227 | self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars( 228 | string_name + 229 | pp.Suppress('=') + 230 | string_expr('StringValue') 231 | ))('StringDefinition') 232 | 233 | # Preamble declaration 234 | self.preamble_decl = (pp.Suppress(preamble_start) + 235 | in_braces_or_pars(value))('PreambleDeclaration') 236 | 237 | # Main bibtex expression 238 | 239 | self.main_expression = pp.ZeroOrMore( 240 | self.string_def | 241 | self.preamble_decl | 242 | self.explicit_comment | 243 | self.entry | 244 | self.implicit_comment) 245 | 246 | def add_log_function(self, log_fun): 247 | """Add notice to logger on entry, comment, preamble, string definitions. 248 | 249 | :param log_fun: logger function 250 | """ 251 | for e in [self.entry, 252 | self.implicit_comment, 253 | self.explicit_comment, 254 | self.preamble_decl, 255 | self.string_def]: 256 | add_logger_parse_action(e, log_fun) 257 | 258 | def set_string_name_parse_action(self, fun): 259 | """Set the parseAction for string name expression. 260 | 261 | .. Note:: 262 | 263 | For some reason pyparsing duplicates the string_name 264 | expression so setting its parseAction a posteriori has no effect 265 | in the context of a string expression. This is why this function 266 | should be used instead. 267 | """ 268 | self._string_name_parse_action_fun = fun 269 | 270 | def _string_name_parse_action(self, s, l, t): 271 | return self._string_name_parse_action_fun(s, l, t) 272 | 273 | def set_string_expression_parse_action(self, fun): 274 | """Set the parseAction for string_expression expression. 275 | 276 | .. Note:: 277 | 278 | See set_string_name_parse_action. 279 | """ 280 | self._string_expr_parse_action_fun = fun 281 | 282 | def _string_expr_parse_action(self, s, l, t): 283 | return self._string_expr_parse_action_fun(s, l, t) 284 | 285 | def parseFile(self, file_obj): 286 | return self.main_expression.parseFile(file_obj, parseAll=True) 287 | -------------------------------------------------------------------------------- /bibtexparser/bparser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Original source: github.com/okfn/bibserver 5 | # Authors: 6 | # markmacgillivray 7 | # Etienne Posthumus (epoz) 8 | # Francois Boulogne 9 | 10 | import sys 11 | import io 12 | import logging 13 | 14 | from bibtexparser.bibdatabase import (BibDatabase, BibDataString, as_text, 15 | BibDataStringExpression, STANDARD_TYPES) 16 | from bibtexparser.bibtexexpression import BibtexExpression 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | __all__ = ['BibTexParser'] 21 | 22 | 23 | if sys.version_info >= (3, 0): 24 | ustr = str 25 | else: 26 | ustr = unicode 27 | 28 | 29 | def parse(data, *args, **kwargs): 30 | parser = BibTexParser(*args, **kwargs) 31 | return parser.parse(data) 32 | 33 | 34 | class BibTexParser(object): 35 | """ 36 | A parser for reading BibTeX bibliographic data files. 37 | 38 | Example:: 39 | 40 | from bibtexparser.bparser import BibTexParser 41 | 42 | bibtex_str = ... 43 | 44 | parser = BibTexParser() 45 | parser.ignore_nonstandard_types = False 46 | parser.homogenize_fields = False 47 | parser.common_strings = False 48 | bib_database = bibtexparser.loads(bibtex_str, parser) 49 | 50 | :param customization: function or None (default) 51 | Customization to apply to parsed entries. 52 | :param ignore_nonstandard_types: bool (default True) 53 | If True ignores non-standard bibtex entry types. 54 | :param homogenize_fields: bool (default False) 55 | Common field name replacements (as set in alt_dict attribute). 56 | :param interpolate_strings: bool (default True) 57 | If True, replace bibtex string by their value, else uses 58 | BibDataString objects. 59 | :param common_strings: bool (default False) 60 | Include common string definitions (e.g. month abbreviations) to 61 | the bibtex file. 62 | :param add_missing_from_crossref: bool (default False) 63 | Resolve BibTeX references set in the crossref field for BibTeX entries 64 | and add the fields from the referenced entry to the referencing entry. 65 | """ 66 | 67 | def __new__(cls, data=None, **args): 68 | """ 69 | To catch the old API structure in which creating the parser would 70 | immediately parse and return data. 71 | """ 72 | 73 | if data is None: 74 | return super(BibTexParser, cls).__new__(cls) 75 | else: 76 | # For backwards compatibility: if data is given, parse 77 | # and return the `BibDatabase` object instead of the parser. 78 | return parse(data, **args) 79 | 80 | def __init__(self, data=None, 81 | customization=None, 82 | ignore_nonstandard_types=True, 83 | homogenize_fields=False, 84 | interpolate_strings=True, 85 | common_strings=False, 86 | add_missing_from_crossref=False): 87 | """ 88 | Creates a parser for rading BibTeX files 89 | 90 | :return: parser 91 | :rtype: `BibTexParser` 92 | """ 93 | self.bib_database = BibDatabase() 94 | 95 | #: Load common strings such as months abbreviation 96 | #: Default: `False`. 97 | self.common_strings = common_strings 98 | if self.common_strings: 99 | self.bib_database.load_common_strings() 100 | 101 | #: Callback function to process BibTeX entries after parsing, 102 | #: for example to create a list from a string with multiple values. 103 | #: By default all BibTeX values are treated as simple strings. 104 | #: Default: `None`. 105 | self.customization = customization 106 | 107 | #: Ignore non-standard BibTeX types (`book`, `article`, etc). 108 | #: Default: `True`. 109 | self.ignore_nonstandard_types = ignore_nonstandard_types 110 | 111 | #: Sanitize BibTeX field names, for example change `url` to `link` etc. 112 | #: Field names are always converted to lowercase names. 113 | #: Default: `False`. 114 | self.homogenize_fields = homogenize_fields 115 | 116 | #: Interpolate Bibtex Strings or keep the structure 117 | self.interpolate_strings = interpolate_strings 118 | 119 | # On some sample data files, the character encoding detection simply 120 | # hangs We are going to default to utf8, and mandate it. 121 | self.encoding = 'utf8' 122 | 123 | # Add missing field from cross-ref 124 | self.add_missing_from_crossref = add_missing_from_crossref 125 | 126 | # pre-defined set of key changes 127 | self.alt_dict = { 128 | 'keyw': u'keyword', 129 | 'keywords': u'keyword', 130 | 'authors': u'author', 131 | 'editors': u'editor', 132 | 'urls': u'url', 133 | 'link': u'url', 134 | 'links': u'url', 135 | 'subjects': u'subject', 136 | 'xref': u'crossref' 137 | } 138 | 139 | # Setup the parser expression 140 | self._init_expressions() 141 | 142 | def parse(self, bibtex_str, partial=False): 143 | """Parse a BibTeX string into an object 144 | 145 | :param bibtex_str: BibTeX string 146 | :type: str or unicode 147 | :param partial: If True, print errors only on parsing failures. 148 | If False, an exception is raised. 149 | :type: boolean 150 | :return: bibliographic database 151 | :rtype: BibDatabase 152 | """ 153 | bibtex_file_obj = self._bibtex_file_obj(bibtex_str) 154 | try: 155 | self._expr.parseFile(bibtex_file_obj) 156 | except self._expr.ParseException as exc: 157 | logger.error("Could not parse properly, starting at %s", exc.line) 158 | if not partial: 159 | raise exc 160 | 161 | if self.add_missing_from_crossref: 162 | self.bib_database.add_missing_from_crossref() 163 | 164 | return self.bib_database 165 | 166 | def parse_file(self, file, partial=False): 167 | """Parse a BibTeX file into an object 168 | 169 | :param file: BibTeX file or file-like object 170 | :type: file 171 | :param partial: If True, print errors only on parsing failures. 172 | If False, an exception is raised. 173 | :type: boolean 174 | :return: bibliographic database 175 | :rtype: BibDatabase 176 | """ 177 | return self.parse(file.read(), partial=partial) 178 | 179 | def _init_expressions(self): 180 | """ 181 | Defines all parser expressions used internally. 182 | """ 183 | self._expr = BibtexExpression() 184 | 185 | # Handle string as BibDataString object 186 | self._expr.set_string_name_parse_action( 187 | lambda s, l, t: 188 | BibDataString(self.bib_database, t[0])) 189 | if self.interpolate_strings: 190 | maybe_interpolate = lambda expr: as_text(expr) 191 | else: 192 | maybe_interpolate = lambda expr: expr 193 | self._expr.set_string_expression_parse_action( 194 | lambda s, l, t: 195 | maybe_interpolate( 196 | BibDataStringExpression.expression_if_needed(t))) 197 | 198 | # Add notice to logger 199 | self._expr.add_log_function(logger.debug) 200 | 201 | # Set actions 202 | self._expr.entry.addParseAction( 203 | lambda s, l, t: self._add_entry( 204 | t.get('EntryType'), t.get('Key'), t.get('Fields')) 205 | ) 206 | self._expr.implicit_comment.addParseAction( 207 | lambda s, l, t: self._add_comment(t[0]) 208 | ) 209 | self._expr.explicit_comment.addParseAction( 210 | lambda s, l, t: self._add_comment(t[0]) 211 | ) 212 | self._expr.preamble_decl.addParseAction( 213 | lambda s, l, t: self._add_preamble(t[0]) 214 | ) 215 | self._expr.string_def.addParseAction( 216 | lambda s, l, t: self._add_string(t['StringName'].name, 217 | t['StringValue']) 218 | ) 219 | 220 | def _bibtex_file_obj(self, bibtex_str): 221 | # Some files have Byte-order marks inserted at the start 222 | byte = b'\xef\xbb\xbf' 223 | if isinstance(bibtex_str, ustr): 224 | byte = ustr(byte, self.encoding, 'ignore') 225 | if bibtex_str[0] == byte: 226 | bibtex_str = bibtex_str[1:] 227 | else: 228 | if bibtex_str[:3] == byte: 229 | bibtex_str = bibtex_str[3:] 230 | bibtex_str = bibtex_str.decode(encoding=self.encoding) 231 | return io.StringIO(bibtex_str) 232 | 233 | def _clean_val(self, val): 234 | """ Clean instring before adding to dictionary 235 | 236 | :param val: a value 237 | :type val: string 238 | :returns: string -- value 239 | """ 240 | if not val or val == "{}": 241 | return '' 242 | return val 243 | 244 | def _clean_key(self, key): 245 | """ Lowercase a key and return as unicode. 246 | 247 | :param key: a key 248 | :type key: string 249 | :returns: (unicode) string -- value 250 | """ 251 | key = key.lower() 252 | if not isinstance(key, ustr): 253 | return ustr(key, 'utf-8') 254 | else: 255 | return key 256 | 257 | def _clean_field_key(self, key): 258 | """ Clean a bibtex field key and homogenize alternative forms. 259 | 260 | :param key: a key 261 | :type key: string 262 | :returns: string -- value 263 | """ 264 | key = self._clean_key(key) 265 | if self.homogenize_fields: 266 | if key in list(self.alt_dict.keys()): 267 | key = self.alt_dict[key] 268 | return key 269 | 270 | def _add_entry(self, entry_type, entry_id, fields): 271 | """ Adds a parsed entry. 272 | Includes checking type and fields, cleaning, applying customizations. 273 | 274 | :param entry_type: the entry type 275 | :type entry_type: string 276 | :param entry_id: the entry bibid 277 | :type entry_id: string 278 | :param fields: the fields and values 279 | :type fields: dictionary 280 | :returns: string -- value 281 | """ 282 | d = {} 283 | entry_type = self._clean_key(entry_type) 284 | if self.ignore_nonstandard_types and entry_type not in STANDARD_TYPES: 285 | logger.warning('Entry type %s not standard. Not considered.', 286 | entry_type) 287 | return 288 | for key in fields: 289 | d[self._clean_field_key(key)] = self._clean_val(fields[key]) 290 | d['ENTRYTYPE'] = entry_type 291 | d['ID'] = entry_id 292 | 293 | crossref = d.get('crossref', None) 294 | if self.add_missing_from_crossref and crossref is not None: 295 | d['_crossref'] = crossref 296 | 297 | if self.customization is not None: 298 | logger.debug('Apply customizations and return dict') 299 | d = self.customization(d) 300 | 301 | self.bib_database.entries.append(d) 302 | 303 | def _add_comment(self, comment): 304 | """ 305 | Stores a comment in the list of comment. 306 | 307 | :param comment: the parsed comment 308 | :type comment: string 309 | """ 310 | logger.debug('Store comment in list of comments: ' + 311 | comment.__repr__()) 312 | self.bib_database.comments.append(comment) 313 | 314 | def _add_string(self, string_key, string): 315 | """ 316 | Stores a new string in the string dictionary. 317 | 318 | :param string_key: the string key 319 | :type string_key: string 320 | :param string: the string value 321 | :type string: string 322 | """ 323 | if string_key in self.bib_database.strings: 324 | logger.warning('Overwritting existing string for key: %s.', 325 | string_key) 326 | logger.debug(u'Store string: {} -> {}'.format(string_key, string)) 327 | self.bib_database.strings[string_key] = self._clean_val(string) 328 | 329 | def _add_preamble(self, preamble): 330 | """ 331 | Stores a preamble. 332 | 333 | :param preamble: the parsed preamble 334 | :type preamble: string 335 | """ 336 | logger.debug('Store preamble in list of preambles') 337 | self.bib_database.preambles.append(preamble) 338 | -------------------------------------------------------------------------------- /docs/source/tutorial.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Tutorial 3 | ======== 4 | 5 | Step 0: Vocabulary 6 | ================== 7 | 8 | * An **entry** designates for example `@book{...}`, `@article{...}`, etc. 9 | * A **comment** is written as `@comment{...}`. 10 | * A **preamble** is a `@preamble{...}` block. 11 | * A **string** is `@string{...}`. 12 | 13 | In an entry, you can find 14 | 15 | * an **entry type** like `article`, `book`, etc. 16 | * **entry keys** or **keys** such as `author`, `title`, `year`... 17 | * and also **records**, which designates the values of those keys. 18 | 19 | 20 | Step 1: Prepare a BibTeX file 21 | ============================= 22 | 23 | First, we prepare a BibTeX sample file. This is just for the purpose of illustration: 24 | 25 | .. code-block:: python 26 | 27 | bibtex = """@ARTICLE{Cesar2013, 28 | author = {Jean César}, 29 | title = {An amazing title}, 30 | year = {2013}, 31 | volume = {12}, 32 | pages = {12--23}, 33 | journal = {Nice Journal}, 34 | abstract = {This is an abstract. This line should be long enough to test 35 | multilines...}, 36 | comments = {A comment}, 37 | keywords = {keyword1, keyword2} 38 | } 39 | """ 40 | 41 | with open('bibtex.bib', 'w') as bibfile: 42 | bibfile.write(bibtex) 43 | 44 | Step 2: Parse it! 45 | ================= 46 | 47 | Simplest call 48 | ------------- 49 | 50 | OK. Everything is in place. Let's parse the BibTeX file. 51 | 52 | .. code-block:: python 53 | 54 | import bibtexparser 55 | 56 | with open('bibtex.bib') as bibtex_file: 57 | bib_database = bibtexparser.load(bibtex_file) 58 | 59 | print(bib_database.entries) 60 | 61 | 62 | It prints a list of dictionaries for reference entries, for example books, articles: 63 | 64 | .. code-block:: python 65 | 66 | [{'journal': 'Nice Journal', 67 | 'comments': 'A comment', 68 | 'pages': '12--23', 69 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...', 70 | 'title': 'An amazing title', 71 | 'year': '2013', 72 | 'volume': '12', 73 | 'ID': 'Cesar2013', 74 | 'author': 'Jean César', 75 | 'keyword': 'keyword1, keyword2', 76 | 'ENTRYTYPE': 'article'}] 77 | 78 | Note that, by convention, uppercase keys (ID, ENTRYTYPE) are data generated by the parser, while lowercase keys come from the original bibtex file. 79 | 80 | You can also print comments, preambles and string: 81 | 82 | .. code-block:: python 83 | 84 | print(bib_database.comments) 85 | print(bib_database.preambles) 86 | print(bib_database.strings) 87 | 88 | .. note:: 89 | If your bibtex contains months defined as strings such as :code:`month = jan`, you will need to parse it with the :code:`common_strings` option: 90 | :code:`bib_database = bibtexparser.bparser.BibTexParser(common_strings=True).parse_file(bibtex_file)`. (More in `Using bibtex strings`_.) 91 | 92 | 93 | Parse a string 94 | -------------- 95 | 96 | If for some reason, you prefer to parse a string, that's also possible: 97 | 98 | .. code-block:: python 99 | 100 | import bibtexparser 101 | 102 | with open('bibtex.bib') as bibtex_file: 103 | bibtex_str = bibtex_file.read() 104 | 105 | bib_database = bibtexparser.loads(bibtex_str) 106 | 107 | 108 | Tune parser's options 109 | --------------------- 110 | 111 | In the previous snippet, several default options are used. 112 | You can tweak them as you wish. 113 | 114 | .. code-block:: python 115 | 116 | import bibtexparser 117 | from bibtexparser.bparser import BibTexParser 118 | 119 | parser = BibTexParser(common_strings=False) 120 | parser.ignore_nonstandard_types = False 121 | parser.homogenise_fields = False 122 | 123 | bib_database = bibtexparser.loads(bibtex_str, parser) 124 | 125 | .. note:: 126 | The :code:`common_strings` option needs to be set when the parser object is created and has no effect if changed afterwards. 127 | 128 | Step 3: Export 129 | ============== 130 | 131 | Once you worked on your parsed database, you may want to export the result. This library provides some functions to help on that. However, you can write your own functions if you have specific requirements. 132 | 133 | Create a BibTeX file or string 134 | -------------------------------- 135 | 136 | The bibliographic data can be converted back into a string : 137 | 138 | .. code-block:: python 139 | 140 | import bibtexparser 141 | 142 | bibtex_str = bibtexparser.dumps(bib_database) 143 | 144 | or a BibTeX file like this: 145 | 146 | .. code-block:: python 147 | 148 | import bibtexparser 149 | 150 | with open('bibtex.bib', 'w') as bibtex_file: 151 | bibtexparser.dump(bibtex_database, bibtex_file) 152 | 153 | 154 | Call the writer 155 | --------------- 156 | 157 | In the first section we prepared a BibTeX sample file, we can prepare the same file using pure python and the ``BibTexWriter`` class. 158 | 159 | .. code-block:: python 160 | 161 | from bibtexparser.bwriter import BibTexWriter 162 | from bibtexparser.bibdatabase import BibDatabase 163 | 164 | db = BibDatabase() 165 | db.entries = [ 166 | {'journal': 'Nice Journal', 167 | 'comments': 'A comment', 168 | 'pages': '12--23', 169 | 'month': 'jan', 170 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...', 171 | 'title': 'An amazing title', 172 | 'year': '2013', 173 | 'volume': '12', 174 | 'ID': 'Cesar2013', 175 | 'author': 'Jean César', 176 | 'keyword': 'keyword1, keyword2', 177 | 'ENTRYTYPE': 'article'}] 178 | 179 | writer = BibTexWriter() 180 | with open('bibtex.bib', 'w') as bibfile: 181 | bibfile.write(writer.write(db)) 182 | 183 | This code generates the following file: 184 | 185 | .. code-block:: latex 186 | 187 | @article{Cesar2013, 188 | abstract = {This is an abstract. This line should be long enough to test 189 | multilines...}, 190 | author = {Jean César}, 191 | comments = {A comment}, 192 | journal = {Nice Journal}, 193 | keyword = {keyword1, keyword2}, 194 | month = {jan}, 195 | pages = {12--23}, 196 | title = {An amazing title}, 197 | volume = {12}, 198 | year = {2013} 199 | } 200 | 201 | The writer also has several flags that can be enabled to customize the output file. 202 | For example we can use ``indent`` and ``comma_first`` to customize the previous entry, first the code: 203 | 204 | .. code-block:: python 205 | 206 | from bibtexparser.bwriter import BibTexWriter 207 | from bibtexparser.bibdatabase import BibDatabase 208 | 209 | db = BibDatabase() 210 | db.entries = [ 211 | {'journal': 'Nice Journal', 212 | 'comments': 'A comment', 213 | 'pages': '12--23', 214 | 'month': 'jan', 215 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...', 216 | 'title': 'An amazing title', 217 | 'year': '2013', 218 | 'volume': '12', 219 | 'ID': 'Cesar2013', 220 | 'author': 'Jean César', 221 | 'keyword': 'keyword1, keyword2', 222 | 'ENTRYTYPE': 'article'}] 223 | 224 | writer = BibTexWriter() 225 | writer.indent = ' ' # indent entries with 4 spaces instead of one 226 | writer.comma_first = True # place the comma at the beginning of the line 227 | with open('bibtex.bib', 'w') as bibfile: 228 | bibfile.write(writer.write(db)) 229 | 230 | This code results in the following, customized, file: 231 | 232 | .. code-block:: latex 233 | 234 | @article{Cesar2013 235 | , abstract = {This is an abstract. This line should be long enough to test 236 | multilines...} 237 | , author = {Jean César} 238 | , comments = {A comment} 239 | , journal = {Nice Journal} 240 | , keyword = {keyword1, keyword2} 241 | , month = {jan} 242 | , pages = {12--23} 243 | , title = {An amazing title} 244 | , volume = {12} 245 | , year = {2013} 246 | } 247 | 248 | 249 | Flags to the writer object can modify not only how an entry is printed but how several BibTeX entries are sorted and separated. 250 | See the :ref:`API ` for the full list of flags. 251 | 252 | 253 | Step 4: Add salt and pepper 254 | =========================== 255 | 256 | In this section, we discuss about some customizations and details. 257 | 258 | Customizations 259 | -------------- 260 | 261 | By default, the parser does not alter the content of each field and keeps it as a simple string. There are many cases 262 | where this is not desired. For example, instead of a string with a multiple of authors, it could be parsed as a list. 263 | 264 | To modify field values during parsing, a callback function can be supplied to the parser which can be used to modify 265 | BibTeX entries. The library includes several functions which may be used. Alternatively, you can read them to create 266 | your own functions. 267 | 268 | .. code-block:: python 269 | 270 | import bibtexparser 271 | from bibtexparser.bparser import BibTexParser 272 | from bibtexparser.customization import * 273 | 274 | # Let's define a function to customize our entries. 275 | # It takes a record and return this record. 276 | def customizations(record): 277 | """Use some functions delivered by the library 278 | 279 | :param record: a record 280 | :returns: -- customized record 281 | """ 282 | record = type(record) 283 | record = author(record) 284 | record = editor(record) 285 | record = journal(record) 286 | record = keyword(record) 287 | record = link(record) 288 | record = page_double_hyphen(record) 289 | record = doi(record) 290 | return record 291 | 292 | with open('bibtex.bib') as bibtex_file: 293 | parser = BibTexParser() 294 | parser.customization = customizations 295 | bib_database = bibtexparser.load(bibtex_file, parser=parser) 296 | print(bib_database.entries) 297 | 298 | 299 | If you think that you have a customization which could be useful to others, please share with us! 300 | 301 | 302 | Accents and weird characters 303 | ---------------------------- 304 | 305 | Your bibtex may contain accents and specific characters. 306 | They are sometimes coded like this ``\'{e}`` but this is not the correct way, ``{\'e}`` is preferred. Moreover, you may want to manipulate ``é``. There is different situations: 307 | 308 | * Case 1: you plan to use this library to work with latex and you assume that the original bibtex is clean. You have nothing to do. 309 | 310 | * Case 2: you plan to use this library to work with latex but your bibtex is not really clean. 311 | 312 | .. code-block:: python 313 | 314 | import bibtexparser 315 | from bibtexparser.bparser import BibTexParser 316 | from bibtexparser.customization import homogenize_latex_encoding 317 | 318 | with open('bibtex.bib') as bibtex_file: 319 | parser = BibTexParser() 320 | parser.customization = homogenize_latex_encoding 321 | bib_database = bibtexparser.load(bibtex_file, parser=parser) 322 | print(bib_database.entries) 323 | 324 | 325 | * Case 3: you plan to use this library to work with something different and your bibtex is not really clean. 326 | Then, you probably want to use unicode. 327 | 328 | .. code-block:: python 329 | 330 | import bibtexparser 331 | from bibtexparser.bparser import BibTexParser 332 | from bibtexparser.customization import convert_to_unicode 333 | 334 | with open('bibtex.bib') as bibtex_file: 335 | parser = BibTexParser() 336 | parser.customization = convert_to_unicode 337 | bib_database = bibtexparser.load(bibtex_file, parser=parser) 338 | print(bib_database.entries) 339 | 340 | 341 | .. Note:: 342 | 343 | If you want to mix different customization functions, you can write your own function. 344 | 345 | 346 | Using bibtex strings 347 | -------------------- 348 | 349 | .. Warning:: support for bibtex strings representation is still an experimental feature; the way strings are represented is likely to change in future releases. 350 | 351 | Bibtex strings and string expressions are expanded by default into the value they represent. 352 | This behavior is controlled by the ``interpolate_string`` argument of the BibTexParser. It defaults to ``True`` but can be set to ``False``, in which case bibtex strings and string expressions from input files are represented with the :class:`bibdatabase.BibDataString` and :class:`bibdatabase.BibDataStringExpression` from the :mod:`bibdatabase` module. Both classes retain the intrinsic structure of the string or expression so that they can be written to a new file, the same way. Each instance provides a :func:`get_value` method to interpolate the string or expression and the module also provide an :func:`bibdatabase.as_text` helper to expand a string or an expression when needed. 353 | 354 | Using the code would yield the following output. 355 | 356 | .. code-block:: python 357 | 358 | from bibtexparser.bparser import BibTexParser 359 | from bibtexparser.bibdatabase import as_text 360 | 361 | 362 | bibtex = """@STRING{ jean = "Jean"} 363 | 364 | @ARTICLE{Cesar2013, 365 | author = jean # { César}, 366 | title = {An amazing title}, 367 | year = {2013}, 368 | month = jan, 369 | volume = {12}, 370 | pages = {12--23}, 371 | journal = {Nice Journal}, 372 | } 373 | """ 374 | 375 | bp = BibTexParser(interpolate_strings=False) 376 | bib_database = bp.parse(bibtex) 377 | bib_database.entries[0] 378 | as_text(bd.entries[0]['author']) 379 | 380 | .. code-block:: python 381 | 382 | {'ENTRYTYPE': 'article', 383 | 'ID': 'Cesar2013', 384 | 'author': BibDataStringExpression([BibDataString('jean'), ' César']), 385 | 'journal': 'Nice Journal', 386 | 'month': BibDataStringExpression([BibDataString('jan')]), 387 | 'pages': '12--23', 388 | 'title': 'An amazing title', 389 | } 390 | 'Jean César' 391 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_crossref_resolving.py: -------------------------------------------------------------------------------- 1 | import unittest2 as unittest 2 | from bibtexparser.bibdatabase import BibDatabase 3 | from bibtexparser.bparser import BibTexParser 4 | 5 | 6 | class TestCrossRef(unittest.TestCase): 7 | def test_crossref(self): 8 | self.maxDiff = None 9 | input_file_path = 'bibtexparser/tests/data/crossref_entries.bib' 10 | entries_expected = {'cr1': {'ENTRYTYPE': 'inbook', 11 | 'ID': 'cr1', 12 | '_FROM_CROSSREF': ['editor', 'publisher', 'year'], 13 | 'archiveprefix': 'SomEPrFiX', 14 | 'author': 'Graham Gullam', 15 | 'crossref': 'cr_m', 16 | 'editor': 'Edgar Erbriss', 17 | 'origdate': '1955', 18 | 'primaryclass': 'SOMECLASS', 19 | 'publisher': 'Grimble', 20 | 'title': 'Great and Good Graphs', 21 | 'year': '1974'}, 22 | 'cr2': {'ENTRYTYPE': 'inbook', 23 | 'ID': 'cr2', 24 | '_FROM_CROSSREF': ['editor', 'publisher', 'year'], 25 | 'author': 'Frederick Fumble', 26 | 'crossref': 'cr_m', 27 | 'editor': 'Edgar Erbriss', 28 | 'institution': 'Institution', 29 | 'origdate': '1943', 30 | 'publisher': 'Grimble', 31 | 'school': 'School', 32 | 'title': 'Fabulous Fourier Forms', 33 | 'year': '1974'}, 34 | 'cr3': {'ENTRYTYPE': 'inbook', 35 | 'ID': 'cr3', 36 | '_FROM_CROSSREF': ['editor', 'publisher', 'year'], 37 | 'archiveprefix': 'SomEPrFiX', 38 | 'author': 'Arthur Aptitude', 39 | 'crossref': 'crt', 40 | 'editor': 'Mark Monkley', 41 | 'eprinttype': 'sometype', 42 | 'origdate': '1934', 43 | 'publisher': 'Rancour', 44 | 'title': 'Arrangements of All Articles', 45 | 'year': '1996'}, 46 | 'cr4': {'ENTRYTYPE': 'inbook', 47 | 'ID': 'cr4', 48 | '_FROM_CROSSREF': ['editor', 'publisher', 'year'], 49 | 'author': 'Morris Mumble', 50 | 'crossref': 'crn', 51 | 'editor': 'Jeremy Jermain', 52 | 'origdate': '1911', 53 | 'publisher': 'Pillsbury', 54 | 'title': 'Enterprising Entities', 55 | 'year': '1945'}, 56 | 'cr5': {'ENTRYTYPE': 'inbook', 57 | 'ID': 'cr5', 58 | '_FROM_CROSSREF': ['editor', 'publisher', 'year'], 59 | 'author': 'Oliver Ordinary', 60 | 'crossref': 'crn', 61 | 'editor': 'Jeremy Jermain', 62 | 'origdate': '1919', 63 | 'publisher': 'Pillsbury', 64 | 'title': 'Questionable Quidities', 65 | 'year': '1945'}, 66 | 'cr6': {'ENTRYTYPE': 'inproceedings', 67 | 'ID': 'cr6', 68 | '_FROM_CROSSREF': ['address', 69 | 'editor', 70 | 'eventdate', 71 | 'eventtitle', 72 | 'publisher', 73 | 'venue'], 74 | 'address': 'Address', 75 | 'author': 'Author, Firstname', 76 | 'booktitle': 'Manual booktitle', 77 | 'crossref': 'cr6i', 78 | 'editor': 'Editor', 79 | 'eventdate': '2009-08-21/2009-08-24', 80 | 'eventtitle': 'Title of the event', 81 | 'pages': '123--', 82 | 'publisher': 'Publisher of proceeding', 83 | 'title': 'Title of inproceeding', 84 | 'venue': 'Location of event', 85 | 'year': '2009'}, 86 | 'cr6i': {'ENTRYTYPE': 'proceedings', 87 | 'ID': 'cr6i', 88 | 'address': 'Address', 89 | 'author': 'Spurious Author', 90 | 'editor': 'Editor', 91 | 'eventdate': '2009-08-21/2009-08-24', 92 | 'eventtitle': 'Title of the event', 93 | 'publisher': 'Publisher of proceeding', 94 | 'title': 'Title of proceeding', 95 | 'venue': 'Location of event', 96 | 'year': '2009'}, 97 | 'cr7': {'ENTRYTYPE': 'inbook', 98 | 'ID': 'cr7', 99 | '_FROM_CROSSREF': ['publisher', 'subtitle', 'titleaddon', 'verba'], 100 | 'author': 'Author, Firstname', 101 | 'crossref': 'cr7i', 102 | 'pages': '123--126', 103 | 'publisher': 'Publisher of proceeding', 104 | 'subtitle': 'Book Subtitle', 105 | 'title': 'Title of Book bit', 106 | 'titleaddon': 'Book Titleaddon', 107 | 'verba': 'String', 108 | 'year': '2010'}, 109 | 'cr7i': {'ENTRYTYPE': 'book', 110 | 'ID': 'cr7i', 111 | 'author': 'Brian Bookauthor', 112 | 'publisher': 'Publisher of proceeding', 113 | 'subtitle': 'Book Subtitle', 114 | 'title': 'Book Title', 115 | 'titleaddon': 'Book Titleaddon', 116 | 'verba': 'String', 117 | 'year': '2009'}, 118 | 'cr8': {'ENTRYTYPE': 'incollection', 119 | 'ID': 'cr8', 120 | '_FROM_CROSSREF': ['editor', 'publisher', 'subtitle', 'titleaddon'], 121 | 'author': 'Smith, Firstname', 122 | 'crossref': 'cr8i', 123 | 'editor': 'Brian Editor', 124 | 'pages': '1--12', 125 | 'publisher': 'Publisher of Collection', 126 | 'subtitle': 'Book Subtitle', 127 | 'title': 'Title of Collection bit', 128 | 'titleaddon': 'Book Titleaddon', 129 | 'year': '2010'}, 130 | 'cr8i': {'ENTRYTYPE': 'collection', 131 | 'ID': 'cr8i', 132 | 'editor': 'Brian Editor', 133 | 'publisher': 'Publisher of Collection', 134 | 'subtitle': 'Book Subtitle', 135 | 'title': 'Book Title', 136 | 'titleaddon': 'Book Titleaddon', 137 | 'year': '2009'}, 138 | 'cr_m': {'ENTRYTYPE': 'book', 139 | 'ID': 'cr_m', 140 | 'editor': 'Edgar Erbriss', 141 | 'publisher': 'Grimble', 142 | 'title': 'Graphs of the Continent', 143 | 'year': '1974'}, 144 | 'crn': {'ENTRYTYPE': 'book', 145 | 'ID': 'crn', 146 | 'editor': 'Jeremy Jermain', 147 | 'publisher': 'Pillsbury', 148 | 'title': 'Vanquished, Victor, Vandal', 149 | 'year': '1945'}, 150 | 'crt': {'ENTRYTYPE': 'book', 151 | 'ID': 'crt', 152 | 'editor': 'Mark Monkley', 153 | 'publisher': 'Rancour', 154 | 'title': 'Beasts of the Burbling Burns', 155 | 'year': '1996'}} 156 | parser = BibTexParser(add_missing_from_crossref=True, ignore_nonstandard_types=False) 157 | with open(input_file_path) as bibtex_file: 158 | bibtex_database = parser.parse_file(bibtex_file) 159 | self.assertDictEqual(bibtex_database.entries_dict, entries_expected) 160 | 161 | def test_crossref_cascading(self): 162 | input_file_path = 'bibtexparser/tests/data/crossref_cascading.bib' 163 | entries_expected = {'r1': {'ENTRYTYPE': 'book', 164 | 'ID': 'r1', 165 | '_FROM_CROSSREF': [], 166 | 'crossref': 'r2', 167 | 'date': '1911'}, 168 | 'r2': {'ENTRYTYPE': 'book', 169 | 'ID': 'r2', 170 | '_FROM_CROSSREF': [], 171 | 'crossref': 'r3', 172 | 'date': '1911'}, 173 | 'r3': {'ENTRYTYPE': 'book', 174 | 'ID': 'r3', 175 | '_FROM_CROSSREF': [], 176 | 'crossref': 'r4', 177 | 'date': '1911'}, 178 | 'r4': {'ENTRYTYPE': 'book', 179 | 'ID': 'r4', 180 | 'date': '1911'}} 181 | 182 | parser = BibTexParser(add_missing_from_crossref=True) 183 | with open(input_file_path) as bibtex_file: 184 | bibtex_database = parser.parse_file(bibtex_file) 185 | self.assertDictEqual(bibtex_database.entries_dict, entries_expected) 186 | 187 | def test_crossref_cascading_cycle(self): 188 | input_file_path = 'bibtexparser/tests/data/crossref_cascading_cycle.bib' 189 | entries_expected = {'circ1': {'ENTRYTYPE': 'book', 190 | 'ID': 'circ1', 191 | '_FROM_CROSSREF': [], 192 | 'crossref': 'circ2', 193 | 'date': '1911'}, 194 | 'circ2': {'ENTRYTYPE': 'book', 195 | 'ID': 'circ2', 196 | '_FROM_CROSSREF': [], 197 | 'crossref': 'circ1', 198 | 'date': '1911'}} 199 | parser = BibTexParser(add_missing_from_crossref=True) 200 | with self.assertLogs('bibtexparser.bibdatabase', level='ERROR') as cm: 201 | with open(input_file_path) as bibtex_file: 202 | bibtex_database = parser.parse_file(bibtex_file) 203 | self.assertIn("ERROR:bibtexparser.bibdatabase:Circular crossref dependency: circ1->circ2->circ1.", cm.output) 204 | self.assertDictEqual(bibtex_database.entries_dict, entries_expected) 205 | 206 | def test_crossref_missing_entries(self): 207 | input_file_path = 'bibtexparser/tests/data/crossref_missing_entries.bib' 208 | entries_expected = {'mcr': {'ENTRYTYPE': 'inbook', 209 | 'ID': 'mcr', 210 | '_crossref': 'missing1', 211 | 'author': 'Megan Mistrel', 212 | 'crossref': 'missing1', 213 | 'origdate': '1933', 214 | 'title': 'Lumbering Lunatics'}} 215 | 216 | parser = BibTexParser(add_missing_from_crossref=True) 217 | with self.assertLogs('bibtexparser.bibdatabase', level='ERROR') as cm: 218 | with open(input_file_path) as bibtex_file: 219 | bibtex_database = parser.parse_file(bibtex_file) 220 | self.assertIn("ERROR:bibtexparser.bibdatabase:Crossref reference missing1 for mcr is missing.", cm.output) 221 | self.assertDictEqual(bibtex_database.entries_dict, entries_expected) 222 | 223 | if __name__ == '__main__': 224 | unittest.main() 225 | -------------------------------------------------------------------------------- /bibtexparser/customization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | A set of functions useful for customizing bibtex fields. 6 | You can find inspiration from these functions to design yours. 7 | Each of them takes a record and return the modified record. 8 | """ 9 | 10 | import re 11 | import logging 12 | 13 | from builtins import str 14 | 15 | from bibtexparser.latexenc import latex_to_unicode, string_to_latex, protect_uppercase 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | __all__ = ['splitname', 'getnames', 'author', 'editor', 'journal', 'keyword', 20 | 'link', 'page_double_hyphen', 'doi', 'type', 'convert_to_unicode', 21 | 'homogenize_latex_encoding', 'add_plaintext_fields'] 22 | 23 | 24 | class InvalidName(ValueError): 25 | """Exception raised by :py:func:`customization.splitname` when an invalid name is input. 26 | 27 | """ 28 | pass 29 | 30 | 31 | def splitname(name, strict_mode=True): 32 | """ 33 | Break a name into its constituent parts: First, von, Last, and Jr. 34 | 35 | :param string name: a string containing a single name 36 | :param Boolean strict_mode: whether to use strict mode 37 | :returns: dictionary of constituent parts 38 | :raises `customization.InvalidName`: If an invalid name is given and 39 | ``strict_mode = True``. 40 | 41 | In BibTeX, a name can be represented in any of three forms: 42 | * First von Last 43 | * von Last, First 44 | * von Last, Jr, First 45 | 46 | This function attempts to split a given name into its four parts. The 47 | returned dictionary has keys of ``first``, ``last``, ``von`` and ``jr``. 48 | Each value is a list of the words making up that part; this may be an empty 49 | list. If the input has no non-whitespace characters, a blank dictionary is 50 | returned. 51 | 52 | It is capable of detecting some errors with the input name. If the 53 | ``strict_mode`` parameter is ``True``, which is the default, this results in 54 | a :class:`customization.InvalidName` exception being raised. If it is 55 | ``False``, the function continues, working around the error as best it can. 56 | The errors that can be detected are listed below along with the handling 57 | for non-strict mode: 58 | 59 | * Name finishes with a trailing comma: delete the comma 60 | * Too many parts (e.g., von Last, Jr, First, Error): merge extra parts 61 | into First 62 | * Unterminated opening brace: add closing brace to end of input 63 | * Unmatched closing brace: add opening brace at start of word 64 | 65 | """ 66 | # Useful references: 67 | # http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html#names 68 | # http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf 69 | 70 | # Whitespace characters that can separate words. 71 | whitespace = set(' ~\r\n\t') 72 | 73 | # We'll iterate over the input once, dividing it into a list of words for 74 | # each comma-separated section. We'll also calculate the case of each word 75 | # as we work. 76 | sections = [[]] # Sections of the name. 77 | cases = [[]] # 1 = uppercase, 0 = lowercase, -1 = caseless. 78 | word = [] # Current word. 79 | case = -1 # Case of the current word. 80 | level = 0 # Current brace level. 81 | bracestart = False # Will the next character be the first within a brace? 82 | controlseq = True # Are we currently processing a control sequence? 83 | specialchar = None # Are we currently processing a special character? 84 | 85 | # Using an iterator allows us to deal with escapes in a simple manner. 86 | nameiter = iter(name) 87 | for char in nameiter: 88 | # An escape. 89 | if char == '\\': 90 | escaped = next(nameiter) 91 | 92 | # BibTeX doesn't allow whitespace escaping. Copy the slash and fall 93 | # through to the normal case to handle the whitespace. 94 | if escaped in whitespace: 95 | word.append(char) 96 | char = escaped 97 | 98 | else: 99 | # Is this the first character in a brace? 100 | if bracestart: 101 | bracestart = False 102 | controlseq = escaped.isalpha() 103 | specialchar = True 104 | 105 | # Can we use it to determine the case? 106 | elif (case == -1) and escaped.isalpha(): 107 | if escaped.isupper(): 108 | case = 1 109 | else: 110 | case = 0 111 | 112 | # Copy the escape to the current word and go to the next 113 | # character in the input. 114 | word.append(char) 115 | word.append(escaped) 116 | continue 117 | 118 | # Start of a braced expression. 119 | if char == '{': 120 | level += 1 121 | word.append(char) 122 | bracestart = True 123 | controlseq = False 124 | specialchar = False 125 | continue 126 | 127 | # All the below cases imply this (and don't test its previous value). 128 | bracestart = False 129 | 130 | # End of a braced expression. 131 | if char == '}': 132 | # Check and reduce the level. 133 | if level: 134 | level -= 1 135 | else: 136 | if strict_mode: 137 | raise InvalidName("Unmatched closing brace in name {{{0}}}.".format(name)) 138 | word.insert(0, '{') 139 | 140 | # Update the state, append the character, and move on. 141 | controlseq = False 142 | specialchar = False 143 | word.append(char) 144 | continue 145 | 146 | # Inside a braced expression. 147 | if level: 148 | # Is this the end of a control sequence? 149 | if controlseq: 150 | if not char.isalpha(): 151 | controlseq = False 152 | 153 | # If it's a special character, can we use it for a case? 154 | elif specialchar: 155 | if (case == -1) and char.isalpha(): 156 | if char.isupper(): 157 | case = 1 158 | else: 159 | case = 0 160 | 161 | # Append the character and move on. 162 | word.append(char) 163 | continue 164 | 165 | # End of a word. 166 | # NB. we know we're not in a brace here due to the previous case. 167 | if char == ',' or char in whitespace: 168 | # Don't add empty words due to repeated whitespace. 169 | if word: 170 | sections[-1].append(''.join(word)) 171 | word = [] 172 | cases[-1].append(case) 173 | case = -1 174 | controlseq = False 175 | specialchar = False 176 | 177 | # End of a section. 178 | if char == ',': 179 | if len(sections) < 3: 180 | sections.append([]) 181 | cases.append([]) 182 | elif strict_mode: 183 | raise InvalidName("Too many commas in the name {{{0}}}.".format(name)) 184 | continue 185 | 186 | # Regular character. 187 | word.append(char) 188 | if (case == -1) and char.isalpha(): 189 | if char.isupper(): 190 | case = 1 191 | else: 192 | case = 0 193 | 194 | # Unterminated brace? 195 | if level: 196 | if strict_mode: 197 | raise InvalidName("Unterminated opening brace in the name {{{0}}}.".format(name)) 198 | while level: 199 | word.append('}') 200 | level -= 1 201 | 202 | # Handle the final word. 203 | if word: 204 | sections[-1].append(''.join(word)) 205 | cases[-1].append(case) 206 | 207 | # Get rid of trailing sections. 208 | if not sections[-1]: 209 | # Trailing comma? 210 | if (len(sections) > 1) and strict_mode: 211 | raise InvalidName("Trailing comma at end of name {{{0}}}.".format(name)) 212 | sections.pop(-1) 213 | cases.pop(-1) 214 | 215 | # No non-whitespace input. 216 | if not sections or not any(bool(section) for section in sections): 217 | return {} 218 | 219 | # Initialise the output dictionary. 220 | parts = {'first': [], 'last': [], 'von': [], 'jr': []} 221 | 222 | # Form 1: "First von Last" 223 | if len(sections) == 1: 224 | p0 = sections[0] 225 | 226 | # One word only: last cannot be empty. 227 | if len(p0) == 1: 228 | parts['last'] = p0 229 | 230 | # Two words: must be first and last. 231 | elif len(p0) == 2: 232 | parts['first'] = p0[:1] 233 | parts['last'] = p0[1:] 234 | 235 | # Need to use the cases to figure it out. 236 | else: 237 | cases = cases[0] 238 | 239 | # First is the longest sequence of words starting with uppercase 240 | # that is not the whole string. von is then the longest sequence 241 | # whose last word starts with lowercase that is not the whole 242 | # string. Last is the rest. NB., this means last cannot be empty. 243 | 244 | # At least one lowercase letter. 245 | if 0 in cases: 246 | # Index from end of list of first and last lowercase word. 247 | firstl = cases.index(0) - len(cases) 248 | lastl = -cases[::-1].index(0) - 1 249 | if lastl == -1: 250 | lastl -= 1 # Cannot consume the rest of the string. 251 | 252 | # Pull the parts out. 253 | parts['first'] = p0[:firstl] 254 | parts['von'] = p0[firstl:lastl+1] 255 | parts['last'] = p0[lastl+1:] 256 | 257 | # No lowercase: last is the last word, first is everything else. 258 | else: 259 | parts['first'] = p0[:-1] 260 | parts['last'] = p0[-1:] 261 | 262 | # Form 2 ("von Last, First") or 3 ("von Last, jr, First") 263 | else: 264 | # As long as there is content in the first name partition, use it as-is. 265 | first = sections[-1] 266 | if first and first[0]: 267 | parts['first'] = first 268 | 269 | # And again with the jr part. 270 | if len(sections) == 3: 271 | jr = sections[-2] 272 | if jr and jr[0]: 273 | parts['jr'] = jr 274 | 275 | # Last name cannot be empty; if there is only one word in the first 276 | # partition, we have to use it for the last name. 277 | last = sections[0] 278 | if len(last) == 1: 279 | parts['last'] = last 280 | 281 | # Have to look at the cases to figure it out. 282 | else: 283 | lcases = cases[0] 284 | 285 | # At least one lowercase: von is the longest sequence of whitespace 286 | # separated words whose last word does not start with an uppercase 287 | # word, and last is the rest. 288 | if 0 in lcases: 289 | split = len(lcases) - lcases[::-1].index(0) 290 | if split == len(lcases): 291 | split = 0 # Last cannot be empty. 292 | parts['von'] = sections[0][:split] 293 | parts['last'] = sections[0][split:] 294 | 295 | # All uppercase => all last. 296 | else: 297 | parts['last'] = sections[0] 298 | 299 | # Done. 300 | return parts 301 | 302 | 303 | def getnames(names): 304 | """Convert people names as surname, firstnames 305 | or surname, initials. 306 | 307 | :param names: a list of names 308 | :type names: list 309 | :returns: list -- Correctly formated names 310 | 311 | .. Note:: 312 | This function is known to be too simple to handle properly 313 | the complex rules. We would like to enhance this in forthcoming 314 | releases. 315 | """ 316 | tidynames = [] 317 | for namestring in names: 318 | namestring = namestring.strip() 319 | if len(namestring) < 1: 320 | continue 321 | if ',' in namestring: 322 | namesplit = namestring.split(',', 1) 323 | last = namesplit[0].strip() 324 | firsts = [i.strip() for i in namesplit[1].split()] 325 | else: 326 | namesplit = namestring.split() 327 | last = namesplit.pop() 328 | firsts = [i.replace('.', '. ').strip() for i in namesplit] 329 | if last in ['jnr', 'jr', 'junior']: 330 | last = firsts.pop() 331 | for item in firsts: 332 | if item in ['ben', 'van', 'der', 'de', 'la', 'le']: 333 | last = firsts.pop() + ' ' + last 334 | tidynames.append(last + ", " + ' '.join(firsts)) 335 | return tidynames 336 | 337 | 338 | def author(record): 339 | """ 340 | Split author field into a list of "Name, Surname". 341 | 342 | :param record: the record. 343 | :type record: dict 344 | :returns: dict -- the modified record. 345 | 346 | """ 347 | if "author" in record: 348 | if record["author"]: 349 | record["author"] = getnames([i.strip() for i in record["author"].replace('\n', ' ').split(" and ")]) 350 | else: 351 | del record["author"] 352 | return record 353 | 354 | 355 | def editor(record): 356 | """ 357 | Turn the editor field into a dict composed of the original editor name 358 | and a editor id (without coma or blank). 359 | 360 | :param record: the record. 361 | :type record: dict 362 | :returns: dict -- the modified record. 363 | 364 | """ 365 | if "editor" in record: 366 | if record["editor"]: 367 | record["editor"] = getnames([i.strip() for i in record["editor"].replace('\n', ' ').split(" and ")]) 368 | # convert editor to object 369 | record["editor"] = [{"name": i, "ID": i.replace(',', '').replace(' ', '').replace('.', '')} for i in record["editor"]] 370 | else: 371 | del record["editor"] 372 | return record 373 | 374 | 375 | def page_double_hyphen(record): 376 | """ 377 | Separate pages by a double hyphen (--). 378 | 379 | :param record: the record. 380 | :type record: dict 381 | :returns: dict -- the modified record. 382 | 383 | """ 384 | if "pages" in record: 385 | # hyphen, non-breaking hyphen, en dash, em dash, hyphen-minus, minus sign 386 | separators = [u'‐', u'‑', u'–', u'—', u'-', u'−'] 387 | for separator in separators: 388 | if separator in record["pages"]: 389 | p = [i.strip().strip(separator) for i in record["pages"].split(separator)] 390 | record["pages"] = p[0] + '--' + p[-1] 391 | return record 392 | 393 | 394 | def type(record): 395 | """ 396 | Put the type into lower case. 397 | 398 | :param record: the record. 399 | :type record: dict 400 | :returns: dict -- the modified record. 401 | 402 | """ 403 | if "type" in record: 404 | record["type"] = record["type"].lower() 405 | return record 406 | 407 | 408 | def journal(record): 409 | """ 410 | Turn the journal field into a dict composed of the original journal name 411 | and a journal id (without coma or blank). 412 | 413 | :param record: the record. 414 | :type record: dict 415 | :returns: dict -- the modified record. 416 | 417 | """ 418 | if "journal" in record: 419 | # switch journal to object 420 | if record["journal"]: 421 | record["journal"] = {"name": record["journal"], "ID": record["journal"].replace(',', '').replace(' ', '').replace('.', '')} 422 | 423 | return record 424 | 425 | 426 | def keyword(record, sep=',|;'): 427 | """ 428 | Split keyword field into a list. 429 | 430 | :param record: the record. 431 | :type record: dict 432 | :param sep: pattern used for the splitting regexp. 433 | :type record: string, optional 434 | :returns: dict -- the modified record. 435 | 436 | """ 437 | if "keyword" in record: 438 | record["keyword"] = [i.strip() for i in re.split(sep, record["keyword"].replace('\n', ''))] 439 | 440 | return record 441 | 442 | 443 | def link(record): 444 | """ 445 | 446 | :param record: the record. 447 | :type record: dict 448 | :returns: dict -- the modified record. 449 | 450 | """ 451 | if "link" in record: 452 | links = [i.strip().replace(" ", " ") for i in record["link"].split('\n')] 453 | record['link'] = [] 454 | for link in links: 455 | parts = link.split(" ") 456 | linkobj = {"url": parts[0]} 457 | if len(parts) > 1: 458 | linkobj["anchor"] = parts[1] 459 | if len(parts) > 2: 460 | linkobj["format"] = parts[2] 461 | if len(linkobj["url"]) > 0: 462 | record["link"].append(linkobj) 463 | 464 | return record 465 | 466 | 467 | def doi(record): 468 | """ 469 | 470 | :param record: the record. 471 | :type record: dict 472 | :returns: dict -- the modified record. 473 | 474 | """ 475 | if 'doi' in record: 476 | if 'link' not in record: 477 | record['link'] = [] 478 | nodoi = True 479 | for item in record['link']: 480 | if 'doi' in item: 481 | nodoi = False 482 | if nodoi: 483 | link = record['doi'] 484 | if link.startswith('10'): 485 | link = 'http://dx.doi.org/' + link 486 | record['link'].append({"url": link, "anchor": "doi"}) 487 | return record 488 | 489 | 490 | def convert_to_unicode(record): 491 | """ 492 | Convert accent from latex to unicode style. 493 | 494 | :param record: the record. 495 | :type record: dict 496 | :returns: dict -- the modified record. 497 | """ 498 | for val in record: 499 | if isinstance(record[val], list): 500 | record[val] = [ 501 | latex_to_unicode(x) for x in record[val] 502 | ] 503 | elif isinstance(record[val], dict): 504 | record[val] = { 505 | k: latex_to_unicode(v) for k, v in record[val].items() 506 | } 507 | else: 508 | record[val] = latex_to_unicode(record[val]) 509 | return record 510 | 511 | 512 | def homogenize_latex_encoding(record): 513 | """ 514 | Homogenize the latex enconding style for bibtex 515 | 516 | This function is experimental. 517 | 518 | :param record: the record. 519 | :type record: dict 520 | :returns: dict -- the modified record. 521 | """ 522 | # First, we convert everything to unicode 523 | record = convert_to_unicode(record) 524 | # And then, we fall back 525 | for val in record: 526 | if val not in ('ID',): 527 | logger.debug('Apply string_to_latex to: %s', val) 528 | record[val] = string_to_latex(record[val]) 529 | if val == 'title': 530 | logger.debug('Protect uppercase in title') 531 | logger.debug('Before: %s', record[val]) 532 | record[val] = protect_uppercase(record[val]) 533 | logger.debug('After: %s', record[val]) 534 | return record 535 | 536 | 537 | def add_plaintext_fields(record): 538 | """ 539 | For each field in the record, add a `plain_` field containing the 540 | plaintext, stripped from braces and similar. See 541 | https://github.com/sciunto-org/python-bibtexparser/issues/116. 542 | 543 | :param record: the record. 544 | :type record: dict 545 | :returns: dict -- the modified record. 546 | """ 547 | def _strip_string(string): 548 | for stripped in ['{', '}']: 549 | string = string.replace(stripped, "") 550 | return string 551 | 552 | for key in list(record.keys()): 553 | plain_key = "plain_{}".format(key) 554 | record[plain_key] = record[key] 555 | 556 | if isinstance(record[plain_key], str): 557 | record[plain_key] = _strip_string(record[plain_key]) 558 | elif isinstance(record[plain_key], dict): 559 | record[plain_key] = { 560 | subkey: _strip_string(value) 561 | for subkey, value in record[plain_key].items() 562 | } 563 | elif isinstance(record[plain_key], list): 564 | record[plain_key] = [ 565 | _strip_string(value) 566 | for value in record[plain_key] 567 | ] 568 | 569 | return record 570 | --------------------------------------------------------------------------------