├── requirements.txt
├── tox.ini
├── MANIFEST.in
├── .coveragerc
├── bibtexparser
    ├── tests
    │   ├── data
    │   │   ├── comments_only.bib
    │   │   ├── wrong.bib
    │   │   ├── comments_only_output.bib
    │   │   ├── website.bib
    │   │   ├── book.bib
    │   │   ├── book_output.bib
    │   │   ├── book_comma_first.bib
    │   │   ├── string.bib
    │   │   ├── article_field_name_with_underscore.bib
    │   │   ├── article_start_with_whitespace.bib
    │   │   ├── article_oneline.bib
    │   │   ├── article.bib
    │   │   ├── article_output.bib
    │   │   ├── encoding.bib
    │   │   ├── article_no_braces.bib
    │   │   ├── article_start_with_bom.bib
    │   │   ├── article_with_protection_braces.bib
    │   │   ├── traps.bib
    │   │   ├── article_with_special_characters.bib
    │   │   ├── features.bib
    │   │   ├── comments_percentage.bib
    │   │   ├── comments_percentage_nolastcoma.bib
    │   │   ├── article_comma_first.bib
    │   │   ├── features_output.bib
    │   │   ├── multiline_comments.bib
    │   │   ├── multiple_entries_output.bib
    │   │   ├── multiple_entries_and_comments_output.bib
    │   │   ├── features2.bib
    │   │   ├── multiple_entries.bib
    │   │   ├── multiple_entries_and_comments.bib
    │   │   └── article_missing_coma.bib
    │   ├── test_bibdatabase.py
    │   ├── test_homogenise_fields.py
    │   ├── test_preambles.py
    │   ├── test_bibtexexpression.py
    │   ├── test_latexenc.py
    │   ├── test_bibtex_strings.py
    │   ├── test_bwriter.py
    │   ├── test_bibtexparser.py
    │   ├── test_customization.py
    │   ├── test_comments.py
    │   ├── test_bibtexwriter.py
    │   ├── test_bparser.py
    │   └── test_splitname.py
    ├── __init__.py
    ├── bibdatabase.py
    ├── bwriter.py
    ├── bibtexexpression.py
    ├── bparser.py
    └── customization.py
├── RELEASE
├── .gitignore
├── docs
    ├── source
    │   ├── who.rst
    │   ├── bibtexparser.rst
    │   ├── bibtex_conv.rst
    │   ├── index.rst
    │   ├── install.rst
    │   ├── logging.rst
    │   ├── conf.py
    │   └── tutorial.rst
    └── Makefile
├── CONTRIBUTORS.txt
├── .travis.yml
├── setup.py
├── README.rst
├── CHANGELOG
└── COPYING


/requirements.txt:
--------------------------------------------------------------------------------
1 | pyparsing>=2.0.3
2 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py27,py35
3 | [testenv]
4 | deps = nose pyparsing
5 | commands = nosetests
6 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include *.md
3 | include docs/Makefile
4 | include docs/source/*
5 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source = bibtexparser
4 | 
5 | [report]
6 | exclude_lines =
7 |     if __name__ == .__main__.:
8 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/comments_only.bib:
--------------------------------------------------------------------------------
1 | @comment{ignore this line!}
2 | @Comment{ignore this line too!}
3 | @COMMENT{and ignore this line too!}
4 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/wrong.bib:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | @wrong{foo,
 4 |     author = {wrong}
 5 | }
 6 | 
 7 | @article{bar,
 8 |     author = {correct}
 9 | }
10 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/comments_only_output.bib:
--------------------------------------------------------------------------------
1 | @comment{ignore this line!}
2 | 
3 | @comment{ignore this line too!}
4 | 
5 | @comment{and ignore this line too!}
6 | 
7 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/website.bib:
--------------------------------------------------------------------------------
1 | @misc{feder2006,
2 |  title = {BibTeX},
3 |  author = {Alexander Feder},
4 |  url = {http://bibtex.org},
5 |  year = {2006}
6 | }
7 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/book.bib:
--------------------------------------------------------------------------------
1 | @BOOK{Bird1987,
2 |   title = {Dynamics of Polymeric Liquid},
3 |   publisher = {Wiley Edition},
4 |   year = {1987},
5 |   author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.},
6 |   volume = {1},
7 |   edition = {2},
8 | }
9 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/book_output.bib:
--------------------------------------------------------------------------------
 1 | @book{Bird1987,
 2 |  author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.},
 3 |  edition = {2},
 4 |  publisher = {Wiley Edition},
 5 |  title = {Dynamics of Polymeric Liquid},
 6 |  volume = {1},
 7 |  year = {1987}
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/book_comma_first.bib:
--------------------------------------------------------------------------------
 1 | @book{Bird1987
 2 |    , author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.}
 3 |    , edition = {2}
 4 |    , publisher = {Wiley Edition}
 5 |    , title = {Dynamics of Polymeric Liquid}
 6 |    , volume = {1}
 7 |    , year = {1987}
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/string.bib:
--------------------------------------------------------------------------------
1 | @STRING{oakland = {Proceedings of the {IEEE} Symposium on Security and Privacy}}
2 | @INPROCEEDINGS{cha:oakland15,
3 |     author = {Sang Kil Cha and Maverick Woo and David Brumley},
4 |     title = {{Program-Adaptive Mutational Fuzzing}},
5 |     booktitle = oakland,
6 |     year = {2015},
7 |     pages = {725--741}
8 | }
9 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_field_name_with_underscore.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean César},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   pages = {12-23},
 7 |   journal = {Nice Journal},
 8 |   comments = {A comment},
 9 |   keyword = {keyword1, keyword2},
10 |   strange-field-name2 = {val2},
11 |   strange_field_name = {val},
12 | }
13 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_start_with_whitespace.bib:
--------------------------------------------------------------------------------
 1 |  @ARTICLE{Cesar2013,
 2 |   author = {Jean Cesar},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   journal = {Nice Journal}
 7 | }
 8 | 
 9 |     @ARTICLE{Cesar2014,
10 |   author = {Jean Cesar},
11 |   title = {An amazing title},
12 |   year = {2014},
13 |   volume = {12},
14 |   journal = {Nice Journal}
15 | }


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_oneline.bib:
--------------------------------------------------------------------------------
1 | @ARTICLE{Cesar2013, author = {Jean Cesar}, title = {An amazing title}, year = {2013}, volume = {12}, journal = {Nice Journal}, comments = {A comment}, keyword = {keyword1, keyword2}}
2 | 
3 | @ARTICLE{ Baltazar2013,author = {Jean Baltazar},title = {An amazing title},year = {2013},volume = {12},journal = {Nice Journal},comments = {A comment},keyword = {keyword1, keyword2}}
4 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean César},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   month = "jan",
 6 |   volume = {12},
 7 |   pages = {12-23},
 8 |   journal = {Nice Journal},
 9 |   abstract = {This is an abstract. This line should be long enough to test
10 | multilines... and with a french érudit word},
11 |   comments = {A comment},
12 |   keyword = {keyword1, keyword2},
13 | }
14 | 


--------------------------------------------------------------------------------
/RELEASE:
--------------------------------------------------------------------------------
 1 | How to release
 2 | ==============
 3 | 
 4 | * Update CHANGELOG
 5 | * Update version in __init__.py
 6 | * git tag -a 'vX'
 7 | * merge in branch latest
 8 | * Create a tarball and upload it to the server
 9 |     git archive master  --prefix 'bibtexparser/' | bzip2 > bibtexparser-x.y.tar.bz2
10 | * Upload the package to PyPI
11 |     python setup.py sdist upload
12 | * Activate the new documentation version on Read the Docs
13 | * Update version in __init__.py
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_output.bib:
--------------------------------------------------------------------------------
 1 | @article{Cesar2013,
 2 |  abstract = {This is an abstract. This line should be long enough to test
 3 | multilines... and with a french érudit word},
 4 |  author = {Jean César},
 5 |  comments = {A comment},
 6 |  journal = {Nice Journal},
 7 |  keyword = {keyword1, keyword2},
 8 |  month = {jan},
 9 |  pages = {12-23},
10 |  title = {An amazing title},
11 |  volume = {12},
12 |  year = {2013}
13 | }
14 | 
15 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/encoding.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar_2013,
 2 |   author = {Jean César},
 3 |   title = {An amazing title: à},
 4 |   year = {2013},
 5 |   month = "jan",
 6 |   volume = {12},
 7 |   pages = {12-23},
 8 |   journal = {Elémentaire},
 9 |   abstract = {This is an abstract. This line should be long enough to test
10 | 	 multilines... and with a french érudit word},
11 |   comments = {A comment},
12 |   keywords = {keyword1, keyword2},
13 | }
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_no_braces.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = "Jean C{\'e}sar{\"u}",
 3 |   title = "An amazing title",
 4 |   year = "2013",
 5 |   month = "jan",
 6 |   volume = "12",
 7 |   pages = "12-23",
 8 |   journal = "Nice Journal",
 9 |   abstract = "This is an abstract. This line should be long enough to test
10 | multilines... and with a french érudit word",
11 |   comments = "A comment",
12 |   keyword = "keyword1, keyword2",
13 | }
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_start_with_bom.bib:
--------------------------------------------------------------------------------
 1 | 
 2 | @ARTICLE{Cesar2013,
 3 |   author = {Jean César},
 4 |   title = {An amazing title},
 5 |   year = {2013},
 6 |   month = "jan",
 7 |   volume = {12},
 8 |   pages = {12-23},
 9 |   journal = {Nice Journal},
10 |   abstract = {This is an abstract. This line should be long enough to test
11 | multilines... and with a french érudit word},
12 |   comments = {A comment},
13 |   keyword = {keyword1, keyword2},
14 | }
15 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_with_protection_braces.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean César},
 3 |   title = {{An amazing title}},
 4 |   year = {2013},
 5 |   month = "jan",
 6 |   volume = {12},
 7 |   pages = {12-23},
 8 |   journal = {{Nice Journal}},
 9 |   abstract = {This is an abstract. This line should be long enough to test
10 | multilines... and with a french érudit word},
11 |   comments = {A comment},
12 |   keyword = {keyword1, keyword2},
13 | }
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/traps.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Laide2013,
 2 |   author = {Jean Laid{\'e},
 3 |   Ben Loaeb},
 4 |   title = {{An} amazing {title}},
 5 |   year = {2013},
 6 |   month = "jan",
 7 |   volume = {n.s.~2},
 8 |   pages = {12-23},
 9 |   journal = {Nice Journal},
10 |   abstract = {This is an abstract. This line should be long enough to test
11 | 	 multilines... and with a french érudit word},
12 |   comments = {A comment},
13 |   keywords = {keyword1, keyword2},
14 | }
15 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_with_special_characters.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean C{\'e}sar{\"u}},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   month = "jan",
 6 |   volume = {12},
 7 |   pages = {12-23},
 8 |   journal = {Nice Journal},
 9 |   abstract = {This is an abstract. This line should be long enough to test
10 | multilines... and with a french érudit word},
11 |   comments = {A comment},
12 |   keyword = {keyword1, keyword2},
13 | }
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/features.bib:
--------------------------------------------------------------------------------
 1 | @comment{ignore this line!}
 2 | @Comment{ignore this line too!}
 3 | @COMMENT{and ignore this line too!}
 4 | 
 5 | @preamble{ "\makeatletter" }
 6 | @preamble{ "\@ifundefined{url}{\def\url#1{\texttt{#1}}}{}" }
 7 | @preamble{ "\makeatother" }
 8 | 
 9 | @string{mystring = "Hello"}
10 | @string{myconf = "My International Conference"}
11 | @string{myname = "Doe"}
12 | 
13 | @inproceedings{mykey,
14 |   author = "John",
15 |   title = {Cool Stuff},
16 |   booktitle = myconf,
17 |   year = 2014,
18 | }
19 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | 
21 | # Installer logs
22 | pip-log.txt
23 | 
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 | 
29 | # Translations
30 | *.mo
31 | 
32 | # Mr Developer
33 | .mr.developer.cfg
34 | .project
35 | .pydevproject
36 | 
37 | # Pycharm
38 | .idea
39 | 
40 | # Vim.
41 | *.swp
42 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/comments_percentage.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean Cesar},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   journal = {Nice Journal},
 7 |   comments = {A comment},
 8 |   keyword = {keyword1, keyword2},
 9 | }
10 | % comment.
11 | @ARTICLE{Baltazar2013,
12 |   author = {Jean Baltazar},
13 |   title = {An amazing title},
14 |   year = {2013},
15 |   volume = {12},
16 |   journal = {Nice Journal},
17 |   comments = {A comment},
18 |   keyword = {keyword1, keyword2},
19 | }
20 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/comments_percentage_nolastcoma.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean Cesar},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   journal = {Nice Journal},
 7 |   comments = {A comment},
 8 |   keyword = {keyword1, keyword2}
 9 | }
10 | % comment.
11 | @ARTICLE{Baltazar2013,
12 |   author = {Jean Baltazar},
13 |   title = {An amazing title},
14 |   year = {2013},
15 |   volume = {12},
16 |   journal = {Nice Journal},
17 |   comments = {A comment},
18 |   keyword = {keyword1, keyword2}
19 | }
20 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_comma_first.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013
 2 |  , author = {Jean Cesar}
 3 |  , title = {An amazing title}
 4 |  , year = {2013}
 5 |  , volume = {12}
 6 |  , journal = {Nice Journal}
 7 |  , comments = {A comment}
 8 |  , keyword = {keyword1, keyword2}
 9 | }
10 | 
11 | @ARTICLE{ Baltazar2013
12 |         , author = {Jean Baltazar}
13 |         , title = {An amazing title}
14 |         , year = {2013}
15 |         , volume = {12}
16 |         , journal = {Nice Journal}
17 |         , comments = {A comment}
18 |         , keyword = {keyword1, keyword2}}
19 | 


--------------------------------------------------------------------------------
/docs/source/who.rst:
--------------------------------------------------------------------------------
 1 | Who uses BibtexParser?
 2 | ======================
 3 | 
 4 | If your project uses BibtexParser, you can ask for the addition of a link in this list.
 5 | 
 6 | * http://timotheepoisot.fr/2013/11/10/shared-bibtex-file-markdown/
 7 | * https://github.com/Phyks/BMC
 8 | * http://aurelien.naldi.info/research/publications.html
 9 | * http://robot.kut.ac.kr/publications
10 | * https://git.atelo.org/etlapale/bibgen
11 | * https://onmenwhostareongraphs.wordpress.com/2015/06/09/graph-display-software-for-author-relationships-with-bibtex-files/
12 | * https://github.com/vitorfs/parsifal
13 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/features_output.bib:
--------------------------------------------------------------------------------
 1 | @comment{ignore this line!}
 2 | 
 3 | @comment{ignore this line too!}
 4 | 
 5 | @comment{and ignore this line too!}
 6 | 
 7 | @preamble{ "\makeatletter" }
 8 | 
 9 | @preamble{ "\@ifundefined{url}{\def\url#1{\texttt{#1}}}{}" }
10 | 
11 | @preamble{ "\makeatother" }
12 | 
13 | @string{mystring = "Hello"}
14 | 
15 | @string{myconf = "My International Conference"}
16 | 
17 | @string{myname = "Doe"}
18 | 
19 | @inproceedings{mykey,
20 |  author = {John},
21 |  booktitle = {My International Conference},
22 |  title = {Cool Stuff},
23 |  year = {2014}
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiline_comments.bib:
--------------------------------------------------------------------------------
 1 | @comment{Lorem ipsum dolor sit amet,
 2 | consectetur adipisicing elit}
 3 | 
 4 | @comment{
 5 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
 6 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
 7 | 
 8 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
 9 | Excepteur sint occaecat cupidatat non proident.
10 |  ,
11 | }
12 | 
13 | @comment{
14 | 
15 | 
16 | Sunt in culpa qui officia deserunt mollit anim id est laborum.
17 | 
18 | 
19 | }
20 | 
21 | @comment{}
22 | 
23 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiple_entries_output.bib:
--------------------------------------------------------------------------------
 1 | @book{Toto3000,
 2 |  author = {Toto, A and Titi, B},
 3 |  title = {A title}
 4 | }
 5 | 
 6 | @article{Wigner1938,
 7 |  author = {Wigner, E.},
 8 |  doi = {10.1039/TF9383400029},
 9 |  issn = {0014-7672},
10 |  journal = {Trans. Faraday Soc.},
11 |  owner = {fr},
12 |  pages = {29--41},
13 |  publisher = {The Royal Society of Chemistry},
14 |  title = {The transition state method},
15 |  volume = {34},
16 |  year = {1938}
17 | }
18 | 
19 | @book{Yablon2005,
20 |  author = {Yablon, A.D.},
21 |  publisher = {Springer},
22 |  title = {Optical fiber fusion slicing},
23 |  year = {2005}
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiple_entries_and_comments_output.bib:
--------------------------------------------------------------------------------
 1 | @comment{}
 2 | 
 3 | @comment{A comment}
 4 | 
 5 | @book{Toto3000,
 6 |  author = {Toto, A and Titi, B},
 7 |  title = {A title}
 8 | }
 9 | 
10 | @article{Wigner1938,
11 |  author = {Wigner, E.},
12 |  doi = {10.1039/TF9383400029},
13 |  issn = {0014-7672},
14 |  journal = {Trans. Faraday Soc.},
15 |  owner = {fr},
16 |  pages = {29--41},
17 |  publisher = {The Royal Society of Chemistry},
18 |  title = {The transition state method},
19 |  volume = {34},
20 |  year = {1938}
21 | }
22 | 
23 | @book{Yablon2005,
24 |  author = {Yablon, A.D.},
25 |  publisher = {Springer},
26 |  title = {Optical fiber fusion slicing},
27 |  year = {2005}
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/features2.bib:
--------------------------------------------------------------------------------
 1 | @string{CoOl = "Cool"}
 2 | @string{stuff = "Stuff"}
 3 | @string{myTitle = cool # " " # stuff}
 4 | 
 5 | @string{int = "International"}
 6 | @string{myconf = "My "#int#" Conference"}
 7 | 
 8 | @string{myname = "Doe"}
 9 | 
10 | @String {firstname = "John"}
11 | @String {lastname  = myname}
12 | @String {domain  = "example"}
13 | @String {tld  = "com"}
14 | 
15 | @String {foo = "1--10"}
16 | @String {BaR = FOO}
17 | @String {pages = baR}
18 | 
19 | @inproceedings{mykey,
20 |   author = "John " # mynamE,
21 |   title = mytitle,
22 |   booktitle = myconf,
23 |   pages = pages,
24 |   year = 2014,
25 |   note = "Email: " # firstname # "." # lastname #
26 |           "@" # domain # "." # tld,
27 | }
28 | 


--------------------------------------------------------------------------------
/CONTRIBUTORS.txt:
--------------------------------------------------------------------------------
 1 | - François Boulogne
 2 |   Project coordinator
 3 | 
 4 | - bibserver's contributors
 5 |   for the parser's core and the permission to release this project under LGPLv3 and BSD
 6 | 
 7 | - Shuen-Huei (Drake) Guan
 8 |   Python 2.7 porting
 9 | 
10 | - Sebastien Diemer
11 |   Bugfix
12 | 
13 | - Georg C. Brückmann
14 |   Support for non-standard entry types
15 | 
16 | - Uwe Schmidt
17 |   String replacement
18 | 
19 | - faph
20 |   comma fixes, optional keys sanitising, refactoring and other improvements
21 | 
22 | - Steven M. Bellovin
23 |   Fix braces detection
24 | 
25 | - Sven Goossens
26 |   Support for bibtex with leading spaces
27 | 
28 | - Michal Grochmal
29 |   Comma first syntax support
30 | 
31 | - Cschaffner
32 |   New features in bwriter
33 | 
34 | - Olivier Mangin
35 |   Pyparsing implementation of the parser.
36 | 
37 | - Blair Bonnett
38 |   customization.splitname() function
39 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | matrix:
 3 |   include:
 4 |     - python: "2.7"
 5 |       env: TEST_SUITE=suite_2_7
 6 |     - python: "3.3"
 7 |       env: TEST_SUITE=suite_3_3
 8 |     - python: "3.4"
 9 |       env: TEST_SUITE=suite_3_4
10 |     - python: "3.5"
11 |       env: TEST_SUITE=suite_3_5
12 |     - python: "pypy"
13 |       env: TEST_SUITE=suite_pypy
14 |     - python: "pypy3"
15 |       env: TEST_SUITE=suite_pypy3
16 | install:
17 |   - if [[ $TEST_SUITE == suite_3_5 ]]; then
18 |         pip install sphinx;
19 |     fi;
20 |   - pip install coverage
21 |   - pip install -r requirements.txt
22 |   - python setup.py install
23 | script:
24 |   - nosetests --with-coverage  --cover-erase --cover-package=bibtexparser
25 |   - if [[ $TEST_SUITE == suite_3_5 ]]; then
26 |         cd docs;
27 |         make html;
28 |     fi;
29 | 
30 | after_success:
31 |   - pip install coveralls
32 |   - coveralls
33 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | try:
 4 |     from setuptools import setup
 5 | except ImportError as ex:
 6 |     print('[python-bibtexparser] setuptools not found. Falling back to distutils.core')
 7 |     from distutils.core import setup
 8 | 
 9 | with open('bibtexparser/__init__.py') as fh:
10 |     for line in fh:
11 |         if line.startswith('__version__'):
12 |             version = line.strip().split()[-1][1:-1]
13 |             break
14 | 
15 | setup(
16 |     name         = 'bibtexparser',
17 |     version      = version,
18 |     url          = "https://github.com/sciunto-org/python-bibtexparser",
19 |     author       = "Francois Boulogne and other contributors",
20 |     license      = "LGPLv3 or BSD",
21 |     author_email = "devel@sciunto.org",
22 |     description  = "Bibtex parser for python 2.7 and 3.3 and newer",
23 |     packages     = ['bibtexparser'],
24 |     install_requires = ['pyparsing'],
25 | )
26 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiple_entries.bib:
--------------------------------------------------------------------------------
 1 | @Book{Yablon2005,
 2 |   Title                    = {Optical fiber fusion slicing},
 3 |   Author                   = {Yablon, A.D.},
 4 |   Publisher                = {Springer},
 5 |   Year                     = {2005},
 6 | }
 7 | 
 8 | @Article{Wigner1938,
 9 |   Title                    = {The transition state method},
10 |   Author                   = {Wigner, E.},
11 |   Journal                  = {Trans. Faraday Soc.},
12 |   Year                     = {1938},
13 |   Pages                    = {29--41},
14 |   Volume                   = {34},
15 |   Doi                      = {10.1039/TF9383400029},
16 |   ISSN                     = {0014-7672},
17 |   Owner                    = {fr},
18 |   Publisher                = {The Royal Society of Chemistry},
19 | }
20 | 
21 | @Book{Toto3000,
22 |   Title                    = {A title},
23 |   Author                   = {Toto, A and Titi, B},
24 | }
25 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiple_entries_and_comments.bib:
--------------------------------------------------------------------------------
 1 | @Book{Yablon2005,
 2 |   Title                    = {Optical fiber fusion slicing},
 3 |   Author                   = {Yablon, A.D.},
 4 |   Publisher                = {Springer},
 5 |   Year                     = {2005},
 6 | }
 7 | 
 8 | @Article{Wigner1938,
 9 |   Title                    = {The transition state method},
10 |   Author                   = {Wigner, E.},
11 |   Journal                  = {Trans. Faraday Soc.},
12 |   Year                     = {1938},
13 |   Pages                    = {29--41},
14 |   Volume                   = {34},
15 |   Doi                      = {10.1039/TF9383400029},
16 |   ISSN                     = {0014-7672},
17 |   Owner                    = {fr},
18 |   Publisher                = {The Royal Society of Chemistry},
19 | }
20 | 
21 | @Book{Toto3000,
22 |   Title                    = {A title},
23 |   Author                   = {Toto, A and Titi, B},
24 | }
25 | 
26 | @Comment{}
27 | 
28 | @Comment{A comment}
29 | 
30 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibdatabase.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from bibtexparser.bibdatabase import BibDatabase
 3 | 
 4 | 
 5 | class TestBibDatabase(unittest.TestCase):
 6 |     entries = [{'ENTRYTYPE': 'book',
 7 |                 'year': '1987',
 8 |                 'edition': '2',
 9 |                 'publisher': 'Wiley Edition',
10 |                 'ID': 'Bird1987',
11 |                 'volume': '1',
12 |                 'title': 'Dynamics of Polymeric Liquid',
13 |                 'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.'
14 |                }]
15 | 
16 |     def test_entries_list_method(self):
17 |         bib_db = BibDatabase()
18 |         bib_db.entries = self.entries
19 |         self.assertEqual(bib_db.entries, bib_db.get_entry_list())
20 | 
21 |     def test_entries_dict_prop(self):
22 |         bib_db = BibDatabase()
23 |         bib_db.entries = self.entries
24 |         self.assertEqual(bib_db.entries_dict, bib_db.get_entry_dict())
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     unittest.main()
29 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_missing_coma.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean Cesar},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   journal = {Nice Journal},
 7 |   comments = {A comment},
 8 |   keyword = {keyword1, keyword2}
 9 | }
10 | 
11 | @ARTICLE{Baltazar2013,
12 |   author = {Jean Baltazar},
13 |   title = {An amazing title},
14 |   year = {2013},
15 |   volume = {12},
16 |   journal = {Nice Journal},
17 |   comments = {A comment},
18 |   keyword = {keyword1, keyword2}}
19 | 
20 | @ARTICLE{Aimar2013,
21 |   author = {Jean Aimar},
22 |   title = {An amazing title},
23 |   year = {2013},
24 |   volume = {12},
25 |   journal = {Nice Journal},
26 |   comments = {A comment},
27 |   keyword = {keyword1, keyword2},
28 |   month = "january"
29 | }
30 | 
31 | @ARTICLE{Doute2013,
32 |   author = {Jean Doute},
33 |   title = {An amazing title},
34 |   volume = {12},
35 |   journal = {Nice Journal},
36 |   comments = {A comment},
37 |   keyword = {keyword1, keyword2},
38 |   year = "2013"
39 | }
40 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_homogenise_fields.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from bibtexparser.bparser import BibTexParser
 3 | 
 4 | 
 5 | class TestHomogenizeFields(unittest.TestCase):
 6 |     def test_homogenize_default(self):
 7 |         with open('bibtexparser/tests/data/website.bib', 'r') as bibfile:
 8 |             bib = BibTexParser(bibfile.read())
 9 |             entries = bib.get_entry_list()
10 |             self.assertNotIn('link', entries[0])
11 |             self.assertIn('url', entries[0])
12 | 
13 |     def test_homogenize_on(self):
14 |         with open('bibtexparser/tests/data/website.bib', 'r') as bibfile:
15 |             bib = BibTexParser(bibfile.read(), homogenize_fields=True)
16 |             entries = bib.get_entry_list()
17 |             self.assertIn('link', entries[0])
18 |             self.assertNotIn('url', entries[0])
19 | 
20 |     def test_homogenize_off(self):
21 |         with open('bibtexparser/tests/data/website.bib', 'r') as bibfile:
22 |             bib = BibTexParser(bibfile.read(), homogenize_fields=False)
23 |             entries = bib.get_entry_list()
24 |             self.assertNotIn('link', entries[0])
25 |             self.assertIn('url', entries[0])
26 | 


--------------------------------------------------------------------------------
/docs/source/bibtexparser.rst:
--------------------------------------------------------------------------------
 1 | .. _bibtexparser_api:
 2 | 
 3 | .. contents::
 4 | 
 5 | bibtexparser: API
 6 | =================
 7 | 
 8 | :mod:`bibtexparser` --- Parsing and writing BibTeX files
 9 | --------------------------------------------------------
10 | 
11 | .. automodule:: bibtexparser
12 |     :members: load, loads, dumps, dump
13 | 
14 | :mod:`bibtexparser.bibdatabase` --- The bibliographic database object
15 | ---------------------------------------------------------------------
16 | 
17 | .. autoclass:: bibdatabase.BibDatabase
18 |     :members: entries, entries_dict, comments, strings, preambles
19 | 
20 | :mod:`bibtexparser.bparser` --- Tune the default parser
21 | --------------------------------------------------------
22 | 
23 | .. automodule:: bparser
24 |     :members:
25 | 
26 | :mod:`bibtexparser.customization` --- Functions to customize records
27 | --------------------------------------------------------------------
28 | 
29 | .. automodule:: customization
30 |     :members:
31 | 
32 | Exception classes
33 | ^^^^^^^^^^^^^^^^^
34 | .. autoclass:: customization.InvalidName
35 | 
36 | :mod:`bibtexparser.bwriter` --- Tune the default writer
37 | -------------------------------------------------------
38 | 
39 | .. autoclass:: bwriter.BibTexWriter
40 |     :members:
41 | 
42 | :mod:`bibtexparser.bibtexexpression` --- Parser's core relying on pyparsing
43 | ---------------------------------------------------------------------------
44 | 
45 | .. automodule:: bibtexexpression
46 |     :members:
47 | 
48 | 


--------------------------------------------------------------------------------
/docs/source/bibtex_conv.rst:
--------------------------------------------------------------------------------
 1 | ===============================================
 2 | Bibtex tips, conventions and unrelated projects
 3 | ===============================================
 4 | 
 5 | This page presents various resources about bibtex in general.
 6 | 
 7 | Format
 8 | ======
 9 | 
10 | http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html
11 | 
12 | * Comments
13 | * Variable
14 | * @preamble
15 | * Name convention
16 | 
17 | Upper case letters in titles
18 | ----------------------------
19 | 
20 | Put the letter/word in curly braces like {this}.
21 | 
22 | 
23 | General references
24 | ------------------
25 | 
26 | * http://tug.ctan.org/tex-archive/info/bibtex/tamethebeast/ttb_en.pdf
27 | * http://ctan.mirrors.hoobly.com/macros/latex/contrib/biblatex/doc/biblatex.pdf
28 | 
29 | IEEE citation reference
30 | -----------------------
31 | 
32 | * https://origin.www.ieee.org/documents/ieeecitationref.pdf
33 | 
34 | 
35 | Common Errors in Bibliographies John Owens
36 | ------------------------------------------
37 | 
38 | * http://www.ece.ucdavis.edu/~jowens/biberrors.html
39 | 
40 | Common abbreviations for journals
41 | ---------------------------------
42 | 
43 | * Jabref list http://jabref.sourceforge.net/resources.php#downloadlists
44 | 
45 | 
46 | Projects
47 | ========
48 | 
49 | Here are some interesting projects using bibtex but not necessarily this parser.
50 | 
51 | Display your bibliography in html pages
52 | ---------------------------------------
53 | 
54 | * http://www.monperrus.net/martin/bibtexbrowser/
55 | 
56 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. BibtexParser documentation master file, created by
 2 |    sphinx-quickstart on Thu Aug  1 13:30:23 2013.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to BibtexParser's documentation!
 7 | ========================================
 8 | 
 9 | 
10 | :Author: François Boulogne and other contributors
11 | :Devel: `github.com project <https://github.com/sciunto-org/python-bibtexparser>`_
12 | :Mirror: `git.sciunto.org <https://git.sciunto.org/mirror/python-bibtexparser>`_
13 | :Bugs: `github.com <https://github.com/sciunto-org/python-bibtexparser/issues>`_
14 | :Generated: |today|
15 | :License: LGPL v3 or BSD
16 | :Version: |release|
17 | 
18 | BibtexParser is a python library to parse bibtex files. The code relies on `pyparsing <http://pyparsing.wikispaces.com/>`_ and is tested with unittests.
19 | 
20 | If you use BibtexParser for your project, feel free to send me an email. I would be happy to hear that and to mention your project in the documentation.
21 | 
22 | Contents:
23 | 
24 | .. toctree::
25 |     :maxdepth: 2
26 | 
27 |     install.rst
28 |     tutorial.rst
29 |     bibtexparser.rst
30 |     logging.rst
31 |     bibtex_conv.rst
32 |     who.rst
33 | 
34 | 
35 | Other projects
36 | ==============
37 | 
38 | * http://pybtex.sourceforge.net/
39 | * http://pybliographer.org/
40 | * https://github.com/matthew-brett/babybib
41 | 
42 | Indices and tables
43 | ==================
44 | 
45 | * :ref:`genindex`
46 | * :ref:`modindex`
47 | * :ref:`search`
48 | 
49 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | python-bibtexparser
 2 | ===================
 3 | 
 4 | Python library to parse `bibtex <https://en.wikipedia.org/wiki/BibTeX>`_ files.
 5 | 
 6 | 
 7 | IMPORTANT: the library is looking for new maintainers. Please get in touch if you are interested.
 8 | 
 9 | .. contents::
10 | 
11 | 
12 | Bibtexparser relies on `pyparsing <https://pypi.python.org/pypi/pyparsing>`_ and is compatible with Python 2.7 and 3.3 or newer.
13 | 
14 | Documentation
15 | -------------
16 | 
17 | Our documentation includes the installation procedure, a tutorial, the API and advice on how to report a bug.
18 | References, related projects and software based on bibtexparser are also listed. If you would like to appear on this list, feel free to open a ticket or send an email.
19 | 
20 | `Documentation on readthedocs.io <https://bibtexparser.readthedocs.io/>`_
21 | 
22 | Upgrading
23 | ---------
24 | 
25 | Please read the changelog before upgrading to check for API modifications.
26 | Before version 1.0, we do not hesitate to modify the API in order to improve it based on your feedback.
27 | 
28 | License
29 | -------
30 | 
31 | Dual license (at your choice):
32 | 
33 | * LGPLv3.
34 | * BSD
35 | 
36 | See COPYING for details.
37 | 
38 | History and evolutions
39 | ----------------------
40 | 
41 | The original source code was part of bibserver from `OKFN <http://github.com/okfn/bibserver>`_. This project is released under the AGPLv3. OKFN and the original authors kindly provided the permission to use a subpart of their project (i.e. the bibtex parser) under LGPLv3. Many thanks to them!
42 | 
43 | The parser evolved to a new core based on pyparsing.
44 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_preambles.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import bibtexparser
 3 | from bibtexparser.bibdatabase import BibDatabase
 4 | from collections import OrderedDict
 5 | 
 6 | 
 7 | class TestPreambleParse(unittest.TestCase):
 8 |     def test_single_preamble_parse_count(self):
 9 |         bibtex_str = '@preamble{" a "}\n\n'
10 |         bib_database = bibtexparser.loads(bibtex_str)
11 |         self.assertEqual(len(bib_database.preambles), 1)
12 | 
13 |     def test_multiple_preamble_parse_count(self):
14 |         bibtex_str = '@preamble{" a "}\n\n@preamble{"b"}\n\n'
15 |         bib_database = bibtexparser.loads(bibtex_str)
16 |         self.assertEqual(len(bib_database.preambles), 2)
17 | 
18 |     def test_single_preamble_parse(self):
19 |         bibtex_str = '@preamble{" a "}\n\n'
20 |         bib_database = bibtexparser.loads(bibtex_str)
21 |         expected = [' a ']
22 |         self.assertEqual(bib_database.preambles, expected)
23 | 
24 |     def test_multiple_preamble_parse(self):
25 |         bibtex_str = '@preamble{" a "}\n\n@preamble{"b"}\n\n'
26 |         bib_database = bibtexparser.loads(bibtex_str)
27 |         expected = [' a ', 'b']
28 |         self.assertEqual(bib_database.preambles, expected)
29 | 
30 | 
31 | class TestPreambleWrite(unittest.TestCase):
32 |     def test_single_preamble_write(self):
33 |         bib_database = BibDatabase()
34 |         bib_database.preambles = [' a ']
35 |         result = bibtexparser.dumps(bib_database)
36 |         expected = '@preamble{" a "}\n\n'
37 |         self.assertEqual(result, expected)
38 | 
39 |     def test_multiple_string_write(self):
40 |         bib_database = BibDatabase()
41 |         bib_database.preambles = [' a ', 'b']
42 |         result = bibtexparser.dumps(bib_database)
43 |         expected = '@preamble{" a "}\n\n@preamble{"b"}\n\n'
44 |         self.assertEqual(result, expected)
45 | 


--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
 1 | ========================
 2 | How to install and test?
 3 | ========================
 4 | 
 5 | How to install?
 6 | ===============
 7 | 
 8 | Requirements
 9 | ------------
10 | 
11 | * python **2.7** or python **3.3** or newer
12 | * pyparsing **2.0.3** or newer
13 | 
14 | Package manager (recommended for those OS users)
15 | ------------------------------------------------
16 | 
17 | * `Archlinux <https://aur.archlinux.org/packages/python-bibtexparser/>`_
18 | * `Debian <https://packages.debian.org/en/sid/main/python-bibtexparser>`_
19 | 
20 | pip (recommended to other users)
21 | ---------------------------------
22 | 
23 | To install with pip:
24 | 
25 | .. code-block:: sh
26 | 
27 |     pip install bibtexparser
28 | 
29 | 
30 | Manual installation (recommended for packagers)
31 | -----------------------------------------------
32 | 
33 | Download the archive on `Pypi <http://pypi.python.org/pypi/bibtexparser/>`_.
34 | 
35 | .. code-block:: sh
36 | 
37 |     python setup.py install
38 | 
39 | 
40 | How to run the test suite?
41 | ==========================
42 | 
43 | This paragraph briefly describes how to run the test suite.
44 | This is useful for contributors, for packagers but also for users who want to check their environment.
45 | 
46 | 
47 | Virtualenv
48 | ----------
49 | 
50 | You can make a virtualenv. I like `pew <https://pypi.python.org/pypi/pew/>`_ for that because the API is easier.
51 | 
52 | The first time, you need to make a virtualenv
53 | 
54 | .. code-block:: sh
55 | 
56 |     pew mkproject bibtexparser
57 |     pip install -r requirements.txt
58 |     python setup.py install
59 |     nosetests
60 | 
61 | 
62 | If you already have a virtualenv, you can use workon
63 | 
64 | .. code-block:: sh
65 | 
66 |     pew workon bibtexparser
67 | 
68 | 
69 | Tox
70 | ---
71 | 
72 | The advantage of `Tox <https://pypi.python.org/pypi/tox>`_ is that you can build and test the code against several versions of python.
73 | Of course, you need tox to be installed on your system.
74 | The configuration file is tox.ini, in the root of the project. There, you can change the python versions.
75 | 
76 | .. code-block:: sh
77 | 
78 |     tox # and nothing more :)
79 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibtexexpression.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | 
 5 | from __future__ import unicode_literals
 6 | import unittest
 7 | 
 8 | from bibtexparser.bibtexexpression import BibtexExpression
 9 | 
10 | 
11 | class TestBibtexExpression(unittest.TestCase):
12 | 
13 |     def setUp(self):
14 |         self.expr = BibtexExpression()
15 | 
16 |     def test_minimal(self):
17 |         result = self.expr.entry.parseString('@journal{key, name = 123 }')
18 |         self.assertEqual(result.get('EntryType'), 'journal')
19 |         self.assertEqual(result.get('Key'), 'key')
20 |         self.assertEqual(result.get('Fields'), {'name': '123'})
21 | 
22 |     def test_capital_type(self):
23 |         result = self.expr.entry.parseString('@JOURNAL{key, name = 123 }')
24 |         self.assertEqual(result.get('EntryType'), 'JOURNAL')
25 | 
26 |     def test_capital_key(self):
27 |         result = self.expr.entry.parseString('@journal{KEY, name = 123 }')
28 |         self.assertEqual(result.get('Key'), 'KEY')
29 | 
30 |     def test_braced(self):
31 |         result = self.expr.entry.parseString('@journal{key, name = {abc} }')
32 |         self.assertEqual(result.get('Fields'), {'name': 'abc'})
33 | 
34 |     def test_braced_with_new_line(self):
35 |         result = self.expr.entry.parseString(
36 |             '@journal{key, name = {abc\ndef} }')
37 |         self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'})
38 | 
39 |     def test_braced_unicode(self):
40 |         result = self.expr.entry.parseString(
41 |             '@journal{key, name = {àbcđéf} }')
42 |         self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'})
43 | 
44 |     def test_quoted(self):
45 |         result = self.expr.entry.parseString('@journal{key, name = "abc" }')
46 |         self.assertEqual(result.get('Fields'), {'name': 'abc'})
47 | 
48 |     def test_quoted_with_new_line(self):
49 |         result = self.expr.entry.parseString(
50 |             '@journal{key, name = "abc\ndef" }')
51 |         self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'})
52 | 
53 |     def test_quoted_with_unicode(self):
54 |         result = self.expr.entry.parseString(
55 |             '@journal{key, name = "àbcđéf" }')
56 |         self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'})
57 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_latexenc.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #This program is free software: you can redistribute it and/or modify
 4 | #it under the terms of the GNU General Public License as published by
 5 | #the Free Software Foundation, either version 3 of the License, or
 6 | #(at your option) any later version.
 7 | #
 8 | #This program is distributed in the hope that it will be useful,
 9 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 | #GNU General Public License for more details.
12 | #
13 | #You should have received a copy of the GNU General Public License
14 | #along with this program.  If not, see <http://www.gnu.org/licenses/>
15 | #
16 | # Author: Francois Boulogne <fboulogne at sciunto dot org>, 2012
17 | 
18 | from __future__ import unicode_literals
19 | import unittest
20 | 
21 | from bibtexparser.latexenc import *
22 | 
23 | class TestLatexConverter(unittest.TestCase):
24 | 
25 |     def test_accent(self):
26 |         string = 'à é è ö'
27 |         result = string_to_latex(string)
28 |         expected = "{\`a} {\\\'e} {\`e} {\\\"o}"
29 |         self.assertEqual(result, expected)
30 | 
31 |     def test_special_caracter(self):
32 |         string = 'ç'
33 |         result = string_to_latex(string)
34 |         expected = '{\c c}'
35 |         self.assertEqual(result, expected)
36 | 
37 | class TestUppercaseProtection(unittest.TestCase):
38 | 
39 |     def test_uppercase(self):
40 |         string = 'An upPer Case A'
41 |         result = protect_uppercase(string)
42 |         expected = '{A}n up{P}er {C}ase {A}'
43 |         self.assertEqual(result, expected)
44 | 
45 |     def test_lowercase(self):
46 |         string = 'a'
47 |         result = protect_uppercase(string)
48 |         expected = 'a'
49 |         self.assertEqual(result, expected)
50 | 
51 |     def test_alreadyprotected(self):
52 |         string = '{A}, m{A}gnificient, it is a {A}...'
53 |         result = protect_uppercase(string)
54 |         expected = '{A}, m{A}gnificient, it is a {A}...'
55 |         self.assertEqual(result, expected)
56 | 
57 |     def test_traps(self):
58 |         string = '{A, m{Agnificient, it is a {A'
59 |         result = protect_uppercase(string)
60 |         expected = '{A, m{Agnificient, it is a {A'
61 |         self.assertEqual(result, expected)
62 | 
63 |     def test_traps2(self):
64 |         string = 'A}, mA}gnificient, it is a A}'
65 |         result = protect_uppercase(string)
66 |         expected = 'A}, mA}gnificient, it is a A}'
67 |         self.assertEqual(result, expected)
68 | 
69 | 
70 | if __name__ == '__main__':
71 |     unittest.main()
72 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibtex_strings.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import codecs
 3 | import bibtexparser
 4 | from bibtexparser.bibdatabase import BibDatabase
 5 | from bibtexparser.bparser import BibTexParser
 6 | from collections import OrderedDict
 7 | 
 8 | 
 9 | class TestStringParse(unittest.TestCase):
10 |     def test_single_string_parse_count(self):
11 |         bibtex_str = '@string{name1 = "value1"}\n\n'
12 |         bib_database = bibtexparser.loads(bibtex_str)
13 |         self.assertEqual(len(bib_database.strings), 1)
14 | 
15 |     def test_multiple_string_parse_count(self):
16 |         bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
17 |         bib_database = bibtexparser.loads(bibtex_str)
18 |         self.assertEqual(len(bib_database.strings), 2)
19 | 
20 |     def test_single_string_parse(self):
21 |         bibtex_str = '@string{name1 = "value1"}\n\n'
22 |         bib_database = bibtexparser.loads(bibtex_str)
23 |         expected = {'name1': 'value1'}
24 |         self.assertEqual(bib_database.strings, expected)
25 | 
26 |     def test_multiple_string_parse(self):
27 |         bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
28 |         bib_database = bibtexparser.loads(bibtex_str)
29 |         expected = OrderedDict()
30 |         expected['name1'] = 'value1'
31 |         expected['name2'] = 'value2'
32 |         self.assertEqual(bib_database.strings, expected)
33 | 
34 |     def test_string_braces(self):
35 |         with codecs.open('bibtexparser/tests/data/string.bib', 'r', 'utf-8') as bibfile:
36 |             bib = BibTexParser(bibfile.read())
37 |             res = bib.get_entry_list()
38 |         expected = [{'author': 'Sang Kil Cha and Maverick Woo and David Brumley',
39 | 		     'ID': 'cha:oakland15',
40 | 		     'year': '2015',
41 | 		     'booktitle': 'Proceedings of the {IEEE} Symposium on Security and Privacy',
42 | 		     'title': '{Program-Adaptive Mutational Fuzzing}',
43 | 	             'ENTRYTYPE': 'inproceedings',
44 | 		     'pages': '725--741'
45 |                      }]
46 |         self.assertEqual(res, expected)
47 | 
48 | 
49 | 
50 | class TestStringWrite(unittest.TestCase):
51 |     def test_single_string_write(self):
52 |         bib_database = BibDatabase()
53 |         bib_database.strings['name1'] = 'value1'
54 |         result = bibtexparser.dumps(bib_database)
55 |         expected = '@string{name1 = "value1"}\n\n'
56 |         self.assertEqual(result, expected)
57 | 
58 |     def test_multiple_string_write(self):
59 |         bib_database = BibDatabase()
60 |         bib_database.strings['name1'] = 'value1'
61 |         bib_database.strings['name2'] = 'value2'  # Order is important!
62 |         result = bibtexparser.dumps(bib_database)
63 |         expected = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
64 |         self.assertEqual(result, expected)
65 | 


--------------------------------------------------------------------------------
/docs/source/logging.rst:
--------------------------------------------------------------------------------
 1 | How to report a bug?
 2 | ====================
 3 | 
 4 | Bugs can be reported on github or via private communications.
 5 | 
 6 | Steps
 7 | -----
 8 | 
 9 | 1. Write a minimal code example that reproduces the problem.
10 | 2. Provide the code, the bibtex (if necessary), the output.
11 | 3. For a parsing error, provide the expected output.
12 | 4. For a crash, set the logger to the debug level (see below).
13 | 
14 | If you want to provide a patch (that's wonderful! thank you), please take a few minutes to write a unit test that fails without your contribution.
15 | 
16 | Logging module to understand failures
17 | -------------------------------------
18 | 
19 | Syntax of bibtex files is simple but there are many possible variations. This library probably fails for some of them.
20 | 
21 | Bibtexparser includes a large number of debug messages which help to understand why and where the parser fails.
22 | The example below can be used to print these messages in the console.
23 | 
24 | .. code-block:: python
25 | 
26 |     import logging
27 |     import logging.config
28 | 
29 |     logger = logging.getLogger(__name__)
30 | 
31 |     logging.config.dictConfig({
32 |         'version': 1,
33 |         'disable_existing_loggers': False,
34 |         'formatters': {
35 |             'standard': {
36 |                 'format': '%(asctime)s [%(levelname)s] %(name)s %(funcName)s:%(lineno)d: %(message)s'
37 |             },
38 |         },
39 |         'handlers': {
40 |             'default': {
41 |                 'level':'DEBUG',
42 |                 'formatter': 'standard',
43 |                 'class':'logging.StreamHandler',
44 |             },
45 |         },
46 |         'loggers': {
47 |             '': {
48 |                 'handlers': ['default'],
49 |                 'level': 'DEBUG',
50 |                 'formatter': 'standard',
51 |                 'propagate': True
52 |             }
53 |         }
54 |     })
55 | 
56 | 
57 |     if __name__ == '__main__':
58 |         bibtex = """@ARTICLE{Cesar2013,
59 |           author = {Jean César},
60 |           title = {An amazing title},
61 |           year = {2013},
62 |           month = jan,
63 |           volume = {12},
64 |           pages = {12--23},
65 |           journal = {Nice Journal},
66 |           abstract = {This is an abstract. This line should be long enough to test
67 |         	 multilines...},
68 |           comments = {A comment},
69 |           keywords = {keyword1, keyword2},
70 |         }
71 |         """
72 | 
73 |         with open('/tmp/bibtex.bib', 'w') as bibfile:
74 |             bibfile.write(bibtex)
75 | 
76 |         from bibtexparser.bparser import BibTexParser
77 | 
78 |         with open('/tmp/bibtex.bib', 'r') as bibfile:
79 |             bp = BibTexParser(bibfile.read())
80 |             print(bp.get_entry_list())
81 | 
82 | I recommend that you include this output if you would like to report a bug.
83 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bwriter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # Author: Francois Boulogne
 4 | # License:
 5 | 
 6 | from __future__ import unicode_literals
 7 | 
 8 | import unittest
 9 | import sys
10 | 
11 | from bibtexparser.bparser import BibTexParser
12 | from bibtexparser.bwriter import BibTexWriter, to_bibtex
13 | from bibtexparser.customization import author
14 | 
15 | 
16 | class TestBibtexWriterList(unittest.TestCase):
17 | 
18 |     ###########
19 |     # ARTICLE
20 |     ###########
21 |     def test_article(self):
22 |         with open('bibtexparser/tests/data/article.bib', 'r') as bibfile:
23 |             bib = BibTexParser(bibfile.read())
24 | 
25 |         with open('bibtexparser/tests/data/article_output.bib', 'r') as bibfile:
26 |             expected = bibfile.read()
27 |         result = to_bibtex(bib)
28 |         if not sys.version_info >= (3, 0):
29 |             if isinstance(result, unicode):
30 |                 result = result.encode('utf-8')
31 |         self.maxDiff = None
32 |         self.assertEqual(expected, result)
33 | 
34 |     ###########
35 |     # BOOK
36 |     ###########
37 |     def test_book(self):
38 |         with open('bibtexparser/tests/data/book.bib', 'r') as bibfile:
39 |             bib = BibTexParser(bibfile.read())
40 | 
41 |         with open('bibtexparser/tests/data/book_output.bib', 'r') as bibfile:
42 |             expected = bibfile.read()
43 |         result = to_bibtex(bib)
44 |         self.maxDiff = None
45 |         self.assertEqual(expected, result)
46 | 
47 |     ###########
48 |     # COMMA FIRST
49 |     ###########
50 |     def test_comma_first(self):
51 |         with open('bibtexparser/tests/data/book.bib', 'r') as bibfile:
52 |             bib = BibTexParser(bibfile.read())
53 | 
54 |         with open('bibtexparser/tests/data/book_comma_first.bib', 'r') as bibfile:
55 |             expected = bibfile.read()
56 |         writer = BibTexWriter()
57 |         writer.indent = '   '
58 |         writer.comma_first = True
59 |         result = writer.write(bib)
60 |         self.maxDiff = None
61 |         self.assertEqual(expected, result)
62 | 
63 |     ###########
64 |     # MULTIPLE
65 |     ###########
66 |     def test_multiple(self):
67 |         with open('bibtexparser/tests/data/multiple_entries.bib', 'r') as bibfile:
68 |             bib = BibTexParser(bibfile.read())
69 | 
70 |         with open('bibtexparser/tests/data/multiple_entries_output.bib', 'r') as bibfile:
71 |             expected = bibfile.read()
72 |         result = to_bibtex(bib)
73 |         self.maxDiff = None
74 |         self.assertEqual(expected, result)
75 | 
76 |     ###########
77 |     # Exception
78 |     ###########
79 |     def test_exception_typeerror(self):
80 |         with open('bibtexparser/tests/data/article.bib', 'r') as bibfile:
81 |             bib = BibTexParser(bibfile.read(), customization=author)
82 |         self.assertRaises(TypeError, to_bibtex, bib)
83 | 
84 | 


--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
  1 | v0.XXX
  2 | ======
  3 | 
  4 | * ENH: we use pyparsing (#64) by Olivier Magin.
  5 | * DOC: Refactoring of the tutorial
  6 | * DOC: include docs/ in manifest
  7 | * API: fix spelling "homogenize". Affects customization and bparser
  8 | * API: BibtexParser: homogenize_fields is now False by default (#94)
  9 | 
 10 | v0.6.2
 11 | ======
 12 | 
 13 | * ENH: customization: handle various hyphens (#76).
 14 | * ENH: writer: align values according to the maximal key width (#83).
 15 | * ENH: writer: display_order allows to have custom ordering of the fields of
 16 | each entry instead of just alphabetical (#83) by cschaffner.
 17 | * FIX: bad support of braces in string (#90) by sangkilc.
 18 | 
 19 | v0.6.1
 20 | ======
 21 | 
 22 | * API: Previous type and id keywords which are automatically added to
 23 | the dictionary are now ENTRYTYPE and ID, respectively (#42).
 24 | * ENH: comma first syntax support (#49) by Michal Grochmal.
 25 | 
 26 | v0.6.0
 27 | ======
 28 | 
 29 | * DOC: clarify version number
 30 | * ENH: support for bibtex with leading spaces (#34)
 31 | * FIX: if title contained multiple words in braces
 32 | * ENH: code refactoring (#33)
 33 | * ENH: support for comment blocks (#32)
 34 | * ENH: Removed comma after last key-value pair by faph (#28)
 35 | * ENH: optional keys sanitising by faph (#29)
 36 | * FIX: missing comma at the end of a record (#24)
 37 | * DOC: clarify the usecase of to_bibtex
 38 | * FIX: raise exception for TypeError in to_bibtex (#22)
 39 | 
 40 | v0.5.5
 41 | ======
 42 | 
 43 | * ENH: json output
 44 | * ENH: Add (optional) support for non-standard entry types by Georg C. Brückmann
 45 | * FIX: protect uppercase only on unprotected characters. #18
 46 | * ENH: string replacement by Uwe Schmidt (#13 #20)
 47 | 
 48 | v0.5.4
 49 | ======
 50 | 
 51 | * ENH: json output
 52 | * API: enhance the naming choice for bwriter
 53 | 
 54 | v0.5.3
 55 | ======
 56 | 
 57 | * ENH: add writer (#16), thanks to Lucas Verney
 58 | * MAINT: Remove non-standard --BREAK-- command detection
 59 | * FIX: missing strip() (#14) by Sebastien Diemer
 60 | * API breakage: the parser takes data instead of a filehandler
 61 | 
 62 | v0.5.2
 63 | ======
 64 | 
 65 | * ENH: fix tests latex encoding
 66 | * ENH: support @comment @preamble (escaped)
 67 | * ENH: check that bibtype belongs to a known type
 68 | 
 69 | v0.5.1
 70 | ======
 71 | 
 72 | * ENH: split keywords with various separators
 73 | * ENH: get_entry_dict make the dict once
 74 | * ENH: add messages with logging
 75 | * FIX: fix unittest related to braces detection
 76 | 
 77 | v0.5
 78 | ====
 79 | 
 80 | * Permission from original authors and OKFN to use LGPLv3
 81 | * ENH: Python 2.7 support
 82 | * FIX: issue related to accents
 83 | 
 84 | v0.4
 85 | ====
 86 | 
 87 | * ENH: Transformations on characters are now considered as a customization
 88 | * ENH: New customization: clean latex style
 89 | * FIX: issue related to name processing
 90 | 
 91 | v0.3
 92 | ====
 93 | 
 94 | * DOC: moved to Read the Docs
 95 | * DOC: several improvements
 96 | * MAINT: separate customizations
 97 | 
 98 | v0.2
 99 | ====
100 | 
101 | * TEST: initialized
102 | * DOC: initialized
103 | 
104 | v0.1
105 | ====
106 | 
107 | * First preliminary release
108 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibtexparser.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import bibtexparser
 3 | from bibtexparser.bparser import BibTexParser
 4 | from tempfile import TemporaryFile
 5 | 
 6 | 
 7 | class TestBibtexParserParserMethods(unittest.TestCase):
 8 |     input_file_path = 'bibtexparser/tests/data/book.bib'
 9 |     entries_expected = [{'ENTRYTYPE': 'book',
10 |                          'year': '1987',
11 |                          'edition': '2',
12 |                          'publisher': 'Wiley Edition',
13 |                          'ID': 'Bird1987',
14 |                          'volume': '1',
15 |                          'title': 'Dynamics of Polymeric Liquid',
16 |                          'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.'
17 |                         }]
18 | 
19 |     def test_parse_immediately(self):
20 |         with open(self.input_file_path) as bibtex_file:
21 |             bibtex_str = bibtex_file.read()
22 |         bibtex_database = BibTexParser(bibtex_str)
23 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
24 | 
25 |     def test_parse_str(self):
26 |         parser = BibTexParser()
27 |         with open(self.input_file_path) as bibtex_file:
28 |             bibtex_str = bibtex_file.read()
29 |         bibtex_database = parser.parse(bibtex_str)
30 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
31 | 
32 |     def test_parse_file(self):
33 |         parser = BibTexParser()
34 |         with open(self.input_file_path) as bibtex_file:
35 |             bibtex_database = parser.parse_file(bibtex_file)
36 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
37 | 
38 |     def test_parse_str_module(self):
39 |         with open(self.input_file_path) as bibtex_file:
40 |             bibtex_str = bibtex_file.read()
41 |         bibtex_database = bibtexparser.loads(bibtex_str)
42 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
43 | 
44 |     def test_parse_file_module(self):
45 |         with open(self.input_file_path) as bibtex_file:
46 |             bibtex_database = bibtexparser.load(bibtex_file)
47 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
48 | 
49 | 
50 | class TestBibtexparserWriteMethods(unittest.TestCase):
51 |     input_file_path = 'bibtexparser/tests/data/book.bib'
52 |     expected = \
53 | """@book{Bird1987,
54 |  author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.},
55 |  edition = {2},
56 |  publisher = {Wiley Edition},
57 |  title = {Dynamics of Polymeric Liquid},
58 |  volume = {1},
59 |  year = {1987}
60 | }
61 | 
62 | """
63 | 
64 |     def test_write_str(self):
65 |         with open(self.input_file_path) as bibtex_file:
66 |             bibtex_database = bibtexparser.load(bibtex_file)
67 |         result = bibtexparser.dumps(bibtex_database)
68 |         self.assertEqual(result, self.expected)
69 | 
70 |     def test_write_file(self):
71 |         with open(self.input_file_path) as bibtex_file:
72 |             bibtex_database = bibtexparser.load(bibtex_file)
73 | 
74 |         with TemporaryFile(mode='w+') as bibtex_out_file:
75 |             bibtexparser.dump(bibtex_database, bibtex_out_file)
76 |             bibtex_out_file.seek(0)
77 |             bibtex_out_str = bibtex_out_file.read()
78 | 
79 |         self.assertEqual(bibtex_out_str, self.expected)
80 | 
81 | 
82 | if __name__ == '__main__':
83 |     unittest.main()
84 | 


--------------------------------------------------------------------------------
/bibtexparser/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | `BibTeX <http://en.wikipedia.org/wiki/BibTeX>`_ is a bibliographic data file format.
  3 | 
  4 | The :mod:`bibtexparser` module can parse BibTeX files and write them. The API is similar to the
  5 | :mod:`json` module. The parsed data is returned as a simple :class:`BibDatabase` object with the main attribute being
  6 | :attr:`entries` representing bibliographic sources such as books and journal articles.
  7 | 
  8 | The following functions provide a quick and basic way to manipulate a BibTeX file.
  9 | More advanced features are also available in this module.
 10 | 
 11 | Parsing a file is as simple as::
 12 | 
 13 |     import bibtexparser
 14 |     with open('bibtex.bib') as bibtex_file:
 15 |         bibtex_database = bibtexparser.load(bibtex_file)
 16 | 
 17 | And writing::
 18 | 
 19 |     import bibtexparser
 20 |     with open('bibtex.bib', 'w') as bibtex_file:
 21 |         bibtexparser.dump(bibtex_database, bibtex_file)
 22 | 
 23 | """
 24 | __all__ = [
 25 |     'loads', 'load', 'dumps', 'dump', 'bibdatabase',
 26 |     'bparser', 'bwriter', 'bibtexexpression', 'latexenc', 'customization',
 27 | ]
 28 | __version__ = '0.6.2'
 29 | 
 30 | from . import bibdatabase, bibtexexpression, bparser, bwriter, latexenc, customization
 31 | 
 32 | 
 33 | def loads(bibtex_str, parser=None):
 34 |     """
 35 |     Load :class:`BibDatabase` object from a string
 36 | 
 37 |     :param bibtex_str: input BibTeX string to be parsed
 38 |     :type bibtex_str: str or unicode
 39 |     :param parser: custom parser to use (optional)
 40 |     :type parser: BibTexParser
 41 |     :returns: bibliographic database object
 42 |     :rtype: BibDatabase
 43 |     """
 44 |     if parser is None:
 45 |         parser = bparser.BibTexParser()
 46 |     return parser.parse(bibtex_str)
 47 | 
 48 | 
 49 | def load(bibtex_file, parser=None):
 50 |     """
 51 |     Load :class:`BibDatabase` object from a file
 52 | 
 53 |     :param bibtex_file: input file to be parsed
 54 |     :type bibtex_file: file
 55 |     :param parser: custom parser to use (optional)
 56 |     :type parser: BibTexParser
 57 |     :returns: bibliographic database object
 58 |     :rtype: BibDatabase
 59 | 
 60 |     Example::
 61 | 
 62 |         import bibtexparser
 63 |         with open('bibtex.bib') as bibtex_file:
 64 |            bibtex_database = bibtexparser.load(bibtex_file)
 65 | 
 66 |     """
 67 |     if parser is None:
 68 |         parser = bparser.BibTexParser()
 69 |     return parser.parse_file(bibtex_file)
 70 | 
 71 | 
 72 | def dumps(bib_database, writer=None):
 73 |     """
 74 |     Dump :class:`BibDatabase` object to a BibTeX string
 75 | 
 76 |     :param bib_database: bibliographic database object
 77 |     :type bib_database: BibDatabase
 78 |     :param writer: custom writer to use (optional) (not yet implemented)
 79 |     :type writer: BibTexWriter
 80 |     :returns: BibTeX string
 81 |     :rtype: unicode
 82 |     """
 83 |     if writer is None:
 84 |         writer = bwriter.BibTexWriter()
 85 |     return writer.write(bib_database)
 86 | 
 87 | 
 88 | def dump(bib_database, bibtex_file, writer=None):
 89 |     """
 90 |     Dump :class:`BibDatabase` object as a BibTeX text file
 91 | 
 92 |     :param bib_database: bibliographic database object
 93 |     :type bib_database: BibDatabase
 94 |     :param bibtex_file: file to write to
 95 |     :type bibtex_file: file
 96 |     :param writer: custom writer to use (optional) (not yet implemented)
 97 |     :type writer: BibTexWriter
 98 | 
 99 |     Example::
100 | 
101 |         import bibtexparser
102 |         with open('bibtex.bib', 'w') as bibtex_file:
103 |             bibtexparser.dump(bibtex_database, bibtex_file)
104 | 
105 |     """
106 |     if writer is None:
107 |         writer = bwriter.BibTexWriter()
108 |     bibtex_file.write(writer.write(bib_database))
109 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_customization.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from __future__ import unicode_literals
  5 | import unittest
  6 | 
  7 | from bibtexparser.customization import getnames, convert_to_unicode, homogenize_latex_encoding, page_double_hyphen, keyword
  8 | 
  9 | 
 10 | class TestBibtexParserMethod(unittest.TestCase):
 11 | 
 12 |     ###########
 13 |     # getnames
 14 |     ###########
 15 |     def test_getnames(self):
 16 |         names = ['Foo Bar',
 17 |                  'Foo B. Bar',
 18 |                  'F. B. Bar',
 19 |                  'F.B. Bar',
 20 |                  'F. Bar',
 21 |                  'Jean de Savigny',
 22 |                  'Jean la Tour',
 23 |                  'Jean le Tour',
 24 |                  'Mike ben Akar',
 25 |                  #'Jean de la Tour',
 26 |                  #'Johannes Diderik van der Waals',
 27 |                  ]
 28 |         result = getnames(names)
 29 |         expected = ['Bar, Foo',
 30 |                     'Bar, Foo B.',
 31 |                     'Bar, F. B.',
 32 |                     'Bar, F. B.',
 33 |                     'Bar, F.',
 34 |                     'de Savigny, Jean',
 35 |                     'la Tour, Jean',
 36 |                     'le Tour, Jean',
 37 |                     'ben Akar, Mike',
 38 |                     #'de la Tour, Jean',
 39 |                     #'van der Waals, Johannes Diderik',
 40 |                     ]
 41 |         self.assertEqual(result, expected)
 42 | 
 43 |     @unittest.skip('Bug #9')
 44 |     def test_getnames_braces(self):
 45 |         names = ['A. {Delgado de Molina}', 'M. Vign{\\\'e}']
 46 |         result = getnames(names)
 47 |         expected = ['Delgado de Molina, A.', 'Vigné, M.']
 48 |         self.assertEqual(result, expected)
 49 | 
 50 |     ###########
 51 |     # page_double_hyphen
 52 |     ###########
 53 |     def test_page_double_hyphen_alreadyOK(self):
 54 |         record = {'pages': '12--24'}
 55 |         result = page_double_hyphen(record)
 56 |         expected = record
 57 |         self.assertEqual(result, expected)
 58 | 
 59 |     def test_page_double_hyphen_simple(self):
 60 |         record = {'pages': '12-24'}
 61 |         result = page_double_hyphen(record)
 62 |         expected = {'pages': '12--24'}
 63 |         self.assertEqual(result, expected)
 64 | 
 65 |     def test_page_double_hyphen_space(self):
 66 |         record = {'pages': '12 - 24'}
 67 |         result = page_double_hyphen(record)
 68 |         expected = {'pages': '12--24'}
 69 |         self.assertEqual(result, expected)
 70 | 
 71 |     def test_page_double_hyphen_nothing(self):
 72 |         record = {'pages': '12 24'}
 73 |         result = page_double_hyphen(record)
 74 |         expected = {'pages': '12 24'}
 75 |         self.assertEqual(result, expected)
 76 | 
 77 |     ###########
 78 |     # convert to unicode
 79 |     ###########
 80 |     def test_convert_to_unicode(self):
 81 |         record = {'toto': '{\`a} \`{a}'}
 82 |         result = convert_to_unicode(record)
 83 |         expected = {'toto': 'à à'}
 84 |         self.assertEqual(result, expected)
 85 |         record = {'toto': '{\\"u} \\"{u}'}
 86 |         result = convert_to_unicode(record)
 87 |         expected = {'toto': 'ü ü'}
 88 |         self.assertEqual(result, expected)
 89 | 
 90 |     ###########
 91 |     # homogenize
 92 |     ###########
 93 |     def test_homogenize(self):
 94 |         record = {'toto': 'à {\`a} \`{a}'}
 95 |         result = homogenize_latex_encoding(record)
 96 |         expected = {'toto': '{\`a} {\`a} {\`a}'}
 97 |         self.assertEqual(result, expected)
 98 | 
 99 |     ###########
100 |     # keywords
101 |     ###########
102 |     def test_keywords(self):
103 |         record = {'keyword': "a b, a b , a b;a b ; a b, a b\n"}
104 |         result = keyword(record)
105 |         expected = {'keyword': ['a b'] * 6}
106 |         self.assertEqual(result, expected)
107 | 
108 | if __name__ == '__main__':
109 |     unittest.main()
110 | 


--------------------------------------------------------------------------------
/bibtexparser/bibdatabase.py:
--------------------------------------------------------------------------------
  1 | from collections import OrderedDict
  2 | import sys
  3 | 
  4 | 
  5 | if sys.version_info.major == 2:
  6 |     TEXT_TYPE = unicode
  7 | else:
  8 |     TEXT_TYPE = str
  9 | 
 10 | 
 11 | STANDARD_TYPES = set([
 12 |     'article',
 13 |     'book',
 14 |     'booklet',
 15 |     'conference',
 16 |     'inbook',
 17 |     'incollection',
 18 |     'inproceedings',
 19 |     'manual',
 20 |     'mastersthesis',
 21 |     'misc',
 22 |     'phdthesis',
 23 |     'proceedings',
 24 |     'techreport',
 25 |     'unpublished'])
 26 | COMMON_STRINGS = {
 27 |     'jan': 'January',
 28 |     'feb': 'February',
 29 |     'mar': 'March',
 30 |     'apr': 'April',
 31 |     'may': 'May',
 32 |     'jun': 'June',
 33 |     'jul': 'July',
 34 |     'aug': 'August',
 35 |     'sep': 'September',
 36 |     'oct': 'October',
 37 |     'nov': 'November',
 38 |     'dec': 'December',
 39 |     }
 40 | 
 41 | 
 42 | class BibDatabase(object):
 43 |     """
 44 |     Bibliographic database object that follows the data structure of a BibTeX file.
 45 |     """
 46 |     def __init__(self):
 47 |         #: List of BibTeX entries, for example `@book{...}`, `@article{...}`, etc. Each entry is a simple dict with
 48 |         #: BibTeX field-value pairs, for example `'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.'` Each
 49 |         #: entry will always have the following dict keys (in addition to other BibTeX fields):
 50 |         #:
 51 |         #: * `ID` (BibTeX key)
 52 |         #: * `ENTRYTYPE` (entry type in lowercase, e.g. `book`, `article` etc.)
 53 |         self.entries = []
 54 |         self._entries_dict = {}
 55 |         #: List of BibTeX comment (`@comment{...}`) blocks.
 56 |         self.comments = []
 57 |         #: OrderedDict of BibTeX string definitions (`@string{...}`). In order of definition.
 58 |         self.strings = OrderedDict()  # Not sure if order is import, keep order just in case
 59 |         #: List of BibTeX preamble (`@preamble{...}`) blocks.
 60 |         self.preambles = []
 61 | 
 62 |     def load_common_strings(self):
 63 |         self.strings.update(COMMON_STRINGS)
 64 | 
 65 |     def get_entry_list(self):
 66 |         """Get a list of bibtex entries.
 67 | 
 68 |         :returns: BibTeX entries
 69 |         :rtype: list
 70 |         .. deprecated:: 0.5.6
 71 |            Use :attr:`entries` instead.
 72 |         """
 73 |         return self.entries
 74 | 
 75 |     @staticmethod
 76 |     def entry_sort_key(entry, fields):
 77 |         result = []
 78 |         for field in fields:
 79 |             result.append(TEXT_TYPE(entry.get(field, '')).lower())  # Sorting always as string
 80 |         return tuple(result)
 81 | 
 82 |     def get_entry_dict(self):
 83 |         """Return a dictionary of BibTeX entries.
 84 |         The dict key is the BibTeX entry key
 85 |         """
 86 |         # If the hash has never been made, make it
 87 |         if not self._entries_dict:
 88 |             for entry in self.entries:
 89 |                 self._entries_dict[entry['ID']] = entry
 90 |         return self._entries_dict
 91 | 
 92 |     entries_dict = property(get_entry_dict)
 93 | 
 94 |     def expand_string(self, name):
 95 |         try:
 96 |             return self.strings[name]
 97 |         except KeyError:
 98 |             raise(KeyError("Unknown string: {}.".format(name)))
 99 | 
100 | 
class BibDataString(object):
    """
    Represents a bibtex string.

    This object enables maintaining string expressions as a list of strings
    and BibDataString. Can be interpolated from Bibdatabase.
    """

    def __init__(self, bibdatabase, name):
        # Names are stored lower-cased.
        self.name = name.lower()
        self._bibdatabase = bibdatabase

    def __repr__(self):
        return "BibDataString({})".format(repr(self.name))

    def get_value(self):
        """
        Look up the value bound to this string's name.

        The lookup is delegated to ``expand_string`` of the database passed
        at construction time.

        :returns: string
        """
        database = self._bibdatabase
        return database.expand_string(self.name)
123 | 


--------------------------------------------------------------------------------
/bibtexparser/bwriter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # Author: Francois Boulogne
  4 | # License:
  5 | 
  6 | import logging
  7 | from bibtexparser.bibdatabase import BibDatabase
  8 | 
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | __all__ = ['BibTexWriter']
 12 | 
 13 | 
 14 | def to_bibtex(parsed):
 15 |     """
 16 |     Convenience function for backwards compatibility.
 17 |     """
 18 |     return BibTexWriter().write(parsed)
 19 | 
 20 | 
 21 | class BibTexWriter(object):
 22 |     """
 23 |     Writer to convert a :class:`BibDatabase` object to a string or file formatted as a BibTeX file.
 24 | 
 25 |     Example::
 26 | 
 27 |         from bibtexparser.bwriter import BibTexWriter
 28 | 
 29 |         bib_database = ...
 30 | 
 31 |         writer = BibTexWriter()
 32 |         writer.contents = ['comments', 'entries']
 33 |         writer.indent = '  '
 34 |         writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
 35 |         bibtex_str = bibtexparser.dumps(bib_database, writer)
 36 | 
 37 |     """
 38 | 
 39 |     _valid_contents = ['entries', 'comments', 'preambles', 'strings']
 40 | 
 41 |     def __init__(self):
 42 |         #: List of BibTeX elements to write, valid values are `entries`, `comments`, `preambles`, `strings`.
 43 |         self.contents = ['comments', 'preambles', 'strings', 'entries']
 44 |         #: Character(s) for indenting BibTeX field-value pairs. Default: single space.
 45 |         self.indent = ' '
 46 |         #: Align values. Determines the maximal number of characters used in any fieldname and aligns all values
 47 |         #    according to that by filling up with single spaces. Default: False
 48 |         self.align_values = False
 49 |         #: Characters(s) for separating BibTeX entries. Default: new line.
 50 |         self.entry_separator = '\n'
 51 |         #: Tuple of fields for ordering BibTeX entries. Set to `None` to disable sorting. Default: BibTeX key `('ID', )`.
 52 |         self.order_entries_by = ('ID', )
 53 |         #: Tuple of fields for display order in a single BibTeX entry. Fields not listed here will be displayed
 54 |         #: alphabetically at the end. Set to '[]' for alphabetical order. Default: '[]'
 55 |         self.display_order = []
 56 |         #: BibTeX syntax allows comma first syntax
 57 |         #: (common in functional languages), use this to enable
 58 |         #: comma first syntax as the bwritter output
 59 |         self.comma_first = False
 60 | 
 61 |         #: internal variable used if self.align_values = True
 62 |         self._max_field_width = 0
 63 | 
 64 | 
 65 |     def write(self, bib_database):
 66 |         """
 67 |         Converts a bibliographic database to a BibTeX-formatted string.
 68 | 
 69 |         :param bib_database: bibliographic database to be converted to a BibTeX string
 70 |         :type bib_database: BibDatabase
 71 |         :return: BibTeX-formatted string
 72 |         :rtype: str or unicode
 73 |         """
 74 |         bibtex = ''
 75 |         for content in self.contents:
 76 |             try:
 77 |                 # Add each element set (entries, comments)
 78 |                 bibtex += getattr(self, '_' + content + '_to_bibtex')(bib_database)
 79 |             except AttributeError:
 80 |                 logger.warning("BibTeX item '{}' does not exist and will not be written. Valid items are {}."
 81 |                                .format(content, self._valid_contents))
 82 |         return bibtex
 83 | 
 84 |     def _entries_to_bibtex(self, bib_database):
 85 |         bibtex = ''
 86 |         if self.order_entries_by:
 87 |             # TODO: allow sort field does not exist for entry
 88 |             entries = sorted(bib_database.entries, key=lambda x: BibDatabase.entry_sort_key(x, self.order_entries_by))
 89 |         else:
 90 |             entries = bib_database.entries
 91 | 
 92 |         if self.align_values:
 93 |             # determine maximum field width to be used
 94 |             widths = [max(map(len, entry.keys())) for entry in entries]
 95 |             self._max_field_width = max(widths)
 96 | 
 97 |         for entry in entries:
 98 |             bibtex += self._entry_to_bibtex(entry)
 99 |         return bibtex
100 | 
101 |     def _entry_to_bibtex(self, entry):
102 |         bibtex = ''
103 |         # Write BibTeX key
104 |         bibtex += '@' + entry['ENTRYTYPE'] + '{' + entry['ID']
105 | 
106 |         # create display_order of fields for this entry
107 |         # first those keys which are both in self.display_order and in entry.keys
108 |         display_order = [i for i in self.display_order if i in entry]
109 |         # then all the other fields sorted alphabetically
110 |         more_fields = [i for i in sorted(entry) if i not in self.display_order]
111 |         display_order += [i for i in sorted(entry) if i not in self.display_order]
112 | 
113 |         # Write field = value lines
114 |         for field in [i for i in display_order if i not in ['ENTRYTYPE', 'ID']]:
115 |             try:
116 |                 if self.comma_first:
117 |                     bibtex += "\n" + self.indent + ", " + "{0:<{1}}".format(field, self._max_field_width) + " = {" + entry[field] + "}"
118 |                 else:
119 |                     bibtex += ",\n" + self.indent + "{0:<{1}}".format(field, self._max_field_width) + " = {" + entry[field] + "}"
120 |             except TypeError:
121 |                 raise TypeError(u"The field %s in entry %s must be a string"
122 |                                 % (field, entry['ID']))
123 |         bibtex += "\n}\n" + self.entry_separator
124 |         return bibtex
125 | 
126 |     def _comments_to_bibtex(self, bib_database):
127 |         return ''.join(['@comment{{{0}}}\n{1}'.format(comment, self.entry_separator)
128 |                         for comment in bib_database.comments])
129 | 
130 |     def _preambles_to_bibtex(self, bib_database):
131 |         return ''.join(['@preamble{{"{0}"}}\n{1}'.format(preamble, self.entry_separator)
132 |                         for preamble in bib_database.preambles])
133 | 
134 |     def _strings_to_bibtex(self, bib_database):
135 |         return ''.join(['@string{{{0} = "{1}"}}\n{2}'.format(name, value, self.entry_separator)
136 |                         for name, value in bib_database.strings.items()])
137 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = build
  9 | 
 10 | # Internal variables.
 11 | PAPEROPT_a4     = -D latex_paper_size=a4
 12 | PAPEROPT_letter = -D latex_paper_size=letter
 13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 14 | # the i18n builder cannot share the environment and doctrees with the others
 15 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 16 | 
 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
 18 | 
 19 | help:
 20 | 	@echo "Please use \`make <target>' where <target> is one of"
 21 | 	@echo "  html       to make standalone HTML files"
 22 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 23 | 	@echo "  singlehtml to make a single large HTML file"
 24 | 	@echo "  pickle     to make pickle files"
 25 | 	@echo "  json       to make JSON files"
 26 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 27 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 28 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 29 | 	@echo "  epub       to make an epub"
 30 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 31 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 32 | 	@echo "  text       to make text files"
 33 | 	@echo "  man        to make manual pages"
 34 | 	@echo "  texinfo    to make Texinfo files"
 35 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 36 | 	@echo "  gettext    to make PO message catalogs"
 37 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 38 | 	@echo "  linkcheck  to check all external links for integrity"
 39 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 40 | 
 41 | clean:
 42 | 	-rm -rf $(BUILDDIR)/*
 43 | 
 44 | html:
 45 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 46 | 	@echo
 47 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 48 | 
 49 | dirhtml:
 50 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 51 | 	@echo
 52 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 53 | 
 54 | singlehtml:
 55 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 56 | 	@echo
 57 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 58 | 
 59 | pickle:
 60 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 61 | 	@echo
 62 | 	@echo "Build finished; now you can process the pickle files."
 63 | 
 64 | json:
 65 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 66 | 	@echo
 67 | 	@echo "Build finished; now you can process the JSON files."
 68 | 
 69 | htmlhelp:
 70 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 71 | 	@echo
 72 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 73 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 74 | 
 75 | qthelp:
 76 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 77 | 	@echo
 78 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 79 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 80 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BibtexParser.qhcp"
 81 | 	@echo "To view the help file:"
 82 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BibtexParser.qhc"
 83 | 
 84 | devhelp:
 85 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 86 | 	@echo
 87 | 	@echo "Build finished."
 88 | 	@echo "To view the help file:"
 89 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/BibtexParser"
 90 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BibtexParser"
 91 | 	@echo "# devhelp"
 92 | 
 93 | epub:
 94 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
 95 | 	@echo
 96 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 97 | 
 98 | latex:
 99 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
100 | 	@echo
101 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
102 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
103 | 	      "(use \`make latexpdf' here to do that automatically)."
104 | 
105 | latexpdf:
106 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
107 | 	@echo "Running LaTeX files through pdflatex..."
108 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
109 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
110 | 
111 | text:
112 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
113 | 	@echo
114 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
115 | 
116 | man:
117 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
118 | 	@echo
119 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
120 | 
121 | texinfo:
122 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
123 | 	@echo
124 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
125 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
126 | 	      "(use \`make info' here to do that automatically)."
127 | 
128 | info:
129 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
130 | 	@echo "Running Texinfo files through makeinfo..."
131 | 	make -C $(BUILDDIR)/texinfo info
132 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
133 | 
134 | gettext:
135 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
136 | 	@echo
137 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
138 | 
139 | changes:
140 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
141 | 	@echo
142 | 	@echo "The overview file is in $(BUILDDIR)/changes."
143 | 
144 | linkcheck:
145 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
146 | 	@echo
147 | 	@echo "Link check complete; look for any errors in the above output " \
148 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
149 | 
150 | doctest:
151 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
152 | 	@echo "Testing of doctests in the sources finished, look at the " \
153 | 	      "results in $(BUILDDIR)/doctest/output.txt."
154 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_comments.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from bibtexparser.bparser import BibTexParser
  3 | from bibtexparser.bwriter import to_bibtex
  4 | 
  5 | 
  6 | """ The code is supposed to treat comments the following way:
  7 |     Each @Comment opens a comment that ends when something
  8 |     that is not a comment is encountered. More precisely
  9 |     this means a line starting with an @. Lines that are not
 10 |     parsed as anything else are also considered comments.
 11 |     If the comment starts and ends with braces, they are removed.
 12 | 
 13 |     Current issues:
 14 |         - a comment followed by a line starting with @something
 15 |         that is not a valid bibtex element is parsed separately,
 16 |         that is, as two comments.
 17 |         - braces are either ignored or removed which is not easily
 18 |         predictable.
 19 | """
 20 | 
 21 | 
 22 | class TestParseComment(unittest.TestCase):
 23 | 
 24 |     def test_comment_count(self):
 25 |         with open('bibtexparser/tests/data/features.bib') as bibfile:
 26 |             bib = BibTexParser(bibfile.read())
 27 |         self.assertEqual(len(bib.comments), 3)
 28 | 
 29 |     def test_comment_list(self):
 30 |         with open('bibtexparser/tests/data/features.bib') as bibfile:
 31 |             bib = BibTexParser(bibfile.read())
 32 |         expected = ["ignore this line!",
 33 |                     "ignore this line too!",
 34 |                     "and ignore this line too!"]
 35 |         self.assertEqual(bib.comments, expected)
 36 | 
 37 |     def test_multiline_comments(self):
 38 |         with open('bibtexparser/tests/data/multiline_comments.bib') as bibfile:
 39 |             bib = BibTexParser(bibfile.read())
 40 |         expected = [
 41 | """Lorem ipsum dolor sit amet,
 42 | consectetur adipisicing elit""",
 43 | """
 44 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
 45 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
 46 | 
 47 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
 48 | Excepteur sint occaecat cupidatat non proident.
 49 |  ,
 50 | """,
 51 | """
 52 | 
 53 | 
 54 | Sunt in culpa qui officia deserunt mollit anim id est laborum.
 55 | 
 56 | 
 57 | """,
 58 | ""
 59 |         ]
 60 |         self.maxDiff = None
 61 |         self.assertEqual(bib.comments, expected)
 62 | 
 63 |     def test_multiple_entries(self):
 64 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibfile:
 65 |             bparser = BibTexParser()
 66 |             bib = bparser.parse_file(bibfile)
 67 |         expected = ["",
 68 |                     "A comment"]
 69 |         self.assertEqual(bib.comments, expected)
 70 | 
 71 |     def test_comments_percentage(self):
 72 |         with open('bibtexparser/tests/data/comments_percentage.bib', 'r') as bibfile:
 73 |             bib = BibTexParser(bibfile.read())
 74 |             res = bib.get_entry_list()
 75 |         expected = [{'ENTRYTYPE': 'article',
 76 |                      'journal': 'Nice Journal',
 77 |                      'volume': '12',
 78 |                      'ID': 'Cesar2013',
 79 |                      'year': '2013',
 80 |                      'author': 'Jean Cesar',
 81 |                      'comments': 'A comment',
 82 |                      'keyword': 'keyword1, keyword2',
 83 |                      'title': 'An amazing title'
 84 |                      },
 85 |                     {'ENTRYTYPE': 'article',
 86 |                      'journal': 'Nice Journal',
 87 |                      'volume': '12',
 88 |                      'ID': 'Baltazar2013',
 89 |                      'year': '2013',
 90 |                      'author': 'Jean Baltazar',
 91 |                      'comments': 'A comment',
 92 |                      'keyword': 'keyword1, keyword2',
 93 |                      'title': 'An amazing title'
 94 |                      }]
 95 |         self.assertEqual(res, expected)
 96 | 
 97 |     def test_comments_percentage_nocoma(self):
 98 |         with open('bibtexparser/tests/data/comments_percentage_nolastcoma.bib', 'r') as bibfile:
 99 |             bib = BibTexParser(bibfile.read())
100 |             res = bib.get_entry_list()
101 |         expected = [{'ENTRYTYPE': 'article',
102 |                      'journal': 'Nice Journal',
103 |                      'volume': '12',
104 |                      'ID': 'Cesar2013',
105 |                      'year': '2013',
106 |                      'author': 'Jean Cesar',
107 |                      'comments': 'A comment',
108 |                      'keyword': 'keyword1, keyword2',
109 |                      'title': 'An amazing title'
110 |                      },
111 |                     {'ENTRYTYPE': 'article',
112 |                      'journal': 'Nice Journal',
113 |                      'volume': '12',
114 |                      'ID': 'Baltazar2013',
115 |                      'year': '2013',
116 |                      'author': 'Jean Baltazar',
117 |                      'comments': 'A comment',
118 |                      'keyword': 'keyword1, keyword2',
119 |                      'title': 'An amazing title'
120 |                      }]
121 |         self.assertEqual(res, expected)
122 | 
123 |     def test_no_newline(self):
124 |         comments = """This is a comment."""
125 |         expected = ["This is a comment."]
126 |         bib = BibTexParser(comments)
127 |         self.assertEqual(bib.comments, expected)
128 | 
129 |     def test_43(self):
130 |         comment = "@STRING{foo = \"bar\"}\n" \
131 |                   "This is a comment\n" \
132 |                   "This is a second comment."
133 |         expected = "This is a comment\nThis is a second comment."
134 |         bib = BibTexParser(comment)
135 |         self.assertEqual(bib.comments, [expected])
136 |         self.assertEqual(bib.strings, {'foo': 'bar'})
137 | 
138 |     def test_43_bis(self):
139 |         comment = "@STRING{foo = \"bar\"}\n" \
140 |                   "This is a comment\n" \
141 |                   "STRING{Baz = \"This should be interpreted as comment.\"}"
142 |         expected = "This is a comment\n" \
143 |                    "STRING{Baz = \"This should be interpreted as comment.\"}"
144 |         bib = BibTexParser(comment)
145 |         self.assertEqual(bib.comments, [expected])
146 |         self.assertEqual(bib.strings, {'foo': 'bar'})
147 | 
148 | 
class TestWriteComment(unittest.TestCase):
    """Checks the output of to_bibtex for databases parsed from fixture files."""

    def test_comment_write(self):
        """comments_only.bib is written as comments_only_output.bib."""
        with open('bibtexparser/tests/data/comments_only.bib') as source_file:
            database = BibTexParser(source_file.read())

        with open('bibtexparser/tests/data/comments_only_output.bib') as reference_file:
            reference = reference_file.read()
        self.assertEqual(to_bibtex(database), reference)

    def test_multiline_comment_write(self):
        """multiline_comments.bib is written back unchanged."""
        with open('bibtexparser/tests/data/multiline_comments.bib') as source_file:
            content = source_file.read()

        database = BibTexParser(content)
        self.assertEqual(to_bibtex(database), content)

    def test_multiple_entries(self):
        """multiple_entries_and_comments.bib is written as its *_output.bib counterpart."""
        with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as source_file:
            database = BibTexParser(source_file.read())
        with open('bibtexparser/tests/data/multiple_entries_and_comments_output.bib') as reference_file:
            reference = reference_file.read()
        self.assertEqual(to_bibtex(database), reference)
174 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibtexwriter.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import bibtexparser
  3 | from bibtexparser.bwriter import BibTexWriter
  4 | from bibtexparser.bibdatabase import BibDatabase
  5 | 
  6 | 
  7 | class TestBibTexWriter(unittest.TestCase):
  8 |     def test_content_entries_only(self):
  9 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
 10 |             bib_database = bibtexparser.load(bibtex_file)
 11 |         writer = BibTexWriter()
 12 |         writer.contents = ['entries']
 13 |         result = bibtexparser.dumps(bib_database, writer)
 14 |         expected = \
 15 | """@book{Toto3000,
 16 |  author = {Toto, A and Titi, B},
 17 |  title = {A title}
 18 | }
 19 | 
 20 | @article{Wigner1938,
 21 |  author = {Wigner, E.},
 22 |  doi = {10.1039/TF9383400029},
 23 |  issn = {0014-7672},
 24 |  journal = {Trans. Faraday Soc.},
 25 |  owner = {fr},
 26 |  pages = {29--41},
 27 |  publisher = {The Royal Society of Chemistry},
 28 |  title = {The transition state method},
 29 |  volume = {34},
 30 |  year = {1938}
 31 | }
 32 | 
 33 | @book{Yablon2005,
 34 |  author = {Yablon, A.D.},
 35 |  publisher = {Springer},
 36 |  title = {Optical fiber fusion slicing},
 37 |  year = {2005}
 38 | }
 39 | 
 40 | """
 41 |         self.assertEqual(result, expected)
 42 | 
 43 |     def test_content_comment_only(self):
 44 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
 45 |             bib_database = bibtexparser.load(bibtex_file)
 46 |         writer = BibTexWriter()
 47 |         writer.contents = ['comments']
 48 |         result = bibtexparser.dumps(bib_database, writer)
 49 |         expected = \
 50 | """@comment{}
 51 | 
 52 | @comment{A comment}
 53 | 
 54 | """
 55 |         self.assertEqual(result, expected)
 56 | 
 57 |     def test_indent(self):
 58 |         bib_database = BibDatabase()
 59 |         bib_database.entries = [{'ID': 'abc123',
 60 |                                  'ENTRYTYPE': 'book',
 61 |                                  'author': 'test'}]
 62 |         writer = BibTexWriter()
 63 |         writer.indent = '  '
 64 |         result = bibtexparser.dumps(bib_database, writer)
 65 |         expected = \
 66 | """@book{abc123,
 67 |   author = {test}
 68 | }
 69 | 
 70 | """
 71 |         self.assertEqual(result, expected)
 72 | 
 73 |     def test_align(self):
 74 |         bib_database = BibDatabase()
 75 |         bib_database.entries = [{'ID': 'abc123',
 76 |                                  'ENTRYTYPE': 'book',
 77 |                                  'author': 'test',
 78 |                                  'thisisaverylongkey': 'longvalue'}]
 79 |         writer = BibTexWriter()
 80 |         writer.align_values = True
 81 |         result = bibtexparser.dumps(bib_database, writer)
 82 |         expected = \
 83 | """@book{abc123,
 84 |  author             = {test},
 85 |  thisisaverylongkey = {longvalue}
 86 | }
 87 | 
 88 | """
 89 |         self.assertEqual(result, expected)
 90 | 
 91 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
 92 |             bib_database = bibtexparser.load(bibtex_file)
 93 |         writer = BibTexWriter()
 94 |         writer.contents = ['entries']
 95 |         writer.align_values = True
 96 |         result = bibtexparser.dumps(bib_database, writer)
 97 |         expected = \
 98 | """@book{Toto3000,
 99 |  author    = {Toto, A and Titi, B},
100 |  title     = {A title}
101 | }
102 | 
103 | @article{Wigner1938,
104 |  author    = {Wigner, E.},
105 |  doi       = {10.1039/TF9383400029},
106 |  issn      = {0014-7672},
107 |  journal   = {Trans. Faraday Soc.},
108 |  owner     = {fr},
109 |  pages     = {29--41},
110 |  publisher = {The Royal Society of Chemistry},
111 |  title     = {The transition state method},
112 |  volume    = {34},
113 |  year      = {1938}
114 | }
115 | 
116 | @book{Yablon2005,
117 |  author    = {Yablon, A.D.},
118 |  publisher = {Springer},
119 |  title     = {Optical fiber fusion slicing},
120 |  year      = {2005}
121 | }
122 | 
123 | """
124 |         self.assertEqual(result, expected)
125 | 
126 | 
127 |     def test_entry_separator(self):
128 |         bib_database = BibDatabase()
129 |         bib_database.entries = [{'ID': 'abc123',
130 |                                  'ENTRYTYPE': 'book',
131 |                                  'author': 'test'}]
132 |         writer = BibTexWriter()
133 |         writer.entry_separator = ''
134 |         result = bibtexparser.dumps(bib_database, writer)
135 |         expected = \
136 | """@book{abc123,
137 |  author = {test}
138 | }
139 | """
140 |         self.assertEqual(result, expected)
141 | 
142 |     def test_display_order(self):
143 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
144 |             bib_database = bibtexparser.load(bibtex_file)
145 |         writer = BibTexWriter()
146 |         writer.contents = ['entries']
147 |         writer.display_order = ['year', 'publisher', 'title']
148 |         result = bibtexparser.dumps(bib_database, writer)
149 |         expected = \
150 | """@book{Toto3000,
151 |  title = {A title},
152 |  author = {Toto, A and Titi, B}
153 | }
154 | 
155 | @article{Wigner1938,
156 |  year = {1938},
157 |  publisher = {The Royal Society of Chemistry},
158 |  title = {The transition state method},
159 |  author = {Wigner, E.},
160 |  doi = {10.1039/TF9383400029},
161 |  issn = {0014-7672},
162 |  journal = {Trans. Faraday Soc.},
163 |  owner = {fr},
164 |  pages = {29--41},
165 |  volume = {34}
166 | }
167 | 
168 | @book{Yablon2005,
169 |  year = {2005},
170 |  publisher = {Springer},
171 |  title = {Optical fiber fusion slicing},
172 |  author = {Yablon, A.D.}
173 | }
174 | 
175 | """
176 |         self.assertEqual(result, expected)
177 | 
178 | 
class TestEntrySorting(unittest.TestCase):
    """Checks the entry order produced for the values of order_entries_by."""

    # Database shared by the tests; entries are deliberately not sorted by ID.
    bib_database = BibDatabase()
    bib_database.entries = [
        {'ID': 'b', 'ENTRYTYPE': 'article'},
        {'ID': 'c', 'ENTRYTYPE': 'book'},
        {'ID': 'a', 'ENTRYTYPE': 'book'},
    ]

    def _dump_ordered_by(self, order, database=None):
        """Dump ``database`` (the shared one by default) with the given ordering."""
        writer = BibTexWriter()
        writer.order_entries_by = order
        if database is None:
            database = self.bib_database
        return bibtexparser.dumps(database, writer)

    def test_sort_default(self):
        """Without an explicit writer the entries come out sorted by ID."""
        rendered = bibtexparser.dumps(self.bib_database)
        self.assertEqual(rendered,
                         "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n")

    def test_sort_none(self):
        """order_entries_by = None keeps the original entry order."""
        rendered = self._dump_ordered_by(None)
        self.assertEqual(rendered,
                         "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n")

    def test_sort_id(self):
        """Entries are ordered by their ID."""
        rendered = self._dump_ordered_by(('ID', ))
        self.assertEqual(rendered,
                         "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n")

    def test_sort_type(self):
        """Entries are ordered by their ENTRYTYPE only."""
        rendered = self._dump_ordered_by(('ENTRYTYPE', ))
        self.assertEqual(rendered,
                         "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n")

    def test_sort_type_id(self):
        """Entries are ordered by ENTRYTYPE first, then by ID."""
        rendered = self._dump_ordered_by(('ENTRYTYPE', 'ID'))
        self.assertEqual(rendered,
                         "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n")

    def test_sort_missing_field(self):
        """An entry lacking the sort field is written before the others."""
        database = BibDatabase()
        database.entries = [
            {'ID': 'b', 'ENTRYTYPE': 'article', 'year': '2000'},
            {'ID': 'c', 'ENTRYTYPE': 'book', 'year': '2010'},
            {'ID': 'a', 'ENTRYTYPE': 'book'},
        ]
        rendered = self._dump_ordered_by(('year', ), database)
        expected = "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n@book{c,\n year = {2010}\n}\n\n"
        self.assertEqual(rendered, expected)
236 | 
237 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # BibtexParser documentation build configuration file, created by
  5 | # sphinx-quickstart on Thu Aug  1 13:30:23 2013.
  6 | #
  7 | # This file is execfile()d with the current directory set to its containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | import sys, os
 16 | 
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# Each insert targets position 0, so the package directory (inserted last)
# ends up ahead of the repository root on sys.path.
sys.path.insert(0, os.path.abspath('../..'))
sys.path.insert(0, os.path.abspath('../../bibtexparser'))
 22 | 
 23 | # -- General configuration -----------------------------------------------------
 24 | 
 25 | # If your documentation needs a minimal Sphinx version, state it here.
 26 | #needs_sphinx = '1.0'
 27 | 
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
# Only extensions shipped with Sphinx are enabled here.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.viewcode']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document (name given without the source suffix).
master_doc = 'index'

# General information about the project.
project = 'BibtexParser'
copyright = '2013-2016, F. Boulogne and other contributors'
 47 | 
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# Both values are read from bibtexparser.__version__; when the package cannot
# be imported, the placeholder 'latest' is used for both instead.
try:
    import bibtexparser as bp
    # The short X.Y version.
    version = bp.__version__
    # The full version, including alpha/beta/rc tags.
    release = bp.__version__
except ImportError:
    version = 'latest'
    release = 'latest'
 61 | 
 62 | # The language for content autogenerated by Sphinx. Refer to documentation
 63 | # for a list of supported languages.
 64 | #language = None
 65 | 
 66 | # There are two options for replacing |today|: either, you set today to some
 67 | # non-false value, then it is used:
 68 | #today = ''
 69 | # Else, today_fmt is used as the format for a strftime call.
 70 | #today_fmt = '%B %d, %Y'
 71 | 
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# Left empty: no pattern is excluded.
exclude_patterns = []
 75 | 
 76 | # The reST default role (used for this markup: `text`) to use for all documents.
 77 | #default_role = None
 78 | 
 79 | # If true, '()' will be appended to :func: etc. cross-reference text.
 80 | #add_function_parentheses = True
 81 | 
 82 | # If true, the current module name will be prepended to all description
 83 | # unit titles (such as .. function::).
 84 | #add_module_names = True
 85 | 
 86 | # If true, sectionauthor and moduleauthor directives will be shown in the
 87 | # output. They are ignored by default.
 88 | #show_authors = False
 89 | 
# The name of the Pygments (syntax highlighting) style to use for code
# samples in the generated documentation.
pygments_style = 'sphinx'
 92 | 
 93 | # A list of ignored prefixes for module index sorting.
 94 | #modindex_common_prefix = []
 95 | 
 96 | # -- Options for HTML output ---------------------------------------------------
 97 | 
# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
# NOTE(review): recent Sphinx releases appear to ship this theme under the
# name 'classic' -- confirm against the Sphinx version used to build the docs.
html_theme = 'default'
101 | 
102 | # Theme options are theme-specific and customize the look and feel of a theme
103 | # further.  For a list of options available for each theme, see the
104 | # documentation.
105 | #html_theme_options = {}
106 | 
107 | # Add any paths that contain custom themes here, relative to this directory.
108 | #html_theme_path = []
109 | 
110 | # The name for this set of Sphinx documents.  If None, it defaults to
111 | # "<project> v<release> documentation".
112 | #html_title = None
113 | 
114 | # A shorter title for the navigation bar.  Default is the same as html_title.
115 | #html_short_title = None
116 | 
117 | # The name of an image file (relative to this directory) to place at the top
118 | # of the sidebar.
119 | #html_logo = None
120 | 
121 | # The name of an image file (within the static path) to use as favicon of the
122 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
123 | # pixels large.
124 | #html_favicon = None
125 | 
126 | # Add any paths that contain custom static files (such as style sheets) here,
127 | # relative to this directory. They are copied after the builtin static files,
128 | # so a file named "default.css" will overwrite the builtin "default.css".
129 | #html_static_path = ['_static']
130 | 
131 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
132 | # using the given strftime format.
133 | #html_last_updated_fmt = '%b %d, %Y'
134 | 
135 | # If true, SmartyPants will be used to convert quotes and dashes to
136 | # typographically correct entities.
137 | #html_use_smartypants = True
138 | 
139 | # Custom sidebar templates, maps document names to template names.
140 | #html_sidebars = {}
141 | 
142 | # Additional templates that should be rendered to pages, maps page names to
143 | # template names.
144 | #html_additional_pages = {}
145 | 
146 | # If false, no module index is generated.
147 | #html_domain_indices = True
148 | 
149 | # If false, no index is generated.
150 | #html_use_index = True
151 | 
152 | # If true, the index is split into individual pages for each letter.
153 | #html_split_index = False
154 | 
155 | # If true, links to the reST sources are added to the pages.
156 | #html_show_sourcelink = True
157 | 
158 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
159 | #html_show_sphinx = True
160 | 
161 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
162 | #html_show_copyright = True
163 | 
164 | # If true, an OpenSearch description file will be output, and all pages will
165 | # contain a <link> tag referring to it.  The value of this option must be the
166 | # base URL from which the finished HTML is served.
167 | #html_use_opensearch = ''
168 | 
169 | # This is the file name suffix for HTML files (e.g. ".xhtml").
170 | #html_file_suffix = None
171 | 
# Output file base name for HTML help builder (project name + 'doc').
htmlhelp_basename = 'BibtexParserdoc'
174 | 
175 | 
176 | # -- Options for LaTeX output --------------------------------------------------
177 | 
# Every option below is commented out, so this dictionary is empty.
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
# A single 'manual' document is built, starting from the 'index' source file.
latex_documents = [
  ('index', 'BibtexParser.tex', 'BibtexParser Documentation',
   'F. Boulogne', 'manual'),
]
195 | 
196 | # The name of an image file (relative to this directory) to place at the top of
197 | # the title page.
198 | #latex_logo = None
199 | 
200 | # For "manual" documents, if this is true, then toplevel headings are parts,
201 | # not chapters.
202 | #latex_use_parts = False
203 | 
204 | # If true, show page references after internal links.
205 | #latex_show_pagerefs = False
206 | 
207 | # If true, show URL addresses after external links.
208 | #latex_show_urls = False
209 | 
210 | # Documents to append as an appendix to all manuals.
211 | #latex_appendices = []
212 | 
213 | # If false, no module index is generated.
214 | #latex_domain_indices = True
215 | 
216 | 
217 | # -- Options for manual page output --------------------------------------------
218 | 
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
# A single page named 'bibtexparser' is declared, in manual section 1.
man_pages = [
    ('index', 'bibtexparser', 'BibtexParser Documentation',
     ['F. Boulogne'], 1)
]
225 | 
226 | # If true, show URL addresses after external links.
227 | #man_show_urls = False
228 | 
229 | 
230 | # -- Options for Texinfo output ------------------------------------------------
231 | 
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# The description replaces the placeholder text left by sphinx-quickstart.
texinfo_documents = [
  ('index', 'BibtexParser', 'BibtexParser Documentation',
   'F. Boulogne', 'BibtexParser', 'A BibTeX parser and writer for Python.',
   'Miscellaneous'),
]
240 | 
241 | # Documents to append as an appendix to all manuals.
242 | #texinfo_appendices = []
243 | 
244 | # If false, no module index is generated.
245 | #texinfo_domain_indices = True
246 | 
247 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
248 | #texinfo_show_urls = 'footnote'
249 | 


--------------------------------------------------------------------------------
/bibtexparser/bibtexexpression.py:
--------------------------------------------------------------------------------
  1 | import pyparsing as pp
  2 | 
  3 | 
  4 | # General helpers
  5 | 
  6 | def strip_after_new_lines(s):
  7 |     """Removes leading and trailing whitespaces in all but first line."""
  8 |     lines = s.splitlines()
  9 |     if len(lines) > 1:
 10 |         lines = [lines[0]] + [l.lstrip() for l in lines[1:]]
 11 |     return '\n'.join(lines)
 12 | 
 13 | 
 14 | def add_logger_parse_action(expr, log_func):
 15 |     """Register a callback on expression parsing with the adequate message."""
 16 |     def action(s, l, t):
 17 |         log_func("Found {}: {}".format(expr.resultsName, t))
 18 |     expr.addParseAction(action)
 19 | 
 20 | 
 21 | # Parse action helpers
 22 | # Helpers for returning values from the parsed tokens. Shaped as pyparsing's
 23 | # parse actions. In pyparsing wording:
 24 | # s, l, t, stand for string, location, token
 25 | 
 26 | def first_token(s, l, t):
 27 |     # TODO Handle this case correctly!
 28 |     assert(len(t) == 1)
 29 |     return t[0]
 30 | 
 31 | 
 32 | def remove_trailing_newlines(s, l, t):
 33 |     if t[0]:
 34 |         return t[0].rstrip('\n')
 35 | 
 36 | 
 37 | def remove_braces(s, l, t):
 38 |     if len(t[0]) < 1:
 39 |         return ''
 40 |     else:
 41 |         start = 1 if t[0][0] == '{' else 0
 42 |         end = -1 if t[0][-1] == '}' else None
 43 |         return t[0][start:end]
 44 | 
 45 | 
 46 | def field_to_pair(s, l, t):
 47 |     """
 48 |     Looks for parsed element named 'Field'.
 49 | 
 50 |     :returns: (name, value).
 51 |     """
 52 |     f = t.get('Field')
 53 |     # Not sure it is desirable here to strip but it is for conformance
 54 |     # to previous implementation
 55 |     return (f.get('FieldName'),
 56 |             strip_after_new_lines(f.get('Value')))
 57 | 
 58 | 
 59 | # Expressions helpers
 60 | 
 61 | def in_braces_or_pars(exp):
 62 |     """
 63 |     exp -> (exp)|{exp}
 64 |     """
 65 |     return ((pp.Suppress('{') + exp + pp.Suppress('}')) |
 66 |             (pp.Suppress('(') + exp + pp.Suppress(')')))
 67 | 
 68 | 
 69 | class BibtexExpression(object):
 70 |     """Gives access to pyparsing expressions.
 71 | 
 72 |     Attributes are pyparsing expressions for the following elements:
 73 | 
 74 |     * main_expression: the bibtex file
 75 |     * string_def: a string definition
 76 |     * preamble_decl: a preamble declaration
 77 |     * explicit_comment: an explicit comment
 78 |     * entry: an entry definition
 79 |     * implicit_comment: an implicit comment
 80 | 
 81 |     """
 82 | 
 83 |     ParseException = pp.ParseException
 84 | 
 85 |     def __init__(self):
 86 | 
 87 |         # Bibtex keywords
 88 | 
 89 |         string_def_start = pp.CaselessKeyword("@string")
 90 |         preamble_start = pp.CaselessKeyword("@preamble")
 91 |         comment_line_start = pp.CaselessKeyword('@comment')
 92 | 
 93 |         # String names
 94 |         string_name = pp.Word(pp.alphanums + '_')('StringName')
 95 |         self.set_string_name_parse_action(lambda s, l, t: None)
 96 |         string_name.addParseAction(self._string_name_parse_action)
 97 | 
 98 |         # Values inside bibtex fields
 99 |         # Values can be integer or string expressions. The latter may use
100 |         # quoted or braced values.
101 | 
102 |         # Integer values
103 |         integer = pp.Word(pp.nums)('Integer')
104 | 
105 |         # Braced values: braced values can contain nested (but balanced) braces
106 |         braced_value_content = pp.CharsNotIn('{}')
107 |         braced_value = pp.Forward()  # Recursive definition for nested braces
108 |         braced_value <<= pp.originalTextFor(
109 |             '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
110 |             )('BracedValue')
111 |         braced_value.setParseAction(remove_braces)
112 |         # TODO add ignore for "\}" and "\{" ?
113 |         # TODO @ are not parsed by bibtex in braces
114 | 
115 |         # Quoted values: may contain braced content with balanced braces
116 |         brace_in_quoted = pp.nestedExpr('{', '}', ignoreExpr=None)
117 |         text_in_quoted = pp.CharsNotIn('"{}')
118 |         # (quotes should be escaped by braces in quoted value)
119 |         quoted_value = pp.originalTextFor(
120 |             '"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"'
121 |             )('QuotedValue')
122 |         quoted_value.addParseAction(pp.removeQuotes)
123 | 
124 |         # String expressions
125 |         string_expr = pp.delimitedList(
126 |             (quoted_value | braced_value | string_name), delim='#'
127 |             )('StringExpression')
128 |         self.set_string_expression_parse_action(lambda s, l, t: None)
129 |         string_expr.addParseAction(self._string_expr_parse_action)
130 | 
131 |         value = (integer | string_expr)('Value')
132 | 
133 |         # Entries
134 | 
135 |         # @EntryType { ...
136 |         entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
137 |         entry_type.setParseAction(first_token)
138 | 
139 |         # Entry key: any character up to a ',' without leading and trailing
140 |         # spaces.
141 |         key = pp.SkipTo(',')('Key')  # Exclude @',\#}{~%
142 |         key.setParseAction(lambda s, l, t: first_token(s, l, t).strip())
143 | 
144 |         # Field name: word of letters, digits, dashes and underscores
145 |         field_name = pp.Word(pp.alphanums + '_-()')('FieldName')
146 |         field_name.setParseAction(first_token)
147 | 
148 |         # Field: field_name = value
149 |         field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
150 |         field.setParseAction(field_to_pair)
151 | 
152 |         # List of fields: comma separeted fields
153 |         field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
154 |                       )('Fields')
155 |         field_list.setParseAction(
156 |             lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})
157 | 
158 |         # Entry: type, key, and fields
159 |         self.entry = (entry_type +
160 |                       in_braces_or_pars(key + pp.Suppress(',') + field_list)
161 |                       )('Entry')
162 | 
163 |         # Other stuff: comments, string definitions, and preamble declarations
164 | 
165 |         # Explicit comments: @comment + everything up to next valid declaration
166 |         # starting on new line.
167 |         not_an_implicit_comment = (pp.LineStart() + pp.Literal('@')
168 |                                    ) | pp.stringEnd()
169 |         self.explicit_comment = (
170 |             pp.Suppress(comment_line_start) +
171 |             pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
172 |                                asString=True))('ExplicitComment')
173 |         self.explicit_comment.addParseAction(remove_trailing_newlines)
174 |         self.explicit_comment.addParseAction(remove_braces)
175 |         # Previous implementation included comment until next '}'.
176 |         # This is however not inline with bibtex behavior that is to only
177 |         # ignore until EOL. Brace stipping is arbitrary here but avoids
178 |         # duplication on bibtex write.
179 | 
180 |         # Empty implicit_comments lead to infinite loop of zeroOrMore
181 |         def mustNotBeEmpty(t):
182 |             if not t[0]:
183 |                 raise pp.ParseException("Match must not be empty.")
184 | 
185 |         # Implicit comments: not anything else
186 |         self.implicit_comment = pp.originalTextFor(
187 |             pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
188 |             asString=True)('ImplicitComment')
189 |         self.implicit_comment.addParseAction(remove_trailing_newlines)
190 | 
191 |         # String definition
192 |         self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
193 |             string_name +
194 |             pp.Suppress('=') +
195 |             string_expr('StringValue')
196 |             ))('StringDefinition')
197 | 
198 |         # Preamble declaration
199 |         self.preamble_decl = (pp.Suppress(preamble_start) +
200 |                               in_braces_or_pars(value))('PreambleDeclaration')
201 | 
202 |         # Main bibtex expression
203 | 
204 |         self.main_expression = pp.ZeroOrMore(
205 |                 self.string_def |
206 |                 self.preamble_decl |
207 |                 self.explicit_comment |
208 |                 self.entry |
209 |                 self.implicit_comment)
210 | 
211 |     def add_log_function(self, log_fun):
212 |         """Add notice to logger on entry, comment, preamble, string definitions.
213 | 
214 |         :param log_fun: logger function
215 |         """
216 |         for e in [self.entry,
217 |                   self.implicit_comment,
218 |                   self.explicit_comment,
219 |                   self.preamble_decl,
220 |                   self.string_def]:
221 |             add_logger_parse_action(e, log_fun)
222 | 
    def set_string_name_parse_action(self, fun):
        """Set the parseAction for string name expression.

        The callable is stored on the instance; ``_string_name_parse_action``
        forwards its ``(s, l, t)`` arguments to it.

        .. Note::

            For some reason pyparsing duplicates the string_name
            expression so setting its parseAction a posteriori has no effect
            in the context of a string expression. This is why this function
            should be used instead.

        :param fun: callable invoked as ``fun(s, l, t)``
        """
        self._string_name_parse_action_fun = fun
234 | 
235 |     def _string_name_parse_action(self, s, l, t):
236 |         return self._string_name_parse_action_fun(s, l, t)
237 | 
    def set_string_expression_parse_action(self, fun):
        """Set the parseAction for string_expression expression.

        The callable is stored on the instance; ``_string_expr_parse_action``
        forwards its ``(s, l, t)`` arguments to it.

        .. Note::

            See set_string_name_parse_action.

        :param fun: callable invoked as ``fun(s, l, t)``
        """
        self._string_expr_parse_action_fun = fun
246 | 
247 |     def _string_expr_parse_action(self, s, l, t):
248 |         return self._string_expr_parse_action_fun(s, l, t)
249 | 
250 |     def parseFile(self, file_obj):
251 |         return self.main_expression.parseFile(file_obj, parseAll=True)
252 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
  1 | The code is distributed under a dual license (at your choice).
  2 | 
  3 | #####################################################################
  4 | Redistribution and use in source and binary forms, with or without
  5 | modification, are permitted provided that the following conditions are
  6 | met:
  7 | 
  8 |     (1) Redistributions of source code must retain the above copyright
  9 |     notice, this list of conditions and the following disclaimer. 
 10 | 
 11 |     (2) Redistributions in binary form must reproduce the above copyright
 12 |     notice, this list of conditions and the following disclaimer in
 13 |     the documentation and/or other materials provided with the
 14 |     distribution.  
 15 |     
 16 |     (3)The name of the author may not be used to
 17 |     endorse or promote products derived from this software without
 18 |     specific prior written permission.
 19 | 
 20 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 21 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 24 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 29 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 30 | POSSIBILITY OF SUCH DAMAGE.
 31 | 
 32 | #####################################################################
 33 |                    GNU LESSER GENERAL PUBLIC LICENSE
 34 |                        Version 3, 29 June 2007
 35 | 
 36 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 37 |  Everyone is permitted to copy and distribute verbatim copies
 38 |  of this license document, but changing it is not allowed.
 39 | 
 40 | 
 41 |   This version of the GNU Lesser General Public License incorporates
 42 | the terms and conditions of version 3 of the GNU General Public
 43 | License, supplemented by the additional permissions listed below.
 44 | 
 45 |   0. Additional Definitions.
 46 | 
 47 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 48 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 49 | General Public License.
 50 | 
 51 |   "The Library" refers to a covered work governed by this License,
 52 | other than an Application or a Combined Work as defined below.
 53 | 
 54 |   An "Application" is any work that makes use of an interface provided
 55 | by the Library, but which is not otherwise based on the Library.
 56 | Defining a subclass of a class defined by the Library is deemed a mode
 57 | of using an interface provided by the Library.
 58 | 
 59 |   A "Combined Work" is a work produced by combining or linking an
 60 | Application with the Library.  The particular version of the Library
 61 | with which the Combined Work was made is also called the "Linked
 62 | Version".
 63 | 
 64 |   The "Minimal Corresponding Source" for a Combined Work means the
 65 | Corresponding Source for the Combined Work, excluding any source code
 66 | for portions of the Combined Work that, considered in isolation, are
 67 | based on the Application, and not on the Linked Version.
 68 | 
 69 |   The "Corresponding Application Code" for a Combined Work means the
 70 | object code and/or source code for the Application, including any data
 71 | and utility programs needed for reproducing the Combined Work from the
 72 | Application, but excluding the System Libraries of the Combined Work.
 73 | 
 74 |   1. Exception to Section 3 of the GNU GPL.
 75 | 
 76 |   You may convey a covered work under sections 3 and 4 of this License
 77 | without being bound by section 3 of the GNU GPL.
 78 | 
 79 |   2. Conveying Modified Versions.
 80 | 
 81 |   If you modify a copy of the Library, and, in your modifications, a
 82 | facility refers to a function or data to be supplied by an Application
 83 | that uses the facility (other than as an argument passed when the
 84 | facility is invoked), then you may convey a copy of the modified
 85 | version:
 86 | 
 87 |    a) under this License, provided that you make a good faith effort to
 88 |    ensure that, in the event an Application does not supply the
 89 |    function or data, the facility still operates, and performs
 90 |    whatever part of its purpose remains meaningful, or
 91 | 
 92 |    b) under the GNU GPL, with none of the additional permissions of
 93 |    this License applicable to that copy.
 94 | 
 95 |   3. Object Code Incorporating Material from Library Header Files.
 96 | 
 97 |   The object code form of an Application may incorporate material from
 98 | a header file that is part of the Library.  You may convey such object
 99 | code under terms of your choice, provided that, if the incorporated
100 | material is not limited to numerical parameters, data structure
101 | layouts and accessors, or small macros, inline functions and templates
102 | (ten or fewer lines in length), you do both of the following:
103 | 
104 |    a) Give prominent notice with each copy of the object code that the
105 |    Library is used in it and that the Library and its use are
106 |    covered by this License.
107 | 
108 |    b) Accompany the object code with a copy of the GNU GPL and this license
109 |    document.
110 | 
111 |   4. Combined Works.
112 | 
113 |   You may convey a Combined Work under terms of your choice that,
114 | taken together, effectively do not restrict modification of the
115 | portions of the Library contained in the Combined Work and reverse
116 | engineering for debugging such modifications, if you also do each of
117 | the following:
118 | 
119 |    a) Give prominent notice with each copy of the Combined Work that
120 |    the Library is used in it and that the Library and its use are
121 |    covered by this License.
122 | 
123 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
124 |    document.
125 | 
126 |    c) For a Combined Work that displays copyright notices during
127 |    execution, include the copyright notice for the Library among
128 |    these notices, as well as a reference directing the user to the
129 |    copies of the GNU GPL and this license document.
130 | 
131 |    d) Do one of the following:
132 | 
133 |        0) Convey the Minimal Corresponding Source under the terms of this
134 |        License, and the Corresponding Application Code in a form
135 |        suitable for, and under terms that permit, the user to
136 |        recombine or relink the Application with a modified version of
137 |        the Linked Version to produce a modified Combined Work, in the
138 |        manner specified by section 6 of the GNU GPL for conveying
139 |        Corresponding Source.
140 | 
141 |        1) Use a suitable shared library mechanism for linking with the
142 |        Library.  A suitable mechanism is one that (a) uses at run time
143 |        a copy of the Library already present on the user's computer
144 |        system, and (b) will operate properly with a modified version
145 |        of the Library that is interface-compatible with the Linked
146 |        Version.
147 | 
148 |    e) Provide Installation Information, but only if you would otherwise
149 |    be required to provide such information under section 6 of the
150 |    GNU GPL, and only to the extent that such information is
151 |    necessary to install and execute a modified version of the
152 |    Combined Work produced by recombining or relinking the
153 |    Application with a modified version of the Linked Version. (If
154 |    you use option 4d0, the Installation Information must accompany
155 |    the Minimal Corresponding Source and Corresponding Application
156 |    Code. If you use option 4d1, you must provide the Installation
157 |    Information in the manner specified by section 6 of the GNU GPL
158 |    for conveying Corresponding Source.)
159 | 
160 |   5. Combined Libraries.
161 | 
162 |   You may place library facilities that are a work based on the
163 | Library side by side in a single library together with other library
164 | facilities that are not Applications and are not covered by this
165 | License, and convey such a combined library under terms of your
166 | choice, if you do both of the following:
167 | 
168 |    a) Accompany the combined library with a copy of the same work based
169 |    on the Library, uncombined with any other library facilities,
170 |    conveyed under the terms of this License.
171 | 
172 |    b) Give prominent notice with the combined library that part of it
173 |    is a work based on the Library, and explaining where to find the
174 |    accompanying uncombined form of the same work.
175 | 
176 |   6. Revised Versions of the GNU Lesser General Public License.
177 | 
178 |   The Free Software Foundation may publish revised and/or new versions
179 | of the GNU Lesser General Public License from time to time. Such new
180 | versions will be similar in spirit to the present version, but may
181 | differ in detail to address new problems or concerns.
182 | 
183 |   Each version is given a distinguishing version number. If the
184 | Library as you received it specifies that a certain numbered version
185 | of the GNU Lesser General Public License "or any later version"
186 | applies to it, you have the option of following the terms and
187 | conditions either of that published version or of any later version
188 | published by the Free Software Foundation. If the Library as you
189 | received it does not specify a version number of the GNU Lesser
190 | General Public License, you may choose any version of the GNU Lesser
191 | General Public License ever published by the Free Software Foundation.
192 | 
193 |   If the Library as you received it specifies that a proxy can decide
194 | whether future versions of the GNU Lesser General Public License shall
195 | apply, that proxy's public statement of acceptance of any version is
196 | permanent authorization for you to choose that version for the
197 | Library.
198 | 


--------------------------------------------------------------------------------
/docs/source/tutorial.rst:
--------------------------------------------------------------------------------
  1 | ========
  2 | Tutorial
  3 | ========
  4 | 
  5 | Step 0: Vocabulary
  6 | ==================
  7 | 
  8 | * An **entry** designates for example `@book{...}`, `@article{...}`, etc.
  9 | * A **comment** is written as `@comment{...}`.
 10 | * A **preamble** is a `@preamble{...}` block.
 11 | * A **string** is `@string{...}`.
 12 | 
 13 | In an entry, you can find
 14 | 
 15 | * an **entry type** like `article`, `book`, etc.
 16 | * **entry keys** or **keys** such as `author`, `title`, `year`...
 17 | * and also **records**, which designate the values of those keys.
 18 | 
 19 | 
 20 | Step 1: Prepare a BibTeX file
 21 | =============================
 22 | 
 23 | First, we prepare a BibTeX sample file. This is just for the purpose of illustration:
 24 | 
 25 | .. code-block:: python
 26 | 
 27 |     bibtex = """@ARTICLE{Cesar2013,
 28 |       author = {Jean César},
 29 |       title = {An amazing title},
 30 |       year = {2013},
 31 |       month = jan,
 32 |       volume = {12},
 33 |       pages = {12--23},
 34 |       journal = {Nice Journal},
 35 |       abstract = {This is an abstract. This line should be long enough to test
 36 |     	 multilines...},
 37 |       comments = {A comment},
 38 |       keywords = {keyword1, keyword2}
 39 |     }
 40 |     """
 41 | 
 42 |     with open('bibtex.bib', 'w') as bibfile:
 43 |         bibfile.write(bibtex)
 44 | 
 45 | Step 2: Parse it!
 46 | =================
 47 | 
 48 | Simplest call
 49 | -------------
 50 | 
 51 | OK. Everything is in place. Let's parse the BibTeX file.
 52 | 
 53 | .. code-block:: python
 54 | 
 55 |     import bibtexparser
 56 | 
 57 |     with open('bibtex.bib') as bibtex_file:
 58 |         bib_database = bibtexparser.load(bibtex_file)
 59 | 
 60 |     print(bib_database.entries)
 61 | 
 62 | 
 63 | It prints a list of dictionaries for reference entries, for example books, articles:
 64 | 
 65 | .. code-block:: python
 66 | 
 67 |     [{'journal': 'Nice Journal',
 68 |       'comments': 'A comment',
 69 |       'pages': '12--23',
 70 |       'month': 'jan',
 71 |       'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...',
 72 |       'title': 'An amazing title',
 73 |       'year': '2013',
 74 |       'volume': '12',
 75 |       'ID': 'Cesar2013',
 76 |       'author': 'Jean César',
 77 |       'keyword': 'keyword1, keyword2',
 78 |       'ENTRYTYPE': 'article'}]
 79 | 
 80 | Note that, by convention, uppercase keys (ID, ENTRYTYPE) are data generated by the parser, while lowercase keys come from the original bibtex file.
 81 | 
 82 | You can also print comments, preambles and strings:
 83 | 
 84 | .. code-block:: python
 85 | 
 86 |     print(bib_database.comments)
 87 |     print(bib_database.preambles)
 88 |     print(bib_database.strings)
 89 | 
 90 | 
 91 | Parse a string
 92 | --------------
 93 | 
 94 | If for some reason, you prefer to parse a string, that's also possible:
 95 | 
 96 | .. code-block:: python
 97 | 
 98 |     import bibtexparser
 99 | 
100 |     with open('bibtex.bib') as bibtex_file:
101 |         bibtex_str = bibtex_file.read()
102 | 
103 |     bib_database = bibtexparser.loads(bibtex_str)
104 | 
105 | 
106 | Tune parser's options
107 | ---------------------
108 | 
109 | In the previous snippet, several default options are used.
110 | You can tweak them as you wish.
111 | 
112 | .. code-block:: python
113 | 
114 |     import bibtexparser
115 |     from bibtexparser.bparser import BibTexParser
116 | 
117 |     parser = BibTexParser()
118 |     parser.ignore_nonstandard_types = False
119 |     parser.homogenize_fields = False
120 |     parser.common_strings = False
121 | 
122 |     bib_database = bibtexparser.loads(bibtex_str, parser)
123 | 
124 | 
125 | Step 3: Export
126 | ==============
127 | 
128 | Once you have worked on your parsed database, you may want to export the result. This library provides some functions to help with that. However, you can write your own functions if you have specific requirements.
129 | 
130 | Create a BibTeX file or string
131 | --------------------------------
132 | 
133 | The bibliographic data can be converted back into a string:
134 | 
135 | .. code-block:: python
136 | 
137 |     import bibtexparser
138 | 
139 |     bibtex_str = bibtexparser.dumps(bib_database)
140 | 
141 | or a BibTeX file like this:
142 | 
143 | .. code-block:: python
144 | 
145 |     import bibtexparser
146 | 
147 |     with open('bibtex.bib', 'w') as bibtex_file:
148 |         bibtexparser.dump(bib_database, bibtex_file)
149 | 
150 | 
151 | Call the writer
152 | ---------------
153 | 
154 | In the first section we prepared a BibTeX sample file. We can prepare the same file using pure Python and the ``BibTexWriter`` class.
155 | 
156 | .. code-block:: python
157 | 
158 |     from bibtexparser.bwriter import BibTexWriter
159 |     from bibtexparser.bibdatabase import BibDatabase
160 | 
161 |     db = BibDatabase()
162 |     db.entries = [
163 |         {'journal': 'Nice Journal',
164 |          'comments': 'A comment',
165 |          'pages': '12--23',
166 |          'month': 'jan',
167 |          'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...',
168 |          'title': 'An amazing title',
169 |          'year': '2013',
170 |          'volume': '12',
171 |          'ID': 'Cesar2013',
172 |          'author': 'Jean César',
173 |          'keyword': 'keyword1, keyword2',
174 |          'ENTRYTYPE': 'article'}]
175 | 
176 |     writer = BibTexWriter()
177 |     with open('bibtex.bib', 'w') as bibfile:
178 |         bibfile.write(writer.write(db))
179 | 
180 | This code generates the following file:
181 | 
182 | .. code-block:: latex
183 | 
184 |     @article{Cesar2013,
185 |      abstract = {This is an abstract. This line should be long enough to test
186 |     multilines...},
187 |      author = {Jean César},
188 |      comments = {A comment},
189 |      journal = {Nice Journal},
190 |      keyword = {keyword1, keyword2},
191 |      month = {jan},
192 |      pages = {12--23},
193 |      title = {An amazing title},
194 |      volume = {12},
195 |      year = {2013}
196 |     }
197 | 
198 | The writer also has several flags that can be enabled to customize the output file.
199 | For example we can use ``indent`` and ``comma_first`` to customize the previous entry, first the code:
200 | 
201 | .. code-block:: python
202 | 
203 |     from bibtexparser.bwriter import BibTexWriter
204 |     from bibtexparser.bibdatabase import BibDatabase
205 | 
206 |     db = BibDatabase()
207 |     db.entries = [
208 |         {'journal': 'Nice Journal',
209 |          'comments': 'A comment',
210 |          'pages': '12--23',
211 |          'month': 'jan',
212 |          'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...',
213 |          'title': 'An amazing title',
214 |          'year': '2013',
215 |          'volume': '12',
216 |          'ID': 'Cesar2013',
217 |          'author': 'Jean César',
218 |          'keyword': 'keyword1, keyword2',
219 |          'ENTRYTYPE': 'article'}]
220 | 
221 |     writer = BibTexWriter()
222 |     writer.indent = '    '     # indent entries with 4 spaces instead of one
223 |     writer.comma_first = True  # place the comma at the beginning of the line
224 |     with open('bibtex.bib', 'w') as bibfile:
225 |         bibfile.write(writer.write(db))
226 | 
227 | This code results in the following, customized, file:
228 | 
229 | .. code-block:: latex
230 | 
231 |     @article{Cesar2013
232 |     ,    abstract = {This is an abstract. This line should be long enough to test
233 |     multilines...}
234 |     ,    author = {Jean César}
235 |     ,    comments = {A comment}
236 |     ,    journal = {Nice Journal}
237 |     ,    keyword = {keyword1, keyword2}
238 |     ,    month = {jan}
239 |     ,    pages = {12--23}
240 |     ,    title = {An amazing title}
241 |     ,    volume = {12}
242 |     ,    year = {2013}
243 |     }
244 | 
245 | 
246 | Flags to the writer object can modify not only how an entry is printed but how several BibTeX entries are sorted and separated.
247 | See the :ref:`bibtexparser_api` for the full list of flags.
248 | 
249 | 
250 | Step 4: Add salt and pepper
251 | ===========================
252 | 
253 | In this section, we discuss some customizations and details.
254 | 
255 | Customizations
256 | --------------
257 | 
258 | By default, the parser does not alter the content of each field and keeps it as a simple string. There are many cases
259 | where this is not desired. For example, instead of a string with multiple authors, it could be parsed as a list.
260 | 
261 | To modify field values during parsing, a callback function can be supplied to the parser which can be used to modify
262 | BibTeX entries. The library includes several functions which may be used. Alternatively, you can read them to create
263 | your own functions.
264 | 
265 | .. code-block:: python
266 | 
267 |     import bibtexparser
268 |     from bibtexparser.bparser import BibTexParser
269 |     from bibtexparser.customization import *
270 | 
271 |     # Let's define a function to customize our entries.
272 |     # It takes a record and returns this record.
273 |     def customizations(record):
274 |         """Use some functions delivered by the library
275 | 
276 |         :param record: a record
277 |         :returns: -- customized record
278 |         """
279 |         record = type(record)
280 |         record = author(record)
281 |         record = editor(record)
282 |         record = journal(record)
283 |         record = keyword(record)
284 |         record = link(record)
285 |         record = page_double_hyphen(record)
286 |         record = doi(record)
287 |         return record
288 | 
289 |     with open('bibtex.bib') as bibtex_file:
290 |         parser = BibTexParser()
291 |         parser.customization = customizations
292 |         bib_database = bibtexparser.load(bibtex_file, parser=parser)
293 |         print(bib_database.entries)
294 | 
295 | 
296 | If you think that you have a customization which could be useful to others, please share with us!
297 | 
298 | 
299 | Accents and weird characters
300 | ----------------------------
301 | 
302 | Your bibtex may contain accents and specific characters.
303 | They are sometimes coded like this ``\'{e}``, but this is not the correct way; ``{\'e}`` is preferred. Moreover, you may want to manipulate ``é``. There are different situations:
304 | 
305 | * Case 1: you plan to use this library to work with latex and you assume that the original bibtex is clean. You have nothing to do.
306 | 
307 | * Case 2: you plan to use this library to work with latex but your bibtex is not really clean.
308 | 
309 | .. code-block:: python
310 | 
311 |     import bibtexparser
312 |     from bibtexparser.bparser import BibTexParser
313 |     from bibtexparser.customization import homogenize_latex_encoding
314 | 
315 |     with open('bibtex.bib') as bibtex_file:
316 |         parser = BibTexParser()
317 |         parser.customization = homogenize_latex_encoding
318 |         bib_database = bibtexparser.load(bibtex_file, parser=parser)
319 |         print(bib_database.entries)
320 | 
321 | 
322 | * Case 3: you plan to use this library to work with something different and your bibtex is not really clean.
323 |   Then, you probably want to use unicode.
324 | 
325 | .. code-block:: python
326 | 
327 |     import bibtexparser
328 |     from bibtexparser.bparser import BibTexParser
329 |     from bibtexparser.customization import convert_to_unicode
330 | 
331 |     with open('bibtex.bib') as bibtex_file:
332 |         parser = BibTexParser()
333 |         parser.customization = convert_to_unicode
334 |         bib_database = bibtexparser.load(bibtex_file, parser=parser)
335 |         print(bib_database.entries)
336 | 
337 | 
338 | .. Note::
339 | 
340 |     If you want to mix different customization functions, you can write your own function.
341 | 


--------------------------------------------------------------------------------
/bibtexparser/bparser.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Original source: github.com/okfn/bibserver
  5 | # Authors:
  6 | # markmacgillivray
  7 | # Etienne Posthumus (epoz)
  8 | # Francois Boulogne <fboulogne at april dot org>
  9 | 
 10 | import sys
 11 | import io
 12 | import logging
 13 | 
 14 | from bibtexparser.bibdatabase import BibDatabase, BibDataString, STANDARD_TYPES
 15 | from bibtexparser.bibtexexpression import BibtexExpression
 16 | 
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | __all__ = ['BibTexParser']
 20 | 
 21 | 
# Text type alias: `str` on Python 3, `unicode` on Python 2.
if sys.version_info >= (3, 0):
    ustr = str
else:
    ustr = unicode
 26 | 
 27 | 
 28 | def parse(data, *args, **kwargs):
 29 |     parser = BibTexParser(*args, **kwargs)
 30 |     return parser.parse(data)
 31 | 
 32 | 
 33 | class BibTexParser(object):
 34 |     """
 35 |     A parser for reading BibTeX bibliographic data files.
 36 | 
 37 |     Example::
 38 | 
 39 |         from bibtexparser.bparser import BibTexParser
 40 | 
 41 |         bibtex_str = ...
 42 | 
 43 |         parser = BibTexParser()
 44 |         parser.ignore_nonstandard_types = False
 45 |         parser.homogenize_fields = False
 46 |         parser.common_strings = False
 47 |         bib_database = bibtexparser.loads(bibtex_str, parser)
 48 | 
 49 |     """
 50 | 
 51 |     def __new__(cls, data=None, **args):
 52 |         """
 53 |         To catch the old API structure in which creating the parser would
 54 |         immediately parse and return data.
 55 |         """
 56 | 
 57 |         if data is None:
 58 |             return super(BibTexParser, cls).__new__(cls)
 59 |         else:
 60 |             # For backwards compatibility: if data is given, parse
 61 |             # and return the `BibDatabase` object instead of the parser.
 62 |             return parse(data, **args)
 63 | 
 64 |     def __init__(self, data=None,
 65 |                  customization=None,
 66 |                  ignore_nonstandard_types=True,
 67 |                  homogenize_fields=False,
 68 |                  common_strings=False):
 69 |         """
 70 |         Creates a parser for rading BibTeX files
 71 | 
 72 |         :return: parser
 73 |         :rtype: `BibTexParser`
 74 |         """
 75 |         self.bib_database = BibDatabase()
 76 | 
 77 |         #: Load common strings such as months abbreviation
 78 |         #: Default: `False`.
 79 |         self.common_strings = common_strings
 80 |         if self.common_strings:
 81 |             self.bib_database.load_common_strings()
 82 | 
 83 |         #: Callback function to process BibTeX entries after parsing,
 84 |         #: for example to create a list from a string with multiple values.
 85 |         #: By default all BibTeX values are treated as simple strings.
 86 |         #: Default: `None`.
 87 |         self.customization = customization
 88 | 
 89 |         #: Ignore non-standard BibTeX types (`book`, `article`, etc).
 90 |         #: Default: `True`.
 91 |         self.ignore_nonstandard_types = ignore_nonstandard_types
 92 | 
 93 |         #: Sanitize BibTeX field names, for example change `url` to `link` etc.
 94 |         #: Field names are always converted to lowercase names.
 95 |         #: Default: `False`.
 96 |         self.homogenize_fields = homogenize_fields
 97 | 
 98 |         # On some sample data files, the character encoding detection simply
 99 |         # hangs We are going to default to utf8, and mandate it.
100 |         self.encoding = 'utf8'
101 | 
102 |         # pre-defined set of key changes
103 |         self.alt_dict = {
104 |             'keyw': u'keyword',
105 |             'keywords': u'keyword',
106 |             'authors': u'author',
107 |             'editors': u'editor',
108 |             'url': u'link',
109 |             'urls': u'link',
110 |             'links': u'link',
111 |             'subjects': u'subject'
112 |         }
113 | 
114 |         # Setup the parser expression
115 |         self._init_expressions()
116 | 
117 |     def parse(self, bibtex_str, partial=False):
118 |         """Parse a BibTeX string into an object
119 | 
120 |         :param bibtex_str: BibTeX string
121 |         :type: str or unicode
122 |         :param partial: If True, print errors only on parsing failures.
123 |         If False, an exception is raised.
124 |         :type: boolean
125 |         :return: bibliographic database
126 |         :rtype: BibDatabase
127 |         """
128 |         bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
129 |         try:
130 |             self._expr.parseFile(bibtex_file_obj)
131 |         except self._expr.ParseException as exc:
132 |             logger.error("Could not parse properly, starting at %s", exc.line)
133 |             if not partial:
134 |                 raise exc
135 |         return self.bib_database
136 | 
137 |     def parse_file(self, file, partial=False):
138 |         """Parse a BibTeX file into an object
139 | 
140 |         :param file: BibTeX file or file-like object
141 |         :type: file
142 |         :param partial: If True, print errors only on parsing failures.
143 |         If False, an exception is raised.
144 |         :type: boolean
145 |         :return: bibliographic database
146 |         :rtype: BibDatabase
147 |         """
148 |         return self.parse(file.read(), partial=partial)
149 | 
    def _init_expressions(self):
        """
        Defines all parser expressions used internally.

        Creates the `BibtexExpression`, registers the string callbacks and
        the logger, then attaches one parse action per top-level expression
        so that parsed items are stored through the `_add_*` methods.
        """
        self._expr = BibtexExpression()

        # Handle string as BibDataString object
        self._expr.set_string_name_parse_action(
            lambda s, l, t:
                BibDataString(self.bib_database, t[0]))
        self._expr.set_string_expression_parse_action(
            lambda s, l, t:
                self._interpolate_string_expression(t))

        # Add notice to logger
        self._expr.add_log_function(logger.debug)

        # Set actions
        # Entries: the type, key and fields are read from the named results.
        self._expr.entry.addParseAction(
            lambda s, l, t: self._add_entry(
                t.get('EntryType'), t.get('Key'), t.get('Fields'))
            )
        # Implicit and explicit comments are both stored via _add_comment.
        self._expr.implicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0])
            )
        self._expr.explicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0])
            )
        self._expr.preamble_decl.addParseAction(
            lambda s, l, t: self._add_preamble(t[0])
            )
        # String definitions: `StringName` carries an object whose `.name`
        # attribute is passed on together with the parsed value.
        self._expr.string_def.addParseAction(
            lambda s, l, t: self._add_string(t['StringName'].name,
                                             t['StringValue'])
            )
185 | 
186 |     def _bibtex_file_obj(self, bibtex_str):
187 |         # Some files have Byte-order marks inserted at the start
188 |         byte = '\xef\xbb\xbf'
189 |         if not isinstance(byte, ustr):
190 |             byte = ustr(byte, self.encoding, 'ignore')
191 |         if bibtex_str[:3] == byte:
192 |             bibtex_str = bibtex_str[3:]
193 |         if not isinstance(bibtex_str, ustr):
194 |             bibtex_str = bibtex_str.decode(encoding=self.encoding)
195 |         return io.StringIO(bibtex_str)
196 | 
197 |     def _clean_val(self, val):
198 |         """ Clean instring before adding to dictionary
199 | 
200 |         :param val: a value
201 |         :type val: string
202 |         :returns: string -- value
203 |         """
204 |         if not val or val == "{}":
205 |             return ''
206 |         return val
207 | 
208 |     def _clean_key(self, key):
209 |         """ Lowercase a key and return as unicode.
210 | 
211 |         :param key: a key
212 |         :type key: string
213 |         :returns: (unicode) string -- value
214 |         """
215 |         key = key.lower()
216 |         if not isinstance(key, ustr):
217 |             return ustr(key, 'utf-8')
218 |         else:
219 |             return key
220 | 
221 |     def _clean_field_key(self, key):
222 |         """ Clean a bibtex field key and homogenize alternative forms.
223 | 
224 |         :param key: a key
225 |         :type key: string
226 |         :returns: string -- value
227 |         """
228 |         key = self._clean_key(key)
229 |         if self.homogenize_fields:
230 |             if key in list(self.alt_dict.keys()):
231 |                 key = self.alt_dict[key]
232 |         return key
233 | 
234 |     def _add_entry(self, entry_type, entry_id, fields):
235 |         """ Adds a parsed entry.
236 |         Includes checking type and fields, cleaning, applying customizations.
237 | 
238 |         :param entry_type: the entry type
239 |         :type entry_type: string
240 |         :param entry_id: the entry bibid
241 |         :type entry_id: string
242 |         :param fields: the fields and values
243 |         :type fields: dictionary
244 |         :returns: string -- value
245 |         """
246 |         d = {}
247 |         entry_type = self._clean_key(entry_type)
248 |         if self.ignore_nonstandard_types and entry_type not in STANDARD_TYPES:
249 |             logger.warning('Entry type %s not standard. Not considered.',
250 |                            entry_type)
251 |             return
252 |         for key in fields:
253 |             d[self._clean_field_key(key)] = self._clean_val(fields[key])
254 |         d['ENTRYTYPE'] = entry_type
255 |         d['ID'] = entry_id
256 |         if self.customization is not None:
257 |             # apply any customizations to the record object then return it
258 |             logger.debug('Apply customizations and return dict')
259 |             d = self.customization(d)
260 |         self.bib_database.entries.append(d)
261 | 
262 |     def _add_comment(self, comment):
263 |         """
264 |         Stores a comment in the list of comment.
265 | 
266 |         :param comment: the parsed comment
267 |         :type comment: string
268 |         """
269 |         logger.debug('Store comment in list of comments: ' +
270 |                      comment.__repr__())
271 |         self.bib_database.comments.append(comment)
272 | 
273 |     def _add_string(self, string_key, string):
274 |         """
275 |         Stores a new string in the string dictionary.
276 | 
277 |         :param string_key: the string key
278 |         :type string_key: string
279 |         :param string: the string value
280 |         :type string: string
281 |         """
282 |         if string_key in self.bib_database.strings:
283 |             logger.warning('Overwritting existing string for key: %s.',
284 |                            string_key)
285 |         logger.debug('Store string: {} -> {}'.format(string_key, string))
286 |         self.bib_database.strings[string_key] = self._clean_val(string)
287 | 
288 |     def _interpolate_string_expression(self, string_expr):
289 |         """
290 |         Replaces bibdatastrings by their values in an expression.
291 | 
292 |         :param string_expr: the parsed string as a list
293 |         :type string_expr: list
294 |         """
295 |         return ''.join([self._expand_string(s) for s in string_expr])
296 | 
297 |     def _expand_string(self, string_or_bibdatastring):
298 |         """
299 |         Eventually replaces a bibdatastring by its value.
300 | 
301 |         :param string_or_bibdatastring: the parsed token
302 |         :type string_expr: string or BibDataString
303 |         :returns: string
304 |         """
305 |         if isinstance(string_or_bibdatastring, BibDataString):
306 |             return string_or_bibdatastring.get_value()
307 |         else:
308 |             return string_or_bibdatastring
309 | 
310 |     def _add_preamble(self, preamble):
311 |         """
312 |         Stores a preamble.
313 | 
314 |         :param preamble: the parsed preamble
315 |         :type preamble: string
316 |         """
317 |         logger.debug('Store preamble in list of preambles')
318 |         self.bib_database.preambles.append(preamble)
319 | 


--------------------------------------------------------------------------------
/bibtexparser/customization.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | A set of functions useful for customizing bibtex fields.
  6 | You can find inspiration from these functions to design yours.
  7 | Each of them takes a record and return the modified record.
  8 | """
  9 | 
 10 | import itertools
 11 | import re
 12 | import logging
 13 | 
 14 | from bibtexparser.latexenc import unicode_to_latex, unicode_to_crappy_latex1, unicode_to_crappy_latex2, string_to_latex, protect_uppercase
 15 | 
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Names exported by ``from bibtexparser.customization import *``.
# NOTE(review): ``InvalidName`` is not listed, so a star-import does not
# expose the exception that ``splitname`` raises -- confirm this is intended.
__all__ = ['splitname', 'getnames', 'author', 'editor', 'journal', 'keyword',
           'link', 'page_double_hyphen', 'doi', 'type', 'convert_to_unicode',
           'homogenize_latex_encoding']
 21 | 
 22 | 
 23 | class InvalidName(ValueError):
 24 |     """Exception raised by :py:func:`customization.splitname` when an invalid name is input.
 25 | 
 26 |     """
 27 |     pass
 28 | 
 29 | 
 30 | def splitname(name, strict_mode=True):
 31 |     """
 32 |     Break a name into its constituent parts: First, von, Last, and Jr.
 33 | 
 34 |     :param string name: a string containing a single name
 35 |     :param Boolean strict_mode: whether to use strict mode
 36 |     :returns: dictionary of constituent parts
 37 |     :raises `customization.InvalidName`: If an invalid name is given and
 38 |                                          ``strict_mode = True``.
 39 | 
 40 |     In BibTeX, a name can be represented in any of three forms:
 41 |         * First von Last
 42 |         * von Last, First
 43 |         * von Last, Jr, First
 44 | 
 45 |     This function attempts to split a given name into its four parts. The
 46 |     returned dictionary has keys of ``first``, ``last``, ``von`` and ``jr``.
 47 |     Each value is a list of the words making up that part; this may be an empty
 48 |     list.  If the input has no non-whitespace characters, a blank dictionary is
 49 |     returned.
 50 | 
 51 |     It is capable of detecting some errors with the input name. If the
 52 |     ``strict_mode`` parameter is ``True``, which is the default, this results in
 53 |     a :class:`customization.InvalidName` exception being raised. If it is
 54 |     ``False``, the function continues, working around the error as best it can.
 55 |     The errors that can be detected are listed below along with the handling
 56 |     for non-strict mode:
 57 | 
 58 |         * Name finishes with a trailing comma: delete the comma
 59 |         * Too many parts (e.g., von Last, Jr, First, Error): merge extra parts
 60 |           into First
 61 |         * Unterminated opening brace: add closing brace to end of input
 62 |         * Unmatched closing brace: add opening brace at start of word
 63 | 
 64 |     """
 65 |     # Useful references:
 66 |     # http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html#names
 67 |     # http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf
 68 | 
 69 |     # Whitespace characters that can separate words.
 70 |     whitespace = set(' ~\r\n\t')
 71 | 
 72 |     # We'll iterate over the input once, dividing it into a list of words for
 73 |     # each comma-separated section. We'll also calculate the case of each word
 74 |     # as we work.
 75 |     sections = [[]]      # Sections of the name.
 76 |     cases = [[]]         # 1 = uppercase, 0 = lowercase, -1 = caseless.
 77 |     word = []            # Current word.
 78 |     case = -1            # Case of the current word.
 79 |     level = 0            # Current brace level.
 80 |     bracestart = False   # Will the next character be the first within a brace?
 81 |     controlseq = True    # Are we currently processing a control sequence?
 82 |     specialchar = None   # Are we currently processing a special character?
 83 | 
 84 |     # Using an iterator allows us to deal with escapes in a simple manner.
 85 |     nameiter = iter(name)
 86 |     for char in nameiter:
 87 |         # An escape.
 88 |         if char == '\\':
 89 |             escaped = next(nameiter)
 90 | 
 91 |             # BibTeX doesn't allow whitespace escaping. Copy the slash and fall
 92 |             # through to the normal case to handle the whitespace.
 93 |             if escaped in whitespace:
 94 |                 word.append(char)
 95 |                 char = escaped
 96 | 
 97 |             else:
 98 |                 # Is this the first character in a brace?
 99 |                 if bracestart:
100 |                     bracestart = False
101 |                     controlseq = escaped.isalpha()
102 |                     specialchar = True
103 | 
104 |                 # Can we use it to determine the case?
105 |                 elif (case == -1) and escaped.isalpha():
106 |                     if escaped.isupper():
107 |                         case = 1
108 |                     else:
109 |                         case = 0
110 | 
111 |                 # Copy the escape to the current word and go to the next
112 |                 # character in the input.
113 |                 word.append(char)
114 |                 word.append(escaped)
115 |                 continue
116 | 
117 |         # Start of a braced expression.
118 |         if char == '{':
119 |             level += 1
120 |             word.append(char)
121 |             bracestart = True
122 |             controlseq = False
123 |             specialchar = False
124 |             continue
125 | 
126 |         # All the below cases imply this (and don't test its previous value).
127 |         bracestart = False
128 | 
129 |         # End of a braced expression.
130 |         if char == '}':
131 |             # Check and reduce the level.
132 |             if level:
133 |                 level -= 1
134 |             else:
135 |                 if strict_mode:
136 |                     raise InvalidName("Unmatched closing brace in name {{{0}}}.".format(name))
137 |                 word.insert(0, '{')
138 | 
139 |             # Update the state, append the character, and move on.
140 |             controlseq = False
141 |             specialchar = False
142 |             word.append(char)
143 |             continue
144 | 
145 |         # Inside a braced expression.
146 |         if level:
147 |             # Is this the end of a control sequence?
148 |             if controlseq:
149 |                 if not char.isalpha():
150 |                     controlseq = False
151 | 
152 |             # If it's a special character, can we use it for a case?
153 |             elif specialchar:
154 |                 if (case == -1) and char.isalpha():
155 |                     if char.isupper():
156 |                         case = 1
157 |                     else:
158 |                         case = 0
159 | 
160 |             # Append the character and move on.
161 |             word.append(char)
162 |             continue
163 | 
164 |         # End of a word.
165 |         # NB. we know we're not in a brace here due to the previous case.
166 |         if char == ',' or char in whitespace:
167 |             # Don't add empty words due to repeated whitespace.
168 |             if word:
169 |                 sections[-1].append(''.join(word))
170 |                 word = []
171 |                 cases[-1].append(case)
172 |                 case = -1
173 |                 controlseq = False
174 |                 specialchar = False
175 | 
176 |             # End of a section.
177 |             if char == ',':
178 |                 if len(sections) < 3:
179 |                     sections.append([])
180 |                     cases.append([])
181 |                 elif strict_mode:
182 |                     raise InvalidName("Too many commas in the name {{{0}}}.".format(name))
183 |             continue
184 | 
185 |         # Regular character.
186 |         word.append(char)
187 |         if (case == -1) and char.isalpha():
188 |             if char.isupper():
189 |                 case = 1
190 |             else:
191 |                 case = 0
192 | 
193 |     # Unterminated brace?
194 |     if level:
195 |         if strict_mode:
196 |             raise InvalidName("Unterminated opening brace in the name {{{0}}}.".format(name))
197 |         while level:
198 |             word.append('}')
199 |             level -= 1
200 | 
201 |     # Handle the final word.
202 |     if word:
203 |         sections[-1].append(''.join(word))
204 |         cases[-1].append(case)
205 | 
206 |     # Get rid of trailing sections.
207 |     if not sections[-1]:
208 |         # Trailing comma?
209 |         if (len(sections) > 1) and strict_mode:
210 |             raise InvalidName("Trailing comma at end of name {{{0}}}.".format(name))
211 |         sections.pop(-1)
212 |         cases.pop(-1)
213 | 
214 |     # No non-whitespace input.
215 |     if not sections or not any(bool(section) for section in sections):
216 |         return {}
217 | 
218 |     # Initialise the output dictionary.
219 |     parts = {'first': [], 'last': [], 'von': [], 'jr': []}
220 | 
221 |     # Form 1: "First von Last"
222 |     if len(sections) == 1:
223 |         p0 = sections[0]
224 | 
225 |         # One word only: last cannot be empty.
226 |         if len(p0) == 1:
227 |             parts['last'] = p0
228 | 
229 |         # Two words: must be first and last.
230 |         elif len(p0) == 2:
231 |             parts['first'] = p0[:1]
232 |             parts['last'] = p0[1:]
233 | 
234 |         # Need to use the cases to figure it out.
235 |         else:
236 |             cases = cases[0]
237 | 
238 |             # First is the longest sequence of words starting with uppercase
239 |             # that is not the whole string. von is then the longest sequence
240 |             # whose last word starts with lowercase that is not the whole
241 |             # string. Last is the rest. NB., this means last cannot be empty.
242 | 
243 |             # At least one lowercase letter.
244 |             if 0 in cases:
245 |                 # Index from end of list of first and last lowercase word.
246 |                 firstl = cases.index(0) - len(cases)
247 |                 lastl = -cases[::-1].index(0) - 1
248 |                 if lastl == -1:
249 |                     lastl -= 1      # Cannot consume the rest of the string.
250 | 
251 |                 # Pull the parts out.
252 |                 parts['first'] = p0[:firstl]
253 |                 parts['von'] = p0[firstl:lastl+1]
254 |                 parts['last'] = p0[lastl+1:]
255 | 
256 |             # No lowercase: last is the last word, first is everything else.
257 |             else:
258 |                 parts['first'] = p0[:-1]
259 |                 parts['last'] = p0[-1:]
260 | 
261 |     # Form 2 ("von Last, First") or 3 ("von Last, jr, First")
262 |     else:
263 |         # As long as there is content in the first name partition, use it as-is.
264 |         first = sections[-1]
265 |         if first and first[0]:
266 |             parts['first'] = first
267 | 
268 |         # And again with the jr part.
269 |         if len(sections) == 3:
270 |             jr = sections[-2]
271 |             if jr and jr[0]:
272 |                 parts['jr'] = jr
273 | 
274 |         # Last name cannot be empty; if there is only one word in the first
275 |         # partition, we have to use it for the last name.
276 |         last = sections[0]
277 |         if len(last) == 1:
278 |             parts['last'] = last
279 | 
280 |         # Have to look at the cases to figure it out.
281 |         else:
282 |             lcases = cases[0]
283 | 
284 |             # At least one lowercase: von is the longest sequence of whitespace
285 |             # separated words whose last word does not start with an uppercase
286 |             # word, and last is the rest.
287 |             if 0 in lcases:
288 |                 split = len(lcases) - lcases[::-1].index(0)
289 |                 if split == len(lcases):
290 |                     split = 0            # Last cannot be empty.
291 |                 parts['von'] = sections[0][:split]
292 |                 parts['last'] = sections[0][split:]
293 | 
294 |             # All uppercase => all last.
295 |             else:
296 |                 parts['last'] = sections[0]
297 | 
298 |     # Done.
299 |     return parts
300 | 
301 | 
def getnames(names):
    """Convert people names as surname, firstnames
    or surname, initials.

    :param names: a list of names
    :type names: list
    :returns: list -- Correctly formated names

    Empty (whitespace-only) names are skipped. When a lowercase particle
    (``ben``, ``van``, ``der``, ``de``, ``la``, ``le``) appears among the
    given names, it and every word after it are moved in front of the
    surname, so ``Jean de la Tour`` becomes ``de la Tour, Jean``.

    .. Note::

    This function is known to be too simple to handle properly
    the complex rules. We would like to enhance this in forthcoming releases.
    """
    particles = ('ben', 'van', 'der', 'de', 'la', 'le')
    suffixes = ('jnr', 'jr', 'junior')

    tidynames = []
    for namestring in names:
        namestring = namestring.strip()
        if not namestring:
            continue
        if ',' in namestring:
            # "Last, First ..." form: everything before the first comma is
            # the surname.
            head, tail = namestring.split(',', 1)
            last = head.strip()
            firsts = tail.split()
        else:
            # "First ... Last" form: the final word is the surname.
            words = namestring.split()
            last = words.pop()
            firsts = [word.replace('.', '. ').strip() for word in words]
        # A trailing "jr"-style suffix is not a surname; use the word before
        # it, if there is one (a lone suffix is kept instead of crashing).
        if last in suffixes and firsts:
            last = firsts.pop()
        # Move the particle chain (first particle onwards) onto the surname.
        # Slicing at the first particle avoids mutating the list while it is
        # being iterated, which used to detach the wrong words.
        for index, item in enumerate(firsts):
            if item in particles:
                last = ' '.join(firsts[index:] + [last])
                firsts = firsts[:index]
                break
        tidynames.append(last + ", " + ' '.join(firsts))
    return tidynames
335 | 
336 | 
def author(record):
    """
    Replace the author field by the list of names produced by ``getnames``.

    The field is split on the literal ``" and "`` after newlines have been
    turned into spaces. An empty author field is removed from the record.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "author" not in record:
        return record
    raw = record["author"]
    if not raw:
        del record["author"]
        return record
    pieces = raw.replace('\n', ' ').split(" and ")
    record["author"] = getnames([piece.strip() for piece in pieces])
    return record
352 | 
353 | 
def editor(record):
    """
    Turn the editor field into a list of dicts, one per editor, each holding
    the formatted name and an id (the name without commas, blanks or dots).

    An empty editor field is removed from the record.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "editor" not in record:
        return record
    if not record["editor"]:
        del record["editor"]
        return record
    raw_names = record["editor"].replace('\n', ' ').split(" and ")
    tidy = getnames([raw.strip() for raw in raw_names])
    # convert editor to object
    record["editor"] = [
        {"name": person,
         "ID": person.replace(',', '').replace(' ', '').replace('.', '')}
        for person in tidy
    ]
    return record
372 | 
373 | 
def page_double_hyphen(record):
    """
    Rewrite the pages field so that its first and last parts are joined by a
    double hyphen (--).

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "pages" not in record:
        return record
    # hyphen, non-breaking hyphen, en dash, em dash, hyphen-minus, minus sign
    dashes = (u'‐', u'‑', u'–', u'—', u'-', u'−')
    for dash in dashes:
        pages = record["pages"]
        if dash not in pages:
            continue
        bounds = [chunk.strip().strip(dash) for chunk in pages.split(dash)]
        record["pages"] = bounds[0] + '--' + bounds[-1]
    return record
391 | 
392 | 
def type(record):
    """
    Lowercase the ``type`` field when the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "type" not in record:
        return record
    record["type"] = record["type"].lower()
    return record
405 | 
406 | 
def journal(record):
    """
    Replace a non-empty journal field by a dict holding the original journal
    name and a journal id (the name without commas, blanks or dots).

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    title = record["journal"] if "journal" in record else None
    if title:
        # switch journal to object
        compact = title
        for unwanted in (',', ' ', '.'):
            compact = compact.replace(unwanted, '')
        record["journal"] = {"name": title, "ID": compact}

    return record
423 | 
424 | 
def keyword(record, sep=',|;'):
    """
    Split the keyword field into a list of stripped keywords.

    Newlines are removed from the field before it is split.

    :param record: the record.
    :type record: dict
    :param sep: pattern used for the splitting regexp.
    :type sep: string, optional
    :returns: dict -- the modified record.

    """
    if "keyword" in record:
        flattened = record["keyword"].replace('\n', '')
        pieces = re.split(sep, flattened)
        record["keyword"] = [piece.strip() for piece in pieces]

    return record
440 | 
441 | 
def link(record):
    """
    Turn the link field into a list of link dicts, one per non-empty line.

    Each line is split on single spaces: the first part is stored under
    ``url``, the second (if any) under ``anchor`` and the third (if any)
    under ``format``. Lines whose url is empty are dropped.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "link" not in record:
        return record
    rows = [row.strip().replace("  ", " ") for row in record["link"].split('\n')]
    collected = []
    record['link'] = collected
    labels = ("url", "anchor", "format")
    for row in rows:
        # zip stops at the shorter sequence, so only the parts that are
        # present get a label and anything beyond the third is ignored.
        entry = dict(zip(labels, row.split(" ")))
        if len(entry["url"]) > 0:
            collected.append(entry)

    return record
464 | 
465 | 
def doi(record):
    """
    Add a link entry built from the doi field, unless one is already there.

    A doi starting with ``10`` is turned into a ``http://dx.doi.org/`` URL;
    any other value is used as the URL unchanged. The new link is appended
    to ``record['link']`` (created as an empty list when missing) with the
    anchor ``doi``. Nothing is added when an existing link already has the
    anchor ``doi``, so applying the function twice does not duplicate it.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if 'doi' not in record:
        return record
    if 'link' not in record:
        record['link'] = []

    def _is_doi_link(item):
        # Link dicts carry the keys url/anchor/format, so a bare
        # ``'doi' in item`` key test never recognises them; look at the
        # anchor value. The membership test is kept for other item shapes.
        if isinstance(item, dict) and item.get('anchor') == 'doi':
            return True
        return 'doi' in item

    if not any(_is_doi_link(item) for item in record['link']):
        url = record['doi']
        if url.startswith('10'):
            url = 'http://dx.doi.org/' + url
        record['link'].append({"url": url, "anchor": "doi"})
    return record
487 | 
488 | 
def convert_to_unicode(record):
    """
    Replace LaTeX accent sequences in every field by unicode characters.

    Two passes are made per field: a direct replacement using the
    ``unicode_to_crappy_latex1`` and ``unicode_to_latex`` tables, then, if a
    backslash is still present, a pass over ``unicode_to_crappy_latex2`` in
    which the replacement is placed after the character that followed the
    LaTeX sequence.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in record:
        if '\\' in record[field] or '{' in record[field]:
            for uni, tex in itertools.chain(unicode_to_crappy_latex1, unicode_to_latex):
                if tex in record[field]:
                    record[field] = record[field].replace(tex, uni)

        # If there is still very crappy items
        if '\\' in record[field]:
            for uni, tex in unicode_to_crappy_latex2:
                if tex not in record[field]:
                    continue
                pieces = record[field].split(str(tex))
                for idx in range(len(pieces)):
                    following = idx + 1
                    if following < len(pieces) and len(pieces[following]) > 0:
                        # Change order to display accents
                        pieces[idx] = pieces[idx] + pieces[following][0]
                        pieces[following] = pieces[following][1:]
                record[field] = uni.join(pieces)
    return record
515 | 
516 | 
def homogenize_latex_encoding(record):
    """
    Homogenize the latex enconding style for bibtex

    Every field is first converted to unicode, then every field except
    ``ID`` is passed through ``string_to_latex``; the title additionally
    goes through ``protect_uppercase``.

    This function is experimental.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # First, we convert everything to unicode
    record = convert_to_unicode(record)
    # And then, we fall back
    for field in record:
        if field in ('ID',):
            continue
        logger.debug('Apply string_to_latex to: %s', field)
        record[field] = string_to_latex(record[field])
        if field != 'title':
            continue
        logger.debug('Protect uppercase in title')
        logger.debug('Before: %s', record[field])
        record[field] = protect_uppercase(record[field])
        logger.debug('After: %s', record[field])
    return record
540 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bparser.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from __future__ import unicode_literals
  5 | import unittest
  6 | import codecs
  7 | 
  8 | from bibtexparser.bparser import BibTexParser
  9 | from bibtexparser.customization import *
 10 | from bibtexparser import customization
 11 | 
 12 | 
 13 | def customizations_unicode(record):
 14 |     """Use all functions related to specific fields
 15 |     + converter to unicode.
 16 | 
 17 |     :param record: a record
 18 |     :returns: -- customized record
 19 |     """
 20 | 
 21 |     record = type(record)
 22 |     record = author(record)
 23 |     record = editor(record)
 24 |     record = journal(record)
 25 |     record = keyword(record)
 26 |     record = link(record)
 27 |     record = page_double_hyphen(record)
 28 |     record = doi(record)
 29 |     record = convert_to_unicode(record)
 30 |     return record
 31 | 
 32 | 
 33 | def customizations_latex(record):
 34 |     """Use all functions related to specific fields
 35 |     + converter to latex.
 36 | 
 37 |     :param record: a record
 38 |     :returns: -- customized record
 39 |     """
 40 | 
 41 |     record = homogenize_latex_encoding(record)
 42 |     record = type(record)
 43 |     record = author(record)
 44 |     record = editor(record)
 45 |     record = journal(record)
 46 |     record = keyword(record)
 47 |     record = link(record)
 48 |     record = page_double_hyphen(record)
 49 |     record = doi(record)
 50 |     return record
 51 | 
 52 | 
 53 | class TestBibtexParserList(unittest.TestCase):
 54 | 
 55 |     def test_wrong(self):
 56 |         """
 57 |         Wrong entry type
 58 |         """
 59 |         with open('bibtexparser/tests/data/wrong.bib', 'r') as bibfile:
 60 |             self.assetRaises(TypeError, BibTexParser, bibfile)
 61 | 
 62 |     ###########
 63 |     # ARTICLE
 64 |     ###########
 65 |     # test also that list and dict are equivalent
 66 |     def test_article(self):
 67 |         with codecs.open('bibtexparser/tests/data/article.bib', 'r', 'utf-8') as bibfile:
 68 |             bib = BibTexParser(bibfile.read())
 69 |             res_list = bib.get_entry_list()
 70 |             res_dict = bib.get_entry_dict()
 71 |             expected_list = [{'keyword': 'keyword1, keyword2',
 72 |                               'ENTRYTYPE': 'article',
 73 |                               'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word',
 74 |                               'year': '2013',
 75 |                               'journal': 'Nice Journal',
 76 |                               'ID': 'Cesar2013',
 77 |                               'pages': '12-23',
 78 |                               'title': 'An amazing title',
 79 |                               'comments': 'A comment',
 80 |                               'author': 'Jean César',
 81 |                               'volume': '12',
 82 |                               'month': 'jan'
 83 |                               }]
 84 |             expected_dict = {'Cesar2013': {'keyword': 'keyword1, keyword2',
 85 |                               'ENTRYTYPE': 'article',
 86 |                               'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word',
 87 |                               'year': '2013',
 88 |                               'journal': 'Nice Journal',
 89 |                               'ID': 'Cesar2013',
 90 |                               'pages': '12-23',
 91 |                               'title': 'An amazing title',
 92 |                               'comments': 'A comment',
 93 |                               'author': 'Jean César',
 94 |                               'volume': '12',
 95 |                               'month': 'jan'
 96 |                               }}
 97 |         self.assertEqual(res_list, expected_list)
 98 |         self.assertEqual(res_dict, expected_dict)
 99 | 
100 |     def test_article_start_bom(self):
101 |         with codecs.open('bibtexparser/tests/data/article_start_with_bom.bib', 'r', 'utf-8') as bibfile:
102 |             bib = BibTexParser(bibfile.read())
103 |             res = bib.get_entry_list()
104 |         expected = [{'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word',
105 |                      'ENTRYTYPE': 'article',
106 |                      'pages': '12-23',
107 |                      'volume': '12',
108 |                      'ID': 'Cesar2013',
109 |                      'year': '2013',
110 |                      'author': 'Jean César',
111 |                      'journal': 'Nice Journal',
112 |                      'comments': 'A comment',
113 |                      'month': 'jan',
114 |                      'keyword': 'keyword1, keyword2',
115 |                      'title': 'An amazing title'
116 |                      }]
117 |         self.assertEqual(res, expected)
118 | 
119 |     def test_article_cust_unicode(self):
120 |         with codecs.open('bibtexparser/tests/data/article.bib', 'r', 'utf-8') as bibfile:
121 |             bib = BibTexParser(bibfile.read(), customization=customizations_unicode)
122 |             res = bib.get_entry_list()
123 |         expected = [{'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word',
124 |                      'ENTRYTYPE': 'article',
125 |                      'pages': '12--23',
126 |                      'volume': '12',
127 |                      'ID': 'Cesar2013',
128 |                      'year': '2013',
129 |                      'author': ['César, Jean'],
130 |                      'journal': {'ID': 'NiceJournal', 'name': 'Nice Journal'},
131 |                      'comments': 'A comment',
132 |                      'month': 'jan',
133 |                      'keyword': ['keyword1', 'keyword2'],
134 |                      'title': 'An amazing title'
135 |                      }]
136 |         self.assertEqual(res, expected)
137 | 
138 |     def test_article_cust_latex(self):
139 |         with codecs.open('bibtexparser/tests/data/article.bib', 'r', 'utf-8') as bibfile:
140 |             bib = BibTexParser(bibfile.read(), customization=customizations_latex)
141 |             res = bib.get_entry_list()
142 |         expected = [{'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french {\\\'e}rudit word',
143 |                      'ENTRYTYPE': 'article',
144 |                      'pages': '12--23',
145 |                      'volume': '12',
146 |                      'ID': 'Cesar2013',
147 |                      'year': '2013',
148 |                      'author': ['C{\\\'e}sar, Jean'],
149 |                      'journal': {'ID': 'NiceJournal', 'name': 'Nice Journal'},
150 |                      'comments': 'A comment',
151 |                      'month': 'jan',
152 |                      'keyword': ['keyword1', 'keyword2'],
153 |                      'title': '{A}n amazing title'
154 |                      }]
155 |         self.assertEqual(res, expected)
156 | 
157 |     def test_article_cust_order(self):
158 |         def cust(record):
159 |             record = customization.page_double_hyphen(record)
160 |             record = customization.homogenize_latex_encoding(record)
161 |             record = customization.author(record)
162 |             return record
163 | 
164 |         def cust2(record):
165 |             record = customization.author(record)
166 |             record = customization.page_double_hyphen(record)
167 |             record = customization.homogenize_latex_encoding(record)
168 |             return record
169 | 
170 |         with open('bibtexparser/tests/data/multiple_entries.bib', 'r') as bibfile:
171 |             bib = BibTexParser(bibfile.read(), customization=cust)
172 |             res = bib.get_entry_list()
173 |         with open('bibtexparser/tests/data/multiple_entries.bib', 'r') as bibfile:
174 |             bib2 = BibTexParser(bibfile.read(), customization=cust2)
175 |             res2 = bib.get_entry_list()
176 |         self.assertEqual(res, res2)
177 | 
178 |     def test_article_missing_coma(self):
179 |         with open('bibtexparser/tests/data/article_missing_coma.bib', 'r') as bibfile:
180 |             bib = BibTexParser(bibfile.read())
181 |             res = bib.get_entry_list()
182 |         expected = [{'ENTRYTYPE': 'article',
183 |                      'journal': 'Nice Journal',
184 |                      'volume': '12',
185 |                      'ID': 'Cesar2013',
186 |                      'year': '2013',
187 |                      'author': 'Jean Cesar',
188 |                      'comments': 'A comment',
189 |                      'keyword': 'keyword1, keyword2',
190 |                      'title': 'An amazing title'
191 |                      },
192 |                     {'ENTRYTYPE': 'article',
193 |                      'journal': 'Nice Journal',
194 |                      'volume': '12',
195 |                      'ID': 'Baltazar2013',
196 |                      'year': '2013',
197 |                      'author': 'Jean Baltazar',
198 |                      'comments': 'A comment',
199 |                      'keyword': 'keyword1, keyword2',
200 |                      'title': 'An amazing title'
201 |                      },
202 |                     {'ENTRYTYPE': 'article',
203 |                      'journal': 'Nice Journal',
204 |                      'volume': '12',
205 |                      'ID': 'Aimar2013',
206 |                      'year': '2013',
207 |                      'author': 'Jean Aimar',
208 |                      'comments': 'A comment',
209 |                      'keyword': 'keyword1, keyword2',
210 |                      'title': 'An amazing title',
211 |                      'month': 'january'
212 |                      },
213 |                     {'ENTRYTYPE': 'article',
214 |                      'journal': 'Nice Journal',
215 |                      'volume': '12',
216 |                      'ID': 'Doute2013',
217 |                      'year': '2013',
218 |                      'author': 'Jean Doute',
219 |                      'comments': 'A comment',
220 |                      'keyword': 'keyword1, keyword2',
221 |                      'title': 'An amazing title'
222 |                      }]
223 |         self.assertEqual(res, expected)
224 | 
225 |     def test_oneline(self):
226 |         with open('bibtexparser/tests/data/article_oneline.bib', 'r') as bibfile:
227 |             bib = BibTexParser(bibfile.read())
228 |             res = bib.get_entry_list()
229 |         expected = [{'ENTRYTYPE': 'article',
230 |                      'journal': 'Nice Journal',
231 |                      'volume': '12',
232 |                      'ID': 'Cesar2013',
233 |                      'year': '2013',
234 |                      'author': 'Jean Cesar',
235 |                      'comments': 'A comment',
236 |                      'keyword': 'keyword1, keyword2',
237 |                      'title': 'An amazing title'
238 |                      },
239 |                     {'ENTRYTYPE': 'article',
240 |                      'journal': 'Nice Journal',
241 |                      'volume': '12',
242 |                      'ID': 'Baltazar2013',
243 |                      'year': '2013',
244 |                      'author': 'Jean Baltazar',
245 |                      'comments': 'A comment',
246 |                      'keyword': 'keyword1, keyword2',
247 |                      'title': 'An amazing title'
248 |                      }]
249 |         self.assertEqual(res, expected)
250 | 
251 | 
252 |     def test_article_start_with_whitespace(self):
253 |         with open('bibtexparser/tests/data/article_start_with_whitespace.bib', 'r') as bibfile:
254 |             bib = BibTexParser(bibfile.read())
255 |             self.assertEqual(len(bib.get_entry_list()), 2)
256 | 
257 |     def test_article_comma_first(self):
258 |         with open('bibtexparser/tests/data/article_comma_first.bib', 'r') as bibfile:
259 |             bib = BibTexParser(bibfile.read())
260 |             res = bib.get_entry_list()
261 |         expected = [{'ENTRYTYPE': 'article',
262 |                      'journal': 'Nice Journal',
263 |                      'volume': '12',
264 |                      'ID': 'Cesar2013',
265 |                      'year': '2013',
266 |                      'author': 'Jean Cesar',
267 |                      'comments': 'A comment',
268 |                      'keyword': 'keyword1, keyword2',
269 |                      'title': 'An amazing title'
270 |                      },
271 |                     {'ENTRYTYPE': 'article',
272 |                      'journal': 'Nice Journal',
273 |                      'volume': '12',
274 |                      'ID': 'Baltazar2013',
275 |                      'year': '2013',
276 |                      'author': 'Jean Baltazar',
277 |                      'comments': 'A comment',
278 |                      'keyword': 'keyword1, keyword2',
279 |                      'title': 'An amazing title'
280 |                      }]
281 |         self.assertEqual(res, expected)
282 | 
283 |     def test_article_no_braces(self):
284 |         with open('bibtexparser/tests/data/article_no_braces.bib', 'r') as bibfile:
285 |             bib = BibTexParser(bibfile.read())
286 |             res = bib.get_entry_list()
287 |         expected = [{'ENTRYTYPE': 'article',
288 |                      'journal': 'Nice Journal',
289 |                      'volume': '12',
290 |                      'pages': '12-23',
291 |                      'ID': 'Cesar2013',
292 |                      'year': '2013',
293 |                      'month': 'jan',
294 |                      'author': 'Jean C{\\\'e}sar{\\\"u}',
295 |                      'comments': 'A comment',
296 |                      'keyword': 'keyword1, keyword2',
297 |                      'title': 'An amazing title',
298 |                      'abstract': "This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word",
299 |                      },
300 |                      ]
301 |         self.assertEqual(res, expected)
302 | 
303 |     def test_article_special_characters(self):
304 |         with open('bibtexparser/tests/data/article_with_special_characters.bib', 'r') as bibfile:
305 |             bib = BibTexParser(bibfile.read())
306 |             res = bib.get_entry_list()
307 |         expected = [{'ENTRYTYPE': 'article',
308 |                      'journal': 'Nice Journal',
309 |                      'volume': '12',
310 |                      'pages': '12-23',
311 |                      'ID': 'Cesar2013',
312 |                      'year': '2013',
313 |                      'month': 'jan',
314 |                      'author': 'Jean C{\\\'e}sar{\\\"u}',
315 |                      'comments': 'A comment',
316 |                      'keyword': 'keyword1, keyword2',
317 |                      'title': 'An amazing title',
318 |                      'abstract': "This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word",
319 |                      },
320 |                      ]
321 |         self.assertEqual(res, expected)
322 | 
323 |     def test_article_protection_braces(self):
324 |         with open('bibtexparser/tests/data/article_with_protection_braces.bib', 'r') as bibfile:
325 |             bib = BibTexParser(bibfile.read())
326 |             res = bib.get_entry_list()
327 |         expected = [{'ENTRYTYPE': 'article',
328 |                      'journal': '{Nice Journal}',
329 |                      'volume': '12',
330 |                      'pages': '12-23',
331 |                      'ID': 'Cesar2013',
332 |                      'year': '2013',
333 |                      'month': 'jan',
334 |                      'author': 'Jean César',
335 |                      'comments': 'A comment',
336 |                      'keyword': 'keyword1, keyword2',
337 |                      'title': '{An amazing title}',
338 |                      'abstract': "This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word",
339 |                      },
340 |                      ]
341 |         self.assertEqual(res, expected)
342 | 
343 | 
344 |     ###########
345 |     # BOOK
346 |     ###########
347 |     def test_book(self):
348 |         with open('bibtexparser/tests/data/book.bib', 'r') as bibfile:
349 |             bib = BibTexParser(bibfile.read())
350 |             res = bib.get_entry_list()
351 |             expected = [{'ENTRYTYPE': 'book',
352 |                          'year': '1987',
353 |                          'edition': '2',
354 |                          'publisher': 'Wiley Edition',
355 |                          'ID': 'Bird1987',
356 |                          'volume': '1',
357 |                          'title': 'Dynamics of Polymeric Liquid',
358 |                          'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.'
359 |                          }]
360 | 
361 |         self.assertEqual(res, expected)
362 | 
363 |     def test_book_cust_unicode(self):
364 |         with open('bibtexparser/tests/data/book.bib', 'r') as bibfile:
365 |             bib = BibTexParser(bibfile.read(), customization=customizations_unicode)
366 |             res = bib.get_entry_list()
367 |             expected = [{'ENTRYTYPE': 'book',
368 |                          'year': '1987',
369 |                          'edition': '2',
370 |                          'publisher': 'Wiley Edition',
371 |                          'ID': 'Bird1987',
372 |                          'volume': '1',
373 |                          'title': 'Dynamics of Polymeric Liquid',
374 |                          'author': ['Bird, R.B.', 'Armstrong, R.C.', 'Hassager, O.']
375 |                          }]
376 | 
377 |         self.assertEqual(res, expected)
378 | 
379 |     def test_book_cust_latex(self):
380 |         with open('bibtexparser/tests/data/book.bib', 'r') as bibfile:
381 |             bib = BibTexParser(bibfile.read(), customization=customizations_latex)
382 |             res = bib.get_entry_list()
383 |             expected = [{'ENTRYTYPE': 'book',
384 |                          'year': '1987',
385 |                          'edition': '2',
386 |                          'publisher': 'Wiley Edition',
387 |                          'ID': 'Bird1987',
388 |                          'volume': '1',
389 |                          'title': '{D}ynamics of {P}olymeric {L}iquid',
390 |                          'author': ['Bird, R.B.', 'Armstrong, R.C.', 'Hassager, O.']
391 |                          }]
392 | 
393 |         self.assertEqual(res, expected)
394 | 
395 |     ###########
396 |     # TRAPS
397 |     ###########
398 |     def test_traps(self):
399 |         with codecs.open('bibtexparser/tests/data/traps.bib', 'r', 'utf-8') as bibfile:
400 |             bib = BibTexParser(bibfile.read())
401 |             res = bib.get_entry_list()
402 |             expected = [{'keywords': 'keyword1, keyword2',
403 |                          'ENTRYTYPE': 'article',
404 |                          'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word',
405 |                          'year': '2013',
406 |                          'journal': 'Nice Journal',
407 |                          'ID': 'Laide2013',
408 |                          'pages': '12-23',
409 |                          'title': '{An} amazing {title}',
410 |                          'comments': 'A comment',
411 |                          'author': 'Jean Laid{\\\'e},\nBen Loaeb',
412 |                          'volume': 'n.s.~2',
413 |                          'month': 'jan'
414 |                          }]
415 |         self.assertEqual(res, expected)
416 | 
417 |     ###########
418 |     # FEATURES
419 |     ###########
420 |     def test_features(self):
421 |         with open('bibtexparser/tests/data/features.bib', 'r') as bibfile:
422 |             bib = BibTexParser(bibfile.read())
423 |             res = bib.get_entry_list()
424 |             expected = [{'ENTRYTYPE': 'inproceedings',
425 |                          'year': '2014',
426 |                          'title': 'Cool Stuff',
427 |                          'author': 'John',
428 |                          'ID': 'mykey',
429 |                          'booktitle': 'My International Conference',
430 |                          }]
431 |         self.assertEqual(res, expected)
432 | 
433 |     def test_features2(self):
434 |         with open('bibtexparser/tests/data/features2.bib', 'r') as bibfile:
435 |             bib = BibTexParser(bibfile.read())
436 |             res = bib.get_entry_list()
437 |             expected = [{'ENTRYTYPE': 'inproceedings',
438 |                          'year': '2014',
439 |                          'title': 'Cool Stuff',
440 |                          'author': 'John Doe',
441 |                          'ID': 'mykey',
442 |                          'booktitle': 'My International Conference',
443 |                          'note': 'Email: John.Doe@example.com',
444 |                          'pages': '1--10',
445 |                          }]
446 |         self.assertEqual(res, expected)
447 | 
448 |     ###########
449 |     # WRONG
450 |     ###########
451 |     def test_wrong(self):
452 |         with open('bibtexparser/tests/data/wrong.bib', 'r') as bibfile:
453 |             bib = BibTexParser(bibfile.read())
454 |             res = bib.get_entry_list()
455 |             expected = [{'author': 'correct',
456 |                          'ID': 'bar',
457 |                          'ENTRYTYPE': 'article'}]
458 |         self.assertEqual(res, expected)
459 | 
460 |     ###########
461 |     # ENCODING
462 |     ###########
463 |     def test_encoding(self):
464 |         with codecs.open('bibtexparser/tests/data/encoding.bib', 'r', 'utf-8') as bibfile:
465 |             bib = BibTexParser(bibfile.read())
466 |             res = bib.get_entry_list()
467 |             expected = [{'keywords': 'keyword1, keyword2',
468 |                          'ENTRYTYPE': 'article',
469 |                          'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word',
470 |                          'year': '2013',
471 |                          'journal': 'Elémentaire',
472 |                          'ID': 'Cesar_2013',
473 |                          'pages': '12-23',
474 |                          'title': 'An amazing title: à',
475 |                          'comments': 'A comment',
476 |                          'author': 'Jean César',
477 |                          'volume': '12',
478 |                          'month': 'jan'
479 |                          }]
480 |         self.assertEqual(res, expected)
481 | 
482 |     def test_encoding_with_homogenize(self):
483 |         with codecs.open('bibtexparser/tests/data/encoding.bib', 'r', 'utf-8') as bibfile:
484 |             bib = BibTexParser(bibfile.read(), customization=homogenize_latex_encoding)
485 |             res = bib.get_entry_list()
486 |             expected = [{'keywords': 'keyword1, keyword2',
487 |                          'ENTRYTYPE': 'article',
488 |                          'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french {\\\'e}rudit word',
489 |                          'year': '2013',
490 |                          'journal': 'El{\\\'e}mentaire',
491 |                          'ID': 'Cesar_2013',
492 |                          'pages': '12-23',
493 |                          'title': '{A}n amazing title: {\\`a}',
494 |                          'comments': 'A comment',
495 |                          'author': 'Jean C{\\\'e}sar',
496 |                          'volume': '12',
497 |                          'month': 'jan'
498 |                          }]
499 |         self.assertEqual(res, expected)
500 | 
501 |     def test_field_name_with_dash_underscore(self):
502 |         with open('bibtexparser/tests/data/article_field_name_with_underscore.bib', 'r') as bibfile:
503 |             bib = BibTexParser(bibfile.read())
504 |         res = bib.get_entry_list()
505 |         expected = [{
506 |             'keyword': 'keyword1, keyword2',
507 |             'ENTRYTYPE': 'article',
508 |             'year': '2013',
509 |             'journal': 'Nice Journal',
510 |             'ID': 'Cesar2013',
511 |             'pages': '12-23',
512 |             'title': 'An amazing title',
513 |             'comments': 'A comment',
514 |             'author': 'Jean César',
515 |             'volume': '12',
516 |             'strange_field_name': 'val',
517 |             'strange-field-name2': 'val2',
518 |             }]
519 |         self.assertEqual(res, expected)
520 | 
521 | if __name__ == '__main__':  # run the suite when this file is executed directly
522 |     unittest.main()
523 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_splitname.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import unittest
  5 | 
  6 | from bibtexparser.customization import InvalidName, splitname
  7 | 
  8 | class TestSplitnameMethod(unittest.TestCase):
  9 |     def test_splitname_basic(self):
 10 |         """Basic tests of customization.splitname() """
 11 |         # Empty input.
 12 |         result = splitname("")
 13 |         expected = {}
 14 |         self.assertEqual(result, expected, msg="Invalid output for empty name")
 15 | 
 16 |         # Non-whitespace names.
 17 |         result = splitname("    ")
 18 |         expected = {}
 19 |         self.assertEqual(result, expected, msg="Invalid output for space-only name")
 20 |         result = splitname("  \t~~")
 21 |         expected = {}
 22 |         self.assertEqual(result, expected, msg="Invalid output for whitespace name")
 23 | 
 24 |         # Test strict mode.
 25 |         with self.assertRaises(InvalidName):         # Trailing comma (4 cases).
 26 |             splitname("BB,", strict_mode=True)
 27 |         with self.assertRaises(InvalidName):
 28 |             splitname("BB,  ", strict_mode=True)
 29 |         with self.assertRaises(InvalidName):
 30 |             splitname("BB, ~\t", strict_mode=True)
 31 |         with self.assertRaises(InvalidName):
 32 |             splitname(", ~\t", strict_mode=True)
 33 |         with self.assertRaises(InvalidName):         # Too many sections.
 34 |             splitname("AA, BB, CC, DD", strict_mode=True)
 35 |         with self.assertRaises(InvalidName):         # Unterminated opening brace (x3).
 36 |             splitname("AA {BB CC", strict_mode=True)
 37 |         with self.assertRaises(InvalidName):
 38 |             splitname("AA {{{BB CC", strict_mode=True)
 39 |         with self.assertRaises(InvalidName):
 40 |             splitname("AA {{{BB} CC}", strict_mode=True)
 41 |         with self.assertRaises(InvalidName):         # Unmatched closing brace (x3).
 42 |             splitname("AA BB CC}", strict_mode=True)
 43 |         with self.assertRaises(InvalidName):
 44 |             splitname("AA BB CC}}}", strict_mode=True)
 45 |         with self.assertRaises(InvalidName):
 46 |             splitname("{AA {BB CC}}}", strict_mode=True)
 47 | 
 48 |         # Test strict mode off for trailing comma.
 49 |         expected = {'first': [], 'von': [], 'last': ["BB"], 'jr': []}
 50 |         result = splitname("BB,", strict_mode=False)
 51 |         self.assertEqual(result, expected, msg="Invalid output for trailing comma with strict mode off")
 52 |         result = splitname("BB,   ", strict_mode=False)
 53 |         self.assertEqual(result, expected, msg="Invalid output for trailing comma with strict mode off")
 54 |         result = splitname("BB,  ~\t ", strict_mode=False)
 55 |         self.assertEqual(result, expected, msg="Invalid output for trailing comma with strict mode off")
 56 |         expected = {}
 57 |         result = splitname(",  ~\t", strict_mode=False)
 58 |         self.assertEqual(result, expected, msg="Invalid output for trailing comma with strict mode off")
 59 | 
 60 |         # Test strict mode off for too many sections.
 61 |         expected = {'first': ["CC", "DD"], 'von': [], 'last': ["AA"], 'jr': ["BB"]}
 62 |         result = splitname("AA, BB, CC, DD", strict_mode=False)
 63 |         self.assertEqual(result, expected, msg="Invalid output for too many sections with strict mode off")
 64 | 
 65 |         # Test strict mode off for an unterminated opening brace.
 66 |         result = splitname("AA {BB CC", strict_mode=False)
 67 |         expected = {'first': ["AA"], 'von': [], 'last': ["{BB CC}"], 'jr': []}
 68 |         self.assertEqual(result, expected, msg="Invalid output for unterminated opening brace with strict mode off")
 69 |         result = splitname("AA {{{BB CC", strict_mode=False)
 70 |         expected = {'first': ["AA"], 'von': [], 'last': ["{{{BB CC}}}"], 'jr': []}
 71 |         self.assertEqual(result, expected, msg="Invalid output for unterminated opening brace with strict mode off")
 72 |         result = splitname("AA {{{BB} CC}", strict_mode=False)
 73 |         expected = {'first': ["AA"], 'von': [], 'last': ["{{{BB} CC}}"], 'jr': []}
 74 |         self.assertEqual(result, expected, msg="Invalid output for unterminated opening brace with strict mode off")
 75 | 
 76 |         # Test strict mode off for an unmatched closing brace.
 77 |         result = splitname("AA BB CC}", strict_mode=False)
 78 |         expected = {'first': ["AA", "BB"], 'von': [], 'last': ["{CC}"], 'jr': []}
 79 |         self.assertEqual(result, expected, msg="Invalid output for unmatched closing brace with strict mode off")
 80 |         result = splitname("AA BB CC}}}", strict_mode=False)
 81 |         expected = {'first': ["AA", "BB"], 'von': [], 'last': ["{{{CC}}}"], 'jr': []}
 82 |         self.assertEqual(result, expected, msg="Invalid output for unmatched closing brace with strict mode off")
 83 |         result = splitname("{AA {BB CC}}}", strict_mode=False)
 84 |         expected = {'first': [], 'von': [], 'last': ["{{AA {BB CC}}}"], 'jr': []}
 85 |         self.assertEqual(result, expected, msg="Invalid output for unmatched closing brace with strict mode off")
 86 | 
 87 |         # Test it handles commas at higher brace levels.
 88 |         result = splitname("CC, dd, {AA, BB}")
 89 |         expected = {'first': ["{AA, BB}"], 'von': [], 'last': ["CC"], 'jr': ["dd"]}
 90 |         self.assertEqual(result, expected, msg="Invalid output for braced commas")
 91 | 
 92 | 
 93 |     def test_splitname_cases(self):
 94 |         """Test customization.splitname() vs output from BibTeX """
 95 |         for name, expected in splitname_test_cases:
 96 |             result = splitname(name)
 97 |             self.assertEqual(result, expected, msg="Input name: {0}".format(name))
 98 | 
 99 | 
100 | splitname_test_cases = (
101 |     (r'Per Brinch Hansen',
102 |      {'first': ['Per', 'Brinch'], 'von': [], 'last': ['Hansen'], 'jr': []}),
103 | 
104 |     (r'Brinch Hansen, Per',
105 |      {'first': ['Per'], 'von': [], 'last': ['Brinch', 'Hansen'], 'jr': []}),
106 | 
107 |     (r'Brinch Hansen,, Per',
108 |      {'first': ['Per'], 'von': [], 'last': ['Brinch', 'Hansen'], 'jr': []}),
109 | 
110 |     (r"Charles Louis Xavier Joseph de la Vall{\'e}e Poussin",
111 |      {'first': ['Charles', 'Louis', 'Xavier', 'Joseph'], 'von': ['de', 'la'],
112 |       'last':  [r'Vall{\'e}e', 'Poussin'], 'jr': []}),
113 | 
114 |     (r'D[onald] E. Knuth',
115 |      {'first': ['D[onald]', 'E.'], 'von': [], 'last': ['Knuth'], 'jr': []}),
116 | 
117 |     (r'A. {Delgado de Molina}',
118 |      {'first': ['A.'], 'von': [], 'last': ['{Delgado de Molina}'], 'jr': []}),
119 | 
120 |     (r"M. Vign{\'e}",
121 |      {'first': ['M.'], 'von': [], 'last': [r"Vign{\'e}"], 'jr': []}),
122 | 
123 | ###############################################################################
124 | #
125 | # Test cases from
126 | # http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html
127 | #
128 | ###############################################################################
129 | 
130 |     (r'AA BB',
131 |      {'first': ['AA'], 'von': [], 'last': ['BB'], 'jr': []}),
132 | 
133 |     (r'AA',
134 |      {'first': [], 'von': [], 'last': ['AA'], 'jr': []}),
135 | 
136 |     (r'AA bb',
137 |      {'first': ['AA'], 'von': [], 'last': ['bb'], 'jr': []}),
138 | 
139 |     (r'aa',
140 |      {'first': [], 'von': [], 'last': ['aa'], 'jr': []}),
141 | 
142 |     (r'AA bb CC',
143 |      {'first': ['AA'], 'von': ['bb'], 'last': ['CC'], 'jr': []}),
144 | 
145 |     (r'AA bb CC dd EE',
146 |      {'first': ['AA'], 'von': ['bb', 'CC', 'dd'], 'last': ['EE'], 'jr': []}),
147 | 
148 |     (r'AA 1B cc dd',
149 |      {'first': ['AA', '1B'], 'von': ['cc'], 'last': ['dd'], 'jr': []}),
150 | 
151 |     (r'AA 1b cc dd',
152 |      {'first': ['AA'], 'von': ['1b', 'cc'], 'last': ['dd'], 'jr': []}),
153 | 
154 |     (r'AA {b}B cc dd',
155 |      {'first': ['AA', '{b}B'], 'von': ['cc'], 'last': ['dd'], 'jr': []}),
156 | 
157 |     (r'AA {b}b cc dd',
158 |      {'first': ['AA'], 'von': ['{b}b', 'cc'], 'last': ['dd'], 'jr': []}),
159 | 
160 |     (r'AA {B}b cc dd',
161 |      {'first': ['AA'], 'von': ['{B}b', 'cc'], 'last': ['dd'], 'jr': []}),
162 | 
163 |     (r'AA {B}B cc dd',
164 |      {'first': ['AA', '{B}B'], 'von': ['cc'], 'last': ['dd'], 'jr': []}),
165 | 
166 |     (r'AA \BB{b} cc dd',
167 |      {'first': ['AA', r'\BB{b}'], 'von': ['cc'], 'last': ['dd'], 'jr': []}),
168 | 
169 |     (r'AA \bb{b} cc dd',
170 |      {'first': ['AA'], 'von': [r'\bb{b}', 'cc'], 'last': ['dd'], 'jr': []}),
171 | 
172 |     (r'AA {bb} cc DD',
173 |      {'first': ['AA', '{bb}'], 'von': ['cc'], 'last': ['DD'], 'jr': []}),
174 | 
175 |     (r'AA bb {cc} DD',
176 |      {'first': ['AA'], 'von': ['bb'], 'last': ['{cc}', 'DD'], 'jr': []}),
177 | 
178 |     (r'AA {bb} CC',
179 |      {'first': ['AA', '{bb}'], 'von': [], 'last': ['CC'], 'jr': []}),
180 | 
181 |     (r'bb CC, AA',
182 |      {'first': ['AA'], 'von': ['bb'], 'last': ['CC'], 'jr': []}),
183 | 
184 |     (r'bb CC, aa',
185 |      {'first': ['aa'], 'von': ['bb'], 'last': ['CC'], 'jr': []}),
186 | 
187 |     (r'bb CC dd EE, AA',
188 |      {'first': ['AA'], 'von': ['bb', 'CC', 'dd'], 'last': ['EE'], 'jr': []}),
189 | 
190 |     (r'bb, AA',
191 |      {'first': ['AA'], 'von': [], 'last': ['bb'], 'jr': []}),
192 | 
193 |     (r'bb CC,XX, AA',
194 |      {'first': ['AA'], 'von': ['bb'], 'last': ['CC'], 'jr': ['XX']}),
195 | 
196 |     (r'bb CC,xx, AA',
197 |      {'first': ['AA'], 'von': ['bb'], 'last': ['CC'], 'jr': ['xx']}),
198 | 
199 |     (r'BB,, AA',
200 |      {'first': ['AA'], 'von': [], 'last': ['BB'], 'jr': []}),
201 | 
202 |     (r"Paul \'Emile Victor",
203 |      {'first': ['Paul', r"\'Emile"], 'von': [], 'last': ['Victor'], 'jr': []}),
204 | 
205 |     (r"Paul {\'E}mile Victor",
206 |      {'first': ['Paul', r"{\'E}mile"], 'von': [], 'last': ['Victor'], 'jr': []}),
207 | 
208 |     (r"Paul \'emile Victor",
209 |      {'first': ['Paul'], 'von': [r"\'emile"], 'last': ['Victor'], 'jr': []}),
210 | 
211 |     (r"Paul {\'e}mile Victor",
212 |      {'first': ['Paul'], 'von': [r"{\'e}mile"], 'last': ['Victor'], 'jr': []}),
213 | 
214 |     (r"Victor, Paul \'Emile",
215 |      {'first': ['Paul', r"\'Emile"], 'von': [], 'last': ['Victor'], 'jr': []}),
216 | 
217 |     (r"Victor, Paul {\'E}mile",
218 |      {'first': ['Paul', r"{\'E}mile"], 'von': [], 'last': ['Victor'], 'jr': []}),
219 | 
220 |     (r"Victor, Paul \'emile",
221 |      {'first': ['Paul', r"\'emile"], 'von': [], 'last': ['Victor'], 'jr': []}),
222 | 
223 |     (r"Victor, Paul {\'e}mile",
224 |      {'first': ['Paul', r"{\'e}mile"], 'von': [], 'last': ['Victor'], 'jr': []}),
225 | 
226 |     (r'Dominique Galouzeau de Villepin',
227 |      {'first': ['Dominique', 'Galouzeau'], 'von': ['de'], 'last': ['Villepin'], 'jr': []}),
228 | 
229 |     (r'Dominique {G}alouzeau de Villepin',
230 |      {'first': ['Dominique'], 'von': ['{G}alouzeau', 'de'],
231 |       'last': ['Villepin'], 'jr': []}),
232 | 
233 |     (r'Galouzeau de Villepin, Dominique',
234 |      {'first': ['Dominique'], 'von': ['Galouzeau', 'de'],
235 |       'last': ['Villepin'], 'jr': []}),
236 | 
237 | ###############################################################################
238 | #
239 | # Test cases from pybtex
240 | # See file /pybtex/tests/parse_name_test.py in the pybtex source.
241 | #
242 | ###############################################################################
243 | 
244 |     (r'A. E.                   Siegman',
245 |      {'first': ['A.', 'E.'], 'von': [], 'last': ['Siegman'], 'jr': []}),
246 | 
247 |     (r'A. G. W. Cameron',
248 |      {'first': ['A.', 'G.', 'W.'], 'von': [], 'last': ['Cameron'], 'jr': []}),
249 | 
250 |     (r'A. Hoenig',
251 |      {'first': ['A.'], 'von': [], 'last': ['Hoenig'], 'jr': []}),
252 | 
253 |     (r'A. J. Van Haagen',
254 |      {'first': ['A.', 'J.', 'Van'], 'von': [], 'last': ['Haagen'], 'jr': []}),
255 | 
256 |     (r'A. S. Berdnikov',
257 |      {'first': ['A.', 'S.'], 'von': [], 'last': ['Berdnikov'], 'jr': []}),
258 | 
259 |     (r'A. Trevorrow',
260 |      {'first': ['A.'], 'von': [], 'last': ['Trevorrow'], 'jr': []}),
261 | 
262 |     (r'Adam H. Lewenberg',
263 |      {'first': ['Adam', 'H.'], 'von': [], 'last': ['Lewenberg'], 'jr': []}),
264 | 
265 |     (r'Addison-Wesley Publishing Company',
266 |      {'first': ['Addison-Wesley', 'Publishing'], 'von': [],
267 |       'last': ['Company'], 'jr': []}),
268 | 
269 |     (r'Advogato (Raph Levien)',
270 |      {'first': ['Advogato', '(Raph'], 'von': [], 'last': ['Levien)'], 'jr': []}),
271 | 
272 |     (r'Andrea de Leeuw van Weenen',
273 |      {'first': ['Andrea'], 'von': ['de', 'Leeuw', 'van'], 'last': ['Weenen'], 'jr': []}),
274 | 
275 |     (r'Andreas Geyer-Schulz',
276 |      {'first': ['Andreas'], 'von': [], 'last': ['Geyer-Schulz'], 'jr': []}),
277 | 
278 |     (r'Andr{\'e} Heck',
279 |      {'first': [r'Andr{\'e}'], 'von': [], 'last': ['Heck'], 'jr': []}),
280 | 
281 |     (r'Anne Br{\"u}ggemann-Klein',
282 |      {'first': ['Anne'], 'von': [], 'last': [r'Br{\"u}ggemann-Klein'], 'jr': []}),
283 | 
284 |     (r'Anonymous',
285 |      {'first': [], 'von': [], 'last': ['Anonymous'], 'jr': []}),
286 | 
287 |     (r'B. Beeton',
288 |      {'first': ['B.'], 'von': [], 'last': ['Beeton'], 'jr': []}),
289 | 
290 |     (r'B. Hamilton Kelly',
291 |      {'first': ['B.', 'Hamilton'], 'von': [], 'last': ['Kelly'], 'jr': []}),
292 | 
293 |     (r'B. V. Venkata Krishna Sastry',
294 |      {'first': ['B.', 'V.', 'Venkata', 'Krishna'], 'von': [],
295 |       'last': ['Sastry'], 'jr': []}),
296 | 
297 |     (r'Benedict L{\o}fstedt',
298 |      {'first': ['Benedict'], 'von': [], 'last': [r'L{\o}fstedt'], 'jr': []}),
299 | 
300 |     (r'Bogus{\l}aw Jackowski',
301 |      {'first': ['Bogus{\l}aw'], 'von': [], 'last': ['Jackowski'], 'jr': []}),
302 | 
303 |     (r'Christina A. L.\ Thiele',
304 |      {'first': ['Christina', 'A.', 'L.\\'], 'von': [],
305 |       'last': ['Thiele'], 'jr': []}),
306 | 
307 |     (r"D. Men'shikov",
308 |      {'first': ['D.'], 'von': [], 'last': ["Men'shikov"], 'jr': []}),
309 | 
310 |     (r'Darko \v{Z}ubrini{\'c}',
311 |      {'first': ['Darko'], 'von': [], 'last': [r'\v{Z}ubrini{\'c}'], 'jr': []}),
312 | 
313 |     (r'Dunja Mladeni{\'c}',
314 |      {'first': ['Dunja'], 'von': [], 'last': [r'Mladeni{\'c}'], 'jr': []}),
315 | 
316 |     (r'Edwin V. {Bell, II}',
317 |      {'first': ['Edwin', 'V.'], 'von': [], 'last': ['{Bell, II}'], 'jr': []}),
318 | 
319 |     (r'Frank G. {Bennett, Jr.}',
320 |      {'first': ['Frank', 'G.'], 'von': [], 'last': ['{Bennett, Jr.}'], 'jr': []}),
321 | 
322 |     (r'Fr{\'e}d{\'e}ric Boulanger',
323 |      {'first': [r'Fr{\'e}d{\'e}ric'], 'von': [], 'last': ['Boulanger'], 'jr': []}),
324 | 
325 |     (r'Ford, Jr., Henry',
326 |      {'first': ['Henry'], 'von': [], 'last': ['Ford'], 'jr': ['Jr.']}),
327 | 
328 |     (r'mr Ford, Jr., Henry',
329 |      {'first': ['Henry'], 'von': ['mr'], 'last': ['Ford'], 'jr': ['Jr.']}),
330 | 
331 |     (r'Fukui Rei',
332 |      {'first': ['Fukui'], 'von': [], 'last': ['Rei'], 'jr': []}),
333 | 
334 |     (r'G. Gr{\"a}tzer',
335 |      {'first': ['G.'], 'von': [], 'last': [r'Gr{\"a}tzer'], 'jr': []}),
336 | 
337 |     (r'George Gr{\"a}tzer',
338 |      {'first': ['George'], 'von': [], 'last': [r'Gr{\"a}tzer'], 'jr': []}),
339 | 
340 |     (r'Georgia K. M. Tobin',
341 |      {'first': ['Georgia', 'K.', 'M.'], 'von': [], 'last': ['Tobin'], 'jr': []}),
342 | 
343 |     (r'Gilbert van den Dobbelsteen',
344 |      {'first': ['Gilbert'], 'von': ['van', 'den'], 'last': ['Dobbelsteen'], 'jr': []}),
345 | 
346 |     (r'Gy{\"o}ngyi Bujdos{\'o}',
347 |      {'first': [r'Gy{\"o}ngyi'], 'von': [], 'last': [r'Bujdos{\'o}'], 'jr': []}),
348 | 
349 |     (r'Helmut J{\"u}rgensen',
350 |      {'first': ['Helmut'], 'von': [], 'last': [r'J{\"u}rgensen'], 'jr': []}),
351 | 
352 |     (r'Herbert Vo{\ss}',
353 |      {'first': ['Herbert'], 'von': [], 'last': ['Vo{\ss}'], 'jr': []}),
354 | 
355 |     (r"H{\'a}n Th{\^e}\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh",
356 |      {'first': [r'H{\'a}n', r"Th{\^e}\llap{\raise 0.5ex\hbox{\'{\relax}}}"],
357 |       'von': [], 'last': [r"Th{\'a}nh"], 'jr': []}),
358 | 
359 |     (r"H{\`a}n Th\^e\llap{\raise0.5ex\hbox{\'{\relax}}} Th{\`a}nh",
360 |      {'first': [r'H{\`a}n', r"Th\^e\llap{\raise0.5ex\hbox{\'{\relax}}}"],
361 |       'von': [], 'last': [r"Th{\`a}nh"], 'jr': []}),
362 | 
363 |     (r'J. Vesel{\'y}',
364 |      {'first': ['J.'], 'von': [], 'last': [r'Vesel{\'y}'], 'jr': []}),
365 | 
366 |     (r'Javier Rodr\'{\i}guez Laguna',
367 |      {'first': ['Javier', r'Rodr\'{\i}guez'], 'von': [], 'last': ['Laguna'], 'jr': []}),
368 | 
369 |     (r'Ji\v{r}\'{\i} Vesel{\'y}',
370 |      {'first': [r'Ji\v{r}\'{\i}'], 'von': [], 'last': [r'Vesel{\'y}'], 'jr': []}),
371 | 
372 |     (r'Ji\v{r}\'{\i} Zlatu{\v{s}}ka',
373 |      {'first': [r'Ji\v{r}\'{\i}'], 'von': [], 'last': [r'Zlatu{\v{s}}ka'], 'jr': []}),
374 | 
375 |     (r'Ji\v{r}{\'\i} Vesel{\'y}',
376 |      {'first': [r'Ji\v{r}{\'\i}'], 'von': [], 'last': [r'Vesel{\'y}'], 'jr': []}),
377 | 
378 |     (r'Ji\v{r}{\'{\i}}Zlatu{\v{s}}ka',
379 |      {'first': [], 'von': [], 'last': [r'Ji\v{r}{\'{\i}}Zlatu{\v{s}}ka'], 'jr': []}),
380 | 
381 |     (r'Jim Hef{}feron',
382 |      {'first': ['Jim'], 'von': [], 'last': ['Hef{}feron'], 'jr': []}),
383 | 
384 |     (r'J{\"o}rg Knappen',
385 |      {'first': [r'J{\"o}rg'], 'von': [], 'last': ['Knappen'], 'jr': []}),
386 | 
387 |     (r'J{\"o}rgen L. Pind',
388 |      {'first': [r'J{\"o}rgen', 'L.'], 'von': [], 'last': ['Pind'], 'jr': []}),
389 | 
390 |     (r'J{\'e}r\^ome Laurens',
391 |      {'first': [r'J{\'e}r\^ome'], 'von': [], 'last': ['Laurens'], 'jr': []}),
392 | 
393 |     (r'J{{\"o}}rg Knappen',
394 |      {'first': [r'J{{\"o}}rg'], 'von': [], 'last': ['Knappen'], 'jr': []}),
395 | 
396 |     (r'K. Anil Kumar',
397 |      {'first': ['K.', 'Anil'], 'von': [], 'last': ['Kumar'], 'jr': []}),
398 | 
399 |     (r'Karel Hor{\'a}k',
400 |      {'first': ['Karel'], 'von': [], 'last': [r'Hor{\'a}k'], 'jr': []}),
401 | 
402 |     (r'Karel P\'{\i}{\v{s}}ka',
403 |      {'first': ['Karel'], 'von': [], 'last': [r'P\'{\i}{\v{s}}ka'], 'jr': []}),
404 | 
405 |     (r'Karel P{\'\i}{\v{s}}ka',
406 |      {'first': ['Karel'], 'von': [], 'last': [r'P{\'\i}{\v{s}}ka'], 'jr': []}),
407 | 
408 |     (r'Karel Skoup\'{y}',
409 |      {'first': ['Karel'], 'von': [], 'last': [r'Skoup\'{y}'], 'jr': []}),
410 | 
411 |     (r'Karel Skoup{\'y}',
412 |      {'first': ['Karel'], 'von': [], 'last': [r'Skoup{\'y}'], 'jr': []}),
413 | 
414 |     (r'Kent McPherson',
415 |      {'first': ['Kent'], 'von': [], 'last': ['McPherson'], 'jr': []}),
416 | 
417 |     (r'Klaus H{\"o}ppner',
418 |      {'first': ['Klaus'], 'von': [], 'last': [r'H{\"o}ppner'], 'jr': []}),
419 | 
420 |     (r'Lars Hellstr{\"o}m',
421 |      {'first': ['Lars'], 'von': [], 'last': [r'Hellstr{\"o}m'], 'jr': []}),
422 | 
423 |     (r'Laura Elizabeth Jackson',
424 |      {'first': ['Laura', 'Elizabeth'], 'von': [], 'last': ['Jackson'], 'jr': []}),
425 | 
426 |     (r'M. D{\'{\i}}az',
427 |      {'first': ['M.'], 'von': [], 'last': [r'D{\'{\i}}az'], 'jr': []}),
428 | 
429 |     (r'M/iche/al /O Searc/oid',
430 |      {'first': [r'M/iche/al', r'/O'], 'von': [], 'last': [r'Searc/oid'], 'jr': []}),
431 | 
432 |     (r'Marek Ry{\'c}ko',
433 |      {'first': ['Marek'], 'von': [], 'last': [r'Ry{\'c}ko'], 'jr': []}),
434 | 
435 |     (r'Marina Yu. Nikulina',
436 |      {'first': ['Marina', 'Yu.'], 'von': [], 'last': ['Nikulina'], 'jr': []}),
437 | 
438 |     (r'Max D{\'{\i}}az',
439 |      {'first': ['Max'], 'von': [], 'last': [r'D{\'{\i}}az'], 'jr': []}),
440 | 
441 |     (r'Merry Obrecht Sawdey',
442 |      {'first': ['Merry', 'Obrecht'], 'von': [], 'last': ['Sawdey'], 'jr': []}),
443 | 
444 |     (r'Miroslava Mis{\'a}kov{\'a}',
445 |      {'first': ['Miroslava'], 'von': [], 'last': [r'Mis{\'a}kov{\'a}'], 'jr': []}),
446 | 
447 |     (r'N. A. F. M. Poppelier',
448 |      {'first': ['N.', 'A.', 'F.', 'M.'], 'von': [], 'last': ['Poppelier'], 'jr': []}),
449 | 
450 |     (r'Nico A. F. M. Poppelier',
451 |      {'first': ['Nico', 'A.', 'F.', 'M.'], 'von': [], 'last': ['Poppelier'], 'jr': []}),
452 | 
453 |     (r'Onofrio de Bari',
454 |      {'first': ['Onofrio'], 'von': ['de'], 'last': ['Bari'], 'jr': []}),
455 | 
456 |     (r'Pablo Rosell-Gonz{\'a}lez',
457 |      {'first': ['Pablo'], 'von': [], 'last': [r'Rosell-Gonz{\'a}lez'], 'jr': []}),
458 | 
459 |     (r'Paco La                  Bruna',
460 |      {'first': ['Paco', 'La'], 'von': [], 'last': ['Bruna'], 'jr': []}),
461 | 
462 |     (r'Paul                  Franchi-Zannettacci',
463 |      {'first': ['Paul'], 'von': [], 'last': ['Franchi-Zannettacci'], 'jr': []}),
464 | 
465 |     (r'Pavel \v{S}eve\v{c}ek',
466 |      {'first': ['Pavel'], 'von': [], 'last': [r'\v{S}eve\v{c}ek'], 'jr': []}),
467 | 
468 |     (r'Petr Ol{\v{s}}ak',
469 |      {'first': ['Petr'], 'von': [], 'last': [r'Ol{\v{s}}ak'], 'jr': []}),
470 | 
471 |     (r'Petr Ol{\v{s}}{\'a}k',
472 |      {'first': ['Petr'], 'von': [], 'last': [r'Ol{\v{s}}{\'a}k'], 'jr': []}),
473 | 
474 |     (r'Primo\v{z} Peterlin',
475 |      {'first': [r'Primo\v{z}'], 'von': [], 'last': ['Peterlin'], 'jr': []}),
476 | 
477 |     (r'Prof. Alban Grimm',
478 |      {'first': ['Prof.', 'Alban'], 'von': [], 'last': ['Grimm'], 'jr': []}),
479 | 
480 |     (r'P{\'e}ter Husz{\'a}r',
481 |      {'first': [r'P{\'e}ter'], 'von': [], 'last': [r'Husz{\'a}r'], 'jr': []}),
482 | 
483 |     (r'P{\'e}ter Szab{\'o}',
484 |      {'first': [r'P{\'e}ter'], 'von': [], 'last': [r'Szab{\'o}'], 'jr': []}),
485 | 
486 |     (r'Rafa{\l}\.Zbikowski',
487 |      {'first': [], 'von': [], 'last': [r'Rafa{\l}\.Zbikowski'], 'jr': []}),
488 | 
489 |     (r'Rainer Sch{\"o}pf',
490 |      {'first': ['Rainer'], 'von': [], 'last': [r'Sch{\"o}pf'], 'jr': []}),
491 | 
492 |     (r'T. L. (Frank) Pappas',
493 |      {'first': ['T.', 'L.', '(Frank)'], 'von': [], 'last': ['Pappas'], 'jr': []}),
494 | 
495 |     (r'TUG 2004 conference',
496 |      {'first': ['TUG', '2004'], 'von': [], 'last': ['conference'], 'jr': []}),
497 | 
498 |     (r'TUG {\sltt DVI} Driver Standards Committee',
499 |      {'first': ['TUG', '{\sltt DVI}', 'Driver', 'Standards'], 'von': [],
500 |       'last': ['Committee'], 'jr': []}),
501 | 
502 |     (r'TUG {\sltt xDVIx} Driver Standards Committee',
503 |      {'first': ['TUG'], 'von': ['{\sltt xDVIx}'],
504 |       'last': ['Driver', 'Standards', 'Committee'], 'jr': []}),
505 | 
506 |     (r'University of M{\"u}nster',
507 |      {'first': ['University'], 'von': ['of'], 'last': [r'M{\"u}nster'], 'jr': []}),
508 | 
509 |     (r'Walter van der Laan',
510 |      {'first': ['Walter'], 'von': ['van', 'der'], 'last': ['Laan'], 'jr': []}),
511 | 
512 |     (r'Wendy G.                  McKay',
513 |      {'first': ['Wendy', 'G.'], 'von': [], 'last': ['McKay'], 'jr': []}),
514 | 
515 |     (r'Wendy McKay',
516 |      {'first': ['Wendy'], 'von': [], 'last': ['McKay'], 'jr': []}),
517 | 
518 |     (r'W{\l}odek Bzyl',
519 |      {'first': [r'W{\l}odek'], 'von': [], 'last': ['Bzyl'], 'jr': []}),
520 | 
521 |     (r'\LaTeX Project Team',
522 |      {'first': [r'\LaTeX', 'Project'], 'von': [], 'last': ['Team'], 'jr': []}),
523 | 
524 |     (r'\rlap{Lutz Birkhahn}',
525 |      {'first': [], 'von': [], 'last': [r'\rlap{Lutz Birkhahn}'], 'jr': []}),
526 | 
527 |     (r'{Jim Hef{}feron}',
528 |      {'first': [], 'von': [], 'last': ['{Jim Hef{}feron}'], 'jr': []}),
529 | 
530 |     (r'{Kristoffer H\o{}gsbro Rose}',
531 |      {'first': [], 'von': [], 'last': ['{Kristoffer H\o{}gsbro Rose}'], 'jr': []}),
532 | 
533 |     (r'{TUG} {Working} {Group} on a {\TeX} {Directory} {Structure}',
534 |      {'first': ['{TUG}', '{Working}', '{Group}'], 'von': ['on', 'a'],
535 |       'last': [r'{\TeX}', '{Directory}', '{Structure}'], 'jr': []}),
536 | 
537 |     (r'{The \TUB{} Team}',
538 |      {'first': [], 'von': [], 'last': [r'{The \TUB{} Team}'], 'jr': []}),
539 | 
540 |     (r'{\LaTeX} project team',
541 |      {'first': [r'{\LaTeX}'], 'von': ['project'], 'last': ['team'], 'jr': []}),
542 | 
543 |     (r'{\NTG{} \TeX{} future working group}',
544 |      {'first': [], 'von': [], 'last': [r'{\NTG{} \TeX{} future working group}'], 'jr': []}),
545 | 
546 |     (r'{{\LaTeX\,3} Project Team}',
547 |      {'first': [], 'von': [], 'last': [r'{{\LaTeX\,3} Project Team}'], 'jr': []}),
548 | 
549 |     (r'Johansen Kyle, Derik Mamania M.',
550 |      {'first': ['Derik', 'Mamania', 'M.'], 'von': [], 'last': ['Johansen', 'Kyle'], 'jr': []}),
551 | 
552 |     (r"Johannes Adam Ferdinand Alois Josef Maria Marko d'Aviano Pius von und zu Liechtenstein",
553 |      {'first': ['Johannes', 'Adam', 'Ferdinand', 'Alois', 'Josef', 'Maria', 'Marko'],
554 |       'von': ["d'Aviano", 'Pius', 'von', 'und', 'zu'], 'last': ['Liechtenstein'], 'jr': []}),
555 | 
556 |     (r"Brand\~{a}o, F",
557 |      {'first': ['F'], 'von': [], 'last': ['Brand\\', '{a}o'], 'jr': []}),
558 | )
559 | 
560 | 
561 | if __name__ == '__main__':  # true only when this file is executed directly, not when it is imported
562 |     unittest.main()  # hand control to unittest's command-line runner for the tests in this module
563 | 


--------------------------------------------------------------------------------