├── requirements.txt
├── bibtexparser
    ├── tests
    │   ├── data
    │   │   ├── fieldname.bib
    │   │   ├── comments_only.bib
    │   │   ├── wrong.bib
    │   │   ├── comments_only_output.bib
    │   │   ├── website.bib
    │   │   ├── crossref_missing_entries.bib
    │   │   ├── book.bib
    │   │   ├── book_bom.bib
    │   │   ├── book_output.bib
    │   │   ├── book_comma_first.bib
    │   │   ├── xref_missing_entries.bib
    │   │   ├── string.bib
    │   │   ├── crossref_cascading_cycle.bib
    │   │   ├── article_field_name_with_underscore.bib
    │   │   ├── article_start_with_whitespace.bib
    │   │   ├── article_oneline.bib
    │   │   ├── article_with_strings.bib
    │   │   ├── article.bib
    │   │   ├── article_output.bib
    │   │   ├── article_with_strings_output.bib
    │   │   ├── common_strings.bib
    │   │   ├── encoding.bib
    │   │   ├── article_no_braces.bib
    │   │   ├── article_start_with_bom.bib
    │   │   ├── article_trailing_comma_output.bib
    │   │   ├── article_with_protection_braces.bib
    │   │   ├── traps.bib
    │   │   ├── article_with_special_characters.bib
    │   │   ├── article_comma_first_and_trailing_comma_output.bib
    │   │   ├── article_with_annotation.bib
    │   │   ├── article_with_annotation_output.bib
    │   │   ├── crossref_cascading.bib
    │   │   ├── features.bib
    │   │   ├── comments_percentage.bib
    │   │   ├── comments_percentage_nolastcoma.bib
    │   │   ├── article_homogenize.bib
    │   │   ├── article_comma_first.bib
    │   │   ├── features_output.bib
    │   │   ├── comments_spaces_and_declarations.bib
    │   │   ├── multiline_comments.bib
    │   │   ├── multiple_entries_output.bib
    │   │   ├── crossref_cascading_aliases.bib
    │   │   ├── multiple_entries_and_comments_output.bib
    │   │   ├── features2.bib
    │   │   ├── multiple_entries.bib
    │   │   ├── multiple_entries_and_comments.bib
    │   │   ├── article_missing_coma.bib
    │   │   ├── xref_entries.bib
    │   │   └── crossref_entries.bib
    │   ├── test_preambles.py
    │   ├── test_homogenise_fields.py
    │   ├── test_latexenc.py
    │   ├── test_bibdatabase.py
    │   ├── test_bibtexexpression.py
    │   ├── test_bwriter.py
    │   ├── test_bibtexparser.py
    │   ├── test_bibtex_strings.py
    │   ├── test_customization.py
    │   ├── test_comments.py
    │   ├── test_bibtexwriter.py
    │   └── test_crossref_resolving.py
    ├── __init__.py
    ├── bwriter.py
    ├── bibdatabase.py
    ├── bibtexexpression.py
    ├── bparser.py
    └── customization.py
├── MANIFEST.in
├── tox.ini
├── .coveragerc
├── RELEASE
├── .gitignore
├── docs
    ├── source
    │   ├── who.rst
    │   ├── bibtex_conv.rst
    │   ├── bibtexparser.rst
    │   ├── index.rst
    │   ├── install.rst
    │   ├── logging.rst
    │   ├── conf.py
    │   └── tutorial.rst
    └── Makefile
├── CONTRIBUTORS.txt
├── setup.py
├── .travis.yml
├── README.rst
├── CHANGELOG
└── COPYING


/requirements.txt:
--------------------------------------------------------------------------------
1 | future>=0.16.0
2 | pyparsing>=2.0.3
3 | unittest2>=1.1.0
4 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/fieldname.bib:
--------------------------------------------------------------------------------
1 | @BOOK{Bird1987,
2 |   Dc.Date = {2004-01},
3 | }
4 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.txt
2 | include *.md
3 | include docs/Makefile
4 | include docs/source/*
5 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py27,py35
3 | [testenv]
4 | deps = nose
5 |        pyparsing
6 | commands = nosetests
7 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source = bibtexparser
4 | 
5 | [report]
6 | exclude_lines =
7 |     if __name__ == .__main__.:
8 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/comments_only.bib:
--------------------------------------------------------------------------------
1 | @comment{ignore this line!}
2 | @Comment{ignore this line too!}
3 | @COMMENT{and ignore this line too!}
4 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/wrong.bib:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | @wrong{foo,
 4 |     author = {wrong}
 5 | }
 6 | 
 7 | @article{bar,
 8 |     author = {correct}
 9 | }
10 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/comments_only_output.bib:
--------------------------------------------------------------------------------
1 | @comment{ignore this line!}
2 | 
3 | @comment{ignore this line too!}
4 | 
5 | @comment{and ignore this line too!}
6 | 
7 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/website.bib:
--------------------------------------------------------------------------------
1 | @misc{feder2006,
2 |  title = {BibTeX},
3 |  author = {Alexander Feder},
4 |  link = {http://bibtex.org},
5 |  year = {2006}
6 | }
7 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/crossref_missing_entries.bib:
--------------------------------------------------------------------------------
1 | % Testing missing crossref
2 | @INBOOK{mcr,
3 |   AUTHOR    = {Megan Mistrel},
4 |   TITLE     = {Lumbering Lunatics},
5 |   ORIGDATE  = {1933},
6 |   CROSSREF  = {missing1}
7 | }
8 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/book.bib:
--------------------------------------------------------------------------------
1 | @BOOK{Bird1987,
2 |   title = {Dynamics of Polymeric Liquid},
3 |   publisher = {Wiley Edition},
4 |   year = {1987},
5 |   author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.},
6 |   volume = {1},
7 |   edition = {2},
8 | }
9 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/book_bom.bib:
--------------------------------------------------------------------------------
1 | @BOOK{Bird1987,
2 |   title = {Dynamics of Polymeric Liquid},
3 |   publisher = {Wiley Edition},
4 |   year = {1987},
5 |   author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.},
6 |   volume = {1},
7 |   edition = {2},
8 | }
9 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/book_output.bib:
--------------------------------------------------------------------------------
 1 | @book{Bird1987,
 2 |  author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.},
 3 |  edition = {2},
 4 |  publisher = {Wiley Edition},
 5 |  title = {Dynamics of Polymeric Liquid},
 6 |  volume = {1},
 7 |  year = {1987}
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/book_comma_first.bib:
--------------------------------------------------------------------------------
 1 | @book{Bird1987
 2 |    , author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.}
 3 |    , edition = {2}
 4 |    , publisher = {Wiley Edition}
 5 |    , title = {Dynamics of Polymeric Liquid}
 6 |    , volume = {1}
 7 |    , year = {1987}
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/RELEASE:
--------------------------------------------------------------------------------
 1 | How to release
 2 | ==============
 3 | 
 4 | * Update CHANGELOG
 5 | * Update version in __init__.py
 6 | * git tag -a 'vX'
 7 | * merge in branch latest
 8 | * Upload the package to PyPI
 9 |     python setup.py sdist upload
10 | * Tick (activate) the new doc version on Read the Docs
11 | * Update version in __init__.py
12 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/xref_missing_entries.bib:
--------------------------------------------------------------------------------
 1 | % From biber test data : t/tdata/crossrefs.bib
 2 | % Kept initial comment but not for our purpose
 3 | 
 4 | % Testing missing xref
 5 | @INBOOK{mxr,
 6 |   AUTHOR    = {Megan Mistrel},
 7 |   TITLE     = {Lumbering Lunatics},
 8 |   ORIGDATE  = {1933},
 9 |   XREF      = {missing1}
10 | }
11 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/string.bib:
--------------------------------------------------------------------------------
1 | @STRING{oakland = {Proceedings of the {IEEE} Symposium on Security and Privacy}}
2 | @INPROCEEDINGS{cha:oakland15,
3 |     author = {Sang Kil Cha and Maverick Woo and David Brumley},
4 |     title = {{Program-Adaptive Mutational Fuzzing}},
5 |     booktitle = oakland,
6 |     year = {2015},
7 |     pages = {725--741}
8 | }
9 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/crossref_cascading_cycle.bib:
--------------------------------------------------------------------------------
 1 | % From biber test data : t/tdata/crossrefs.bib
 2 | % Kept initial comment but not for our purpose
 3 | 
 4 | % Testing circular refs detection
 5 | 
 6 | @BOOK{circ1,
 7 |   DATE      = {1911},
 8 |   CROSSREF  = {circ2}
 9 | }
10 | 
11 | @BOOK{circ2,
12 |   DATE      = {1911},
13 |   CROSSREF  = {circ1}
14 | }
15 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_field_name_with_underscore.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean César},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   pages = {12-23},
 7 |   journal = {Nice Journal},
 8 |   comments = {A comment},
 9 |   keyword = {keyword1, keyword2},
10 |   strange-field-name2 = {val2},
11 |   strange_field_name = {val},
12 | }
13 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_start_with_whitespace.bib:
--------------------------------------------------------------------------------
 1 |  @ARTICLE{Cesar2013,
 2 |   author = {Jean Cesar},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   journal = {Nice Journal}
 7 | }
 8 | 
 9 |     @ARTICLE{Cesar2014,
10 |   author = {Jean Cesar},
11 |   title = {An amazing title},
12 |   year = {2014},
13 |   volume = {12},
14 |   journal = {Nice Journal}
15 | }


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_oneline.bib:
--------------------------------------------------------------------------------
1 | @ARTICLE{Cesar2013, author = {Jean Cesar}, title = {An amazing title}, year = {2013}, volume = {12}, journal = {Nice Journal}, comments = {A comment}, keyword = {keyword1, keyword2}}
2 | 
3 | @ARTICLE{ Baltazar2013,author = {Jean Baltazar},title = {An amazing title},year = {2013},volume = {12},journal = {Nice Journal},comments = {A comment},keyword = {keyword1, keyword2}}
4 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_with_strings.bib:
--------------------------------------------------------------------------------
 1 | @STRING{ nice_journal = "Nice Journal" }
 2 | @STRING ( jean={Jean} )
 3 | @STRING{cesar = {César}}
 4 | 
 5 | @ARTICLE{Cesar2013,
 6 |   author = jean # " " # cesar,
 7 |   title = {An amazing title},
 8 |   year = {2013},
 9 |   month = jan,
10 |   volume = {12},
11 |   pages = {12-23},
12 |   journal = nice_journal,
13 |   comments = {A comment},
14 |   keyword = {keyword1, keyword2},
15 | }
16 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean César},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   month = "jan",
 6 |   volume = {12},
 7 |   pages = {12-23},
 8 |   journal = {Nice Journal},
 9 |   abstract = {This is an abstract. This line should be long enough to test
10 | multilines... and with a french érudit word},
11 |   comments = {A comment},
12 |   keyword = {keyword1, keyword2},
13 | }
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_output.bib:
--------------------------------------------------------------------------------
 1 | @article{Cesar2013,
 2 |  abstract = {This is an abstract. This line should be long enough to test
 3 | multilines... and with a french érudit word},
 4 |  author = {Jean César},
 5 |  comments = {A comment},
 6 |  journal = {Nice Journal},
 7 |  keyword = {keyword1, keyword2},
 8 |  month = {jan},
 9 |  pages = {12-23},
10 |  title = {An amazing title},
11 |  volume = {12},
12 |  year = {2013}
13 | }
14 | 
15 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_with_strings_output.bib:
--------------------------------------------------------------------------------
 1 | @string{nice_journal = {Nice Journal}}
 2 | 
 3 | @string{jean = {Jean}}
 4 | 
 5 | @string{cesar = {César}}
 6 | 
 7 | @article{Cesar2013,
 8 |  author = jean # { } # cesar,
 9 |  comments = {A comment},
10 |  journal = nice_journal,
11 |  keyword = {keyword1, keyword2},
12 |  month = jan,
13 |  pages = {12-23},
14 |  title = {An amazing title},
15 |  volume = {12},
16 |  year = {2013}
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/common_strings.bib:
--------------------------------------------------------------------------------
 1 | @string{jan = {January}}
 2 | 
 3 | @string{feb = {February}}
 4 | 
 5 | @string{mar = {March}}
 6 | 
 7 | @string{apr = {April}}
 8 | 
 9 | @string{may = {May}}
10 | 
11 | @string{jun = {June}}
12 | 
13 | @string{jul = {July}}
14 | 
15 | @string{aug = {August}}
16 | 
17 | @string{sep = {September}}
18 | 
19 | @string{oct = {October}}
20 | 
21 | @string{nov = {November}}
22 | 
23 | @string{dec = {December}}
24 | 
25 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/encoding.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar_2013,
 2 |   author = {Jean César},
 3 |   title = {An amazing title: à},
 4 |   year = {2013},
 5 |   month = "jan",
 6 |   volume = {12},
 7 |   pages = {12-23},
 8 |   journal = {Elémentaire},
 9 |   abstract = {This is an abstract. This line should be long enough to test
10 | 	 multilines... and with a french érudit word},
11 |   comments = {A comment},
12 |   keywords = {keyword1, keyword2},
13 | }
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_no_braces.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = "Jean C{\'e}sar{\"u}",
 3 |   title = "An amazing title",
 4 |   year = "2013",
 5 |   month = "jan",
 6 |   volume = "12",
 7 |   pages = "12-23",
 8 |   journal = "Nice Journal",
 9 |   abstract = "This is an abstract. This line should be long enough to test
10 | multilines... and with a french érudit word",
11 |   comments = "A comment",
12 |   keyword = "keyword1, keyword2",
13 | }
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_start_with_bom.bib:
--------------------------------------------------------------------------------
 1 | 
 2 | @ARTICLE{Cesar2013,
 3 |   author = {Jean César},
 4 |   title = {An amazing title},
 5 |   year = {2013},
 6 |   month = "jan",
 7 |   volume = {12},
 8 |   pages = {12-23},
 9 |   journal = {Nice Journal},
10 |   abstract = {This is an abstract. This line should be long enough to test
11 | multilines... and with a french érudit word},
12 |   comments = {A comment},
13 |   keyword = {keyword1, keyword2},
14 | }
15 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_trailing_comma_output.bib:
--------------------------------------------------------------------------------
 1 | @article{Cesar2013,
 2 |  abstract = {This is an abstract. This line should be long enough to test
 3 | multilines... and with a french érudit word},
 4 |  author = {Jean César},
 5 |  comments = {A comment},
 6 |  journal = {Nice Journal},
 7 |  keyword = {keyword1, keyword2},
 8 |  month = {jan},
 9 |  pages = {12-23},
10 |  title = {An amazing title},
11 |  volume = {12},
12 |  year = {2013},
13 | }
14 | 
15 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_with_protection_braces.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean César},
 3 |   title = {{An amazing title}},
 4 |   year = {2013},
 5 |   month = "jan",
 6 |   volume = {12},
 7 |   pages = {12-23},
 8 |   journal = {{Nice Journal}},
 9 |   abstract = {This is an abstract. This line should be long enough to test
10 | multilines... and with a french érudit word},
11 |   comments = {A comment},
12 |   keyword = {keyword1, keyword2},
13 | }
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/traps.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Laide2013,
 2 |   author = {Jean Laid{\'e},
 3 |   Ben Loaeb},
 4 |   title = {{An} amazing {title}},
 5 |   year = {2013},
 6 |   month = "jan",
 7 |   volume = {n.s.~2},
 8 |   pages = {12-23},
 9 |   journal = {Nice Journal},
10 |   abstract = {This is an abstract. This line should be long enough to test
11 | 	 multilines... and with a french érudit word},
12 |   comments = {A comment},
13 |   keywords = {keyword1, keyword2},
14 | }
15 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_with_special_characters.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean C{\'e}sar{\"u}},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   month = "jan",
 6 |   volume = {12},
 7 |   pages = {12-23},
 8 |   journal = {Nice Journal},
 9 |   abstract = {This is an abstract. This line should be long enough to test
10 | multilines... and with a french érudit word},
11 |   comments = {A comment},
12 |   keyword = {keyword1, keyword2},
13 | }
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_comma_first_and_trailing_comma_output.bib:
--------------------------------------------------------------------------------
 1 | @article{Cesar2013
 2 |  , abstract = {This is an abstract. This line should be long enough to test
 3 | multilines... and with a french érudit word}
 4 |  , author = {Jean César}
 5 |  , comments = {A comment}
 6 |  , journal = {Nice Journal}
 7 |  , keyword = {keyword1, keyword2}
 8 |  , month = {jan}
 9 |  , pages = {12-23}
10 |  , title = {An amazing title}
11 |  , volume = {12}
12 |  , year = {2013}
13 |  ,
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_with_annotation.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean César},
 3 |   author+an = {1=highlight},
 4 |   title = {An amazing title},
 5 |   year = {2013},
 6 |   month = "jan",
 7 |   volume = {12},
 8 |   pages = {12-23},
 9 |   journal = {Nice Journal},
10 |   abstract = {This is an abstract. This line should be long enough to test
11 | multilines... and with a french érudit word},
12 |   comments = {A comment},
13 |   keyword = {keyword1, keyword2},
14 | }
15 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_with_annotation_output.bib:
--------------------------------------------------------------------------------
 1 | @article{Cesar2013,
 2 |  abstract = {This is an abstract. This line should be long enough to test
 3 | multilines... and with a french érudit word},
 4 |  author = {Jean César},
 5 |  author+an = {1=highlight},
 6 |  comments = {A comment},
 7 |  journal = {Nice Journal},
 8 |  keyword = {keyword1, keyword2},
 9 |  month = {jan},
10 |  pages = {12-23},
11 |  title = {An amazing title},
12 |  volume = {12},
13 |  year = {2013}
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/crossref_cascading.bib:
--------------------------------------------------------------------------------
 1 | % From biber test data : t/tdata/crossrefs.bib
 2 | % Kept initial comment but not for our purpose
 3 | 
 4 | % Test of dependency calculations for non-cited entries
 5 | 
 6 | @BOOK{r1,
 7 |   DATE      = {1911},
 8 |   CROSSREF  = {r2}
 9 | }
10 | 
11 | @BOOK{r2,
12 |   DATE      = {1911},
13 |   CROSSREF  = {r3}
14 | }
15 | 
16 | @BOOK{r3,
17 |   DATE      = {1911},
18 |   CROSSREF  = {r4}
19 | }
20 | 
21 | @BOOK{r4,
22 |   DATE      = {1911},
23 | }
24 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/features.bib:
--------------------------------------------------------------------------------
 1 | @comment{ignore this line!}
 2 | @Comment{ignore this line too!}
 3 | @COMMENT{and ignore this line too!}
 4 | 
 5 | @preamble{ "\makeatletter" }
 6 | @preamble{ "\@ifundefined{url}{\def\url#1{\texttt{#1}}}{}" }
 7 | @preamble{ "\makeatother" }
 8 | 
 9 | @string{mystring = "Hello"}
10 | @string{myconf = "My International Conference"}
11 | @string{myname = "Doe"}
12 | 
13 | @inproceedings{mykey,
14 |   author = "John",
15 |   title = {Cool Stuff},
16 |   booktitle = myconf,
17 |   year = 2014,
18 | }
19 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | 
21 | # Installer logs
22 | pip-log.txt
23 | 
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 | 
29 | # Translations
30 | *.mo
31 | 
32 | # Mr Developer
33 | .mr.developer.cfg
34 | .project
35 | .pydevproject
36 | 
37 | # Pycharm
38 | .idea
39 | 
40 | # Vim.
41 | *.swp
42 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/comments_percentage.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean Cesar},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   journal = {Nice Journal},
 7 |   comments = {A comment},
 8 |   keyword = {keyword1, keyword2},
 9 | }
10 | % comment.
11 | @ARTICLE{Baltazar2013,
12 |   author = {Jean Baltazar},
13 |   title = {An amazing title},
14 |   year = {2013},
15 |   volume = {12},
16 |   journal = {Nice Journal},
17 |   comments = {A comment},
18 |   keyword = {keyword1, keyword2},
19 | }
20 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/comments_percentage_nolastcoma.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean Cesar},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   journal = {Nice Journal},
 7 |   comments = {A comment},
 8 |   keyword = {keyword1, keyword2}
 9 | }
10 | % comment.
11 | @ARTICLE{Baltazar2013,
12 |   author = {Jean Baltazar},
13 |   title = {An amazing title},
14 |   year = {2013},
15 |   volume = {12},
16 |   journal = {Nice Journal},
17 |   comments = {A comment},
18 |   keyword = {keyword1, keyword2}
19 | }
20 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_homogenize.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   authors = {Jean César},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   month = "jan",
 6 |   volume = {12},
 7 |   pages = {12-23},
 8 |   journal = {Nice Journal},
 9 |   abstract = {This is an abstract. This line should be long enough to test
10 | multilines... and with a french érudit word},
11 |   comments = {A comment},
12 |   editors = {Edith Or},
13 |   keywords = {keyword1, keyword2},
14 |   links = {http://my.link/to-content},
15 |   subjects = "Some topic of interest",
16 | }
17 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_comma_first.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013
 2 |  , author = {Jean Cesar}
 3 |  , title = {An amazing title}
 4 |  , year = {2013}
 5 |  , volume = {12}
 6 |  , journal = {Nice Journal}
 7 |  , comments = {A comment}
 8 |  , keyword = {keyword1, keyword2}
 9 | }
10 | 
11 | @ARTICLE{ Baltazar2013
12 |         , author = {Jean Baltazar}
13 |         , title = {An amazing title}
14 |         , year = {2013}
15 |         , volume = {12}
16 |         , journal = {Nice Journal}
17 |         , comments = {A comment}
18 |         , keyword = {keyword1, keyword2}}
19 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/features_output.bib:
--------------------------------------------------------------------------------
 1 | @comment{ignore this line!}
 2 | 
 3 | @comment{ignore this line too!}
 4 | 
 5 | @comment{and ignore this line too!}
 6 | 
 7 | @preamble{ "\makeatletter" }
 8 | 
 9 | @preamble{ "\@ifundefined{url}{\def\url#1{\texttt{#1}}}{}" }
10 | 
11 | @preamble{ "\makeatother" }
12 | 
13 | @string{mystring = "Hello"}
14 | 
15 | @string{myconf = "My International Conference"}
16 | 
17 | @string{myname = "Doe"}
18 | 
19 | @inproceedings{mykey,
20 |  author = {John},
21 |  booktitle = {My International Conference},
22 |  title = {Cool Stuff},
23 |  year = {2014}
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/comments_spaces_and_declarations.bib:
--------------------------------------------------------------------------------
 1 | % a comment
 2 |  @preamble{ "Blah blah" }
 3 | 
 4 | Another comment
 5 |  @string{title = {A great title} }
 6 | 
 7 |  and one more comment
 8 | 
 9 |    @ARTICLE{Cesar2013,
10 |   author = {Jean César},
11 |   title = title,
12 |   year = {2013},
13 |   month = "jan",
14 |   volume = {12},
15 |   pages = {12-23},
16 |   journal = {Nice Journal},
17 |   abstract = {This is an abstract. This line should be long enough to test
18 | multilines... and with a french érudit word},
19 |   comments = {A comment},
20 |   keyword = {keyword1, keyword2},
21 | }
22 | 


--------------------------------------------------------------------------------
/docs/source/who.rst:
--------------------------------------------------------------------------------
 1 | Who uses BibtexParser?
 2 | ======================
 3 | 
 4 | If your project uses BibtexParser, you can ask for a link to it to be added to this list.
 5 | 
 6 | * https://pypi.org/project/vitae/
 7 | * http://timotheepoisot.fr/2013/11/10/shared-bibtex-file-markdown/
 8 | * https://github.com/Phyks/BMC
 9 | * http://aurelien.naldi.info/research/publications.html
10 | * http://robot.kut.ac.kr/publications
11 | * https://git.atelo.org/etlapale/bibgen
12 | * https://onmenwhostareongraphs.wordpress.com/2015/06/09/graph-display-software-for-author-relationships-with-bibtex-files/
13 | * https://github.com/vitorfs/parsifal
14 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiline_comments.bib:
--------------------------------------------------------------------------------
 1 | @comment{Lorem ipsum dolor sit amet,
 2 | consectetur adipisicing elit}
 3 | 
 4 | @comment{
 5 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
 6 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
 7 | 
 8 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
 9 | Excepteur sint occaecat cupidatat non proident.
10 |  ,
11 | }
12 | 
13 | @comment{
14 | 
15 | 
16 | Sunt in culpa qui officia deserunt mollit anim id est laborum.
17 | 
18 | 
19 | }
20 | 
21 | @comment{}
22 | 
23 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiple_entries_output.bib:
--------------------------------------------------------------------------------
 1 | @book{Toto3000,
 2 |  author = {Toto, A and Titi, B},
 3 |  title = {A title}
 4 | }
 5 | 
 6 | @article{Wigner1938,
 7 |  author = {Wigner, E.},
 8 |  doi = {10.1039/TF9383400029},
 9 |  issn = {0014-7672},
10 |  journal = {Trans. Faraday Soc.},
11 |  owner = {fr},
12 |  pages = {29--41},
13 |  publisher = {The Royal Society of Chemistry},
14 |  title = {The transition state method},
15 |  volume = {34},
16 |  year = {1938}
17 | }
18 | 
19 | @book{Yablon2005,
20 |  author = {Yablon, A.D.},
21 |  publisher = {Springer},
22 |  title = {Optical fiber fusion slicing},
23 |  year = {2005}
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/crossref_cascading_aliases.bib:
--------------------------------------------------------------------------------
 1 | % From biber test data : t/tdata/crossrefs.bib
 2 | % Kept initial comment but not for our purpose
 3 | 
 4 | % Testing cascading crossrefs
 5 | @MVBOOK{ccr1,
 6 |   IDS       = {ccr1alias},
 7 |   AUTHOR    = {Vince Various},
 8 |   EDITOR    = {Edward Editor},
 9 |   TITLE     = {Stuff Concerning Varia},
10 |   DATE      = {1934}
11 | }
12 | 
13 | % using alias
14 | @BOOK{ccr2,
15 |   TITLE     = {Misc etc.},
16 |   DATE      = {1923},
17 |   CROSSREF  = {ccr1alias}
18 | }
19 | 
20 | @INBOOK{ccr3,
21 |   TITLE     = {Perhaps, Perchance, Possibilities?},
22 |   DATE      = {1911},
23 |   CROSSREF  = {ccr2}
24 | }
25 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiple_entries_and_comments_output.bib:
--------------------------------------------------------------------------------
 1 | @comment{}
 2 | 
 3 | @comment{A comment}
 4 | 
 5 | @book{Toto3000,
 6 |  author = {Toto, A and Titi, B},
 7 |  title = {A title}
 8 | }
 9 | 
10 | @article{Wigner1938,
11 |  author = {Wigner, E.},
12 |  doi = {10.1039/TF9383400029},
13 |  issn = {0014-7672},
14 |  journal = {Trans. Faraday Soc.},
15 |  owner = {fr},
16 |  pages = {29--41},
17 |  publisher = {The Royal Society of Chemistry},
18 |  title = {The transition state method},
19 |  volume = {34},
20 |  year = {1938}
21 | }
22 | 
23 | @book{Yablon2005,
24 |  author = {Yablon, A.D.},
25 |  publisher = {Springer},
26 |  title = {Optical fiber fusion slicing},
27 |  year = {2005}
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/features2.bib:
--------------------------------------------------------------------------------
 1 | @string{CoOl = "Cool"}
 2 | @string{stuff = "Stuff"}
 3 | @string{myTitle = cool # " " # stuff}
 4 | 
 5 | @string{int = "International"}
 6 | @string{myconf = "My "#int#" Conference"}
 7 | 
 8 | @string{myname = "Doe"}
 9 | 
10 | @String {firstname = "John"}
11 | @String {lastname  = myname}
12 | @String {domain  = "example"}
13 | @String {tld  = "com"}
14 | 
15 | @String {foo = "1--10"}
16 | @String {BaR = FOO}
17 | @String {pages = baR}
18 | 
19 | @inproceedings{mykey,
20 |   author = "John " # mynamE,
21 |   title = mytitle,
22 |   booktitle = myconf,
23 |   pages = pages,
24 |   year = 2014,
25 |   note = "Email: " # firstname # "." # lastname #
26 |           "@" # domain # "." # tld,
27 | }
28 | 


--------------------------------------------------------------------------------
/CONTRIBUTORS.txt:
--------------------------------------------------------------------------------
 1 | - François Boulogne
 2 |   Project coordinator
 3 | 
 4 | - bibserver's contributors
 5 |   for the parser's core and the permission to release this project under LGPLv3 and BSD
 6 | 
 7 | - Shuen-Huei (Drake) Guan
 8 |   Python 2.7 porting
 9 | 
10 | - Sebastien Diemer
11 |   Bugfix
12 | 
13 | - Georg C. Brückmann
14 |   Support for non-standard entry types
15 | 
16 | - Uwe Schmidt
17 |   String replacement
18 | 
19 | - faph
20 |   comma fixes, optional keys sanitising, refactoring and other improvements
21 | 
22 | - Steven M. Bellovin
23 |   Fix braces detection
24 | 
25 | - Sven Goossens
26 |   Support for bibtex with leading spaces
27 | 
28 | - Michal Grochmal
29 |   Comma first syntax support
30 | 
31 | - Cschaffner
32 |   New features in bwriter
33 | 
34 | - Olivier Mangin
35 |   Pyparsing implementation of the parser.
36 | 
37 | - Blair Bonnett
38 |   customization.splitname() function
39 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiple_entries.bib:
--------------------------------------------------------------------------------
 1 | @Book{Yablon2005,
 2 |   Title                    = {Optical fiber fusion slicing},
 3 |   Author                   = {Yablon, A.D.},
 4 |   Publisher                = {Springer},
 5 |   Year                     = {2005},
 6 | }
 7 | 
 8 | @Article{Wigner1938,
 9 |   Title                    = {The transition state method},
10 |   Author                   = {Wigner, E.},
11 |   Journal                  = {Trans. Faraday Soc.},
12 |   Year                     = {1938},
13 |   Pages                    = {29--41},
14 |   Volume                   = {34},
15 |   Doi                      = {10.1039/TF9383400029},
16 |   ISSN                     = {0014-7672},
17 |   Owner                    = {fr},
18 |   Publisher                = {The Royal Society of Chemistry},
19 | }
20 | 
21 | @Book{Toto3000,
22 |   Title                    = {A title},
23 |   Author                   = {Toto, A and Titi, B},
24 | }
25 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/multiple_entries_and_comments.bib:
--------------------------------------------------------------------------------
 1 | @Book{Yablon2005,
 2 |   Title                    = {Optical fiber fusion slicing},
 3 |   Author                   = {Yablon, A.D.},
 4 |   Publisher                = {Springer},
 5 |   Year                     = {2005},
 6 | }
 7 | 
 8 | @Article{Wigner1938,
 9 |   Title                    = {The transition state method},
10 |   Author                   = {Wigner, E.},
11 |   Journal                  = {Trans. Faraday Soc.},
12 |   Year                     = {1938},
13 |   Pages                    = {29--41},
14 |   Volume                   = {34},
15 |   Doi                      = {10.1039/TF9383400029},
16 |   ISSN                     = {0014-7672},
17 |   Owner                    = {fr},
18 |   Publisher                = {The Royal Society of Chemistry},
19 | }
20 | 
21 | @Book{Toto3000,
22 |   Title                    = {A title},
23 |   Author                   = {Toto, A and Titi, B},
24 | }
25 | 
26 | @Comment{}
27 | 
28 | @Comment{A comment}
29 | 
30 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/article_missing_coma.bib:
--------------------------------------------------------------------------------
 1 | @ARTICLE{Cesar2013,
 2 |   author = {Jean Cesar},
 3 |   title = {An amazing title},
 4 |   year = {2013},
 5 |   volume = {12},
 6 |   journal = {Nice Journal},
 7 |   comments = {A comment},
 8 |   keyword = {keyword1, keyword2}
 9 | }
10 | 
11 | @ARTICLE{Baltazar2013,
12 |   author = {Jean Baltazar},
13 |   title = {An amazing title},
14 |   year = {2013},
15 |   volume = {12},
16 |   journal = {Nice Journal},
17 |   comments = {A comment},
18 |   keyword = {keyword1, keyword2}}
19 | 
20 | @ARTICLE{Aimar2013,
21 |   author = {Jean Aimar},
22 |   title = {An amazing title},
23 |   year = {2013},
24 |   volume = {12},
25 |   journal = {Nice Journal},
26 |   comments = {A comment},
27 |   keyword = {keyword1, keyword2},
28 |   month = "january"
29 | }
30 | 
31 | @ARTICLE{Doute2013,
32 |   author = {Jean Doute},
33 |   title = {An amazing title},
34 |   volume = {12},
35 |   journal = {Nice Journal},
36 |   comments = {A comment},
37 |   keyword = {keyword1, keyword2},
38 |   year = "2013"
39 | }
40 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | try:
 4 |     from setuptools import setup
 5 | except ImportError as ex:
 6 |     print('[python-bibtexparser] setuptools not found. Falling back to distutils.core')
 7 |     from distutils.core import setup
 8 | 
 9 | with open('bibtexparser/__init__.py') as fh:
10 |     for line in fh:
11 |         if line.startswith('__version__'):
12 |             version = line.strip().split()[-1][1:-1]
13 |             break
14 | 
15 | setup(
16 |     name         = 'bibtexparser',
17 |     version      = version,
18 |     url          = "https://github.com/sciunto-org/python-bibtexparser",
19 |     author       = "Francois Boulogne and other contributors",
20 |     license      = "LGPLv3 or BSD",
21 |     author_email = "devel@sciunto.org",
22 |     description  = "Bibtex parser for python 2.7 and 3.3 and newer",
23 |     packages     = ['bibtexparser'],
24 |     install_requires = ['pyparsing>=2.0.3',
25 |                         'future>=0.16.0'],
26 |     extra_requires = {'unittest': 'unittest2>=1.1.0'}
27 | )
28 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: trusty
 2 | language: python
 3 | matrix:
 4 |   include:
 5 |     - python: "2.7"
 6 |       env: TEST_SUITE=suite_2_7
 7 |     - python: "3.3"
 8 |       env: TEST_SUITE=suite_3_3
 9 |     - python: "3.4"
10 |       env: TEST_SUITE=suite_3_4
11 |     - python: "3.5"
12 |       env: TEST_SUITE=suite_3_5
13 |     - python: "3.6"
14 |       env: TEST_SUITE=suite_3_6
15 |     - python: "3.7"
16 |       dist: xenial # Required for Python 3.7
17 |       sudo: true   # travis-ci/travis-ci#9069
18 |       env: TEST_SUITE=suite_3_7
19 |     - python: "pypy-5.7.1"
20 |       env: TEST_SUITE=suite_pypy
21 |     - python: "pypy3.5-5.8.0"
22 |       env: TEST_SUITE=suite_pypy3
23 | install:
24 |   - if [[ $TEST_SUITE == suite_3_6 ]]; then
25 |         pip install sphinx;
26 |     fi;
27 |   - pip install coverage
28 |   - pip install -r requirements.txt
29 |   - python setup.py install
30 | script:
31 |   - nosetests --with-coverage  --cover-erase --cover-package=bibtexparser
32 |   - if [[ $TEST_SUITE == suite_3_6 ]]; then
33 |         cd docs;
34 |         make html;
35 |     fi;
36 | 
37 | after_success:
38 |   - pip install coveralls
39 |   - coveralls
40 | 


--------------------------------------------------------------------------------
/docs/source/bibtex_conv.rst:
--------------------------------------------------------------------------------
 1 | ===============================================
 2 | Bibtex tips, conventions and unrelated projects
 3 | ===============================================
 4 | 
 5 | This page presents various resources about bibtex in general.
 6 | 
 7 | Format
 8 | ======
 9 | 
10 | http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html
11 | 
12 | * Comments
13 | * Variable
14 | * @preamble
15 | * Name convention
16 | 
17 | Upper case letters in titles
18 | ----------------------------
19 | 
20 | Put the letter/word in curly braces like {this}.
21 | 
22 | 
23 | General references
24 | ------------------
25 | 
26 | * http://tug.ctan.org/tex-archive/info/bibtex/tamethebeast/ttb_en.pdf
27 | * http://ctan.mirrors.hoobly.com/macros/latex/contrib/biblatex/doc/biblatex.pdf
28 | 
29 | IEEE citation reference
30 | -----------------------
31 | 
32 | * https://origin.www.ieee.org/documents/ieeecitationref.pdf
33 | 
34 | 
35 | Common Errors in Bibliographies John Owens
36 | ------------------------------------------
37 | 
38 | * http://www.ece.ucdavis.edu/~jowens/biberrors.html
39 | 
40 | Common abbreviations for journals
41 | ---------------------------------
42 | 
43 | * Jabref list http://jabref.sourceforge.net/resources.php#downloadlists
44 | 
45 | 
46 | Projects
47 | ========
48 | 
49 | Here are some interesting projects using bibtex but not necessarily this parser.
50 | 
51 | Display your bibliography in html pages
52 | ---------------------------------------
53 | 
54 | * http://www.monperrus.net/martin/bibtexbrowser/
55 | 
56 | 


--------------------------------------------------------------------------------
/docs/source/bibtexparser.rst:
--------------------------------------------------------------------------------
 1 | .. _bibtexparser_api:
 2 | 
 3 | .. contents::
 4 | 
 5 | bibtexparser: API
 6 | =================
 7 | 
 8 | :mod:`bibtexparser` --- Parsing and writing BibTeX files
 9 | --------------------------------------------------------
10 | 
11 | .. automodule:: bibtexparser
12 |     :members: load, loads, dumps, dump
13 | 
14 | :mod:`bibtexparser.bibdatabase` --- The bibliographic database object
15 | ---------------------------------------------------------------------
16 | 
17 | .. autoclass:: bibtexparser.bibdatabase.BibDatabase
18 |     :members: entries, entries_dict, comments, strings, preambles
19 | 
20 | :mod:`bibtexparser.bparser` --- Tune the default parser
21 | --------------------------------------------------------
22 | 
23 | .. automodule:: bibtexparser.bparser
24 |     :members:
25 | 
26 | :mod:`bibtexparser.customization` --- Functions to customize records
27 | --------------------------------------------------------------------
28 | 
29 | .. automodule:: bibtexparser.customization
30 |     :members:
31 | 
32 | Exception classes
33 | ^^^^^^^^^^^^^^^^^
34 | .. autoclass:: bibtexparser.customization.InvalidName
35 | 
36 | :mod:`bibtexparser.bwriter` --- Tune the default writer
37 | -------------------------------------------------------
38 | 
39 | .. autoclass:: bibtexparser.bwriter.BibTexWriter
40 |     :members:
41 | 
42 | :mod:`bibtexparser.bibtexexpression` --- Parser's core relying on pyparsing
43 | ---------------------------------------------------------------------------
44 | 
45 | .. automodule:: bibtexparser.bibtexexpression
46 |     :members:
47 | 
48 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. BibtexParser documentation master file, created by
 2 |    sphinx-quickstart on Thu Aug  1 13:30:23 2013.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to BibtexParser's documentation!
 7 | ========================================
 8 | 
 9 | 
10 | :Author: François Boulogne, Olivier Mangin, Lucas Verney, and other contributors.
11 | :Devel: `github.com project <https://github.com/sciunto-org/python-bibtexparser>`_
12 | :Mirror: `git.sciunto.org <https://git.sciunto.org/mirror/python-bibtexparser>`_
13 | :Bugs: `github.com <https://github.com/sciunto-org/python-bibtexparser/issues>`_
14 | :Generated: |today|
15 | :License: LGPL v3 or BSD
16 | :Version: |release|
17 | 
18 | BibtexParser is a python library to parse bibtex files. The code relies on `pyparsing <http://pyparsing.wikispaces.com/>`_ and is tested with unittests.
19 | 
20 | If you use BibtexParser for your project, feel free to send me an email. I would be happy to hear that and to mention your project in the documentation.
21 | 
22 | Contents:
23 | 
24 | .. toctree::
25 |     :maxdepth: 2
26 | 
27 |     install.rst
28 |     tutorial.rst
29 |     bibtexparser.rst
30 |     logging.rst
31 |     bibtex_conv.rst
32 |     who.rst
33 | 
34 | 
35 | Other projects
36 | ==============
37 | 
38 | * http://pybtex.sourceforge.net/
39 | * http://pybliographer.org/
40 | * https://github.com/matthew-brett/babybib
41 | 
42 | Indices and tables
43 | ==================
44 | 
45 | * :ref:`genindex`
46 | * :ref:`modindex`
47 | * :ref:`search`
48 | 
49 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | python-bibtexparser
 2 | ===================
 3 | 
 4 | Python library to parse `bibtex <https://en.wikipedia.org/wiki/BibTeX>`_ files.
 5 | 
 6 | 
 7 | IMPORTANT: the library is looking for new maintainers. Please get in touch if you are interested.
 8 | 
 9 | .. contents::
10 | 
11 | 
12 | Bibtexparser relies on `pyparsing <https://pypi.python.org/pypi/pyparsing>`_ and is compatible with Python 2.7 and 3.3 or newer.
13 | 
14 | Documentation
15 | -------------
16 | 
17 | Our documentation includes the installation procedure, a tutorial, the API, and advice on how to report a bug.
18 | References, related projects, and software based on bibtexparser are also listed. If you would like to appear on this list, feel free to open a ticket or send an email.
19 | 
20 | `Documentation on readthedocs.io <https://bibtexparser.readthedocs.io/>`_
21 | 
22 | Upgrading
23 | ---------
24 | 
25 | Please read the changelog before upgrading to learn about API modifications.
26 | Prior to version 1.0, we do not hesitate to modify the API in order to get the best API from your feedback.
27 | 
28 | License
29 | -------
30 | 
31 | Dual license (at your choice):
32 | 
33 | * LGPLv3.
34 | * BSD
35 | 
36 | See COPYING for details.
37 | 
38 | History and evolutions
39 | ----------------------
40 | 
41 | The original source code was part of bibserver from `OKFN <http://github.com/okfn/bibserver>`_. This project is released under the AGPLv3. OKFN and the original authors kindly provided the permission to use a subpart of their project (ie the bibtex parser) under LGPLv3. Many thanks to them!
42 | 
43 | The parser evolved to a new core based on pyparsing.
44 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/xref_entries.bib:
--------------------------------------------------------------------------------
 1 | % From biber test data : t/tdata/crossrefs.bib
 2 | % Kept initial comment but not for our purpose
 3 | 
 4 | % Testing mincrossrefs. xr1 and xr2 xrefs should trigger inclusion of xrm and also
 5 | % the xreffields in both of them
 6 | @INBOOK{xr1,
 7 |   AUTHOR    = {Zoe Zentrum},
 8 |   TITLE     = {Moods Mildly Modified},
 9 |   ORIGDATE  = {1921},
10 |   XREF      = {xrm}
11 | }
12 | 
13 | @INBOOK{xr2,
14 |   AUTHOR    = {Ian Instant},
15 |   TITLE     = {Migraines Multiplying Madly},
16 |   ORIGDATE  = {1926},
17 |   XREF      = {xrm}
18 | }
19 | 
20 | @BOOK{xrm,
21 |   EDITOR    = {Peter Prendergast},
22 |   TITLE     = {Calligraphy, Calisthenics, Culture},
23 |   PUBLISHER = {Mainstream},
24 |   YEAR      = {1970}
25 | }
26 | 
27 | % Testing explicit cite of xref parent. Should trigger inclusion of child xref field
28 | @INBOOK{xr3,
29 |   AUTHOR    = {Norman Normal},
30 |   TITLE     = {Russian Regalia Revisited},
31 |   ORIGDATE  = {1923},
32 |   XREF      = {xrt}
33 | }
34 | 
35 | @BOOK{xrt,
36 |   EDITOR    = {Lucy Lunders},
37 |   TITLE     = {Kings, Cork and Calculation},
38 |   PUBLISHER = {Middling},
39 |   YEAR      = {1977}
40 | }
41 | 
42 | % Testing mincrossrefs not reached. cr4 is cited, cr5 isn't, therefore mincrossrefs (2) for
43 | % crn not reached
44 | @INBOOK{xr4,
45 |   AUTHOR    = {Megan Mistrel},
46 |   TITLE     = {Lumbering Lunatics},
47 |   ORIGDATE  = {1933},
48 |   XREF      = {xrn}
49 | }
50 | 
51 | @INBOOK{xr5,
52 |   AUTHOR    = {Kenneth Kunrath},
53 |   TITLE     = {Dreadful Dreary Days},
54 |   ORIGDATE  = {1900},
55 |   XREF      = {xrn}
56 | }
57 | 
58 | @BOOK{xrn,
59 |   EDITOR    = {Victor Vivacious},
60 |   TITLE     = {Examples of Excellent Exaggerations},
61 |   PUBLISHER = {Oxford},
62 |   YEAR      = {1935}
63 | }
64 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_preambles.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import bibtexparser
 3 | from bibtexparser.bibdatabase import BibDatabase
 4 | from collections import OrderedDict
 5 | 
 6 | 
 7 | class TestPreambleParse(unittest.TestCase):
 8 |     def test_single_preamble_parse_count(self):
 9 |         bibtex_str = '@preamble{" a "}\n\n'
10 |         bib_database = bibtexparser.loads(bibtex_str)
11 |         self.assertEqual(len(bib_database.preambles), 1)
12 | 
13 |     def test_multiple_preamble_parse_count(self):
14 |         bibtex_str = '@preamble{" a "}\n\n@preamble{"b"}\n\n'
15 |         bib_database = bibtexparser.loads(bibtex_str)
16 |         self.assertEqual(len(bib_database.preambles), 2)
17 | 
18 |     def test_single_preamble_parse(self):
19 |         bibtex_str = '@preamble{" a "}\n\n'
20 |         bib_database = bibtexparser.loads(bibtex_str)
21 |         expected = [' a ']
22 |         self.assertEqual(bib_database.preambles, expected)
23 | 
24 |     def test_multiple_preamble_parse(self):
25 |         bibtex_str = '@preamble{" a "}\n\n@preamble{"b"}\n\n'
26 |         bib_database = bibtexparser.loads(bibtex_str)
27 |         expected = [' a ', 'b']
28 |         self.assertEqual(bib_database.preambles, expected)
29 | 
30 | 
31 | class TestPreambleWrite(unittest.TestCase):
32 |     def test_single_preamble_write(self):
33 |         bib_database = BibDatabase()
34 |         bib_database.preambles = [' a ']
35 |         result = bibtexparser.dumps(bib_database)
36 |         expected = '@preamble{" a "}\n\n'
37 |         self.assertEqual(result, expected)
38 | 
39 |     def test_multiple_string_write(self):
40 |         bib_database = BibDatabase()
41 |         bib_database.preambles = [' a ', 'b']
42 |         result = bibtexparser.dumps(bib_database)
43 |         expected = '@preamble{" a "}\n\n@preamble{"b"}\n\n'
44 |         self.assertEqual(result, expected)
45 | 


--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
 1 | ========================
 2 | How to install and test?
 3 | ========================
 4 | 
 5 | How to install?
 6 | ===============
 7 | 
 8 | Requirements
 9 | ------------
10 | 
11 | * python **2.7** or python **3.3** or newer
12 | * pyparsing **2.0.3** or newer
13 | 
14 | Package manager (recommended for those OS users)
15 | ------------------------------------------------
16 | 
17 | * `Archlinux <https://aur.archlinux.org/packages/python-bibtexparser/>`_
18 | * `Debian <https://packages.debian.org/en/sid/main/python-bibtexparser>`_
19 | 
20 | pip (recommended to other users)
21 | ---------------------------------
22 | 
23 | To install with pip:
24 | 
25 | .. code-block:: sh
26 | 
27 |     pip install bibtexparser
28 | 
29 | 
30 | Manual installation (recommended for packagers)
31 | -----------------------------------------------
32 | 
33 | Download the archive on `Pypi <http://pypi.python.org/pypi/bibtexparser/>`_.
34 | 
35 | .. code-block:: sh
36 | 
37 |     python setup.py install
38 | 
39 | 
40 | How to run the test suite?
41 | ==========================
42 | 
43 | This paragraph briefly describes how to run the test suite.
44 | This is useful for contributors, for packagers but also for users who want to check their environment.
45 | 
46 | 
47 | Virtualenv
48 | ----------
49 | 
50 | You can make a virtualenv. I like `pew <https://pypi.python.org/pypi/pew/>`_ for that because the API is easier.
51 | 
52 | The first time, you need to make a virtualenv
53 | 
54 | .. code-block:: sh
55 | 
56 |     pew mkproject bibtexparser
57 |     pip install -r requirements.txt
58 |     python setup.py install
59 |     nosetests
60 | 
61 | 
62 | If you already have a virtualenv, you can use workon
63 | 
64 | .. code-block:: sh
65 | 
66 |     pew workon bibtexparser
67 | 
68 | 
69 | Tox
70 | ---
71 | 
72 | The advantage of `Tox <https://pypi.python.org/pypi/tox>`_ is that you can build and test the code against several versions of python.
73 | Of course, you need tox to be installed on your system.
74 | The configuration file is tox.ini, in the root of the project. There, you can change the python versions.
75 | 
76 | .. code-block:: sh
77 | 
78 |     tox # and nothing more :)
79 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_homogenise_fields.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from __future__ import unicode_literals
 4 | import io
 5 | import unittest
 6 | from bibtexparser.bparser import BibTexParser
 7 | 
 8 | 
 9 | class TestHomogenizeFields(unittest.TestCase):
10 | 
11 |     def test_homogenize_default(self):
12 |         with open('bibtexparser/tests/data/website.bib', 'r') as bibfile:
13 |             bib = BibTexParser(bibfile.read())
14 |             entries = bib.get_entry_list()
15 |             self.assertNotIn('url', entries[0])
16 |             self.assertIn('link', entries[0])
17 | 
18 |     def test_homogenize_on(self):
19 |         with open('bibtexparser/tests/data/website.bib', 'r') as bibfile:
20 |             bib = BibTexParser(bibfile.read(), homogenize_fields=True)
21 |             entries = bib.get_entry_list()
22 |             self.assertIn('url', entries[0])
23 |             self.assertNotIn('link', entries[0])
24 | 
25 |     def test_homogenize_off(self):
26 |         with open('bibtexparser/tests/data/website.bib', 'r') as bibfile:
27 |             bib = BibTexParser(bibfile.read(), homogenize_fields=False)
28 |             entries = bib.get_entry_list()
29 |             self.assertNotIn('url', entries[0])
30 |             self.assertIn('link', entries[0])
31 | 
32 |     def test_homogenizes_fields(self):
33 |         self.maxDiff = None
34 |         with io.open('bibtexparser/tests/data/article_homogenize.bib',
35 |                      'r', encoding='utf-8') as bibfile:
36 |             bib = BibTexParser(bibfile.read(), homogenize_fields=True)
37 |             expected_dict = {
38 |                 'Cesar2013': {
39 |                     'keyword': 'keyword1, keyword2',
40 |                     'ENTRYTYPE': 'article',
41 |                     'abstract': 'This is an abstract. This line should be '
42 |                                 'long enough to test\nmultilines... and with '
43 |                                 'a french érudit word',
44 |                     'year': '2013',
45 |                     'journal': 'Nice Journal',
46 |                     'ID': 'Cesar2013',
47 |                     'pages': '12-23',
48 |                     'title': 'An amazing title',
49 |                     'comments': 'A comment',
50 |                     'author': 'Jean César',
51 |                     'volume': '12',
52 |                     'month': 'jan',
53 |                     'url': "http://my.link/to-content",
54 |                     'subject': "Some topic of interest",
55 |                     'editor': "Edith Or",
56 |                 }
57 |             }
58 |             self.assertEqual(bib.get_entry_dict(), expected_dict)
59 | 


--------------------------------------------------------------------------------
/docs/source/logging.rst:
--------------------------------------------------------------------------------
 1 | How to report a bug?
 2 | ====================
 3 | 
 4 | Bugs can be reported on github or via private communications.
 5 | 
 6 | Steps
 7 | -----
 8 | 
 9 | 1. Make a minimal code, which reproduces the problem.
10 | 2. Provide the code, the bibtex (if necessary), the output.
11 | 3. For a parsing error, provide the expected output.
12 | 4. For a crash, set the logger to the debug level (see below).
13 | 
14 | If you want to provide a patch (that's wonderful! thank you), please take a few minutes to write a unit test that fails without your contribution.
15 | 
16 | Logging module to understand failures
17 | -------------------------------------
18 | 
19 | Syntax of bibtex files is simple but there are many possible variations. This library probably fails for some of them.
20 | 
21 | Bibtexparser includes a large number of debug messages, which help to understand why and where the parser fails.
22 | The example below can be used to print these messages in the console.
23 | 
24 | .. code-block:: python
25 | 
26 |     import logging
27 |     import logging.config
28 | 
29 |     logger = logging.getLogger(__name__)
30 | 
31 |     logging.config.dictConfig({
32 |         'version': 1,
33 |         'disable_existing_loggers': False,
34 |         'formatters': {
35 |             'standard': {
36 |                 'format': '%(asctime)s [%(levelname)s] %(name)s %(funcName)s:%(lineno)d: %(message)s'
37 |             },
38 |         },
39 |         'handlers': {
40 |             'default': {
41 |                 'level':'DEBUG',
42 |                 'formatter': 'standard',
43 |                 'class':'logging.StreamHandler',
44 |             },
45 |         },
46 |         'loggers': {
47 |             '': {
48 |                 'handlers': ['default'],
49 |                 'level': 'DEBUG',
50 |                 'formatter': 'standard',
51 |                 'propagate': True
52 |             }
53 |         }
54 |     })
55 | 
56 | 
57 |     if __name__ == '__main__':
58 |         bibtex = """@ARTICLE{Cesar2013,
59 |           author = {Jean César},
60 |           title = {An amazing title},
61 |           year = {2013},
62 |           month = jan,
63 |           volume = {12},
64 |           pages = {12--23},
65 |           journal = {Nice Journal},
66 |           abstract = {This is an abstract. This line should be long enough to test
67 |         	 multilines...},
68 |           comments = {A comment},
69 |           keywords = {keyword1, keyword2},
70 |         }
71 |         """
72 | 
73 |         with open('/tmp/bibtex.bib', 'w') as bibfile:
74 |             bibfile.write(bibtex)
75 | 
76 |         from bibtexparser.bparser import BibTexParser
77 | 
78 |         with open('/tmp/bibtex.bib', 'r') as bibfile:
79 |             bp = BibTexParser(bibfile.read())
80 |             print(bp.get_entry_list())
81 | 
82 | I recommend that you include this output if you would like to report a bug.
83 | 


--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
  1 | v1.xxx
  2 | ======
  3 | 
  4 | v1.1.0
  5 | ======
  6 | 
  7 | * BUGFIX: Fix for pyparsing 2.3.1 (#226)
  8 | * NEW: Add support for BibLaTeX annotations (#208)
  9 | * NEW: Feature: crossref Support (#216)
 10 | * ENH: Handles declarations on lines starting with spaces after comments (#199)
 11 | * ENH: Checks for empty citekeys and whitespaces (#213)
 12 | 
 13 | v1.0.1
 14 | ======
 15 | 
 16 | * BUGFIX: future missing in setup.py
 17 | 
 18 | v1.0
 19 | ====
 20 | 
 21 | * ENH: we use pyparsing (#64) by Olivier Mangin.
 22 | * DOC: Refactoring of the tutorial
 23 | * DOC: include docs/ in manifest
 24 | * API: fix spelling "homogenize". Affects customization and bparser
 25 | * API: BibtexParser: homogenize_fields is now False by default (#94)
 26 | 
 27 | v0.6.2
 28 | ======
 29 | 
 30 | * ENH: customization: handle various hyphens (#76).
 31 | * ENH: writer: all values according to this maximal key width (#83).
 32 | * ENH: writer: display_order allows custom ordering of the fields of
 33 | each entry instead of just alphabetical (#83) by cschaffner.
 34 | * FIX: bad support of braces in string (#90) by sangkilc.
 35 | 
 36 | v0.6.1
 37 | ======
 38 | 
 39 | * API: Previous type and id keywords which are automatically added to
 40 | the dictionary are now ENTRYTYPE and ID, respectively (#42).
 41 | * ENH: comma first syntax support (#49) by Michal Grochmal.
 42 | 
 43 | v0.6.0
 44 | ======
 45 | 
 46 | * DOC: clarify version number
 47 | * ENH: support for bibtex with leading spaces (#34)
 48 | * FIX: if title contained multiples words in braces
 49 | * ENH: code refactoring (#33)
 50 | * ENH: support for comment blocks (#32)
 51 | * ENH: Removed comma after last key-value pair by faph (#28)
 52 | * ENH: optional keys sanitising by faph (#29)
 53 | * FIX: missing comma at the end of a record (#24)
 54 | * DOC: clarify the usecase of to_bibtex
 55 | * FIX: raise exception for TypeError in to_bibtex (#22)
 56 | 
 57 | v0.5.5
 58 | ======
 59 | 
 60 | * ENH: json output
 61 | * ENH: Add (optional) support for non-standard entry types by Georg C. Brückmann
 62 | * FIX: protect uppercase only on unprotected characters. #18
 63 | * ENH: string replacement by Uwe Schmidt (#13 #20)
 64 | 
 65 | v0.5.4
 66 | ======
 67 | 
 68 | * ENH: json output
 69 | * API: enhance the naming choice for bwriter
 70 | 
 71 | v0.5.3
 72 | ======
 73 | 
 74 | * ENH: add writer (#16), thanks to Lucas Verney
 75 | * MAINT: Remove non-standard --BREAK-- command detection
 76 | * FIX: missing strip() (#14) by Sebastien Diemer
 77 | * API breakage: the parser takes data instead of a filehandler
 78 | 
 79 | v0.5.2
 80 | ======
 81 | 
 82 | * ENH: fix tests latex encoding
 83 | * ENH: support @comment @preamble (escaped)
 84 | * ENH: check that bibtype belongs to a known type
 85 | 
 86 | v0.5.1
 87 | ======
 88 | 
 89 | * ENH: split keywords with various separators
 90 | * ENH: get_entry_dict make the dict once
 91 | * ENH: add messages with logging
 92 | * FIX: fix unittest related to braces detection
 93 | 
 94 | v0.5
 95 | ====
 96 | 
 97 | * Permission from original authors and OKFN to use LGPLv3
 98 | * ENH: Python 2.7 support
 99 | * FIX: issue related to accents
100 | 
101 | v0.4
102 | ====
103 | 
104 | * ENH: Transformations on characters are now considered as a customization
105 | * ENH: New customization: clean latex style
106 | * FIX: issue related to name processing
107 | 
108 | v0.3
109 | ====
110 | 
111 | * DOC: moved to readthedocs
112 | * DOC: several improvements
113 | * MAINT: separate customizations
114 | 
115 | v0.2
116 | ====
117 | 
118 | * TEST: initialized
119 | * DOC: initialized
120 | 
121 | v0.1
122 | ====
123 | 
124 | * First preliminary release
125 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/data/crossref_entries.bib:
--------------------------------------------------------------------------------
  1 | % From biber test data : t/tdata/crossrefs.bib
  2 | % Kept initial comment but not for our purpose
  3 | 
  4 | % Testing mincrossrefs. cr1 and cr2 crossrefs should trigger inclusion of cr_m and also
  5 | % the crossref fields in both of them
  6 | % Also a test of some aliases
  7 | @INBOOK{cr1,
  8 |   AUTHOR        = {Graham Gullam},
  9 |   TITLE         = {Great and Good Graphs},
 10 |   ORIGDATE      = {1955},
 11 |   ARCHIVEPREFIX = {SomEPrFiX},
 12 |   PRIMARYCLASS  = {SOMECLASS},
 13 |   CROSSREF      = {cr_m}
 14 | }
 15 | 
 16 | @INBOOK{cr2,
 17 |   AUTHOR      = {Frederick Fumble},
 18 |   TITLE       = {Fabulous Fourier Forms},
 19 |   SCHOOL      = {School},
 20 |   INSTITUTION = {Institution},
 21 |   ORIGDATE    = {1943},
 22 |   CROSSREF    = {cr_m}
 23 | }
 24 | 
 25 | @BOOK{cr_m,
 26 |   EDITOR    = {Edgar Erbriss},
 27 |   TITLE     = {Graphs of the Continent},
 28 |   PUBLISHER = {Grimble},
 29 |   YEAR      = {1974}
 30 | }
 31 | 
 32 | % Testing explicit cite of crossref parent. Should trigger inclusion of child crossref field
 33 | @INBOOK{cr3,
 34 |   AUTHOR        = {Arthur Aptitude},
 35 |   TITLE         = {Arrangements of All Articles},
 36 |   ORIGDATE      = {1934},
 37 |   ARCHIVEPREFIX = {SomEPrFiX},
 38 |   EPRINTTYPE    = {sometype},
 39 |   CROSSREF      = {crt}
 40 | }
 41 | 
 42 | @BOOK{crt,
 43 |   EDITOR    = {Mark Monkley},
 44 |   TITLE     = {Beasts of the Burbling Burns},
 45 |   PUBLISHER = {Rancour},
 46 |   YEAR      = {1996}
 47 | }
 48 | 
 49 | % Testing mincrossrefs not reached. cr4 is cited, cr5 isn't, therefore mincrossrefs (2) for
 50 | % crn not reached
 51 | @INBOOK{cr4,
 52 |   AUTHOR    = {Morris Mumble},
 53 |   TITLE     = {Enterprising Entities},
 54 |   ORIGDATE  = {1911},
 55 |   CROSSREF  = {crn}
 56 | }
 57 | 
 58 | @INBOOK{cr5,
 59 |   AUTHOR    = {Oliver Ordinary},
 60 |   TITLE     = {Questionable Quidities},
 61 |   ORIGDATE  = {1919},
 62 |   CROSSREF  = {crn}
 63 | }
 64 | 
 65 | @BOOK{crn,
 66 |   EDITOR    = {Jeremy Jermain},
 67 |   TITLE     = {Vanquished, Victor, Vandal},
 68 |   PUBLISHER = {Pillsbury},
 69 |   YEAR      = {1945}
 70 | }
 71 | 
 72 | % Testing inheritance of event information
 73 | @PROCEEDINGS{cr6i,
 74 |   AUTHOR     = {Spurious Author},
 75 |   ADDRESS    = {Address},
 76 |   TITLE      = {Title of proceeding},
 77 |   EDITOR     = {Editor},
 78 |   PUBLISHER  = {Publisher of proceeding},
 79 |   EVENTDATE  = {2009-08-21/2009-08-24},
 80 |   EVENTTITLE = {Title of the event},
 81 |   VENUE      = {Location of event},
 82 |   YEAR       = {2009}
 83 | }
 84 | 
 85 | @INPROCEEDINGS{cr6,
 86 |   AUTHOR     = {Author, Firstname},
 87 |   CROSSREF   = {cr6i},
 88 |   PAGES      = {123--},
 89 |   TITLE      = {Title of inproceeding},
 90 |   BOOKTITLE  = {Manual booktitle},
 91 |   YEAR       = {2009},
 92 | }
 93 | 
 94 | % Testing inheritance of special fields (booktitle, bookauthor etc.)
 95 | @BOOK{cr7i,
 96 |   AUTHOR     = {Brian Bookauthor},
 97 |   TITLE      = {Book Title},
 98 |   SUBTITLE   = {Book Subtitle},
 99 |   TITLEADDON = {Book Titleaddon},
100 |   PUBLISHER  = {Publisher of proceeding},
101 |   YEAR       = {2009},
102 |   VERBA      = {String},
103 | }
104 | 
105 | @INBOOK{cr7,
106 |   AUTHOR     = {Author, Firstname},
107 |   CROSSREF   = {cr7i},
108 |   PAGES      = {123--126},
109 |   TITLE      = {Title of Book bit},
110 |   YEAR       = {2010}
111 | }
112 | 
113 | % Testing supression of default inheritance
114 | @COLLECTION{cr8i,
115 |   EDITOR     = {Brian Editor},
116 |   TITLE      = {Book Title},
117 |   SUBTITLE   = {Book Subtitle},
118 |   TITLEADDON = {Book Titleaddon},
119 |   PUBLISHER  = {Publisher of Collection},
120 |   YEAR       = {2009}
121 | }
122 | 
123 | @INCOLLECTION{cr8,
124 |   AUTHOR     = {Smith, Firstname},
125 |   CROSSREF   = {cr8i},
126 |   PAGES      = {1--12},
127 |   TITLE      = {Title of Collection bit},
128 |   YEAR       = {2010}
129 | }
130 | 
131 | 
132 | 
133 | 


--------------------------------------------------------------------------------
/bibtexparser/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | `BibTeX <http://en.wikipedia.org/wiki/BibTeX>`_ is a bibliographic data file format.
  3 | 
  4 | The :mod:`bibtexparser` module can parse BibTeX files and write them. The API is similar to the
  5 | :mod:`json` module. The parsed data is returned as a simple :class:`BibDatabase` object with the main attribute being
  6 | :attr:`entries` representing bibliographic sources such as books and journal articles.
  7 | 
  8 | The following functions provide a quick and basic way to manipulate a BibTeX file.
  9 | More advanced features are also available in this module.
 10 | 
 11 | Parsing a file is as simple as::
 12 | 
 13 |     import bibtexparser
 14 |     with open('bibtex.bib') as bibtex_file:
 15 |        bibtex_database = bibtexparser.load(bibtex_file)
 16 | 
 17 | And writing::
 18 | 
 19 |     import bibtexparser
 20 |     with open('bibtex.bib', 'w') as bibtex_file:
 21 |         bibtexparser.dump(bibtex_database, bibtex_file)
 22 | 
 23 | """
# Names exported by ``from bibtexparser import *``: the four convenience
# functions defined in this module plus the submodules imported below.
__all__ = [
    'loads', 'load', 'dumps', 'dump', 'bibdatabase',
    'bparser', 'bwriter', 'bibtexexpression', 'latexenc', 'customization',
]
# Version string of the package.
__version__ = '1.1.0'
 29 | 
 30 | import sys
 31 | 
 32 | from . import bibdatabase, bibtexexpression, bparser, bwriter, latexenc, customization
 33 | 
 34 | 
 35 | def loads(bibtex_str, parser=None):
 36 |     """
 37 |     Load :class:`BibDatabase` object from a string
 38 | 
 39 |     :param bibtex_str: input BibTeX string to be parsed
 40 |     :type bibtex_str: str or unicode
 41 |     :param parser: custom parser to use (optional)
 42 |     :type parser: BibTexParser
 43 |     :returns: bibliographic database object
 44 |     :rtype: BibDatabase
 45 |     """
 46 |     if parser is None:
 47 |         parser = bparser.BibTexParser()
 48 |     return parser.parse(bibtex_str)
 49 | 
 50 | 
 51 | def load(bibtex_file, parser=None):
 52 |     """
 53 |     Load :class:`BibDatabase` object from a file
 54 | 
 55 |     :param bibtex_file: input file to be parsed
 56 |     :type bibtex_file: file
 57 |     :param parser: custom parser to use (optional)
 58 |     :type parser: BibTexParser
 59 |     :returns: bibliographic database object
 60 |     :rtype: BibDatabase
 61 | 
 62 |     Example::
 63 | 
 64 |         import bibtexparser
 65 |         with open('bibtex.bib') as bibtex_file:
 66 |            bibtex_database = bibtexparser.load(bibtex_file)
 67 | 
 68 |     """
 69 |     if parser is None:
 70 |         parser = bparser.BibTexParser()
 71 |     return parser.parse_file(bibtex_file)
 72 | 
 73 | 
 74 | def dumps(bib_database, writer=None):
 75 |     """
 76 |     Dump :class:`BibDatabase` object to a BibTeX string
 77 | 
 78 |     :param bib_database: bibliographic database object
 79 |     :type bib_database: BibDatabase
 80 |     :param writer: custom writer to use (optional) (not yet implemented)
 81 |     :type writer: BibTexWriter
 82 |     :returns: BibTeX string
 83 |     :rtype: unicode
 84 |     """
 85 |     if writer is None:
 86 |         writer = bwriter.BibTexWriter()
 87 |     return writer.write(bib_database)
 88 | 
 89 | 
 90 | def dump(bib_database, bibtex_file, writer=None):
 91 |     """
 92 |     Dump :class:`BibDatabase` object as a BibTeX text file
 93 | 
 94 |     :param bib_database: bibliographic database object
 95 |     :type bib_database: BibDatabase
 96 |     :param bibtex_file: file to write to
 97 |     :type bibtex_file: file
 98 |     :param writer: custom writer to use (optional) (not yet implemented)
 99 |     :type writer: BibTexWriter
100 | 
101 |     Example::
102 | 
103 |         import bibtexparser
104 |         with open('bibtex.bib', 'w') as bibtex_file:
105 |             bibtexparser.dump(bibtex_database, bibtex_file)
106 | 
107 |     """
108 |     if writer is None:
109 |         writer = bwriter.BibTexWriter()
110 |     if sys.version_info >= (3, 0):
111 |         bibtex_file.write(writer.write(bib_database))
112 |     else:
113 |         # Encode to UTF-8
114 |         bibtex_file.write(writer.write(bib_database).encode("utf-8"))
115 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_latexenc.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #This program is free software: you can redistribute it and/or modify
  4 | #it under the terms of the GNU General Public License as published by
  5 | #the Free Software Foundation, either version 3 of the License, or
  6 | #(at your option) any later version.
  7 | #
  8 | #This program is distributed in the hope that it will be useful,
  9 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 11 | #GNU General Public License for more details.
 12 | #
 13 | #You should have received a copy of the GNU General Public License
 14 | #along with this program.  If not, see <http://www.gnu.org/licenses/>
 15 | #
 16 | # Author: Francois Boulogne <fboulogne at sciunto dot org>, 2012
 17 | 
 18 | from __future__ import unicode_literals
 19 | import unittest
 20 | 
 21 | from bibtexparser.latexenc import (string_to_latex, latex_to_unicode,
 22 |                                    protect_uppercase)
 23 | 
 24 | 
 25 | class TestLatexConverter(unittest.TestCase):
 26 | 
 27 |     def test_accent(self):
 28 |         string = 'à é è ö'
 29 |         result = string_to_latex(string)
 30 |         expected = "{\`a} {\\\'e} {\`e} {\\\"o}"
 31 |         self.assertEqual(result, expected)
 32 | 
 33 |     def test_special_caracter(self):
 34 |         string = 'ç'
 35 |         result = string_to_latex(string)
 36 |         expected = '{\c c}'
 37 |         self.assertEqual(result, expected)
 38 | 
 39 | 
 40 | class TestUppercaseProtection(unittest.TestCase):
 41 | 
 42 |     def test_uppercase(self):
 43 |         string = 'An upPer Case A'
 44 |         result = protect_uppercase(string)
 45 |         expected = '{A}n up{P}er {C}ase {A}'
 46 |         self.assertEqual(result, expected)
 47 | 
 48 |     def test_lowercase(self):
 49 |         string = 'a'
 50 |         result = protect_uppercase(string)
 51 |         expected = 'a'
 52 |         self.assertEqual(result, expected)
 53 | 
 54 |     def test_alreadyprotected(self):
 55 |         string = '{A}, m{A}gnificient, it is a {A}...'
 56 |         result = protect_uppercase(string)
 57 |         expected = '{A}, m{A}gnificient, it is a {A}...'
 58 |         self.assertEqual(result, expected)
 59 | 
 60 |     def test_traps(self):
 61 |         string = '{A, m{Agnificient, it is a {A'
 62 |         result = protect_uppercase(string)
 63 |         expected = '{A, m{Agnificient, it is a {A'
 64 |         self.assertEqual(result, expected)
 65 | 
 66 |     def test_traps2(self):
 67 |         string = 'A}, mA}gnificient, it is a A}'
 68 |         result = protect_uppercase(string)
 69 |         expected = 'A}, mA}gnificient, it is a A}'
 70 |         self.assertEqual(result, expected)
 71 | 
 72 | 
 73 | class TestUnicodeConversion(unittest.TestCase):
 74 | 
 75 |     def test_accents(self):
 76 |         string = "{\`a} {\\\'e} {\`e} {\\\"o}"
 77 |         result = latex_to_unicode(string)
 78 |         expected = 'à é è ö'
 79 |         self.assertEqual(result, expected)
 80 | 
 81 |     def test_ignores_trailing_modifier(self):
 82 |         string = "a\\\'"
 83 |         result = latex_to_unicode(string)
 84 |         expected = 'a'
 85 |         self.assertEqual(result, expected)
 86 | 
 87 |     def test_special_caracter(self):
 88 |         string = '{\c c}'
 89 |         result = latex_to_unicode(string)
 90 |         expected = 'ç'
 91 |         self.assertEqual(result, expected)
 92 | 
 93 |     def test_does_not_modify_existing_combining(self):
 94 |         string = b'ph\xc6\xa1\xcc\x89'.decode('utf8')
 95 |         result = latex_to_unicode(string)
 96 |         expected = 'phở'  # normalized
 97 |         self.assertEqual(result, expected)
 98 | 
 99 |     def test_does_not_modify_two_existing_combining(self):
100 |         string = b'pho\xcc\x9b\xcc\x89'.decode('utf8')
101 |         result = latex_to_unicode(string)
102 |         expected = 'phở'  # normalized
103 |         self.assertEqual(result, expected)
104 | 
105 | 
# Run the tests of this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
108 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibdatabase.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from bibtexparser.bibdatabase import (BibDatabase, BibDataString,
  3 |                                       BibDataStringExpression)
  4 | 
  5 | 
  6 | class TestBibDatabase(unittest.TestCase):
  7 |     entries = [{'ENTRYTYPE': 'book',
  8 |                 'year': '1987',
  9 |                 'edition': '2',
 10 |                 'publisher': 'Wiley Edition',
 11 |                 'ID': 'Bird1987',
 12 |                 'volume': '1',
 13 |                 'title': 'Dynamics of Polymeric Liquid',
 14 |                 'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.'
 15 |                 }]
 16 | 
 17 |     def test_entries_list_method(self):
 18 |         bib_db = BibDatabase()
 19 |         bib_db.entries = self.entries
 20 |         self.assertEqual(bib_db.entries, bib_db.get_entry_list())
 21 | 
 22 |     def test_entries_dict_prop(self):
 23 |         bib_db = BibDatabase()
 24 |         bib_db.entries = self.entries
 25 |         self.assertEqual(bib_db.entries_dict, bib_db.get_entry_dict())
 26 | 
 27 | 
 28 | class TestBibDataString(unittest.TestCase):
 29 | 
 30 |     def setUp(self):
 31 |         self.bd = BibDatabase()
 32 | 
 33 |     def test_name_is_lower(self):
 34 |         bds = BibDataString(self.bd, 'nAmE')
 35 |         self.assertTrue(bds.name.islower())
 36 | 
 37 |     def test_raises_KeyError(self):
 38 |         bds = BibDataString(self.bd, 'name')
 39 |         with self.assertRaises(KeyError):
 40 |             bds.get_value()
 41 | 
 42 |     def test_get_value(self):
 43 |         bds = BibDataString(self.bd, 'name')
 44 |         self.bd.strings['name'] = 'value'
 45 |         self.assertEqual(bds.get_value(), 'value')
 46 | 
 47 |     def test_expand_string(self):
 48 |         bds = BibDataString(self.bd, 'name')
 49 |         self.bd.strings['name'] = 'value'
 50 |         self.assertEqual(BibDataString.expand_string('name'), 'name')
 51 |         self.assertEqual(BibDataString.expand_string(bds), 'value')
 52 | 
 53 |     def test_get_value_string_is_defined_by_expression(self):
 54 |         self.bd.strings['name'] = 'string'
 55 |         exp = BibDataStringExpression(['this is a ',
 56 |                                        BibDataString(self.bd, 'name')])
 57 |         self.bd.strings['exp'] = exp
 58 |         bds = BibDataString(self.bd, 'exp')
 59 |         self.assertEqual(bds.get_value(), 'this is a string')
 60 | 
 61 |     def test_strings_are_equal_iif_name_is_equal(self):
 62 |         self.bd.strings['a'] = 'foo'
 63 |         self.bd.strings['b'] = 'foo'
 64 |         a1 = BibDataString(self.bd, 'a')
 65 |         a2 = BibDataString(self.bd, 'a')
 66 |         b = BibDataString(self.bd, 'b')
 67 |         self.assertEqual(a1, a2)
 68 |         self.assertNotEqual(a1, b)
 69 |         self.assertNotEqual(a1, b)
 70 |         self.assertNotEqual(a1, "foo")
 71 | 
 72 | 
 73 | class TestBibDataStringExpression(unittest.TestCase):
 74 | 
 75 |     def setUp(self):
 76 |         self.bd = BibDatabase()
 77 |         self.bd.strings['name'] = 'value'
 78 |         self.bds = BibDataString(self.bd, 'name')
 79 | 
 80 |     def test_get_value(self):
 81 |         exp = BibDataStringExpression(
 82 |             ["The string has value: ", self.bds, '.'])
 83 |         self.assertEqual(exp.get_value(), 'The string has value: value.')
 84 | 
 85 |     def test_raises_KeyError(self):
 86 |         bds = BibDataString(self.bd, 'unknown')
 87 |         exp = BibDataStringExpression([bds, self.bds, 'text'])
 88 |         with self.assertRaises(KeyError):
 89 |             exp.get_value()
 90 | 
 91 |     def test_equations_are_equal_iif_same(self):
 92 |         a1 = BibDataString(self.bd, 'a')
 93 |         a2 = BibDataString(self.bd, 'a')
 94 |         exp = BibDataStringExpression([a1, self.bds, 'text'])
 95 |         self.assertEqual(exp, BibDataStringExpression([a2, self.bds, 'text']))
 96 |         self.assertNotEqual(exp, BibDataStringExpression(['foo', self.bds, 'text']))
 97 |         self.assertNotEqual(exp, 'foovaluetext')
 98 | 
 99 | 
# Run the tests of this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
102 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibtexexpression.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | 
  5 | from __future__ import unicode_literals
  6 | import unittest
  7 | 
  8 | from bibtexparser.bibtexexpression import BibtexExpression
  9 | 
 10 | 
 11 | class TestBibtexExpression(unittest.TestCase):
 12 | 
 13 |     def setUp(self):
 14 |         self.expr = BibtexExpression()
 15 | 
 16 |     def test_minimal(self):
 17 |         result = self.expr.entry.parseString('@journal{key, name = 123 }')
 18 |         self.assertEqual(result.get('EntryType'), 'journal')
 19 |         self.assertEqual(result.get('Key'), 'key')
 20 |         self.assertEqual(result.get('Fields'), {'name': '123'})
 21 | 
 22 |     def test_capital_type(self):
 23 |         result = self.expr.entry.parseString('@JOURNAL{key, name = 123 }')
 24 |         self.assertEqual(result.get('EntryType'), 'JOURNAL')
 25 | 
 26 |     def test_capital_key(self):
 27 |         result = self.expr.entry.parseString('@journal{KEY, name = 123 }')
 28 |         self.assertEqual(result.get('Key'), 'KEY')
 29 | 
 30 |     def test_braced(self):
 31 |         result = self.expr.entry.parseString('@journal{key, name = {abc} }')
 32 |         self.assertEqual(result.get('Fields'), {'name': 'abc'})
 33 | 
 34 |     def test_braced_with_new_line(self):
 35 |         result = self.expr.entry.parseString(
 36 |             '@journal{key, name = {abc\ndef} }')
 37 |         self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'})
 38 | 
 39 |     def test_braced_unicode(self):
 40 |         result = self.expr.entry.parseString(
 41 |             '@journal{key, name = {àbcđéf} }')
 42 |         self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'})
 43 | 
 44 |     def test_quoted(self):
 45 |         result = self.expr.entry.parseString('@journal{key, name = "abc" }')
 46 |         self.assertEqual(result.get('Fields'), {'name': 'abc'})
 47 | 
 48 |     def test_quoted_with_new_line(self):
 49 |         result = self.expr.entry.parseString(
 50 |             '@journal{key, name = "abc\ndef" }')
 51 |         self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'})
 52 | 
 53 |     def test_quoted_with_unicode(self):
 54 |         result = self.expr.entry.parseString(
 55 |             '@journal{key, name = "àbcđéf" }')
 56 |         self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'})
 57 | 
 58 |     def test_entry_declaration_after_space(self):
 59 |         self.expr.entry.parseString('  @journal{key, name = {abcd}}')
 60 | 
 61 |     def test_entry_declaration_no_key(self):
 62 |         with self.assertRaises(self.expr.ParseException):
 63 |             self.expr.entry.parseString('@misc{name = {abcd}}')
 64 | 
 65 |     def test_entry_declaration_no_key_new_line(self):
 66 |         with self.assertRaises(self.expr.ParseException):
 67 |             self.expr.entry.parseString('@misc{\n name = {abcd}}')
 68 | 
 69 |     def test_entry_declaration_no_key_comma(self):
 70 |         with self.assertRaises(self.expr.ParseException):
 71 |             self.expr.entry.parseString('@misc{, \nname = {abcd}}')
 72 | 
 73 |     def test_entry_declaration_no_key_keyvalue_without_space(self):
 74 |         with self.assertRaises(self.expr.ParseException):
 75 |             self.expr.entry.parseString('@misc{\nname=aaa}')
 76 | 
 77 |     def test_entry_declaration_key_with_whitespace(self):
 78 |         with self.assertRaises(self.expr.ParseException):
 79 |             self.expr.entry.parseString('@misc{ xx yy, \n name = aaa}')
 80 | 
 81 |     def test_string_declaration_after_space(self):
 82 |         self.expr.string_def.parseString('  @string{ name = {abcd}}')
 83 | 
 84 |     def test_preamble_declaration_after_space(self):
 85 |         self.expr.preamble_decl.parseString('  @preamble{ "blah blah " }')
 86 | 
 87 |     def test_declaration_after_space(self):
 88 |         keys = []
 89 |         self.expr.entry.addParseAction(
 90 |             lambda s, l, t: keys.append(t.get('Key'))
 91 |         )
 92 |         self.expr.main_expression.parseString(' @journal{key, name = {abcd}}')
 93 |         self.assertEqual(keys, ['key'])
 94 | 
 95 |     def test_declaration_after_space_and_comment(self):
 96 |         keys = []
 97 |         self.expr.entry.addParseAction(
 98 |             lambda s, l, t: keys.append(t.get('Key'))
 99 |         )
100 |         self.expr.main_expression.parseString(
101 |             '% Implicit comment\n @article{key, name={abcd}}'
102 |         )
103 |         self.assertEqual(keys, ['key'])
104 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bwriter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # Author: Francois Boulogne
  4 | # License:
  5 | 
  6 | from __future__ import unicode_literals
  7 | 
  8 | import unittest
  9 | import os
 10 | import io
 11 | import sys
 12 | 
 13 | from bibtexparser.bparser import BibTexParser
 14 | from bibtexparser.bwriter import BibTexWriter, to_bibtex
 15 | from bibtexparser.customization import author
 16 | 
 17 | 
 18 | def _data_path(filename):
 19 |     return os.path.join('bibtexparser/tests/data', filename)
 20 | 
 21 | 
 22 | class TestBibtexWriterList(unittest.TestCase):
 23 | 
 24 |     def test_article(self):
 25 |         with io.open(_data_path('article.bib'), 'r') as bibfile:
 26 |             bib = BibTexParser(bibfile.read())
 27 | 
 28 |         with io.open(_data_path('article_output.bib'), 'r') as bibfile:
 29 |             expected = bibfile.read()
 30 |         result = to_bibtex(bib)
 31 |         self.maxDiff = None
 32 |         self.assertEqual(expected, result)
 33 | 
 34 |     def test_article_with_annotation(self):
 35 |         with io.open(_data_path('article_with_annotation.bib'), 'r') as bibfile:
 36 |             bib = BibTexParser(bibfile.read())
 37 | 
 38 |         with io.open(_data_path('article_with_annotation_output.bib'), 'r') \
 39 |                 as bibfile:
 40 |             expected = bibfile.read()
 41 |         result = to_bibtex(bib)
 42 |         self.maxDiff = None
 43 |         self.assertEqual(expected, result)
 44 | 
 45 |     def test_book(self):
 46 |         with io.open(_data_path('book.bib'), 'r') as bibfile:
 47 |             bib = BibTexParser(bibfile.read())
 48 | 
 49 |         with io.open(_data_path('book_output.bib'), 'r') as bibfile:
 50 |             expected = bibfile.read()
 51 |         result = to_bibtex(bib)
 52 |         self.maxDiff = None
 53 |         self.assertEqual(expected, result)
 54 | 
 55 |     def test_comma_first(self):
 56 |         with io.open(_data_path('book.bib'), 'r') as bibfile:
 57 |             bib = BibTexParser(bibfile.read())
 58 | 
 59 |         with io.open(_data_path('book_comma_first.bib'), 'r') as bibfile:
 60 |             expected = bibfile.read()
 61 |         writer = BibTexWriter()
 62 |         writer.indent = '   '
 63 |         writer.comma_first = True
 64 |         result = writer.write(bib)
 65 |         self.maxDiff = None
 66 |         self.assertEqual(expected, result)
 67 | 
 68 |     def test_multiple(self):
 69 |         with io.open(_data_path('multiple_entries.bib'), 'r') as bibfile:
 70 |             bib = BibTexParser(bibfile.read())
 71 | 
 72 |         with io.open(_data_path('multiple_entries_output.bib'), 'r') as bibfile:
 73 |             expected = bibfile.read()
 74 |         result = to_bibtex(bib)
 75 |         self.maxDiff = None
 76 |         self.assertEqual(expected, result)
 77 | 
 78 |     def test_exception_typeerror(self):
 79 |         with io.open(_data_path('article.bib'), 'r') as bibfile:
 80 |             bib = BibTexParser(bibfile.read(), customization=author)
 81 |         self.assertRaises(TypeError, to_bibtex, bib)
 82 | 
 83 |     def test_with_strings(self):
 84 |         with io.open(_data_path('article_with_strings.bib'), 'r') as bibfile:
 85 |             bib = BibTexParser(bibfile.read(), common_strings=True,
 86 |                                interpolate_strings=False)
 87 |         with io.open(_data_path(
 88 |                 'article_with_strings_output.bib'), 'r') as bibfile:
 89 |             expected = bibfile.read()
 90 |         result = to_bibtex(bib)
 91 |         self.maxDiff = None
 92 |         self.assertEqual(expected, result)
 93 | 
 94 |     def test_trailing_comma(self):
 95 |         with io.open(_data_path('article.bib'), 'r') as bibfile:
 96 |             bib = BibTexParser(bibfile.read())
 97 | 
 98 |         with io.open(_data_path('article_trailing_comma_output.bib'), 'r') as bibfile:
 99 |             expected = bibfile.read()
100 |         writer = BibTexWriter()
101 |         writer.add_trailing_comma = True
102 |         result = writer.write(bib)
103 |         self.maxDiff = None
104 |         self.assertEqual(expected, result)
105 | 
106 |     def test_comma_first_and_trailing_comma(self):
107 |         with io.open(_data_path('article.bib'), 'r') as bibfile:
108 |             bib = BibTexParser(bibfile.read())
109 | 
110 |         with io.open(_data_path('article_comma_first_and_trailing_comma_output.bib'), 'r') as bibfile:
111 |             expected = bibfile.read()
112 |         writer = BibTexWriter()
113 |         writer.add_trailing_comma = True
114 |         writer.comma_first = True
115 |         result = writer.write(bib)
116 |         self.maxDiff = None
117 |         self.assertEqual(expected, result)
118 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibtexparser.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import bibtexparser
  3 | from bibtexparser.bparser import BibTexParser
  4 | from tempfile import TemporaryFile
  5 | 
  6 | 
  7 | class TestBibtexParserParserMethods(unittest.TestCase):
  8 |     input_file_path = 'bibtexparser/tests/data/book.bib'
  9 |     input_bom_file_path = 'bibtexparser/tests/data/book_bom.bib'
 10 |     entries_expected = [{'ENTRYTYPE': 'book',
 11 |                          'year': '1987',
 12 |                          'edition': '2',
 13 |                          'publisher': 'Wiley Edition',
 14 |                          'ID': 'Bird1987',
 15 |                          'volume': '1',
 16 |                          'title': 'Dynamics of Polymeric Liquid',
 17 |                          'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.',
 18 |                         }]
 19 | 
 20 |     def test_parse_immediately(self):
 21 |         with open(self.input_file_path) as bibtex_file:
 22 |             bibtex_str = bibtex_file.read()
 23 |         bibtex_database = BibTexParser(bibtex_str)
 24 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
 25 | 
 26 |     def test_parse_str(self):
 27 |         parser = BibTexParser()
 28 |         with open(self.input_file_path) as bibtex_file:
 29 |             bibtex_str = bibtex_file.read()
 30 |         bibtex_database = parser.parse(bibtex_str)
 31 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
 32 | 
 33 |     def test_parse_bom_str(self):
 34 |         parser = BibTexParser()
 35 |         with open(self.input_bom_file_path) as bibtex_file:
 36 |             bibtex_str = bibtex_file.read()
 37 |             bibtex_database = parser.parse(bibtex_str)
 38 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
 39 | 
 40 |     def test_parse_bom_bytes(self):
 41 |         parser = BibTexParser()
 42 |         with open(self.input_bom_file_path, 'rb') as bibtex_file:
 43 |             bibtex_str = bibtex_file.read()
 44 |             bibtex_database = parser.parse(bibtex_str)
 45 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
 46 | 
 47 |     def test_parse_file(self):
 48 |         parser = BibTexParser()
 49 |         with open(self.input_file_path) as bibtex_file:
 50 |             bibtex_database = parser.parse_file(bibtex_file)
 51 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
 52 | 
 53 |     def test_parse_str_module(self):
 54 |         with open(self.input_file_path) as bibtex_file:
 55 |             bibtex_str = bibtex_file.read()
 56 |         bibtex_database = bibtexparser.loads(bibtex_str)
 57 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
 58 | 
 59 |     def test_parse_file_module(self):
 60 |         with open(self.input_file_path) as bibtex_file:
 61 |             bibtex_database = bibtexparser.load(bibtex_file)
 62 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
 63 | 
 64 | 
 65 | class TestBibtexparserWriteMethods(unittest.TestCase):
 66 |     input_file_path = 'bibtexparser/tests/data/book.bib'
 67 |     expected = \
 68 | """@book{Bird1987,
 69 |  author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.},
 70 |  edition = {2},
 71 |  publisher = {Wiley Edition},
 72 |  title = {Dynamics of Polymeric Liquid},
 73 |  volume = {1},
 74 |  year = {1987}
 75 | }
 76 | 
 77 | """
 78 | 
 79 |     def test_write_str(self):
 80 |         with open(self.input_file_path) as bibtex_file:
 81 |             bibtex_database = bibtexparser.load(bibtex_file)
 82 |         result = bibtexparser.dumps(bibtex_database)
 83 |         self.assertEqual(result, self.expected)
 84 | 
 85 |     def test_write_file(self):
 86 |         with open(self.input_file_path) as bibtex_file:
 87 |             bibtex_database = bibtexparser.load(bibtex_file)
 88 | 
 89 |         with TemporaryFile(mode='w+') as bibtex_out_file:
 90 |             bibtexparser.dump(bibtex_database, bibtex_out_file)
 91 |             bibtex_out_file.seek(0)
 92 |             bibtex_out_str = bibtex_out_file.read()
 93 | 
 94 |         self.assertEqual(bibtex_out_str, self.expected)
 95 | 
 96 | class TestBibtexparserFieldNames(unittest.TestCase):
 97 |     input_file_path = 'bibtexparser/tests/data/fieldname.bib'
 98 |     entries_expected = [{'ENTRYTYPE': 'book',
 99 |                          'ID': 'Bird1987',
100 |                          'dc.date': '2004-01'
101 |                         }]
102 | 
103 |     def test_parse_immediately(self):
104 |         with open(self.input_file_path) as bibtex_file:
105 |             bibtex_str = bibtex_file.read()
106 |         bibtex_database = BibTexParser(bibtex_str)
107 |         self.assertEqual(bibtex_database.entries, self.entries_expected)
108 | 
# Run the tests of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
111 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibtex_strings.py:
--------------------------------------------------------------------------------
  1 | import io
  2 | import unittest
  3 | import codecs
  4 | import bibtexparser
  5 | from bibtexparser.bibdatabase import (BibDatabase, BibDataString,
  6 |                                       BibDataStringExpression)
  7 | from bibtexparser.bparser import BibTexParser
  8 | from bibtexparser.bwriter import BibTexWriter
  9 | from collections import OrderedDict
 10 | 
 11 | 
 12 | class TestStringParse(unittest.TestCase):
 13 | 
 14 |     def test_single_string_parse_count(self):
 15 |         bibtex_str = '@string{name1 = "value1"}\n\n'
 16 |         bib_database = bibtexparser.loads(bibtex_str)
 17 |         self.assertEqual(len(bib_database.strings), 1)
 18 | 
 19 |     def test_multiple_string_parse_count(self):
 20 |         bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
 21 |         bib_database = bibtexparser.loads(bibtex_str)
 22 |         self.assertEqual(len(bib_database.strings), 2)
 23 | 
 24 |     def test_single_string_parse(self):
 25 |         bibtex_str = '@string{name1 = "value1"}\n\n'
 26 |         bib_database = bibtexparser.loads(bibtex_str)
 27 |         expected = {'name1': 'value1'}
 28 |         self.assertEqual(bib_database.strings, expected)
 29 | 
 30 |     def test_multiple_string_parse(self):
 31 |         bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
 32 |         bib_database = bibtexparser.loads(bibtex_str)
 33 |         expected = OrderedDict()
 34 |         expected['name1'] = 'value1'
 35 |         expected['name2'] = 'value2'
 36 |         self.assertEqual(bib_database.strings, expected)
 37 | 
 38 |     def test_string_braces(self):
 39 |         with codecs.open('bibtexparser/tests/data/string.bib', 'r', 'utf-8') as bibfile:
 40 |             bib = BibTexParser(bibfile.read())
 41 |             res = bib.get_entry_list()
 42 |         expected = [{'author': 'Sang Kil Cha and Maverick Woo and David Brumley',
 43 | 		     'ID': 'cha:oakland15',
 44 | 		     'year': '2015',
 45 | 		     'booktitle': 'Proceedings of the {IEEE} Symposium on Security and Privacy',
 46 | 		     'title': '{Program-Adaptive Mutational Fuzzing}',
 47 | 	             'ENTRYTYPE': 'inproceedings',
 48 | 		     'pages': '725--741'
 49 |                      }]
 50 |         self.assertEqual(res, expected)
 51 | 
 52 |     def test_string_parse_accept_chars(self):
 53 |         bibtex_str = '@string{pub-ieee-std = {IEEE}}\n\n@string{pub-ieee-std:adr = {New York, NY, USA}}'
 54 |         bib_database = bibtexparser.loads(bibtex_str)
 55 |         self.assertEqual(len(bib_database.strings), 2)
 56 |         expected = OrderedDict()
 57 |         expected['pub-ieee-std'] = 'IEEE'
 58 |         expected['pub-ieee-std:adr'] = 'New York, NY, USA'
 59 |         self.assertEqual(bib_database.strings, expected)
 60 | 
 61 | 
 62 | class TestStringWrite(unittest.TestCase):
 63 | 
 64 |     def test_single_string_write(self):
 65 |         bib_database = BibDatabase()
 66 |         bib_database.strings['name1'] = 'value1'
 67 |         result = bibtexparser.dumps(bib_database)
 68 |         expected = '@string{name1 = {value1}}\n\n'
 69 |         self.assertEqual(result, expected)
 70 | 
 71 |     def test_multiple_string_write(self):
 72 |         bib_database = BibDatabase()
 73 |         bib_database.strings['name1'] = 'value1'
 74 |         bib_database.strings['name2'] = 'value2'  # Order is important!
 75 |         result = bibtexparser.dumps(bib_database)
 76 |         expected = '@string{name1 = {value1}}\n\n@string{name2 = {value2}}\n\n'
 77 |         self.assertEqual(result, expected)
 78 | 
 79 |     def test_ignore_common_strings(self):
 80 |         bib_database = BibDatabase()
 81 |         bib_database.load_common_strings()
 82 |         result = bibtexparser.dumps(bib_database)
 83 |         self.assertEqual(result, '')
 84 | 
 85 |     def test_ignore_common_strings_only_if_not_overloaded(self):
 86 |         bib_database = BibDatabase()
 87 |         bib_database.load_common_strings()
 88 |         bib_database.strings['jan'] = 'Janvier'
 89 |         result = bibtexparser.dumps(bib_database)
 90 |         self.assertEqual(result, '@string{jan = {Janvier}}\n\n')
 91 | 
 92 |     def test_write_common_strings(self):
 93 |         bib_database = BibDatabase()
 94 |         bib_database.load_common_strings()
 95 |         writer = BibTexWriter(write_common_strings=True)
 96 |         result = bibtexparser.dumps(bib_database, writer=writer)
 97 |         with io.open('bibtexparser/tests/data/common_strings.bib') as f:
 98 |             expected = f.read()
 99 |         self.assertEqual(result, expected)
100 | 
101 |     def test_write_dependent_strings(self):
102 |         bib_database = BibDatabase()
103 |         bib_database.strings['title'] = 'Mr'
104 |         expr = BibDataStringExpression([BibDataString(bib_database, 'title'), 'Smith'])
105 |         bib_database.strings['name'] = expr
106 |         result = bibtexparser.dumps(bib_database)
107 |         expected = '@string{title = {Mr}}\n\n@string{name = title # {Smith}}\n\n'
108 |         self.assertEqual(result, expected)
109 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_customization.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from __future__ import unicode_literals
  5 | import unittest
  6 | 
  7 | from bibtexparser.customization import getnames, convert_to_unicode, homogenize_latex_encoding, page_double_hyphen, keyword, add_plaintext_fields
  8 | 
  9 | 
 10 | class TestBibtexParserMethod(unittest.TestCase):
 11 | 
 12 |     ###########
 13 |     # getnames
 14 |     ###########
 15 |     def test_getnames(self):
 16 |         names = ['Foo Bar',
 17 |                  'Foo B. Bar',
 18 |                  'F. B. Bar',
 19 |                  'F.B. Bar',
 20 |                  'F. Bar',
 21 |                  'Jean de Savigny',
 22 |                  'Jean la Tour',
 23 |                  'Jean le Tour',
 24 |                  'Mike ben Akar',
 25 |                  #'Jean de la Tour',
 26 |                  #'Johannes Diderik van der Waals',
 27 |                  ]
 28 |         result = getnames(names)
 29 |         expected = ['Bar, Foo',
 30 |                     'Bar, Foo B.',
 31 |                     'Bar, F. B.',
 32 |                     'Bar, F. B.',
 33 |                     'Bar, F.',
 34 |                     'de Savigny, Jean',
 35 |                     'la Tour, Jean',
 36 |                     'le Tour, Jean',
 37 |                     'ben Akar, Mike',
 38 |                     #'de la Tour, Jean',
 39 |                     #'van der Waals, Johannes Diderik',
 40 |                     ]
 41 |         self.assertEqual(result, expected)
 42 | 
 43 |     @unittest.skip('Bug #9')
 44 |     def test_getnames_braces(self):
 45 |         names = ['A. {Delgado de Molina}', 'M. Vign{\\\'e}']
 46 |         result = getnames(names)
 47 |         expected = ['Delgado de Molina, A.', 'Vigné, M.']
 48 |         self.assertEqual(result, expected)
 49 | 
 50 |     ###########
 51 |     # page_double_hyphen
 52 |     ###########
 53 |     def test_page_double_hyphen_alreadyOK(self):
 54 |         record = {'pages': '12--24'}
 55 |         result = page_double_hyphen(record)
 56 |         expected = record
 57 |         self.assertEqual(result, expected)
 58 | 
 59 |     def test_page_double_hyphen_simple(self):
 60 |         record = {'pages': '12-24'}
 61 |         result = page_double_hyphen(record)
 62 |         expected = {'pages': '12--24'}
 63 |         self.assertEqual(result, expected)
 64 | 
 65 |     def test_page_double_hyphen_space(self):
 66 |         record = {'pages': '12 - 24'}
 67 |         result = page_double_hyphen(record)
 68 |         expected = {'pages': '12--24'}
 69 |         self.assertEqual(result, expected)
 70 | 
 71 |     def test_page_double_hyphen_nothing(self):
 72 |         record = {'pages': '12 24'}
 73 |         result = page_double_hyphen(record)
 74 |         expected = {'pages': '12 24'}
 75 |         self.assertEqual(result, expected)
 76 | 
 77 |     ###########
 78 |     # convert to unicode
 79 |     ###########
 80 |     def test_convert_to_unicode(self):
 81 |         record = {'toto': '{\`a} \`{a}'}
 82 |         result = convert_to_unicode(record)
 83 |         expected = {'toto': 'à à'}
 84 |         self.assertEqual(result, expected)
 85 |         record = {'toto': '{\\"u} \\"{u}'}
 86 |         result = convert_to_unicode(record)
 87 |         expected = {'toto': 'ü ü'}
 88 |         self.assertEqual(result, expected)
 89 |         # From issue 121
 90 |         record = {'title': '{Two Gedenk\\"uberlieferung der Angelsachsen}'}
 91 |         result = convert_to_unicode(record)
 92 |         expected = {'title': 'Two Gedenküberlieferung der Angelsachsen'}
 93 |         self.assertEqual(result, expected)
 94 |         # From issue 161
 95 |         record = {'title': r"p\^{a}t\'{e}"}
 96 |         result = convert_to_unicode(record)
 97 |         expected = {'title': "pâté"}
 98 |         self.assertEqual(result, expected)
 99 |         record = {'title': r"\^{i}le"}
100 |         result = convert_to_unicode(record)
101 |         expected = {'title': "île"}
102 |         self.assertEqual(result, expected)
103 |         record = {'title': r"\texttimes{}{\texttimes}\texttimes"}
104 |         result = convert_to_unicode(record)
105 |         expected = {'title': "×××"}
106 |         self.assertEqual(result, expected)
107 | 
108 |     ###########
109 |     # homogenize
110 |     ###########
111 |     def test_homogenize(self):
112 |         record = {'toto': 'à {\`a} \`{a}'}
113 |         result = homogenize_latex_encoding(record)
114 |         expected = {'toto': '{\`a} {\`a} {\`a}'}
115 |         self.assertEqual(result, expected)
116 | 
117 |     ###########
118 |     # add_plaintext_fields
119 |     ###########
120 |     def test_add_plaintext_fields(self):
121 |         record = {
122 |             'title': 'On-line {Recognition} of {Handwritten} {Mathematical} {Symbols}',
123 |             'foobar': ['{FFT} {Foobar}', '{foobar}'],
124 |             'foobar2': {'item1': '{FFT} {Foobar}', 'item2': '{foobar}'}
125 |         }
126 |         result = add_plaintext_fields(record)
127 |         expected = {
128 |             'title': 'On-line {Recognition} of {Handwritten} {Mathematical} {Symbols}',
129 |             'plain_title': 'On-line Recognition of Handwritten Mathematical Symbols',
130 |             'foobar': ['{FFT} {Foobar}', '{foobar}'],
131 |             'plain_foobar': ['FFT Foobar', 'foobar'],
132 |             'foobar2': {'item1': '{FFT} {Foobar}', 'item2': '{foobar}'},
133 |             'plain_foobar2': {'item1': 'FFT Foobar', 'item2': 'foobar'}
134 |         }
135 |         self.assertEqual(result, expected)
136 | 
137 |     ###########
138 |     # keywords
139 |     ###########
140 |     def test_keywords(self):
141 |         record = {'keyword': "a b, a b , a b;a b ; a b, a b\n"}
142 |         result = keyword(record)
143 |         expected = {'keyword': ['a b'] * 6}
144 |         self.assertEqual(result, expected)
145 | 
# Run the tests of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
148 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = build
  9 | 
 10 | # Internal variables.
 11 | PAPEROPT_a4     = -D latex_paper_size=a4
 12 | PAPEROPT_letter = -D latex_paper_size=letter
 13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 14 | # the i18n builder cannot share the environment and doctrees with the others
 15 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 16 | 
 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
 18 | 
# Print the list of available targets with a one-line description each.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 40 | 
# Remove everything under $(BUILDDIR); the leading '-' lets make continue
# even if rm reports an error.
clean:
	-rm -rf $(BUILDDIR)/*
 43 | 
# Run the html builder; output goes to $(BUILDDIR)/html.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 48 | 
# Run the dirhtml builder; output goes to $(BUILDDIR)/dirhtml.
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 53 | 
# Run the singlehtml builder; output goes to $(BUILDDIR)/singlehtml.
singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 58 | 
# Run the pickle builder; output goes to $(BUILDDIR)/pickle.
pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."
 63 | 
# Run the json builder; output goes to $(BUILDDIR)/json.
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."
 68 | 
# Run the htmlhelp builder; output goes to $(BUILDDIR)/htmlhelp.
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 74 | 
 75 | qthelp:
 76 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 77 | 	@echo
 78 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 79 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 80 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BibtexParser.qhcp"
 81 | 	@echo "To view the help file:"
 82 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BibtexParser.qhc"
 83 | 
# Run the devhelp builder; output goes to $(BUILDDIR)/devhelp. '$$HOME' is
# written with a doubled dollar so make passes a literal $HOME to the shell.
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/BibtexParser"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BibtexParser"
	@echo "# devhelp"
 92 | 
# Run the epub builder; output goes to $(BUILDDIR)/epub.
epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 97 | 
# Run the latex builder; output goes to $(BUILDDIR)/latex. The LaTeX sources
# are only generated here, not compiled (see latexpdf).
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."
104 | 
# Run the latex builder, then invoke the all-pdf target of the makefile in
# $(BUILDDIR)/latex through a recursive $(MAKE).
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
110 | 
# Run the text builder; output goes to $(BUILDDIR)/text.
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."
115 | 
# Run the man builder; output goes to $(BUILDDIR)/man.
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
120 | 
# Run the texinfo builder; output goes to $(BUILDDIR)/texinfo. The Texinfo
# sources are only generated here, not processed (see info).
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."
127 | 
# Run the texinfo builder, then invoke the info target of the makefile in
# $(BUILDDIR)/texinfo. The recursive call goes through $(MAKE), as latexpdf
# does, so that the same make program and its command-line flags are used.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
133 | 
# Run the gettext builder; output goes to $(BUILDDIR)/locale. This target uses
# I18NSPHINXOPTS, which omits the shared doctree directory option.
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
138 | 
# Run the changes builder; output goes to $(BUILDDIR)/changes.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."
143 | 
# Run the linkcheck builder; output goes to $(BUILDDIR)/linkcheck.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."
149 | 
# Run the doctest builder; output goes to $(BUILDDIR)/doctest.
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
154 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_comments.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from bibtexparser.bparser import BibTexParser
  3 | from bibtexparser.bwriter import to_bibtex
  4 | 
  5 | 
  6 | """ The code is supposed to treat comments the following way:
  7 |     Each @Comment opens a comment that ends when something
  8 |     that is not a comment is encountered. More precisely
  9 |     this means a line starting with an @. Lines that are not
 10 |     parsed as anything else are also considered comments.
 11 |     If the comment starts and ends with braces, they are removed.
 12 | 
 13 |     Current issues:
        - a comment followed by a line starting with @something
        that is not a valid BibTeX element is parsed separately,
        that is, as two comments.
 17 |         - braces are either ignored or removed which is not easily
 18 |         predictable.
 19 | """
 20 | 
 21 | 
 22 | class TestParseComment(unittest.TestCase):
 23 | 
 24 |     def test_comment_count(self):
 25 |         with open('bibtexparser/tests/data/features.bib') as bibfile:
 26 |             bib = BibTexParser(bibfile.read())
 27 |         self.assertEqual(len(bib.comments), 3)
 28 | 
 29 |     def test_comment_list(self):
 30 |         with open('bibtexparser/tests/data/features.bib') as bibfile:
 31 |             bib = BibTexParser(bibfile.read())
 32 |         expected = ["ignore this line!",
 33 |                     "ignore this line too!",
 34 |                     "and ignore this line too!"]
 35 |         self.assertEqual(bib.comments, expected)
 36 | 
 37 |     def test_multiline_comments(self):
 38 |         with open('bibtexparser/tests/data/multiline_comments.bib') as bibfile:
 39 |             bib = BibTexParser(bibfile.read())
 40 |         expected = [
 41 | """Lorem ipsum dolor sit amet,
 42 | consectetur adipisicing elit""",
 43 | """
 44 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
 45 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
 46 | 
 47 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
 48 | Excepteur sint occaecat cupidatat non proident.
 49 |  ,
 50 | """,
 51 | """
 52 | 
 53 | 
 54 | Sunt in culpa qui officia deserunt mollit anim id est laborum.
 55 | 
 56 | 
 57 | """,
 58 | ""
 59 |         ]
 60 |         self.maxDiff = None
 61 |         self.assertEqual(bib.comments, expected)
 62 | 
 63 |     def test_multiple_entries(self):
 64 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibfile:
 65 |             bparser = BibTexParser()
 66 |             bib = bparser.parse_file(bibfile)
 67 |         expected = ["",
 68 |                     "A comment"]
 69 |         self.assertEqual(bib.comments, expected)
 70 | 
 71 |     def test_comments_percentage(self):
 72 |         with open('bibtexparser/tests/data/comments_percentage.bib', 'r') as bibfile:
 73 |             bib = BibTexParser(bibfile.read())
 74 |             res = bib.get_entry_list()
 75 |         expected = [{'ENTRYTYPE': 'article',
 76 |                      'journal': 'Nice Journal',
 77 |                      'volume': '12',
 78 |                      'ID': 'Cesar2013',
 79 |                      'year': '2013',
 80 |                      'author': 'Jean Cesar',
 81 |                      'comments': 'A comment',
 82 |                      'keyword': 'keyword1, keyword2',
 83 |                      'title': 'An amazing title'
 84 |                      },
 85 |                     {'ENTRYTYPE': 'article',
 86 |                      'journal': 'Nice Journal',
 87 |                      'volume': '12',
 88 |                      'ID': 'Baltazar2013',
 89 |                      'year': '2013',
 90 |                      'author': 'Jean Baltazar',
 91 |                      'comments': 'A comment',
 92 |                      'keyword': 'keyword1, keyword2',
 93 |                      'title': 'An amazing title'
 94 |                      }]
 95 |         self.assertEqual(res, expected)
 96 | 
 97 |     def test_comments_percentage_nocoma(self):
 98 |         with open('bibtexparser/tests/data/comments_percentage_nolastcoma.bib', 'r') as bibfile:
 99 |             bib = BibTexParser(bibfile.read())
100 |             res = bib.get_entry_list()
101 |         expected = [{'ENTRYTYPE': 'article',
102 |                      'journal': 'Nice Journal',
103 |                      'volume': '12',
104 |                      'ID': 'Cesar2013',
105 |                      'year': '2013',
106 |                      'author': 'Jean Cesar',
107 |                      'comments': 'A comment',
108 |                      'keyword': 'keyword1, keyword2',
109 |                      'title': 'An amazing title'
110 |                      },
111 |                     {'ENTRYTYPE': 'article',
112 |                      'journal': 'Nice Journal',
113 |                      'volume': '12',
114 |                      'ID': 'Baltazar2013',
115 |                      'year': '2013',
116 |                      'author': 'Jean Baltazar',
117 |                      'comments': 'A comment',
118 |                      'keyword': 'keyword1, keyword2',
119 |                      'title': 'An amazing title'
120 |                      }]
121 |         self.assertEqual(res, expected)
122 | 
123 |     def test_no_newline(self):
124 |         comments = """This is a comment."""
125 |         expected = ["This is a comment."]
126 |         bib = BibTexParser(comments)
127 |         self.assertEqual(bib.comments, expected)
128 | 
129 |     def test_43(self):
130 |         comment = "@STRING{foo = \"bar\"}\n" \
131 |                   "This is a comment\n" \
132 |                   "This is a second comment."
133 |         expected = "This is a comment\nThis is a second comment."
134 |         bib = BibTexParser(comment)
135 |         self.assertEqual(bib.comments, [expected])
136 |         self.assertEqual(bib.strings, {'foo': 'bar'})
137 | 
138 |     def test_43_bis(self):
139 |         comment = "@STRING{foo = \"bar\"}\n" \
140 |                   "This is a comment\n" \
141 |                   "STRING{Baz = \"This should be interpreted as comment.\"}"
142 |         expected = "This is a comment\n" \
143 |                    "STRING{Baz = \"This should be interpreted as comment.\"}"
144 |         bib = BibTexParser(comment)
145 |         self.assertEqual(bib.comments, [expected])
146 |         self.assertEqual(bib.strings, {'foo': 'bar'})
147 | 
148 | 
class TestWriteComment(unittest.TestCase):
    """Comments must be written back by ``to_bibtex`` as expected."""

    @staticmethod
    def _read(path):
        # Return the whole content of a fixture file.
        with open(path) as handle:
            return handle.read()

    def test_comment_write(self):
        parsed = BibTexParser(
            self._read('bibtexparser/tests/data/comments_only.bib'))
        expected = self._read(
            'bibtexparser/tests/data/comments_only_output.bib')
        self.assertEqual(to_bibtex(parsed), expected)

    def test_multiline_comment_write(self):
        # This fixture is expected to round-trip unchanged.
        expected = self._read(
            'bibtexparser/tests/data/multiline_comments.bib')
        parsed = BibTexParser(expected)
        self.assertEqual(to_bibtex(parsed), expected)

    def test_multiple_entries(self):
        parsed = BibTexParser(
            self._read('bibtexparser/tests/data/multiple_entries_and_comments.bib'))
        expected = self._read(
            'bibtexparser/tests/data/multiple_entries_and_comments_output.bib')
        self.assertEqual(to_bibtex(parsed), expected)
174 | 


--------------------------------------------------------------------------------
/bibtexparser/bwriter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # Author: Francois Boulogne
  4 | # License:
  5 | 
  6 | 
  7 | import logging
  8 | from bibtexparser.bibdatabase import (BibDatabase, COMMON_STRINGS,
  9 |                                       BibDataString,
 10 |                                       BibDataStringExpression)
 11 | 
 12 | 
 13 | logger = logging.getLogger(__name__)
 14 | 
 15 | __all__ = ['BibTexWriter']
 16 | 
 17 | 
 18 | def to_bibtex(parsed):
 19 |     """
 20 |     Convenience function for backwards compatibility.
 21 |     """
 22 |     return BibTexWriter().write(parsed)
 23 | 
 24 | 
 25 | def _str_or_expr_to_bibtex(e):
 26 |     if isinstance(e, BibDataStringExpression):
 27 |         return ' # '.join([_str_or_expr_to_bibtex(s) for s in e.expr])
 28 |     elif isinstance(e, BibDataString):
 29 |         return e.name
 30 |     else:
 31 |         return '{' + e + '}'
 32 | 
 33 | 
 34 | class BibTexWriter(object):
 35 |     """
 36 |     Writer to convert a :class:`BibDatabase` object to a string or file formatted as a BibTeX file.
 37 | 
 38 |     Example::
 39 | 
 40 |         from bibtexparser.bwriter import BibTexWriter
 41 | 
 42 |         bib_database = ...
 43 | 
 44 |         writer = BibTexWriter()
 45 |         writer.contents = ['comments', 'entries']
 46 |         writer.indent = '  '
 47 |         writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
 48 |         bibtex_str = bibtexparser.dumps(bib_database, writer)
 49 | 
 50 |     """
 51 | 
 52 |     _valid_contents = ['entries', 'comments', 'preambles', 'strings']
 53 | 
 54 |     def __init__(self, write_common_strings=False):
 55 |         #: List of BibTeX elements to write, valid values are `entries`, `comments`, `preambles`, `strings`.
 56 |         self.contents = ['comments', 'preambles', 'strings', 'entries']
 57 |         #: Character(s) for indenting BibTeX field-value pairs. Default: single space.
 58 |         self.indent = ' '
 59 |         #: Align values. Determines the maximal number of characters used in any fieldname and aligns all values
 60 |         #    according to that by filling up with single spaces. Default: False
 61 |         self.align_values = False
 62 |         #: Characters(s) for separating BibTeX entries. Default: new line.
 63 |         self.entry_separator = '\n'
 64 |         #: Tuple of fields for ordering BibTeX entries. Set to `None` to disable sorting. Default: BibTeX key `('ID', )`.
 65 |         self.order_entries_by = ('ID', )
 66 |         #: Tuple of fields for display order in a single BibTeX entry. Fields not listed here will be displayed
 67 |         #: alphabetically at the end. Set to '[]' for alphabetical order. Default: '[]'
 68 |         self.display_order = []
 69 |         #: BibTeX syntax allows comma first syntax
 70 |         #: (common in functional languages), use this to enable
 71 |         #: comma first syntax as the bwriter output
 72 |         self.comma_first = False
 73 |         #: BibTeX syntax allows the comma to be optional at the end of the last field in an entry.
 74 |         #: Use this to enable writing this last comma in the bwriter output. Defaults: False.
 75 |         self.add_trailing_comma = False
 76 |         #: internal variable used if self.align_values = True
 77 |         self._max_field_width = 0
 78 |         #: Whether common strings are written
 79 |         self.common_strings = write_common_strings
 80 | 
 81 |     def write(self, bib_database):
 82 |         """
 83 |         Converts a bibliographic database to a BibTeX-formatted string.
 84 | 
 85 |         :param bib_database: bibliographic database to be converted to a BibTeX string
 86 |         :type bib_database: BibDatabase
 87 |         :return: BibTeX-formatted string
 88 |         :rtype: str or unicode
 89 |         """
 90 |         bibtex = ''
 91 |         for content in self.contents:
 92 |             try:
 93 |                 # Add each element set (entries, comments)
 94 |                 bibtex += getattr(self, '_' + content + '_to_bibtex')(bib_database)
 95 |             except AttributeError:
 96 |                 logger.warning("BibTeX item '{}' does not exist and will not be written. Valid items are {}."
 97 |                                .format(content, self._valid_contents))
 98 |         return bibtex
 99 | 
100 |     def _entries_to_bibtex(self, bib_database):
101 |         bibtex = ''
102 |         if self.order_entries_by:
103 |             # TODO: allow sort field does not exist for entry
104 |             entries = sorted(bib_database.entries, key=lambda x: BibDatabase.entry_sort_key(x, self.order_entries_by))
105 |         else:
106 |             entries = bib_database.entries
107 | 
108 |         if self.align_values:
109 |             # determine maximum field width to be used
110 |             widths = [max(map(len, entry.keys())) for entry in entries]
111 |             self._max_field_width = max(widths)
112 | 
113 |         for entry in entries:
114 |             bibtex += self._entry_to_bibtex(entry)
115 |         return bibtex
116 | 
117 |     def _entry_to_bibtex(self, entry):
118 |         bibtex = ''
119 |         # Write BibTeX key
120 |         bibtex += '@' + entry['ENTRYTYPE'] + '{' + entry['ID']
121 | 
122 |         # create display_order of fields for this entry
123 |         # first those keys which are both in self.display_order and in entry.keys
124 |         display_order = [i for i in self.display_order if i in entry]
125 |         # then all the other fields sorted alphabetically
126 |         display_order += [i for i in sorted(entry) if i not in self.display_order]
127 |         if self.comma_first:
128 |             field_fmt = u"\n{indent}, {field:<{field_max_w}} = {value}"
129 |         else:
130 |             field_fmt = u",\n{indent}{field:<{field_max_w}} = {value}"
131 |         # Write field = value lines
132 |         for field in [i for i in display_order if i not in ['ENTRYTYPE', 'ID']]:
133 |             try:
134 |                 bibtex += field_fmt.format(
135 |                     indent=self.indent,
136 |                     field=field,
137 |                     field_max_w=self._max_field_width,
138 |                     value=_str_or_expr_to_bibtex(entry[field]))
139 |             except TypeError:
140 |                 raise TypeError(u"The field %s in entry %s must be a string"
141 |                                 % (field, entry['ID']))
142 |         if self.add_trailing_comma:
143 |             if self.comma_first:
144 |                 bibtex += '\n'+self.indent+','
145 |             else:
146 |                 bibtex += ','
147 |         bibtex += "\n}\n" + self.entry_separator
148 |         return bibtex
149 | 
150 |     def _comments_to_bibtex(self, bib_database):
151 |         return ''.join(['@comment{{{0}}}\n{1}'.format(comment, self.entry_separator)
152 |                         for comment in bib_database.comments])
153 | 
154 |     def _preambles_to_bibtex(self, bib_database):
155 |         return ''.join(['@preamble{{"{0}"}}\n{1}'.format(preamble, self.entry_separator)
156 |                         for preamble in bib_database.preambles])
157 | 
158 |     def _strings_to_bibtex(self, bib_database):
159 |         return ''.join([
160 |             u'@string{{{name} = {value}}}\n{sep}'.format(
161 |                 name=name,
162 |                 value=_str_or_expr_to_bibtex(value),
163 |                 sep=self.entry_separator)
164 |             for name, value in bib_database.strings.items()
165 |             if (self.common_strings or
166 |                 name not in COMMON_STRINGS or  # user defined string
167 |                 value != COMMON_STRINGS[name]  # string has been updated
168 |                 )])
169 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # BibtexParser documentation build configuration file, created by
  5 | # sphinx-quickstart on Thu Aug  1 13:30:23 2013.
  6 | #
  7 | # This file is execfile()d with the current directory set to its containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | import sys, os
 16 | 
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# '../..' is resolved against the current working directory, which the header
# above states is this file's directory (docs/source). Two levels up is the
# repository root, so the in-tree ``bibtexparser`` package takes precedence
# for the version import below.
sys.path.insert(0, os.path.abspath('../..'))
 21 | 
 22 | # -- General configuration -----------------------------------------------------
 23 | 
 24 | # If your documentation needs a minimal Sphinx version, state it here.
 25 | #needs_sphinx = '1.0'
 26 | 
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
# Enabled here: autodoc (API pages generated from docstrings), doctest (runs
# code examples embedded in the documentation) and viewcode (links documented
# objects to their highlighted source).
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.viewcode']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'BibtexParser'
# NOTE(review): the repository's COPYING file states 2012-2018 — confirm which
# year range is intended here.
copyright = '2013-2016, F. Boulogne and other contributors'
 46 | 
 47 | # The version info for the project you're documenting, acts as replacement for
 48 | # |version| and |release|, also used in various other places throughout the
 49 | # built documents.
 50 | #
 51 | try:
 52 |     import bibtexparser as bp
 53 |     # The short X.Y version.
 54 |     version = bp.__version__
 55 |     # The full version, including alpha/beta/rc tags.
 56 |     release = bp.__version__
 57 | except ImportError:
 58 |     version = 'latest'
 59 |     release = 'latest'
 60 | 
 61 | # The language for content autogenerated by Sphinx. Refer to documentation
 62 | # for a list of supported languages.
 63 | #language = None
 64 | 
 65 | # There are two options for replacing |today|: either, you set today to some
 66 | # non-false value, then it is used:
 67 | #today = ''
 68 | # Else, today_fmt is used as the format for a strftime call.
 69 | #today_fmt = '%B %d, %Y'
 70 | 
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# Empty: no source file is excluded.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
 91 | 
 92 | # A list of ignored prefixes for module index sorting.
 93 | #modindex_common_prefix = []
 94 | 
 95 | # -- Options for HTML output ---------------------------------------------------
 96 | 
# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
# NOTE(review): newer Sphinx releases reportedly renamed the 'default' theme
# to 'classic' — confirm this name still selects the intended theme.
html_theme = 'default'
100 | 
101 | # Theme options are theme-specific and customize the look and feel of a theme
102 | # further.  For a list of options available for each theme, see the
103 | # documentation.
104 | #html_theme_options = {}
105 | 
106 | # Add any paths that contain custom themes here, relative to this directory.
107 | #html_theme_path = []
108 | 
109 | # The name for this set of Sphinx documents.  If None, it defaults to
110 | # "<project> v<release> documentation".
111 | #html_title = None
112 | 
113 | # A shorter title for the navigation bar.  Default is the same as html_title.
114 | #html_short_title = None
115 | 
116 | # The name of an image file (relative to this directory) to place at the top
117 | # of the sidebar.
118 | #html_logo = None
119 | 
120 | # The name of an image file (within the static path) to use as favicon of the
121 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
122 | # pixels large.
123 | #html_favicon = None
124 | 
125 | # Add any paths that contain custom static files (such as style sheets) here,
126 | # relative to this directory. They are copied after the builtin static files,
127 | # so a file named "default.css" will overwrite the builtin "default.css".
128 | #html_static_path = ['_static']
129 | 
130 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
131 | # using the given strftime format.
132 | #html_last_updated_fmt = '%b %d, %Y'
133 | 
134 | # If true, SmartyPants will be used to convert quotes and dashes to
135 | # typographically correct entities.
136 | #html_use_smartypants = True
137 | 
138 | # Custom sidebar templates, maps document names to template names.
139 | #html_sidebars = {}
140 | 
141 | # Additional templates that should be rendered to pages, maps page names to
142 | # template names.
143 | #html_additional_pages = {}
144 | 
145 | # If false, no module index is generated.
146 | #html_domain_indices = True
147 | 
148 | # If false, no index is generated.
149 | #html_use_index = True
150 | 
151 | # If true, the index is split into individual pages for each letter.
152 | #html_split_index = False
153 | 
154 | # If true, links to the reST sources are added to the pages.
155 | #html_show_sourcelink = True
156 | 
157 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
158 | #html_show_sphinx = True
159 | 
160 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
161 | #html_show_copyright = True
162 | 
163 | # If true, an OpenSearch description file will be output, and all pages will
164 | # contain a <link> tag referring to it.  The value of this option must be the
165 | # base URL from which the finished HTML is served.
166 | #html_use_opensearch = ''
167 | 
168 | # This is the file name suffix for HTML files (e.g. ".xhtml").
169 | #html_file_suffix = None
170 | 
# Output file base name for HTML help builder.
# This is the `project` name ('BibtexParser') with a 'doc' suffix.
htmlhelp_basename = 'BibtexParserdoc'
173 | 
174 | 
175 | # -- Options for LaTeX output --------------------------------------------------
176 | 
# Every key below is commented out, so this dict is empty and no LaTeX
# setting is overridden.
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
# A single 'manual' document, BibtexParser.tex, starting at 'index'.
latex_documents = [
  ('index', 'BibtexParser.tex', 'BibtexParser Documentation',
   'F. Boulogne', 'manual'),
]
194 | 
195 | # The name of an image file (relative to this directory) to place at the top of
196 | # the title page.
197 | #latex_logo = None
198 | 
199 | # For "manual" documents, if this is true, then toplevel headings are parts,
200 | # not chapters.
201 | #latex_use_parts = False
202 | 
203 | # If true, show page references after internal links.
204 | #latex_show_pagerefs = False
205 | 
206 | # If true, show URL addresses after external links.
207 | #latex_show_urls = False
208 | 
209 | # Documents to append as an appendix to all manuals.
210 | #latex_appendices = []
211 | 
212 | # If false, no module index is generated.
213 | #latex_domain_indices = True
214 | 
215 | 
216 | # -- Options for manual page output --------------------------------------------
217 | 
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
# A single page named 'bibtexparser', starting at 'index', in manual section 1.
man_pages = [
    ('index', 'bibtexparser', 'BibtexParser Documentation',
     ['F. Boulogne'], 1)
]
224 | 
225 | # If true, show URL addresses after external links.
226 | #man_show_urls = False
227 | 
228 | 
229 | # -- Options for Texinfo output ------------------------------------------------
230 | 
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# NOTE(review): the description field below still reads like the generated
# placeholder text ('One line description of project.') — replace it with a
# real one-line summary of the project.
texinfo_documents = [
  ('index', 'BibtexParser', 'BibtexParser Documentation',
   'F. Boulogne', 'BibtexParser', 'One line description of project.',
   'Miscellaneous'),
]
239 | 
240 | # Documents to append as an appendix to all manuals.
241 | #texinfo_appendices = []
242 | 
243 | # If false, no module index is generated.
244 | #texinfo_domain_indices = True
245 | 
246 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
247 | #texinfo_show_urls = 'footnote'
248 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_bibtexwriter.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | import tempfile
  3 | import unittest
  4 | import bibtexparser
  5 | from bibtexparser.bwriter import BibTexWriter
  6 | from bibtexparser.bibdatabase import BibDatabase
  7 | 
  8 | 
  9 | class TestBibTexWriter(unittest.TestCase):
 10 |     def test_content_entries_only(self):
 11 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
 12 |             bib_database = bibtexparser.load(bibtex_file)
 13 |         writer = BibTexWriter()
 14 |         writer.contents = ['entries']
 15 |         result = bibtexparser.dumps(bib_database, writer)
 16 |         expected = \
 17 | """@book{Toto3000,
 18 |  author = {Toto, A and Titi, B},
 19 |  title = {A title}
 20 | }
 21 | 
 22 | @article{Wigner1938,
 23 |  author = {Wigner, E.},
 24 |  doi = {10.1039/TF9383400029},
 25 |  issn = {0014-7672},
 26 |  journal = {Trans. Faraday Soc.},
 27 |  owner = {fr},
 28 |  pages = {29--41},
 29 |  publisher = {The Royal Society of Chemistry},
 30 |  title = {The transition state method},
 31 |  volume = {34},
 32 |  year = {1938}
 33 | }
 34 | 
 35 | @book{Yablon2005,
 36 |  author = {Yablon, A.D.},
 37 |  publisher = {Springer},
 38 |  title = {Optical fiber fusion slicing},
 39 |  year = {2005}
 40 | }
 41 | 
 42 | """
 43 |         self.assertEqual(result, expected)
 44 | 
 45 |     def test_content_comment_only(self):
 46 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
 47 |             bib_database = bibtexparser.load(bibtex_file)
 48 |         writer = BibTexWriter()
 49 |         writer.contents = ['comments']
 50 |         result = bibtexparser.dumps(bib_database, writer)
 51 |         expected = \
 52 | """@comment{}
 53 | 
 54 | @comment{A comment}
 55 | 
 56 | """
 57 |         self.assertEqual(result, expected)
 58 | 
 59 |     def test_indent(self):
 60 |         bib_database = BibDatabase()
 61 |         bib_database.entries = [{'ID': 'abc123',
 62 |                                  'ENTRYTYPE': 'book',
 63 |                                  'author': 'test'}]
 64 |         writer = BibTexWriter()
 65 |         writer.indent = '  '
 66 |         result = bibtexparser.dumps(bib_database, writer)
 67 |         expected = \
 68 | """@book{abc123,
 69 |   author = {test}
 70 | }
 71 | 
 72 | """
 73 |         self.assertEqual(result, expected)
 74 | 
 75 |     def test_align(self):
 76 |         bib_database = BibDatabase()
 77 |         bib_database.entries = [{'ID': 'abc123',
 78 |                                  'ENTRYTYPE': 'book',
 79 |                                  'author': 'test',
 80 |                                  'thisisaverylongkey': 'longvalue'}]
 81 |         writer = BibTexWriter()
 82 |         writer.align_values = True
 83 |         result = bibtexparser.dumps(bib_database, writer)
 84 |         expected = \
 85 | """@book{abc123,
 86 |  author             = {test},
 87 |  thisisaverylongkey = {longvalue}
 88 | }
 89 | 
 90 | """
 91 |         self.assertEqual(result, expected)
 92 | 
 93 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
 94 |             bib_database = bibtexparser.load(bibtex_file)
 95 |         writer = BibTexWriter()
 96 |         writer.contents = ['entries']
 97 |         writer.align_values = True
 98 |         result = bibtexparser.dumps(bib_database, writer)
 99 |         expected = \
100 | """@book{Toto3000,
101 |  author    = {Toto, A and Titi, B},
102 |  title     = {A title}
103 | }
104 | 
105 | @article{Wigner1938,
106 |  author    = {Wigner, E.},
107 |  doi       = {10.1039/TF9383400029},
108 |  issn      = {0014-7672},
109 |  journal   = {Trans. Faraday Soc.},
110 |  owner     = {fr},
111 |  pages     = {29--41},
112 |  publisher = {The Royal Society of Chemistry},
113 |  title     = {The transition state method},
114 |  volume    = {34},
115 |  year      = {1938}
116 | }
117 | 
118 | @book{Yablon2005,
119 |  author    = {Yablon, A.D.},
120 |  publisher = {Springer},
121 |  title     = {Optical fiber fusion slicing},
122 |  year      = {2005}
123 | }
124 | 
125 | """
126 |         self.assertEqual(result, expected)
127 | 
128 | 
129 |     def test_entry_separator(self):
130 |         bib_database = BibDatabase()
131 |         bib_database.entries = [{'ID': 'abc123',
132 |                                  'ENTRYTYPE': 'book',
133 |                                  'author': 'test'}]
134 |         writer = BibTexWriter()
135 |         writer.entry_separator = ''
136 |         result = bibtexparser.dumps(bib_database, writer)
137 |         expected = \
138 | """@book{abc123,
139 |  author = {test}
140 | }
141 | """
142 |         self.assertEqual(result, expected)
143 | 
144 |     def test_display_order(self):
145 |         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
146 |             bib_database = bibtexparser.load(bibtex_file)
147 |         writer = BibTexWriter()
148 |         writer.contents = ['entries']
149 |         writer.display_order = ['year', 'publisher', 'title']
150 |         result = bibtexparser.dumps(bib_database, writer)
151 |         expected = \
152 | """@book{Toto3000,
153 |  title = {A title},
154 |  author = {Toto, A and Titi, B}
155 | }
156 | 
157 | @article{Wigner1938,
158 |  year = {1938},
159 |  publisher = {The Royal Society of Chemistry},
160 |  title = {The transition state method},
161 |  author = {Wigner, E.},
162 |  doi = {10.1039/TF9383400029},
163 |  issn = {0014-7672},
164 |  journal = {Trans. Faraday Soc.},
165 |  owner = {fr},
166 |  pages = {29--41},
167 |  volume = {34}
168 | }
169 | 
170 | @book{Yablon2005,
171 |  year = {2005},
172 |  publisher = {Springer},
173 |  title = {Optical fiber fusion slicing},
174 |  author = {Yablon, A.D.}
175 | }
176 | 
177 | """
178 |         self.assertEqual(result, expected)
179 | 
180 | 
class TestEntrySorting(unittest.TestCase):
    """Checks the entry order produced for different ``order_entries_by``
    settings, plus a regression test for writing non-ASCII content."""

    # Shared, field-less entries; the tests only read this database.
    bib_database = BibDatabase()
    bib_database.entries = [{'ID': 'b',
                             'ENTRYTYPE': 'article'},
                            {'ID': 'c',
                             'ENTRYTYPE': 'book'},
                            {'ID': 'a',
                             'ENTRYTYPE': 'book'}]

    def test_sort_default(self):
        """Without an explicit writer, entries come out ordered by ID."""
        result = bibtexparser.dumps(self.bib_database)
        expected = "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_none(self):
        """``order_entries_by = None`` keeps the database order."""
        writer = BibTexWriter()
        writer.order_entries_by = None
        result = bibtexparser.dumps(self.bib_database, writer)
        expected = "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_id(self):
        """Sorting by ``ID`` only."""
        writer = BibTexWriter()
        writer.order_entries_by = ('ID', )
        result = bibtexparser.dumps(self.bib_database, writer)
        expected = "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_type(self):
        """Sorting by ``ENTRYTYPE`` only; equal types keep database order."""
        writer = BibTexWriter()
        writer.order_entries_by = ('ENTRYTYPE', )
        result = bibtexparser.dumps(self.bib_database, writer)
        expected = "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_type_id(self):
        """Sorting by ``ENTRYTYPE`` first, then ``ID``."""
        writer = BibTexWriter()
        writer.order_entries_by = ('ENTRYTYPE', 'ID')
        result = bibtexparser.dumps(self.bib_database, writer)
        expected = "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_missing_field(self):
        """An entry lacking the sort field is written first."""
        bib_database = BibDatabase()
        bib_database.entries = [{'ID': 'b',
                                 'ENTRYTYPE': 'article',
                                 'year': '2000'},
                                {'ID': 'c',
                                 'ENTRYTYPE': 'book',
                                 'year': '2010'},
                                {'ID': 'a',
                                 'ENTRYTYPE': 'book'}]
        writer = BibTexWriter()
        writer.order_entries_by = ('year', )
        result = bibtexparser.dumps(bib_database, writer)
        expected = "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n@book{c,\n year = {2010}\n}\n\n"
        self.assertEqual(result, expected)

    def test_unicode_problems(self):
        """Dumping an entry with non-ASCII text to a file must not raise."""
        # See #51
        # Raw string: in a normal literal the backslash of each LaTeX accent
        # (backslash, quote, {e}) is consumed as a quote escape, so the
        # parser would never see the accent commands this input contains.
        bibtex = r"""
        @article{Mesa-Gresa2013,
            abstract = {During a 4-week period half the mice (n = 16) were exposed to EE and the other half (n = 16) remained in a standard environment (SE). Aggr. Behav. 9999:XX-XX, 2013. © 2013 Wiley Periodicals, Inc.},
            author = {Mesa-Gresa, Patricia and P\'{e}rez-Martinez, Asunci\'{o}n and Redolat, Rosa},
            doi = {10.1002/ab.21481},
            file = {:Users/jscholz/Documents/mendeley/Mesa-Gresa, P\'{e}rez-Martinez, Redolat - 2013 - Environmental Enrichment Improves Novel Object Recognition and Enhances Agonistic Behavior.pdf:pdf},
            issn = {1098-2337},
            journal = {Aggressive behavior},
            month = "apr",
            number = {April},
            pages = {269--279},
            pmid = {23588702},
            title = {{Environmental Enrichment Improves Novel Object Recognition and Enhances Agonistic Behavior in Male Mice.}},
            url = {http://www.ncbi.nlm.nih.gov/pubmed/23588702},
            volume = {39},
            year = {2013}
        }
        """
        bibdb = bibtexparser.loads(bibtex)
        with tempfile.TemporaryFile(mode='w+') as bibtex_file:
            bibtexparser.dump(bibdb, bibtex_file)
            # No exception should be raised
263 | 
264 | 


--------------------------------------------------------------------------------
/bibtexparser/bibdatabase.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from collections import OrderedDict
  5 | import sys
  6 | import logging
  7 | 
  8 | logger = logging.getLogger(__name__)
  9 | 
 10 | if sys.version_info >= (3, 0):
 11 |     ustr = str
 12 | else:
 13 |     ustr = unicode
 14 | 
 15 | 
 16 | STANDARD_TYPES = set([
 17 |     'article',
 18 |     'book',
 19 |     'booklet',
 20 |     'conference',
 21 |     'inbook',
 22 |     'incollection',
 23 |     'inproceedings',
 24 |     'manual',
 25 |     'mastersthesis',
 26 |     'misc',
 27 |     'phdthesis',
 28 |     'proceedings',
 29 |     'techreport',
 30 |     'unpublished'])
 31 | 
 32 | COMMON_STRINGS = OrderedDict([
 33 |     ('jan', 'January'),
 34 |     ('feb', 'February'),
 35 |     ('mar', 'March'),
 36 |     ('apr', 'April'),
 37 |     ('may', 'May'),
 38 |     ('jun', 'June'),
 39 |     ('jul', 'July'),
 40 |     ('aug', 'August'),
 41 |     ('sep', 'September'),
 42 |     ('oct', 'October'),
 43 |     ('nov', 'November'),
 44 |     ('dec', 'December'),
 45 |     ])
 46 | 
 47 | 
 48 | class UndefinedString(KeyError):
 49 |     pass
 50 | 
 51 | 
 52 | class BibDatabase(object):
 53 |     """
 54 |     Bibliographic database object that follows the data structure of a BibTeX file.
 55 |     """
 56 | 
 57 |     def __init__(self):
 58 |         #: List of BibTeX entries, for example `@book{...}`, `@article{...}`, etc. Each entry is a simple dict with
 59 |         #: BibTeX field-value pairs, for example `'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.'` Each
 60 |         #: entry will always have the following dict keys (in addition to other BibTeX fields):
 61 |         #:
 62 |         #: * `ID` (BibTeX key)
 63 |         #: * `ENTRYTYPE` (entry type in lowercase, e.g. `book`, `article` etc.)
 64 |         self.entries = []
 65 |         self._entries_dict = {}
 66 |         #: List of BibTeX comment (`@comment{...}`) blocks.
 67 |         self.comments = []
 68 |         #: OrderedDict of BibTeX string definitions (`@string{...}`). In order of definition.
 69 |         self.strings = OrderedDict()  # Not sure if order is import, keep order just in case
 70 |         #: List of BibTeX preamble (`@preamble{...}`) blocks.
 71 |         self.preambles = []
 72 | 
 73 |         #: List of fields that should not be updated when resolving crossrefs
 74 |         self._not_updated_by_crossref = ['_FROM_CROSSREF']
 75 | 
 76 |     def load_common_strings(self):
 77 |         self.strings.update(COMMON_STRINGS)
 78 | 
 79 |     def get_entry_list(self):
 80 |         """Get a list of bibtex entries.
 81 | 
 82 |         :returns: BibTeX entries
 83 |         :rtype: list
 84 |         .. deprecated:: 0.5.6
 85 |            Use :attr:`entries` instead.
 86 |         """
 87 |         return self.entries
 88 | 
 89 |     @staticmethod
 90 |     def entry_sort_key(entry, fields):
 91 |         result = []
 92 |         for field in fields:
 93 |             result.append(ustr(entry.get(field, '')).lower())  # Sorting always as string
 94 |         return tuple(result)
 95 | 
 96 |     def _make_entries_dict(self):
 97 |         for entry in self.entries:
 98 |             self._entries_dict[entry['ID']] = entry
 99 | 
100 |     def get_entry_dict(self):
101 |         """Return a dictionary of BibTeX entries.
102 |         The dict key is the BibTeX entry key
103 |         """
104 |         # If the hash has never been made, make it
105 |         if not self._entries_dict:
106 |             self._make_entries_dict()
107 |         return self._entries_dict
108 | 
109 |     entries_dict = property(get_entry_dict)
110 | 
111 |     def expand_string(self, name):
112 |         try:
113 |             return BibDataStringExpression.expand_if_expression(
114 |                 self.strings[name])
115 |         except KeyError:
116 |             raise(UndefinedString(name))
117 | 
118 |     def _add_missing_from_crossref_entry(self, entry, dependencies=set()):
119 |         if entry['ID'] in self._crossref_updated:
120 |             return
121 | 
122 |         if entry['_crossref'] not in self.entries_dict:
123 |             logger.error("Crossref reference %s for %s is missing.",
124 |                          entry['_crossref'],
125 |                          entry['ID'])
126 |             return
127 | 
128 |         if entry['_crossref'] in dependencies:
129 |             logger.error("Circular crossref dependency: %s->%s->%s.",
130 |                          "->".join(dependencies),
131 |                          entry['ID'],
132 |                          entry['_crossref'])
133 |             return
134 | 
135 |         crossref_entry = self.entries_dict[entry['_crossref']]
136 |         if '_crossref' in crossref_entry:
137 |             dependencies.add(entry['ID'])
138 |             self._add_missing_from_crossref_entry(crossref_entry, dependencies)
139 |             dependencies.remove(entry['ID'])
140 | 
141 |         from_crossref = {bibfield: bibvalue
142 |                          for (bibfield, bibvalue) in crossref_entry.items()
143 |                          if bibfield not in entry.keys() and
144 |                             bibfield not in self._not_updated_by_crossref}
145 | 
146 |         entry.update(from_crossref)
147 | 
148 |         self._crossref_updated.append(entry['ID'])
149 |         entry['_FROM_CROSSREF'] = sorted(from_crossref.keys())
150 |         del entry['_crossref']
151 | 
152 |     def add_missing_from_crossref(self):
153 |         """Resolve crossrefs and update entries accordingly.
154 |         """
155 |         self._crossref_updated = []
156 |         for entry in self.entries:
157 |             if "_crossref" in entry:
158 |                 self._add_missing_from_crossref_entry(entry)
159 | 
160 | 
class BibDataString(object):
    """
    Represents a bibtex string.

    This object enables maintaining string expressions as list of strings
    and BibDataString. Can be interpolated from Bibdatabase.

    :param bibdatabase: database used to resolve the string's value
    :param name: name of the string; stored in lowercase
    """

    def __init__(self, bibdatabase, name):
        self._bibdatabase = bibdatabase
        self.name = name.lower()

    def __eq__(self, other):
        return isinstance(other, BibDataString) and self.name == other.name

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; without this,
        # two equal strings would also compare as "not equal" there.
        return not self.__eq__(other)

    def __repr__(self):
        return "BibDataString({})".format(repr(self.name))

    def get_value(self):
        """
        Query value from string name.

        :returns: string
        """
        return self._bibdatabase.expand_string(self.name)

    def get_dependencies(self, known_dependencies=None):
        """Recursively tracks strings on which the expression depends.

        Not implemented yet.

        :param known_dependencies: dependencies to ignore
        :raises NotImplementedError: always
        """
        raise NotImplementedError

    @staticmethod
    def expand_string(string_or_bibdatastring):
        """
        Eventually replaces a bibdatastring by its value.

        :param string_or_bibdatastring: the parsed token
        :type string_or_bibdatastring: string or BibDataString
        :returns: string
        """
        if isinstance(string_or_bibdatastring, BibDataString):
            return string_or_bibdatastring.get_value()
        else:
            return string_or_bibdatastring
207 | 
208 | 
class BibDataStringExpression(object):
    """
    Represents a bibtex string expression.

    String expressions are sequences of regular strings and bibtex strings.
    This object enables maintaining string expressions as list of strings.
    The expression are represented as lists of regular strings and
    BibDataStrings. They can be interpolated from Bibdatabase.

    BibDataStringExpression(e)

    :param e: list of strings and BibDataStrings
    """

    def __init__(self, expression):
        self.expr = expression

    def __eq__(self, other):
        return isinstance(other, BibDataStringExpression) and self.expr == other.expr

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; without this,
        # two equal expressions would also compare as "not equal" there.
        return not self.__eq__(other)

    def __repr__(self):
        return "BibDataStringExpression({})".format(repr(self.expr))

    def get_value(self):
        """
        Replaces bibdatastrings by their values in the expression.

        :returns: string
        """
        return ''.join([BibDataString.expand_string(s) for s in self.expr])

    def apply_on_strings(self, fun):
        """
        Maps a function on strings in expression, keeping unchanged
        BibDataStrings. The expression is modified in place.

        :param fun: function from strings to strings
        """
        self.expr = [s if isinstance(s, BibDataString) else fun(s)
                     for s in self.expr]

    @staticmethod
    def expand_if_expression(string_or_expression):
        """
        Eventually replaces a BibDataStringExpression by its value.

        :param string_or_expression: the object to expand
        :type string_or_expression: string or BibDataStringExpression
        :returns: string
        """
        if isinstance(string_or_expression, BibDataStringExpression):
            return string_or_expression.get_value()
        else:
            return string_or_expression

    @staticmethod
    def expression_if_needed(tokens):
        """Build expression only if tokens are not a regular value.

        :param tokens: list of strings and BibDataStrings
        :returns: the token itself when ``tokens`` holds exactly one regular
            string, otherwise a BibDataStringExpression wrapping ``tokens``
        """
        if len(tokens) == 1 and not isinstance(tokens[0], BibDataString):
            return tokens[0]
        else:
            return BibDataStringExpression(tokens)
272 | 
273 | 
def as_text(text_string_or_expression):
    """Return the argument as text.

    BibDataString and BibDataStringExpression objects are replaced by the
    result of their ``get_value()``; anything else is converted with
    ``ustr``.
    """
    expandable = (BibDataString, BibDataStringExpression)
    if not isinstance(text_string_or_expression, expandable):
        return ustr(text_string_or_expression)
    return text_string_or_expression.get_value()
280 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
  1 | Copyright (c) 2012-2018, François Boulogne and the python-bibtexparser contributors
  2 | All rights reserved.
  3 | 
  4 | 
  5 | The code is distributed under a dual license (at your choice).
  6 | 
  7 | #####################################################################
  8 | Redistribution and use in source and binary forms, with or without
  9 | modification, are permitted provided that the following conditions are
 10 | met:
 11 | 
 12 |     (1) Redistributions of source code must retain the above copyright
 13 |     notice, this list of conditions and the following disclaimer. 
 14 | 
 15 |     (2) Redistributions in binary form must reproduce the above copyright
 16 |     notice, this list of conditions and the following disclaimer in
 17 |     the documentation and/or other materials provided with the
 18 |     distribution.  
 19 |     
 20 |     (3)The name of the author may not be used to
 21 |     endorse or promote products derived from this software without
 22 |     specific prior written permission.
 23 | 
 24 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 25 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 27 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 29 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 32 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 33 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 34 | POSSIBILITY OF SUCH DAMAGE.
 35 | 
 36 | #####################################################################
 37 |                    GNU LESSER GENERAL PUBLIC LICENSE
 38 |                        Version 3, 29 June 2007
 39 | 
 40 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 41 |  Everyone is permitted to copy and distribute verbatim copies
 42 |  of this license document, but changing it is not allowed.
 43 | 
 44 | 
 45 |   This version of the GNU Lesser General Public License incorporates
 46 | the terms and conditions of version 3 of the GNU General Public
 47 | License, supplemented by the additional permissions listed below.
 48 | 
 49 |   0. Additional Definitions.
 50 | 
 51 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 52 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 53 | General Public License.
 54 | 
 55 |   "The Library" refers to a covered work governed by this License,
 56 | other than an Application or a Combined Work as defined below.
 57 | 
 58 |   An "Application" is any work that makes use of an interface provided
 59 | by the Library, but which is not otherwise based on the Library.
 60 | Defining a subclass of a class defined by the Library is deemed a mode
 61 | of using an interface provided by the Library.
 62 | 
 63 |   A "Combined Work" is a work produced by combining or linking an
 64 | Application with the Library.  The particular version of the Library
 65 | with which the Combined Work was made is also called the "Linked
 66 | Version".
 67 | 
 68 |   The "Minimal Corresponding Source" for a Combined Work means the
 69 | Corresponding Source for the Combined Work, excluding any source code
 70 | for portions of the Combined Work that, considered in isolation, are
 71 | based on the Application, and not on the Linked Version.
 72 | 
 73 |   The "Corresponding Application Code" for a Combined Work means the
 74 | object code and/or source code for the Application, including any data
 75 | and utility programs needed for reproducing the Combined Work from the
 76 | Application, but excluding the System Libraries of the Combined Work.
 77 | 
 78 |   1. Exception to Section 3 of the GNU GPL.
 79 | 
 80 |   You may convey a covered work under sections 3 and 4 of this License
 81 | without being bound by section 3 of the GNU GPL.
 82 | 
 83 |   2. Conveying Modified Versions.
 84 | 
 85 |   If you modify a copy of the Library, and, in your modifications, a
 86 | facility refers to a function or data to be supplied by an Application
 87 | that uses the facility (other than as an argument passed when the
 88 | facility is invoked), then you may convey a copy of the modified
 89 | version:
 90 | 
 91 |    a) under this License, provided that you make a good faith effort to
 92 |    ensure that, in the event an Application does not supply the
 93 |    function or data, the facility still operates, and performs
 94 |    whatever part of its purpose remains meaningful, or
 95 | 
 96 |    b) under the GNU GPL, with none of the additional permissions of
 97 |    this License applicable to that copy.
 98 | 
 99 |   3. Object Code Incorporating Material from Library Header Files.
100 | 
101 |   The object code form of an Application may incorporate material from
102 | a header file that is part of the Library.  You may convey such object
103 | code under terms of your choice, provided that, if the incorporated
104 | material is not limited to numerical parameters, data structure
105 | layouts and accessors, or small macros, inline functions and templates
106 | (ten or fewer lines in length), you do both of the following:
107 | 
108 |    a) Give prominent notice with each copy of the object code that the
109 |    Library is used in it and that the Library and its use are
110 |    covered by this License.
111 | 
112 |    b) Accompany the object code with a copy of the GNU GPL and this license
113 |    document.
114 | 
115 |   4. Combined Works.
116 | 
117 |   You may convey a Combined Work under terms of your choice that,
118 | taken together, effectively do not restrict modification of the
119 | portions of the Library contained in the Combined Work and reverse
120 | engineering for debugging such modifications, if you also do each of
121 | the following:
122 | 
123 |    a) Give prominent notice with each copy of the Combined Work that
124 |    the Library is used in it and that the Library and its use are
125 |    covered by this License.
126 | 
127 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
128 |    document.
129 | 
130 |    c) For a Combined Work that displays copyright notices during
131 |    execution, include the copyright notice for the Library among
132 |    these notices, as well as a reference directing the user to the
133 |    copies of the GNU GPL and this license document.
134 | 
135 |    d) Do one of the following:
136 | 
137 |        0) Convey the Minimal Corresponding Source under the terms of this
138 |        License, and the Corresponding Application Code in a form
139 |        suitable for, and under terms that permit, the user to
140 |        recombine or relink the Application with a modified version of
141 |        the Linked Version to produce a modified Combined Work, in the
142 |        manner specified by section 6 of the GNU GPL for conveying
143 |        Corresponding Source.
144 | 
145 |        1) Use a suitable shared library mechanism for linking with the
146 |        Library.  A suitable mechanism is one that (a) uses at run time
147 |        a copy of the Library already present on the user's computer
148 |        system, and (b) will operate properly with a modified version
149 |        of the Library that is interface-compatible with the Linked
150 |        Version.
151 | 
152 |    e) Provide Installation Information, but only if you would otherwise
153 |    be required to provide such information under section 6 of the
154 |    GNU GPL, and only to the extent that such information is
155 |    necessary to install and execute a modified version of the
156 |    Combined Work produced by recombining or relinking the
157 |    Application with a modified version of the Linked Version. (If
158 |    you use option 4d0, the Installation Information must accompany
159 |    the Minimal Corresponding Source and Corresponding Application
160 |    Code. If you use option 4d1, you must provide the Installation
161 |    Information in the manner specified by section 6 of the GNU GPL
162 |    for conveying Corresponding Source.)
163 | 
164 |   5. Combined Libraries.
165 | 
166 |   You may place library facilities that are a work based on the
167 | Library side by side in a single library together with other library
168 | facilities that are not Applications and are not covered by this
169 | License, and convey such a combined library under terms of your
170 | choice, if you do both of the following:
171 | 
172 |    a) Accompany the combined library with a copy of the same work based
173 |    on the Library, uncombined with any other library facilities,
174 |    conveyed under the terms of this License.
175 | 
176 |    b) Give prominent notice with the combined library that part of it
177 |    is a work based on the Library, and explaining where to find the
178 |    accompanying uncombined form of the same work.
179 | 
180 |   6. Revised Versions of the GNU Lesser General Public License.
181 | 
182 |   The Free Software Foundation may publish revised and/or new versions
183 | of the GNU Lesser General Public License from time to time. Such new
184 | versions will be similar in spirit to the present version, but may
185 | differ in detail to address new problems or concerns.
186 | 
187 |   Each version is given a distinguishing version number. If the
188 | Library as you received it specifies that a certain numbered version
189 | of the GNU Lesser General Public License "or any later version"
190 | applies to it, you have the option of following the terms and
191 | conditions either of that published version or of any later version
192 | published by the Free Software Foundation. If the Library as you
193 | received it does not specify a version number of the GNU Lesser
194 | General Public License, you may choose any version of the GNU Lesser
195 | General Public License ever published by the Free Software Foundation.
196 | 
197 |   If the Library as you received it specifies that a proxy can decide
198 | whether future versions of the GNU Lesser General Public License shall
199 | apply, that proxy's public statement of acceptance of any version is
200 | permanent authorization for you to choose that version for the
201 | Library.
202 | 


--------------------------------------------------------------------------------
/bibtexparser/bibtexexpression.py:
--------------------------------------------------------------------------------
  1 | import pyparsing as pp
  2 | 
  3 | from .bibdatabase import BibDataStringExpression
  4 | 
  5 | 
  6 | # General helpers
  7 | 
  8 | def _strip_after_new_lines(s):
  9 |     """Removes leading and trailing whitespaces in all but first line."""
 10 |     lines = s.splitlines()
 11 |     if len(lines) > 1:
 12 |         lines = [lines[0]] + [l.lstrip() for l in lines[1:]]
 13 |     return '\n'.join(lines)
 14 | 
 15 | 
 16 | def strip_after_new_lines(s):
 17 |     """Removes leading and trailing whitespaces in all but first line.
 18 | 
 19 |     :param s: string or BibDataStringExpression
 20 |     """
 21 |     if isinstance(s, BibDataStringExpression):
 22 |         s.apply_on_strings(_strip_after_new_lines)
 23 |         return s
 24 |     else:
 25 |         return _strip_after_new_lines(s)
 26 | 
 27 | 
 28 | def add_logger_parse_action(expr, log_func):
 29 |     """Register a callback on expression parsing with the adequate message."""
 30 |     def action(s, l, t):
 31 |         log_func("Found {}: {}".format(expr.resultsName, t))
 32 |     expr.addParseAction(action)
 33 | 
 34 | 
 35 | # Parse action helpers
 36 | # Helpers for returning values from the parsed tokens. Shaped as pyparsing's
 37 | # parse actions. See pyparsing documentation for the arguments.
 38 | 
 39 | def first_token(string_, location, token):
 40 |     # TODO Handle this case correctly!
 41 |     assert(len(token) == 1)
 42 |     return token[0]
 43 | 
 44 | 
 45 | def remove_trailing_newlines(string_, location, token):
 46 |     if token[0]:
 47 |         return token[0].rstrip('\n')
 48 | 
 49 | 
 50 | def remove_braces(string_, location, token):
 51 |     if len(token[0]) < 1:
 52 |         return ''
 53 |     else:
 54 |         start = 1 if token[0][0] == '{' else 0
 55 |         end = -1 if token[0][-1] == '}' else None
 56 |         return token[0][start:end]
 57 | 
 58 | 
 59 | def field_to_pair(string_, location, token):
 60 |     """
 61 |     Looks for parsed element named 'Field'.
 62 | 
 63 |     :returns: (name, value).
 64 |     """
 65 |     field = token.get('Field')
 66 |     value = field.get('Value')
 67 |     if isinstance(value, pp.ParseResults):
 68 |         # For pyparsing >= 2.3.1 (see #225 and API change note in pyparsing's
 69 |         # Changelog).
 70 |         value = value[0]
 71 |     return (field.get('FieldName'),
 72 |             strip_after_new_lines(value))
 73 | 
 74 | 
 75 | # Expressions helpers
 76 | 
 77 | def in_braces_or_pars(exp):
 78 |     """
 79 |     exp -> (exp)|{exp}
 80 |     """
 81 |     return ((pp.Suppress('{') + exp + pp.Suppress('}')) |
 82 |             (pp.Suppress('(') + exp + pp.Suppress(')')))
 83 | 
 84 | 
class BibtexExpression(object):
    """Gives access to pyparsing expressions.

    Attributes are pyparsing expressions for the following elements:

    * main_expression: the bibtex file
    * string_def: a string definition
    * preamble_decl: a preamble declaration
    * explicit_comment: an explicit comment
    * entry: an entry definition
    * implicit_comment: an implicit comment

    """

    # Re-exported so callers can catch parse errors without importing
    # pyparsing themselves.
    ParseException = pp.ParseException

    def __init__(self):
        """Build all the pyparsing expressions and store the public ones
        as attributes (see the class docstring for the list)."""

        # Bibtex keywords

        string_def_start = pp.CaselessKeyword("@string")
        preamble_start = pp.CaselessKeyword("@preamble")
        comment_line_start = pp.CaselessKeyword('@comment')

        # String names
        string_name = pp.Word(pp.alphanums + '_-:')('StringName')
        # Install a do-nothing callback, then register the bound method that
        # delegates to whatever callback is currently set (see the note in
        # set_string_name_parse_action).
        self.set_string_name_parse_action(lambda s, l, t: None)
        string_name.addParseAction(self._string_name_parse_action)

        # Values inside bibtex fields
        # Values can be integer or string expressions. The latter may use
        # quoted or braced values.

        # Integer values
        integer = pp.Word(pp.nums)('Integer')

        # Braced values: braced values can contain nested (but balanced) braces
        braced_value_content = pp.CharsNotIn('{}')
        braced_value = pp.Forward()  # Recursive definition for nested braces
        braced_value <<= pp.originalTextFor(
            '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
            )('BracedValue')
        # Only the outermost pair of braces is removed from the matched text.
        braced_value.setParseAction(remove_braces)
        # TODO add ignore for "\}" and "\{" ?
        # TODO @ are not parsed by bibtex in braces

        # Quoted values: may contain braced content with balanced braces
        brace_in_quoted = pp.nestedExpr('{', '}', ignoreExpr=None)
        text_in_quoted = pp.CharsNotIn('"{}')
        # (quotes should be escaped by braces in quoted value)
        quoted_value = pp.originalTextFor(
            '"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"'
            )('QuotedValue')
        quoted_value.addParseAction(pp.removeQuotes)

        # String expressions: '#'-separated quoted values, braced values and
        # string names
        string_expr = pp.delimitedList(
            (quoted_value | braced_value | string_name), delim='#'
            )('StringExpression')
        # Same indirection as for string names: a do-nothing callback by
        # default, replaceable through set_string_expression_parse_action.
        self.set_string_expression_parse_action(lambda s, l, t: None)
        string_expr.addParseAction(self._string_expr_parse_action)

        value = (integer | string_expr)('Value')

        # Entries

        # @EntryType { ...
        entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
        entry_type.setParseAction(first_token)

        # Entry key: any character up to a ',' without leading and trailing
        # spaces. Also exclude spaces and prevent it from being empty.
        key = pp.SkipTo(',')('Key')  # TODO Maybe also exclude @',\#}{~%

        def citekeyParseAction(string_, location, token):
            """Parse action for validating citekeys.

            It ensures citekey is not empty and has no space.

            :args: see pyparsing documentation.
            :returns: the citekey stripped of surrounding whitespace.
            :raises ParseException: if the citekey is empty or contains
                whitespace.
            """
            key = first_token(string_, location, token).strip()
            if len(key) < 1:
                raise self.ParseException(
                    string_, loc=location, msg="Empty citekeys are not allowed.")
            for i, c in enumerate(key):
                if c.isspace():
                    raise self.ParseException(
                        string_, loc=(location + i),
                        msg="Whitespace not allowed in citekeys.")
            return key

        key.setParseAction(citekeyParseAction)

        # Field name: word of letters, digits and the characters '_-().+'
        field_name = pp.Word(pp.alphanums + '_-().+')('FieldName')
        field_name.setParseAction(first_token)

        # Field: field_name = value
        field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
        field.setParseAction(field_to_pair)

        # List of fields: comma separated fields, with an optional trailing
        # comma
        field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
                      )('Fields')
        # The (name, value) pairs are turned into a dict. They are inserted
        # in reverse order, so when a field name is repeated its first
        # occurrence is the one that is kept.
        field_list.setParseAction(
            lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})

        # Entry: type, key, and fields
        self.entry = (entry_type +
                      in_braces_or_pars(key + pp.Suppress(',') + field_list)
                      )('Entry')

        # Other stuff: comments, string definitions, and preamble declarations

        # Explicit comments: @comment + everything up to next valid declaration
        # starting on new line.
        not_an_implicit_comment = (pp.LineEnd() + pp.Literal('@')
                                   ) | pp.StringEnd()
        self.explicit_comment = (
            pp.Suppress(comment_line_start) +
            pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
                               asString=True))('ExplicitComment')
        self.explicit_comment.addParseAction(remove_trailing_newlines)
        self.explicit_comment.addParseAction(remove_braces)
        # Previous implementation included comment until next '}'.
        # This is however not inline with bibtex behavior that is to only
        # ignore until EOL. Brace stripping is arbitrary here but avoids
        # duplication on bibtex write.

        # Empty implicit_comments lead to infinite loop of zeroOrMore
        def mustNotBeEmpty(t):
            """Parse action rejecting a match whose first token is empty."""
            if not t[0]:
                raise pp.ParseException("Match must not be empty.")

        # Implicit comments: not anything else
        self.implicit_comment = pp.originalTextFor(
            pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
            asString=True)('ImplicitComment')
        self.implicit_comment.addParseAction(remove_trailing_newlines)

        # String definition
        self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
            string_name +
            pp.Suppress('=') +
            string_expr('StringValue')
            ))('StringDefinition')

        # Preamble declaration
        self.preamble_decl = (pp.Suppress(preamble_start) +
                              in_braces_or_pars(value))('PreambleDeclaration')

        # Main bibtex expression

        # Alternatives are tried in the listed order; the implicit comment
        # comes last and takes whatever the other alternatives did not match.
        self.main_expression = pp.ZeroOrMore(
                self.string_def |
                self.preamble_decl |
                self.explicit_comment |
                self.entry |
                self.implicit_comment)

    def add_log_function(self, log_fun):
        """Add notice to logger on entry, comment, preamble, string definitions.

        :param log_fun: logger function
        """
        for e in [self.entry,
                  self.implicit_comment,
                  self.explicit_comment,
                  self.preamble_decl,
                  self.string_def]:
            add_logger_parse_action(e, log_fun)

    def set_string_name_parse_action(self, fun):
        """Set the parseAction for string name expression.

        :param fun: callback taking pyparsing's ``(s, l, t)`` parse action
            arguments.

        .. Note::

            For some reason pyparsing duplicates the string_name
            expression so setting its parseAction a posteriori has no effect
            in the context of a string expression. This is why this function
            should be used instead.
        """
        self._string_name_parse_action_fun = fun

    def _string_name_parse_action(self, s, l, t):
        """Forward to the callback set by set_string_name_parse_action."""
        return self._string_name_parse_action_fun(s, l, t)

    def set_string_expression_parse_action(self, fun):
        """Set the parseAction for string_expression expression.

        :param fun: callback taking pyparsing's ``(s, l, t)`` parse action
            arguments.

        .. Note::

            See set_string_name_parse_action.
        """
        self._string_expr_parse_action_fun = fun

    def _string_expr_parse_action(self, s, l, t):
        """Forward to the callback set by set_string_expression_parse_action."""
        return self._string_expr_parse_action_fun(s, l, t)

    def parseFile(self, file_obj):
        """Parse ``file_obj`` with the main expression.

        ``parseAll=True`` is passed to pyparsing, so the whole content is
        expected to match.

        :param file_obj: file or file-like object holding the bibtex content
        :returns: the result of pyparsing's ``parseFile``
        """
        return self.main_expression.parseFile(file_obj, parseAll=True)
287 | 


--------------------------------------------------------------------------------
/bibtexparser/bparser.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Original source: github.com/okfn/bibserver
  5 | # Authors:
  6 | # markmacgillivray
  7 | # Etienne Posthumus (epoz)
  8 | # Francois Boulogne <fboulogne at april dot org>
  9 | 
 10 | import sys
 11 | import io
 12 | import logging
 13 | 
 14 | from bibtexparser.bibdatabase import (BibDatabase, BibDataString, as_text,
 15 |                                       BibDataStringExpression, STANDARD_TYPES)
 16 | from bibtexparser.bibtexexpression import BibtexExpression
 17 | 
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Only the parser class is exported by a star-import of this module.
__all__ = ['BibTexParser']


# ``ustr`` is the text type of the running interpreter: ``str`` on
# Python 3 and ``unicode`` on Python 2.
if sys.version_info >= (3, 0):
    ustr = str
else:
    ustr = unicode
 27 | 
 28 | 
 29 | def parse(data, *args, **kwargs):
 30 |     parser = BibTexParser(*args, **kwargs)
 31 |     return parser.parse(data)
 32 | 
 33 | 
 34 | class BibTexParser(object):
 35 |     """
 36 |     A parser for reading BibTeX bibliographic data files.
 37 | 
 38 |     Example::
 39 | 
 40 |         from bibtexparser.bparser import BibTexParser
 41 | 
 42 |         bibtex_str = ...
 43 | 
 44 |         parser = BibTexParser()
 45 |         parser.ignore_nonstandard_types = False
 46 |         parser.homogenize_fields = False
 47 |         parser.common_strings = False
 48 |         bib_database = bibtexparser.loads(bibtex_str, parser)
 49 | 
 50 |     :param customization: function or None (default)
 51 |         Customization to apply to parsed entries.
 52 |     :param ignore_nonstandard_types: bool (default True)
 53 |         If True ignores non-standard bibtex entry types.
 54 |     :param homogenize_fields: bool (default False)
 55 |         Common field name replacements (as set in alt_dict attribute).
 56 |     :param interpolate_strings: bool (default True)
 57 |         If True, replace bibtex string by their value, else uses
 58 |         BibDataString objects.
 59 |     :param common_strings: bool (default False)
 60 |         Include common string definitions (e.g. month abbreviations) to
 61 |         the bibtex file.
 62 |     :param add_missing_from_crossref: bool (default False)
 63 |         Resolve BibTeX references set in the crossref field for BibTeX entries
 64 |         and add the fields from the referenced entry to the referencing entry.
 65 |     """
 66 | 
 67 |     def __new__(cls, data=None, **args):
 68 |         """
 69 |         To catch the old API structure in which creating the parser would
 70 |         immediately parse and return data.
 71 |         """
 72 | 
 73 |         if data is None:
 74 |             return super(BibTexParser, cls).__new__(cls)
 75 |         else:
 76 |             # For backwards compatibility: if data is given, parse
 77 |             # and return the `BibDatabase` object instead of the parser.
 78 |             return parse(data, **args)
 79 | 
 80 |     def __init__(self, data=None,
 81 |                  customization=None,
 82 |                  ignore_nonstandard_types=True,
 83 |                  homogenize_fields=False,
 84 |                  interpolate_strings=True,
 85 |                  common_strings=False,
 86 |                  add_missing_from_crossref=False):
 87 |         """
 88 |         Creates a parser for rading BibTeX files
 89 | 
 90 |         :return: parser
 91 |         :rtype: `BibTexParser`
 92 |         """
 93 |         self.bib_database = BibDatabase()
 94 | 
 95 |         #: Load common strings such as months abbreviation
 96 |         #: Default: `False`.
 97 |         self.common_strings = common_strings
 98 |         if self.common_strings:
 99 |             self.bib_database.load_common_strings()
100 | 
101 |         #: Callback function to process BibTeX entries after parsing,
102 |         #: for example to create a list from a string with multiple values.
103 |         #: By default all BibTeX values are treated as simple strings.
104 |         #: Default: `None`.
105 |         self.customization = customization
106 | 
107 |         #: Ignore non-standard BibTeX types (`book`, `article`, etc).
108 |         #: Default: `True`.
109 |         self.ignore_nonstandard_types = ignore_nonstandard_types
110 | 
111 |         #: Sanitize BibTeX field names, for example change `url` to `link` etc.
112 |         #: Field names are always converted to lowercase names.
113 |         #: Default: `False`.
114 |         self.homogenize_fields = homogenize_fields
115 | 
116 |         #: Interpolate Bibtex Strings or keep the structure
117 |         self.interpolate_strings = interpolate_strings
118 | 
119 |         # On some sample data files, the character encoding detection simply
120 |         # hangs We are going to default to utf8, and mandate it.
121 |         self.encoding = 'utf8'
122 | 
123 |         # Add missing field from cross-ref
124 |         self.add_missing_from_crossref = add_missing_from_crossref
125 | 
126 |         # pre-defined set of key changes
127 |         self.alt_dict = {
128 |             'keyw': u'keyword',
129 |             'keywords': u'keyword',
130 |             'authors': u'author',
131 |             'editors': u'editor',
132 |             'urls': u'url',
133 |             'link': u'url',
134 |             'links': u'url',
135 |             'subjects': u'subject',
136 |             'xref': u'crossref'
137 |         }
138 | 
139 |         # Setup the parser expression
140 |         self._init_expressions()
141 | 
142 |     def parse(self, bibtex_str, partial=False):
143 |         """Parse a BibTeX string into an object
144 | 
145 |         :param bibtex_str: BibTeX string
146 |         :type: str or unicode
147 |         :param partial: If True, print errors only on parsing failures.
148 |             If False, an exception is raised.
149 |         :type: boolean
150 |         :return: bibliographic database
151 |         :rtype: BibDatabase
152 |         """
153 |         bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
154 |         try:
155 |             self._expr.parseFile(bibtex_file_obj)
156 |         except self._expr.ParseException as exc:
157 |             logger.error("Could not parse properly, starting at %s", exc.line)
158 |             if not partial:
159 |                 raise exc
160 | 
161 |         if self.add_missing_from_crossref:
162 |             self.bib_database.add_missing_from_crossref()
163 | 
164 |         return self.bib_database
165 | 
166 |     def parse_file(self, file, partial=False):
167 |         """Parse a BibTeX file into an object
168 | 
169 |         :param file: BibTeX file or file-like object
170 |         :type: file
171 |         :param partial: If True, print errors only on parsing failures.
172 |             If False, an exception is raised.
173 |         :type: boolean
174 |         :return: bibliographic database
175 |         :rtype: BibDatabase
176 |         """
177 |         return self.parse(file.read(), partial=partial)
178 | 
179 |     def _init_expressions(self):
180 |         """
181 |         Defines all parser expressions used internally.
182 |         """
183 |         self._expr = BibtexExpression()
184 | 
185 |         # Handle string as BibDataString object
186 |         self._expr.set_string_name_parse_action(
187 |             lambda s, l, t:
188 |                 BibDataString(self.bib_database, t[0]))
189 |         if self.interpolate_strings:
190 |             maybe_interpolate = lambda expr: as_text(expr)
191 |         else:
192 |             maybe_interpolate = lambda expr: expr
193 |         self._expr.set_string_expression_parse_action(
194 |             lambda s, l, t:
195 |                 maybe_interpolate(
196 |                     BibDataStringExpression.expression_if_needed(t)))
197 | 
198 |         # Add notice to logger
199 |         self._expr.add_log_function(logger.debug)
200 | 
201 |         # Set actions
202 |         self._expr.entry.addParseAction(
203 |             lambda s, l, t: self._add_entry(
204 |                 t.get('EntryType'), t.get('Key'), t.get('Fields'))
205 |             )
206 |         self._expr.implicit_comment.addParseAction(
207 |             lambda s, l, t: self._add_comment(t[0])
208 |             )
209 |         self._expr.explicit_comment.addParseAction(
210 |             lambda s, l, t: self._add_comment(t[0])
211 |             )
212 |         self._expr.preamble_decl.addParseAction(
213 |             lambda s, l, t: self._add_preamble(t[0])
214 |             )
215 |         self._expr.string_def.addParseAction(
216 |             lambda s, l, t: self._add_string(t['StringName'].name,
217 |                                              t['StringValue'])
218 |             )
219 | 
220 |     def _bibtex_file_obj(self, bibtex_str):
221 |         # Some files have Byte-order marks inserted at the start
222 |         byte = b'\xef\xbb\xbf'
223 |         if isinstance(bibtex_str, ustr):
224 |             byte = ustr(byte, self.encoding, 'ignore')
225 |             if bibtex_str[0] == byte:
226 |                 bibtex_str = bibtex_str[1:]
227 |         else:
228 |             if bibtex_str[:3] == byte:
229 |                 bibtex_str = bibtex_str[3:]
230 |             bibtex_str = bibtex_str.decode(encoding=self.encoding)
231 |         return io.StringIO(bibtex_str)
232 | 
233 |     def _clean_val(self, val):
234 |         """ Clean instring before adding to dictionary
235 | 
236 |         :param val: a value
237 |         :type val: string
238 |         :returns: string -- value
239 |         """
240 |         if not val or val == "{}":
241 |             return ''
242 |         return val
243 | 
244 |     def _clean_key(self, key):
245 |         """ Lowercase a key and return as unicode.
246 | 
247 |         :param key: a key
248 |         :type key: string
249 |         :returns: (unicode) string -- value
250 |         """
251 |         key = key.lower()
252 |         if not isinstance(key, ustr):
253 |             return ustr(key, 'utf-8')
254 |         else:
255 |             return key
256 | 
257 |     def _clean_field_key(self, key):
258 |         """ Clean a bibtex field key and homogenize alternative forms.
259 | 
260 |         :param key: a key
261 |         :type key: string
262 |         :returns: string -- value
263 |         """
264 |         key = self._clean_key(key)
265 |         if self.homogenize_fields:
266 |             if key in list(self.alt_dict.keys()):
267 |                 key = self.alt_dict[key]
268 |         return key
269 | 
270 |     def _add_entry(self, entry_type, entry_id, fields):
271 |         """ Adds a parsed entry.
272 |         Includes checking type and fields, cleaning, applying customizations.
273 | 
274 |         :param entry_type: the entry type
275 |         :type entry_type: string
276 |         :param entry_id: the entry bibid
277 |         :type entry_id: string
278 |         :param fields: the fields and values
279 |         :type fields: dictionary
280 |         :returns: string -- value
281 |         """
282 |         d = {}
283 |         entry_type = self._clean_key(entry_type)
284 |         if self.ignore_nonstandard_types and entry_type not in STANDARD_TYPES:
285 |             logger.warning('Entry type %s not standard. Not considered.',
286 |                            entry_type)
287 |             return
288 |         for key in fields:
289 |             d[self._clean_field_key(key)] = self._clean_val(fields[key])
290 |         d['ENTRYTYPE'] = entry_type
291 |         d['ID'] = entry_id
292 | 
293 |         crossref = d.get('crossref', None)
294 |         if self.add_missing_from_crossref and crossref is not None:
295 |             d['_crossref'] = crossref
296 | 
297 |         if self.customization is not None:
298 |             logger.debug('Apply customizations and return dict')
299 |             d = self.customization(d)
300 | 
301 |         self.bib_database.entries.append(d)
302 | 
303 |     def _add_comment(self, comment):
304 |         """
305 |         Stores a comment in the list of comment.
306 | 
307 |         :param comment: the parsed comment
308 |         :type comment: string
309 |         """
310 |         logger.debug('Store comment in list of comments: ' +
311 |                      comment.__repr__())
312 |         self.bib_database.comments.append(comment)
313 | 
314 |     def _add_string(self, string_key, string):
315 |         """
316 |         Stores a new string in the string dictionary.
317 | 
318 |         :param string_key: the string key
319 |         :type string_key: string
320 |         :param string: the string value
321 |         :type string: string
322 |         """
323 |         if string_key in self.bib_database.strings:
324 |             logger.warning('Overwritting existing string for key: %s.',
325 |                            string_key)
326 |         logger.debug(u'Store string: {} -> {}'.format(string_key, string))
327 |         self.bib_database.strings[string_key] = self._clean_val(string)
328 | 
329 |     def _add_preamble(self, preamble):
330 |         """
331 |         Stores a preamble.
332 | 
333 |         :param preamble: the parsed preamble
334 |         :type preamble: string
335 |         """
336 |         logger.debug('Store preamble in list of preambles')
337 |         self.bib_database.preambles.append(preamble)
338 | 


--------------------------------------------------------------------------------
/docs/source/tutorial.rst:
--------------------------------------------------------------------------------
  1 | ========
  2 | Tutorial
  3 | ========
  4 | 
  5 | Step 0: Vocabulary
  6 | ==================
  7 | 
  8 | * An **entry** designates for example `@book{...}`, `@article{...}`, etc.
  9 | * A **comment** is written as `@comment{...}`.
 10 | * A **preamble** is a `@preamble{...}` block.
 11 | * A **string** is `@string{...}`.
 12 | 
 13 | In an entry, you can find
 14 | 
 15 | * an **entry type** like `article`, `book`, etc.
 16 | * **entry keys** or **keys** such as `author`, `title`, `year`...
 17 | * and also **records**, which designates the values of those keys.
 18 | 
 19 | 
 20 | Step 1: Prepare a BibTeX file
 21 | =============================
 22 | 
 23 | First, we prepare a BibTeX sample file. This is just for the purpose of illustration:
 24 | 
 25 | .. code-block:: python
 26 | 
 27 |     bibtex = """@ARTICLE{Cesar2013,
 28 |       author = {Jean César},
 29 |       title = {An amazing title},
 30 |       year = {2013},
 31 |       volume = {12},
 32 |       pages = {12--23},
 33 |       journal = {Nice Journal},
 34 |       abstract = {This is an abstract. This line should be long enough to test
 35 |     	 multilines...},
 36 |       comments = {A comment},
 37 |       keywords = {keyword1, keyword2}
 38 |     }
 39 |     """
 40 | 
 41 |     with open('bibtex.bib', 'w') as bibfile:
 42 |         bibfile.write(bibtex)
 43 | 
 44 | Step 2: Parse it!
 45 | =================
 46 | 
 47 | Simplest call
 48 | -------------
 49 | 
 50 | OK. Everything is in place. Let's parse the BibTeX file.
 51 | 
 52 | .. code-block:: python
 53 | 
 54 |     import bibtexparser
 55 | 
 56 |     with open('bibtex.bib') as bibtex_file:
 57 |         bib_database = bibtexparser.load(bibtex_file)
 58 | 
 59 |     print(bib_database.entries)
 60 | 
 61 | 
 62 | It prints a list of dictionaries for reference entries, for example books, articles:
 63 | 
 64 | .. code-block:: python
 65 | 
 66 |     [{'journal': 'Nice Journal',
 67 |       'comments': 'A comment',
 68 |       'pages': '12--23',
 69 |       'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...',
 70 |       'title': 'An amazing title',
 71 |       'year': '2013',
 72 |       'volume': '12',
 73 |       'ID': 'Cesar2013',
 74 |       'author': 'Jean César',
 75 |       'keyword': 'keyword1, keyword2',
 76 |       'ENTRYTYPE': 'article'}]
 77 | 
 78 | Note that, by convention, uppercase keys (ID, ENTRYTYPE) are data generated by the parser, while lowercase keys come from the original bibtex file.
 79 | 
 80 | You can also print comments, preambles and strings:
 81 | 
 82 | .. code-block:: python
 83 | 
 84 |     print(bib_database.comments)
 85 |     print(bib_database.preambles)
 86 |     print(bib_database.strings)
 87 | 
 88 | .. note::
 89 |   If your bibtex contains months defined as strings such as :code:`month = jan`, you will need to parse it with the :code:`common_strings` option:
 90 |   :code:`bib_database = bibtexparser.bparser.BibTexParser(common_strings=True).parse_file(bibtex_file)`. (More in `Using bibtex strings`_.)
 91 | 
 92 | 
 93 | Parse a string
 94 | --------------
 95 | 
 96 | If for some reason, you prefer to parse a string, that's also possible:
 97 | 
 98 | .. code-block:: python
 99 | 
100 |     import bibtexparser
101 | 
102 |     with open('bibtex.bib') as bibtex_file:
103 |         bibtex_str = bibtex_file.read()
104 | 
105 |     bib_database = bibtexparser.loads(bibtex_str)
106 | 
107 | 
108 | Tune parser's options
109 | ---------------------
110 | 
111 | In the previous snippet, several default options are used.
112 | You can tweak them as you wish.
113 | 
114 | .. code-block:: python
115 | 
116 |    import bibtexparser
117 |    from bibtexparser.bparser import BibTexParser
118 | 
119 |    parser = BibTexParser(common_strings=False)
120 |    parser.ignore_nonstandard_types = False
121 |    parser.homogenize_fields = False
122 | 
123 |    bib_database = bibtexparser.loads(bibtex_str, parser)
124 | 
125 | .. note::
126 |    The :code:`common_strings` option needs to be set when the parser object is created and has no effect if changed afterwards.
127 | 
128 | Step 3: Export
129 | ==============
130 | 
131 | Once you worked on your parsed database, you may want to export the result. This library provides some functions to help on that. However, you can write your own functions if you have specific requirements.
132 | 
133 | Create a BibTeX file or string
134 | --------------------------------
135 | 
136 | The bibliographic data can be converted back into a string:
137 | 
138 | .. code-block:: python
139 | 
140 |     import bibtexparser
141 | 
142 |     bibtex_str = bibtexparser.dumps(bib_database)
143 | 
144 | or a BibTeX file like this:
145 | 
146 | .. code-block:: python
147 | 
148 |     import bibtexparser
149 | 
150 |     with open('bibtex.bib', 'w') as bibtex_file:
151 |         bibtexparser.dump(bib_database, bibtex_file)
152 | 
153 | 
154 | Call the writer
155 | ---------------
156 | 
157 | In the first section we prepared a BibTeX sample file, we can prepare the same file using pure python and the ``BibTexWriter`` class.
158 | 
159 | .. code-block:: python
160 | 
161 |     from bibtexparser.bwriter import BibTexWriter
162 |     from bibtexparser.bibdatabase import BibDatabase
163 | 
164 |     db = BibDatabase()
165 |     db.entries = [
166 |         {'journal': 'Nice Journal',
167 |          'comments': 'A comment',
168 |          'pages': '12--23',
169 |          'month': 'jan',
170 |          'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...',
171 |          'title': 'An amazing title',
172 |          'year': '2013',
173 |          'volume': '12',
174 |          'ID': 'Cesar2013',
175 |          'author': 'Jean César',
176 |          'keyword': 'keyword1, keyword2',
177 |          'ENTRYTYPE': 'article'}]
178 | 
179 |     writer = BibTexWriter()
180 |     with open('bibtex.bib', 'w') as bibfile:
181 |         bibfile.write(writer.write(db))
182 | 
183 | This code generates the following file:
184 | 
185 | .. code-block:: latex
186 | 
187 |     @article{Cesar2013,
188 |      abstract = {This is an abstract. This line should be long enough to test
189 |     multilines...},
190 |      author = {Jean César},
191 |      comments = {A comment},
192 |      journal = {Nice Journal},
193 |      keyword = {keyword1, keyword2},
194 |      month = {jan},
195 |      pages = {12--23},
196 |      title = {An amazing title},
197 |      volume = {12},
198 |      year = {2013}
199 |     }
200 | 
201 | The writer also has several flags that can be enabled to customize the output file.
202 | For example we can use ``indent`` and ``comma_first`` to customize the previous entry, first the code:
203 | 
204 | .. code-block:: python
205 | 
206 |     from bibtexparser.bwriter import BibTexWriter
207 |     from bibtexparser.bibdatabase import BibDatabase
208 | 
209 |     db = BibDatabase()
210 |     db.entries = [
211 |         {'journal': 'Nice Journal',
212 |          'comments': 'A comment',
213 |          'pages': '12--23',
214 |          'month': 'jan',
215 |          'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...',
216 |          'title': 'An amazing title',
217 |          'year': '2013',
218 |          'volume': '12',
219 |          'ID': 'Cesar2013',
220 |          'author': 'Jean César',
221 |          'keyword': 'keyword1, keyword2',
222 |          'ENTRYTYPE': 'article'}]
223 | 
224 |     writer = BibTexWriter()
225 |     writer.indent = '    '     # indent entries with 4 spaces instead of one
226 |     writer.comma_first = True  # place the comma at the beginning of the line
227 |     with open('bibtex.bib', 'w') as bibfile:
228 |         bibfile.write(writer.write(db))
229 | 
230 | This code results in the following, customized, file:
231 | 
232 | .. code-block:: latex
233 | 
234 |     @article{Cesar2013
235 |     ,    abstract = {This is an abstract. This line should be long enough to test
236 |     multilines...}
237 |     ,    author = {Jean César}
238 |     ,    comments = {A comment}
239 |     ,    journal = {Nice Journal}
240 |     ,    keyword = {keyword1, keyword2}
241 |     ,    month = {jan}
242 |     ,    pages = {12--23}
243 |     ,    title = {An amazing title}
244 |     ,    volume = {12}
245 |     ,    year = {2013}
246 |     }
247 | 
248 | 
249 | Flags to the writer object can modify not only how an entry is printed but how several BibTeX entries are sorted and separated.
250 | See the :ref:`API <bibtexparser_api>` for the full list of flags.
251 | 
252 | 
253 | Step 4: Add salt and pepper
254 | ===========================
255 | 
256 | In this section, we discuss some customizations and details.
257 | 
258 | Customizations
259 | --------------
260 | 
261 | By default, the parser does not alter the content of each field and keeps it as a simple string. There are many cases
262 | where this is not desired. For example, a string naming multiple authors could instead be parsed as a list.
263 | 
264 | To modify field values during parsing, a callback function can be supplied to the parser which can be used to modify
265 | BibTeX entries. The library includes several functions which may be used. Alternatively, you can read them to create
266 | your own functions.
267 | 
268 | .. code-block:: python
269 | 
270 |     import bibtexparser
271 |     from bibtexparser.bparser import BibTexParser
272 |     from bibtexparser.customization import *
273 | 
274 |     # Let's define a function to customize our entries.
275 |     # It takes a record and returns this record.
276 |     def customizations(record):
277 |         """Use some functions delivered by the library
278 | 
279 |         :param record: a record
280 |         :returns: -- customized record
281 |         """
282 |         record = type(record)
283 |         record = author(record)
284 |         record = editor(record)
285 |         record = journal(record)
286 |         record = keyword(record)
287 |         record = link(record)
288 |         record = page_double_hyphen(record)
289 |         record = doi(record)
290 |         return record
291 | 
292 |     with open('bibtex.bib') as bibtex_file:
293 |         parser = BibTexParser()
294 |         parser.customization = customizations
295 |         bib_database = bibtexparser.load(bibtex_file, parser=parser)
296 |         print(bib_database.entries)
297 | 
298 | 
299 | If you think that you have a customization which could be useful to others, please share with us!
300 | 
301 | 
302 | Accents and weird characters
303 | ----------------------------
304 | 
305 | Your bibtex may contain accents and specific characters.
306 | They are sometimes coded like this ``\'{e}`` but this is not the correct way; ``{\'e}`` is preferred. Moreover, you may want to manipulate ``é``. There are different situations:
307 | 
308 | * Case 1: you plan to use this library to work with latex and you assume that the original bibtex is clean. You have nothing to do.
309 | 
310 | * Case 2: you plan to use this library to work with latex but your bibtex is not really clean.
311 | 
312 | .. code-block:: python
313 | 
314 |     import bibtexparser
315 |     from bibtexparser.bparser import BibTexParser
316 |     from bibtexparser.customization import homogenize_latex_encoding
317 | 
318 |     with open('bibtex.bib') as bibtex_file:
319 |         parser = BibTexParser()
320 |         parser.customization = homogenize_latex_encoding
321 |         bib_database = bibtexparser.load(bibtex_file, parser=parser)
322 |         print(bib_database.entries)
323 | 
324 | 
325 | * Case 3: you plan to use this library to work with something different and your bibtex is not really clean.
326 |   Then, you probably want to use unicode.
327 | 
328 | .. code-block:: python
329 | 
330 |     import bibtexparser
331 |     from bibtexparser.bparser import BibTexParser
332 |     from bibtexparser.customization import convert_to_unicode
333 | 
334 |     with open('bibtex.bib') as bibtex_file:
335 |         parser = BibTexParser()
336 |         parser.customization = convert_to_unicode
337 |         bib_database = bibtexparser.load(bibtex_file, parser=parser)
338 |         print(bib_database.entries)
339 | 
340 | 
341 | .. Note::
342 | 
343 |     If you want to mix different customization functions, you can write your own function.
344 | 
345 | 
346 | Using bibtex strings
347 | --------------------
348 | 
349 | .. Warning:: support for bibtex strings representation is still an experimental feature; the way strings are represented is likely to change in future releases.
350 | 
351 | Bibtex strings and string expressions are expanded by default into the value they represent.
352 | This behavior is controlled by the ``interpolate_strings`` argument of the BibTexParser. It defaults to ``True`` but can be set to ``False``, in which case bibtex strings and string expressions from input files are represented with the :class:`bibdatabase.BibDataString` and :class:`bibdatabase.BibDataStringExpression` from the :mod:`bibdatabase` module. Both classes retain the intrinsic structure of the string or expression so that they can be written to a new file, the same way. Each instance provides a :func:`get_value` method to interpolate the string or expression and the module also provides an :func:`bibdatabase.as_text` helper to expand a string or an expression when needed.
353 | 
354 | Running the following code would yield the output shown in the second block below.
355 | 
356 | .. code-block:: python
357 | 
358 |     from bibtexparser.bparser import BibTexParser
359 |     from bibtexparser.bibdatabase import as_text
360 | 
361 | 
362 |     bibtex = """@STRING{ jean = "Jean"}
363 |     
364 |     @ARTICLE{Cesar2013,
365 |       author = jean # { César},
366 |       title = {An amazing title},
367 |       year = {2013},
368 |       month = jan,
369 |       volume = {12},
370 |       pages = {12--23},
371 |       journal = {Nice Journal},
372 |     }
373 |     """
374 | 
375 |     bp = BibTexParser(interpolate_strings=False)
376 |     bib_database = bp.parse(bibtex)
377 |     bib_database.entries[0]
378 |     as_text(bib_database.entries[0]['author'])
379 | 
380 | .. code-block:: python
381 | 
382 |     {'ENTRYTYPE': 'article',
383 |      'ID': 'Cesar2013',
384 |      'author': BibDataStringExpression([BibDataString('jean'), ' César']),
385 |      'journal': 'Nice Journal',
386 |      'month': BibDataStringExpression([BibDataString('jan')]),
387 |      'pages': '12--23',
388 |      'title': 'An amazing title',
389 |      }
390 |     'Jean César'
391 | 


--------------------------------------------------------------------------------
/bibtexparser/tests/test_crossref_resolving.py:
--------------------------------------------------------------------------------
  1 | import unittest2 as unittest
  2 | from bibtexparser.bibdatabase import BibDatabase
  3 | from bibtexparser.bparser import BibTexParser
  4 | 
  5 | 
  6 | class TestCrossRef(unittest.TestCase):
  7 |     def test_crossref(self):
  8 |         self.maxDiff = None
  9 |         input_file_path = 'bibtexparser/tests/data/crossref_entries.bib'
 10 |         entries_expected = {'cr1': {'ENTRYTYPE': 'inbook',
 11 |                                     'ID': 'cr1',
 12 |                                     '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
 13 |                                     'archiveprefix': 'SomEPrFiX',
 14 |                                     'author': 'Graham Gullam',
 15 |                                     'crossref': 'cr_m',
 16 |                                     'editor': 'Edgar Erbriss',
 17 |                                     'origdate': '1955',
 18 |                                     'primaryclass': 'SOMECLASS',
 19 |                                     'publisher': 'Grimble',
 20 |                                     'title': 'Great and Good Graphs',
 21 |                                     'year': '1974'},
 22 |                             'cr2': {'ENTRYTYPE': 'inbook',
 23 |                                     'ID': 'cr2',
 24 |                                     '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
 25 |                                     'author': 'Frederick Fumble',
 26 |                                     'crossref': 'cr_m',
 27 |                                     'editor': 'Edgar Erbriss',
 28 |                                     'institution': 'Institution',
 29 |                                     'origdate': '1943',
 30 |                                     'publisher': 'Grimble',
 31 |                                     'school': 'School',
 32 |                                     'title': 'Fabulous Fourier Forms',
 33 |                                     'year': '1974'},
 34 |                             'cr3': {'ENTRYTYPE': 'inbook',
 35 |                                     'ID': 'cr3',
 36 |                                     '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
 37 |                                     'archiveprefix': 'SomEPrFiX',
 38 |                                     'author': 'Arthur Aptitude',
 39 |                                     'crossref': 'crt',
 40 |                                     'editor': 'Mark Monkley',
 41 |                                     'eprinttype': 'sometype',
 42 |                                     'origdate': '1934',
 43 |                                     'publisher': 'Rancour',
 44 |                                     'title': 'Arrangements of All Articles',
 45 |                                     'year': '1996'},
 46 |                             'cr4': {'ENTRYTYPE': 'inbook',
 47 |                                     'ID': 'cr4',
 48 |                                     '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
 49 |                                     'author': 'Morris Mumble',
 50 |                                     'crossref': 'crn',
 51 |                                     'editor': 'Jeremy Jermain',
 52 |                                     'origdate': '1911',
 53 |                                     'publisher': 'Pillsbury',
 54 |                                     'title': 'Enterprising Entities',
 55 |                                     'year': '1945'},
 56 |                             'cr5': {'ENTRYTYPE': 'inbook',
 57 |                                     'ID': 'cr5',
 58 |                                     '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
 59 |                                     'author': 'Oliver Ordinary',
 60 |                                     'crossref': 'crn',
 61 |                                     'editor': 'Jeremy Jermain',
 62 |                                     'origdate': '1919',
 63 |                                     'publisher': 'Pillsbury',
 64 |                                     'title': 'Questionable Quidities',
 65 |                                     'year': '1945'},
 66 |                             'cr6': {'ENTRYTYPE': 'inproceedings',
 67 |                                     'ID': 'cr6',
 68 |                                     '_FROM_CROSSREF': ['address',
 69 |                                                        'editor',
 70 |                                                        'eventdate',
 71 |                                                        'eventtitle',
 72 |                                                        'publisher',
 73 |                                                        'venue'],
 74 |                                     'address': 'Address',
 75 |                                     'author': 'Author, Firstname',
 76 |                                     'booktitle': 'Manual booktitle',
 77 |                                     'crossref': 'cr6i',
 78 |                                     'editor': 'Editor',
 79 |                                     'eventdate': '2009-08-21/2009-08-24',
 80 |                                     'eventtitle': 'Title of the event',
 81 |                                     'pages': '123--',
 82 |                                     'publisher': 'Publisher of proceeding',
 83 |                                     'title': 'Title of inproceeding',
 84 |                                     'venue': 'Location of event',
 85 |                                     'year': '2009'},
 86 |                             'cr6i': {'ENTRYTYPE': 'proceedings',
 87 |                                      'ID': 'cr6i',
 88 |                                      'address': 'Address',
 89 |                                      'author': 'Spurious Author',
 90 |                                      'editor': 'Editor',
 91 |                                      'eventdate': '2009-08-21/2009-08-24',
 92 |                                      'eventtitle': 'Title of the event',
 93 |                                      'publisher': 'Publisher of proceeding',
 94 |                                      'title': 'Title of proceeding',
 95 |                                      'venue': 'Location of event',
 96 |                                      'year': '2009'},
 97 |                             'cr7': {'ENTRYTYPE': 'inbook',
 98 |                                     'ID': 'cr7',
 99 |                                     '_FROM_CROSSREF': ['publisher', 'subtitle', 'titleaddon', 'verba'],
100 |                                     'author': 'Author, Firstname',
101 |                                     'crossref': 'cr7i',
102 |                                     'pages': '123--126',
103 |                                     'publisher': 'Publisher of proceeding',
104 |                                     'subtitle': 'Book Subtitle',
105 |                                     'title': 'Title of Book bit',
106 |                                     'titleaddon': 'Book Titleaddon',
107 |                                     'verba': 'String',
108 |                                     'year': '2010'},
109 |                             'cr7i': {'ENTRYTYPE': 'book',
110 |                                      'ID': 'cr7i',
111 |                                      'author': 'Brian Bookauthor',
112 |                                      'publisher': 'Publisher of proceeding',
113 |                                      'subtitle': 'Book Subtitle',
114 |                                      'title': 'Book Title',
115 |                                      'titleaddon': 'Book Titleaddon',
116 |                                      'verba': 'String',
117 |                                      'year': '2009'},
118 |                             'cr8': {'ENTRYTYPE': 'incollection',
119 |                                     'ID': 'cr8',
120 |                                     '_FROM_CROSSREF': ['editor', 'publisher', 'subtitle', 'titleaddon'],
121 |                                     'author': 'Smith, Firstname',
122 |                                     'crossref': 'cr8i',
123 |                                     'editor': 'Brian Editor',
124 |                                     'pages': '1--12',
125 |                                     'publisher': 'Publisher of Collection',
126 |                                     'subtitle': 'Book Subtitle',
127 |                                     'title': 'Title of Collection bit',
128 |                                     'titleaddon': 'Book Titleaddon',
129 |                                     'year': '2010'},
130 |                             'cr8i': {'ENTRYTYPE': 'collection',
131 |                                      'ID': 'cr8i',
132 |                                      'editor': 'Brian Editor',
133 |                                      'publisher': 'Publisher of Collection',
134 |                                      'subtitle': 'Book Subtitle',
135 |                                      'title': 'Book Title',
136 |                                      'titleaddon': 'Book Titleaddon',
137 |                                      'year': '2009'},
138 |                             'cr_m': {'ENTRYTYPE': 'book',
139 |                                      'ID': 'cr_m',
140 |                                      'editor': 'Edgar Erbriss',
141 |                                      'publisher': 'Grimble',
142 |                                      'title': 'Graphs of the Continent',
143 |                                      'year': '1974'},
144 |                             'crn': {'ENTRYTYPE': 'book',
145 |                                     'ID': 'crn',
146 |                                     'editor': 'Jeremy Jermain',
147 |                                     'publisher': 'Pillsbury',
148 |                                     'title': 'Vanquished, Victor, Vandal',
149 |                                     'year': '1945'},
150 |                             'crt': {'ENTRYTYPE': 'book',
151 |                                     'ID': 'crt',
152 |                                     'editor': 'Mark Monkley',
153 |                                     'publisher': 'Rancour',
154 |                                     'title': 'Beasts of the Burbling Burns',
155 |                                     'year': '1996'}}
156 |         parser = BibTexParser(add_missing_from_crossref=True, ignore_nonstandard_types=False)
157 |         with open(input_file_path) as bibtex_file:
158 |             bibtex_database = parser.parse_file(bibtex_file)
159 |         self.assertDictEqual(bibtex_database.entries_dict, entries_expected)
160 | 
161 |     def test_crossref_cascading(self):
162 |         input_file_path = 'bibtexparser/tests/data/crossref_cascading.bib'
163 |         entries_expected = {'r1': {'ENTRYTYPE': 'book',
164 |                                    'ID': 'r1',
165 |                                    '_FROM_CROSSREF': [],
166 |                                    'crossref': 'r2',
167 |                                    'date': '1911'},
168 |                             'r2': {'ENTRYTYPE': 'book',
169 |                                    'ID': 'r2',
170 |                                    '_FROM_CROSSREF': [],
171 |                                    'crossref': 'r3',
172 |                                    'date': '1911'},
173 |                             'r3': {'ENTRYTYPE': 'book',
174 |                                    'ID': 'r3',
175 |                                    '_FROM_CROSSREF': [],
176 |                                    'crossref': 'r4',
177 |                                    'date': '1911'},
178 |                             'r4': {'ENTRYTYPE': 'book',
179 |                                    'ID': 'r4',
180 |                                    'date': '1911'}}
181 | 
182 |         parser = BibTexParser(add_missing_from_crossref=True)
183 |         with open(input_file_path) as bibtex_file:
184 |             bibtex_database = parser.parse_file(bibtex_file)
185 |         self.assertDictEqual(bibtex_database.entries_dict, entries_expected)
186 | 
187 |     def test_crossref_cascading_cycle(self):
188 |         input_file_path = 'bibtexparser/tests/data/crossref_cascading_cycle.bib'
189 |         entries_expected = {'circ1': {'ENTRYTYPE': 'book',
190 |                                       'ID': 'circ1',
191 |                                       '_FROM_CROSSREF': [],
192 |                                       'crossref': 'circ2',
193 |                                       'date': '1911'},
194 |                             'circ2': {'ENTRYTYPE': 'book',
195 |                                       'ID': 'circ2',
196 |                                       '_FROM_CROSSREF': [],
197 |                                       'crossref': 'circ1',
198 |                                       'date': '1911'}}
199 |         parser = BibTexParser(add_missing_from_crossref=True)
200 |         with self.assertLogs('bibtexparser.bibdatabase', level='ERROR') as cm:
201 |             with open(input_file_path) as bibtex_file:
202 |                 bibtex_database = parser.parse_file(bibtex_file)
203 |             self.assertIn("ERROR:bibtexparser.bibdatabase:Circular crossref dependency: circ1->circ2->circ1.", cm.output)
204 |         self.assertDictEqual(bibtex_database.entries_dict, entries_expected)
205 | 
206 |     def test_crossref_missing_entries(self):
207 |         input_file_path = 'bibtexparser/tests/data/crossref_missing_entries.bib'
208 |         entries_expected = {'mcr': {'ENTRYTYPE': 'inbook',
209 |                                     'ID': 'mcr',
210 |                                     '_crossref': 'missing1',
211 |                                     'author': 'Megan Mistrel',
212 |                                     'crossref': 'missing1',
213 |                                     'origdate': '1933',
214 |                                     'title': 'Lumbering Lunatics'}}
215 | 
216 |         parser = BibTexParser(add_missing_from_crossref=True)
217 |         with self.assertLogs('bibtexparser.bibdatabase', level='ERROR') as cm:
218 |             with open(input_file_path) as bibtex_file:
219 |                 bibtex_database = parser.parse_file(bibtex_file)
220 |             self.assertIn("ERROR:bibtexparser.bibdatabase:Crossref reference missing1 for mcr is missing.", cm.output)
221 |         self.assertDictEqual(bibtex_database.entries_dict, entries_expected)
222 | 
# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
225 | 


--------------------------------------------------------------------------------
/bibtexparser/customization.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | A set of functions useful for customizing bibtex fields.
  6 | You can find inspiration from these functions to design yours.
  7 | Each of them takes a record and returns the modified record.
  8 | """
  9 | 
 10 | import re
 11 | import logging
 12 | 
 13 | from builtins import str
 14 | 
 15 | from bibtexparser.latexenc import latex_to_unicode, string_to_latex, protect_uppercase
 16 | 
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | __all__ = ['splitname', 'getnames', 'author', 'editor', 'journal', 'keyword',
 20 |            'link', 'page_double_hyphen', 'doi', 'type', 'convert_to_unicode',
 21 |            'homogenize_latex_encoding', 'add_plaintext_fields']
 22 | 
 23 | 
 24 | class InvalidName(ValueError):
 25 |     """Exception raised by :py:func:`customization.splitname` when an invalid name is input.
 26 | 
 27 |     """
 28 |     pass
 29 | 
 30 | 
 31 | def splitname(name, strict_mode=True):
 32 |     """
 33 |     Break a name into its constituent parts: First, von, Last, and Jr.
 34 | 
 35 |     :param string name: a string containing a single name
 36 |     :param Boolean strict_mode: whether to use strict mode
 37 |     :returns: dictionary of constituent parts
 38 |     :raises `customization.InvalidName`: If an invalid name is given and
 39 |                                          ``strict_mode = True``.
 40 | 
 41 |     In BibTeX, a name can be represented in any of three forms:
 42 |         * First von Last
 43 |         * von Last, First
 44 |         * von Last, Jr, First
 45 | 
 46 |     This function attempts to split a given name into its four parts. The
 47 |     returned dictionary has keys of ``first``, ``last``, ``von`` and ``jr``.
 48 |     Each value is a list of the words making up that part; this may be an empty
 49 |     list.  If the input has no non-whitespace characters, a blank dictionary is
 50 |     returned.
 51 | 
 52 |     It is capable of detecting some errors with the input name. If the
 53 |     ``strict_mode`` parameter is ``True``, which is the default, this results in
 54 |     a :class:`customization.InvalidName` exception being raised. If it is
 55 |     ``False``, the function continues, working around the error as best it can.
 56 |     The errors that can be detected are listed below along with the handling
 57 |     for non-strict mode:
 58 | 
 59 |         * Name finishes with a trailing comma: delete the comma
 60 |         * Too many parts (e.g., von Last, Jr, First, Error): merge extra parts
 61 |           into First
 62 |         * Unterminated opening brace: add closing brace to end of input
 63 |         * Unmatched closing brace: add opening brace at start of word
 64 | 
 65 |     """
 66 |     # Useful references:
 67 |     # http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html#names
 68 |     # http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf
 69 | 
 70 |     # Whitespace characters that can separate words.
 71 |     whitespace = set(' ~\r\n\t')
 72 | 
 73 |     # We'll iterate over the input once, dividing it into a list of words for
 74 |     # each comma-separated section. We'll also calculate the case of each word
 75 |     # as we work.
 76 |     sections = [[]]      # Sections of the name.
 77 |     cases = [[]]         # 1 = uppercase, 0 = lowercase, -1 = caseless.
 78 |     word = []            # Current word.
 79 |     case = -1            # Case of the current word.
 80 |     level = 0            # Current brace level.
 81 |     bracestart = False   # Will the next character be the first within a brace?
 82 |     controlseq = True    # Are we currently processing a control sequence?
 83 |     specialchar = None   # Are we currently processing a special character?
 84 | 
 85 |     # Using an iterator allows us to deal with escapes in a simple manner.
 86 |     nameiter = iter(name)
 87 |     for char in nameiter:
 88 |         # An escape.
 89 |         if char == '\\':
 90 |             escaped = next(nameiter)
 91 | 
 92 |             # BibTeX doesn't allow whitespace escaping. Copy the slash and fall
 93 |             # through to the normal case to handle the whitespace.
 94 |             if escaped in whitespace:
 95 |                 word.append(char)
 96 |                 char = escaped
 97 | 
 98 |             else:
 99 |                 # Is this the first character in a brace?
100 |                 if bracestart:
101 |                     bracestart = False
102 |                     controlseq = escaped.isalpha()
103 |                     specialchar = True
104 | 
105 |                 # Can we use it to determine the case?
106 |                 elif (case == -1) and escaped.isalpha():
107 |                     if escaped.isupper():
108 |                         case = 1
109 |                     else:
110 |                         case = 0
111 | 
112 |                 # Copy the escape to the current word and go to the next
113 |                 # character in the input.
114 |                 word.append(char)
115 |                 word.append(escaped)
116 |                 continue
117 | 
118 |         # Start of a braced expression.
119 |         if char == '{':
120 |             level += 1
121 |             word.append(char)
122 |             bracestart = True
123 |             controlseq = False
124 |             specialchar = False
125 |             continue
126 | 
127 |         # All the below cases imply this (and don't test its previous value).
128 |         bracestart = False
129 | 
130 |         # End of a braced expression.
131 |         if char == '}':
132 |             # Check and reduce the level.
133 |             if level:
134 |                 level -= 1
135 |             else:
136 |                 if strict_mode:
137 |                     raise InvalidName("Unmatched closing brace in name {{{0}}}.".format(name))
138 |                 word.insert(0, '{')
139 | 
140 |             # Update the state, append the character, and move on.
141 |             controlseq = False
142 |             specialchar = False
143 |             word.append(char)
144 |             continue
145 | 
146 |         # Inside a braced expression.
147 |         if level:
148 |             # Is this the end of a control sequence?
149 |             if controlseq:
150 |                 if not char.isalpha():
151 |                     controlseq = False
152 | 
153 |             # If it's a special character, can we use it for a case?
154 |             elif specialchar:
155 |                 if (case == -1) and char.isalpha():
156 |                     if char.isupper():
157 |                         case = 1
158 |                     else:
159 |                         case = 0
160 | 
161 |             # Append the character and move on.
162 |             word.append(char)
163 |             continue
164 | 
165 |         # End of a word.
166 |         # NB. we know we're not in a brace here due to the previous case.
167 |         if char == ',' or char in whitespace:
168 |             # Don't add empty words due to repeated whitespace.
169 |             if word:
170 |                 sections[-1].append(''.join(word))
171 |                 word = []
172 |                 cases[-1].append(case)
173 |                 case = -1
174 |                 controlseq = False
175 |                 specialchar = False
176 | 
177 |             # End of a section.
178 |             if char == ',':
179 |                 if len(sections) < 3:
180 |                     sections.append([])
181 |                     cases.append([])
182 |                 elif strict_mode:
183 |                     raise InvalidName("Too many commas in the name {{{0}}}.".format(name))
184 |             continue
185 | 
186 |         # Regular character.
187 |         word.append(char)
188 |         if (case == -1) and char.isalpha():
189 |             if char.isupper():
190 |                 case = 1
191 |             else:
192 |                 case = 0
193 | 
194 |     # Unterminated brace?
195 |     if level:
196 |         if strict_mode:
197 |             raise InvalidName("Unterminated opening brace in the name {{{0}}}.".format(name))
198 |         while level:
199 |             word.append('}')
200 |             level -= 1
201 | 
202 |     # Handle the final word.
203 |     if word:
204 |         sections[-1].append(''.join(word))
205 |         cases[-1].append(case)
206 | 
207 |     # Get rid of trailing sections.
208 |     if not sections[-1]:
209 |         # Trailing comma?
210 |         if (len(sections) > 1) and strict_mode:
211 |             raise InvalidName("Trailing comma at end of name {{{0}}}.".format(name))
212 |         sections.pop(-1)
213 |         cases.pop(-1)
214 | 
215 |     # No non-whitespace input.
216 |     if not sections or not any(bool(section) for section in sections):
217 |         return {}
218 | 
219 |     # Initialise the output dictionary.
220 |     parts = {'first': [], 'last': [], 'von': [], 'jr': []}
221 | 
222 |     # Form 1: "First von Last"
223 |     if len(sections) == 1:
224 |         p0 = sections[0]
225 | 
226 |         # One word only: last cannot be empty.
227 |         if len(p0) == 1:
228 |             parts['last'] = p0
229 | 
230 |         # Two words: must be first and last.
231 |         elif len(p0) == 2:
232 |             parts['first'] = p0[:1]
233 |             parts['last'] = p0[1:]
234 | 
235 |         # Need to use the cases to figure it out.
236 |         else:
237 |             cases = cases[0]
238 | 
239 |             # First is the longest sequence of words starting with uppercase
240 |             # that is not the whole string. von is then the longest sequence
241 |             # whose last word starts with lowercase that is not the whole
242 |             # string. Last is the rest. NB., this means last cannot be empty.
243 | 
244 |             # At least one lowercase letter.
245 |             if 0 in cases:
246 |                 # Index from end of list of first and last lowercase word.
247 |                 firstl = cases.index(0) - len(cases)
248 |                 lastl = -cases[::-1].index(0) - 1
249 |                 if lastl == -1:
250 |                     lastl -= 1      # Cannot consume the rest of the string.
251 | 
252 |                 # Pull the parts out.
253 |                 parts['first'] = p0[:firstl]
254 |                 parts['von'] = p0[firstl:lastl+1]
255 |                 parts['last'] = p0[lastl+1:]
256 | 
257 |             # No lowercase: last is the last word, first is everything else.
258 |             else:
259 |                 parts['first'] = p0[:-1]
260 |                 parts['last'] = p0[-1:]
261 | 
262 |     # Form 2 ("von Last, First") or 3 ("von Last, jr, First")
263 |     else:
264 |         # As long as there is content in the first name partition, use it as-is.
265 |         first = sections[-1]
266 |         if first and first[0]:
267 |             parts['first'] = first
268 | 
269 |         # And again with the jr part.
270 |         if len(sections) == 3:
271 |             jr = sections[-2]
272 |             if jr and jr[0]:
273 |                 parts['jr'] = jr
274 | 
275 |         # Last name cannot be empty; if there is only one word in the first
276 |         # partition, we have to use it for the last name.
277 |         last = sections[0]
278 |         if len(last) == 1:
279 |             parts['last'] = last
280 | 
281 |         # Have to look at the cases to figure it out.
282 |         else:
283 |             lcases = cases[0]
284 | 
285 |             # At least one lowercase: von is the longest sequence of whitespace
286 |             # separated words whose last word does not start with an uppercase
287 |             # word, and last is the rest.
288 |             if 0 in lcases:
289 |                 split = len(lcases) - lcases[::-1].index(0)
290 |                 if split == len(lcases):
291 |                     split = 0            # Last cannot be empty.
292 |                 parts['von'] = sections[0][:split]
293 |                 parts['last'] = sections[0][split:]
294 | 
295 |             # All uppercase => all last.
296 |             else:
297 |                 parts['last'] = sections[0]
298 | 
299 |     # Done.
300 |     return parts
301 | 
302 | 
def getnames(names):
    """Convert people names as surname, firstnames
    or surname, initials.

    Each non-blank input name yields one ``"<last>, <firsts>"`` string. A name
    containing a comma is split at the first comma into last name and first
    names; otherwise the final whitespace-separated word is taken as the last
    name. Name particles (``ben``, ``van``, ``der``, ``de``, ``la``, ``le``)
    that directly precede the last name are moved into it, so
    ``"Jean de la Tour"`` becomes ``"de la Tour, Jean"``. Blank names are
    skipped.

    :param names: a list of names
    :type names: list
    :returns: list -- Correctly formated names

    .. Note::
        This function is known to be too simple to handle properly
        the complex rules. We would like to enhance this in forthcoming
        releases.
    """
    suffixes = ('jnr', 'jr', 'junior')
    particles = ('ben', 'van', 'der', 'de', 'la', 'le')

    tidynames = []
    for namestring in names:
        namestring = namestring.strip()
        if not namestring:
            continue
        if ',' in namestring:
            last, _, rest = namestring.partition(',')
            last = last.strip()
            firsts = rest.split()
        else:
            namesplit = namestring.split()
            last = namesplit.pop()
            firsts = [i.replace('.', '. ').strip() for i in namesplit]
        # A generational suffix in last position is not the surname: take the
        # preceding word instead. Guarded so that a name consisting only of
        # the suffix does not pop from an empty list.
        if last in suffixes and firsts:
            last = firsts.pop()
        # Move every particle that directly precedes the surname into it.
        # Consuming from the end (instead of popping while iterating over
        # ``firsts``) keeps multi-particle surnames such as "van der Waals"
        # intact.
        while firsts and firsts[-1] in particles:
            last = firsts.pop() + ' ' + last
        tidynames.append(last + ", " + ' '.join(firsts))
    return tidynames
336 | 
337 | 
def author(record):
    """
    Turn the author field into a list of formatted names.

    The field is split on ``" and "`` (newlines are treated as spaces) and
    the pieces are passed through :func:`getnames`. An empty author field is
    removed from the record.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "author" not in record:
        return record

    raw = record["author"]
    if not raw:
        # Nothing usable: drop the field altogether.
        del record["author"]
        return record

    pieces = raw.replace('\n', ' ').split(" and ")
    record["author"] = getnames([piece.strip() for piece in pieces])
    return record
353 | 
354 | 
def editor(record):
    """
    Turn the editor field into a list of dicts, one per editor.

    The field is split on ``" and "`` (newlines are treated as spaces) and
    formatted with :func:`getnames`. Each resulting name becomes a dict with
    the name itself under ``name`` and, under ``ID``, the same name with
    commas, spaces and dots removed. An empty editor field is removed from
    the record.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "editor" not in record:
        return record

    raw = record["editor"]
    if not raw:
        # Nothing usable: drop the field altogether.
        del record["editor"]
        return record

    pieces = raw.replace('\n', ' ').split(" and ")
    record["editor"] = getnames([piece.strip() for piece in pieces])

    # convert each editor name to an object
    editors = []
    for name in record["editor"]:
        identifier = name.replace(',', '').replace(' ', '').replace('.', '')
        editors.append({"name": name, "ID": identifier})
    record["editor"] = editors
    return record
373 | 
374 | 
def page_double_hyphen(record):
    """
    Normalise the page range separator to a double hyphen (--).

    For every known dash character present in the ``pages`` field, the field
    is split on it and rebuilt as ``<first part>--<last part>``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "pages" not in record:
        return record

    # hyphen, non-breaking hyphen, en dash, em dash, hyphen-minus, minus sign
    dashes = (u'‐', u'‑', u'–', u'—', u'-', u'−')
    for dash in dashes:
        if dash not in record["pages"]:
            continue
        chunks = record["pages"].split(dash)
        # Only the outermost parts are kept; trim blanks and stray dashes.
        start = chunks[0].strip().strip(dash)
        end = chunks[-1].strip().strip(dash)
        record["pages"] = start + '--' + end
    return record
392 | 
393 | 
def type(record):
    """
    Lower-case the ``type`` field of the record, if present.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "type" not in record:
        return record
    record["type"] = record["type"].lower()
    return record
406 | 
407 | 
def journal(record):
    """
    Replace a non-empty journal field by a dict with two entries: ``name``,
    the original journal name, and ``ID``, the name with commas, spaces and
    dots removed.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "journal" in record and record["journal"]:
        name = record["journal"]
        # Build the identifier by dropping punctuation and blanks.
        identifier = name
        for unwanted in (',', ' ', '.'):
            identifier = identifier.replace(unwanted, '')
        record["journal"] = {"name": name, "ID": identifier}

    return record
424 | 
425 | 
def keyword(record, sep=',|;'):
    """
    Split the keyword field into a list of stripped keywords.

    Newlines are removed from the field before it is split.

    :param record: the record.
    :type record: dict
    :param sep: pattern used for the splitting regexp.
    :type sep: string, optional
    :returns: dict -- the modified record.

    """
    if "keyword" not in record:
        return record

    flattened = record["keyword"].replace('\n', '')
    record["keyword"] = [piece.strip() for piece in re.split(sep, flattened)]
    return record
441 | 
442 | 
def link(record):
    """
    Turn the link field into a list of link objects.

    The field is read line by line. Each line is split on single spaces into
    up to three values stored, in order, under ``url``, ``anchor`` and
    ``format``. Lines whose url is empty are skipped.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "link" not in record:
        return record

    field_names = ("url", "anchor", "format")
    parsed = []
    for line in record["link"].split('\n'):
        tokens = line.strip().replace("  ", " ").split(" ")
        if not tokens[0]:
            # No url on this line.
            continue
        # zip stops at the shorter input, so missing values are left out and
        # anything past the third token is ignored.
        parsed.append(dict(zip(field_names, tokens)))
    record['link'] = parsed

    return record
465 | 
466 | 
def doi(record):
    """
    Add a link object for the record's DOI to its ``link`` list.

    The ``link`` list is created when missing. A new object
    ``{"url": ..., "anchor": "doi"}`` is appended unless the list already
    holds a DOI link, so calling this function repeatedly does not create
    duplicates. A DOI starting with ``10`` is prefixed with
    ``http://dx.doi.org/``; any other value is used as the url unchanged.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if 'doi' not in record:
        return record
    if 'link' not in record:
        record['link'] = []

    def _is_doi_link(item):
        # Link objects appended below are marked with anchor == "doi"; a plain
        # membership test on such a dict only looks at its keys and would
        # never match. The membership test is kept for other item shapes.
        if isinstance(item, dict) and item.get('anchor') == 'doi':
            return True
        return 'doi' in item

    if not any(_is_doi_link(item) for item in record['link']):
        url = record['doi']
        if url.startswith('10'):
            url = 'http://dx.doi.org/' + url
        record['link'].append({"url": url, "anchor": "doi"})
    return record
488 | 
489 | 
def convert_to_unicode(record):
    """
    Apply ``latex_to_unicode`` to every field of the record.

    List values are converted element by element, dict values are converted
    value by value (keys are kept), and any other value is converted directly.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in record:
        value = record[field]
        if isinstance(value, list):
            converted = list(map(latex_to_unicode, value))
        elif isinstance(value, dict):
            converted = {}
            for subkey, subvalue in value.items():
                converted[subkey] = latex_to_unicode(subvalue)
        else:
            converted = latex_to_unicode(value)
        record[field] = converted
    return record
510 | 
511 | 
def homogenize_latex_encoding(record):
    """
    Homogenize the latex encoding style for bibtex.

    Every field is first converted with :func:`convert_to_unicode`, then all
    fields except ``ID`` are passed through ``string_to_latex``. The ``title``
    field is additionally passed through ``protect_uppercase``.

    This function is experimental.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # Start from a uniform unicode representation...
    record = convert_to_unicode(record)
    # ...then encode it back to latex.
    for field in record:
        if field in ('ID',):
            continue
        logger.debug('Apply string_to_latex to: %s', field)
        record[field] = string_to_latex(record[field])
        if field != 'title':
            continue
        logger.debug('Protect uppercase in title')
        logger.debug('Before: %s', record[field])
        record[field] = protect_uppercase(record[field])
        logger.debug('After: %s', record[field])
    return record
535 | 
536 | 
def add_plaintext_fields(record):
    """
    Add, for each field of the record, a ``plain_<field>`` companion holding
    the same content with curly braces removed. See
    https://github.com/sciunto-org/python-bibtexparser/issues/116.

    String values are stripped directly; for dict and list values every
    contained value is stripped. Values of any other type are copied as-is.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    def _unbrace(text):
        return text.replace('{', "").replace('}', "")

    # Snapshot the keys first: new keys are added to the record while looping.
    for field in list(record.keys()):
        value = record[field]
        target = "plain_{}".format(field)
        record[target] = value

        if isinstance(value, str):
            record[target] = _unbrace(value)
        elif isinstance(value, dict):
            record[target] = {
                subkey: _unbrace(item) for subkey, item in value.items()
            }
        elif isinstance(value, list):
            record[target] = [_unbrace(item) for item in value]

    return record
570 | 


--------------------------------------------------------------------------------