├── .github └── workflows │ └── lean_action_ci.yml ├── .gitignore ├── BibtexQuery.lean ├── BibtexQuery ├── Entry.lean ├── Format.lean ├── Main.lean ├── Name.lean ├── ParsecExtra.lean ├── Parser.lean ├── Query.lean ├── String.lean └── TexDiacritics.lean ├── LICENSE ├── README.md ├── lake-manifest.json ├── lakefile.toml ├── lean-toolchain └── scripts ├── bibc ├── bibq └── bibrofi /.github/workflows/lean_action_ci.yml: -------------------------------------------------------------------------------- 1 | name: Lean Action CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: leanprover/lean-action@v1 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.lake 2 | -------------------------------------------------------------------------------- /BibtexQuery.lean: -------------------------------------------------------------------------------- 1 | import BibtexQuery.Entry 2 | import BibtexQuery.ParsecExtra 3 | import BibtexQuery.Parser 4 | import BibtexQuery.Query 5 | import BibtexQuery.String 6 | import BibtexQuery.Name 7 | import BibtexQuery.Format 8 | -------------------------------------------------------------------------------- /BibtexQuery/Entry.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | Author: Frédéric Dupuis 5 | -/ 6 | 7 | import BibtexQuery.String 8 | 9 | /-! 10 | # Bibtex Entries and related functions 11 | 12 | This file defines inductive types for bibtex entries and tags, along with various related 13 | helper functions. 14 | -/ 15 | 16 | namespace BibtexQuery 17 | 18 | /-- i.e. 
authors = "Binne, Chose and Truc, Machin" -/ 19 | structure Tag where 20 | name : String 21 | content : String 22 | deriving Repr, Inhabited 23 | 24 | namespace Tag 25 | 26 | /-- Return a standardized representation of the tag. -/ 27 | def toString (t : Tag) : String := s!"{t.name} = " ++ "{" ++ s!"{t.content}" ++ "}" 28 | 29 | --#eval Tag.toString $ Tag.mk "author" "Frédéric Dupuis" 30 | 31 | end Tag 32 | 33 | /-- A whole bibtex entry, i.e. a paper or a book. -/ 34 | inductive Entry where 35 | | normalType (category : String) (name : String) (tags : List Tag) 36 | | stringType (content : String) 37 | | preambleType (content : String) 38 | | commentType 39 | deriving Repr, Inhabited 40 | 41 | namespace Entry 42 | 43 | /-- Returns the authors of a bibtex entry, or an empty string if it is not specified. -/ 44 | def getAuthors : (e : Entry) → String 45 | | .normalType _cl _name tags => 46 | match tags.find? (fun t => t.name = "author") with 47 | | some t => t.content 48 | | none => "" 49 | | _ => "" 50 | 51 | /-- Returns the title of a bibtex entry, or an empty string if it is not specified. -/ 52 | def getTitle : (e : Entry) → String 53 | | .normalType _cl _name tags => 54 | match tags.find? (fun t => t.name = "title") with 55 | | some t => t.content 56 | | none => "" 57 | | _ => "" 58 | 59 | /-- Returns the keywords of a bibtex entry, or an empty string if it is not specified. -/ 60 | def getKeywords : (e : Entry) → String 61 | | normalType _cl _name tags => 62 | match tags.find? (fun t => t.name = "keywords") with 63 | | some t => t.content 64 | | none => "" 65 | | _ => "" 66 | 67 | /-- Returns the class of a bibtex entry (ie book, article, etc). -/ 68 | def getClass : (e : Entry) → String 69 | | .normalType cl _name _tags => cl 70 | | _ => "" 71 | 72 | /-- Returns the key of a bibtex entry. -/ 73 | def getKey : Entry → String 74 | | .normalType _cl name _tags => name 75 | | _ => "No key" 76 | 77 | /-- Returns the arxiv number of a bibtex entry. 
-/ 78 | def getArxivNumber : Entry → String 79 | | .normalType _cl _name tags => 80 | match tags.find? (fun t => t.name = "eprint") with 81 | | some t => t.content 82 | | none => "" 83 | | _ => "No arxiv number" 84 | 85 | /-- Returns the arxiv link of a bibtex entry. -/ 86 | def getArxivLink : Entry → String 87 | | .normalType _cl _name tags => 88 | match tags.find? (fun t => t.name = "eprint") with 89 | | some t => "http://arxiv.org/abs/" ++ t.content 90 | | none => "" 91 | | _ => "No arxiv number" 92 | 93 | /-- Returns an abridged representation of a bibtex entry. -/ 94 | def toAbridgedRepr (e : Entry) : String := 95 | e.getKey.pad ' ' 25 ++ " | " ++ e.getAuthors.toLastNames.pad ' ' 50 ++ " | " ++ e.getTitle 96 | 97 | /-- Returns a «clean citation» of a bibtex entry. -/ 98 | def toCitation (e : Entry) : String := 99 | e.getAuthors ++ ", «" ++ e.getTitle ++ "», " ++ e.getArxivLink 100 | 101 | 102 | /-- Returns a string containing a standardized representation of a bibtex entry. -/ 103 | def toString : Entry → String 104 | | .normalType cl name tags => Id.run do 105 | let mut output := (s!"@{cl}" ++ "{" ++ s!"{name},").push '\n' 106 | for t in tags do 107 | output := output ++ " " ++ t.toString ++ s!"\n" 108 | return (output ++ "}") 109 | | _ => "" 110 | 111 | --#eval IO.print <| Entry.toString $ .normalType "book" "d12" [Tag.mk "author" "Dupuis, Frédéric", 112 | -- Tag.mk "title" "Bonsoir la visite"] 113 | 114 | --#eval IO.print <| Entry.toCitation $ .normalType "book" "d12" [Tag.mk "author" "Dupuis, Frédéric", 115 | -- Tag.mk "title" "Bonsoir la visite", Tag.mk "eprint" "2308.11736"] 116 | 117 | end Entry 118 | 119 | end BibtexQuery 120 | -------------------------------------------------------------------------------- /BibtexQuery/Format.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright (c) 2024 Jz Pan. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 
4 | Author: Jz Pan 5 | -/ 6 | 7 | import BibtexQuery.Name 8 | import BibtexQuery.Entry 9 | import Lean.Data.Xml.Basic 10 | import Std.Data.HashMap 11 | import Std.Internal.Parsec 12 | import Std.Internal.Parsec.String 13 | 14 | /-! 15 | 16 | # Bibtex item formatting 17 | 18 | This file contains functions for bibtex item formatting, 19 | tag generating and sorting. 20 | 21 | -/ 22 | 23 | open Lean Xml Std.Internal.Parsec Unicode BibtexQuery.TexDiacritics BibtexQuery.Name 24 | 25 | namespace BibtexQuery 26 | 27 | /-- Represents a processed bibtex entry. -/ 28 | structure ProcessedEntry where 29 | /-- The category of a bibtex entry, e.g. "article" or "book". -/ 30 | category : String 31 | /-- The name, or called citekey, of a bibtex entry. -/ 32 | name : String 33 | /-- The tags of a bibtex entry, stored as a `HashMap`. Don't be confused with `tag`. -/ 34 | tags : Std.HashMap String (Array TexContent) 35 | /-- The tag of a bibtex entry, e.g. `[Doe12]`. Don't be confused with `tags`. -/ 36 | tag : String 37 | /-- The HTML representing a bibtex entry, e.g. 38 | `John Doe. Test. 2012.` Generated by bibtex formatter. -/ 39 | html : Array Content 40 | /-- The array of authors or editors of a bibtex entry. 41 | Only used for tag generation and bibtex entry sorting. -/ 42 | authorOrEditor : Array BibtexName 43 | /-- The HTML representing the authors. -/ 44 | authorHtml : Array Content 45 | /-- The HTML representing the editors. -/ 46 | editorHtml : Array Content 47 | /-- The date of a bibtex entry, in the format `year * 100 + month`. 48 | Only used for tag generation and bibtex entry sorting. -/ 49 | date : Nat 50 | /-- The HTML representing the date. -/ 51 | dateHtml : Array Content 52 | /-- The title of a bibtex entry, with diacritics removed, and converted to uppercase. 53 | Only used for bibtex entry sorting. -/ 54 | titleWithoutDiacritics : String 55 | 56 | /-- Get the date (which is `year * 100 + month`) and the date HTML of a bibitem. 
-/ 57 | def getDate (tags : Std.HashMap String (Array TexContent)) : Nat × Array Content := 58 | if let .some yearTex := tags["year"]? then 59 | let yearHtml := TexContent.toHtmlArray yearTex 60 | if let .some year := (TexContent.toPlaintextArray yearTex).toList.filter 61 | Char.isDigit |> String.mk |>.toNat? then 62 | let month : Nat := 63 | if let .some monthTex := tags["month"]? then 64 | let monthStr := (TexContent.toPlaintextArray monthTex).trim.toLower 65 | match monthStr with 66 | | "jan" => 1 | "feb" => 2 | "mar" => 3 67 | | "apr" => 4 | "may" => 5 | "jun" => 6 68 | | "jul" => 7 | "aug" => 8 | "sep" => 9 69 | | "oct" => 10 | "nov" => 11 | "dec" => 12 70 | | _ => 71 | let month := monthStr.toNat?.getD 0 72 | if month ≥ 1 ∧ month ≤ 12 then month else 0 73 | else 74 | 0 75 | let monthStr : String := 76 | match month with 77 | | 1 => "Jan" | 2 => "Feb" | 3 => "Mar" 78 | | 4 => "Apr" | 5 => "May" | 6 => "Jun" 79 | | 7 => "Jul" | 8 => "Aug" | 9 => "Sep" 80 | | 10 => "Oct" | 11 => "Nov" | 12 => "Dec" 81 | | _ => "" 82 | (year * 100 + month, 83 | if monthStr.isEmpty then 84 | yearHtml 85 | else if yearHtml.isEmpty then 86 | #[.Character monthStr] 87 | else 88 | #[.Character monthStr, .Character " "] ++ yearHtml) 89 | else 90 | (0, yearHtml) 91 | else 92 | (0, #[]) 93 | 94 | /-- Get the tag of a bibitem (e.g. `[ABC12]`) from authors and date. 
-/ 95 | def getTag (authors : Array BibtexName) (date : Nat) : String := 96 | let authorString := 97 | if authors.size ≥ 5 then 98 | /- five or more authors: abbreviate only the first three and append "+" -/ (authors.toSubarray.take 3 |>.toArray.map (·.oneLetterAbbr) |>.toList |> String.join) ++ "+" 99 | else if authors.size ≥ 2 then 100 | /- two to four authors: one-letter abbreviation of each -/ authors.map (·.oneLetterAbbr) |>.toList |> String.join 101 | else 102 | /- zero or one author: three-letter abbreviation -/ authors.map (·.threeLetterAbbr) |>.toList |> String.join 103 | /- `date / 100 + 100` followed by `takeRight 2` yields the last two decimal digits of `date / 100`, zero-padded (e.g. 201203 gives "12") -/ let dateString := if date > 0 then (toString (date / 100 + 100)).takeRight 2 else "" 104 | "[" ++ authorString ++ dateString ++ "]" 105 | 106 | /-- Replaces every occurrence of the first string literal by the second, and repeats for as long as the result is strictly shorter than its input. NOTE(review): both literals display as a single space in this whitespace-collapsed listing; presumably the search pattern is two spaces in the repository, confirm there. -/ partial def removeDuplicatedSpacesAux (s : String) : String := 107 | let s2 := s.replace " " " " 108 | if s2.length < s.length then 109 | removeDuplicatedSpacesAux s2 110 | else 111 | s 112 | 113 | /-- Replaces each carriage return, line feed and tab by a space, trims the result, then applies `removeDuplicatedSpacesAux`. -/ def removeDuplicatedSpaces (s : String) : String := 114 | s.replace "\r" " " |>.replace "\n" " " |>.replace "\t" " " |>.trim |> removeDuplicatedSpacesAux 115 | 116 | /-- Get a `ProcessedEntry` from an `Entry`, computing all its fields except for `html`. 117 | If the input is not `BibtexQuery.Entry.normalType`, returns `Option.none`.
-/ 118 | def ProcessedEntry.ofEntry (e : Entry) : Except String (Option ProcessedEntry) := do 119 | match e with 120 | | .normalType category name tags => 121 | let lst : List (String × Array TexContent) ← tags.mapM fun x => do 122 | let s := removeDuplicatedSpaces x.content 123 | if x.name = "pubmed" ∨ x.name = "doi" ∨ x.name = "eprint" ∨ x.name.endsWith "url" then 124 | .ok (x.name, #[.normal s]) 125 | else 126 | match texContents s.iter with 127 | | .success _ arr => .ok (x.name, arr) 128 | | .error it err => .error s!"failed to run texContents on '{it.1}' at pos {it.2}: {err}" 129 | let tags := Std.HashMap.ofList lst 130 | let authors := processNames (tags.getD "author" #[]) 131 | let editors := processNames (tags.getD "editor" #[]) 132 | let authorOrEditor := if authors.isEmpty then editors else authors 133 | let (date, dateHtml) := getDate tags 134 | pure <| .some { 135 | category := category 136 | name := name 137 | tags := tags 138 | tag := getTag authorOrEditor date 139 | html := #[] 140 | authorOrEditor := authorOrEditor 141 | authorHtml := BibtexName.toHtmlArray authors 142 | editorHtml := BibtexName.toHtmlArray editors ++ 143 | if editors.size ≥ 2 then 144 | #[.Character ", editors"] 145 | else if editors.size = 1 then 146 | #[.Character ", editor"] 147 | else 148 | #[] 149 | date := date 150 | dateHtml := dateHtml 151 | titleWithoutDiacritics := tags.getD "title" #[] |> TexContent.toPlaintextArray |> 152 | stripDiacriticsFromString |>.map getUpperChar 153 | } 154 | | _ => pure .none 155 | 156 | /-! 157 | 158 | ## Formatting a bibtex item 159 | 160 | The following code is a port of `pybtex/style/formatting/unsrt.py` of `pybtex`, 161 | originally licensed under MIT license. 
162 | 163 | -/ 164 | 165 | section 166 | 167 | mutual 168 | 169 | partial def getLastCharOfContent (x : Content) : Option Char := 170 | match x with 171 | | .Element ⟨ _, _, arr ⟩ => getLastCharOfArrayContent arr 172 | | .Comment _ => .none 173 | | .Character s => if s.isEmpty then .none else .some s.back 174 | 175 | partial def getLastCharOfArrayContent (arr : Array Content) : Option Char := 176 | arr.findSomeRev? getLastCharOfContent 177 | 178 | end 179 | 180 | variable (e : ProcessedEntry) 181 | 182 | namespace Formatter 183 | 184 | /-! 185 | 186 | ### Helper functions 187 | 188 | -/ 189 | 190 | def mkTag (name : String) (content : Array TexContent) : Array Content := 191 | let ret := TexContent.toHtmlArray content 192 | if ret.isEmpty then #[] else #[.Element ⟨ name, RBMap.empty, ret ⟩] 193 | 194 | def mkHref (href : String) (content : Array TexContent) : Array Content := 195 | let ret := TexContent.toHtmlArray content 196 | if ret.isEmpty then #[] else #[.Element ⟨ "a", RBMap.empty.insert "href" href, ret ⟩] 197 | 198 | def mkStr (content : Array TexContent) (pre post : String := "") : Array Content := 199 | let ret := TexContent.toHtmlArray content 200 | (if ret.isEmpty || pre.isEmpty then #[] else #[.Character pre]) ++ ret ++ 201 | (if ret.isEmpty || post.isEmpty then #[] else #[.Character post]) 202 | 203 | def arrayConcat {α} (arr : Array (Option (Array α))) (sep : Array α := #[]) : Array α := 204 | arr.filterMap id |>.foldl (fun acc a => 205 | acc ++ (if acc.isEmpty || a.isEmpty then #[] else sep) ++ a) #[] 206 | 207 | def words (arr : Array (Option (Array Content))) : Array Content := 208 | arrayConcat arr #[.Character " "] 209 | 210 | def canEndSentence (c : Char) : Bool := 211 | match c with 212 | | '.' | '?' | '!' | '…' | '。' | '?' | '!' 
| '\uFF0E' | '\uFF61' 213 | | '\u203C' | '\u203D' | '\u2047' | '\u2048' | '\u2049' => true 214 | | _ => false 215 | 216 | /-- Appends "." to a non-empty array unless its last character (as found by `getLastCharOfArrayContent`) satisfies `canEndSentence`; an empty array is returned unchanged. -/ def sentence1 (arr : Array Content) : Array Content := 217 | if arr.isEmpty then 218 | #[] 219 | else if canEndSentence (getLastCharOfArrayContent arr |>.getD ' ') then 220 | arr 221 | else 222 | arr ++ #[.Character "."] 223 | 224 | def sentence (arr : Array (Option (Array Content))) : Array Content := 225 | sentence1 <| arrayConcat arr #[.Character ", "] 226 | 227 | def sentence' (content : Array TexContent) : Array Content := 228 | sentence1 <| TexContent.toHtmlArray content 229 | 230 | def toplevel (arr : Array (Option (Array Content))) : Array Content := 231 | arrayConcat arr #[.Character "\n"] 232 | 233 | /-- Renders a non-empty `url` as the text "URL: " followed by the result of `mkHref url`; returns `#[]` for an empty string. -/ def mkUrl (url : String) : Array Content := 234 | if url.isEmpty then #[] else #[.Character "URL: "] ++ mkHref url #[.normal url] 235 | 236 | /-- Builds a reference link from `url`. A leading `urlPrefix` or `namePrefix` (compared case-insensitively) is dropped; the remainder `s` is passed to `mkHref` with target `urlPrefix ++ s` and text `namePrefix ++ s`. If the remainder still starts with "http" the original `url` goes to `mkUrl` instead, and an empty remainder gives `#[]`. -/ def mkWebRef (urlPrefix namePrefix url : String) : Array Content := 237 | let s := 238 | if url.toLower.startsWith urlPrefix.toLower then 239 | url.drop urlPrefix.length 240 | else if url.toLower.startsWith namePrefix.toLower then 241 | url.drop namePrefix.length 242 | else 243 | url 244 | if s.toLower.startsWith "http" then 245 | -- the url does not start with `urlPrefix` or `namePrefix`, 246 | -- but still starts with "http" 247 | mkUrl url 248 | else if s.isEmpty then 249 | #[] 250 | else 251 | mkHref (urlPrefix ++ s) #[.normal (namePrefix ++ s)] 252 | 253 | /-! 254 | 255 | ### Partial templates 256 | 257 | -/ 258 | 259 | def formatVolumeAndPages : Option (Array Content) := do 260 | let pages ← e.tags["pages"]? 261 | match e.tags["volume"]? with 262 | | .some volume => 263 | arrayConcat #[ 264 | mkStr volume, 265 | (mkStr · "(" ")") <$> e.tags["number"]?, 266 | mkStr pages ":" 267 | ] 268 | | .none => mkStr pages "pages " 269 | 270 | def formatEprint : Option (Array Content) := do 271 | let eprint ← TexContent.toPlaintextArray <$> e.tags["eprint"]?
272 | let eprinttype := 273 | TexContent.toPlaintextArray <$> (e.tags["eprinttype"]? <|> e.tags["archiveprefix"]?) 274 | |>.getD "arXiv" 275 | let eprintlist : Array (Array String × String × String) := #[ 276 | (#["arxiv"], "https://arxiv.org/abs/", "arXiv:"), 277 | (#["iacr"], "https://eprint.iacr.org/", "IACR:"), 278 | (#["pmcid", "pmc"], "https://www.ncbi.nlm.nih.gov/pmc/articles/", "PMCID:") 279 | ] 280 | let checkEprint : Array String × String × String → Option (Array Content) := 281 | fun (lst, urlPrefix, namePrefix) => 282 | if lst.contains eprinttype.toLower then 283 | let ret := mkWebRef urlPrefix namePrefix eprint 284 | if ret.isEmpty then .none else .some ret 285 | else 286 | .none 287 | eprintlist.findSome? checkEprint <|> .some 288 | (if eprint.toLower.startsWith "http" then 289 | -- unrecognized eprint type but starts with "http" 290 | mkUrl eprint 291 | else 292 | -- unrecognized eprint type, cannot add link to it 293 | #[.Character (eprinttype ++ ":" ++ eprint)]) 294 | 295 | def formatWebRefs : Array Content := 296 | let formatUrl : Option (Array Content) := 297 | mkUrl <$> TexContent.toPlaintextArray <$> e.tags["url"]? 298 | let formatWebRef (tagName urlPrefix namePrefix : String) : Option (Array Content) := 299 | mkWebRef urlPrefix namePrefix <$> TexContent.toPlaintextArray <$> e.tags[tagName]? 300 | sentence #[ 301 | formatWebRef "pubmed" "https://www.ncbi.nlm.nih.gov/pubmed/" "PMID:", 302 | formatWebRef "doi" "https://doi.org/" "doi:", 303 | formatWebRef "mrnumber" "https://www.ams.org/mathscinet-getitem?mr=MR" "MR:", 304 | formatWebRef "zbl" "https://zbmath.org/?q=an:" "Zbl:", 305 | formatEprint e, 306 | formatUrl 307 | ] 308 | 309 | def formatAuthorOrEditor : Array Content := 310 | sentence1 (if e.authorHtml.isEmpty then e.editorHtml else e.authorHtml) 311 | 312 | def formatVolumeAndSeries (asSentence : Bool) : Array Content := 313 | let series := e.tags["series"]? 314 | let arr : Array Content := 315 | if let .some volume := e.tags["volume"]? 
then 316 | arrayConcat #[ 317 | mkStr volume (if asSentence then "Volume " else "volume "), 318 | (mkStr · " of ") <$> series 319 | ] 320 | else if let .some number := e.tags["number"]? then 321 | arrayConcat #[ 322 | mkStr number (if asSentence then "Number " else "number "), 323 | (mkStr · " in ") <$> series 324 | ] 325 | else 326 | (mkStr <$> series).getD #[] 327 | if asSentence then sentence1 arr else arr 328 | 329 | def formatChapterAndPages : Array Content := 330 | arrayConcat #[ 331 | (mkStr · "chapter ") <$> e.tags["chapter"]?, 332 | (mkStr · "pages ") <$> e.tags["pages"]? 333 | ] #[.Character ", "] 334 | 335 | /-- Formats the `edition` tag, or returns `none` when the entry has no such tag. The plaintext value is stripped of diacritics and lowercased; if it ends with " ed." or " ed", or contains "edition", the tag is rendered as written. If it parses as a natural number, 0 to 12 are spelled out ("First", "Second", and so on) and larger values get a numeric ordinal suffix, where values whose last two digits are 11, 12 or 13 take "th" (13 used to come out as "13rd"). Any other value is rendered followed by " edition". -/ def formatEdition : Option (Array Content) := 336 | if let .some arr := e.tags["edition"]? then 337 | let s := TexContent.toPlaintextArray arr |> stripDiacriticsFromString |>.map getLowerChar 338 | if s.endsWith " ed." || s.endsWith " ed" || (s.replace "edition" "").length < s.length then 339 | TexContent.toHtmlArray arr 340 | else if let .some n := s.toNat? then 341 | let s : String := match n with 342 | | 0 => "Zeroth" | 1 => "First" | 2 => "Second" | 3 => "Third" 343 | | 4 => "Fourth" | 5 => "Fifth" | 6 => "Sixth" | 7 => "Seventh" 344 | | 8 => "Eighth" | 9 => "Ninth" | 10 => "Tenth" | 11 => "Eleventh" 345 | | 12 => "Twelfth" 346 | | _ => toString n ++ match n % 100 with 347 | | 11 | 12 | 13 => "th" 348 | | _ => match n % 10 with 349 | | 1 => "st" 350 | | 2 => "nd" 351 | | 3 => "rd" 352 | | _ => "th" 353 | .some #[.Character (s ++ " edition")] 354 | else 355 | mkStr arr "" " edition" 356 | else 357 | .none 358 | 359 | def formatAddressOrganizationPublisherDate (includeOrganization : Bool) : 360 | Array Content := 361 | let organization : Option (Array Content) := 362 | if includeOrganization then 363 | mkStr <$> e.tags["organization"]? 364 | else 365 | .none 366 | if let .some address := e.tags["address"]?
then 367 | words #[ 368 | sentence #[ 369 | mkStr address, 370 | e.dateHtml 371 | ], 372 | sentence #[ 373 | organization, 374 | mkStr <$> e.tags["publisher"]? 375 | ] 376 | ] 377 | else 378 | sentence #[ 379 | organization, 380 | mkStr <$> e.tags["publisher"]?, 381 | e.dateHtml 382 | ] 383 | 384 | def formatISBN : Option (Array Content) := 385 | (sentence1 <| mkStr · "ISBN ") <$> e.tags["isbn"]? 386 | 387 | /-! 388 | 389 | ### Templates 390 | 391 | -/ 392 | 393 | def formatArticle : Array Content := 394 | toplevel #[ 395 | sentence1 e.authorHtml, 396 | sentence' <$> e.tags["title"]?, 397 | sentence #[ 398 | mkTag "i" <$> e.tags["journal"]?, 399 | formatVolumeAndPages e, 400 | e.dateHtml 401 | ], 402 | sentence' <$> e.tags["note"]?, 403 | formatWebRefs e 404 | ] 405 | 406 | def formatBook : Array Content := 407 | toplevel #[ 408 | formatAuthorOrEditor e, 409 | sentence #[mkTag "i" <$> e.tags["title"]?], 410 | sentence1 <$> formatEdition e, 411 | formatVolumeAndSeries e true, 412 | sentence #[ 413 | mkStr <$> e.tags["publisher"]?, 414 | mkStr <$> e.tags["address"]?, 415 | e.dateHtml 416 | ], 417 | formatISBN e, 418 | sentence' <$> e.tags["note"]?, 419 | formatWebRefs e 420 | ] 421 | 422 | def formatBooklet : Array Content := 423 | toplevel #[ 424 | sentence1 e.authorHtml, 425 | sentence' <$> e.tags["title"]?, 426 | formatVolumeAndSeries e true, 427 | sentence #[ 428 | mkStr <$> e.tags["howpublished"]?, 429 | mkStr <$> e.tags["address"]?, 430 | e.dateHtml, 431 | mkStr <$> e.tags["note"]? 432 | ], 433 | formatWebRefs e 434 | ] 435 | 436 | def formatInBook : Array Content := 437 | toplevel #[ 438 | formatAuthorOrEditor e, 439 | sentence #[ 440 | mkTag "i" <$> e.tags["title"]?, 441 | formatEdition e, 442 | formatChapterAndPages e 443 | ], 444 | formatVolumeAndSeries e true, 445 | sentence #[ 446 | mkStr <$> e.tags["publisher"]?, 447 | mkStr <$> e.tags["address"]?, 448 | e.dateHtml, 449 | mkStr <$> e.tags["note"]? 
450 | ], 451 | formatWebRefs e 452 | ] 453 | 454 | def formatInCollection : Array Content := 455 | toplevel #[ 456 | sentence1 e.authorHtml, 457 | sentence' <$> e.tags["title"]?, 458 | (fun x => #[.Character "In "] ++ x) <$> sentence #[ 459 | e.editorHtml, 460 | mkTag "i" <$> e.tags["booktitle"]?, 461 | formatEdition e, 462 | formatVolumeAndSeries e false, 463 | formatChapterAndPages e 464 | ], 465 | sentence #[ 466 | mkStr <$> e.tags["publisher"]?, 467 | mkStr <$> e.tags["address"]?, 468 | e.dateHtml 469 | ], 470 | formatWebRefs e 471 | ] 472 | 473 | def formatInProceedings : Array Content := 474 | toplevel #[ 475 | sentence1 e.authorHtml, 476 | sentence' <$> e.tags["title"]?, 477 | (fun x => #[.Character "In "] ++ x) <$> words #[ 478 | sentence #[ 479 | e.editorHtml, 480 | mkTag "i" <$> e.tags["booktitle"]?, 481 | formatVolumeAndSeries e false, 482 | mkStr <$> e.tags["pages"]? 483 | ], 484 | formatAddressOrganizationPublisherDate e true 485 | ], 486 | sentence' <$> e.tags["note"]?, 487 | formatWebRefs e 488 | ] 489 | 490 | def formatManual : Array Content := 491 | toplevel #[ 492 | sentence1 e.authorHtml, 493 | sentence #[mkTag "i" <$> e.tags["title"]?], 494 | sentence1 <$> formatEdition e, 495 | sentence #[ 496 | mkStr <$> e.tags["organization"]?, 497 | mkStr <$> e.tags["address"]?, 498 | e.dateHtml 499 | ], 500 | sentence' <$> e.tags["note"]?, 501 | formatWebRefs e 502 | ] 503 | 504 | def formatMasterThesis : Array Content := 505 | toplevel #[ 506 | sentence1 e.authorHtml, 507 | sentence' <$> e.tags["title"]?, 508 | sentence #[ 509 | .some #[.Character "Master's thesis"], 510 | mkStr <$> e.tags["school"]?, 511 | mkStr <$> e.tags["address"]?, 512 | e.dateHtml 513 | ], 514 | sentence' <$> e.tags["note"]?, 515 | formatWebRefs e 516 | ] 517 | 518 | def formatMisc : Array Content := 519 | toplevel #[ 520 | sentence1 e.authorHtml, 521 | sentence' <$> e.tags["title"]?, 522 | sentence #[ 523 | mkStr <$> e.tags["howpublished"]?, 524 | e.dateHtml 525 | ], 526 | sentence' 
<$> e.tags["note"]?, 527 | formatWebRefs e 528 | ] 529 | 530 | def formatPhDThesis : Array Content := 531 | toplevel #[ 532 | sentence1 e.authorHtml, 533 | sentence #[mkTag "i" <$> e.tags["title"]?], 534 | sentence #[ 535 | (mkStr <$> e.tags["type"]?) <|> .some #[.Character "PhD thesis"], 536 | mkStr <$> e.tags["school"]?, 537 | mkStr <$> e.tags["address"]?, 538 | e.dateHtml 539 | ], 540 | sentence' <$> e.tags["note"]?, 541 | formatWebRefs e 542 | ] 543 | 544 | def formatProceedings : Array Content := 545 | let arr : Array (Option (Array Content)) := 546 | if e.editorHtml.isEmpty then 547 | #[ 548 | sentence' <$> e.tags["organization"]?, 549 | sentence #[ 550 | mkTag "i" <$> e.tags["title"]?, 551 | formatVolumeAndSeries e false, 552 | formatAddressOrganizationPublisherDate e false 553 | ] 554 | ] 555 | else 556 | #[ 557 | sentence1 e.editorHtml, 558 | sentence #[ 559 | mkTag "i" <$> e.tags["title"]?, 560 | formatVolumeAndSeries e false, 561 | formatAddressOrganizationPublisherDate e true 562 | ] 563 | ] 564 | toplevel <| arr ++ #[ 565 | sentence' <$> e.tags["note"]?, 566 | formatWebRefs e 567 | ] 568 | 569 | def formatTechReport : Array Content := 570 | toplevel #[ 571 | sentence1 e.authorHtml, 572 | sentence' <$> e.tags["title"]?, 573 | sentence #[ 574 | words #[ 575 | (mkStr <$> e.tags["type"]?) <|> .some #[.Character "Technical Report"], 576 | mkStr <$> e.tags["number"]? 
577 | ], 578 | mkStr <$> e.tags["institution"]?, 579 | mkStr <$> e.tags["address"]?, 580 | e.dateHtml 581 | ], 582 | sentence' <$> e.tags["note"]?, 583 | formatWebRefs e 584 | ] 585 | 586 | def formatUnpublished : Array Content := 587 | toplevel #[ 588 | sentence1 e.authorHtml, 589 | sentence' <$> e.tags["title"]?, 590 | sentence #[ 591 | mkStr <$> e.tags["note"]?, 592 | e.dateHtml 593 | ], 594 | formatWebRefs e 595 | ] 596 | 597 | def format : Array Content := 598 | match e.category with 599 | | "article" => formatArticle e 600 | | "book" => formatBook e 601 | | "booklet" => formatBooklet e 602 | | "inbook" => formatInBook e 603 | | "incollection" => formatInCollection e 604 | | "inproceedings" | "conference" => formatInProceedings e 605 | | "manual" => formatManual e 606 | | "mastersthesis" => formatMasterThesis e 607 | | "misc" | "online" => formatMisc e 608 | | "phdthesis" => formatPhDThesis e 609 | | "proceedings" => formatProceedings e 610 | | "techreport" => formatTechReport e 611 | | "unpublished" => formatUnpublished e 612 | | _ => #[.Character ("TODO: " ++ e.category)] 613 | 614 | end Formatter 615 | 616 | /-- Fill the `html` field of `ProcessedEntry`. -/ 617 | def ProcessedEntry.format : ProcessedEntry := 618 | { e with html := Formatter.format e } 619 | 620 | end 621 | 622 | /-! 
623 | 624 | ## Sorting bibtex items 625 | 626 | -/ 627 | 628 | def compareAuthor (a b : BibtexName) : Ordering := 629 | if a.lastNameWithoutDiacritics < b.lastNameWithoutDiacritics then 630 | .lt 631 | else if a.lastNameWithoutDiacritics > b.lastNameWithoutDiacritics then 632 | .gt 633 | else if a.firstNameWithoutDiacritics < b.firstNameWithoutDiacritics then 634 | .lt 635 | else if a.firstNameWithoutDiacritics > b.firstNameWithoutDiacritics then 636 | .gt 637 | else 638 | .eq 639 | 640 | partial def compareAuthors (a b : Array BibtexName) (i : Nat := 0) : Ordering := 641 | if ha : i < a.size then 642 | if hb : i < b.size then 643 | match compareAuthor a[i] b[i] with 644 | | .lt => .lt | .gt => .gt 645 | | .eq => compareAuthors a b (i + 1) 646 | else 647 | .gt 648 | else 649 | if i < b.size then .lt else .eq 650 | 651 | def compareEntry (a b : ProcessedEntry) : Ordering := 652 | match compareAuthors a.authorOrEditor b.authorOrEditor with 653 | | .lt => .lt | .gt => .gt 654 | | .eq => 655 | if a.date < b.date then 656 | .lt 657 | else if a.date > b.date then 658 | .gt 659 | else if a.titleWithoutDiacritics < b.titleWithoutDiacritics then 660 | .lt 661 | else if a.titleWithoutDiacritics > b.titleWithoutDiacritics then 662 | .gt 663 | else 664 | .eq 665 | 666 | /-- Sort an array of bibtex entries. 
-/ 667 | def sortEntry (arr : Array ProcessedEntry) : Array ProcessedEntry := 668 | arr.qsort (compareEntry · · |>.isLT) 669 | 670 | /-- Prepends `length` lowercase letters to `s`: each step emits `Char.ofNat (97 + n % 26)` (so a remainder of 0 gives 'a') and continues with `n / 26`. -/ partial def toBase26Aux (n : Nat) (length : Nat) (s : String) : String := 671 | if length = 0 then 672 | s 673 | else 674 | toBase26Aux (n / 26) (length - 1) <| toString (Char.ofNat (97 + n % 26)) ++ s 675 | 676 | /-- Letter suffix for the number `n`: 0 to 25 give "a" to "z", 26 gives "aa", and so on. `length` is the number of letters currently being tried; it grows while `n` is at least `26 ^ length`. -/ partial def toBase26 (n : Nat) (length : Nat := 1) : String := 677 | if n ≥ 26 ^ length then 678 | toBase26 (n - 26 ^ length) (length + 1) 679 | else 680 | toBase26Aux n length "" 681 | 682 | /-- Walks the tag array from index `i`. The map records, for every tag seen so far, the index of its first occurrence and how many repeats have followed. On a repeat, the last character of the current tag is replaced by `toBase26 (count + 1)` and "]"; on the first repeat the first occurrence additionally has its last character replaced by "a]". -/ partial def deduplicateTagAux 683 | (x : Array String × Std.HashMap String (Nat × Nat)) (i : Nat) : 684 | Array String × Std.HashMap String (Nat × Nat) := 685 | if h : i < x.1.size then 686 | let tag := x.1[i] 687 | let y : Array String × Std.HashMap String (Nat × Nat) := 688 | if let .some (first, count) := x.2[tag]? then 689 | let z : Array String := 690 | if count = 0 then 691 | x.1.modify first fun x => x.dropRight 1 ++ "a]" 692 | else 693 | x.1 694 | let z := z.modify i fun x => x.dropRight 1 ++ toBase26 (count + 1) ++ "]" 695 | (z, x.2.insert tag (first, count + 1)) 696 | else 697 | (x.1, x.2.insert tag (i, 0)) 698 | deduplicateTagAux y (i + 1) 699 | else 700 | x 701 | 702 | /-- Deduplicate an array of tags. -/ 703 | def deduplicateTag' (arr : Array String) : Array String := 704 | (deduplicateTagAux (arr, ∅) 0).1 705 | 706 | /-- Deduplicate the tags of an array of bibtex entries. -/ 707 | def deduplicateTag (arr : Array ProcessedEntry) : Array ProcessedEntry := 708 | Array.zip arr (deduplicateTag' <| arr.map fun x => x.tag) |>.map fun (x, s) => 709 | { x with tag := s } 710 | 711 | end BibtexQuery 712 | -------------------------------------------------------------------------------- /BibtexQuery/Main.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Frédéric Dupuis 5 | -/ 6 | 7 | import BibtexQuery.Parser 8 | import BibtexQuery.String 9 | import BibtexQuery.Query 10 | import BibtexQuery.Format 11 | 12 | /-! 13 | # BibtexQuery: a simple command-line bibtex query utility 14 | 15 | BibtexQuery is a command-line utility that reads in a bibtex file and performs simple queries. A query is a string 16 | of the form "t.querystring", where `t` is the query type (`k` for key, `c` for class, `a` for author, `t` for title or `w` for keywords, as listed by `printHelp`) and `querystring` 17 | is a string (without spaces). BibtexQuery reads in a bibtex file, and returns the entries that match all the 18 | queries given as command-line parameters. Note that the entries are processed in a way that strips diacritics, 19 | spaces and special characters before the queries are performed. In addition, the list of authors is normalized to 20 | firstnamelastname. Hence, for example, "Dupuis, Frédéric" will match the query `a.ericdup`. 21 | -/ 22 | 23 | open Lean BibtexQuery 24 | 25 | /-- Returns the keys of `normalType` entries that were already seen earlier in `parseRes`. A key occurring k times is reported k - 1 times; each repeat is consed onto the result, so later repeats come first. -/ def listDoublons (parseRes : List BibtexQuery.Entry) : List String := 26 | let keysOnly := parseRes.filterMap (fun entry => match entry with 27 | | BibtexQuery.Entry.normalType _ name _ => some name 28 | | _ => none) 29 | let ⟨_, dupl⟩ : (Std.HashMap String Unit) × List String := 30 | keysOnly.foldl (init := ⟨∅, []⟩) 31 | (fun ⟨hsh, lst⟩ key => 32 | match hsh[key]? with 33 | | none => ⟨hsh.insert key (), lst⟩ 34 | | some _ => ⟨hsh, (key :: lst)⟩) 35 | dupl 36 | 37 | def printHelp := IO.println 38 | " 39 | bibtex-query - command-line bibtex file processor 40 | 41 | Usage: bibtex-query command filename [args] 42 | 43 | Commands: 44 | h: print this help message 45 | d: check for duplicate entries 46 | l: sort all entries and list them in plaintext form 47 | q: print entries that match the given query 48 | c: print citations of entries that match the given query 49 | 50 | Queries have the form «t.query» (without the quotes) with t being the type of query, 51 | and «query» being the content.
The entries printed out are those that match all the queries. 52 | 53 | Types of queries: 54 | k: key (ex: k.d14) 55 | c: class (ex: article, book) 56 | a: author (ex: a.dupuis) 57 | t: title (ex: t.channelcapacity) 58 | w: keywords (ex: w.quantum) 59 | " 60 | 61 | mutual 62 | 63 | partial def eToPlaintext : Xml.Element → String 64 | | .Element _ _ c => s!"{c.map cToPlaintext |>.foldl (· ++ ·) ""}" 65 | 66 | partial def cToPlaintext : Xml.Content → String 67 | | .Element e => eToPlaintext e 68 | | .Comment _ => "" 69 | | .Character c => c 70 | 71 | end 72 | 73 | def printEntry (e : ProcessedEntry) : IO Unit := 74 | let s := e.html.map cToPlaintext |>.toList |> String.join 75 | IO.println (e.tag ++ "{" ++ e.name ++ "} " ++ s ++ "\n") 76 | 77 | def printEntries (ents : List Entry) : IO Unit := 78 | match ents.toArray.mapM ProcessedEntry.ofEntry with 79 | | .ok arr => 80 | discard (arr.filterMap id |> sortEntry |> deduplicateTag |>.map ProcessedEntry.format 81 | |>.mapM printEntry) 82 | | .error err => throw <| IO.userError err 83 | 84 | def printMatchingEntries (ents : List Entry) (qs : List Query) : IO Unit := do 85 | printEntries <| ents.filter (·.matchQueries qs) 86 | 87 | def printMatchingCitations (ents : List Entry) (qs : List Query) : IO Unit := do 88 | for e in ents do 89 | if e.matchQueries qs then IO.println e.toCitation 90 | 91 | def main : List String → IO Unit 92 | | ["h"] => printHelp 93 | | ["-h"] => printHelp 94 | | ["--help"] => printHelp 95 | | ["h", _] => printHelp 96 | | ["-h", _] => printHelp 97 | | ["--help", _] => printHelp 98 | | ["d", fname] => do 99 | IO.println s!"Reading {fname} to find doubled keys" 100 | let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter 101 | match parsed with 102 | | .success _pos res => 103 | let lst := listDoublons res 104 | IO.println lst 105 | | .error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}" 106 | | ["l", fname] => do 107 | let parsed := 
BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter 108 | match parsed with 109 | | .success _pos res => printEntries res 110 | | .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" 111 | | "q" :: (fname :: queries) => do 112 | let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter 113 | match parsed with 114 | | .success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString 115 | | .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" 116 | | "c" :: (fname :: queries) => do 117 | let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter 118 | match parsed with 119 | | .success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString 120 | | .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" 121 | | _ => do IO.eprintln "Invalid command-line arguments"; printHelp 122 | -------------------------------------------------------------------------------- /BibtexQuery/Name.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright (c) 2024 Jz Pan. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | Author: Jz Pan 5 | -/ 6 | 7 | import BibtexQuery.TexDiacritics 8 | import UnicodeBasic 9 | import Std.Internal.Parsec 10 | import Std.Internal.Parsec.String 11 | 12 | 13 | /-! 14 | 15 | # Bibtex name processing 16 | 17 | This file contains functions for bibtex name processing. 18 | 19 | The main function is `processNames` which inputs an array of `TexContent`, 20 | return an array of `BibtexName`. 21 | 22 | -/ 23 | 24 | open Lean Unicode BibtexQuery.TexDiacritics 25 | 26 | namespace BibtexQuery.Name 27 | 28 | /-- Input an array of `TexContent`, split them by " " and ",". 
Each comma is kept as its own one-element group, and empty groups are dropped.
-/
def getNameAux (arr : Array TexContent) : Array (Array TexContent) :=
  let step (groups : Array (Array TexContent)) (item : TexContent) :
      Array (Array TexContent) :=
    match item with
    | .char ' ' => groups.push #[]
    | .char ',' => (groups.push #[item]).push #[]
    | _ => groups.modify (groups.size - 1) (fun g => g.push item)
  (arr.foldl step #[#[]]).filter fun g => !g.isEmpty

/-- Split one name, already cut into items by " " and ",", into `(Firstname, Lastname)`.
Braces in the name are kept. The rules, tried in order:

1. If some item is a lone ",", the items before the first one form the last name
   and the items after it form the first name.
2. Otherwise, if the last item is a single braced group, it alone is the last name
   and everything before it is the first name.
3. Otherwise, if some item starts with a lowercase letter, the items before the first such
   item are the first name and the rest are the last name.
4. Otherwise the final item is the last name and everything before it is the first name.
-/
def getName (arr : Array (Array TexContent)) :
    Array (Array TexContent) × Array (Array TexContent) :=
  let isComma : Array TexContent → Bool := fun
    | #[.char ','] => true
    | _ => false
  let before (k : Nat) : Array (Array TexContent) := arr.toSubarray.take k |>.toArray
  let after (k : Nat) : Array (Array TexContent) := arr.toSubarray.drop k |>.toArray
  match arr.findIdx? isComma with
  | .some n => (after (n + 1), before n)
  | .none =>
    let startsLowercase (item : Array TexContent) : Bool :=
      isLowercase <| (item.findSome? TexContent.getFirstChar).getD ' '
    let i := match arr.back? with
      | .some #[.braced _] => arr.size - 1
      | _ => (arr.findIdx? startsLowercase).getD (arr.size - 1)
    (before i, after i)

/-- Input an array of `TexContent`, return an array of `(Firstname, Lastname)`.
The braces in the name are preserved.
Names are separated by an item consisting solely of `and`; a name whose first and last
parts are both empty is dropped.
-/
def getNames (arr : Array TexContent) :
    Array (Array (Array TexContent) × Array (Array TexContent)) :=
  let isAnd : Array TexContent → Bool := fun
    | #[.normal "and"] => true
    | _ => false
  let perPerson : Array (Array (Array TexContent)) :=
    (getNameAux arr).foldl
      (fun acc token =>
        if isAnd token then acc.push #[] else acc.modify (acc.size - 1) (fun g => g.push token))
      #[#[]]
  perPerson.filterMap fun person =>
    let parts := getName person
    if parts.1.isEmpty && parts.2.isEmpty then none else some parts

/-- Map a character to a version without diacritics.
A few letters are special-cased; any other character is replaced by the first character of
its canonical decomposition whose combining class is zero (or left as is if there is none). -/
def stripDiacritics (c : Char) : Char :=
  match c with
  | 'œ' | 'ø' => 'o'
  | 'Œ' | 'Ø' => 'O'
  | 'æ' | 'å' => 'a'
  | 'Æ' | 'Å' => 'A'
  | 'ł' => 'l'
  | 'Ł' => 'L'
  | 'ı' => 'i'
  | 'ȷ' => 'j'
  | '\u00DF' => 's'
  | '\u1E9E' => 'S'
  | _ =>
    let decomposed := getCanonicalDecomposition c
    decomposed.get? (decomposed.find fun d => getCanonicalCombiningClass d == 0) |>.getD c

/-- Apply `stripDiacritics` to every character of a string and drop remaining mark characters. -/
def stripDiacriticsFromString (s : String) : String :=
  String.mk <| (s.toList.map stripDiacritics).filter fun c => !GeneralCategory.isMark c

/-- The alphabetic characters of a string, in order, after stripping diacritics. -/
def getAlphabets (s : String) : Array Char :=
  ((s.toList.map stripDiacritics).filter isAlphabetic).toArray

/-- Check whether a non-empty string consists only of the letters `I V X L C D M`.
The numeral itself is not validated; `IXIX`, for example, is accepted. -/
def isUppercaseRomanNumerals (s : String) : Bool :=
  !s.isEmpty && s.all fun c => "IVXLCDM".contains c

/-- Input a last name string without TeX commands, braces
and math equations, already split by spaces and comma,
return `(oneLetterAbbr, threeLetterAbbr)` of the last name.
Note that they are not necessarily of one-letter and
three-letter; they are if the last name contains no spaces and with only one uppercase letter.
The logic is:

First, if there is more than one item, remove all items which are upper case Roman numerals.
For example, this removes "III" in "Gerth III".

1. If the number of items is not exactly one, then both of the abbreviations are the concatenation
   of the first alphabets of each item.
2. Otherwise, if there are exactly two uppercase alphabets and there is exactly one alphabet
   between them, then both of the abbreviations are these three alphabets.
   For example, `McCrimmon => McC`.
3. Otherwise, if there are at least two uppercase alphabets, then both of the abbreviations are
   the concatenation of uppercase alphabets.
   For example, `Heath-Brown => HB`.
4. Otherwise, the abbreviations are the first one and three alphabets of the last name,
   respectively.
-/
def getLastNameAbbr (arr : Array String) : String × String :=
  let items :=
    if arr.size ≤ 1 then arr else arr.filter fun item => !isUppercaseRomanNumerals item
  match items with
  | #[] => ("", "")
  | #[single] =>
    let letters := getAlphabets single
    -- positions of the uppercase letters inside `letters`
    let upperIdx : Array Nat := letters.zipIdx.filterMap fun pair =>
      if isUppercase pair.1 then some pair.2 else none
    if upperIdx.size = 2 ∧ upperIdx[0]! + 2 = upperIdx[1]! then
      let abbr :=
        letters.toSubarray.drop upperIdx[0]! |>.take 3 |>.toArray.toList |> String.mk
      (abbr, abbr)
    else if upperIdx.size ≥ 2 then
      let abbr := upperIdx.map (fun k => letters[k]!) |>.toList |> String.mk
      (abbr, abbr)
    else
      let whole := String.mk letters.toList
      (whole.take 1, whole.take 3)
  | _ =>
    let abbr := items.filterMap (fun item => (getAlphabets item)[0]?) |>.toList |> String.mk
    (abbr, abbr)

/-- The name of one person as found in a bibtex author field. -/
structure BibtexName where
  /-- The first name, as items of TeX content. -/
  firstName : Array (Array TexContent)
  /-- The last name, as items of TeX content. -/
  lastName : Array (Array TexContent)
  /-- The first name with TeX commands, braces and diacritics removed,
  all letters converted to uppercase. -/
  firstNameWithoutDiacritics : String
  /-- The last name with TeX commands, braces and diacritics removed,
  all letters converted to uppercase. -/
  lastNameWithoutDiacritics : String
  /-- The one-letter abbreviation of the last name, used when a bibitem has at least
  two authors. It is not necessarily a single letter;
  it is if the last name contains no spaces and only one uppercase letter. -/
  oneLetterAbbr : String
  /-- The three-letter abbreviation of the last name, used when a bibitem has exactly
  one author. It is not necessarily three letters long;
  it is if the last name contains no spaces and only one uppercase letter. -/
  threeLetterAbbr : String
  deriving Repr

namespace BibtexName

/-- Render a `BibtexName` as `Firstname Lastname`, keeping TeX commands and braces.
The result is not necessarily identical to the original input. -/
def toString (x : BibtexName) : String :=
  " ".intercalate ((x.firstName ++ x.lastName).map TexContent.toStringArray).toList

/-- Render several names with `toString`, joined by " and "; empty renderings are skipped. -/
def toStringArray (arr : Array BibtexName) : String :=
  " and ".intercalate (arr.map toString |>.filter (fun s => !s.isEmpty) |>.toList)

/-- Convert a `BibtexName` to `Firstname Lastname` without TeX commands and braces.
-/
def toPlaintext (x : BibtexName) : String :=
  let g (arr : Array (Array TexContent)) : String :=
    " ".intercalate (arr.map TexContent.toPlaintextArray |>.toList)
  g <| x.firstName ++ x.lastName

/-- Convert several names to plaintext, joined by ", "; names with empty plaintext are skipped.

Each name is rendered with `toPlaintext`, so the result contains no TeX commands or braces
(the previous implementation rendered with `toString`, which keeps them). -/
def toPlaintextArray (arr : Array BibtexName) : String :=
  ", ".intercalate (arr.map (·.toPlaintext) |>.filter (not ·.isEmpty) |>.toList)

/-- Convert a `BibtexName` to `Firstname Lastname` of HTML form.
Non-empty items are separated by a single space. -/
def toHtml (x : BibtexName) : Array Xml.Content :=
  let g (arr : Array (Array TexContent)) : Array Xml.Content :=
    arr.foldl (fun acc s =>
      let t := TexContent.toHtmlArray s
      -- only insert a separator between two non-empty pieces
      acc ++ (if acc.isEmpty || t.isEmpty then #[] else #[.Character " "]) ++ t) #[]
  g <| x.firstName ++ x.lastName

/-- Convert several names to HTML, separating non-empty names by ", ". -/
def toHtmlArray (arr : Array BibtexName) : Array Xml.Content :=
  arr.foldl (fun acc s =>
    let t := s.toHtml
    acc ++ (if acc.isEmpty || t.isEmpty then #[] else #[.Character ", "]) ++ t) #[]

end BibtexName

/-- Process the first name and last name, produce abbreviations of the last name.
The input pair is `(Firstname, Lastname)`. -/
def processName (s : Array (Array TexContent) × Array (Array TexContent)) : BibtexName :=
  let g (arr : Array (Array TexContent)) : String :=
    " ".intercalate (arr.map TexContent.toPlaintextArray |>.toList)
  let abbr := getLastNameAbbr <| s.2.map TexContent.toPlaintextArray
  {
    firstName := s.1
    lastName := s.2
    firstNameWithoutDiacritics := stripDiacriticsFromString (g s.1) |>.map getUpperChar
    lastNameWithoutDiacritics := stripDiacriticsFromString (g s.2) |>.map getUpperChar
    oneLetterAbbr := abbr.1
    threeLetterAbbr := abbr.2
  }

/-- Input an array of `TexContent`, return an array of `BibtexName`.
-/
def processNames (arr: Array TexContent) : Array BibtexName :=
  (getNames arr).map processName

end BibtexQuery.Name
--------------------------------------------------------------------------------
/BibtexQuery/ParsecExtra.lean:
--------------------------------------------------------------------------------
/-
Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Frédéric Dupuis
-/

import Std.Internal.Parsec
import Std.Internal.Parsec.Basic
import Std.Internal.Parsec.String



/-!
# Extra Parsec material

This file contains additional material for the Parsec library, some
of which is modelled after its Haskell counterpart.
-/


open Lean Std.Internal.Parsec.String Std.Internal.Parsec

namespace BibtexQuery.ParsecExtra

/-- Run parser `p` on `s`; return its result, or `none` if parsing fails. -/
def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α :=
  match p s.iter with
  | .success _ result => some result
  | .error _ _ => none

/-- Like `String.parse?`, but also return the position at which the parser stopped. -/
def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos) :=
  match p s.iter with
  | .success rest result => some (result, rest.i)
  | .error _ _ => none

/-- Parse one character that does not occur in `bad`. -/
@[inline]
def noneOf (bad : String) : Parser Char := satisfy fun c => !bad.contains c

/-- Parse a (possibly empty) run of characters that do not occur in `bad`. -/
@[inline]
def noneOfStr (bad : String) : Parser String := manyChars (noneOf bad)

/-- Parse a line ending: `"\n\r"`, `"\r\n"` or `"\n"`, tried in this order. -/
@[inline]
def eol : ByteArray.Parser String :=
  ByteArray.pstring "\n\r" <|> ByteArray.pstring "\r\n" <|> ByteArray.pstring "\n"

/-- Run `p` and discard its result; if `p` fails, succeed without consuming input. -/
@[inline]
def maybeSkip (p : Parser α) : Parser Unit := attempt (discard p) <|> pure ()

/-- Worker for `many'`: keep running `p`, appending each result to `acc`. -/
@[inline]
partial def manyCore' (p : Parser α) (acc : List α) : Parser (List α) :=
  (do
    let x ← p
    manyCore' p (acc ++ [x])) <|> pure acc

/-- Run `p` zero or more times and collect the results in a list. -/
@[inline]
def many' (p : Parser α) : Parser (List α) := manyCore' p []

/-- Worker for `manyStr`: keep running `p`, appending each result to `acc`. -/
@[inline]
partial def manyStrCore (p : Parser String) (acc : String) : Parser String :=
  (do
    let piece ← p
    manyStrCore p (acc ++ piece)) <|> pure acc

/-- Run `p` zero or more times and concatenate the resulting strings. -/
@[inline]
def manyStr (p : Parser String) : Parser String := manyStrCore p ""

/-- Worker for `sepBy`: repeatedly parse a separator followed by an item, appending to `acc`. -/
@[inline]
partial def sepByCore (pcont : Parser α) (psep : Parser β) (acc : List α) :
    Parser (List α) :=
  attempt (do
    let _ ← psep
    let x ← pcont
    sepByCore pcont psep (acc ++ [x])) <|> pure acc

/-- Parse zero or more `pcont` separated by `psep`. -/
@[inline]
def sepBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
  (do
    let first ← pcont
    sepByCore pcont psep [first]) <|> pure []

/-- Like `sepBy`, but also accept one optional trailing separator. -/
@[inline]
def sepOrEndBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
  (do
    let first ← pcont
    let items ← sepByCore pcont psep [first]
    maybeSkip psep
    return items) <|> pure []

/-- Worker for `endBy`: repeatedly parse an item followed by a separator, appending to `acc`. -/
@[inline]
partial def endByCore (pcont : Parser α) (psep : Parser β) (acc : List α) :
    Parser (List α) :=
  attempt (do
    let x ← pcont
    let _ ← psep
    endByCore pcont psep (acc ++ [x])) <|> pure acc

/-- Parse zero or more `pcont`, each of them followed by `psep`. -/
@[inline]
def endBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
  endByCore pcont psep [] <|> pure []

/-- Parse one ASCII letter or digit. -/
@[inline]
def alphaNum : Parser Char := attempt do
  let c ← any
  if c.isAlphanum then
    return c
  else
    fail "ASCII alphanumeric character expected"

/-- Parse one ASCII letter and return it in lower case. -/
@[inline]
def asciiLetterToLower : Parser Char := Char.toLower <$> asciiLetter

/-- Parse one ASCII letter or digit and return it in lower case. -/
@[inline]
def alphaNumToLower : Parser Char := Char.toLower <$> alphaNum

/-- Parse a (possibly empty) run of ASCII letters and return it in lower case. -/
@[inline]
def asciiWordToLower : Parser String := manyChars asciiLetterToLower

/-- Parse `op`, then `mid`, then `cl`, and return the result of `mid`. -/
@[inline]
def between (op : Parser α) (cl : Parser α) (mid : Parser β) : Parser β := attempt do
  let _ ← op
  let inner ← mid
  let _ ← cl
  return inner

/-- Parse a natural number written as a non-empty run of digits. -/
@[inline]
def natNum : Parser Nat := attempt do
  let digits ← manyChars digit
  match digits.toNat? with
  | some n => return n
  | none => fail "Not a natural number"

/-- Consume characters until `test prev c` holds, where `c` is the next character and `prev`
is the character consumed just before it (`none` at the start). The character that makes the
test succeed is not consumed. This parser never fails. -/
def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parser String := fun it =>
  let out :=
    it.foldUntil "" fun acc c =>
      let prev : Option Char := if acc.isEmpty then none else some acc.back
      if test prev c then none else some (acc.push c)
  .success out.2 out.1

end BibtexQuery.ParsecExtra
--------------------------------------------------------------------------------
/BibtexQuery/Parser.lean:
--------------------------------------------------------------------------------
/-
Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Frédéric Dupuis
-/

import BibtexQuery.ParsecExtra
import BibtexQuery.Entry
import Std.Internal.Parsec
import Std.Internal.Parsec.String

/-!
# Bibtex Parser

This file contains a parser for the Bibtex format. Note that currently, only a subset of the official
Bibtex format is supported; features such as predefined strings and concatenation using `#` are not
supported.
-/

open Lean Std.Internal.Parsec Std.Internal.Parsec.String BibtexQuery.ParsecExtra

namespace BibtexQuery.Parser

/-- The name of the bibtex entry (i.e. what goes in the cite command).
It starts with an ASCII letter, followed by ASCII letters, digits, `:`, `-` or `_`.
-/
def name : Parser String := attempt do
  let head ← asciiLetter
  let tail ← manyChars (alphaNum <|> pchar ':' <|> pchar '-' <|> pchar '_')
  return head.toString ++ tail

/-- The entry type following `@` ("article", "book", etc), returned in lower case. -/
def category : Parser String := attempt do
  skipChar '@'
  asciiWordToLower

/-- Read up to and including the `}` that closes the current brace level, appending what is
read to `acc`. Nested brace groups are kept verbatim, braces included. -/
partial def bracedContentTail (acc : String) : Parser String := attempt do
  match ← any with
  | '{' =>
    -- the nested group comes back with its closing brace already attached
    let nested ← bracedContentTail ""
    bracedContentTail (acc ++ "{" ++ nested)
  | '}' => return acc ++ "}"
  | c => bracedContentTail (acc ++ c.toString)

/-- Content of the form `{...}`; the outer braces are not part of the result. -/
def bracedContent : Parser String := attempt do
  skipChar '{'
  let withClosingBrace ← bracedContentTail ""
  return withClosingBrace.dropRight 1

/-- Content of the form `"..."`. A quote directly preceded by a backslash does not end the
content. Line feeds and carriage returns are removed from the result. -/
def quotedContent : Parser String := attempt do
  skipChar '"'
  let raw ← manyCharsUntilWithPrev fun prev c => c == '"' && prev != some '\\'
  skipChar '"'
  return (raw.replace "\n" "").replace "\r" ""

/-- A three-letter month abbreviation (`jan` ... `dec`) in any case, returned in lower case. -/
def month : Parser String := attempt do
  let word ← asciiWordToLower
  let months :=
    ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"]
  if months.contains word then
    return word
  else
    fail "Not a valid month"

/-- The content field of a tag: a run of digits, a month abbreviation, or quoted or braced
content, chosen by looking at the first character. -/
def tagContent : Parser String := attempt do
  let first ← peek!
  if first.isDigit then
    manyChars digit
  else if first.isAlpha then
    month
  else if first == '"' then
    quotedContent
  else if first == '{' then
    bracedContent
  else
    fail "Tag content expected"

/-- i.e.
journal = {Journal of Musical Deontology}

The tag name is lower-cased; it may contain ASCII letters, digits, `_` and `-`. -/
def tag : Parser Tag := attempt do
  let key ← manyChars (alphaNumToLower <|> pchar '_' <|> pchar '-')
  ws
  skipChar '='
  ws
  let value ← tagContent
  return { name := key, content := value }

/-- Skip everything up to (but not including) the next `@`. -/
def outsideEntry : Parser Unit := attempt (discard <| manyChars (noneOf "@"))

/-- A Bibtex entry. TODO deal with "preamble" etc. -/
def entry : Parser Entry := attempt do
  outsideEntry
  let kind ← category
  ws
  skipChar '{'
  ws
  let key ← name
  skipChar ','
  ws
  let comma : Parser Unit := do
    ws
    skipChar ','
    ws
  let tags : List Tag ← sepOrEndBy tag comma
  ws
  skipChar '}'
  ws
  return Entry.normalType kind key tags

/-- A whole bibtex file: zero or more entries. -/
def bibtexFile : Parser (List Entry) := many' entry

--#eval "auTHOr23:z ".parseDebug name
--#eval "auTHOr23:z".parseDebug name
--#eval "@ARTICLE ".parseDebug category
--#eval "@ARtiCLE".parseDebug category
--#eval "auTHOr = \n{Dès Noël où un zéphyr haï\n me vêt de glaçons würmiens, je dîne d'exquis rôtis de bœuf au kir à l'aÿ d'âge mûr}".parseDebug tag
--#eval "auTHOr = \"Test\"".parseDebug tag
--#eval "journal = {Journal of Musical\n Deontology}".parseDebug tag
--#eval "year = 2022".parseDebug tag
--#eval "Bdsk-Url-1 = {https://doi.org/10.1007/s00220-020-03839-5}".parseDebug tag
--#eval "year = 2022,\n author = {Frédéric Dupuis},".parseDebug (sepOrEndBy tag (do ws; skipChar ','; ws))
--#eval "@article{bla23,\n year = 2022,\n author = {Frédéric Dupuis}\n}\n".parseDebug entry
--#eval "\"Bachem, Achim and Korte, Bernhard and Gr{\\\"o}tschel\"".parseDebug quotedContent
--#eval "@article{bla23,\n year = 2022,\n author = \"Bachem, Achim and Korte, Bernhard and Gr{\"o}tschel\"\n}\n".parseDebug entry

end BibtexQuery.Parser
--------------------------------------------------------------------------------
/BibtexQuery/Query.lean:
--------------------------------------------------------------------------------
/-
Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Frédéric Dupuis
-/

import BibtexQuery.String
import BibtexQuery.Entry

/-!
# Bibtex Queries

This file deals with bibtex queries of the limited form handled by the program.
-/

namespace BibtexQuery

/-- A single query: which field of an entry to look at, and the text to look for in it. -/
inductive Query where
  | key (s : String)
  | author (s : String)
  | title (s : String)
  | word (s : String)
  | class (s : String)
  deriving Repr, Inhabited

/-- Parse a query of the form `x.text`, where the two-character prefix selects the field:
`k.` key, `a.` author, `t.` title, `w.` keywords, `c.` class.
Returns `none` for any other prefix. -/
def Query.ofString (s : String) : Option Query :=
  let text := s.drop 2
  match s.take 2 with
  | "k." => some (.key text)
  | "a." => some (.author text)
  | "t." => some (.title text)
  | "w." => some (.word text)
  | "c." => some (.class text)
  | _ => none

/-- Check whether the query text occurs in the flattened form of the field selected by `q`. -/
def Entry.matchQuery (e : Entry) (q : Query) : Bool :=
  let (haystack, needle) : String × String :=
    match q with
    | .key s => (e.getKey.flattenWords, s)
    | .author s => (e.getAuthors.toFullNames, s)
    | .title s => (e.getTitle.flattenWords, s)
    | .word s => (e.getKeywords.flattenWords, s)
    | .class s => (e.getClass.flattenWords, s)
  haystack.containsSubstr needle

/-- Check whether the entry matches every query in the list (true for the empty list). -/
def Entry.matchQueries (e : Entry) (lq : List Query) : Bool :=
  lq.all fun q => e.matchQuery q

end BibtexQuery
--------------------------------------------------------------------------------
/BibtexQuery/String.lean:
--------------------------------------------------------------------------------
/-
Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Frédéric Dupuis
-/

/-!
# String processing

This file contains various string processing functions.
-/

/-- Get the line number of the current position of the iterator.
Lines are counted from 1: the result is one plus the number of `'\n'` before the position. -/
def String.Iterator.lineNumber (it : String.Iterator) : Nat :=
  let s : Substring := ⟨it.toString, 0, it.pos⟩
  s.foldl (fun n c => if c = '\n' then n+1 else n) 1

/-- Strip diacritics from a character: accented Latin letters listed below are mapped to
their unaccented ASCII letter, every other character is returned unchanged. -/
def Char.asciify : Char → Char
  | 'á' | 'à' | 'â' | 'ä' | 'æ' | 'ǎ' | 'ã' | 'å' | 'ą' => 'a'
  | 'Á' | 'À' | 'Â' | 'Ä' | 'Æ' | 'Ǎ' | 'Ã' | 'Å' | 'Ą' => 'A'
  | 'č' | 'ç' | 'ć' => 'c'
  | 'Č' | 'Ç' | 'Ć' => 'C'
  | 'ď' => 'd'
  | 'Ď' => 'D'
  | 'é' | 'è' | 'ê' | 'ë' | 'ę' | 'ě' => 'e'
  -- 'Ě' added so that the uppercase row mirrors the lowercase one
  | 'É' | 'È' | 'Ê' | 'Ë' | 'Ę' | 'Ě' => 'E'
  | 'ğ' => 'g'
  | 'Ğ' => 'G'
  | 'í' | 'ì' | 'î' | 'ï' | 'ı' => 'i'
  | 'Í' | 'Ì' | 'Î' | 'Ï' | 'İ' => 'I'
  | 'ł' | 'ľ' => 'l'
  | 'Ł' | 'Ľ' => 'L'
  | 'ñ' | 'ň' | 'ń' => 'n'
  | 'Ñ' | 'Ň' | 'Ń' => 'N'
  | 'ó' | 'ò' | 'ô' | 'ö' | 'õ' | 'ø' | 'œ' => 'o'
  | 'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Õ' | 'Ø' | 'Œ' => 'O'
  | 'ř' => 'r'
  | 'Ř' => 'R'
  | 'š' | 'ś' | 'ş' => 's'
  | 'Š' | 'Ś' | 'Ş' => 'S'
  | 'ť' => 't'
  | 'Ť' => 'T'
  | 'ú' | 'ù' | 'û' | 'ü' | 'ů' => 'u'
  | 'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ů' => 'U'
  | 'ý' | 'ÿ' => 'y'
  | 'Ý' | 'Ÿ' => 'Y'
  | 'ž' => 'z'
  | 'Ž' => 'Z'
  | 'ß' => 's'
  | c => c

/-- Strip diacritics from every character of a string. -/
def String.asciify (s : String) : String := s.map Char.asciify

--#eval "Dès Noël où un zéphyr haï me vêt de glaçons würmiens, je dîne d'exquis rôtis de
--bœuf au kir à l'aÿ d'âge mûr & cætera".asciify

--#eval "Testfile aisdfjoai".foldl (fun s c => s ++ "A") ""
--#eval '{'.asciify.toLower

/-- Strip diacritics, convert to lower case, and drop every character that is not an
ASCII letter or digit afterwards (spaces and punctuation included). -/
def String.flattenWords (s : String) : String := s.foldl
  (fun acc c =>
    let c' := c.asciify.toLower
    if c'.isAlphanum then acc.push c' else acc) ""

--#eval "Frédéric Dupuis, Marco {T}omamichel".flattenWords

/-- Split a bibtex author list on " and " and trim each name. -/
def String.splitIntoNames (s : String) : List String :=
  (s.splitOn (sep := " and ")).map trim

/-- The part of a name before the first comma, trimmed
(the whole trimmed name if there is no comma). -/
def String.toLastName (s : String) : String :=
  match (s.split (fun c => c = ',')).map trim with
  | s₁ :: _ => s₁
  | [] => ""

/-- The last names of all authors in a bibtex author list, separated by spaces. -/
def String.toLastNames (s : String) : String :=
  String.intercalate " " $ s.splitIntoNames.map String.toLastName

/-- Standardize to "Firstname Lastname".

* `"First Last"` (no comma) is returned trimmed and otherwise unchanged.
* `"Last, First"` becomes `"First Last"`.
* `"Last, Jr, First"` (the bibtex form with a suffix) becomes `"First Last Jr"`.
* Anything with more than two commas yields `""`. -/
def String.toFirstnameLastname (s : String) : String :=
  let s' := (s.split (fun c => c = ',')).map trim
  match s' with
  | [s₁] => s₁
  | [s₁, s₂] => s₂ ++ " " ++ s₁
  | [last, jr, first] => first ++ " " ++ last ++ " " ++ jr
  | _ => ""

/-- Normalize a bibtex author list for matching: every name is put in "Firstname Lastname"
order and flattened with `flattenWords`, and the results are concatenated. -/
def String.toFullNames (s : String) : String :=
  String.join $ (s.splitIntoNames.map String.toFirstnameLastname).map String.flattenWords

/-- Check whether `q` is a prefix of `s` at the current position or at any later one.
On an empty `s` the answer is whether `q` is empty. -/
partial def Substring.containsSubstrStartingAt (s : Substring) (q : String) : Bool :=
  if s.toString.length = 0 then q.length = 0
  else if q.isPrefixOf s.toString then true
  else (s.drop 1).containsSubstrStartingAt q

/-- Check whether `q` occurs somewhere in `s`. -/
def String.containsSubstr (s : String) (q : String) : Bool :=
  s.toSubstring.containsSubstrStartingAt q

/-- Bring `s` to exactly `n` characters: shorter strings are padded on the right with `c`,
longer ones are cut after `n` characters. -/
def String.pad (s : String) (c : Char) (n : Nat) : String :=
  (s ++ ⟨List.replicate n c⟩).take n
--------------------------------------------------------------------------------
/BibtexQuery/TexDiacritics.lean:
--------------------------------------------------------------------------------
/-
Copyright (c) 2024 Jz Pan. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Jz Pan
-/

import Std.Internal.Parsec
import Std.Internal.Parsec.String
import Lean.Data.Xml.Basic
import UnicodeBasic

/-!

# TeX diacritics

This file contains functions for TeX diacritics processing.
The main functions are `texContent` and `texContents`, which
will convert all TeX commands for diacritics into UTF-8 characters,
and preserve other TeX commands.

-/

open Lean Xml Std.Internal.Parsec Std.Internal.Parsec.String Unicode

namespace BibtexQuery.TexDiacritics

/-- Represents a segment of TeX content in a bibitem. -/
inductive TexContent
  /-- Represents a non-empty normal string. -/
  | normal (s : String) : TexContent
  /-- Represents some special characters. When output into HTML, some of them (e.g. `\`, `$`)
  will be put into `<span>` to prevent MathJax from recognizing them.
  Some of them (e.g. ` `, `,`) have special meaning in bibitem name processing. -/
  | char (c : Char) : TexContent
  /-- Represents a TeX command. It always starts with `\`. It may have trailing spaces. -/
  | command (s : String) : TexContent
  /-- Represents a math environment. -/
  | math (dollar s : String) : TexContent
  /-- Represents contents inside `{ ... }`. -/
  | braced (arr : Array TexContent) : TexContent
  deriving Repr

namespace TexContent

/-- Add a diacritics character after the first character of the content.
For a braced group the diacritic goes to the first item of the group.
Fails unless the content starts with a letter of a normal string. -/
partial def addDiacritics (x : TexContent) (ch : String) :
    Except String TexContent := do
  match x with
  | .normal s =>
    if s.isEmpty then
      throw "expected a non-empty normal string, but got ''"
    else if GeneralCategory.isLetter s.front then
      return .normal <| s.take 1 ++ ch ++ s.drop 1
    else
      throw s!"diacritics character can only be added after a letter, but got '{s.front}'"
  | .char c => throw s!"expected a non-empty normal string, but got '{c}'"
  | .command _ => throw "expected a non-empty normal string, but got a TeX command"
  | .math _ _ => throw "expected a non-empty normal string, but got a math environment"
  | .braced arr =>
    if h : 0 < arr.size then
      return .braced <| #[← arr[0].addDiacritics ch] ++ (arr.toSubarray.drop 1 |>.toArray)
    else
      throw "expected a non-empty normal string, but got '{}'"

mutual

/-- Convert a TeX content to its original string.
This is not necessarily identical to the original input. -/
partial def toString (x : TexContent) : String :=
  match x with
  | .normal s => s
  | .char c => c.toString
  | .command s => s
  | .math dollar s => dollar ++ s ++ dollar
  | .braced arr => "{" ++ toStringArray arr ++ "}"

/-- Concatenate the `toString` of every item of the array. -/
partial def toStringArray (arr : Array TexContent) : String :=
  arr.map toString |>.toList |> String.join

end

mutual

/-- Convert a TeX content to plaintext, discarding TeX commands and braces.
Math environments are kept, delimiters included.
-/
partial def toPlaintext (x : TexContent) : String :=
  match x with
  | .normal text => text
  | .char c => c.toString
  | .command _ => ""
  | .math delim body => delim ++ body ++ delim
  | .braced items => toPlaintextArray items

/-- Concatenate the plaintext of every item of the array. -/
partial def toPlaintextArray (arr : Array TexContent) : String :=
  String.join (arr.map toPlaintext).toList

end

mutual

/-- The first character of the plaintext of a TeX content, if there is one.
Commands contribute no character. -/
partial def getFirstChar (x : TexContent) : Option Char :=
  match x with
  | .normal text => text.get? 0
  | .char c => some c
  | .command _ => none
  | .math delim _ => delim.get? 0
  | .braced items => getFirstCharArray items

/-- The first character provided by any item of the array, scanning from the front. -/
partial def getFirstCharArray (arr : Array TexContent) : Option Char :=
  arr.findSome? getFirstChar

end

mutual

/-- The last character of the plaintext of a TeX content, if there is one.
Commands contribute no character; for a math environment the first character
of its delimiter is returned. -/
partial def getLastChar (x : TexContent) : Option Char :=
  match x with
  | .normal text => if text.isEmpty then none else some text.back
  | .char c => some c
  | .command _ => none
  | .math delim _ => delim.get? 0
  | .braced items => getLastCharArray items

/-- The last character provided by any item of the array, scanning from the back. -/
partial def getLastCharArray (arr : Array TexContent) : Option Char :=
  arr.findSomeRev? getLastChar

end

mutual

/-- Convert a TeX content to HTML, represented by an array of `Lean.Xml.Content`.
A few TeX commands can be converted to corresponding HTML.
-/
partial def toHtml (x : TexContent) : Array Content :=
  match x with
  | .braced items => toHtmlArray items
  | .math delim body => #[.Character (delim ++ body ++ delim)]
  | .normal text => #[.Character text]
  | .char ch =>
    let plain : Content := .Character ch.toString
    -- a literal backslash or dollar sign is wrapped in a bare `<span>`
    if ch == '\\' || ch == '$' then
      #[.Element ⟨ "span", RBMap.empty, #[plain] ⟩]
    else
      #[plain]
  | .command cmd =>
    let rendered : Content :=
      if cmd.trim == "\\\\" then
        -- `\\` becomes a line break
        .Element ⟨ "br", RBMap.empty, #[] ⟩
      else
        -- every other command is shown as-is inside a red `<span>`
        .Element ⟨ "span", RBMap.empty.insert "style" "color:red;", #[.Character cmd] ⟩
    #[rendered]

/-- Convert the elements of `arr` from index `i` onwards to HTML, appending to `ret`.
When a recognised formatting command (`\url`, `\textrm`, `\textbf`, `\textit`, `\emph`,
`\texttt`, `\textsc`) is followed by another element, the pair is rendered as a single
HTML element wrapping the HTML of that following element. -/
partial def toHtmlArray (arr : Array TexContent) (i : Nat := 0)
    (ret : Array Content := #[]) : Array Content :=
  if h : i < arr.size then
    if h' : i + 1 < arr.size then
      if let .command cmd := arr[i] then
        let next := arr[i + 1]
        -- `some elem` when the command consumes `next`, `none` otherwise
        let wrapped? : Option Content :=
          match cmd.trim with
          | "\\url" =>
            some <| .Element ⟨ "a", RBMap.empty.insert "href"
              next.toPlaintext, next.toHtml ⟩
          | "\\textrm" =>
            some <| .Element ⟨ "span", RBMap.empty.insert "style"
              "font-style: normal; font-weight: normal", next.toHtml ⟩
          | "\\textbf" =>
            some <| .Element ⟨ "b", RBMap.empty, next.toHtml ⟩
          | "\\textit" =>
            some <| .Element ⟨ "i", RBMap.empty, next.toHtml ⟩
          | "\\emph" =>
            some <| .Element ⟨ "em", RBMap.empty, next.toHtml ⟩
          | "\\texttt" =>
            some <| .Element ⟨ "span", RBMap.empty.insert "style"
              "font-family: monospace", next.toHtml ⟩
          | "\\textsc" =>
            some <| .Element ⟨ "span", RBMap.empty.insert "style"
              "font-variant: small-caps", next.toHtml ⟩
          | _ => none
        match wrapped? with
        | some elem => toHtmlArray arr (i + 2) (ret.push elem)
        | none => toHtmlArray arr (i + 1) (ret ++ arr[i].toHtml)
      else
        toHtmlArray arr (i + 1) (ret ++ arr[i].toHtml)
    else
      toHtmlArray arr (i + 1) (ret ++ arr[i].toHtml)
  else
    ret

end

end TexContent

/-- Match a (possibly empty) run of spaces, tabs, carriage returns and newlines,
and return the matched text. -/
def ws' : Parser String :=
  manyChars <| satisfy fun ch => ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'

/-- Replace certain sequences (e.g. "--") by the Unicode characters they stand for.
The replacements are applied one after another, "---" before "--". -/
def replaceChars (s : String) : String :=
  s.replace "---" "\u2014"
    |>.replace "--" "\u2013"
    |>.replace "~" "\u00A0"
    |>.replace "?`" "\u00BF"
    |>.replace "!`" "\u00A1"

/-- Match a TeX command starting with `\`, potentially with trailing whitespaces.
The returned string contains the backslash, the command itself and any whitespace
that was consumed after it. -/
def texCommand : Parser String := pchar '\\' *> attempt do
  let letters ← manyChars asciiLetter
  if !letters.isEmpty then
    -- a named command, optionally followed by `*`
    let star? ← peek?
    if star? == some '*' then
      skip
      return "\\" ++ letters ++ "*" ++ (← ws')
    else
      return "\\" ++ letters ++ (← ws')
  else
    -- a one-symbol command; for these escapes the following whitespace is left in the input
    let sym ← any
    match sym with
    | '&' | '#' | '{' | '}' | '$' | '_' => return "\\" ++ toString sym
    | _ => return "\\" ++ toString sym ++ (← ws')

/-- Similar to `texCommand`, but fails (without consuming input) when the trimmed
command is one of `exclude`. -/
def texCommand' (exclude : Array String) : Parser String := attempt do
  let cmd ← texCommand
  let name := cmd.trim
  if exclude.any (· == name) then
    fail s!"'{name}' is not allowed"
  else
    return cmd

/-- Match `{`, then `p`, then `}`, and return the result of `p` wrapped in braces again. -/
def bracedContent (p : Parser String) : Parser String := do
  let _ ← pchar '{'
  let inner ← p
  let _ ← pchar '}'
  return "{" ++ inner ++ "}"

/-- Run `p` repeatedly, pushing every `some` result onto `acc`.
Stops successfully at the first `none`; an error of `p` is propagated unchanged. -/
partial def manyOptions {α} (p : Parser (Option α)) (acc : Array α := #[]) :
    Parser (Array α) := fun it =>
  match p it with
  | .success rest (.some item) => manyOptions p (acc.push item) rest
  | .success rest .none => .success rest acc
  | .error pos msg => .error pos msg

/-- Match the inside of a math environment and return it as a string.
Stops, without failing, at `$`, at an unmatched `}`, at one of the commands
`\(`, `\)`, `\[`, `\]`, or at the end of the input. -/
partial def mathContentAux : Parser String := do
  let plainRun : Parser String :=
    many1Chars <| satisfy fun ch => !(ch == '\\' || ch == '$' || ch == '{' || ch == '}')
  let step : Parser (Option String) := fun start =>
    if !start.hasNext then
      .success start .none
    else
      match start.curr with
      | '}' | '$' => .success start .none
      | '{' => (.some <$> bracedContent mathContentAux) start
      | '\\' =>
        match texCommand' #["\\(", "\\)", "\\[", "\\]"] start with
        | .success rest cmd => .success rest (.some cmd)
        -- a failing command ends the math content; nothing is consumed
        | .error _ _ => .success start .none
      | _ => (.some <$> plainRun) start
  let pieces ← manyOptions step
  return String.join pieces.toList

/-- Match a math content. Returns `Option.none` if it does not start with `\(`, `\[` or `$`.
-/
def mathContent : Parser (Option TexContent) := fun it =>
  -- parse `opening`, the math body and `closing`; the body is tagged with `delim`
  let delimited (opening closing delim : String) : Parser (Option TexContent) := do
    let _ ← pstring opening
    let body ← mathContentAux
    let _ ← pstring closing
    return .some (.math delim body)
  -- the next two characters decide which kind of math environment starts here
  let lookahead := it.extract (it.forward 2)
  if lookahead = "\\[" then
    delimited "\\[" "\\]" "$$" it
  else if lookahead = "\\(" then
    delimited "\\(" "\\)" "$" it
  else if lookahead = "$$" then
    delimited "$$" "$$" "$$" it
  else if it.curr = '$' then
    delimited "$" "$" "$" it
  else
    .success it .none

/-- Match raw content made of plain characters, TeX commands and brace groups,
and return the concatenation of the matched pieces.
Stops, without failing, at an unmatched `}` or at the end of the input. -/
partial def rawContentAux : Parser String := do
  let plainRun : Parser String :=
    many1Chars <| satisfy fun ch => !(ch == '\\' || ch == '{' || ch == '}')
  let step : Parser (Option String) := fun start =>
    if !start.hasNext then
      .success start .none
    else
      match start.curr with
      | '}' => .success start .none
      | '{' => (.some <$> bracedContent rawContentAux) start
      | '\\' => (.some <$> texCommand) start
      | _ => (.some <$> plainRun) start
  let pieces ← manyOptions step
  return String.join pieces.toList

/-- Match a TeX command for diacritics, return the processed TeX contents.
Sometimes it needs to read the contents after the command, in this case the `p` is used.
-/
def texDiacriticsCommand (p : Parser (Option TexContent)) : Parser (Option TexContent) := do
  let cmd ← texCommand
  let name := cmd.trim
  -- `\url{...}`: the argument is read as raw text and kept next to the command
  if name = "\\url" then
    let raw ← pchar '{' *> rawContentAux <* pchar '}'
    return .some <| .braced #[.command cmd, .braced #[.normal raw]]
  -- special characters that are emitted as `.char`
  match name with
  | "\\$" => return .some <| .char '$'
  | "\\textbackslash" => return .some <| .char '\\'
  | _ => pure ()
  -- commands that stand for a fixed piece of text
  let literal? : Option String :=
    match name with
    | "\\oe" => some "œ" | "\\OE" => some "Œ"
    | "\\ae" => some "æ" | "\\AE" => some "Æ"
    | "\\aa" => some "å" | "\\AA" => some "Å"
    | "\\o" => some "ø" | "\\O" => some "Ø"
    | "\\l" => some "ł" | "\\L" => some "Ł"
    | "\\i" => some "ı" | "\\j" => some "ȷ"
    | "\\ss" => some "\u00DF" | "\\SS" => some "\u1E9E"
    | "\\cprime" => some "\u02B9"
    | "\\&" => some "&" | "\\#" => some "#"
    | "\\{" => some "{" | "\\}" => some "}"
    | "\\_" => some "_"
    | "\\" => some "\u00A0" -- This should be "\ " but the space is trimmed
    | _ => none
  if let some text := literal? then
    return .some <| .normal text
  -- diacritics: the combining character that goes with the command
  let mark? : Option String :=
    match name with
    | "\\`" => some "\u0300" | "\\'" => some "\u0301"
    | "\\^" => some "\u0302" | "\\\"" => some "\u0308"
    | "\\~" => some "\u0303" | "\\=" => some "\u0304"
    | "\\." => some "\u0307" | "\\u" => some "\u0306"
    | "\\v" => some "\u030C" | "\\H" => some "\u030B"
    | "\\t" => some "\u0361" | "\\c" => some "\u0327"
    | "\\d" => some "\u0323" | "\\b" => some "\u0331"
    | "\\k" => some "\u0328"
    | _ => none
  -- any other command is preserved untouched
  let some mark := mark? | return .some <| .command cmd
  -- a diacritic applies to the content that follows, which is read with `p`
  let some next ← p | fail "expected a non-empty normal string"
  match next.addDiacritics mark with
  | .ok marked => return .some marked
  | .error msg => fail msg

/-- Match a segment of TeX content.
The TeX commands for diacritics will be converted into UTF-8 characters.
Other TeX commands are preserved.
Returns `Option.none` if it can't match any and there are no errors. -/
partial def texContent : Parser (Option TexContent) := fun it =>
  -- a run of whitespace collapses to a single space character
  let blank : Parser (Option TexContent) := do
    ws
    return .some (.char ' ')
  -- `{ ... }`: the contents are parsed recursively
  let group : Parser (Option TexContent) := do
    let _ ← pchar '{'
    let items ← manyOptions texContent
    let _ ← pchar '}'
    return .some (.braced items)
  -- a run of ordinary characters, with sequences such as "--" replaced
  let word : Parser (Option TexContent) := do
    let text ← many1Chars <| satisfy fun ch =>
      !(ch == '\\' || ch == '$' || ch == '{' || ch == '}' || ch == ' ' || ch == '\t'
        || ch == '\r' || ch == '\n' || ch == ',')
    return .some (.normal (replaceChars text))
  -- math content is tried first
  match mathContent it with
  | .error pos msg => .error pos msg
  | .success rest (.some math) => .success rest (.some math)
  | .success rest .none =>
    if !rest.hasNext then
      .success rest .none
    else
      match rest.curr with
      | ' ' | '\t' | '\r' | '\n' => blank rest
      | ',' => .success rest.next (.some (.char ','))
      | '\\' => texDiacriticsCommand texContent rest
      | '{' => group rest
      | '}' => .success rest .none
      | _ => word rest

/-- Match a sequence of TeX contents, i.e. `texContent` repeated until it yields nothing. -/
def texContents : Parser (Array TexContent) :=
  manyOptions texContent #[]

end BibtexQuery.TexDiacritics

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity.
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BibtexQuery: a simple command-line bibtex query utility 2 | 3 | BibtexQuery is a command-line utility that reads in a bibtex file and performs simple queries. A query is a string 4 | of the form ``q.querystring``, where ``q`` is either ``a`` for author, ``t`` for title, ``k`` for key, ``c`` for class (i.e. book, article, etc), or ``w`` for keywords, and ``querystring`` 5 | is a string (without spaces). BibtexQuery reads in a bibtex file, and returns the entries that match all the 6 | queries given as command-line parameters. Note that the entries are processed in such a way that strips diacritics, 7 | spaces and special characters before the queries are performed. In addition, the list of authors is normalized to 8 | ``firstnamelastname``. Hence, for example, ``Dupuis, Frédéric`` will match the query ``a.ericdup``. 
9 | 10 | Note that currently, only a subset of the official Bibtex format is supported; features such as predefined strings and concatenation using ``#`` are not supported. It is unclear whether this 11 | will ever be supported in the future; I only wrote this to learn how to do "normal programming" in [Lean 4](https://github.com/leanprover/lean4/), and I don't personally use these Bibtex features. 12 | 13 | ## Installation instructions 14 | 15 | Make sure you have a working Lean 4 installation (see [here](https://leanprover.github.io/lean4/doc/quickstart.html) for how to do that), and then type `lake build` in the main directory. The 16 | executable should appear in ``.lake/build/bin``. 17 | -------------------------------------------------------------------------------- /lake-manifest.json: -------------------------------------------------------------------------------- 1 | {"version": "1.1.0", 2 | "packagesDir": ".lake/packages", 3 | "packages": 4 | [{"url": "https://github.com/jcommelin/lean4-unicode-basic", 5 | "type": "git", 6 | "subDir": null, 7 | "scope": "", 8 | "rev": "458e2d3feda3999490987eabee57b8bb88b1949c", 9 | "name": "UnicodeBasic", 10 | "manifestFile": "lake-manifest.json", 11 | "inputRev": "bump_to_v4.18.0-rc1", 12 | "inherited": false, 13 | "configFile": "lakefile.lean"}], 14 | "name": "BibtexQuery", 15 | "lakeDir": ".lake"} 16 | -------------------------------------------------------------------------------- /lakefile.toml: -------------------------------------------------------------------------------- 1 | name = "BibtexQuery" 2 | defaultTargets = ["BibtexQuery", "«bibtex-query»"] 3 | 4 | [[require]] 5 | name = "UnicodeBasic" 6 | git = "https://github.com/jcommelin/lean4-unicode-basic" # switch back to fgdorais 7 | rev = "bump_to_v4.18.0-rc1" # switch back to main 8 | 9 | [[lean_lib]] 10 | name = "BibtexQuery" 11 | 12 | [[lean_exe]] 13 | name = "«bibtex-query»" 14 | root = "BibtexQuery.Main" 15 | 
--------------------------------------------------------------------------------
/lean-toolchain:
--------------------------------------------------------------------------------
leanprover/lean4:v4.19.0-rc1

--------------------------------------------------------------------------------
/scripts/bibc:
--------------------------------------------------------------------------------
#!/usr/bin/bash

# Invoke `bibtex-query c` on the bibliography file, forwarding all arguments.
# "$@" passes every argument through unchanged: no word splitting, no glob
# expansion, and no limit of nine arguments.
bibtex-query c /home/fred/git/w/bib/big.bib "$@"

--------------------------------------------------------------------------------
/scripts/bibq:
--------------------------------------------------------------------------------
#!/usr/bin/bash

# Invoke `bibtex-query q` on the bibliography file, forwarding all arguments.
# "$@" passes every argument through unchanged: no word splitting, no glob
# expansion, and no limit of nine arguments.
bibtex-query q /home/fred/git/w/bib/big.bib "$@"

--------------------------------------------------------------------------------
/scripts/bibrofi:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Offer the PDF and DjVu files of the bibliography directory in a rofi menu
# and open the selected one in zathura. Arguments are passed on to rofi.

# nullglob: a pattern without matches expands to nothing instead of itself
shopt -s nullglob globstar

papers=( ~/git/w/bib/*.pdf ~/git/w/bib/*.djvu )

bibfile=$(printf '%s\n' "${papers[@]}" | rofi -i -dmenu "$@")

# nothing selected (e.g. the menu was dismissed): stop here
[[ -n $bibfile ]] || exit

# quoted so that a file name containing spaces is passed as a single argument
zathura "$bibfile" &

--------------------------------------------------------------------------------