├── .github
    └── workflows
    │   └── lean_action_ci.yml
├── .gitignore
├── BibtexQuery.lean
├── BibtexQuery
    ├── Entry.lean
    ├── Format.lean
    ├── Main.lean
    ├── Name.lean
    ├── ParsecExtra.lean
    ├── Parser.lean
    ├── Query.lean
    ├── String.lean
    └── TexDiacritics.lean
├── LICENSE
├── README.md
├── lake-manifest.json
├── lakefile.toml
├── lean-toolchain
└── scripts
    ├── bibc
    ├── bibq
    └── bibrofi


/.github/workflows/lean_action_ci.yml:
--------------------------------------------------------------------------------
 1 | name: Lean Action CI
 2 | # Runs on every push, on every pull request, and on manual dispatch.
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |   workflow_dispatch:
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ubuntu-latest
11 |     # Check out the repository, then hand over to the Lean action.
12 |     steps:
13 |       - uses: actions/checkout@v4
14 |       - uses: leanprover/lean-action@v1
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.lake
2 | 


--------------------------------------------------------------------------------
/BibtexQuery.lean:
--------------------------------------------------------------------------------
1 | import BibtexQuery.Entry
2 | import BibtexQuery.ParsecExtra
3 | import BibtexQuery.Parser
4 | import BibtexQuery.Query
5 | import BibtexQuery.String
6 | import BibtexQuery.Name
7 | import BibtexQuery.Format
8 | 


--------------------------------------------------------------------------------
/BibtexQuery/Entry.lean:
--------------------------------------------------------------------------------
  1 | /-
  2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
  3 | Released under Apache 2.0 license as described in the file LICENSE.
  4 | Author: Frédéric Dupuis
  5 | -/
  6 | 
  7 | import BibtexQuery.String
  8 | 
  9 | /-!
 10 | # Bibtex Entries and related functions
 11 | 
 12 | This file defines inductive types for bibtex entries and tags, along with various related
 13 | helper functions.
 14 | -/
 15 | 
 16 | namespace BibtexQuery
 17 | 
 18 | /-- A single tag (field) of a bibtex entry, e.g. `author = "Binne, Chose and Truc, Machin"`. -/
 19 | structure Tag where
 20 |   name : String
 21 |   content : String
 22 | deriving Repr, Inhabited
 23 | 
 24 | namespace Tag
 25 | 
 26 | /-- Render the tag in the standardized `name = {content}` form. -/
 27 | def toString (t : Tag) : String := t.name ++ " = {" ++ t.content ++ "}"
 28 | 
 29 | --#eval Tag.toString $ Tag.mk "author" "Frédéric Dupuis"
 30 | 
 31 | end Tag
 32 | 
 33 | /-- A whole bibtex entry. `normalType` is a regular item such as a paper or a book (category, key, tags); the remaining constructors represent string, preamble and comment entries. -/
 34 | inductive Entry where
 35 |   | normalType (category : String) (name : String) (tags : List Tag)
 36 |   | stringType (content : String)
 37 |   | preambleType (content : String)
 38 |   | commentType
 39 | deriving Repr, Inhabited
 40 | 
 41 | namespace Entry
 42 | 
 43 | /-- The content of the first `author` tag of a normal entry; `""` when there is none. -/
 44 | def getAuthors : (e : Entry) → String
 45 |   | .normalType _ _ fields =>
 46 |     -- `find?` stops at the first match, so a duplicated tag resolves to its earliest occurrence.
 47 |     let hit := fields.find? (fun f => f.name = "author")
 48 |     (hit.map (fun f => f.content)).getD ""
 49 |   | _ => ""
 50 | 
 51 | /-- The content of the first `title` tag of a normal entry; `""` when there is none. -/
 52 | def getTitle : (e : Entry) → String
 53 |   | .normalType _ _ fields =>
 54 |     -- Entries that are not `normalType` carry no tags and fall through to `""` below.
 55 |     let hit := fields.find? (fun f => f.name = "title")
 56 |     (hit.map (fun f => f.content)).getD ""
 57 |   | _ => ""
 58 | 
 59 | /-- Returns the keywords of a bibtex entry, or an empty string if they are not specified. -/
 60 | def getKeywords : (e : Entry) → String
 61 |   | .normalType _cl _name tags => -- dotted constructor, like every other accessor in this namespace
 62 |     match tags.find? (fun t => t.name = "keywords") with
 63 |     | some t => t.content
 64 |     | none   => ""
 65 |   | _ => ""
 66 | 
 67 | /-- The class of a normal entry (i.e. book, article, etc.); `""` for every other entry kind. -/
 68 | def getClass : (e : Entry) → String
 69 |   | .normalType kind _ _ => kind
 70 |   | _ => ""
 71 | 
 72 | /-- The key of a normal entry; every other entry kind yields `"No key"`. -/
 73 | def getKey : Entry → String
 74 |   | .normalType _ key _ => key
 75 |   | _ => "No key"
 76 | 
 77 | /-- The `eprint` tag content of a normal entry (`""` if absent); `"No arxiv number"` for other entry kinds. -/
 78 | def getArxivNumber : Entry → String
 79 |   | .normalType _ _ fields =>
 80 |     -- Only the first `eprint` tag is consulted.
 81 |     let hit := fields.find? (fun f => f.name = "eprint")
 82 |     (hit.map (fun f => f.content)).getD ""
 83 |   | _ => "No arxiv number"
 84 | 
 85 | /-- Returns the arxiv link (`https://arxiv.org/abs/<eprint>`) of a normal entry, `""` if it has no `eprint` tag, and `"No arxiv number"` for other entry kinds. -/
 86 | def getArxivLink : Entry → String
 87 |   | .normalType _cl _name tags =>
 88 |     match tags.find? (fun t => t.name = "eprint") with
 89 |     | some t => "https://arxiv.org/abs/" ++ t.content
 90 |     | none   => ""
 91 |   | _ => "No arxiv number"
 92 | 
 93 | /-- Abridged one-line form: key, author last names and title joined by ` | `, the first two passed through `pad ' '` with widths 25 and 50. -/
 94 | def toAbridgedRepr (e : Entry) : String :=
 95 |   " | ".intercalate [e.getKey.pad ' ' 25, e.getAuthors.toLastNames.pad ' ' 50, e.getTitle]
 96 | 
 97 | /-- A «clean citation» of a bibtex entry: authors, title in guillemets, then the arxiv link. -/
 98 | def toCitation (e : Entry) : String :=
 99 |   s!"{e.getAuthors}, «{e.getTitle}», {e.getArxivLink}"
100 | 
101 | 
102 | /-- Standardized text of a normal entry: a `@class{key,` line, one indented line per tag, then `}`. Other entry kinds give `""`. -/
103 | def toString : Entry → String
104 |   | .normalType cl name tags =>
105 |       let header := "@" ++ cl ++ "{" ++ name ++ ",\n"
106 |       -- Append every tag on its own line, indented by two spaces.
107 |       let withTags := tags.foldl (fun (acc : String) t => acc ++ "  " ++ t.toString ++ "\n") header
108 |       withTags ++ "}"
109 |   | _ => ""
110 | 
111 | --#eval IO.print <| Entry.toString $ .normalType "book" "d12" [Tag.mk "author" "Dupuis, Frédéric",
112 | --                                                 Tag.mk "title" "Bonsoir la visite"]
113 | 
114 | --#eval IO.print <| Entry.toCitation $ .normalType "book" "d12" [Tag.mk "author" "Dupuis, Frédéric",
115 | --                                                 Tag.mk "title" "Bonsoir la visite", Tag.mk "eprint" "2308.11736"]
116 | 
117 | end Entry
118 | 
119 | end BibtexQuery
120 | 


--------------------------------------------------------------------------------
/BibtexQuery/Format.lean:
--------------------------------------------------------------------------------
  1 | /-
  2 | Copyright (c) 2024 Jz Pan. All rights reserved.
  3 | Released under Apache 2.0 license as described in the file LICENSE.
  4 | Author: Jz Pan
  5 | -/
  6 | 
  7 | import BibtexQuery.Name
  8 | import BibtexQuery.Entry
  9 | import Lean.Data.Xml.Basic
 10 | import Std.Data.HashMap
 11 | import Std.Internal.Parsec
 12 | import Std.Internal.Parsec.String
 13 | 
 14 | /-!
 15 | 
 16 | # Bibtex item formatting
 17 | 
 18 | This file contains functions for bibtex item formatting,
 19 | tag generation and sorting.
 20 | 
 21 | -/
 22 | 
 23 | open Lean Xml Std.Internal.Parsec Unicode BibtexQuery.TexDiacritics BibtexQuery.Name
 24 | 
 25 | namespace BibtexQuery
 26 | 
 27 | /-- Represents a processed bibtex entry. -/
 28 | structure ProcessedEntry where
 29 |   /-- The category of a bibtex entry, e.g. "article" or "book". -/
 30 |   category : String
 31 |   /-- The name (also called the citekey) of a bibtex entry. -/
 32 |   name : String
 33 |   /-- The tags of a bibtex entry, stored as a `HashMap`. Not to be confused with `tag`. -/
 34 |   tags : Std.HashMap String (Array TexContent)
 35 |   /-- The tag of a bibtex entry, e.g. `[Doe12]`. Not to be confused with `tags`. -/
 36 |   tag : String
 37 |   /-- The HTML representing a bibtex entry, e.g.
 38 |   `John Doe. <i>Test</i>. 2012.` Generated by the bibtex formatter. -/
 39 |   html : Array Content
 40 |   /-- The array of authors or editors of a bibtex entry.
 41 |   Only used for tag generation and bibtex entry sorting. -/
 42 |   authorOrEditor : Array BibtexName
 43 |   /-- The HTML representing the authors. -/
 44 |   authorHtml : Array Content
 45 |   /-- The HTML representing the editors. -/
 46 |   editorHtml : Array Content
 47 |   /-- The date of a bibtex entry, in the format `year * 100 + month`.
 48 |   Only used for tag generation and bibtex entry sorting. -/
 49 |   date : Nat
 50 |   /-- The HTML representing the date. -/
 51 |   dateHtml : Array Content
 52 |   /-- The title of a bibtex entry, with diacritics removed, and converted to uppercase.
 53 |   Only used for bibtex entry sorting. -/
 54 |   titleWithoutDiacritics : String
 55 | 
 56 | /-- Get the date (`year * 100 + month`, month `0` when unknown) and the date HTML of a bibitem. Without a `year` tag the result is `(0, #[])`; a year without digits gives `(0, yearHtml)`. -/
 57 | def getDate (tags : Std.HashMap String (Array TexContent)) : Nat × Array Content :=
 58 |   if let .some yearTex := tags["year"]? then
 59 |     let yearHtml := TexContent.toHtmlArray yearTex
 60 |     if let .some year := (TexContent.toPlaintextArray yearTex).toList.filter
 61 |         Char.isDigit |> String.mk |>.toNat? then
 62 |       let month : Nat :=
 63 |         if let .some monthTex := tags["month"]? then
 64 |           let monthStr := (TexContent.toPlaintextArray monthTex).trim.toLower
 65 |           match monthStr with -- abbreviations and full English names, e.g. `month = {January}`
 66 |           | "jan" | "january" => 1 | "feb" | "february" => 2 | "mar" | "march" => 3
 67 |           | "apr" | "april" => 4 | "may" => 5 | "jun" | "june" => 6
 68 |           | "jul" | "july" => 7 | "aug" | "august" => 8 | "sep" | "sept" | "september" => 9
 69 |           | "oct" | "october" => 10 | "nov" | "november" => 11 | "dec" | "december" => 12
 70 |           | _ =>
 71 |             let month := monthStr.toNat?.getD 0
 72 |             if month ≥ 1 ∧ month ≤ 12 then month else 0
 73 |         else
 74 |           0
 75 |       let monthStr : String :=
 76 |         match month with
 77 |         | 1 => "Jan" | 2 => "Feb" | 3 => "Mar"
 78 |         | 4 => "Apr" | 5 => "May" | 6 => "Jun"
 79 |         | 7 => "Jul" | 8 => "Aug" | 9 => "Sep"
 80 |         | 10 => "Oct" | 11 => "Nov" | 12 => "Dec"
 81 |         | _ => ""
 82 |       (year * 100 + month,
 83 |         if monthStr.isEmpty then
 84 |           yearHtml
 85 |         else if yearHtml.isEmpty then
 86 |           #[.Character monthStr]
 87 |         else
 88 |           #[.Character monthStr, .Character " "] ++ yearHtml)
 89 |     else
 90 |       (0, yearHtml)
 91 |   else
 92 |     (0, #[])
 93 | 
 94 | /-- Build the tag of a bibitem (e.g. `[ABC12]`) from its authors and its date. -/
 95 | def getTag (authors : Array BibtexName) (date : Nat) : String :=
 96 |   -- Concatenate one abbreviation per name.
 97 |   let glue (f : BibtexName → String) (xs : Array BibtexName) : String :=
 98 |     String.join (xs.map f).toList
 99 |   let initials : String :=
100 |     if authors.size ≥ 5 then glue (fun a => a.oneLetterAbbr) (authors.extract 0 3) ++ "+"
101 |     else if authors.size ≥ 2 then glue (fun a => a.oneLetterAbbr) authors
102 |     else glue (fun a => a.threeLetterAbbr) authors
103 |   let yy : String := if date = 0 then "" else (toString (date / 100 + 100)).takeRight 2
104 |   "[" ++ initials ++ yy ++ "]"
105 | 
106 | /-- Replace `"  "` by `" "` in `s` over and over until the string stops getting shorter. -/
107 | partial def removeDuplicatedSpacesAux (s : String) : String :=
108 |   let collapsed := s.replace "  " " "
109 |   -- A pass that does not shorten the string has changed nothing: stop there.
110 |   if collapsed.length ≥ s.length then s
111 |   else removeDuplicatedSpacesAux collapsed
112 | /-- Turn CR, LF and tab characters into spaces, trim both ends, then collapse repeated spaces. -/
113 | def removeDuplicatedSpaces (s : String) : String :=
114 |   removeDuplicatedSpacesAux (((s.replace "\r" " ").replace "\n" " ").replace "\t" " ").trim
115 | 
116 | /-- Get a `ProcessedEntry` from an `Entry`, computing all of its fields except for `html`, which is left empty. Fails with a message when `texContents` cannot parse a tag.
117 | If the input is not `BibtexQuery.Entry.normalType`, returns `Option.none`. -/
118 | def ProcessedEntry.ofEntry (e : Entry) : Except String (Option ProcessedEntry) := do
119 |   match e with
120 |   | .normalType category name tags =>
121 |     let lst : List (String × Array TexContent) ← tags.mapM fun x => do
122 |       let s := removeDuplicatedSpaces x.content
123 |       if x.name = "pubmed" ∨ x.name = "doi" ∨ x.name = "eprint" ∨ x.name.endsWith "url" then
124 |         .ok (x.name, #[.normal s])
125 |       else
126 |         match texContents s.iter with
127 |         | .success _ arr => .ok (x.name, arr)
128 |         | .error it err => .error s!"failed to run texContents on '{it.1}' at pos {it.2}: {err}"
129 |     let tags := Std.HashMap.ofList lst
130 |     let authors := processNames (tags.getD "author" #[])
131 |     let editors := processNames (tags.getD "editor" #[])
132 |     let authorOrEditor := if authors.isEmpty then editors else authors
133 |     let (date, dateHtml) := getDate tags
134 |     pure <| .some {
135 |       category := category
136 |       name := name
137 |       tags := tags
138 |       tag := getTag authorOrEditor date
139 |       html := #[]
140 |       authorOrEditor := authorOrEditor
141 |       authorHtml := BibtexName.toHtmlArray authors
142 |       editorHtml := BibtexName.toHtmlArray editors ++
143 |         if editors.size ≥ 2 then
144 |           #[.Character ", editors"]
145 |         else if editors.size = 1 then
146 |           #[.Character ", editor"]
147 |         else
148 |           #[]
149 |       date := date
150 |       dateHtml := dateHtml
151 |       titleWithoutDiacritics := tags.getD "title" #[] |> TexContent.toPlaintextArray |>
152 |         stripDiacriticsFromString |>.map getUpperChar
153 |     }
154 |   | _ => pure .none
155 | 
156 | /-!
157 | 
158 | ## Formatting a bibtex item
159 | 
160 | The following code is a port of `pybtex/style/formatting/unsrt.py` of `pybtex`,
161 | originally licensed under the MIT license.
162 | 
163 | -/
164 | 
165 | section
166 | 
167 | mutual
168 | /-- The last character of the text inside `x`; comments and empty strings give `none`. -/
169 | partial def getLastCharOfContent (x : Content) : Option Char :=
170 |   match x with
171 |   | .Element ⟨ _, _, arr ⟩ => getLastCharOfArrayContent arr
172 |   | .Comment _ => .none
173 |   | .Character s => if s.isEmpty then .none else .some s.back
174 | /-- The first character that `getLastCharOfContent` yields when `arr` is scanned from its end. -/
175 | partial def getLastCharOfArrayContent (arr : Array Content) : Option Char :=
176 |   arr.findSomeRev? getLastCharOfContent
177 | 
178 | end
179 | 
180 | variable (e : ProcessedEntry)
181 | 
182 | namespace Formatter
183 | 
184 | /-!
185 | 
186 | ### Helper functions
187 | 
188 | -/
189 | /-- Wrap the HTML of `content` in an attribute-free element called `name`; `#[]` when that HTML is empty. -/
190 | def mkTag (name : String) (content : Array TexContent) : Array Content :=
191 |   let inner := TexContent.toHtmlArray content
192 |   if !inner.isEmpty then #[.Element ⟨ name, RBMap.empty, inner ⟩] else #[]
193 | /-- Wrap the HTML of `content` in an `a` element whose `href` attribute is `href`; `#[]` when that HTML is empty. -/
194 | def mkHref (href : String) (content : Array TexContent) : Array Content :=
195 |   let inner := TexContent.toHtmlArray content
196 |   if !inner.isEmpty then #[.Element ⟨ "a", RBMap.empty.insert "href" href, inner ⟩] else #[]
197 | /-- The HTML of `content` with the text `pre` before it and `post` after it. Empty `pre`/`post` add nothing, and empty HTML yields `#[]`. -/
198 | def mkStr (content : Array TexContent) (pre post : String := "") : Array Content :=
199 |   let ret := TexContent.toHtmlArray content
200 |   (if ret.isEmpty || pre.isEmpty then #[] else #[.Character pre]) ++ ret ++
201 |     (if ret.isEmpty || post.isEmpty then #[] else #[.Character post])
202 | /-- Concatenate the `some` pieces in order; `sep` is inserted only between a non-empty accumulated result and a non-empty next piece. -/
203 | def arrayConcat {α} (arr : Array (Option (Array α))) (sep : Array α := #[]) : Array α :=
204 |   arr.filterMap id |>.foldl (fun acc a =>
205 |     acc ++ (if acc.isEmpty || a.isEmpty then #[] else sep) ++ a) #[]
206 | /-- Join the pieces with a single space, using `arrayConcat`. -/
207 | def words (arr : Array (Option (Array Content))) : Array Content :=
208 |   arrayConcat arr #[.Character " "]
209 | /-- Whether `c` is one of the punctuation characters listed below, after which `sentence1` adds no full stop. -/
210 | def canEndSentence (c : Char) : Bool :=
211 |   match c with
212 |   | '.' | '?' | '!' | '…' | '。' | '？' | '！' | '\uFF0E' | '\uFF61'
213 |   | '\u203C' | '\u203D' | '\u2047' | '\u2048' | '\u2049' => true
214 |   | _ => false
215 | /-- Append a full stop to `arr` unless it is empty or already ends with a character accepted by `canEndSentence`. -/
216 | def sentence1 (arr : Array Content) : Array Content :=
217 |   if arr.isEmpty then
218 |     #[]
219 |   else
220 |     -- A missing last character counts as a space, i.e. as "not terminated yet".
221 |     let last := (getLastCharOfArrayContent arr).getD ' '
222 |     if canEndSentence last then arr else arr.push (.Character ".")
223 | /-- Join the pieces with `", "` and terminate the result with `sentence1`. -/
224 | def sentence (arr : Array (Option (Array Content))) : Array Content :=
225 |   sentence1 <| arrayConcat arr #[.Character ", "]
226 | /-- The HTML of `content`, terminated with `sentence1`. -/
227 | def sentence' (content : Array TexContent) : Array Content :=
228 |   sentence1 <| TexContent.toHtmlArray content
229 | /-- Join the pieces with a newline character, using `arrayConcat`. -/
230 | def toplevel (arr : Array (Option (Array Content))) : Array Content :=
231 |   arrayConcat arr #[.Character "\n"]
232 | /-- The text `URL: ` followed by `mkHref url` applied to the url itself; `#[]` for an empty `url`. -/
233 | def mkUrl (url : String) : Array Content :=
234 |   if url.isEmpty then #[] else #[.Character "URL: "] ++ mkHref url #[.normal url]
235 | /-- Link for an identifier `url` that may already start with `urlPrefix` or `namePrefix` (compared case-insensitively): that prefix is dropped and the rest `s` links to `urlPrefix ++ s` with label `namePrefix ++ s`. A rest starting with `http` falls back to `mkUrl url`; an empty rest gives `#[]`. -/
236 | def mkWebRef (urlPrefix namePrefix url : String) : Array Content :=
237 |     let s :=
238 |       if url.toLower.startsWith urlPrefix.toLower then
239 |         url.drop urlPrefix.length
240 |       else if url.toLower.startsWith namePrefix.toLower then
241 |         url.drop namePrefix.length
242 |       else
243 |         url
244 |     if s.toLower.startsWith "http" then
245 |       -- the url does not start with `urlPrefix` or `namePrefix`,
246 |       -- but still starts with "http"
247 |       mkUrl url
248 |     else if s.isEmpty then
249 |       #[]
250 |     else
251 |       mkHref (urlPrefix ++ s) #[.normal (namePrefix ++ s)]
252 | 
253 | /-!
254 | 
255 | ### Partial templates
256 | 
257 | -/
258 | /-- `volume(number):pages` when a `volume` tag exists, `pages <pages>` otherwise; `none` when there is no `pages` tag. -/
259 | def formatVolumeAndPages : Option (Array Content) := do
260 |   let pages ← e.tags["pages"]?
261 |   match e.tags["volume"]? with
262 |   | .some volume =>
263 |     arrayConcat #[
264 |       mkStr volume,
265 |       (mkStr · "(" ")") <$> e.tags["number"]?,
266 |       mkStr pages ":"
267 |     ]
268 |   | .none => mkStr pages "pages "
269 | /-- Reference for the `eprint` tag (`none` if absent). The type is read from `eprinttype` or `archiveprefix` (default `arXiv`): the types listed in `eprintlist` become links via `mkWebRef`, anything else becomes a URL or plain `type:id` text. -/
270 | def formatEprint : Option (Array Content) := do
271 |   let eprint ← TexContent.toPlaintextArray <$> e.tags["eprint"]?
272 |   let eprinttype :=
273 |     TexContent.toPlaintextArray <$> (e.tags["eprinttype"]? <|> e.tags["archiveprefix"]?)
274 |       |>.getD "arXiv"
275 |   let eprintlist : Array (Array String × String × String) := #[
276 |     (#["arxiv"], "https://arxiv.org/abs/", "arXiv:"),
277 |     (#["iacr"], "https://eprint.iacr.org/", "IACR:"),
278 |     (#["pmcid", "pmc"], "https://www.ncbi.nlm.nih.gov/pmc/articles/", "PMCID:")
279 |   ]
280 |   let checkEprint : Array String × String × String → Option (Array Content) :=
281 |     fun (lst, urlPrefix, namePrefix) =>
282 |       if lst.contains eprinttype.toLower then
283 |         let ret := mkWebRef urlPrefix namePrefix eprint
284 |         if ret.isEmpty then .none else .some ret
285 |       else
286 |         .none
287 |   eprintlist.findSome? checkEprint <|> .some
288 |     (if eprint.toLower.startsWith "http" then
289 |       -- unrecognized eprint type but starts with "http"
290 |       mkUrl eprint
291 |     else
292 |       -- unrecognized eprint type, cannot add link to it
293 |       #[.Character (eprinttype ++ ":" ++ eprint)])
294 | /-- One sentence listing the `pubmed`, `doi`, `mrnumber`, `zbl`, `eprint` and `url` references that are present, in that order. -/
295 | def formatWebRefs : Array Content :=
296 |   let formatUrl : Option (Array Content) :=
297 |     mkUrl <$> TexContent.toPlaintextArray <$> e.tags["url"]?
298 |   let formatWebRef (tagName urlPrefix namePrefix : String) : Option (Array Content) :=
299 |     mkWebRef urlPrefix namePrefix <$> TexContent.toPlaintextArray <$> e.tags[tagName]?
300 |   sentence #[
301 |     formatWebRef "pubmed" "https://www.ncbi.nlm.nih.gov/pubmed/" "PMID:",
302 |     formatWebRef "doi" "https://doi.org/" "doi:",
303 |     formatWebRef "mrnumber" "https://www.ams.org/mathscinet-getitem?mr=MR" "MR:",
304 |     formatWebRef "zbl" "https://zbmath.org/?q=an:" "Zbl:",
305 |     formatEprint e,
306 |     formatUrl
307 |   ]
308 | /-- The author HTML as a sentence, or the editor HTML when there is no author HTML. -/
309 | def formatAuthorOrEditor : Array Content :=
310 |   sentence1 (if e.authorHtml.isEmpty then e.editorHtml else e.authorHtml)
311 | /-- `volume <volume> of <series>`, else `number <number> in <series>`, else the bare series. With `asSentence` the leading word is capitalized and the result goes through `sentence1`. -/
312 | def formatVolumeAndSeries (asSentence : Bool) : Array Content :=
313 |   let series := e.tags["series"]?
314 |   let arr : Array Content :=
315 |     if let .some volume := e.tags["volume"]? then
316 |       arrayConcat #[
317 |         mkStr volume (if asSentence then "Volume " else "volume "),
318 |         (mkStr · " of ") <$> series
319 |       ]
320 |     else if let .some number := e.tags["number"]? then
321 |       arrayConcat #[
322 |         mkStr number (if asSentence then "Number " else "number "),
323 |         (mkStr · " in ") <$> series
324 |       ]
325 |     else
326 |       (mkStr <$> series).getD #[]
327 |   if asSentence then sentence1 arr else arr
328 | /-- `chapter <chapter>` and `pages <pages>`, separated by `", "`, for whichever of the two tags are present. -/
329 | def formatChapterAndPages : Array Content :=
330 |   arrayConcat #[
331 |     (mkStr · "chapter ") <$> e.tags["chapter"]?,
332 |     (mkStr · "pages ") <$> e.tags["pages"]?
333 |   ] #[.Character ", "]
334 | /-- The `edition` tag as `<ordinal> edition` (`none` if absent). Text that already ends in ` ed`/` ed.` or contains `edition` is kept as is; a plain number is spelled out (`First` … `Twelfth`) or given an ordinal suffix; anything else gets ` edition` appended. -/
335 | def formatEdition : Option (Array Content) :=
336 |   if let .some arr := e.tags["edition"]? then
337 |     let s := TexContent.toPlaintextArray arr |> stripDiacriticsFromString |>.map getLowerChar
338 |     if s.endsWith " ed." || s.endsWith " ed" || (s.replace "edition" "").length < s.length then
339 |       TexContent.toHtmlArray arr
340 |     else if let .some n := s.toNat? then
341 |       let s : String := match n with
342 |       | 0 => "Zeroth" | 1 => "First" | 2 => "Second" | 3 => "Third"
343 |       | 4 => "Fourth" | 5 => "Fifth" | 6 => "Sixth" | 7 => "Seventh"
344 |       | 8 => "Eighth" | 9 => "Ninth" | 10 => "Tenth" | 11 => "Eleventh"
345 |       | 12 => "Twelfth"
346 |       | _ => toString n ++ match n % 100 with
347 |         | 11 | 12 | 13 => "th" -- 13th, 111th, 113th, …: the "teens" never take st/nd/rd
348 |         | _ => match n % 10 with
349 |           | 1 => "st"
350 |           | 2 => "nd"
351 |           | 3 => "rd"
352 |           | _ => "th"
353 |       .some #[.Character (s ++ " edition")]
354 |     else
355 |       mkStr arr "" " edition"
356 |   else
357 |     .none
358 | /-- With an `address` tag: the sentence `address, date` followed by the sentence `organization, publisher`. Without one: the single sentence `organization, publisher, date`. The organization is only used when `includeOrganization` is set. -/
359 | def formatAddressOrganizationPublisherDate (includeOrganization : Bool) :
360 |     Array Content :=
361 |   let organization : Option (Array Content) :=
362 |     if includeOrganization then
363 |       mkStr <$> e.tags["organization"]?
364 |     else
365 |       .none
366 |   if let .some address := e.tags["address"]? then
367 |     words #[
368 |       sentence #[
369 |         mkStr address,
370 |         e.dateHtml
371 |       ],
372 |       sentence #[
373 |         organization,
374 |         mkStr <$> e.tags["publisher"]?
375 |       ]
376 |     ]
377 |   else
378 |     sentence #[
379 |       organization,
380 |       mkStr <$> e.tags["publisher"]?,
381 |       e.dateHtml
382 |     ]
383 | /-- `ISBN <isbn>` as a sentence; `none` when the entry has no `isbn` tag. -/
384 | def formatISBN : Option (Array Content) :=
385 |   (sentence1 <| mkStr · "ISBN ") <$> e.tags["isbn"]?
386 | 
387 | /-!
388 | 
389 | ### Templates
390 | 
391 | -/
392 | /-- Layout used for `article`: authors; title; journal (italic), volume/pages, date; note; web references. -/
393 | def formatArticle : Array Content :=
394 |   toplevel #[
395 |     sentence1 e.authorHtml,
396 |     sentence' <$> e.tags["title"]?,
397 |     sentence #[
398 |       mkTag "i" <$> e.tags["journal"]?,
399 |       formatVolumeAndPages e,
400 |       e.dateHtml
401 |     ],
402 |     sentence' <$> e.tags["note"]?,
403 |     formatWebRefs e
404 |   ]
405 | /-- Layout used for `book`: authors or editors; title (italic); edition; volume and series; publisher, address, date; ISBN; note; web references. -/
406 | def formatBook : Array Content :=
407 |   toplevel #[
408 |     formatAuthorOrEditor e,
409 |     sentence #[mkTag "i" <$> e.tags["title"]?],
410 |     sentence1 <$> formatEdition e,
411 |     formatVolumeAndSeries e true,
412 |     sentence #[
413 |       mkStr <$> e.tags["publisher"]?,
414 |       mkStr <$> e.tags["address"]?,
415 |       e.dateHtml
416 |     ],
417 |     formatISBN e,
418 |     sentence' <$> e.tags["note"]?,
419 |     formatWebRefs e
420 |   ]
421 | /-- Layout used for `booklet`: authors; title; volume and series; howpublished, address, date, note; web references. -/
422 | def formatBooklet : Array Content :=
423 |   toplevel #[
424 |     sentence1 e.authorHtml,
425 |     sentence' <$> e.tags["title"]?,
426 |     formatVolumeAndSeries e true,
427 |     sentence #[
428 |       mkStr <$> e.tags["howpublished"]?,
429 |       mkStr <$> e.tags["address"]?,
430 |       e.dateHtml,
431 |       mkStr <$> e.tags["note"]?
432 |     ],
433 |     formatWebRefs e
434 |   ]
435 | /-- Layout used for `inbook`: authors or editors; title (italic), edition, chapter and pages; volume and series; publisher, address, date, note; web references. -/
436 | def formatInBook : Array Content :=
437 |   toplevel #[
438 |     formatAuthorOrEditor e,
439 |     sentence #[
440 |       mkTag "i" <$> e.tags["title"]?,
441 |       formatEdition e,
442 |       formatChapterAndPages e
443 |     ],
444 |     formatVolumeAndSeries e true,
445 |     sentence #[
446 |       mkStr <$> e.tags["publisher"]?,
447 |       mkStr <$> e.tags["address"]?,
448 |       e.dateHtml,
449 |       mkStr <$> e.tags["note"]?
450 |     ],
451 |     formatWebRefs e
452 |   ]
453 | /-- Layout used for `incollection`: authors; title; `In` + editors, booktitle (italic), edition, volume and series, chapter and pages; publisher, address, date; web references. The `In` line is left out when it would carry nothing else. -/
454 | def formatInCollection : Array Content :=
455 |   toplevel #[
456 |     sentence1 e.authorHtml,
457 |     sentence' <$> e.tags["title"]?,
458 |     (fun (x : Array Content) => if x.isEmpty then x else #[Content.Character "In "] ++ x) <$> sentence #[
459 |       e.editorHtml,
460 |       mkTag "i" <$> e.tags["booktitle"]?,
461 |       formatEdition e,
462 |       formatVolumeAndSeries e false,
463 |       formatChapterAndPages e
464 |     ],
465 |     sentence #[
466 |       mkStr <$> e.tags["publisher"]?,
467 |       mkStr <$> e.tags["address"]?,
468 |       e.dateHtml
469 |     ],
470 |     formatWebRefs e
471 |   ]
472 | /-- Layout used for `inproceedings` and `conference`: authors; title; `In` + editors, booktitle (italic), volume and series, pages, then address/organization/publisher/date; note; web references. The `In` line is left out when it would carry nothing else. -/
473 | def formatInProceedings : Array Content :=
474 |   toplevel #[
475 |     sentence1 e.authorHtml,
476 |     sentence' <$> e.tags["title"]?,
477 |     (fun (x : Array Content) => if x.isEmpty then x else #[Content.Character "In "] ++ x) <$> words #[
478 |       sentence #[
479 |         e.editorHtml,
480 |         mkTag "i" <$> e.tags["booktitle"]?,
481 |         formatVolumeAndSeries e false,
482 |         mkStr <$> e.tags["pages"]?
483 |       ],
484 |       formatAddressOrganizationPublisherDate e true
485 |     ],
486 |     sentence' <$> e.tags["note"]?,
487 |     formatWebRefs e
488 |   ]
489 | /-- Layout used for `manual`: authors; title (italic); edition; organization, address, date; note; web references. -/
490 | def formatManual : Array Content :=
491 |   toplevel #[
492 |     sentence1 e.authorHtml,
493 |     sentence #[mkTag "i" <$> e.tags["title"]?],
494 |     sentence1 <$> formatEdition e,
495 |     sentence #[
496 |       mkStr <$> e.tags["organization"]?,
497 |       mkStr <$> e.tags["address"]?,
498 |       e.dateHtml
499 |     ],
500 |     sentence' <$> e.tags["note"]?,
501 |     formatWebRefs e
502 |   ]
503 | /-- Layout used for `mastersthesis`: authors; title; `Master's thesis`, school, address, date; note; web references. -/
504 | def formatMasterThesis : Array Content :=
505 |   toplevel #[
506 |     sentence1 e.authorHtml,
507 |     sentence' <$> e.tags["title"]?,
508 |     sentence #[
509 |       .some #[.Character "Master's thesis"],
510 |       mkStr <$> e.tags["school"]?,
511 |       mkStr <$> e.tags["address"]?,
512 |       e.dateHtml
513 |     ],
514 |     sentence' <$> e.tags["note"]?,
515 |     formatWebRefs e
516 |   ]
517 | /-- Layout used for `misc` and `online`: authors; title; howpublished, date; note; web references. -/
518 | def formatMisc : Array Content :=
519 |   toplevel #[
520 |     sentence1 e.authorHtml,
521 |     sentence' <$> e.tags["title"]?,
522 |     sentence #[
523 |       mkStr <$> e.tags["howpublished"]?,
524 |       e.dateHtml
525 |     ],
526 |     sentence' <$> e.tags["note"]?,
527 |     formatWebRefs e
528 |   ]
529 | /-- Layout used for `phdthesis`: authors; title (italic); the `type` tag or `PhD thesis`, school, address, date; note; web references. -/
530 | def formatPhDThesis : Array Content :=
531 |   toplevel #[
532 |     sentence1 e.authorHtml,
533 |     sentence #[mkTag "i" <$> e.tags["title"]?],
534 |     sentence #[
535 |       (mkStr <$> e.tags["type"]?) <|> .some #[.Character "PhD thesis"],
536 |       mkStr <$> e.tags["school"]?,
537 |       mkStr <$> e.tags["address"]?,
538 |       e.dateHtml
539 |     ],
540 |     sentence' <$> e.tags["note"]?,
541 |     formatWebRefs e
542 |   ]
543 | /-- Layout used for `proceedings`: the editors (or, without editors, the organization) first; then title (italic), volume and series, address/organization/publisher/date; note; web references. The organization is repeated in the second line only when editors are present. -/
544 | def formatProceedings : Array Content :=
545 |   let arr : Array (Option (Array Content)) :=
546 |     if e.editorHtml.isEmpty then
547 |       #[
548 |         sentence' <$> e.tags["organization"]?,
549 |         sentence #[
550 |           mkTag "i" <$> e.tags["title"]?,
551 |           formatVolumeAndSeries e false,
552 |           formatAddressOrganizationPublisherDate e false
553 |         ]
554 |       ]
555 |     else
556 |       #[
557 |         sentence1 e.editorHtml,
558 |         sentence #[
559 |           mkTag "i" <$> e.tags["title"]?,
560 |           formatVolumeAndSeries e false,
561 |           formatAddressOrganizationPublisherDate e true
562 |         ]
563 |       ]
564 |   toplevel <| arr ++ #[
565 |     sentence' <$> e.tags["note"]?,
566 |     formatWebRefs e
567 |   ]
568 | /-- Layout used for `techreport`: authors; title; the `type` tag or `Technical Report` plus number, institution, address, date; note; web references. -/
569 | def formatTechReport : Array Content :=
570 |   toplevel #[
571 |     sentence1 e.authorHtml,
572 |     sentence' <$> e.tags["title"]?,
573 |     sentence #[
574 |       words #[
575 |         (mkStr <$> e.tags["type"]?) <|> .some #[.Character "Technical Report"],
576 |         mkStr <$> e.tags["number"]?
577 |       ],
578 |       mkStr <$> e.tags["institution"]?,
579 |       mkStr <$> e.tags["address"]?,
580 |       e.dateHtml
581 |     ],
582 |     sentence' <$> e.tags["note"]?,
583 |     formatWebRefs e
584 |   ]
585 | /-- Layout used for `unpublished`: authors; title; note, date; web references. -/
586 | def formatUnpublished : Array Content :=
587 |   toplevel #[
588 |     sentence1 e.authorHtml,
589 |     sentence' <$> e.tags["title"]?,
590 |     sentence #[
591 |       mkStr <$> e.tags["note"]?,
592 |       e.dateHtml
593 |     ],
594 |     formatWebRefs e
595 |   ]
596 | /-- Pick the layout matching `e.category`; an unlisted category produces the placeholder text `TODO: <category>`. -/
597 | def format : Array Content :=
598 |   match e.category with
599 |   | "article" => formatArticle e
600 |   | "book" => formatBook e
601 |   | "booklet" => formatBooklet e
602 |   | "inbook" => formatInBook e
603 |   | "incollection" => formatInCollection e
604 |   | "inproceedings" | "conference" => formatInProceedings e
605 |   | "manual" => formatManual e
606 |   | "mastersthesis" => formatMasterThesis e
607 |   | "misc" | "online" => formatMisc e
608 |   | "phdthesis" => formatPhDThesis e
609 |   | "proceedings" => formatProceedings e
610 |   | "techreport" => formatTechReport e
611 |   | "unpublished" => formatUnpublished e
612 |   | _ => #[.Character ("TODO: " ++ e.category)]
613 | 
614 | end Formatter
615 | 
616 | /-- Fill the `html` field of the `ProcessedEntry` `e` with `Formatter.format e`; every other field is kept. -/
617 | def ProcessedEntry.format : ProcessedEntry :=
618 |   { e with html := Formatter.format e }
619 | 
620 | end
621 | 
622 | /-!
623 | 
624 | ## Sorting bibtex items
625 | 
626 | -/
627 | /-- Order two names by `lastNameWithoutDiacritics`, then by `firstNameWithoutDiacritics`. -/
628 | def compareAuthor (a b : BibtexName) : Ordering :=
629 |   if a.lastNameWithoutDiacritics < b.lastNameWithoutDiacritics then
630 |     .lt
631 |   else if a.lastNameWithoutDiacritics > b.lastNameWithoutDiacritics then
632 |     .gt
633 |   else if a.firstNameWithoutDiacritics < b.firstNameWithoutDiacritics then
634 |     .lt
635 |   else if a.firstNameWithoutDiacritics > b.firstNameWithoutDiacritics then
636 |     .gt
637 |   else
638 |     .eq
639 | /-- Compare two name arrays element by element with `compareAuthor`, starting at index `i`; when one array runs out first, it is the smaller one. -/
640 | partial def compareAuthors (a b : Array BibtexName) (i : Nat := 0) : Ordering :=
641 |   if ha : i < a.size then
642 |     if hb : i < b.size then
643 |       match compareAuthor a[i] b[i] with
644 |       | .lt => .lt | .gt => .gt
645 |       | .eq => compareAuthors a b (i + 1)
646 |     else
647 |       .gt
648 |   else
649 |     if i < b.size then .lt else .eq
650 | /-- Order two entries by `authorOrEditor`, then by `date`, then by `titleWithoutDiacritics`. -/
651 | def compareEntry (a b : ProcessedEntry) : Ordering :=
652 |   match compareAuthors a.authorOrEditor b.authorOrEditor with
653 |   | .lt => .lt | .gt => .gt
654 |   | .eq =>
655 |     if a.date < b.date then
656 |       .lt
657 |     else if a.date > b.date then
658 |       .gt
659 |     else if a.titleWithoutDiacritics < b.titleWithoutDiacritics then
660 |       .lt
661 |     else if a.titleWithoutDiacritics > b.titleWithoutDiacritics then
662 |       .gt
663 |     else
664 |       .eq
665 | 
666 | /-- Sort an array of bibtex entries into the order defined by `compareEntry`. -/
667 | def sortEntry (arr : Array ProcessedEntry) : Array ProcessedEntry :=
668 |   arr.qsort fun a b => (compareEntry a b).isLT
669 | /-- Prepend to `s` the `length` lowest base-26 digits of `n`, written with the letters `a` to `z`. -/
670 | partial def toBase26Aux (n : Nat) (length : Nat) (s : String) : String :=
671 |   match length with
672 |   | 0 => s
673 |   | k + 1 =>
674 |     toBase26Aux (n / 26) k <| toString (Char.ofNat (97 + n % 26)) ++ s
675 | /-- Letter label for `n`: `0` to `25` give `a` to `z`, the next `26 ^ 2` values give `aa` to `zz`, and so on. -/
676 | partial def toBase26 (n : Nat) (length : Nat := 1) : String :=
677 |   let block := 26 ^ length
678 |   -- `block` labels have exactly `length` letters; skip them when `n` lies beyond.
679 |   if n < block then toBase26Aux n length ""
680 |   else toBase26 (n - block) (length + 1)
681 | /-- Walk the tags from index `i`. The map stores, per original tag, the index of its first occurrence and how many repeats were seen. On the first repeat the first occurrence gets `a` inserted before its last character; every repeat gets the `toBase26` letter of its running count (`b`, `c`, …). -/
682 | partial def deduplicateTagAux
683 |     (x : Array String × Std.HashMap String (Nat × Nat)) (i : Nat) :
684 |     Array String × Std.HashMap String (Nat × Nat) :=
685 |   if h : i < x.1.size then
686 |     let tag := x.1[i]
687 |     let y : Array String × Std.HashMap String (Nat × Nat) :=
688 |       if let .some (first, count) := x.2[tag]? then
689 |         let z : Array String :=
690 |           if count = 0 then
691 |             x.1.modify first fun x => x.dropRight 1 ++ "a]"
692 |           else
693 |             x.1
694 |         let z := z.modify i fun x => x.dropRight 1 ++ toBase26 (count + 1) ++ "]"
695 |         (z, x.2.insert tag (first, count + 1))
696 |       else
697 |         (x.1, x.2.insert tag (i, 0))
698 |     deduplicateTagAux y (i + 1)
699 |   else
700 |     x
701 | 
702 | /-- Deduplicate an array of tags by running `deduplicateTagAux` from index `0` with an empty map. -/
703 | def deduplicateTag' (arr : Array String) : Array String :=
704 |   Prod.fst <| deduplicateTagAux (arr, ∅) 0
705 | 
706 | /-- Deduplicate the tags of an array of bibtex entries (see `deduplicateTag'`). -/
707 | def deduplicateTag (arr : Array ProcessedEntry) : Array ProcessedEntry :=
708 |   let newTags := deduplicateTag' (arr.map (·.tag))
709 |   (arr.zip newTags).map fun (entry, newTag) => { entry with tag := newTag }
710 | 
711 | end BibtexQuery
712 | 


--------------------------------------------------------------------------------
/BibtexQuery/Main.lean:
--------------------------------------------------------------------------------
  1 | /-
  2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
  3 | Released under Apache 2.0 license as described in the file LICENSE.
  4 | Author: Frédéric Dupuis
  5 | -/
  6 | 
  7 | import BibtexQuery.Parser
  8 | import BibtexQuery.String
  9 | import BibtexQuery.Query
 10 | import BibtexQuery.Format
 11 | 
 12 | /-!
 13 | # BibtexQuery: a simple command-line bibtex query utility
 14 | 
 15 | BibtexQuery is a command-line utility that reads in a bibtex file and performs simple queries. A query is a string
 16 | of the form "t.querystring", where `t` is the query type (`k` key, `a` author, `t` title, `w` keywords, `c` class), and `querystring`
 17 | is a string (without spaces). BibtexQuery reads in a bibtex file, and returns the entries that match all the
 18 | queries given as command-line parameters. Note that the entries are processed in a way that strips diacritics,
 19 | spaces and special characters before the queries are performed. In addition, the list of authors is normalized to
 20 | firstnamelastname. Hence, for example, "Dupuis, Frédéric" will match the query `a.ericdup`.
 21 | -/
 22 | 
 23 | open Lean BibtexQuery
 24 | 
 25 | def listDoublons (parseRes : List BibtexQuery.Entry) : List String :=
 26 |   -- Only `normalType` entries contribute a key.
 27 |   let keys : List String := parseRes.filterMap fun
 28 |     | BibtexQuery.Entry.normalType _ key _ => some key
 29 |     | _ => none
 30 |   -- State: the keys seen so far, and the keys met again after their first occurrence.
 31 |   let step (st : Std.HashMap String Unit × List String) (key : String) :=
 32 |     if st.1.contains key then (st.1, key :: st.2) else (st.1.insert key (), st.2)
 33 |   -- Repeated keys are consed onto the front, so the result is in reverse order of
 34 |   -- discovery, and a key occurring k times is listed k - 1 times.
 35 |   (keys.foldl step (∅, [])).2
 36 | 
 37 | def printHelp := IO.println  -- prints the usage text below to standard output
 38 | "
 39 | bibtex-query - command-line bibtex file processor
 40 | 
 41 | Usage: bibtex-query command filename [args]
 42 | 
 43 | Commands:
 44 |   h: print this help message
 45 |   d: check for duplicate entries
 46 |   l: sort all entries and list them in plaintext form
 47 |   q: print entries that match the given query
 48 |   c: print citations of entries that match the given query
 49 | 
 50 | Queries have the form «t.query» (without the quotes) with t being the type of query,
 51 | and «query» being the content. The entries printed out are those that match all the queries.
 52 | 
 53 | Types of queries:
 54 |   k: key (ex: k.d14)
 55 |   c: class (ex: c.article, c.book)
 56 |   a: author (ex: a.dupuis)
 57 |   t: title (ex: t.channelcapacity)
 58 |   w: keywords (ex: w.quantum)
 59 | "
 60 | 
 61 | mutual
 62 | 
 63 | partial def eToPlaintext : Xml.Element → String  -- text of all children; name and attributes dropped
 64 | | .Element _ _ children => children.foldl (fun acc child => acc ++ cToPlaintext child) ""
 65 | 
 66 | partial def cToPlaintext : Xml.Content → String  -- comments contribute nothing
 67 | | .Character text => text
 68 | | .Element elem => eToPlaintext elem
 69 | | .Comment _ => ""
 70 | 
 71 | end
 72 | 
 73 | def printEntry (e : ProcessedEntry) : IO Unit :=
 74 |   let body := e.html.foldl (fun acc c => acc ++ cToPlaintext c) ""  -- HTML flattened to plain text
 75 |   IO.println (e.tag ++ "{" ++ e.name ++ "} " ++ body ++ "\n")
 76 | 
 77 | def printEntries (ents : List Entry) : IO Unit :=
 78 |   match ents.toArray.mapM ProcessedEntry.ofEntry with
 79 |   | .error msg => throw (IO.userError msg)
 80 |   | .ok processed =>  -- drop the `none` results, then sort, deduplicate tags, format and print
 81 |     let ready := (deduplicateTag (sortEntry (processed.filterMap id))).map ProcessedEntry.format
 82 |     discard (ready.mapM printEntry)
 83 | 
 84 | def printMatchingEntries (ents : List Entry) (qs : List Query) : IO Unit :=
 85 |   printEntries (ents.filter fun e => e.matchQueries qs)  -- keep the entries matching every query
 86 | 
 87 | def printMatchingCitations (ents : List Entry) (qs : List Query) : IO Unit :=
 88 |   -- one citation per line, in input order, for the entries matching every query
 89 |   (ents.filter (·.matchQueries qs)).forM fun e => IO.println e.toCitation
 90 | 
 91 | def main : List String → IO Unit
 92 |   | ["h"]           => printHelp
 93 |   | ["-h"]          => printHelp
 94 |   | ["--help"]      => printHelp
 95 |   | ["h", _]        => printHelp
 96 |   | ["-h", _]       => printHelp
 97 |   | ["--help", _]   => printHelp
 98 |   | ["d", fname]    => do  -- report the keys that occur more than once
 99 |     IO.println s!"Reading {fname} to find doubled keys"
100 |     let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter
101 |     match parsed with
102 |     | .success _pos res =>
103 |       let lst := listDoublons res
104 |       IO.println lst
105 |     | .error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}"
106 |   | ["l", fname]    => do  -- sorted plaintext listing of all entries
107 |     let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter
108 |     match parsed with
109 |     | .success _pos res => printEntries res
110 |     | .error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}"
111 |   | "q" :: (fname :: queries) => do  -- arguments that are not valid queries are ignored
112 |     let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter
113 |     match parsed with
114 |     | .success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString
115 |     | .error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}"
116 |   | "c" :: (fname :: queries) => do  -- like `q`, but prints citations
117 |     let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter
118 |     match parsed with
119 |     | .success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString
120 |     | .error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}"
121 |   | _            => do IO.eprintln "Invalid command-line arguments"; printHelp
122 | 


--------------------------------------------------------------------------------
/BibtexQuery/Name.lean:
--------------------------------------------------------------------------------
  1 | /-
  2 | Copyright (c) 2024 Jz Pan. All rights reserved.
  3 | Released under Apache 2.0 license as described in the file LICENSE.
  4 | Author: Jz Pan
  5 | -/
  6 | 
  7 | import BibtexQuery.TexDiacritics
  8 | import UnicodeBasic
  9 | import Std.Internal.Parsec
 10 | import Std.Internal.Parsec.String
 11 | 
 12 | 
 13 | /-!
 14 | 
 15 | # Bibtex name processing
 16 | 
 17 | This file contains functions for bibtex name processing.
 18 | 
 19 | The main function is `processNames`, which takes an array of `TexContent`
 20 | and returns an array of `BibtexName`.
 21 | 
 22 | -/
 23 | 
 24 | open Lean Unicode BibtexQuery.TexDiacritics
 25 | 
 26 | namespace BibtexQuery.Name
 27 | 
 28 | /-- Split an array of `TexContent` at " " and ","; each "," is kept as an item of its own. -/
 29 | def getNameAux (arr : Array TexContent) : Array (Array TexContent) :=
 30 |   let step (words : Array (Array TexContent)) (tok : TexContent) :=
 31 |     match tok with
 32 |     | .char ' ' => words.push #[]  -- a space closes the current word
 33 |     | .char ',' => (words.push #[tok]).push #[]  -- the comma becomes a word of its own
 34 |     -- any other token extends the word being built, which is always the last one
 35 |     | _ => words.modify (words.size - 1) fun w => w.push tok
 36 |   let words := arr.foldl step #[#[]]
 37 |   words.filter fun w => !w.isEmpty
 38 | 
 39 | /-- Input a name, given as items already split by " " and ",", return `(Firstname, Lastname)`.
 40 | The braces in the name are preserved. The logic is:
 41 | 
 42 | 1. If there is a "," in the array, then the items before the first "," are the last name,
 43 |    and the items after the first "," are the first name.
 44 | 2. Otherwise, if the last item consists of a single braced group, then it is the last name,
 45 |    the remaining items are the first name.
 46 | 3. Otherwise, if there is an item that begins with a lowercase letter, then the items before
 47 |    the first of such item are the first name, the remaining items are the last name.
 48 | 4. Otherwise, the last item is the last name, the remaining items are the first name.
 49 | -/
 50 | def getName (arr : Array (Array TexContent)) :
 51 |     Array (Array TexContent) × Array (Array TexContent) :=
 52 |   match arr.findIdx? (fun
 53 |     | #[.char ','] => true
 54 |     | _ => false) with
 55 |   | .some n =>  -- rule 1: `n` is the index of the first comma item
 56 |     (arr.toSubarray.drop (n + 1) |>.toArray, arr.toSubarray.take n |>.toArray)
 57 |   | .none =>  -- no comma: `i` is the index at which the last name starts
 58 |     let i := match arr.back? with
 59 |     | .some #[.braced _] => arr.size - 1  -- rule 2
 60 |     | _ => arr.findIdx? (fun s => s.findSome? TexContent.getFirstChar
 61 |       |>.getD ' ' |> isLowercase) |>.getD (arr.size - 1)  -- rule 3, falling back to rule 4
 62 |     (arr.toSubarray.take i |>.toArray, arr.toSubarray.drop i |>.toArray)
 63 | 
 64 | /-- Input an array of `TexContent`, return an array of `(Firstname, Lastname)`.
 65 | The braces in the name are preserved. -/
 66 | def getNames (arr : Array TexContent) :
 67 |     Array (Array (Array TexContent) × Array (Array TexContent)) :=
 68 |   -- Group the items into one array per person; the item "and" separates people.
 69 |   let people : Array (Array (Array TexContent)) := (getNameAux arr).foldl (fun acc word =>
 70 |     match word with
 71 |     | #[.normal "and"] => acc.push #[]
 72 |     | _ => acc.modify (acc.size - 1) (·.push word)) #[#[]]
 73 |   people.filterMap fun words =>
 74 |     let name := getName words
 75 |     if name.1.isEmpty && name.2.isEmpty then none else some name
 76 | 
 77 | /-- Strip diacritics from a character: some letters are mapped by hand, any other character becomes the first character of its canonical decomposition with combining class 0. -/
 78 | def stripDiacritics (c : Char) : Char :=
 79 |   match c with
 80 |   | 'œ' => 'o' | 'Œ' => 'O'
 81 |   | 'æ' => 'a' | 'Æ' => 'A'
 82 |   | 'å' => 'a' | 'Å' => 'A'
 83 |   | 'ø' => 'o' | 'Ø' => 'O'
 84 |   | 'ł' => 'l' | 'Ł' => 'L'
 85 |   | 'ı' => 'i'
 86 |   | 'ȷ' => 'j'
 87 |   | '\u00DF' => 's' | '\u1E9E' => 'S'  -- ß and ẞ
 88 |   | _ =>
 89 |     let s := getCanonicalDecomposition c
 90 |     s.get? (s.find fun c => getCanonicalCombiningClass c == 0) |>.getD c  -- `c` itself if no such character is found
 91 | 
 92 | /-- Strip diacritics from a string: apply `stripDiacritics` to every character,
 93 | then drop the characters that are still marks. -/
 94 | def stripDiacriticsFromString (s : String) : String :=
 95 |   String.mk ((s.toList.map stripDiacritics).filter fun c => !GeneralCategory.isMark c)
 96 | 
 97 | /-- Strip diacritics from every character of `s` and keep the alphabetic ones, in order. -/
 98 | def getAlphabets (s : String) : Array Char :=
 99 |   (s.toList.map stripDiacritics).toArray.filter isAlphabetic
100 | 
101 | /-- Check whether a string is a non-empty sequence of upper case Roman numeral letters.
102 | The validity of the number is not checked; for example, `IXIX` is accepted. -/
103 | def isUppercaseRomanNumerals (s : String) : Bool :=
104 |   let isNumeral (c : Char) : Bool :=
105 |     c == 'I' || c == 'V' || c == 'X' || c == 'L' || c == 'C' || c == 'D' || c == 'M'
106 |   -- the empty string is rejected explicitly, since `all` holds vacuously on it
107 |   !s.isEmpty && s.all isNumeral
108 | 
109 | /-- Input a last name without TeX commands, braces
110 | and math equations, already split by spaces and comma,
111 | return `(oneLetterAbbr, threeLetterAbbr)` of the last name.
112 | Note that they do not necessarily have one and three letters;
113 | they do when the last name contains no spaces and has only one uppercase letter.
114 | The logic is:
115 | 
116 | First, if there is more than one item, remove all items which are upper case Roman numerals.
117 | For example, this removes "III" in "Gerth III".
118 | 
119 | 1. If the number of items is not exactly one, then both of the abbreviations are the concatenation
120 |    of the first alphabets of each item.
121 | 2. Otherwise, if there are exactly two uppercase alphabets and there is exactly one alphabet
122 |    between them, then both of the abbreviations are these three alphabets.
123 |    For example, `McCrimmon => McC`.
124 | 3. Otherwise, if there are at least two uppercase alphabets, then both of the abbreviations are
125 |    the concatenation of uppercase alphabets.
126 |    For example, `Heath-Brown => HB`.
127 | 4. Otherwise, the abbreviations are the first one and three alphabets of the last name,
128 |    respectively.
129 | -/
130 | def getLastNameAbbr (arr : Array String) : String × String :=
131 |   let arr := if arr.size ≤ 1 then arr else arr.filter (not <| isUppercaseRomanNumerals ·)
132 |   match arr with
133 |   | #[] => ("", "")
134 |   | #[s] =>
135 |     let s := getAlphabets s  -- only the alphabets of the item, diacritics stripped
136 |     let arr : Array Nat := s.zipIdx.filterMap fun x =>  -- positions of the uppercase alphabets in `s`
137 |       if isUppercase x.1 then .some x.2 else .none
138 |     if arr.size = 2 ∧ arr[0]! + 2 = arr[1]! then  -- rule 2
139 |       let s := s.toSubarray.drop arr[0]! |>.take 3 |>.toArray.toList |> String.mk
140 |       (s, s)
141 |     else if arr.size ≥ 2 then  -- rule 3
142 |       let s := arr.map (s[·]!) |>.toList |> String.mk
143 |       (s, s)
144 |     else  -- rule 4
145 |       let s := String.mk s.toList
146 |       (s.take 1, s.take 3)
147 |   | _ =>  -- rule 1; items without any alphabet contribute nothing
148 |     let s := arr.filterMap (getAlphabets · |> (·[0]?)) |>.toList |> String.mk
149 |     (s, s)
150 | 
151 | /-- Represents the name of a person in a bibtex author field. -/
152 | structure BibtexName where
153 |   /-- The first name, as an array of items, each an array of `TexContent`. -/
154 |   firstName : Array (Array TexContent)
155 |   /-- The last name, as an array of items, each an array of `TexContent`. -/
156 |   lastName : Array (Array TexContent)
157 |   /-- The first name without TeX commands, braces and diacritics,
158 |   all letters converted to uppercase. -/
159 |   firstNameWithoutDiacritics : String
160 |   /-- The last name without TeX commands, braces and diacritics,
161 |   all letters converted to uppercase. -/
162 |   lastNameWithoutDiacritics : String
163 |   /-- The one-letter abbreviation of the last name, which is used if a bibitem has at least
164 |   two authors. Note that it does not necessarily have one letter;
165 |   it does when the last name contains no spaces and has only one uppercase letter. -/
166 |   oneLetterAbbr : String
167 |   /-- The three-letter abbreviation of the last name, which is used if a bibitem has exactly
168 |   one author. Note that it does not necessarily have three letters;
169 |   it does when the last name contains no spaces and has only one uppercase letter. -/
170 |   threeLetterAbbr : String
171 | deriving Repr
172 | 
173 | namespace BibtexName
174 | 
175 | /-- Convert a `BibtexName` to `Firstname Lastname` with TeX commands and braces.
176 | This is not necessarily identical to the original input. -/
177 | def toString (x : BibtexName) : String :=
178 |   let g (arr : Array (Array TexContent)) : String :=
179 |     " ".intercalate (arr.map TexContent.toStringArray |>.toList)
180 |   g <| x.firstName ++ x.lastName
181 | 
182 | def toStringArray (arr : Array BibtexName) : String :=  -- TeX forms joined by " and "; empty names skipped
183 |   " and ".intercalate (arr.map toString |>.filter (not ·.isEmpty) |>.toList)
184 | 
185 | /-- Convert a `BibtexName` to `Firstname Lastname` without TeX commands and braces. -/
186 | def toPlaintext (x : BibtexName) : String :=
187 |   let g (arr : Array (Array TexContent)) : String :=
188 |     " ".intercalate (arr.map TexContent.toPlaintextArray |>.toList)
189 |   g <| x.firstName ++ x.lastName
190 | 
191 | def toPlaintextArray (arr : Array BibtexName) : String :=  -- plaintext forms joined by ", "; empty names skipped
192 |   ", ".intercalate (arr.map toPlaintext |>.filter (not ·.isEmpty) |>.toList)
193 | 
194 | /-- Convert a `BibtexName` to `Firstname Lastname` of HTML form. -/
195 | def toHtml (x : BibtexName) : Array Xml.Content :=
196 |   let g (arr : Array (Array TexContent)) : Array Xml.Content :=
197 |     arr.foldl (fun acc s =>
198 |       let t := TexContent.toHtmlArray s
199 |       acc ++ (if acc.isEmpty || t.isEmpty then #[] else #[.Character " "]) ++ t) #[]  -- a space only between non-empty parts
200 |   g <| x.firstName ++ x.lastName
201 | 
202 | def toHtmlArray (arr : Array BibtexName) : Array Xml.Content :=  -- HTML forms separated by ", "
203 |   arr.foldl (fun acc s =>
204 |     let t := s.toHtml
205 |     acc ++ (if acc.isEmpty || t.isEmpty then #[] else #[.Character ", "]) ++ t) #[]
206 | 
207 | end BibtexName
208 | 
209 | /-- Build a `BibtexName` from `(Firstname, Lastname)`, computing the normalized forms and abbreviations. -/
210 | def processName (s : Array (Array TexContent) × Array (Array TexContent)) : BibtexName :=
211 |   let plain (part : Array (Array TexContent)) : String :=  -- plaintext items joined by spaces
212 |     " ".intercalate (part.toList.map TexContent.toPlaintextArray)
213 |   let normalize (part : Array (Array TexContent)) : String :=  -- no diacritics, uppercased
214 |     (stripDiacriticsFromString (plain part)).map getUpperChar
215 |   let (one, three) := getLastNameAbbr (s.2.map TexContent.toPlaintextArray)
216 |   { firstName := s.1
217 |     lastName := s.2
218 |     firstNameWithoutDiacritics := normalize s.1
219 |     lastNameWithoutDiacritics := normalize s.2
220 |     oneLetterAbbr := one
221 |     threeLetterAbbr := three }
222 | 
223 | /-- Split an array of `TexContent` into names with `getNames`, then apply `processName` to each. -/
224 | def processNames (arr: Array TexContent) : Array BibtexName :=
225 |   (getNames arr).map processName
226 | 
227 | end BibtexQuery.Name
228 | 


--------------------------------------------------------------------------------
/BibtexQuery/ParsecExtra.lean:
--------------------------------------------------------------------------------
  1 | /-
  2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
  3 | Released under Apache 2.0 license as described in the file LICENSE.
  4 | Author: Frédéric Dupuis
  5 | -/
  6 | 
  7 | import Std.Internal.Parsec
  8 | import Std.Internal.Parsec.Basic
  9 | import Std.Internal.Parsec.String
 10 | 
 11 | 
 12 | 
 13 | /-!
 14 | # Extra Parsec material
 15 | 
 16 | This file contains additional material for the Parsec library, some
 17 | of which is modelled after its Haskell counterpart.
 18 | -/
 19 | 
 20 | 
 21 | open Lean Std.Internal.Parsec.String Std.Internal.Parsec
 22 | 
 23 | namespace BibtexQuery.ParsecExtra
 24 | 
 25 | def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α :=
 26 |   -- run `p` from the start of `s`; the final position and any error message are discarded
 27 |   if let .success _ val := p s.iter then some val
 28 |   else none
 29 | 
 30 | def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos) :=
 31 |   -- run `p` from the start of `s`; on success also report the position at which it stopped
 32 |   if let .success rest val := p s.iter then some (val, rest.i)
 33 |   else none
 34 | 
 35 | @[inline]  -- accepts one character that does not occur in `bad`
 36 | def noneOf (bad : String) : Parser Char := satisfy fun c => !bad.contains c
 37 | 
 38 | @[inline]  -- a possibly empty run of characters that do not occur in `bad`
 39 | def noneOfStr (bad : String) : Parser String := manyChars <| noneOf bad
 40 | 
 41 | @[inline]  -- NOTE(review): a `ByteArray` parser, unlike the `String` parsers in the rest of this file — confirm intended
 42 | def eol : ByteArray.Parser String :=
 43 |   ByteArray.pstring "\n\r" <|> ByteArray.pstring "\r\n" <|> ByteArray.pstring "\n"  -- two-character forms are tried before the bare "\n"
 44 | 
 45 | @[inline]  -- run `p` and discard its result; if `p` fails, succeed without it
 46 | def maybeSkip (p : Parser α) : Parser Unit := attempt (do let _ ← p; pure ()) <|> pure ()
 47 | 
 48 | @[inline]  -- applies `p` repeatedly, appending each result to `acc` in order
 49 | partial def manyCore' (p : Parser α) (acc : List α) : Parser (List α) :=
 50 |   (do let x ← p; manyCore' p (acc ++ [x])) <|> pure acc
 51 | 
 52 | @[inline]  -- `manyCore'` started with an empty accumulator
 53 | def many' (p : Parser α) : Parser (List α) := manyCore' p []
 54 | 
 55 | @[inline]  -- applies `p` repeatedly, appending each parsed string to `acc`
 56 | partial def manyStrCore (p : Parser String) (acc : String) : Parser String :=
 57 |   (do let piece ← p; manyStrCore p (acc ++ piece)) <|> pure acc
 58 | 
 59 | @[inline]  -- `manyStrCore` started with the empty string
 60 | def manyStr (p : Parser String) : Parser String := manyStrCore p ""
 61 | 
 62 | @[inline]  -- each round parses `psep` then `pcont`; the results of `pcont` are appended to `acc`
 63 | partial def sepByCore (pcont : Parser α) (psep : Parser β) (acc : List α) :
 64 |     Parser (List α) :=
 65 |   attempt (do discard psep; let x ← pcont; sepByCore pcont psep (acc ++ [x])) <|> pure acc
 66 | 
 67 | @[inline]  -- `pcont` items separated by `psep`; the empty list if no first item parses
 68 | def sepBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
 69 |   (do let first ← pcont; sepByCore pcont psep [first]) <|> pure []
 70 | 
 71 | @[inline]  -- like `sepBy`, but one trailing `psep` after the last item is also consumed
 72 | def sepOrEndBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
 73 |   (do let x ← pcont; let xs ← sepByCore pcont psep [x]; maybeSkip psep; pure xs) <|> pure []
 74 | 
 75 | @[inline]  -- each round parses `pcont` then `psep`; the results of `pcont` are appended to `acc`
 76 | partial def endByCore (pcont : Parser α) (psep : Parser β) (acc : List α) :
 77 |     Parser (List α) :=
 78 |   attempt (do let x ← pcont; discard psep; endByCore pcont psep (acc ++ [x])) <|> pure acc
 79 | 
 80 | @[inline]  -- zero or more `pcont` items, each followed by `psep`
 81 | def endBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
 82 |   endByCore pcont psep [] <|> pure []
 83 | 
 84 | @[inline]
 85 | def alphaNum : Parser Char := attempt do
 86 |   let c ← any
 87 |   unless c.isAlphanum do  -- `Char.isAlphanum` is ASCII-only: A–Z, a–z, 0–9
 88 |     fail "ASCII alphanumeric character expected"
 89 |   return c
 90 | 
 91 | @[inline]  -- one ASCII letter, returned in lowercase
 92 | def asciiLetterToLower : Parser Char := Char.toLower <$> asciiLetter
 93 | 
 94 | @[inline]  -- one character accepted by `alphaNum`, returned in lowercase
 95 | def alphaNumToLower : Parser Char := Char.toLower <$> alphaNum
 96 | 
 97 | @[inline]  -- a possibly empty run of ASCII letters, returned in lowercase
 98 | def asciiWordToLower : Parser String := manyChars asciiLetterToLower
 99 | 
100 | @[inline]
101 | def between (op : Parser α) (cl : Parser α) (mid : Parser β) : Parser β :=
102 |   -- Runs `op`, then `mid`, then `cl`, and keeps only the result of `mid`.
103 |   -- The whole sequence is wrapped in `attempt`.
104 |   attempt <|
105 |     op *> mid <* cl
106 | 
107 | @[inline]
108 | def natNum : Parser Nat := attempt do
109 |   let digits ← manyChars digit  -- may be empty, in which case `toNat?` below yields `none`
110 |   if let some n := digits.toNat? then return n else fail "Not a natural number"
111 | 
112 | def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parser String := fun it =>  -- `test prev c` decides whether to stop before `c`
113 |   let out :=
114 |     it.foldUntil "" fun acc c =>  -- presumably stops at the first character for which `none` is returned — confirm against `String.Iterator.foldUntil`
115 |       let prev : Option Char := if acc == "" then none else acc.back  -- last accepted character, `none` at the start
116 |       if test prev c then none else some (acc ++ c.toString)
117 |   .success out.2 out.1  -- always succeeds; the result may be the empty string
118 | 
119 | end BibtexQuery.ParsecExtra
120 | 


--------------------------------------------------------------------------------
/BibtexQuery/Parser.lean:
--------------------------------------------------------------------------------
  1 | /-
  2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
  3 | Released under Apache 2.0 license as described in the file LICENSE.
  4 | Author: Frédéric Dupuis
  5 | -/
  6 | 
  7 | import BibtexQuery.ParsecExtra
  8 | import BibtexQuery.Entry
  9 | import Std.Internal.Parsec
 10 | import Std.Internal.Parsec.String
 11 | 
 12 | /-!
 13 | # Bibtex Parser
 14 | 
 15 | This file contains a parser for the Bibtex format. Note that currently, only a subset of the official
 16 | Bibtex format is supported; features such as predefined strings and concatenation using `#` are not
 17 | supported.
 18 | -/
 19 | 
 20 | open Lean Std.Internal.Parsec Std.Internal.Parsec.String BibtexQuery.ParsecExtra
 21 | 
 22 | namespace BibtexQuery.Parser
 23 | 
 24 | /-- The name of the bibtex entry (i.e. what goes in the cite command): an ASCII letter followed by alphanumerics, `:`, `-` or `_`. -/
 25 | def name : Parser String := attempt do
 26 |   let head ← asciiLetter
 27 |   let tail ← manyChars (alphaNum <|> pchar ':' <|> pchar '-' <|> pchar '_')
 28 |   return String.singleton head ++ tail
 29 | 
 30 | /-- The entry type following `@` ("article", "book", etc), returned in lowercase. -/
 31 | def category : Parser String := attempt (skipChar '@' *> asciiWordToLower)
 32 | 
 33 | partial def bracedContentTail (acc : String) : Parser String := attempt do
 34 |   -- Reads up to and including the `}` closing the current group; `acc` is what was read so far.
 35 |   match ← any with
 36 |   | '}' => return acc ++ "}"
 37 |   | '{' =>
 38 |     -- nested group: read it completely (its closing brace included), then carry on
 39 |     let inner ← bracedContentTail ""
 40 |     bracedContentTail (acc ++ "{" ++ inner)
 41 |   | c => bracedContentTail (acc ++ c.toString)
 42 | 
 43 | def bracedContent : Parser String := attempt do
 44 |   skipChar '{'
 45 |   -- the tail ends with the closing brace of this group, which is not part of the content
 46 |   return (← bracedContentTail "").dropRight 1
 47 | 
 48 | def quotedContent : Parser String := attempt do
 49 |   skipChar '"'
 50 |   let raw ← manyCharsUntilWithPrev fun prev c => c == '"' && prev != some '\\'  -- stop at an unescaped quote
 51 |   skipChar '"'
 52 |   return raw.replace "\n" "" |>.replace "\r" ""
 53 | 
 54 | def month : Parser String := attempt do
 55 |   -- The twelve three-letter abbreviations accepted as bare
 56 |   -- (unquoted, unbraced) month values.
 57 |   let months : List String := [
 58 |     "jan", "feb", "mar",
 59 |     "apr", "may", "jun",
 60 |     "jul", "aug", "sep",
 61 |     "oct", "nov", "dec"
 62 |   ]
 63 |   -- `asciiWordToLower` lowercases the word, so the check is
 64 |   -- case-insensitive and the result is always lowercase.
 65 |   let word ← asciiWordToLower
 66 |   if months.contains word then
 67 |     return word
 68 |   else
 69 |     fail "Not a valid month"
 70 | 
 71 | /-- The content field of a tag. -/
 72 | def tagContent : Parser String := attempt do
 73 |   -- Dispatch on the first character without consuming it.
 74 |   let c ← peek!
 75 |   if c.isDigit then manyChars digit  -- bare number
 76 |   else if c.isAlpha then month  -- bare word: only month abbreviations are accepted
 77 |   else if c == '"' then quotedContent
 78 |   else if c == '{' then bracedContent
 79 |   else fail "Tag content expected"
 80 | 
 81 | /-- A `name = content` pair, e.g. journal = {Journal of Musical Deontology}; the name is lowercased. -/
 82 | def tag : Parser Tag := attempt do
 83 |   let key ← manyChars (alphaNumToLower <|> pchar '_' <|> pchar '-')
 84 |   ws *> skipChar '=' *> ws
 85 |   let value ← tagContent
 86 |   return { name := key, content := value }
 87 | 
 88 | def outsideEntry : Parser Unit :=  -- skips everything up to the next `@` (or the end of input)
 89 |   attempt <| discard <| manyChars <| noneOf "@"
 90 | 
 91 | /-- A Bibtex entry. TODO deal with "preamble" etc. -/
 92 | def entry : Parser Entry := attempt do
 93 |   outsideEntry
 94 |   let kind ← category
 95 |   ws *> skipChar '{' *> ws
 96 |   let key ← name
 97 |   skipChar ',' *> ws
 98 |   let tags : List Tag ← sepOrEndBy tag (ws *> skipChar ',' *> ws)
 99 |   ws *> skipChar '}' *> ws
100 |   return Entry.normalType kind key tags
101 | 
102 | def bibtexFile : Parser (List Entry) := many' entry  -- NOTE(review): `entry` is wrapped in `attempt`, so a malformed entry appears to end the list silently instead of raising a parse error — confirm
103 | 
104 | --#eval "auTHOr23:z  ".parseDebug name
105 | --#eval "auTHOr23:z".parseDebug name
106 | --#eval "@ARTICLE ".parseDebug category
107 | --#eval "@ARtiCLE".parseDebug category
108 | --#eval "auTHOr =   \n{Dès Noël où un zéphyr haï\n me vêt de glaçons würmiens, je dîne d'exquis rôtis de bœuf au kir à l'aÿ d'âge mûr}".parseDebug tag
109 | --#eval "auTHOr = \"Test\"".parseDebug tag
110 | --#eval "journal = {Journal of Musical\n Deontology}".parseDebug tag
111 | --#eval "year = 2022".parseDebug tag
112 | --#eval "Bdsk-Url-1 = {https://doi.org/10.1007/s00220-020-03839-5}".parseDebug tag
113 | --#eval "year = 2022,\n author = {Frédéric Dupuis},".parseDebug (sepOrEndBy tag (do ws; skipChar ','; ws))
114 | --#eval "@article{bla23,\n year = 2022,\n author = {Frédéric Dupuis}\n}\n".parseDebug entry
115 | --#eval "\"Bachem, Achim and Korte, Bernhard and Gr{\\\"o}tschel\"".parseDebug quotedContent
116 | --#eval "@article{bla23,\n year = 2022,\n author = \"Bachem, Achim and Korte, Bernhard and Gr{\"o}tschel\"\n}\n".parseDebug entry
117 | 
118 | end BibtexQuery.Parser
119 | 


--------------------------------------------------------------------------------
/BibtexQuery/Query.lean:
--------------------------------------------------------------------------------
 1 | /-
 2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
 3 | Released under Apache 2.0 license as described in the file LICENSE.
 4 | Author: Frédéric Dupuis
 5 | -/
 6 | 
 7 | import BibtexQuery.String
 8 | import BibtexQuery.Entry
 9 | 
10 | /-!
11 | # Bibtex Queries
12 | 
13 | This file deals with bibtex queries of the limited form handled by the program.
14 | -/
15 | 
16 | namespace BibtexQuery
17 | 
18 | inductive Query where  -- one constructor per query prefix recognised by `Query.ofString`
19 | | key (s : String)  -- `k.…`
20 | | author (s : String)  -- `a.…`
21 | | title (s : String)  -- `t.…`
22 | | word (s : String)  -- `w.…` (matched against the keywords)
23 | | class (s : String)  -- `c.…`
24 | deriving Repr, Inhabited
25 | 
26 | def Query.ofString (s : String) : Option Query :=
27 |   let arg := s.drop 2  -- the query text after the two-character prefix
28 |   match s.take 2 with
29 |   | "k." => some (.key arg) | "a." => some (.author arg)
30 |   | "t." => some (.title arg) | "w." => some (.word arg)
31 |   | "c." => some (.class arg)
32 |   | _ => none
33 | 
34 | def Entry.matchQuery (e : Entry) (q : Query) : Bool :=  -- substring test of the query text against the normalized field
35 |   match q with
36 |   | .key s => e.getKey.flattenWords.containsSubstr s
37 |   | .author s => e.getAuthors.toFullNames.containsSubstr s  -- authors are normalized with `toFullNames` instead
38 |   | .title s => e.getTitle.flattenWords.containsSubstr s
39 |   | .word s => e.getKeywords.flattenWords.containsSubstr s
40 |   | .class s => e.getClass.flattenWords.containsSubstr s
41 | 
42 | def Entry.matchQueries (e : Entry) (lq : List Query) : Bool :=
43 |   lq.all fun q => e.matchQuery q  -- an empty list of queries matches every entry
44 | 
45 | end BibtexQuery
46 | 


--------------------------------------------------------------------------------
/BibtexQuery/String.lean:
--------------------------------------------------------------------------------
  1 | /-
  2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
  3 | Released under Apache 2.0 license as described in the file LICENSE.
  4 | Author: Frédéric Dupuis
  5 | -/
  6 | 
  7 | /-!
  8 | # String processing
  9 | 
 10 | This file contains various string processing functions.
 11 | -/
 12 | 
 13 | /-- The 1-based line number of the iterator's current position: one plus the number of `'\n'` before it. -/
 14 | def String.Iterator.lineNumber (it : String.Iterator) : Nat :=
 15 |   let consumed : Substring := ⟨it.toString, 0, it.pos⟩
 16 |   consumed.foldl (fun count c => count + if c == '\n' then 1 else 0) 1
 17 | 
 18 | /-- Strip diacritics from a character: listed letters map to an ASCII letter, any other character is returned unchanged. -/
 19 | def Char.asciify : Char → Char
 20 | | 'á' => 'a'
 21 | | 'à' => 'a'
 22 | | 'â' => 'a'
 23 | | 'ä' => 'a'
 24 | | 'æ' => 'a'
 25 | | 'ǎ' => 'a'
 26 | | 'ã' => 'a'
 27 | | 'å' => 'a'
 28 | | 'ą' => 'a'
 29 | | 'Á' => 'A'
 30 | | 'À' => 'A'
 31 | | 'Â' => 'A'
 32 | | 'Ä' => 'A'
 33 | | 'Æ' => 'A'
 34 | | 'Ǎ' => 'A'
 35 | | 'Ã' => 'A'
 36 | | 'Å' => 'A'
 37 | | 'Ą' => 'A'
 38 | | 'č' => 'c'
 39 | | 'ç' => 'c'
 40 | | 'ć' => 'c'
 41 | | 'Č' => 'C'
 42 | | 'Ç' => 'C'
 43 | | 'Ć' => 'C'
 44 | | 'ď' => 'd'
 45 | | 'Ď' => 'D'
 46 | | 'é' => 'e'
 47 | | 'è' => 'e'
 48 | | 'ê' => 'e'
 49 | | 'ë' => 'e'
 50 | | 'ę' => 'e'
 51 | | 'ě' => 'e'
 52 | | 'É' => 'E'
 53 | | 'È' => 'E'
 54 | | 'Ê' => 'E'
 55 | | 'Ë' => 'E'
 56 | | 'Ę' => 'E' | 'Ě' => 'E'  -- 'Ě' added so that it is treated like its lowercase 'ě'
 57 | | 'ğ' => 'g'
 58 | | 'Ğ' => 'G'
 59 | | 'í' => 'i'
 60 | | 'ì' => 'i'
 61 | | 'î' => 'i'
 62 | | 'ï' => 'i'
 63 | | 'ı' => 'i'
 64 | | 'Í' => 'I'
 65 | | 'Ì' => 'I'
 66 | | 'Î' => 'I'
 67 | | 'Ï' => 'I'
 68 | | 'İ' => 'I'
 69 | | 'ł' => 'l'
 70 | | 'ľ' => 'l'
 71 | | 'Ł' => 'L'
 72 | | 'Ľ' => 'L'
 73 | | 'ñ' => 'n'
 74 | | 'ň' => 'n'
 75 | | 'ń' => 'n'
 76 | | 'Ñ' => 'N'
 77 | | 'Ň' => 'N'
 78 | | 'Ń' => 'N'
 79 | | 'ó' => 'o'
 80 | | 'ò' => 'o'
 81 | | 'ô' => 'o'
 82 | | 'ö' => 'o'
 83 | | 'õ' => 'o'
 84 | | 'ø' => 'o'
 85 | | 'œ' => 'o'
 86 | | 'Ó' => 'O'
 87 | | 'Ò' => 'O'
 88 | | 'Ô' => 'O'
 89 | | 'Ö' => 'O'
 90 | | 'Õ' => 'O'
 91 | | 'Ø' => 'O'
 92 | | 'Œ' => 'O'
 93 | | 'ř' => 'r'
 94 | | 'Ř' => 'R'
 95 | | 'š' => 's'
 96 | | 'ś' => 's'
 97 | | 'ş' => 's'
 98 | | 'Š' => 'S'
 99 | | 'Ś' => 'S'
100 | | 'Ş' => 'S'
101 | | 'ť' => 't'
102 | | 'Ť' => 'T'
103 | | 'ú' => 'u'
104 | | 'ù' => 'u'
105 | | 'û' => 'u'
106 | | 'ü' => 'u'
107 | | 'ů' => 'u'
108 | | 'Ú' => 'U'
109 | | 'Ù' => 'U'
110 | | 'Û' => 'U'
111 | | 'Ü' => 'U'
112 | | 'Ů' => 'U'
113 | | 'ý' => 'y'
114 | | 'ÿ' => 'y'
115 | | 'Ý' => 'Y'
116 | | 'Ÿ' => 'Y'
117 | | 'ž' => 'z'
118 | | 'Ž' => 'Z'
119 | | 'ß' => 's'
120 | | c   => c
121 | 
122 | def String.asciify (s : String) : String := s.map Char.asciify  -- applies `Char.asciify` to every character
123 | 
124 | --#eval "Dès Noël où un zéphyr haï me vêt de glaçons würmiens, je dîne d'exquis rôtis de 
125 | --bœuf au kir à l'aÿ d'âge mûr & cætera".asciify
126 | 
127 | --#eval "Testfile aisdfjoai".foldl (fun s c => s ++ "A") ""
128 | --#eval '{'.asciify.toLower
129 | 
130 | def String.flattenWords (s : String) : String := s.foldl  -- asciify + lowercase, keeping only ASCII letters and digits
131 |   (fun out c => let c' := c.asciify.toLower; if c'.isAlphanum then out.push c' else out) ""
132 | 
133 | --#eval "Frédéric Dupuis, Marco {T}omamichel".flattenWords
134 | 
135 | def String.splitIntoNames (s : String) : List String :=  -- split at " and ", trimming each part
136 |   s.splitOn " and " |>.map String.trim
137 | 
138 | def String.toLastName (s : String) : String :=
139 |   -- The trimmed part before the first comma; the whole trimmed string if there is no comma
140 |   -- (so this presumably expects the "Lastname, Firstname" form — confirm against callers).
141 |   match s.split (· == ',') with
142 |   | [] => ""
143 |   | beforeComma :: _ => beforeComma.trim
144 | 
145 | def String.toLastNames (s : String) : String :=  -- last names of all names in `s`, separated by spaces
146 |   " ".intercalate (s.splitIntoNames.map fun n => n.toLastName)
147 | 
148 | /-- Standardize to "Firstname Lastname" -/
149 | def String.toFirstnameLastname (s : String) : String :=
150 |   -- split at commas and trim every part
151 |   match (s.split (· == ',')).map String.trim with
152 |   | [whole] => whole  -- no comma: returned as is
153 |   | [last, first] => first ++ " " ++ last  -- "Lastname, Firstname"
154 |   | _ => ""  -- two or more commas are not handled
155 | 
156 | def String.toFullNames (s : String) : String :=  -- every name as flattened "firstnamelastname", concatenated
157 |   String.join (s.splitIntoNames.map fun n => n.toFirstnameLastname.flattenWords)
158 | 
159 | partial def Substring.containsSubstrStartingAt (s : Substring) (q : String) : Bool :=
160 |   let rest := s.toString
161 |   if rest.isEmpty then q.isEmpty  -- nothing left to scan: only the empty query matches
162 |   else q.isPrefixOf rest || (s.drop 1).containsSubstrStartingAt q  -- match here, or retry one character later
163 |   
164 | def String.containsSubstr (s : String) (q : String) : Bool := -- scan starts at the front of `s`
165 |   Substring.containsSubstrStartingAt s.toSubstring q
166 | 
167 | def String.pad (s : String) (c : Char) (n : Nat) : String := -- pad with `c`, then cut to `n`
168 |   (s.pushn c n).take n
169 | 


--------------------------------------------------------------------------------
/BibtexQuery/TexDiacritics.lean:
--------------------------------------------------------------------------------
  1 | /-
  2 | Copyright (c) 2024 Jz Pan. All rights reserved.
  3 | Released under Apache 2.0 license as described in the file LICENSE.
  4 | Author: Jz Pan
  5 | -/
  6 | 
  7 | import Std.Internal.Parsec
  8 | import Std.Internal.Parsec.String
  9 | import Lean.Data.Xml.Basic
 10 | import UnicodeBasic
 11 | 
 12 | /-!
 13 | 
 14 | # TeX diacritics
 15 | 
 16 | This file contains functions for TeX diacritics processing.
 17 | The main functions are `texContent` and `texContents`, which
 18 | will convert all TeX commands for diacritics into UTF-8 characters,
 19 | and preserve other TeX commands.
 20 | 
 21 | -/
 22 | 
 23 | open Lean Xml Std.Internal.Parsec Std.Internal.Parsec.String Unicode
 24 | 
 25 | namespace BibtexQuery.TexDiacritics
 26 | 
 27 | /-- Represents a segment of TeX content in a bibitem. -/
 28 | inductive TexContent
 29 | /-- Represents a non-empty normal string. -/
 30 | | normal (s : String) : TexContent
 31 | /-- Represents a single special character. When output into HTML, some of them (e.g. `\`, `$`)
 32 | will be put into `<span>` to prevent MathJax from recognizing them.
 33 | Some of them (e.g. ` `, `,`) have special meaning in bibitem name processing. -/
 34 | | char (c : Char) : TexContent
 35 | /-- Represents a TeX command. It always starts with `\`. It may have trailing spaces. -/
 36 | | command (s : String) : TexContent
 37 | /-- Represents a math environment: `dollar` is the delimiter (`$` or `$$`), `s` the content. -/
 38 | | math (dollar s : String) : TexContent
 39 | /-- Represents contents inside `{ ... }`. -/
 40 | | braced (arr : Array TexContent) : TexContent
 41 | deriving Repr
 42 | 
 43 | namespace TexContent
 44 | 
 45 | /-- Insert the string `ch` right after the first character of `x`; fails when `x` has no letter there. -/
 46 | partial def addDiacritics (x : TexContent) (ch : String) :
 47 |     Except String TexContent := do
 48 |   match x with
 49 |   | .normal s =>
 50 |     if s.isEmpty then
 51 |       throw "expected a non-empty normal string, but got ''"
 52 |     else if GeneralCategory.isLetter s.front then
 53 |       return .normal <| s.take 1 ++ ch ++ s.drop 1 -- `ch` lands between the first letter and the rest
 54 |     else
 55 |       throw s!"diacritics character can only be added after a letter, but got '{s.front}'"
 56 |   | .char c => throw s!"expected a non-empty normal string, but got '{c}'"
 57 |   | .command _ => throw "expected a non-empty normal string, but got a TeX command"
 58 |   | .math _ _ => throw "expected a non-empty normal string, but got a math environment"
 59 |   | .braced arr => -- recurse into the first element; the remaining elements are kept unchanged
 60 |     if h : 0 < arr.size then
 61 |       return .braced <| #[← arr[0].addDiacritics ch] ++ (arr.toSubarray.drop 1 |>.toArray)
 62 |     else
 63 |       throw "expected a non-empty normal string, but got '{}'"
 64 | 
 65 | mutual
 66 | 
 67 | /-- Render a TeX content back into TeX source text.
 68 | The result is not necessarily identical to the original input. -/
 69 | partial def toString (x : TexContent) : String :=
 70 |   match x with
 71 |   | .normal text | .command text => text
 72 |   | .char ch => String.singleton ch
 73 |   | .math delim body => delim ++ body ++ delim
 74 |   | .braced items => "{" ++ toStringArray items ++ "}"
 75 | 
 76 | /-- Concatenate the `toString` rendering of every element, in order. -/
 77 | partial def toStringArray (arr : Array TexContent) : String :=
 78 |   arr.foldl (fun acc item => acc ++ toString item) ""
 79 | 
 80 | end
 81 | 
 82 | mutual
 83 | 
 84 | /-- Plaintext of a TeX content: commands are dropped, braces removed, math kept verbatim. -/
 85 | partial def toPlaintext (x : TexContent) : String :=
 86 |   match x with
 87 |   | .command _ => ""
 88 |   | .normal text => text
 89 |   | .char ch => String.singleton ch
 90 |   | .math delim body => delim ++ body ++ delim
 91 |   | .braced items => toPlaintextArray items
 92 | 
 93 | partial def toPlaintextArray (arr : Array TexContent) : String :=
 94 |   arr.foldl (fun acc item => acc ++ toPlaintext item) "" -- element plaintexts, in order
 95 | 
 96 | end
 97 | 
 98 | mutual
 99 | 
100 | /-- First character of the plaintext of a TeX content, if there is one. -/
101 | partial def getFirstChar (x : TexContent) : Option Char :=
102 |   match x with
103 |   | .command _ => none
104 |   | .char ch => some ch
105 |   | .braced items => getFirstCharArray items
106 |   | .normal text | .math text _ => text.get? 0
107 | 
108 | /-- First character produced by any element of `arr`, scanning from the front. -/
109 | partial def getFirstCharArray (arr : Array TexContent) : Option Char :=
110 |   arr.findSome? getFirstChar
111 | 
112 | end
113 | 
114 | mutual
115 | 
116 | /-- Last character of the plaintext of a TeX content, if there is one. -/
117 | partial def getLastChar (x : TexContent) : Option Char :=
118 |   match x with
119 |   | .command _ => none
120 |   | .char ch => some ch
121 |   | .normal text => if text.isEmpty then none else some text.back
122 |   | .math delim _ => delim.get? 0 -- first character of the delimiter string
123 |   | .braced items => getLastCharArray items
124 | 
125 | partial def getLastCharArray (arr : Array TexContent) : Option Char :=
126 |   arr.findSomeRev? getLastChar -- scans the elements from the back
127 | 
128 | end
129 | 
130 | mutual
131 | 
132 | /-- Convert a TeX content to HTML, represented by an array of `Lean.Xml.Content`.
133 | Only `\\` is translated here (to `<br>`); any other command is shown verbatim in a red `<span>`. -/
134 | partial def toHtml (x : TexContent) : Array Content :=
135 |   match x with
136 |   | .normal s => #[.Character s]
137 |   | .char c =>
138 |     let ret : Content := match c with
139 |     | '\\' | '$' => .Element ⟨ "span", RBMap.empty, #[.Character c.toString] ⟩
140 |     | _ => .Character c.toString
141 |     #[ret]
142 |   | .command cmd =>
143 |     let ret : Content := match cmd.trim with
144 |     | "\\\\" => .Element ⟨ "br", RBMap.empty, #[] ⟩
145 |     | _ => .Element ⟨ "span", RBMap.empty.insert "style" "color:red;", #[.Character cmd] ⟩
146 |     #[ret]
147 |   | .math dollar s => #[.Character (dollar ++ s ++ dollar)] -- emitted as text, delimiters included
148 |   | .braced arr => toHtmlArray arr -- the braces themselves produce no output
149 | 
150 | partial def toHtmlArray (arr : Array TexContent) (i : Nat := 0) -- `i`: index of the next element
151 |     (ret : Array Content := #[]) : Array Content := -- `ret`: HTML accumulated so far
152 |   if h : i < arr.size then
153 |     if h' : i + 1 < arr.size then -- a following element exists, so a command can take it as argument
154 |       if let .command cmd := arr[i] then
155 |         match cmd.trim with
156 |         | "\\url" => -- link: href is the argument's plaintext, link text its HTML
157 |           let next := arr[i + 1]
158 |           let x : Content := .Element ⟨ "a", RBMap.empty.insert "href"
159 |             next.toPlaintext, next.toHtml ⟩
160 |           toHtmlArray arr (i + 2) (ret ++ #[x]) -- command and argument are both consumed
161 |         | "\\textrm" =>
162 |           let next := arr[i + 1]
163 |           let x : Content := .Element ⟨ "span", RBMap.empty.insert "style"
164 |             "font-style: normal; font-weight: normal", next.toHtml ⟩
165 |           toHtmlArray arr (i + 2) (ret ++ #[x])
166 |         | "\\textbf" =>
167 |           let next := arr[i + 1]
168 |           let x : Content := .Element ⟨ "b", RBMap.empty, next.toHtml ⟩
169 |           toHtmlArray arr (i + 2) (ret ++ #[x])
170 |         | "\\textit" =>
171 |           let next := arr[i + 1]
172 |           let x : Content := .Element ⟨ "i", RBMap.empty, next.toHtml ⟩
173 |           toHtmlArray arr (i + 2) (ret ++ #[x])
174 |         | "\\emph" =>
175 |           let next := arr[i + 1]
176 |           let x : Content := .Element ⟨ "em", RBMap.empty, next.toHtml ⟩
177 |           toHtmlArray arr (i + 2) (ret ++ #[x])
178 |         | "\\texttt" =>
179 |           let next := arr[i + 1]
180 |           let x : Content := .Element ⟨ "span", RBMap.empty.insert "style"
181 |             "font-family: monospace", next.toHtml ⟩
182 |           toHtmlArray arr (i + 2) (ret ++ #[x])
183 |         | "\\textsc" =>
184 |           let next := arr[i + 1]
185 |           let x : Content := .Element ⟨ "span", RBMap.empty.insert "style"
186 |             "font-variant: small-caps", next.toHtml ⟩
187 |           toHtmlArray arr (i + 2) (ret ++ #[x])
188 |         | _ => toHtmlArray arr (i + 1) (ret ++ arr[i].toHtml) -- unrecognised command: plain `toHtml`
189 |       else
190 |         toHtmlArray arr (i + 1) (ret ++ arr[i].toHtml)
191 |     else -- last element: nothing follows, so even a known command falls back to `toHtml`
192 |        toHtmlArray arr (i + 1) (ret ++ arr[i].toHtml)
193 |   else
194 |     ret
195 | 
196 | end
197 | 
198 | end TexContent
199 | 
200 | /-- Match a (possibly empty) run of spaces, tabs, carriage returns and newlines; return it. -/
201 | def ws' : Parser String :=
202 |   manyChars <| satisfy fun c =>
203 |     c == ' ' || c == '\t' || c == '\r' || c == '\n'
204 | 
205 | /-- Substitute TeX input conventions with the Unicode characters they denote.
206 | "---" is rewritten before "--" so an em dash is never consumed as an en dash plus a hyphen. -/
207 | def replaceChars (s : String) : String :=
208 |   -- substitutions are applied in list order, each one over the whole string
209 |   let substitutions : List (String × String) :=
210 |     [("---", "\u2014"), ("--", "\u2013"), ("~", "\u00A0"), ("?`", "\u00BF"), ("!`", "\u00A1")]
211 |   let rec go : List (String × String) → String → String
212 |     | [], acc => acc
213 |     | (pat, rep) :: rest, acc => go rest (acc.replace pat rep)
214 |   go substitutions s
215 | 
216 | /-- Match a TeX command starting with `\` and return it, including whitespace consumed after it. -/
217 | def texCommand : Parser String := pchar '\\' *> attempt do
218 |   let s ← manyChars asciiLetter
219 |   if s.isEmpty then
220 |     -- `\` followed by one non-letter character; the six listed below keep the whitespace after them
221 |     let c ← any
222 |     match c with
223 |     | '&' | '#' | '{' | '}' | '$' | '_' => return "\\" ++ toString c
224 |     | _ => return "\\" ++ toString c ++ (← ws')
225 |   else if let .some '*' ← peek? then -- letters followed by `*`: the star is part of the command
226 |     skip
227 |     return "\\" ++ s ++ "*" ++ (← ws')
228 |   else
229 |     return "\\" ++ s ++ (← ws') -- letter-named command: trailing whitespace is appended to the result
230 | 
231 | /-- Like `texCommand`, but fails on any command whose trimmed form is listed in `exclude`. -/
232 | def texCommand' (exclude : Array String) : Parser String := attempt do
233 |   let cmd ← texCommand
234 |   let name := cmd.trim
235 |   if exclude.contains name then fail s!"'{name}' is not allowed"
236 |   return cmd
237 | 
238 | /-- Match `{`, then `p`, then `}`; return `p`'s result wrapped in literal braces again. -/
239 | def bracedContent (p : Parser String) : Parser String :=
240 |   (fun inner => "{" ++ inner ++ "}") <$> (pchar '{' *> p <* pchar '}')
241 | 
242 | /-- Run `p` repeatedly, collecting every `some` result; stop successfully at the first `none`.
243 | An error from `p` is propagated unchanged. -/
244 | partial def manyOptions {α} (p : Parser (Option α)) (acc : Array α := #[]) :
245 |     Parser (Array α) := fun it =>
246 |   match p it with
247 |   | .success it' (.some item) => manyOptions p (acc.push item) it'
248 |   | .success it' .none => .success it' acc
249 |   | .error it' err => .error it' err
250 | 
251 | partial def mathContentAux : Parser String := do -- the text between math delimiters, as a string
252 |   let normalChars : Parser String := many1Chars <| satisfy fun
253 |     | '\\' | '$' | '{' | '}' => false
254 |     | _ => true
255 |   let doOne : Parser (Option String) := fun it => -- one piece, or `none` to stop the scan
256 |     if it.hasNext then
257 |       match it.curr with
258 |       | '{' => (.some <$> bracedContent mathContentAux) it -- nested group, braces kept in the result
259 |       | '\\' =>
260 |         match texCommand' #["\\(", "\\)", "\\[", "\\]"] it with
261 |         | .success it ret => .success it (.some ret)
262 |         | .error _ _ => .success it .none -- excluded or unparsable command: stop without consuming
263 |       | '}' | '$' => .success it .none -- left for the caller to match
264 |       | _ => (.some <$> normalChars) it
265 |     else
266 |       .success it .none
267 |   return String.join (← manyOptions doOne).toList
268 | 
269 | /-- Match a math content. Returns `Option.none` if it does not start with `\(`, `\[` or `$`. -/
270 | def mathContent : Parser (Option TexContent) := fun it =>
271 |   let aux (beginning ending : String) : Parser String :=
272 |     pstring beginning *> mathContentAux <* pstring ending
273 |   let substr := it.extract (it.forward 2) -- the next two characters, for the two-character openers
274 |   if substr = "\\[" then
275 |     ((.some <| .math "$$" ·) <$> aux "\\[" "\\]") it -- `\[ ... \]` is stored with `$$` delimiters
276 |   else if substr = "\\(" then
277 |     ((.some <| .math "$" ·) <$> aux "\\(" "\\)") it -- `\( ... \)` is stored with `$` delimiters
278 |   else if substr = "$$" then -- checked before the single `$` case below
279 |     ((.some <| .math "$$" ·) <$> aux "$$" "$$") it
280 |   else if it.curr = '$' then
281 |     ((.some <| .math "$" ·) <$> aux "$" "$") it
282 |   else
283 |     .success it .none -- not math: nothing is consumed
284 | 
285 | partial def rawContentAux : Parser String := do -- text up to an unmatched `}`, returned as a string
286 |   let normalChars : Parser String := many1Chars <| satisfy fun
287 |     | '\\' | '{' | '}' => false
288 |     | _ => true
289 |   let doOne : Parser (Option String) := fun it => -- one piece, or `none` to stop the scan
290 |     if it.hasNext then
291 |       match it.curr with
292 |       | '{' => (.some <$> bracedContent rawContentAux) it -- nested group, braces kept in the result
293 |       | '\\' => (.some <$> texCommand) it -- commands are copied through, not interpreted
294 |       | '}' => .success it .none -- left for the caller to match
295 |       | _ => (.some <$> normalChars) it -- unlike in math content, `$` counts as a normal character
296 |     else
297 |       .success it .none
298 |   return String.join (← manyOptions doOne).toList
299 | 
300 | /-- Match a TeX command for diacritics, return the processed TeX contents.
301 | Sometimes it needs to read the contents after the command; in this case `p` is used. -/
302 | def texDiacriticsCommand (p : Parser (Option TexContent)) : Parser (Option TexContent) := do
303 |   let cmd ← texCommand
304 |   -- `\url{...}`: the argument is read with `rawContentAux` and stored as one normal string
305 |   if cmd.trim = "\\url" then
306 |     let s ← pchar '{' *> rawContentAux <* pchar '}'
307 |     return .some <| .braced #[.command cmd, .braced <| #[.normal s]]
308 |   -- some special characters need to be put into `<span>`; ' ' is the "no match" sentinel here
309 |   let c : Char := match cmd.trim with
310 |   | "\\$" => '$' | "\\textbackslash" => '\\'
311 |   | _ => ' '
312 |   if c ≠ ' ' then return .some <| .char c
313 |   -- commands that stand for a single character; "" is the "no match" sentinel here
314 |   let s : String := match cmd.trim with
315 |   | "\\oe" => "œ" | "\\OE" => "Œ"
316 |   | "\\ae" => "æ" | "\\AE" => "Æ"
317 |   | "\\aa" => "å" | "\\AA" => "Å"
318 |   | "\\o" => "ø" | "\\O" => "Ø"
319 |   | "\\l" => "ł" | "\\L" => "Ł"
320 |   | "\\i" => "ı" | "\\j" => "ȷ"
321 |   | "\\ss" => "\u00DF" | "\\SS" => "\u1E9E"
322 |   | "\\cprime" => "\u02B9"
323 |   | "\\&" => "&" | "\\#" => "#"
324 |   | "\\{" => "{" | "\\}" => "}"
325 |   | "\\_" => "_"
326 |   | "\\" => "\u00A0" -- This should be "\ " but the space is trimmed
327 |   | _ => ""
328 |   if not s.isEmpty then return .some <| .normal s
329 |   -- accent commands: each maps to the string that `addDiacritics` inserts after the next letter
330 |   let s : String := match cmd.trim with
331 |   | "\\`" => "\u0300" | "\\'" => "\u0301"
332 |   | "\\^" => "\u0302" | "\\\"" => "\u0308"
333 |   | "\\~" => "\u0303" | "\\=" => "\u0304"
334 |   | "\\." => "\u0307" | "\\u" => "\u0306"
335 |   | "\\v" => "\u030C" | "\\H" => "\u030B"
336 |   | "\\t" => "\u0361" | "\\c" => "\u0327"
337 |   | "\\d" => "\u0323" | "\\b" => "\u0331"
338 |   | "\\k" => "\u0328"
339 |   | _ => ""
340 |   if s.isEmpty then return .some <| .command cmd -- not an accent either: keep the command as is
341 |   match ← p with -- the accent applies to whatever content `p` reads next
342 |   | .some next =>
343 |     match next.addDiacritics s with
344 |     | .ok ret => return .some ret
345 |     | .error err => fail err
346 |   | .none => fail "expected a non-empty normal string"
347 | 
348 | /-- Match a segment of TeX content.
349 | The TeX commands for diacritics will be converted into UTF-8 characters.
350 | Other TeX commands are preserved; whitespace is returned as a single `.char ' '`.
351 | Returns `Option.none` if it can't match any and there are no errors. -/
352 | partial def texContent : Parser (Option TexContent) := fun it =>
353 |   let normalChars' : Parser String := many1Chars <| satisfy fun
354 |     | '\\' | '$' | '{' | '}' | ' ' | '\t' | '\r' | '\n' | ',' => false
355 |     | _ => true
356 |   match mathContent it with -- math is tried first
357 |   | .success it ret =>
358 |     match ret with
359 |     | .some ret => .success it (.some ret)
360 |     | .none => -- not math: dispatch on the next character
361 |       if it.hasNext then
362 |         match it.curr with
363 |         | ' ' | '\t' | '\r' | '\n' => ((fun _ => .some (.char ' ')) <$> ws) it
364 |         | ',' => .success it.next <| .some <| .char it.curr -- a comma is a segment of its own
365 |         | '\\' => texDiacriticsCommand texContent it
366 |         | '{' => ((.some <| .braced ·) <$> (pchar '{' *> manyOptions texContent <* pchar '}')) it
367 |         | '}' => .success it .none -- not consumed here; left for the enclosing parser
368 |         | _ => ((.some <| .normal <| replaceChars ·) <$> normalChars') it
369 |       else
370 |         .success it .none -- end of input
371 |   | .error it err => .error it err
372 | 
373 | /-- Match a sequence of TeX contents, stopping where `texContent` returns `Option.none`. -/
374 | def texContents : Parser (Array TexContent) := manyOptions texContent
375 | 
376 | end BibtexQuery.TexDiacritics
377 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # BibtexQuery: a simple command-line bibtex query utility
 2 | 
 3 | BibtexQuery is a command-line utility that reads in a bibtex file and performs simple queries. A query is a string
 4 | of the form ``q.querystring``, where ``q`` is one of ``a`` for author, ``t`` for title, ``k`` for key, ``c`` for class (e.g. book, article), or ``w`` for keywords, and ``querystring``
 5 | is a string (without spaces). BibtexQuery reads in a bibtex file, and returns the entries that match all the
 6 | queries given as command-line parameters. Note that the entries are processed in such a way that strips diacritics,
 7 | spaces and special characters before the queries are performed. In addition, the list of authors is normalized to
 8 | ``firstnamelastname``. Hence, for example, ``Dupuis, Frédéric`` will match the query ``a.ericdup``.
 9 | 
10 | Note that currently, only a subset of the official Bibtex format is supported; features such as predefined strings and concatenation using ``#`` are not supported. It is unclear whether they
11 | will ever be supported in the future; I only wrote this to learn how to do "normal programming" in [Lean 4](https://github.com/leanprover/lean4/), and I don't personally use these Bibtex features.
12 | 
13 | ## Installation instructions
14 | 
15 | Make sure you have a working Lean 4 installation (see [here](https://leanprover.github.io/lean4/doc/quickstart.html) for how to do that), and then type `lake build` in the main directory. The
16 | executable should appear in ``.lake/build/bin``.
17 | 


--------------------------------------------------------------------------------
/lake-manifest.json:
--------------------------------------------------------------------------------
 1 | {"version": "1.1.0",
 2 |  "packagesDir": ".lake/packages",
 3 |  "packages":
 4 |  [{"url": "https://github.com/jcommelin/lean4-unicode-basic",
 5 |    "type": "git",
 6 |    "subDir": null,
 7 |    "scope": "",
 8 |    "rev": "458e2d3feda3999490987eabee57b8bb88b1949c",
 9 |    "name": "UnicodeBasic",
10 |    "manifestFile": "lake-manifest.json",
11 |    "inputRev": "bump_to_v4.18.0-rc1",
12 |    "inherited": false,
13 |    "configFile": "lakefile.lean"}],
14 |  "name": "BibtexQuery",
15 |  "lakeDir": ".lake"}
16 | 


--------------------------------------------------------------------------------
/lakefile.toml:
--------------------------------------------------------------------------------
 1 | name = "BibtexQuery"
 2 | defaultTargets = ["BibtexQuery", "«bibtex-query»"]
 3 | 
 4 | [[require]]
 5 | name = "UnicodeBasic"
 6 | git = "https://github.com/jcommelin/lean4-unicode-basic" # switch back to fgdorais
 7 | rev = "bump_to_v4.18.0-rc1" # switch back to main
 8 | 
 9 | [[lean_lib]]
10 | name = "BibtexQuery"
11 | 
12 | [[lean_exe]]
13 | name = "«bibtex-query»"
14 | root = "BibtexQuery.Main"
15 | 


--------------------------------------------------------------------------------
/lean-toolchain:
--------------------------------------------------------------------------------
1 | leanprover/lean4:v4.19.0-rc1
2 | 


--------------------------------------------------------------------------------
/scripts/bibc:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Forward every argument verbatim: "$@" keeps spaces intact and lifts the nine-argument cap.
3 | bibtex-query c /home/fred/git/w/bib/big.bib "$@"
4 | 


--------------------------------------------------------------------------------
/scripts/bibq:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Forward every argument verbatim: "$@" keeps spaces intact and lifts the nine-argument cap.
3 | bibtex-query q /home/fred/git/w/bib/big.bib "$@"
4 | 


--------------------------------------------------------------------------------
/scripts/bibrofi:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Pick a paper from the bib directory with rofi and open it in zathura.
 3 | shopt -s nullglob globstar
 4 | 
 5 | papers=( ~/git/w/bib/*.pdf ~/git/w/bib/*.djvu )
 6 | # Extra command-line arguments are handed to rofi unchanged.
 7 | bibfile=$(printf '%s\n' "${papers[@]}" | rofi -i -dmenu "$@")
 8 | # Stop when no selection came back (empty output).
 9 | [[ -n $bibfile ]] || exit
10 | # Quoted so that file names containing spaces or glob characters reach zathura intact.
11 | zathura "$bibfile" &
12 | 


--------------------------------------------------------------------------------