├── .github
│   └── workflows
│       └── lean_action_ci.yml
├── .gitignore
├── BibtexQuery.lean
├── BibtexQuery
│   ├── Entry.lean
│   ├── Format.lean
│   ├── Main.lean
│   ├── Name.lean
│   ├── ParsecExtra.lean
│   ├── Parser.lean
│   ├── Query.lean
│   ├── String.lean
│   └── TexDiacritics.lean
├── LICENSE
├── README.md
├── lake-manifest.json
├── lakefile.toml
├── lean-toolchain
└── scripts
    ├── bibc
    ├── bibq
    └── bibrofi
/.github/workflows/lean_action_ci.yml:
--------------------------------------------------------------------------------
1 | name: Lean Action CI
2 |
3 | on:  # triggers: every push, every pull request, and manual dispatch
4 |   push:
5 |   pull_request:
6 |   workflow_dispatch:
7 |
8 | jobs:
9 |   build:
10 |     runs-on: ubuntu-latest
11 |
12 |     steps:
13 |       - uses: actions/checkout@v4  # check out the repository
14 |       - uses: leanprover/lean-action@v1  # Lean CI action (presumably builds the package with Lake — see the action's docs)
15 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.lake
2 |
--------------------------------------------------------------------------------
/BibtexQuery.lean:
--------------------------------------------------------------------------------
1 | import BibtexQuery.Entry
2 | import BibtexQuery.ParsecExtra
3 | import BibtexQuery.Parser
4 | import BibtexQuery.Query
5 | import BibtexQuery.String
6 | import BibtexQuery.Name
7 | import BibtexQuery.Format
8 |
--------------------------------------------------------------------------------
/BibtexQuery/Entry.lean:
--------------------------------------------------------------------------------
1 | /-
2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Frédéric Dupuis
5 | -/
6 |
7 | import BibtexQuery.String
8 |
9 | /-!
10 | # Bibtex Entries and related functions
11 |
12 | This file defines inductive types for bibtex entries and tags, along with various related
13 | helper functions.
14 | -/
15 |
16 | namespace BibtexQuery
17 |
18 | /-- A single field of a bibtex entry, e.g. `author = {Binne, Chose and Truc, Machin}`. -/
19 | structure Tag where
20 |   name : String
21 |   content : String
22 |   deriving Repr, Inhabited
23 |
24 | namespace Tag
25 |
26 | /-- Render the tag in the standardized form `name = {content}`. -/
27 | def toString (t : Tag) : String := t.name ++ " = {" ++ t.content ++ "}"
28 |
29 | --#eval Tag.toString $ Tag.mk "author" "Frédéric Dupuis"
30 |
31 | end Tag
32 |
33 | /-- A whole bibtex entry: a paper, a book, etc. (`normalType`), or one of the special string/preamble/comment items. -/
34 | inductive Entry where
35 | | normalType (category : String) (name : String) (tags : List Tag) -- `name` is the citation key (see `getKey`)
36 | | stringType (content : String)
37 | | preambleType (content : String)
38 | | commentType -- carries no data
39 | deriving Repr, Inhabited
40 |
41 | namespace Entry
42 |
43 | /-- The content of the first `author` tag of a bibtex entry, or `""` when there is none. -/
44 | def getAuthors (e : Entry) : String :=
45 |   match e with
46 |   | .normalType _ _ fields =>
47 |     let authorTag? := fields.find? fun field => field.name == "author"
48 |     (authorTag?.map (·.content)).getD ""
49 |   | _ => ""
50 |
51 | /-- The content of the first `title` tag of a bibtex entry, or `""` when there is none. -/
52 | def getTitle (e : Entry) : String :=
53 |   match e with
54 |   | .normalType _ _ fields =>
55 |     let titleTag? := fields.find? fun field => field.name == "title"
56 |     (titleTag?.map (·.content)).getD ""
57 |   | _ => ""
58 |
59 | /-- The content of the first `keywords` tag of a bibtex entry, or `""` when there is none. -/
60 | def getKeywords (e : Entry) : String :=
61 |   match e with
62 |   | .normalType _ _ fields =>
63 |     let keywordsTag? := fields.find? fun field => field.name == "keywords"
64 |     (keywordsTag?.map (·.content)).getD ""
65 |   | _ => ""
66 |
67 | /-- The class (category) of a bibtex entry, e.g. `book` or `article`; `""` for the special entries. -/
68 | def getClass (e : Entry) : String :=
69 |   if let .normalType category _ _ := e then category
70 |   else ""
71 |
72 | /-- The citation key of a bibtex entry; the special entries give "No key". -/
73 | def getKey (e : Entry) : String :=
74 |   if let .normalType _ key _ := e then key
75 |   else "No key"
76 |
77 | /-- The `eprint` tag of a bibtex entry (`""` if absent); the special entries give "No arxiv number". -/
78 | def getArxivNumber (e : Entry) : String :=
79 |   match e with
80 |   | .normalType _ _ fields =>
81 |     let eprintTag? := fields.find? fun field => field.name == "eprint"
82 |     (eprintTag?.map (·.content)).getD ""
83 |   | _ => "No arxiv number"
84 |
85 | /-- The arxiv abstract URL built from the `eprint` tag (`""` if absent); the special entries give "No arxiv number". -/
86 | def getArxivLink (e : Entry) : String :=
87 |   match e with
88 |   | .normalType _ _ fields =>
89 |     let eprintTag? := fields.find? fun field => field.name == "eprint"
90 |     (eprintTag?.map fun field => "http://arxiv.org/abs/" ++ field.content).getD ""
91 |   | _ => "No arxiv number"
92 |
93 | /-- One-line summary of an entry: key (`pad ' ' 25`), author last names (`pad ' ' 50`) and title, joined by " | ". -/
94 | def toAbridgedRepr (e : Entry) : String :=
95 |   " | ".intercalate [e.getKey.pad ' ' 25, e.getAuthors.toLastNames.pad ' ' 50, e.getTitle]
96 |
97 | /-- A «clean citation» of a bibtex entry: authors, «title», arxiv link. -/
98 | def toCitation (e : Entry) : String :=
99 |   String.join [e.getAuthors, ", «", e.getTitle, "», ", e.getArxivLink]
100 |
101 |
102 | /-- Standardized text of a normal entry: `@category{key,` then one tag per line, then `}`; other entries give `""`. -/
103 | def toString (e : Entry) : String :=
104 |   match e with
105 |   | .normalType category key fields =>
106 |     let header := "@" ++ category ++ "{" ++ key ++ ",\n"
107 |     let withTags := fields.foldl (fun acc field => acc ++ " " ++ field.toString ++ "\n") header
108 |     withTags ++ "}"
109 |   | _ => ""
110 |
111 | --#eval IO.print <| Entry.toString $ .normalType "book" "d12" [Tag.mk "author" "Dupuis, Frédéric",
112 | -- Tag.mk "title" "Bonsoir la visite"]
113 |
114 | --#eval IO.print <| Entry.toCitation $ .normalType "book" "d12" [Tag.mk "author" "Dupuis, Frédéric",
115 | -- Tag.mk "title" "Bonsoir la visite", Tag.mk "eprint" "2308.11736"]
116 |
117 | end Entry
118 |
119 | end BibtexQuery
120 |
--------------------------------------------------------------------------------
/BibtexQuery/Format.lean:
--------------------------------------------------------------------------------
1 | /-
2 | Copyright (c) 2024 Jz Pan. All rights reserved.
3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Jz Pan
5 | -/
6 |
7 | import BibtexQuery.Name
8 | import BibtexQuery.Entry
9 | import Lean.Data.Xml.Basic
10 | import Std.Data.HashMap
11 | import Std.Internal.Parsec
12 | import Std.Internal.Parsec.String
13 |
14 | /-!
15 |
16 | # Bibtex item formatting
17 |
18 | This file contains functions for bibtex item formatting,
19 | tag generating and sorting.
20 |
21 | -/
22 |
23 | open Lean Xml Std.Internal.Parsec Unicode BibtexQuery.TexDiacritics BibtexQuery.Name
24 |
25 | namespace BibtexQuery
26 |
27 | /-- A bibtex entry after processing: its parsed tags plus the data derived from them for display and sorting. -/
28 | structure ProcessedEntry where
29 | /-- The category of a bibtex entry, e.g. "article" or "book". -/
30 | category : String
31 | /-- The name, also called the citekey, of a bibtex entry. -/
32 | name : String
33 | /-- The tags of a bibtex entry, stored as a `HashMap` from tag name to content. Not to be confused with `tag`. -/
34 | tags : Std.HashMap String (Array TexContent)
35 | /-- The tag (label) of a bibtex entry, e.g. `[Doe12]`. Not to be confused with `tags`. -/
36 | tag : String
37 | /-- The HTML representing a bibtex entry, e.g.
38 | `John Doe. Test. 2012.` Generated by the bibtex formatter; `ProcessedEntry.ofEntry` leaves it empty. -/
39 | html : Array Content
40 | /-- The array of authors of a bibtex entry, or of its editors when it has no authors.
41 | Only used for tag generation and bibtex entry sorting. -/
42 | authorOrEditor : Array BibtexName
43 | /-- The HTML representing the authors. -/
44 | authorHtml : Array Content
45 | /-- The HTML representing the editors. -/
46 | editorHtml : Array Content
47 | /-- The date of a bibtex entry, in the format `year * 100 + month` (`0` when no year could be read).
48 | Only used for tag generation and bibtex entry sorting. -/
49 | date : Nat
50 | /-- The HTML representing the date. -/
51 | dateHtml : Array Content
52 | /-- The title of a bibtex entry, with diacritics removed, and converted to uppercase.
53 | Only used for bibtex entry sorting. -/
54 | titleWithoutDiacritics : String
55 |
56 | /-- Compute the sortable date (`year * 100 + month`, or `0`) and the date HTML of a bibitem from its tags. -/
57 | def getDate (tags : Std.HashMap String (Array TexContent)) : Nat × Array Content :=
58 |   match tags["year"]? with
59 |   | .none => (0, #[])
60 |   | .some yearTex =>
61 |     let yearHtml := TexContent.toHtmlArray yearTex
62 |     -- only the digits of the year field take part in the numeric date
63 |     let yearDigits := String.mk ((TexContent.toPlaintextArray yearTex).toList.filter Char.isDigit)
64 |     match yearDigits.toNat? with
65 |     | .none => (0, yearHtml)
66 |     | .some year =>
67 |       let month : Nat :=
68 |         match tags["month"]? with
69 |         | .none => 0
70 |         | .some monthTex =>
71 |           match (TexContent.toPlaintextArray monthTex).trim.toLower with
72 |           | "jan" => 1 | "feb" => 2 | "mar" => 3
73 |           | "apr" => 4 | "may" => 5 | "jun" => 6
74 |           | "jul" => 7 | "aug" => 8 | "sep" => 9
75 |           | "oct" => 10 | "nov" => 11 | "dec" => 12
76 |           | other =>
77 |             let m := other.toNat?.getD 0
78 |             if 1 ≤ m ∧ m ≤ 12 then m else 0
79 |       let monthName : String :=
80 |         match month with
81 |         | 1 => "Jan" | 2 => "Feb" | 3 => "Mar"
82 |         | 4 => "Apr" | 5 => "May" | 6 => "Jun"
83 |         | 7 => "Jul" | 8 => "Aug" | 9 => "Sep"
84 |         | 10 => "Oct" | 11 => "Nov" | 12 => "Dec"
85 |         | _ => ""
86 |       let dateHtml : Array Content :=
87 |         if monthName.isEmpty then yearHtml
88 |         else if yearHtml.isEmpty then #[.Character monthName]
89 |         else #[.Character monthName, .Character " "] ++ yearHtml
90 |       (year * 100 + month, dateHtml)
93 |
94 | /-- Build the tag of a bibitem (e.g. `[ABC12]`) from its authors and its date (`year * 100 + month`). -/
95 | def getTag (authors : Array BibtexName) (date : Nat) : String :=
96 |   let abbrs : Array String :=
97 |     if authors.size < 2 then authors.map (·.threeLetterAbbr)
98 |     else if authors.size < 5 then authors.map (·.oneLetterAbbr)
99 |     -- five or more authors: the first three initials followed by `+`
100 |     else ((authors.extract 0 3).map (·.oneLetterAbbr)).push "+"
101 |   -- adding 100 guarantees at least two digits to take, so small years are zero-padded
102 |   let yearPart := if date = 0 then "" else (toString (date / 100 + 100)).takeRight 2
103 |   "[" ++ String.join abbrs.toList ++ yearPart ++ "]"
105 |
106 | /-- Collapse every run of consecutive spaces in `s` into a single space, in one pass. -/
107 | def removeDuplicatedSpacesAux (s : String) : String :=
108 |   -- `acc.2` records whether the last kept character was a space, so that further spaces are dropped
109 |   let step := fun (acc : String × Bool) (c : Char) =>
110 |     if c == ' ' && acc.2 then acc else (acc.1.push c, c == ' ')
111 |   (s.foldl step ("", false)).1
112 | /-- Turn CR, LF and tab into spaces, trim both ends, then pass the result to `removeDuplicatedSpacesAux`. -/
113 | def removeDuplicatedSpaces (s : String) : String :=
114 |   removeDuplicatedSpacesAux (["\r", "\n", "\t"].foldl (fun acc ws => acc.replace ws " ") s).trim
115 |
116 | /-- Build a `ProcessedEntry` from an `Entry`, computing every field except `html` (left as `#[]`).
117 | Returns `Option.none` if the input is not `BibtexQuery.Entry.normalType`, and an error if `texContents` fails on a tag. -/
118 | def ProcessedEntry.ofEntry (e : Entry) : Except String (Option ProcessedEntry) := do
119 | match e with
120 | | .normalType category name tags =>
121 | let lst : List (String × Array TexContent) ← tags.mapM fun x => do
122 | let s := removeDuplicatedSpaces x.content
123 | if x.name = "pubmed" ∨ x.name = "doi" ∨ x.name = "eprint" ∨ x.name.endsWith "url" then -- kept verbatim, not parsed as TeX
124 | .ok (x.name, #[.normal s])
125 | else
126 | match texContents s.iter with
127 | | .success _ arr => .ok (x.name, arr)
128 | | .error it err => .error s!"failed to run texContents on '{it.1}' at pos {it.2}: {err}"
129 | let tags := Std.HashMap.ofList lst
130 | let authors := processNames (tags.getD "author" #[])
131 | let editors := processNames (tags.getD "editor" #[])
132 | let authorOrEditor := if authors.isEmpty then editors else authors -- editors stand in when there are no authors
133 | let (date, dateHtml) := getDate tags
134 | pure <| .some {
135 | category := category
136 | name := name
137 | tags := tags
138 | tag := getTag authorOrEditor date
139 | html := #[]
140 | authorOrEditor := authorOrEditor
141 | authorHtml := BibtexName.toHtmlArray authors
142 | editorHtml := BibtexName.toHtmlArray editors ++
143 | if editors.size ≥ 2 then
144 | #[.Character ", editors"]
145 | else if editors.size = 1 then
146 | #[.Character ", editor"]
147 | else
148 | #[]
149 | date := date
150 | dateHtml := dateHtml
151 | titleWithoutDiacritics := tags.getD "title" #[] |> TexContent.toPlaintextArray |>
152 | stripDiacriticsFromString |>.map getUpperChar
153 | }
154 | | _ => pure .none
155 |
156 | /-!
157 |
158 | ## Formatting a bibtex item
159 |
160 | The following code is a port of `pybtex/style/formatting/unsrt.py` of `pybtex`,
161 | originally licensed under MIT license.
162 |
163 | -/
164 |
165 | section
166 |
167 | mutual
168 | /-- Last character of the text inside an XML node, if any; comments and empty text give `none`. -/
169 | partial def getLastCharOfContent (x : Content) : Option Char :=
170 |   match x with
171 |   | .Character text => if text.isEmpty then none else some text.back
172 |   | .Comment _ => none
173 |   | .Element ⟨ _, _, children ⟩ => getLastCharOfArrayContent children
174 |
175 | /-- Last character of the rightmost node of `arr` that has one. -/
176 | partial def getLastCharOfArrayContent (arr : Array Content) : Option Char :=
177 |   arr.findSomeRev? fun node => getLastCharOfContent node
178 | end
179 |
180 | variable (e : ProcessedEntry)
181 |
182 | namespace Formatter
183 |
184 | /-!
185 |
186 | ### Helper functions
187 |
188 | -/
189 | /-- Wrap the HTML of `content` in an element named `name`; yields `#[]` when that HTML is empty. -/
190 | def mkTag (name : String) (content : Array TexContent) : Array Content :=
191 |   let inner := TexContent.toHtmlArray content
192 |   if inner.size > 0 then #[.Element ⟨ name, RBMap.empty, inner ⟩] else #[]
193 | /-- Wrap the HTML of `content` in an `a` element whose `href` is `href`; yields `#[]` when that HTML is empty. -/
194 | def mkHref (href : String) (content : Array TexContent) : Array Content :=
195 |   let linkText := TexContent.toHtmlArray content
196 |   if linkText.size > 0 then #[.Element ⟨ "a", RBMap.empty.insert "href" href, linkText ⟩] else #[]
197 | /-- The HTML of `content` surrounded by the texts `pre` and `post`; nothing at all when that HTML is empty. -/
198 | def mkStr (content : Array TexContent) (pre post : String := "") : Array Content :=
199 |   let body := TexContent.toHtmlArray content
200 |   let wrap (s : String) : Array Content := if body.isEmpty || s.isEmpty then #[] else #[.Character s]
201 |   wrap pre ++ body ++ wrap post
202 | /-- Concatenate the present (`some`) arrays in order, putting `sep` before each non-empty one that follows non-empty output. -/
203 | def arrayConcat {α} (arr : Array (Option (Array α))) (sep : Array α := #[]) : Array α :=
204 |   let glue (out piece : Array α) : Array α := if out.isEmpty || piece.isEmpty then out ++ piece else out ++ sep ++ piece
205 |   (arr.filterMap id).foldl glue #[]
206 | /-- Join the present pieces with a single space between them. -/
207 | def words (arr : Array (Option (Array Content))) : Array Content :=
208 |   arrayConcat (sep := #[.Character " "]) arr
209 | /-- Whether `c` is sentence-ending punctuation; `'\uFF1F'` / `'\uFF01'` are the fullwidth forms of `?` / `!`. -/
210 | def canEndSentence (c : Char) : Bool :=
211 |   match c with
212 |   | '.' | '?' | '!' | '…' | '。' | '\uFF1F' | '\uFF01' | '\uFF0E' | '\uFF61'
213 |   | '\u203C' | '\u203D' | '\u2047' | '\u2048' | '\u2049' => true
214 |   | _ => false
215 | /-- Terminate a non-empty piece with "." unless its last character already ends a sentence. -/
216 | def sentence1 (arr : Array Content) : Array Content :=
217 |   if arr.isEmpty then #[]
218 |   else
219 |     match getLastCharOfArrayContent arr with
220 |     | .some last => if canEndSentence last then arr else arr.push (.Character ".")
221 |     -- no character found at all: the period is still appended
222 |     | .none => arr.push (.Character ".")
223 | /-- Join the present pieces with ", " and terminate the result with `sentence1`. -/
224 | def sentence (arr : Array (Option (Array Content))) : Array Content :=
225 |   arrayConcat arr #[.Character ", "] |> sentence1
226 | /-- Convert `content` to HTML and terminate it with `sentence1`. -/
227 | def sentence' (content : Array TexContent) : Array Content :=
228 |   TexContent.toHtmlArray content |> sentence1
229 | /-- Join the present top-level parts of a formatted entry with a newline between them. -/
230 | def toplevel (arr : Array (Option (Array Content))) : Array Content :=
231 |   arrayConcat (sep := #[.Character "\n"]) arr
232 | /-- The text "URL: " followed by a link whose target and text are both `url`; `#[]` for an empty `url`. -/
233 | def mkUrl (url : String) : Array Content :=
234 |   if !url.isEmpty then #[.Character "URL: "] ++ mkHref url #[.normal url] else #[]
235 | /-- Link for a web reference whose value `url` is a bare identifier, possibly preceded by `urlPrefix` or `namePrefix`. -/
236 | def mkWebRef (urlPrefix namePrefix url : String) : Array Content :=
237 |   let lowered := url.toLower
238 |   -- strip the first of the two known prefixes that the value starts with (case-insensitively)
239 |   let matched? := #[urlPrefix, namePrefix].find? fun p => lowered.startsWith p.toLower
240 |   let ident := match matched? with
241 |     | some p => url.drop p.length
242 |     | none => url
243 |   if ident.isEmpty then
244 |     #[]
245 |   else if ident.toLower.startsWith "http" then
246 |     -- the url does not start with `urlPrefix` or `namePrefix`,
247 |     -- but still starts with "http"
248 |     mkUrl url
249 |   else
250 |     mkHref (urlPrefix ++ ident) #[.normal (namePrefix ++ ident)]
252 |
253 | /-!
254 |
255 | ### Partial templates
256 |
257 | -/
258 | /-- `volume(number):pages` when there is a `volume` tag, otherwise `pages <pages>`; `none` without a `pages` tag. -/
259 | def formatVolumeAndPages : Option (Array Content) := do
260 | let pages ← e.tags["pages"]?
261 | match e.tags["volume"]? with
262 | | .some volume =>
263 | arrayConcat #[
264 | mkStr volume,
265 | (mkStr · "(" ")") <$> e.tags["number"]?,
266 | mkStr pages ":"
267 | ]
268 | | .none => mkStr pages "pages "
269 | /-- Reference for the `eprint` tag; its type comes from `eprinttype` or `archiveprefix` (default "arXiv"). `none` without `eprint`. -/
270 | def formatEprint : Option (Array Content) := do
271 | let eprint ← TexContent.toPlaintextArray <$> e.tags["eprint"]?
272 | let eprinttype :=
273 | TexContent.toPlaintextArray <$> (e.tags["eprinttype"]? <|> e.tags["archiveprefix"]?)
274 | |>.getD "arXiv"
275 | let eprintlist : Array (Array String × String × String) := #[ -- (accepted type names, URL prefix, display prefix)
276 | (#["arxiv"], "https://arxiv.org/abs/", "arXiv:"),
277 | (#["iacr"], "https://eprint.iacr.org/", "IACR:"),
278 | (#["pmcid", "pmc"], "https://www.ncbi.nlm.nih.gov/pmc/articles/", "PMCID:")
279 | ]
280 | let checkEprint : Array String × String × String → Option (Array Content) :=
281 | fun (lst, urlPrefix, namePrefix) =>
282 | if lst.contains eprinttype.toLower then
283 | let ret := mkWebRef urlPrefix namePrefix eprint
284 | if ret.isEmpty then .none else .some ret
285 | else
286 | .none
287 | eprintlist.findSome? checkEprint <|> .some
288 | (if eprint.toLower.startsWith "http" then
289 | -- unrecognized eprint type but starts with "http"
290 | mkUrl eprint
291 | else
292 | -- unrecognized eprint type, cannot add link to it
293 | #[.Character (eprinttype ++ ":" ++ eprint)])
294 | /-- One sentence listing the web references that are present: `pubmed`, `doi`, `mrnumber`, `zbl`, eprint and `url`. -/
295 | def formatWebRefs : Array Content :=
296 | let formatUrl : Option (Array Content) :=
297 | mkUrl <$> TexContent.toPlaintextArray <$> e.tags["url"]?
298 | let formatWebRef (tagName urlPrefix namePrefix : String) : Option (Array Content) :=
299 | mkWebRef urlPrefix namePrefix <$> TexContent.toPlaintextArray <$> e.tags[tagName]?
300 | sentence #[
301 | formatWebRef "pubmed" "https://www.ncbi.nlm.nih.gov/pubmed/" "PMID:",
302 | formatWebRef "doi" "https://doi.org/" "doi:",
303 | formatWebRef "mrnumber" "https://www.ams.org/mathscinet-getitem?mr=MR" "MR:",
304 | formatWebRef "zbl" "https://zbmath.org/?q=an:" "Zbl:",
305 | formatEprint e,
306 | formatUrl
307 | ]
308 | /-- The author HTML as a sentence, or the editor HTML when the author HTML is empty. -/
309 | def formatAuthorOrEditor : Array Content :=
310 | sentence1 (if e.authorHtml.isEmpty then e.editorHtml else e.authorHtml)
311 | /-- `volume V of <series>`, else `number N in <series>`, else the series alone; capitalized and terminated when `asSentence`. -/
312 | def formatVolumeAndSeries (asSentence : Bool) : Array Content :=
313 | let series := e.tags["series"]?
314 | let arr : Array Content :=
315 | if let .some volume := e.tags["volume"]? then
316 | arrayConcat #[
317 | mkStr volume (if asSentence then "Volume " else "volume "),
318 | (mkStr · " of ") <$> series
319 | ]
320 | else if let .some number := e.tags["number"]? then
321 | arrayConcat #[
322 | mkStr number (if asSentence then "Number " else "number "),
323 | (mkStr · " in ") <$> series
324 | ]
325 | else
326 | (mkStr <$> series).getD #[]
327 | if asSentence then sentence1 arr else arr
328 | /-- `chapter C` and `pages P`, whichever tags are present, separated by ", ". -/
329 | def formatChapterAndPages : Array Content :=
330 | arrayConcat #[
331 | (mkStr · "chapter ") <$> e.tags["chapter"]?,
332 | (mkStr · "pages ") <$> e.tags["pages"]?
333 | ] #[.Character ", "]
334 | /-- Format the `edition` tag: kept as is if it already says "ed"/"edition", spelled out or given an ordinal suffix if numeric, else suffixed with " edition". `none` without the tag. -/
335 | def formatEdition : Option (Array Content) :=
336 |   if let .some arr := e.tags["edition"]? then
337 |     let s := TexContent.toPlaintextArray arr |> stripDiacriticsFromString |>.map getLowerChar
338 |     if s.endsWith " ed." || s.endsWith " ed" || (s.replace "edition" "").length < s.length then
339 |       TexContent.toHtmlArray arr
340 |     else if let .some n := s.toNat? then
341 |       let s : String := match n with
342 |         | 0 => "Zeroth" | 1 => "First" | 2 => "Second" | 3 => "Third"
343 |         | 4 => "Fourth" | 5 => "Fifth" | 6 => "Sixth" | 7 => "Seventh"
344 |         | 8 => "Eighth" | 9 => "Ninth" | 10 => "Tenth" | 11 => "Eleventh" | 12 => "Twelfth"
345 |         | _ => toString n ++ match n % 100 with
346 |           -- 11, 12 and 13 (hence 13, 111, 112, 113, ...) take "th", never "st"/"nd"/"rd"
347 |           | 11 | 12 | 13 => "th"
348 |           | _ => match n % 10 with
349 |             | 1 => "st"
350 |             | 2 => "nd"
351 |             | 3 => "rd"
352 |             | _ => "th"
353 |       .some #[.Character (s ++ " edition")]
354 |     else
355 |       mkStr arr "" " edition"
356 |   else
357 |     .none
358 | /-- With an `address` tag: "address, date." then "organization, publisher."; otherwise one sentence "organization, publisher, date.". The organization is only used when `includeOrganization`. -/
359 | def formatAddressOrganizationPublisherDate (includeOrganization : Bool) :
360 | Array Content :=
361 | let organization : Option (Array Content) :=
362 | if includeOrganization then
363 | mkStr <$> e.tags["organization"]?
364 | else
365 | .none
366 | if let .some address := e.tags["address"]? then
367 | words #[
368 | sentence #[
369 | mkStr address,
370 | e.dateHtml
371 | ],
372 | sentence #[
373 | organization,
374 | mkStr <$> e.tags["publisher"]?
375 | ]
376 | ]
377 | else
378 | sentence #[
379 | organization,
380 | mkStr <$> e.tags["publisher"]?,
381 | e.dateHtml
382 | ]
383 | /-- The `isbn` tag as a sentence prefixed with "ISBN "; `none` without the tag. -/
384 | def formatISBN : Option (Array Content) :=
385 | (sentence1 <| mkStr · "ISBN ") <$> e.tags["isbn"]?
386 |
387 | /-!
388 |
389 | ### Templates
390 |
391 | -/
392 | /-- Template for `article` entries: authors, title, journal/volume/pages/date, note, web references. -/
393 | def formatArticle : Array Content :=
394 | toplevel #[
395 | sentence1 e.authorHtml,
396 | sentence' <$> e.tags["title"]?,
397 | sentence #[
398 | mkTag "i" <$> e.tags["journal"]?,
399 | formatVolumeAndPages e,
400 | e.dateHtml
401 | ],
402 | sentence' <$> e.tags["note"]?,
403 | formatWebRefs e
404 | ]
405 | /-- Template for `book` entries: authors or editors, title, edition, volume/series, publisher/address/date, ISBN, note, web references. -/
406 | def formatBook : Array Content :=
407 | toplevel #[
408 | formatAuthorOrEditor e,
409 | sentence #[mkTag "i" <$> e.tags["title"]?],
410 | sentence1 <$> formatEdition e,
411 | formatVolumeAndSeries e true,
412 | sentence #[
413 | mkStr <$> e.tags["publisher"]?,
414 | mkStr <$> e.tags["address"]?,
415 | e.dateHtml
416 | ],
417 | formatISBN e,
418 | sentence' <$> e.tags["note"]?,
419 | formatWebRefs e
420 | ]
421 | /-- Template for `booklet` entries: authors, title, volume/series, howpublished/address/date/note, web references. -/
422 | def formatBooklet : Array Content :=
423 | toplevel #[
424 | sentence1 e.authorHtml,
425 | sentence' <$> e.tags["title"]?,
426 | formatVolumeAndSeries e true,
427 | sentence #[
428 | mkStr <$> e.tags["howpublished"]?,
429 | mkStr <$> e.tags["address"]?,
430 | e.dateHtml,
431 | mkStr <$> e.tags["note"]?
432 | ],
433 | formatWebRefs e
434 | ]
435 | /-- Template for `inbook` entries: authors or editors, title/edition/chapter and pages, volume/series, publisher/address/date/note, web references. -/
436 | def formatInBook : Array Content :=
437 | toplevel #[
438 | formatAuthorOrEditor e,
439 | sentence #[
440 | mkTag "i" <$> e.tags["title"]?,
441 | formatEdition e,
442 | formatChapterAndPages e
443 | ],
444 | formatVolumeAndSeries e true,
445 | sentence #[
446 | mkStr <$> e.tags["publisher"]?,
447 | mkStr <$> e.tags["address"]?,
448 | e.dateHtml,
449 | mkStr <$> e.tags["note"]?
450 | ],
451 | formatWebRefs e
452 | ]
453 | /-- Template for `incollection` entries: authors, title, "In " + editors/booktitle/edition/volume/chapter and pages, publisher/address/date, web references. -/
454 | def formatInCollection : Array Content :=
455 | toplevel #[
456 | sentence1 e.authorHtml,
457 | sentence' <$> e.tags["title"]?,
458 | (fun x => #[.Character "In "] ++ x) <$> sentence #[
459 | e.editorHtml,
460 | mkTag "i" <$> e.tags["booktitle"]?,
461 | formatEdition e,
462 | formatVolumeAndSeries e false,
463 | formatChapterAndPages e
464 | ],
465 | sentence #[
466 | mkStr <$> e.tags["publisher"]?,
467 | mkStr <$> e.tags["address"]?,
468 | e.dateHtml
469 | ],
470 | formatWebRefs e
471 | ]
472 | /-- Template for `inproceedings` and `conference` entries: authors, title, "In " + proceedings details, note, web references. -/
473 | def formatInProceedings : Array Content :=
474 | toplevel #[
475 | sentence1 e.authorHtml,
476 | sentence' <$> e.tags["title"]?,
477 | (fun x => #[.Character "In "] ++ x) <$> words #[
478 | sentence #[
479 | e.editorHtml,
480 | mkTag "i" <$> e.tags["booktitle"]?,
481 | formatVolumeAndSeries e false,
482 | mkStr <$> e.tags["pages"]?
483 | ],
484 | formatAddressOrganizationPublisherDate e true
485 | ],
486 | sentence' <$> e.tags["note"]?,
487 | formatWebRefs e
488 | ]
489 | /-- Template for `manual` entries: authors, title, edition, organization/address/date, note, web references. -/
490 | def formatManual : Array Content :=
491 | toplevel #[
492 | sentence1 e.authorHtml,
493 | sentence #[mkTag "i" <$> e.tags["title"]?],
494 | sentence1 <$> formatEdition e,
495 | sentence #[
496 | mkStr <$> e.tags["organization"]?,
497 | mkStr <$> e.tags["address"]?,
498 | e.dateHtml
499 | ],
500 | sentence' <$> e.tags["note"]?,
501 | formatWebRefs e
502 | ]
503 | /-- Template for `mastersthesis` entries: authors, title, "Master's thesis"/school/address/date, note, web references. -/
504 | def formatMasterThesis : Array Content :=
505 | toplevel #[
506 | sentence1 e.authorHtml,
507 | sentence' <$> e.tags["title"]?,
508 | sentence #[
509 | .some #[.Character "Master's thesis"],
510 | mkStr <$> e.tags["school"]?,
511 | mkStr <$> e.tags["address"]?,
512 | e.dateHtml
513 | ],
514 | sentence' <$> e.tags["note"]?,
515 | formatWebRefs e
516 | ]
517 | /-- Template for `misc` and `online` entries: authors, title, howpublished/date, note, web references. -/
518 | def formatMisc : Array Content :=
519 | toplevel #[
520 | sentence1 e.authorHtml,
521 | sentence' <$> e.tags["title"]?,
522 | sentence #[
523 | mkStr <$> e.tags["howpublished"]?,
524 | e.dateHtml
525 | ],
526 | sentence' <$> e.tags["note"]?,
527 | formatWebRefs e
528 | ]
529 | /-- Template for `phdthesis` entries: authors, title, `type` tag (default "PhD thesis")/school/address/date, note, web references. -/
530 | def formatPhDThesis : Array Content :=
531 | toplevel #[
532 | sentence1 e.authorHtml,
533 | sentence #[mkTag "i" <$> e.tags["title"]?],
534 | sentence #[
535 | (mkStr <$> e.tags["type"]?) <|> .some #[.Character "PhD thesis"],
536 | mkStr <$> e.tags["school"]?,
537 | mkStr <$> e.tags["address"]?,
538 | e.dateHtml
539 | ],
540 | sentence' <$> e.tags["note"]?,
541 | formatWebRefs e
542 | ]
543 | /-- Template for `proceedings` entries; without editors the organization leads, with editors they lead and the organization moves into the publication details. -/
544 | def formatProceedings : Array Content :=
545 | let arr : Array (Option (Array Content)) :=
546 | if e.editorHtml.isEmpty then
547 | #[
548 | sentence' <$> e.tags["organization"]?,
549 | sentence #[
550 | mkTag "i" <$> e.tags["title"]?,
551 | formatVolumeAndSeries e false,
552 | formatAddressOrganizationPublisherDate e false
553 | ]
554 | ]
555 | else
556 | #[
557 | sentence1 e.editorHtml,
558 | sentence #[
559 | mkTag "i" <$> e.tags["title"]?,
560 | formatVolumeAndSeries e false,
561 | formatAddressOrganizationPublisherDate e true
562 | ]
563 | ]
564 | toplevel <| arr ++ #[
565 | sentence' <$> e.tags["note"]?,
566 | formatWebRefs e
567 | ]
568 | /-- Template for `techreport` entries: authors, title, `type` tag (default "Technical Report") with number/institution/address/date, note, web references. -/
569 | def formatTechReport : Array Content :=
570 | toplevel #[
571 | sentence1 e.authorHtml,
572 | sentence' <$> e.tags["title"]?,
573 | sentence #[
574 | words #[
575 | (mkStr <$> e.tags["type"]?) <|> .some #[.Character "Technical Report"],
576 | mkStr <$> e.tags["number"]?
577 | ],
578 | mkStr <$> e.tags["institution"]?,
579 | mkStr <$> e.tags["address"]?,
580 | e.dateHtml
581 | ],
582 | sentence' <$> e.tags["note"]?,
583 | formatWebRefs e
584 | ]
585 | /-- Template for `unpublished` entries: authors, title, note/date, web references. -/
586 | def formatUnpublished : Array Content :=
587 | toplevel #[
588 | sentence1 e.authorHtml,
589 | sentence' <$> e.tags["title"]?,
590 | sentence #[
591 | mkStr <$> e.tags["note"]?,
592 | e.dateHtml
593 | ],
594 | formatWebRefs e
595 | ]
596 | /-- Format `e` with the template selected by its category; an unknown category gives the placeholder text "TODO: " followed by the category. -/
597 | def format : Array Content :=
598 | match e.category with
599 | | "article" => formatArticle e
600 | | "book" => formatBook e
601 | | "booklet" => formatBooklet e
602 | | "inbook" => formatInBook e
603 | | "incollection" => formatInCollection e
604 | | "inproceedings" | "conference" => formatInProceedings e
605 | | "manual" => formatManual e
606 | | "mastersthesis" => formatMasterThesis e
607 | | "misc" | "online" => formatMisc e
608 | | "phdthesis" => formatPhDThesis e
609 | | "proceedings" => formatProceedings e
610 | | "techreport" => formatTechReport e
611 | | "unpublished" => formatUnpublished e
612 | | _ => #[.Character ("TODO: " ++ e.category)]
613 |
614 | end Formatter
615 |
616 | /-- Fill the `html` field of a `ProcessedEntry` with the output of `Formatter.format`; the other fields are unchanged. -/
617 | def ProcessedEntry.format : ProcessedEntry :=
618 | { e with html := Formatter.format e }
619 |
620 | end
621 |
622 | /-!
623 |
624 | ## Sorting bibtex items
625 |
626 | -/
627 | /-- Order two names by `lastNameWithoutDiacritics`, then by `firstNameWithoutDiacritics`. -/
628 | def compareAuthor (a b : BibtexName) : Ordering :=
629 |   let (la, lb) := (a.lastNameWithoutDiacritics, b.lastNameWithoutDiacritics)
630 |   let (fa, fb) := (a.firstNameWithoutDiacritics, b.firstNameWithoutDiacritics)
631 |   -- last names decide first; first names only break ties
632 |   if la < lb then .lt
633 |   else if la > lb then .gt
634 |   else if fa < fb then .lt
635 |   else if fa > fb then .gt
636 |   else .eq
639 | /-- Lexicographic comparison of two author arrays from index `i` on; a proper prefix sorts before the longer array. -/
640 | partial def compareAuthors (a b : Array BibtexName) (i : Nat := 0) : Ordering :=
641 |   match a[i]?, b[i]? with
642 |   | .none, .none => .eq
643 |   | .none, .some _ => .lt
644 |   | .some _, .none => .gt
645 |   | .some x, .some y =>
646 |     match compareAuthor x y with
647 |     | .eq => compareAuthors a b (i + 1)
648 |     | decided => decided
650 | /-- Order entries by authors or editors, then by date, then by `titleWithoutDiacritics`. -/
651 | def compareEntry (a b : ProcessedEntry) : Ordering :=
652 |   match compareAuthors a.authorOrEditor b.authorOrEditor with
653 |   | .eq =>
654 |     if a.date ≠ b.date then
655 |       if a.date < b.date then .lt else .gt
656 |     else if a.titleWithoutDiacritics < b.titleWithoutDiacritics then .lt
657 |     else if a.titleWithoutDiacritics > b.titleWithoutDiacritics then .gt
658 |     else .eq
659 |   | decided => decided
665 |
666 | /-- Sort an array of bibtex entries with `qsort`, using `compareEntry` as the order. -/
667 | def sortEntry (arr : Array ProcessedEntry) : Array ProcessedEntry :=
668 |   arr.qsort fun x y => (compareEntry x y).isLT
669 | /-- Prepend to `s` the `length` lowest base-26 digits of `n`, written with the letters `a`–`z`. -/
670 | partial def toBase26Aux (n : Nat) (length : Nat) (s : String) : String :=
671 |   match length with
672 |   | 0 => s
673 |   | k + 1 => toBase26Aux (n / 26) k (String.singleton (Char.ofNat (97 + n % 26)) ++ s)
675 | /-- Letter label of `n`: `0 ↦ a`, …, `25 ↦ z`, `26 ↦ aa`, …; `length` is the label width currently being tried. -/
676 | partial def toBase26 (n : Nat) (length : Nat := 1) : String :=
677 |   let block := 26 ^ length -- number of labels that have exactly `length` letters
678 |   if n < block then toBase26Aux n length ""
679 |   else toBase26 (n - block) (length + 1)
681 | /-- Worker of `deduplicateTag'`: scans `x.1` from index `i`; `x.2` maps each tag seen so far to (index of its first occurrence, number of repeats so far). -/
682 | partial def deduplicateTagAux
683 | (x : Array String × Std.HashMap String (Nat × Nat)) (i : Nat) :
684 | Array String × Std.HashMap String (Nat × Nat) :=
685 | if h : i < x.1.size then
686 | let tag := x.1[i]
687 | let y : Array String × Std.HashMap String (Nat × Nat) :=
688 | if let .some (first, count) := x.2[tag]? then
689 | let z : Array String :=
690 | if count = 0 then -- first repeat: also give the first occurrence its `a` suffix
691 | x.1.modify first fun x => x.dropRight 1 ++ "a]"
692 | else
693 | x.1
694 | let z := z.modify i fun x => x.dropRight 1 ++ toBase26 (count + 1) ++ "]" -- drop the last character, add the letter, close with "]"
695 | (z, x.2.insert tag (first, count + 1))
696 | else
697 | (x.1, x.2.insert tag (i, 0)) -- first time this tag is seen
698 | deduplicateTagAux y (i + 1)
699 | else
700 | x
701 |
702 | /-- Deduplicate an array of tags by running `deduplicateTagAux` from index 0 with an empty map. -/
703 | def deduplicateTag' (arr : Array String) : Array String :=
704 |   deduplicateTagAux (arr, ∅) 0 |>.fst
705 |
706 | /-- Deduplicate the tags of an array of bibtex entries; the entries keep their order. -/
707 | def deduplicateTag (arr : Array ProcessedEntry) : Array ProcessedEntry :=
708 |   let uniqueTags := deduplicateTag' (arr.map (·.tag))
709 |   (arr.zip uniqueTags).map fun (entry, newTag) => { entry with tag := newTag }
710 |
711 | end BibtexQuery
712 |
--------------------------------------------------------------------------------
/BibtexQuery/Main.lean:
--------------------------------------------------------------------------------
1 | /-
2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Frédéric Dupuis
5 | -/
6 |
7 | import BibtexQuery.Parser
8 | import BibtexQuery.String
9 | import BibtexQuery.Query
10 | import BibtexQuery.Format
11 |
12 | /-!
13 | # BibtexQuery: a simple command-line bibtex query utility
14 |
15 | BibtexQuery is a command-line utility that reads in a bibtex file and performs simple queries. A query is a string
16 | of the form "t.querystring", where `t` is either `a` for author, `t` for title or `k` for key, and `querystring`
17 | is a string (without spaces). BibtexQuery reads in a bibtex file, and returns the entries that match all the
18 | queries given as command-line parameters. Note that the entries are processed in a way that strips diacritics,
19 | spaces and special characters before the queries are performed. In addition, the list of authors is normalized to
20 | firstnamelastname. Hence, for example, "Dupuis, Frédéric" will match the query `a.ericdup`.
21 | -/
22 |
23 | open Lean BibtexQuery
24 | /-- Keys of normal entries that were already seen earlier in `parseRes`, one per repeated occurrence, latest first. -/
25 | def listDoublons (parseRes : List BibtexQuery.Entry) : List String := Id.run do
26 |   let mut seen : Std.HashMap String Unit := ∅
27 |   let mut dupl : List String := []
28 |   for entry in parseRes do
29 |     -- only normal entries carry a key
30 |     if let .normalType _ key _ := entry then
31 |       if seen.contains key then
32 |         dupl := key :: dupl
33 |       else
34 |         seen := seen.insert key ()
35 |   return dupl
36 | /-- Print the usage message: the available commands and the query syntax. -/
37 | def printHelp := IO.println
38 | "
39 | bibtex-query - command-line bibtex file processor
40 |
41 | Usage: bibtex-query command filename [args]
42 |
43 | Commands:
44 | h: print this help message
45 | d: check for duplicate entries
46 | l: sort all entries and list them in plaintext form
47 | q: print entries that match the given query
48 | c: print citations of entries that match the given query
49 |
50 | Queries have the form «t.query» (without the quotes) with t being the type of query,
51 | and «query» being the content. The entries printed out are those that match all the queries.
52 |
53 | Types of queries:
54 | k: key (ex: k.d14)
55 | c: class (ex: article, book)
56 | a: author (ex: a.dupuis)
57 | t: title (ex: t.channelcapacity)
58 | w: keywords (ex: w.quantum)
59 | "
60 |
61 | mutual
62 | /-- Concatenated plaintext of all children of an XML element; the element's own name and attributes are dropped. -/
63 | partial def eToPlaintext : Xml.Element → String
64 |   | .Element _ _ children => children.foldl (fun acc child => acc ++ cToPlaintext child) ""
65 |
66 | /-- Plaintext of one XML node: character data as is, comments as `""`, elements recursively. -/
67 | partial def cToPlaintext : Xml.Content → String
68 |   | .Character text => text
69 |   | .Comment _ => ""
70 |   | .Element inner => eToPlaintext inner
71 | end
72 |
73 | def printEntry (e : ProcessedEntry) : IO Unit :=
74 |   let body := e.html.foldl (fun acc node => acc ++ cToPlaintext node) ""
75 |   IO.println <| e.tag ++ "{" ++ e.name ++ "} " ++ body ++ "\n"
76 |
77 | def printEntries (ents : List Entry) : IO Unit := do
78 |   match ents.toArray.mapM ProcessedEntry.ofEntry with
79 |   | .error err => throw <| IO.userError err
80 |   | .ok arr =>
81 |     for e in (deduplicateTag (sortEntry (arr.filterMap id))).map ProcessedEntry.format do
82 |       printEntry e
83 |
84 | def printMatchingEntries (ents : List Entry) (qs : List Query) : IO Unit :=
85 |   printEntries (ents.filter fun e => e.matchQueries qs)
86 |
87 | def printMatchingCitations (ents : List Entry) (qs : List Query) : IO Unit := do
88 |   for e in ents.filter (·.matchQueries qs) do
89 |     IO.println e.toCitation
90 |
91 | /-- Read and parse the bibtex file `fname`. On success the parsed entries are handed to `k`;
92 | on a parse error the message and its line number are written to stderr (newline-terminated). -/
93 | private def withParsedFile (fname : String) (k : List Entry → IO Unit) : IO Unit := do
94 |   match BibtexQuery.Parser.bibtexFile (← IO.FS.readFile fname).iter with
95 |   | .success _pos res => k res
96 |   | .error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}"
97 |
98 | /-- Command-line entry point. The first argument selects the command (`h`, `d`, `l`, `q`, `c`),
99 | the second is the bibtex file, and any remaining arguments are queries. Query strings that
100 | `Query.ofString` rejects are dropped. Anything else prints an error followed by the help text. -/
101 | def main : List String → IO Unit
102 |   | ["h"] | ["-h"] | ["--help"] | ["h", _] | ["-h", _] | ["--help", _] => printHelp
103 |   | ["d", fname] => do
104 |     IO.println s!"Reading {fname} to find doubled keys"
105 |     withParsedFile fname fun res => IO.println (listDoublons res)
106 |   | ["l", fname] => withParsedFile fname printEntries
107 |   | "q" :: (fname :: queries) =>
108 |     withParsedFile fname fun res => printMatchingEntries res (queries.filterMap Query.ofString)
109 |   | "c" :: (fname :: queries) =>
110 |     withParsedFile fname fun res => printMatchingCitations res (queries.filterMap Query.ofString)
111 |   | _ => do IO.eprintln "Invalid command-line arguments"; printHelp
122 |
--------------------------------------------------------------------------------
/BibtexQuery/Name.lean:
--------------------------------------------------------------------------------
1 | /-
2 | Copyright (c) 2024 Jz Pan. All rights reserved.
3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Jz Pan
5 | -/
6 |
7 | import BibtexQuery.TexDiacritics
8 | import UnicodeBasic
9 | import Std.Internal.Parsec
10 | import Std.Internal.Parsec.String
11 |
12 |
13 | /-!
14 |
15 | # Bibtex name processing
16 |
17 | This file contains functions for bibtex name processing.
18 |
19 | The main function is `processNames`, which takes an array of `TexContent`
20 | and returns an array of `BibtexName`.
21 |
22 | -/
23 |
24 | open Lean Unicode BibtexQuery.TexDiacritics
25 |
26 | namespace BibtexQuery.Name
27 |
28 | /-- Split an array of `TexContent` into words at each `' '` and `','`. A comma is kept as a
29 | one-element word of its own; empty words are dropped. -/
30 | def getNameAux (arr : Array TexContent) : Array (Array TexContent) := Id.run do
31 |   let mut groups : Array (Array TexContent) := #[#[]]
32 |   for item in arr do
33 |     groups := match item with
34 |       | .char ' ' => groups.push #[]
35 |       | .char ',' => (groups.push #[item]).push #[]
36 |       | _ => groups.modify (groups.size - 1) (·.push item)
37 |   return groups.filter fun g => !g.isEmpty
38 |
39 | /-- Input one name already split into words at " " and "," (see `getNameAux`), return `(Firstname, Lastname)`.
40 | The braces in the name are preserved. The logic is:
41 |
42 | 1. If there is a "," in the array, then the items before the first "," are the last name,
43 | and the items after the first "," are the first name.
44 | 2. Otherwise, if the last item is a single braced group `{...}`, then it is the last name,
45 | the remaining items are the first name.
46 | 3. Otherwise, if there is an item that begins with a lowercase letter, then the items before
47 | the first of such item are the first name, the remaining items are the last name.
48 | 4. Otherwise, the last item is the last name, the remaining items are the first name.
49 | -/
50 | def getName (arr : Array (Array TexContent)) :
51 | Array (Array TexContent) × Array (Array TexContent) :=
52 | match arr.findIdx? (fun
53 | | #[.char ','] => true
54 | | _ => false) with
55 | | .some n => -- rule 1: `n` is the index of the first comma word
56 | (arr.toSubarray.drop (n + 1) |>.toArray, arr.toSubarray.take n |>.toArray)
57 | | .none => -- no comma: `i` is the index where the last name starts
58 | let i := match arr.back? with
59 | | .some #[.braced _] => arr.size - 1 -- rule 2
60 | | _ => arr.findIdx? (fun s => s.findSome? TexContent.getFirstChar -- rule 3, a word with no first char counts as ' '
61 | |>.getD ' ' |> isLowercase) |>.getD (arr.size - 1) -- rule 4 when no word starts in lowercase
62 | (arr.toSubarray.take i |>.toArray, arr.toSubarray.drop i |>.toArray)
63 |
64 | /-- Split an author-style field into people at each word `and`, then split every person into
65 | `(Firstname, Lastname)` with `getName`. The braces in the names are preserved. -/
66 | def getNames (arr : Array TexContent) :
67 |     Array (Array (Array TexContent) × Array (Array TexContent)) := Id.run do
68 |   let mut people : Array (Array (Array TexContent)) := #[#[]]
69 |   for word in getNameAux arr do
70 |     match word with
71 |     | #[.normal "and"] => people := people.push #[]
72 |     | _ => people := people.modify (people.size - 1) (·.push word)
73 |   return people.filterMap fun person =>
74 |     let (first, last) := getName person
75 |     if first.isEmpty && last.isEmpty then none else some (first, last)
76 |
77 | /-- Strip diacritics from a character. A few letters are mapped by hand; every other character
78 | is replaced by the first character of its canonical decomposition whose combining class is 0
79 | (or left unchanged if there is none). -/
80 | def stripDiacritics (c : Char) : Char :=
81 |   match c with
82 |   | 'œ' | 'ø' => 'o'
83 |   | 'Œ' | 'Ø' => 'O'
84 |   | 'æ' | 'å' => 'a'
85 |   | 'Æ' | 'Å' => 'A'
86 |   | 'ł' => 'l'
87 |   | 'Ł' => 'L'
88 |   | 'ı' => 'i'
89 |   | 'ȷ' => 'j'
90 |   | '\u00DF' => 's'
91 |   | '\u1E9E' => 'S'
92 |   | _ =>
93 |     let decomposed := getCanonicalDecomposition c
94 |     let basePos := decomposed.find fun ch => getCanonicalCombiningClass ch == 0
95 |     (decomposed.get? basePos).getD c
91 |
92 | /-- Strip diacritics from a string: map every character through `stripDiacritics`,
93 | then drop whatever is still a mark character. -/
94 | def stripDiacriticsFromString (s : String) : String :=
95 |   String.mk <| (s.toList.map stripDiacritics).filter fun ch => !GeneralCategory.isMark ch
96 |
97 | /-- The alphabetic characters of `s`, in order, after stripping diacritics from each one. -/
98 | def getAlphabets (s : String) : Array Char :=
99 |   s.foldl (fun acc ch =>
100 |     let base := stripDiacritics ch
101 |     if isAlphabetic base then acc.push base else acc) #[]
100 |
101 | /-- Check whether a string is non-empty and consists only of the uppercase Roman numeral
102 | letters `I V X L C D M`. The numeral itself is not validated, so `IXIX` is accepted. -/
103 | def isUppercaseRomanNumerals (s : String) : Bool :=
104 |   !s.isEmpty && s.all fun c => "IVXLCDM".contains c
108 |
109 | /-- Input a last name string without TeX commands, braces
110 | and math equations, already split by spaces and comma,
111 | return `(oneLetterAbbr, threeLetterAbbr)` of the last name.
112 | Note that they are not necessarily one letter and three letters long;
113 | they are if the last name contains no spaces and only one uppercase letter.
114 | The logic is:
115 |
116 | First, if there is more than one item, remove all items which are upper case Roman numerals.
117 | For example, this removes "III" in "Gerth III".
118 |
119 | 1. If the number of items is not exactly one, then both of the abbreviations are the concatenation
120 | of the first alphabets of each item.
121 | 2. Otherwise, if there are exactly two uppercase alphabets and there is exactly one alphabet
122 | between them, then both of the abbreviations are these three alphabets.
123 | For example, `McCrimmon => McC`.
124 | 3. Otherwise, if there are at least two uppercase alphabets, then both of the abbreviations are
125 | the concatenation of uppercase alphabets.
126 | For example, `Heath-Brown => HB`.
127 | 4. Otherwise, the abbreviations are the first one and three alphabets of the last name,
128 | respectively.
129 | -/
130 | def getLastNameAbbr (arr : Array String) : String × String :=
131 | let arr := if arr.size ≤ 1 then arr else arr.filter (not <| isUppercaseRomanNumerals ·)
132 | match arr with
133 | | #[] => ("", "") -- no items given, or every item was a Roman numeral
134 | | #[s] =>
135 | let s := getAlphabets s -- from here on `s` is the array of alphabetic characters
136 | let arr : Array Nat := s.zipIdx.filterMap fun x => -- indices in `s` of the uppercase characters
137 | if isUppercase x.1 then .some x.2 else .none
138 | if arr.size = 2 ∧ arr[0]! + 2 = arr[1]! then -- rule 2
139 | let s := s.toSubarray.drop arr[0]! |>.take 3 |>.toArray.toList |> String.mk
140 | (s, s)
141 | else if arr.size ≥ 2 then -- rule 3
142 | let s := arr.map (s[·]!) |>.toList |> String.mk
143 | (s, s)
144 | else -- rule 4
145 | let s := String.mk s.toList
146 | (s.take 1, s.take 3)
147 | | _ => -- rule 1 with two or more items; items without any alphabetic character contribute nothing
148 | let s := arr.filterMap (getAlphabets · |> (·[0]?)) |>.toList |> String.mk
149 | (s, s)
150 |
151 | /-- Represents the name of one person in a bibtex author field (see `processName`). -/
152 | structure BibtexName where
153 | /-- The first name, one inner array of `TexContent` per word. -/
154 | firstName : Array (Array TexContent)
155 | /-- The last name, one inner array of `TexContent` per word. -/
156 | lastName : Array (Array TexContent)
157 | /-- The first name without TeX commands, braces and diacritics,
158 | all letters converted to uppercase. -/
159 | firstNameWithoutDiacritics : String
160 | /-- The last name without TeX commands, braces and diacritics,
161 | all letters converted to uppercase. -/
162 | lastNameWithoutDiacritics : String
163 | /-- The one-letter abbreviation of the last name, which is used if a bibitem has at least
164 | two authors. Note that this is not necessarily one letter long;
165 | it is if the last name contains no spaces and only one uppercase letter. -/
166 | oneLetterAbbr : String
167 | /-- The three-letter abbreviation of the last name, which is used if a bibitem has exactly
168 | one author. Note that this is not necessarily three letters long;
169 | it is if the last name contains no spaces and only one uppercase letter. -/
170 | threeLetterAbbr : String
171 | deriving Repr
172 |
173 | namespace BibtexName
174 |
175 | /-- Convert a `BibtexName` to `Firstname Lastname` with TeX commands and braces.
176 | This is not necessarily identical to the original input. -/
177 | def toString (x : BibtexName) : String :=
178 |   let g (arr : Array (Array TexContent)) : String :=
179 |     " ".intercalate (arr.map TexContent.toStringArray |>.toList)
180 |   g <| x.firstName ++ x.lastName
181 |
182 | /-- Join the TeX form of several names with `" and "`, skipping names whose text is empty. -/
183 | def toStringArray (arr : Array BibtexName) : String :=
184 |   " and ".intercalate (arr.map toString |>.filter (not ·.isEmpty) |>.toList)
185 |
186 | /-- Convert a `BibtexName` to `Firstname Lastname` without TeX commands and braces. -/
187 | def toPlaintext (x : BibtexName) : String :=
188 |   let g (arr : Array (Array TexContent)) : String :=
189 |     " ".intercalate (arr.map TexContent.toPlaintextArray |>.toList)
190 |   g <| x.firstName ++ x.lastName
191 |
192 | /-- Join the plaintext form of several names with `", "`, skipping names whose text is empty.
193 | Each name goes through `toPlaintext` (not `toString`), so TeX commands and braces are removed. -/
194 | def toPlaintextArray (arr : Array BibtexName) : String :=
195 |   ", ".intercalate (arr.map toPlaintext |>.filter (not ·.isEmpty) |>.toList)
196 |
197 | /-- Convert a `BibtexName` to `Firstname Lastname` of HTML form. -/
198 | def toHtml (x : BibtexName) : Array Xml.Content :=
199 |   let g (arr : Array (Array TexContent)) : Array Xml.Content :=
200 |     arr.foldl (fun acc s =>
201 |       let t := TexContent.toHtmlArray s
202 |       -- a separating space is only emitted between two non-empty pieces
203 |       acc ++ (if acc.isEmpty || t.isEmpty then #[] else #[.Character " "]) ++ t) #[]
204 |   g <| x.firstName ++ x.lastName
205 |
206 | /-- Concatenate the HTML form of several names, separated by `", "`; names that render to
207 | nothing do not produce a separator. -/
208 | def toHtmlArray (arr : Array BibtexName) : Array Xml.Content :=
209 |   arr.foldl (fun acc s =>
210 |     let t := s.toHtml
211 |     acc ++ (if acc.isEmpty || t.isEmpty then #[] else #[.Character ", "]) ++ t) #[]
212 |
213 | end BibtexName
208 |
209 | /-- Build a `BibtexName` from a `(Firstname, Lastname)` pair: keep both parts as given, add their
210 | diacritic-free uppercase plaintext, and compute the abbreviations of the last name. -/
211 | def processName (s : Array (Array TexContent) × Array (Array TexContent)) : BibtexName :=
212 |   let plainWords (part : Array (Array TexContent)) : Array String :=
213 |     part.map TexContent.toPlaintextArray
214 |   let normalized (part : Array (Array TexContent)) : String :=
215 |     (stripDiacriticsFromString (" ".intercalate (plainWords part).toList)).map getUpperChar
216 |   let (oneLetter, threeLetter) := getLastNameAbbr (plainWords s.2)
217 |   { firstName := s.1
218 |     lastName := s.2
219 |     firstNameWithoutDiacritics := normalized s.1
220 |     lastNameWithoutDiacritics := normalized s.2
221 |     oneLetterAbbr := oneLetter
222 |     threeLetterAbbr := threeLetter }
222 |
223 | /-- Split an array of `TexContent` into names with `getNames` and process each with `processName`. -/
224 | def processNames (arr: Array TexContent) : Array BibtexName :=
225 |   (getNames arr).map processName
226 |
227 | end BibtexQuery.Name
228 |
--------------------------------------------------------------------------------
/BibtexQuery/ParsecExtra.lean:
--------------------------------------------------------------------------------
1 | /-
2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Frédéric Dupuis
5 | -/
6 |
7 | import Std.Internal.Parsec
8 | import Std.Internal.Parsec.Basic
9 | import Std.Internal.Parsec.String
10 |
11 |
12 |
13 | /-!
14 | # Extra Parsec material
15 |
16 | This file contains additional material for the Parsec library, some
17 | of which is modelled after its Haskell counterpart.
18 | -/
19 |
20 |
21 | open Lean Std.Internal.Parsec.String Std.Internal.Parsec
22 |
23 | namespace BibtexQuery.ParsecExtra
24 |
25 | /-- Run `p` on `s` from its start: `some` result on success, `none` on a parse error. -/
26 | def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α :=
27 |   if let .success _ value := p s.iter then some value
28 |   else none
29 |
30 | /-- Run `p` on `s`; on success return the result together with the position where parsing stopped. -/
31 | def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos) :=
32 |   if let .success rest value := p s.iter then some (value, rest.i)
33 |   else none
34 |
35 | /-- Parse one character that does not occur in `bad`. -/
36 | @[inline] def noneOf (bad : String) : Parser Char := satisfy fun ch => !bad.contains ch
37 |
38 | /-- Parse a (possibly empty) run of characters, none of which occurs in `bad`. -/
39 | @[inline] def noneOfStr (bad : String) : Parser String := manyChars <| noneOf bad
40 |
41 | /-- Byte-array parser for one line terminator; tries `"\n\r"`, then `"\r\n"`, then `"\n"`. -/
42 | @[inline] def eol : ByteArray.Parser String :=
43 |   let lit := ByteArray.pstring; lit "\n\r" <|> lit "\r\n" <|> lit "\n"
44 |
45 | /-- Try `p` and throw its result away; if `p` fails, succeed anyway (the attempt is backtracked). -/
46 | @[inline] def maybeSkip (p : Parser α) : Parser Unit := attempt (discard p) <|> pure ()
47 |
48 | /-- Apply `p` over and over, appending each result to `acc`; return the list once `p` fails. -/
49 | @[inline] partial def manyCore' (p : Parser α) (acc : List α) : Parser (List α) :=
50 |   (p >>= fun x => manyCore' p (acc ++ [x])) <|> pure acc
51 |
52 | /-- Zero or more occurrences of `p`, collected in order into a list. -/
53 | @[inline] def many' (p : Parser α) : Parser (List α) := manyCore' p (acc := [])
54 |
55 | /-- Apply `p` over and over, appending each parsed string to `acc`; return it once `p` fails. -/
56 | @[inline] partial def manyStrCore (p : Parser String) (acc : String) : Parser String :=
57 |   (p >>= fun piece => manyStrCore p (acc ++ piece)) <|> pure acc
58 |
59 | /-- Zero or more occurrences of `p`, concatenated into one string. -/
60 | @[inline] def manyStr (p : Parser String) : Parser String := manyStrCore p (acc := "")
61 |
62 | /-- With the items in `acc` already parsed, keep parsing `psep` followed by `pcont`, appending each item. -/
63 | @[inline] partial def sepByCore (pcont : Parser α) (psep : Parser β) (acc : List α) :
64 |     Parser (List α) :=
65 |   attempt (psep *> pcont >>= fun item => sepByCore pcont psep (acc ++ [item])) <|> pure acc
66 |
67 | /-- Zero or more `pcont`, separated by `psep`. -/
68 | @[inline] def sepBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
69 |   (pcont >>= fun first => sepByCore pcont psep [first]) <|> pure []
70 |
71 | /-- Zero or more `pcont` separated by `psep`, with an optional trailing `psep`. -/
72 | @[inline] def sepOrEndBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
73 |   (pcont >>= fun first => sepByCore pcont psep [first] <* maybeSkip psep) <|> pure []
74 |
75 | /-- Keep parsing `pcont` followed by `psep`, appending each item to `acc`, until the pair fails. -/
76 | @[inline] partial def endByCore (pcont : Parser α) (psep : Parser β) (acc : List α) :
77 |     Parser (List α) :=
78 |   attempt ((pcont <* psep) >>= fun item => endByCore pcont psep (acc ++ [item])) <|> pure acc
79 |
80 | /-- Zero or more `pcont`, each one followed by `psep`. -/
81 | @[inline] def endBy (pcont : Parser α) (psep : Parser β) : Parser (List α) :=
82 |   endByCore pcont psep [] <|> pure []
83 |
84 | /-- Parse a single ASCII letter or digit. -/
85 | @[inline] def alphaNum : Parser Char := attempt do
86 |   let c ← any
87 |   if c.isAlphanum then pure c
88 |   else fail "ASCII alphanumeric character expected"
90 |
91 | /-- Parse one ASCII letter and return it in lower case. -/
92 | @[inline] def asciiLetterToLower : Parser Char := Char.toLower <$> asciiLetter
93 |
94 | /-- Parse one ASCII letter or digit and return it in lower case. -/
95 | @[inline] def alphaNumToLower : Parser Char := Char.toLower <$> alphaNum
96 |
97 | /-- Parse a (possibly empty) run of ASCII letters and return it in lower case. -/
98 | @[inline] def asciiWordToLower : Parser String := manyChars <| asciiLetterToLower
99 |
100 | /-- Parse `op`, then `mid`, then `cl`, and return the result of `mid`.
101 | The whole sequence is wrapped in `attempt`. -/
102 | @[inline] def between (op : Parser α) (cl : Parser α) (mid : Parser β) : Parser β :=
103 |   attempt (op *> mid <* cl)
106 |
107 | /-- Parse a run of digits as a natural number; fails when the run is not a valid `Nat` literal. -/
108 | @[inline] def natNum : Parser Nat := attempt do
109 |   if let some n := (← manyChars digit).toNat? then pure n
110 |   else fail "Not a natural number"
111 |
112 | def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parser String := fun it => -- accumulate characters until `test prev c` holds
113 | let out :=
114 | it.foldUntil "" fun acc c => -- NOTE(review): `foldUntil` is not defined in this file; presumably it stops at the first `none` — confirm
115 | let prev : Option Char := if acc == "" then none else acc.back -- last accepted character, `none` before the first one
116 | if test prev c then none else some (acc ++ c.toString)
117 | .success out.2 out.1 -- always `.success`: `out.1` is the result value, `out.2` the position passed on
118 |
119 | end BibtexQuery.ParsecExtra
120 |
--------------------------------------------------------------------------------
/BibtexQuery/Parser.lean:
--------------------------------------------------------------------------------
1 | /-
2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Frédéric Dupuis
5 | -/
6 |
7 | import BibtexQuery.ParsecExtra
8 | import BibtexQuery.Entry
9 | import Std.Internal.Parsec
10 | import Std.Internal.Parsec.String
11 |
12 | /-!
13 | # Bibtex Parser
14 |
15 | This file contains a parser for the Bibtex format. Note that currently, only a subset of the official
16 | Bibtex format is supported; features such as predefined strings and concatenation using `#` are not
17 | supported.
18 | -/
19 |
20 | open Lean Std.Internal.Parsec Std.Internal.Parsec.String BibtexQuery.ParsecExtra
21 |
22 | namespace BibtexQuery.Parser
23 |
24 | /-- The key of a bibtex entry (what goes in the cite command): an ASCII letter followed by
25 | any number of ASCII alphanumerics, `:`, `-` or `_`. -/
26 | def name : Parser String := attempt do
27 |   let head ← asciiLetter
28 |   let tail ← manyChars (alphaNum <|> pchar ':' <|> pchar '-' <|> pchar '_')
29 |   pure (String.singleton head ++ tail)
29 |
30 | /-- The entry type after `@` ("article", "book", etc.), returned in lower case. -/
31 | def category : Parser String := attempt (skipChar '@' *> asciiWordToLower)
32 |
33 | /-- Read up to and including the `}` that closes the current brace level, appending everything
34 | read (nested groups included, closing brace included) to `acc`. -/
35 | partial def bracedContentTail (acc : String) : Parser String := attempt do
36 |   match (← any) with
37 |   | '{' =>
38 |     let inner ← bracedContentTail ""
39 |     bracedContentTail (acc ++ "{" ++ inner)
40 |   | '}' => pure (acc ++ "}")
41 |   | ch => bracedContentTail (acc ++ ch.toString)
42 |
43 | /-- A `{ ... }` group; returns what is between the outer braces, nested braces kept. -/
44 | def bracedContent : Parser String := attempt do
45 |   skipChar '{'
46 |   return (← bracedContentTail "").dropRight 1
47 |
48 | /-- A `" ... "` string; a `"` right after a backslash does not end it. Line breaks are removed. -/
49 | def quotedContent : Parser String := attempt do
50 |   skipChar '"'
51 |   let raw ← manyCharsUntilWithPrev fun prev c => c == '"' && prev != some '\\'
52 |   skipChar '"'
53 |   return (raw.replace "\n" "").replace "\r" ""
53 |
54 | /-- A three-letter month abbreviation (`jan` … `dec`, any letter case), returned in lower case. -/
55 | def month : Parser String := attempt do
56 |   let word ← asciiWordToLower
57 |   let months :=
58 |     ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"]
59 |   if months.contains word then
60 |     return word
61 |   else
62 |     fail "Not a valid month"
70 |
71 | /-- The content field of a tag, chosen by its first character: a run of digits, a month
72 | abbreviation, a quoted string or a braced group. -/
73 | def tagContent : Parser String := attempt do
74 |   let next ← peek!
75 |   if next.isDigit then manyChars digit
76 |   else if next.isAlpha then month
77 |   else if next == '"' then quotedContent
78 |   else if next == '{' then bracedContent
79 |   else fail "Tag content expected"
80 |
81 | /-- One `name = content` pair, e.g. `journal = {Journal of Musical Deontology}`.
82 | The tag name is returned in lower case. -/
83 | def tag : Parser Tag := attempt do
84 |   let key ← manyChars (alphaNumToLower <|> pchar '_' <|> pchar '-')
85 |   ws *> skipChar '=' *> ws
86 |   let value ← tagContent
87 |   return { name := key, content := value }
87 |
88 | /-- Skip everything up to (not including) the next `@`. -/
89 | def outsideEntry : Parser Unit := attempt (discard (noneOfStr "@"))
90 |
91 | /-- A Bibtex entry: `@type{key, tag = content, ...}`. TODO deal with "preamble" etc. -/
92 | def entry : Parser Entry := attempt do
93 |   outsideEntry
94 |   let kind ← category
95 |   ws *> skipChar '{' *> ws
96 |   let key ← name
97 |   skipChar ',' *> ws
98 |   let tags : List Tag ← sepOrEndBy tag (ws *> skipChar ',' *> ws)
99 |   ws *> skipChar '}' *> ws
100 |   return Entry.normalType kind key tags
101 |
102 | def bibtexFile : Parser (List Entry) := many' entry -- zero or more entries, in file order
103 |
104 | --#eval "auTHOr23:z ".parseDebug name
105 | --#eval "auTHOr23:z".parseDebug name
106 | --#eval "@ARTICLE ".parseDebug category
107 | --#eval "@ARtiCLE".parseDebug category
108 | --#eval "auTHOr = \n{Dès Noël où un zéphyr haï\n me vêt de glaçons würmiens, je dîne d'exquis rôtis de bœuf au kir à l'aÿ d'âge mûr}".parseDebug tag
109 | --#eval "auTHOr = \"Test\"".parseDebug tag
110 | --#eval "journal = {Journal of Musical\n Deontology}".parseDebug tag
111 | --#eval "year = 2022".parseDebug tag
112 | --#eval "Bdsk-Url-1 = {https://doi.org/10.1007/s00220-020-03839-5}".parseDebug tag
113 | --#eval "year = 2022,\n author = {Frédéric Dupuis},".parseDebug (sepOrEndBy tag (do ws; skipChar ','; ws))
114 | --#eval "@article{bla23,\n year = 2022,\n author = {Frédéric Dupuis}\n}\n".parseDebug entry
115 | --#eval "\"Bachem, Achim and Korte, Bernhard and Gr{\\\"o}tschel\"".parseDebug quotedContent
116 | --#eval "@article{bla23,\n year = 2022,\n author = \"Bachem, Achim and Korte, Bernhard and Gr{\"o}tschel\"\n}\n".parseDebug entry
117 |
118 | end BibtexQuery.Parser
119 |
--------------------------------------------------------------------------------
/BibtexQuery/Query.lean:
--------------------------------------------------------------------------------
1 | /-
2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Frédéric Dupuis
5 | -/
6 |
7 | import BibtexQuery.String
8 | import BibtexQuery.Entry
9 |
10 | /-!
11 | # Bibtex Queries
12 |
13 | This file deals with bibtex queries of the limited form handled by the program.
14 | -/
15 |
16 | namespace BibtexQuery
17 |
18 | inductive Query where -- one command-line query; `Query.ofString` reads them from "x.text" strings
19 | | key (s : String) -- "k." prefix; compared with the entry key
20 | | author (s : String) -- "a." prefix; compared with the entry authors
21 | | title (s : String) -- "t." prefix; compared with the entry title
22 | | word (s : String) -- "w." prefix; compared with the entry keywords
23 | | class (s : String) -- "c." prefix; compared with the entry class
24 | deriving Repr, Inhabited
25 |
26 | /-- Read a query of the form `x.text`, where the two-character prefix picks the query type
27 | (`k.`, `a.`, `t.`, `w.` or `c.`); any other prefix gives `none`. -/
28 | def Query.ofString (s : String) : Option Query :=
29 |   let body := s.drop 2
30 |   match s.take 2 with
31 |   | "k." => some (.key body)
32 |   | "a." => some (.author body)
33 |   | "t." => some (.title body)
34 |   | "w." => some (.word body)
35 |   | "c." => some (.class body)
36 |   | _ => none
33 |
34 | /-- Whether the normalized field selected by `q` contains the query text as a substring. -/
35 | def Entry.matchQuery (e : Entry) (q : Query) : Bool :=
36 |   let (haystack, needle) := match q with
37 |     | .key s => (e.getKey.flattenWords, s)
38 |     | .author s => (e.getAuthors.toFullNames, s)
39 |     | .title s => (e.getTitle.flattenWords, s)
40 |     | .word s => (e.getKeywords.flattenWords, s)
41 |     | .class s => (e.getClass.flattenWords, s)
42 |   haystack.containsSubstr needle
41 |
42 | /-- Whether `e` matches every query in `lq` (true for the empty list). -/
43 | def Entry.matchQueries (e : Entry) (lq : List Query) : Bool := lq.all fun q => e.matchQuery q
44 |
45 | end BibtexQuery
46 |
--------------------------------------------------------------------------------
/BibtexQuery/String.lean:
--------------------------------------------------------------------------------
1 | /-
2 | Copyright (c) 2022 Frédéric Dupuis. All rights reserved.
3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Frédéric Dupuis
5 | -/
6 |
7 | /-!
8 | # String processing
9 |
10 | This file contains various string processing functions.
11 | -/
12 |
13 | /-- Line number (starting at 1) of the iterator's current position: one plus the number of
14 | `'\n'` characters before it. -/
15 | def String.Iterator.lineNumber (it : String.Iterator) : Nat :=
16 |   let consumed : Substring := { str := it.toString, startPos := 0, stopPos := it.pos }
17 |   1 + consumed.foldl (fun count c => if c == '\n' then count + 1 else count) 0
17 |
18 | /-- Strip diacritics from a character: the accented Latin letters listed below are mapped to
19 | their plain ASCII base letter; every other character is returned unchanged. -/
20 | def Char.asciify : Char → Char
21 |   | 'á' | 'à' | 'â' | 'ä' | 'æ' | 'ǎ' | 'ã' | 'å' | 'ą' => 'a'
22 |   | 'Á' | 'À' | 'Â' | 'Ä' | 'Æ' | 'Ǎ' | 'Ã' | 'Å' | 'Ą' => 'A'
23 |   | 'č' | 'ç' | 'ć' => 'c'
24 |   | 'Č' | 'Ç' | 'Ć' => 'C'
25 |   | 'ď' => 'd'
26 |   | 'Ď' => 'D'
27 |   | 'é' | 'è' | 'ê' | 'ë' | 'ę' | 'ě' => 'e'
28 |   | 'É' | 'È' | 'Ê' | 'Ë' | 'Ę' => 'E'
29 |   | 'ğ' => 'g'
30 |   | 'Ğ' => 'G'
31 |   | 'í' | 'ì' | 'î' | 'ï' | 'ı' => 'i'
32 |   | 'Í' | 'Ì' | 'Î' | 'Ï' | 'İ' => 'I'
33 |   | 'ł' | 'ľ' => 'l'
34 |   | 'Ł' | 'Ľ' => 'L'
35 |   | 'ñ' | 'ň' | 'ń' => 'n'
36 |   | 'Ñ' | 'Ň' | 'Ń' => 'N'
37 |   | 'ó' | 'ò' | 'ô' | 'ö' | 'õ' | 'ø' | 'œ' => 'o'
38 |   | 'Ó' | 'Ò' | 'Ô' | 'Ö' | 'Õ' | 'Ø' | 'Œ' => 'O'
39 |   | 'ř' => 'r'
40 |   | 'Ř' => 'R'
41 |   | 'š' | 'ś' | 'ş' | 'ß' => 's'
42 |   | 'Š' | 'Ś' | 'Ş' => 'S'
43 |   | 'ť' => 't'
44 |   | 'Ť' => 'T'
45 |   | 'ú' | 'ù' | 'û' | 'ü' | 'ů' => 'u'
46 |   | 'Ú' | 'Ù' | 'Û' | 'Ü' | 'Ů' => 'U'
47 |   | 'ý' | 'ÿ' => 'y'
48 |   | 'Ý' | 'Ÿ' => 'Y'
49 |   | 'ž' => 'z'
50 |   | 'Ž' => 'Z'
51 |   | other => other
121 |
122 | def String.asciify (s : String) : String := s.map Char.asciify -- apply `Char.asciify` to every character of `s`
123 |
124 | --#eval "Dès Noël où un zéphyr haï me vêt de glaçons würmiens, je dîne d'exquis rôtis de
125 | --bœuf au kir à l'aÿ d'âge mûr & cætera".asciify
126 |
127 | --#eval "Testfile aisdfjoai".foldl (fun s c => s ++ "A") ""
128 | --#eval '{'.asciify.toLower
129 |
130 | /-- Asciify and lower-case every character of `s`, keeping only the ASCII alphanumeric results. -/
131 | def String.flattenWords (s : String) : String :=
132 |   s.foldl (fun acc c =>
133 |     let folded := c.asciify.toLower
134 |     if folded.isAlphanum then acc.push folded else acc) ""
132 |
133 | --#eval "Frédéric Dupuis, Marco {T}omamichel".flattenWords
134 |
135 | /-- Split a field at each `" and "` and trim every piece. -/
136 | def String.splitIntoNames (s : String) : List String := s.splitOn " and " |>.map String.trim
137 |
138 | /-- The (trimmed) text before the first comma of `s`, or all of `s` when there is no comma. -/
139 | def String.toLastName (s : String) : String :=
140 |   let pieces := (s.split (· == ',')).map String.trim
141 |   pieces.headD ""
144 |
145 | /-- The last names of all the names in `s` (split by `splitIntoNames`), joined with spaces. -/
146 | def String.toLastNames (s : String) : String :=
147 |   " ".intercalate (s.splitIntoNames.map fun n => n.toLastName)
147 |
148 | /-- Standardize to "Firstname Lastname": `Last, First` becomes `First Last`, a name without a
149 | comma is only trimmed, and a name with more than one comma gives the empty string. -/
150 | def String.toFirstnameLastname (s : String) : String :=
151 |   match (s.split (· == ',')).map String.trim with
152 |   | [whole] => whole
153 |   | [last, first] => first ++ " " ++ last
154 |   | _ => ""
155 |
156 | /-- All the names in `s`, each put in "Firstname Lastname" order and flattened with
157 | `flattenWords`, concatenated without separator. -/
158 | def String.toFullNames (s : String) : String :=
159 |   String.join (s.splitIntoNames.map fun n => n.toFirstnameLastname.flattenWords)
158 |
159 | /-- Whether `q` occurs somewhere in `s`; tries `q` as a prefix at each successive character. -/
160 | partial def Substring.containsSubstrStartingAt (s : Substring) (q : String) : Bool :=
161 |   let rest := s.toString
162 |   if rest.isEmpty then q.isEmpty
163 |   else q.isPrefixOf rest || (s.drop 1).containsSubstrStartingAt q
163 |
164 | /-- Whether `q` occurs somewhere in `s`. -/
165 | def String.containsSubstr (s : String) (q : String) : Bool :=
166 |   Substring.containsSubstrStartingAt s.toSubstring q
166 |
167 | /-- Make `s` exactly `n` characters long: pad on the right with `c`, or cut after `n` characters. -/
168 | def String.pad (s : String) (c : Char) (n : Nat) : String := (s.pushn c n).take n
169 |
--------------------------------------------------------------------------------
/BibtexQuery/TexDiacritics.lean:
--------------------------------------------------------------------------------
1 | /-
2 | Copyright (c) 2024 Jz Pan. All rights reserved.
3 | Released under Apache 2.0 license as described in the file LICENSE.
4 | Author: Jz Pan
5 | -/
6 |
7 | import Std.Internal.Parsec
8 | import Std.Internal.Parsec.String
9 | import Lean.Data.Xml.Basic
10 | import UnicodeBasic
11 |
12 | /-!
13 |
14 | # TeX diacritics
15 |
16 | This file contains functions for TeX diacritics processing.
17 | The main functions are `texContent` and `texContents`, which
18 | will convert all TeX commands for diacritics into UTF-8 characters,
19 | and preserve other TeX commands.
20 |
21 | -/
22 |
23 | open Lean Xml Std.Internal.Parsec Std.Internal.Parsec.String Unicode
24 |
25 | namespace BibtexQuery.TexDiacritics
26 |
27 | /-- Represents a segment of TeX content in a bibitem. -/
28 | inductive TexContent
29 | /-- Represents a non-empty normal string. -/
30 | | normal (s : String) : TexContent
31 | /-- Represents some special characters. When output into HTML, some of them (e.g. `\`, `$`)
32 | will be put into `<span>` to prevent MathJax from recognizing them.
33 | Some of them (e.g. ` `, `,`) have special meaning in bibitem name processing. -/
34 | | char (c : Char) : TexContent
35 | /-- Represents a TeX command. It always starts with `\`. It may have trailing spaces. -/
36 | | command (s : String) : TexContent
37 | /-- Represents a math environment: `dollar` is the delimiter and `s` the text between the delimiters. -/
38 | | math (dollar s : String) : TexContent
39 | /-- Represents contents inside `{ ... }`. -/
40 | | braced (arr : Array TexContent) : TexContent
41 | deriving Repr
42 |
43 | namespace TexContent
44 |
45 | /-- Insert the diacritics string `ch` right after the first character of the content; throws an error message when the content does not start with a letter of a `normal` segment. -/
46 | partial def addDiacritics (x : TexContent) (ch : String) :
47 | Except String TexContent := do
48 | match x with
49 | | .normal s =>
50 | if s.isEmpty then
51 | throw "expected a non-empty normal string, but got ''"
52 | else if GeneralCategory.isLetter s.front then
53 | return .normal <| s.take 1 ++ ch ++ s.drop 1 -- first character, then `ch`, then the rest
54 | else
55 | throw s!"diacritics character can only be added after a letter, but got '{s.front}'"
56 | | .char c => throw s!"expected a non-empty normal string, but got '{c}'"
57 | | .command _ => throw "expected a non-empty normal string, but got a TeX command"
58 | | .math _ _ => throw "expected a non-empty normal string, but got a math environment"
59 | | .braced arr =>
60 | if h : 0 < arr.size then
61 | return .braced <| #[← arr[0].addDiacritics ch] ++ (arr.toSubarray.drop 1 |>.toArray) -- recurse into the first element only
62 | else
63 | throw "expected a non-empty normal string, but got '{}'"
64 |
65 | mutual
66 |
67 | /-- Convert a TeX content back to a TeX string. Braces and math delimiters are written out
68 | again; the result is not necessarily identical to the original input. -/
69 | partial def toString (x : TexContent) : String :=
70 |   match x with
71 |   | .normal s | .command s => s
72 |   | .char c => String.singleton c
73 |   | .math dollar s => dollar ++ s ++ dollar
74 |   | .braced arr => "{" ++ toStringArray arr ++ "}"
75 |
76 | /-- Concatenation of `toString` over all the elements of `arr`. -/
77 | partial def toStringArray (arr : Array TexContent) : String :=
78 |   arr.foldl (fun acc item => acc ++ toString item) ""
79 |
80 | end
81 |
82 | mutual
83 |
84 | /-- Convert a TeX content to plaintext: TeX commands vanish, braces are dropped (their
85 | contents are kept), math keeps its delimiters. -/
86 | partial def toPlaintext (x : TexContent) : String :=
87 |   match x with
88 |   | .command _ => ""
89 |   | .normal s => s
90 |   | .char c => String.singleton c
91 |   | .math dollar s => dollar ++ s ++ dollar
92 |   | .braced arr => toPlaintextArray arr
93 |
94 | /-- Concatenation of `toPlaintext` over all the elements of `arr`. -/
95 | partial def toPlaintextArray (arr : Array TexContent) : String :=
96 |   arr.foldl (fun acc item => acc ++ toPlaintext item) ""
97 |
98 | end
97 |
98 | mutual
99 |
100 | /-- Get the first character of the plaintext of a TeX content, or `none` if there is none. -/
101 | partial def getFirstChar (x : TexContent) : Option Char :=
102 | match x with
103 | | .normal s => s.get? 0
104 | | .char c => c
105 | | .command _ => .none -- commands produce no plaintext (see `toPlaintext`)
106 | | .math dollar _ => dollar.get? 0 -- plaintext of math starts with the delimiter
107 | | .braced arr => getFirstCharArray arr
108 |
109 | partial def getFirstCharArray (arr : Array TexContent) : Option Char := -- first element (left to right) that yields a character
110 | arr.findSome? getFirstChar
111 |
112 | end
113 |
114 | mutual
115 |
116 | /-- Get the last character of the plaintext of a TeX content, or `none` if there is none. -/
117 | partial def getLastChar (x : TexContent) : Option Char :=
118 | match x with
119 | | .normal s => if s.isEmpty then .none else s.back
120 | | .char c => c
121 | | .command _ => .none -- commands produce no plaintext (see `toPlaintext`)
122 | | .math dollar _ => dollar.get? 0 -- NOTE(review): first character of the closing delimiter, not its last; the same only when the delimiter repeats one character — confirm
123 | | .braced arr => getLastCharArray arr
124 |
125 | partial def getLastCharArray (arr : Array TexContent) : Option Char := -- first element from the right that yields a character
126 | arr.findSomeRev? getLastChar
127 |
128 | end
129 |
130 | mutual
131 |
132 | /-- Convert a single TeX content to HTML, represented by an array of `Lean.Xml.Content`.
133 | A line break `\\` becomes `<br>`; every other command is shown verbatim in a red `<span>`. -/
134 | partial def toHtml (x : TexContent) : Array Content :=
135 |   match x with
136 |   | .normal text => #[.Character text]
137 |   | .math delim body => #[.Character (delim ++ body ++ delim)]
138 |   | .braced items => toHtmlArray items
139 |   | .char ch =>
140 |     let node : Content := match ch with
141 |       | '\\' | '$' => .Element ⟨"span", RBMap.empty, #[.Character ch.toString]⟩
142 |       | _ => .Character ch.toString
143 |     #[node]
144 |   | .command cmd =>
145 |     let node : Content := match cmd.trim with
146 |       | "\\\\" => .Element ⟨"br", RBMap.empty, #[]⟩
147 |       | _ => .Element ⟨"span", RBMap.empty.insert "style" "color:red;", #[.Character cmd]⟩
148 |     #[node]
149 |
150 | /-- Convert a sequence of TeX contents to HTML, starting at index `i` and appending to `ret`.
151 | A known one-argument command (`\url`, `\textrm`, `\textbf`, `\textit`, `\emph`, `\texttt`,
152 | `\textsc`) followed by another element consumes that element and wraps its HTML accordingly. -/
153 | partial def toHtmlArray (arr : Array TexContent) (i : Nat := 0)
154 |     (ret : Array Content := #[]) : Array Content :=
155 |   if h : i < arr.size then
156 |     if h' : i + 1 < arr.size then
157 |       if let .command cmd := arr[i] then
158 |         -- `arg` is only consumed by the commands listed below
159 |         let arg := arr[i + 1]
160 |         match cmd.trim with
161 |         | "\\url" =>
162 |           let link : Content := .Element
163 |             ⟨"a", RBMap.empty.insert "href" arg.toPlaintext, arg.toHtml⟩
164 |           toHtmlArray arr (i + 2) (ret.push link)
165 |         | "\\textrm" =>
166 |           let upright : Content := .Element ⟨"span", RBMap.empty.insert "style"
167 |             "font-style: normal; font-weight: normal", arg.toHtml⟩
168 |           toHtmlArray arr (i + 2) (ret.push upright)
169 |         | "\\textbf" =>
170 |           let bold : Content := .Element ⟨"b", RBMap.empty, arg.toHtml⟩
171 |           toHtmlArray arr (i + 2) (ret.push bold)
172 |         | "\\textit" =>
173 |           let italic : Content := .Element ⟨"i", RBMap.empty, arg.toHtml⟩
174 |           toHtmlArray arr (i + 2) (ret.push italic)
175 |         | "\\emph" =>
176 |           let emphasized : Content := .Element ⟨"em", RBMap.empty, arg.toHtml⟩
177 |           toHtmlArray arr (i + 2) (ret.push emphasized)
178 |         | "\\texttt" =>
179 |           let mono : Content := .Element ⟨"span", RBMap.empty.insert "style"
180 |             "font-family: monospace", arg.toHtml⟩
181 |           toHtmlArray arr (i + 2) (ret.push mono)
182 |         | "\\textsc" =>
183 |           let smallCaps : Content := .Element ⟨"span", RBMap.empty.insert "style"
184 |             "font-variant: small-caps", arg.toHtml⟩
185 |           toHtmlArray arr (i + 2) (ret.push smallCaps)
186 |         | _ => toHtmlArray arr (i + 1) (ret ++ arr[i].toHtml)
187 |       else
188 |         -- not a command: convert the element on its own
189 |         toHtmlArray arr (i + 1) (ret ++ arr[i].toHtml)
190 |     else
191 |       -- last element: there is no argument left to consume
192 |       toHtmlArray arr (i + 1) (ret ++ arr[i].toHtml)
193 |   else
194 |     ret
195 |
196 | end
197 |
198 | end TexContent
199 |
200 | /-- Consume a (possibly empty) run of spaces, tabs, CRs and LFs and return the consumed text. -/
201 | def ws' : Parser String :=
202 |   manyChars <| satisfy fun ch =>
203 |     ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'
204 |
205 | /-- Replace TeX input conventions by the Unicode characters they stand for: `---` becomes an
206 | em dash, `--` an en dash, `~` a no-break space, and a `?` or `!` followed by a backquote
207 | becomes `¿` or `¡` respectively. -/
208 | def replaceChars (s : String) : String :=
209 |   -- `---` has to be replaced before `--`, otherwise an em dash would turn into an en dash + `-`
210 |   s.replace "---" "\u2014"
211 |     |>.replace "--" "\u2013"
212 |     |>.replace "~" "\u00A0"
213 |     |>.replace "?`" "\u00BF"
214 |     |>.replace "!`" "\u00A1"
215 |
216 | /-- Parse a TeX command: `\` followed by ASCII letters (optionally starred) or by any single
217 | character. Trailing whitespace is kept, except after `\&`, `\#`, `\{`, `\}`, `\$`, `\_`. -/
218 | def texCommand : Parser String := pchar '\\' *> attempt do
219 |   let name ← manyChars asciiLetter
220 |   if name.isEmpty then
221 |     let sym ← any
222 |     let head := "\\" ++ sym.toString
223 |     match sym with
224 |     | '&' | '#' | '{' | '}' | '$' | '_' => return head
225 |     | _ => return head ++ (← ws')
226 |   else
227 |     let starred := (← peek?) == some '*'
228 |     if starred then skip
229 |     return "\\" ++ name ++ (if starred then "*" else "") ++ (← ws')
230 |
231 | /-- Like `texCommand`, but fail on any command whose trimmed text is listed in `exclude`. -/
232 | def texCommand' (exclude : Array String) : Parser String := attempt do
233 |   let cmd ← texCommand
234 |   let name := cmd.trim
235 |   if exclude.contains name then fail s!"'{name}' is not allowed"
236 |   return cmd
237 |
238 | /-- Parse `{`, then `p`, then `}`, and return the result of `p` wrapped in braces again. -/
239 | def bracedContent (p : Parser String) : Parser String :=
240 |   (fun inner => "{" ++ inner ++ "}") <$> (pchar '{' *> p <* pchar '}')
241 |
242 | /-- Run `p` repeatedly, pushing each `some` result onto `acc`, until `p` returns `none`.
243 | An error from `p` is propagated unchanged. -/
244 | partial def manyOptions {α} (p : Parser (Option α)) (acc : Array α := #[]) :
245 |     Parser (Array α) := fun it =>
246 |   match p it with
247 |   | .success rest (.some item) => manyOptions p (acc.push item) rest
248 |   | .success rest .none => .success rest acc
249 |   | .error pos msg => .error pos msg
250 |
251 | /-- Parse the body of a math environment as raw text. Parsing stops, without consuming, at `$`,
252 | an unmatched `}`, a backslash that `texCommand'` rejects (e.g. `\)`), or the end of input. -/
253 | partial def mathContentAux : Parser String := do
254 |   let plain : Parser String := many1Chars <| satisfy fun ch =>
255 |     !(ch == '\\' || ch == '$' || ch == '{' || ch == '}')
256 |   let piece : Parser (Option String) := fun it =>
257 |     if it.hasNext then
258 |       match it.curr with
259 |       | '}' | '$' => .success it .none
260 |       | '{' => (.some <$> bracedContent mathContentAux) it
261 |       | '\\' =>
262 |         match texCommand' #["\\(", "\\)", "\\[", "\\]"] it with
263 |         | .success rest cmd => .success rest (.some cmd)
264 |         | .error _ _ => .success it .none
265 |       | _ => (.some <$> plain) it
266 |     else .success it .none
267 |   return String.join (← manyOptions piece).toList
268 |
269 | /-- Parse a math environment delimited by `\[ \]`, `\( \)`, `$$ $$` or `$ $`, storing `$$` or
270 | `$` as its delimiter. Succeeds with `none`, consuming nothing, if the input does not start
271 | with one of the opening delimiters. -/
272 | def mathContent : Parser (Option TexContent) := fun it =>
273 |   -- `env delim opening closing` parses `opening … closing` and tags the body with `delim`
274 |   let env (delim opening closing : String) : Parser (Option TexContent) := do
275 |     let body ← pstring opening *> mathContentAux <* pstring closing
276 |     return .some (.math delim body)
277 |   let lookahead := it.extract (it.forward 2)
278 |   -- `$$` has to be tested before the single `$`
279 |   if lookahead = "\\[" then env "$$" "\\[" "\\]" it
280 |   else if lookahead = "\\(" then env "$" "\\(" "\\)" it
281 |   else if lookahead = "$$" then env "$$" "$$" "$$" it
282 |   else if it.curr = '$' then env "$" "$" "$" it
283 |   else .success it .none
284 |
285 | /-- Parse raw text up to (not including) an unmatched `}` or the end of input, keeping braced
286 | groups and TeX commands as text. -/
287 | partial def rawContentAux : Parser String := do
288 |   let plain : Parser String := many1Chars <| satisfy fun ch =>
289 |     !(ch == '\\' || ch == '{' || ch == '}')
290 |   let piece : Parser (Option String) := fun it =>
291 |     if it.hasNext then
292 |       match it.curr with
293 |       | '}' => .success it .none
294 |       | '{' => (.some <$> bracedContent rawContentAux) it
295 |       | '\\' => (.some <$> texCommand) it
296 |       | _ => (.some <$> plain) it
297 |     else .success it .none
298 |   return String.join (← manyOptions piece).toList
299 |
300 | /-- Parse one TeX command and translate it where possible: `\url{…}` keeps its raw argument,
301 | symbol commands become text, an accent is applied to what `p` reads; others stay `.command`. -/
302 | def texDiacriticsCommand (p : Parser (Option TexContent)) : Parser (Option TexContent) := do
303 |   let cmd ← texCommand
304 |   let name := cmd.trim
305 |   -- `\url`: the argument is read as raw text, without any further translation
306 |   if name = "\\url" then
307 |     let url ← pchar '{' *> rawContentAux <* pchar '}'
308 |     return .some <| .braced #[.command cmd, .braced #[.normal url]]
309 |   -- characters that are returned as `.char` rather than as normal text
310 |   match name with
311 |   | "\\$" => return .some (.char '$')
312 |   | "\\textbackslash" => return .some (.char '\\')
313 |   | _ => pure ()
314 |   -- commands that stand for a fixed piece of text (`""` means: not one of these)
315 |   let literal : String := match name with
316 |     | "\\oe" => "œ" | "\\OE" => "Œ"
317 |     | "\\ae" => "æ" | "\\AE" => "Æ"
318 |     | "\\aa" => "å" | "\\AA" => "Å"
319 |     | "\\o" => "ø" | "\\O" => "Ø"
320 |     | "\\l" => "ł" | "\\L" => "Ł"
321 |     | "\\i" => "ı" | "\\j" => "ȷ"
322 |     | "\\ss" => "\u00DF" | "\\SS" => "\u1E9E"
323 |     | "\\cprime" => "\u02B9"
324 |     | "\\&" => "&" | "\\#" => "#"
325 |     | "\\{" => "{" | "\\}" => "}"
326 |     | "\\_" => "_"
327 |     | "\\" => "\u00A0" -- the command is really `\ `, but trimming removed its space
328 |     | _ => ""
329 |   unless literal.isEmpty do return .some (.normal literal)
330 |   -- accent commands, mapped to the corresponding combining character
331 |   let mark : String := match name with
332 |     | "\\`" => "\u0300" | "\\'" => "\u0301"
333 |     | "\\^" => "\u0302" | "\\\"" => "\u0308"
334 |     | "\\~" => "\u0303" | "\\=" => "\u0304"
335 |     | "\\." => "\u0307" | "\\u" => "\u0306"
336 |     | "\\v" => "\u030C" | "\\H" => "\u030B"
337 |     | "\\t" => "\u0361" | "\\c" => "\u0327"
338 |     | "\\d" => "\u0323" | "\\b" => "\u0331"
339 |     | "\\k" => "\u0328"
340 |     | _ => ""
341 |   if mark.isEmpty then return .some (.command cmd)
342 |   -- an accent applies to the content that follows it, which is read with `p`
343 |   let .some base ← p | fail "expected a non-empty normal string"
344 |   match base.addDiacritics mark with
345 |   | .ok accented => return .some accented
346 |   | .error msg => fail msg
347 |
348 | /-- Parse one segment of TeX content: a math environment, a run of whitespace (returned as a
349 | single `.char ' '`), a comma, a command, a braced group, or a run of ordinary characters.
350 | Diacritic commands are turned into Unicode characters; other TeX commands are preserved.
351 | Returns `Option.none`, without error, at an unmatched `}` or at the end of input. -/
352 | partial def texContent : Parser (Option TexContent) := fun it =>
353 |   let word : Parser String := many1Chars <| satisfy fun ch =>
354 |     !(ch == '\\' || ch == '$' || ch == '{' || ch == '}' || ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == ',')
355 |   match mathContent it with
356 |   | .error pos msg => .error pos msg
357 |   | .success rest (.some found) => .success rest (.some found)
358 |   | .success rest .none =>
359 |     if rest.hasNext then
360 |       match rest.curr with
361 |       | '}' => .success rest .none
362 |       | ',' => .success rest.next (.some (.char ','))
363 |       | '\\' => texDiacriticsCommand texContent rest
364 |       | ' ' | '\t' | '\r' | '\n' =>
365 |         -- a whole run of whitespace collapses to one space
366 |         ((fun _ => .some (.char ' ')) <$> ws) rest
367 |       | '{' =>
368 |         let group := pchar '{' *> manyOptions texContent <* pchar '}'
369 |         ((fun items => .some (.braced items)) <$> group) rest
370 |       | _ => ((fun text => .some (.normal (replaceChars text))) <$> word) rest
371 |     else .success rest .none
372 |
373 | /-- Parse `texContent` segments until it returns `none`, collecting them into an array. -/
374 | def texContents : Parser (Array TexContent) := manyOptions (p := texContent)
375 |
376 | end BibtexQuery.TexDiacritics
377 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # BibtexQuery: a simple command-line bibtex query utility
2 |
3 | BibtexQuery is a command-line utility that reads in a bibtex file and performs simple queries. A query is a string
4 | of the form ``q.querystring``, where ``q`` is either ``a`` for author, ``t`` for title, ``k`` for key, ``c`` for class (i.e. book, article, etc), or ``w`` for keywords, and ``querystring``
5 | is a string (without spaces). BibtexQuery reads in a bibtex file, and returns the entries that match all the
6 | queries given as command-line parameters. Note that the entries are processed in such a way that strips diacritics,
7 | spaces and special characters before the queries are performed. In addition, the list of authors is normalized to
8 | ``firstnamelastname``. Hence, for example, ``Dupuis, Frédéric`` will match the query ``a.ericdup``.
9 |
10 | Note that currently, only a subset of the official Bibtex format is supported; features such as predefined strings and concatenation using ``#`` are not supported. It is unclear whether they
11 | will ever be supported in the future; I only wrote this to learn how to do "normal programming" in [Lean 4](https://github.com/leanprover/lean4/), and I don't personally use these Bibtex features.
12 |
13 | ## Installation instructions
14 |
15 | Make sure you have a working Lean 4 installation (see [here](https://leanprover.github.io/lean4/doc/quickstart.html) for how to do that), and then type `lake build` in the main directory. The
16 | executable should appear in ``.lake/build/bin``.
17 |
--------------------------------------------------------------------------------
/lake-manifest.json:
--------------------------------------------------------------------------------
1 | {"version": "1.1.0",
2 | "packagesDir": ".lake/packages",
3 | "packages":
4 | [{"url": "https://github.com/jcommelin/lean4-unicode-basic",
5 | "type": "git",
6 | "subDir": null,
7 | "scope": "",
8 | "rev": "458e2d3feda3999490987eabee57b8bb88b1949c",
9 | "name": "UnicodeBasic",
10 | "manifestFile": "lake-manifest.json",
11 | "inputRev": "bump_to_v4.18.0-rc1",
12 | "inherited": false,
13 | "configFile": "lakefile.lean"}],
14 | "name": "BibtexQuery",
15 | "lakeDir": ".lake"}
16 |
--------------------------------------------------------------------------------
/lakefile.toml:
--------------------------------------------------------------------------------
1 | name = "BibtexQuery"
2 | defaultTargets = ["BibtexQuery", "«bibtex-query»"]
3 |
4 | [[require]]
5 | name = "UnicodeBasic"
6 | git = "https://github.com/jcommelin/lean4-unicode-basic" # switch back to fgdorais
7 | rev = "bump_to_v4.18.0-rc1" # switch back to main
8 |
9 | [[lean_lib]]
10 | name = "BibtexQuery"
11 |
12 | [[lean_exe]]
13 | name = "«bibtex-query»"
14 | root = "BibtexQuery.Main"
15 |
--------------------------------------------------------------------------------
/lean-toolchain:
--------------------------------------------------------------------------------
1 | leanprover/lean4:v4.19.0-rc1
2 |
--------------------------------------------------------------------------------
/scripts/bibc:
--------------------------------------------------------------------------------
1 | #!/usr/bin/bash
2 | # Run `bibtex-query c` on the main bibliography, forwarding every argument unchanged.
3 | bibtex-query c /home/fred/git/w/bib/big.bib "$@"
4 |
--------------------------------------------------------------------------------
/scripts/bibq:
--------------------------------------------------------------------------------
1 | #!/usr/bin/bash
2 | # Run `bibtex-query q` on the main bibliography, forwarding every argument unchanged.
3 | bibtex-query q /home/fred/git/w/bib/big.bib "$@"
4 |
--------------------------------------------------------------------------------
/scripts/bibrofi:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Pick one of the PDF/DjVu files in ~/git/w/bib with rofi and open it in zathura.
3 | shopt -s nullglob globstar
4 |
5 | papers=( ~/git/w/bib/*.pdf ~/git/w/bib/*.djvu )
6 |
7 | bibfile=$(printf '%s\n' "${papers[@]}" | rofi -i -dmenu "$@")
8 |
9 | [[ -n $bibfile ]] || exit
10 |
11 | zathura "$bibfile" &
12 |
--------------------------------------------------------------------------------