├── __init__.py
├── .gitignore
├── init.sh
├── setup.py
├── LICENSE
├── README.md
├── bibtidy.py
├── BaseXClient.py
└── overleaf-bibtidy.js
/__init__.py:
--------------------------------------------------------------------------------
1 | from . import bibtidy
2 |
3 | __all__ = ["bibtidy"]
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | bibtidy.egg-info
3 | __pycache__
4 | .vscode
--------------------------------------------------------------------------------
/init.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# One-time setup for the local DBLP mirror used by bibtidy.py:
# install BaseX, download the DBLP XML dump and its DTD, build the `dblp`
# database, then start the BaseX server.

# Abort on the first failing step so that a failed download never leads to
# building a database from a missing or partial dump.
set -euo pipefail

sudo apt install basex -y
wget https://dblp.org/xml/dblp.xml.gz
gunzip dblp.xml.gz
# The DTD defines the character entities used inside dblp.xml.
wget https://dblp.org/xml/dblp.dtd

# bibtidy.py logs in as admin/admin, so the password is set to match.
basex -c "ALTER PASSWORD admin admin"
# Parse with the DTD and build text/token/full-text indexes for the `dblp` database.
basex -c "SET INTPARSE true; SET DTD true; SET TEXTINDEX true; SET TOKENINDEX true; SET FTINDEX true; CREATE DB dblp dblp.xml"

# Start the BaseX server (-S: run it as a background service).
basexserver -S
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
version = "0.0.2"


def load_readme():
    """Return the text of README.md, used as the package long description.

    The file is decoded explicitly as UTF-8 so that building the package does
    not depend on the platform's default locale encoding.

    Raises:
        OSError: if README.md cannot be opened from the current directory.
    """
    with open("README.md", encoding="utf-8") as f:
        return f.read()
8 |
# Package metadata and install configuration for the `bibtidy` command.
setuptools.setup(
    name="bibtidy",
    version=version,
    author='Jinsheng Ba',
    author_email='bajinsheng@gmail.com',
    description="A tool for simplifying BiBTex creation.",
    long_description=load_readme(),
    long_description_content_type="text/markdown",
    url="https://github.com/bajinsheng/bibtidy",
    # bibtidy.py imports BaseXClient at module level, so both top-level
    # modules have to be installed for the console script to start.
    py_modules=["bibtidy", "BaseXClient"],
    packages=setuptools.find_packages(),
    install_requires=[
        'bibtexparser==1.4.0',
        'requests',
        # Imported by bibtidy.py (XML parsing and the progress bar).
        'lxml',
        'tqdm',
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
    entry_points={
        "console_scripts": [
            "bibtidy = bibtidy:main"
        ]
    }
)
35 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 (Bill) Yuchen Lin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # bibtidy
2 |
3 | Welcome to bibtidy, a simple tool for simplifying the creation of your BibTeX files!
4 |
5 | ## Welcome Stars
6 | If you like this tool, don't forget to star it! Your support is my motivation to keep updating this tool.
7 |
8 | ## Features
9 | 1. Searching: Search DBLP by keyword and list all relevant BibTeX entries in order.
10 | 2. Checking: Automatically fix incorrect entries and fill in incomplete ones, using DBLP as the reference.
11 | 3. **Overleaf BibTeX Checker [NEW]:**
12 | - Real-time BibTeX error detection and highlights in Overleaf
13 | - DBLP integration for authoritative corrections
14 | - Error icons and popups with diff-style suggestions
15 | - Works only when a `.bib` file is selected in Overleaf
16 |
17 | ## Requirements
18 | Python 3.10.0
19 | (Other versions are not tested. Welcome to test and report.)
20 |
21 | ## Installation
22 | ```shell
23 | pip3 install git+https://github.com/bajinsheng/bibtidy@release
24 | ```
25 | If you want to install it in an isolated virtualenv, you can use `pipx` instead:
26 | ```shell
27 | pipx install git+https://github.com/bajinsheng/bibtidy@release
28 | ```
29 |
30 | ## Usage 1: Searching for a paper
31 | ```shell
32 | bibtidy --keyword "How Good Are Query Optimizers"
33 | ```
34 | The output will be:
35 | ```bibtex
36 | @article{viktor2015how,
37 | author = {Viktor Leis and
38 | Andrey Gubichev and
39 | Atanas Mirchev and
40 | Peter A. Boncz and
41 | Alfons Kemper and
42 | Thomas Neumann},
43 | bibsource = {dblp computer science bibliography, https://dblp.org},
44 | biburl = {https://dblp.org/rec/journals/pvldb/LeisGMBK015.bib},
45 | doi = {10.14778/2850583.2850594},
46 | journal = {Proc. {VLDB} Endow.},
47 | number = {3},
48 | pages = {204--215},
49 | timestamp = {Sat, 25 Apr 2020 01:00:00 +0200},
50 | title = {How Good Are Query Optimizers, Really?},
51 | url = {http://www.vldb.org/pvldb/vol9/p204-leis.pdf},
52 | volume = {9},
53 | year = {2015}
54 | }
55 | ```
56 |
57 | ## Usage 2: Checking a BibTeX file
58 | Suppose we have a file named "test.bib" with some BibTeX entries from unknown sources:
59 | ```bibtex
60 | @misc{ba2022efficient,
61 | title={Efficient Greybox Fuzzing to Detect Memory Errors},
62 | author={Jinsheng Ba and Gregory J. Duck and Abhik Roychoudhury},
63 | year={2022},
64 | eprint={2204.02773},
65 | archivePrefix={arXiv},
66 | primaryClass={cs.CR}
67 | }
68 |
69 | ```
70 | We can check and autofix it by running the following command:
71 | ```shell
72 | bibtidy --file "test.bib"
73 | ```
74 |
75 | The output will be:
76 | ```bibtex
77 | @inproceedings{ba2022efficient,
78 | author = {Jinsheng Ba and
79 | Gregory J. Duck and
80 | Abhik Roychoudhury},
81 | bibsource = {dblp computer science bibliography, https://dblp.org},
82 | biburl = {https://dblp.org/rec/conf/kbse/BaDR22.bib},
83 | booktitle = {37th {IEEE/ACM} International Conference on Automated Software Engineering,
84 | {ASE} 2022, Rochester, MI, USA, October 10-14, 2022},
85 | doi = {10.1145/3551349.3561161},
86 | pages = {37:1--37:12},
87 | publisher = {{ACM}},
88 | timestamp = {Sun, 15 Jan 2023 00:00:00 +0100},
89 | title = {Efficient Greybox Fuzzing to Detect Memory Errors},
90 | url = {https://doi.org/10.1145/3551349.3561161},
91 | year = {2022}
92 | }
93 | ```
94 |
95 | **Note:** Because DBLP rate-limits requests, checking may fail for large BibTeX files. In that case, split the file into smaller parts and check them separately.
96 |
97 | ## Usage 3: Overleaf BibTeX Checker
98 |
99 | This is a Tampermonkey/Greasemonkey userscript for Overleaf. It provides real-time BibTeX validation and DBLP-powered corrections directly in the Overleaf editor.
100 |
101 | **How to use:**
102 | 1. Install [Tampermonkey](https://www.tampermonkey.net/) or [Greasemonkey](https://www.greasespot.net/) in your browser.
103 | 2. Add the script from [`overleaf-bibtidy.js`](./overleaf-bibtidy.js) or Greasy Fork.
104 | 3. Open your Overleaf project and select a `.bib` file in the file tree.
105 | 4. Use the "Check BibTeX" button and enjoy instant feedback and corrections!
106 |
107 | **Source & Issues:**
108 | - [GitHub Repo](https://github.com/bajinsheng/bibtidy)
109 | - [Greasy Fork Script](https://greasyfork.org/en/scripts/544119-bibtidy-plugin-for-overleaf-editor)
110 |
--------------------------------------------------------------------------------
/bibtidy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | import argparse
3 | import os, re
4 | import bibtexparser
5 | from difflib import SequenceMatcher
6 | from bibtexparser.bparser import BibTexParser
7 | import BaseXClient
8 | from lxml import etree
9 | from tqdm import tqdm
10 |
# Module-level BaseX session shared by all queries. It is created at import
# time and closed at the end of main().
session = BaseXClient.Session('localhost', 1984, 'admin', 'admin')


def _xquery_string_literal(value):
    """Return *value* quoted as a single-quoted XQuery string literal.

    Inside such a literal only two characters are special: ``&`` starts an
    entity reference and ``'`` ends the literal (it is escaped by doubling).
    """
    return "'" + value.replace("&", "&amp;").replace("'", "''") + "'"


def dblp_search(title):
    """Full-text search the local ``dblp`` database for records matching *title*.

    Args:
        title: Title text to look for. It is escaped before being embedded in
            the query, so apostrophes and ampersands (e.g. "Don't", "R&D") no
            longer break or alter the XQuery.

    Returns:
        An lxml element whose children are the matching DBLP records; it has
        no children when nothing matched.

    Raises:
        IOError: if the BaseX server reports an error for the query.
    """
    query = ("xquery for $x in doc('dblp')/dblp/* where $x/title contains text "
             + _xquery_string_literal(title) + " return $x")
    data = session.execute(query)
    # The result is a bare sequence of sibling records (possibly empty), which
    # is not well-formed XML on its own, so wrap it in a single root element.
    data = "<root>\n" + data + "\n</root>"
    return etree.fromstring(data)
18 |
def _title_text(element):
    """Return all text inside *element*, including text of nested tags.

    ``element.text`` alone is None (or only a prefix) when the title contains
    inline markup such as ``<i>``; a missing element yields an empty string.
    """
    if element is None:
        return ""
    return "".join(element.itertext())


def bibtex_prioritize(entries, title):
    """
    Rank candidate records by how closely their title matches *title*.

    Args:
        entries: Iterable of XML elements, each exposing ``find()``; the
            ``title`` and ``journal`` children are inspected.
        title: The title to compare against.

    Returns:
        A list of ``{"entry": element, "similarity": float}`` dicts. Records
        whose journal is ``CoRR`` (arXiv preprints) always come after all other
        records; within each group the order is by descending similarity.
    """
    published = []
    preprints = []
    for entry in entries:
        candidate_title = _title_text(entry.find('title'))
        similarity = SequenceMatcher(None, candidate_title, title).ratio()
        journal = entry.find('journal')
        is_preprint = journal is not None and journal.text == 'CoRR'
        # Preprints are kept apart so a published version is preferred.
        target = preprints if is_preprint else published
        target.append({"entry": entry, "similarity": similarity})

    published.sort(key=lambda item: item["similarity"], reverse=True)
    preprints.sort(key=lambda item: item["similarity"], reverse=True)
    return published + preprints
36 |
def write_bibtex(entry, output):
    """
    Append one BibTeX entry to the file at *output*.

    Args:
        entry: A bibtexparser entry dict.
        output: Path of the file to append to; it is created if missing.

    The file is written as UTF-8 so that non-ASCII author names and titles do
    not depend on the platform's default encoding.
    """
    dblp_library = bibtexparser.bibdatabase.BibDatabase()
    dblp_library.entries = [entry]
    writer = bibtexparser.bwriter.BibTexWriter()
    writer.order_entries_by = None  # keep entries in the order they are written
    result = bibtexparser.dumps(dblp_library, writer=writer)
    with open(output, "a", encoding="utf-8") as f:
        f.write(result)
48 |
49 |
def _field_text(element):
    """Return the full text of an XML field, including text of nested tags."""
    return "".join(element.itertext())


def _merge_dblp_record(entry, record):
    """Overwrite the fields of BibTeX *entry* (a dict) with those of *record*."""
    authors = []
    for field in record:
        if field.tag == 'author':
            # Strip a trailing number from the author name (e.g. "Wei Wang 0001").
            authors.append(re.sub(r'\s*\d+$', '', _field_text(field)))
        elif field.tag == 'url' or field.tag == 'crossref':  # Ignore internal tags
            continue
        elif field.tag == 'ee':  # Convert DOI
            link = _field_text(field)
            entry['url'] = link
            # Some publishers, such as USENIX, do not provide DOI
            if link.startswith('https://doi.org/'):
                entry['doi'] = link.split('https://doi.org/')[1].rstrip('}')
        else:
            entry[field.tag] = _field_text(field)
    if authors:
        entry['author'] = " and ".join(authors)
    entry['ENTRYTYPE'] = record.tag  # Update the entry type


def bibtex_checking(bibtex_library, args):
    '''
    Correct every entry of *bibtex_library* against the DBLP database.

    For each entry the best-matching DBLP record (by title similarity) is
    merged into the entry, and the result is appended to
    ``args.file + "_revised.bib"``. Entries without a title, or with no DBLP
    match, are not written.

    Args:
        bibtex_library: Parsed BibTeX database; its ``entries`` are updated in place.
        args: Parsed command-line arguments; ``args.file`` and ``args.debug`` are read.

    Returns:
        None. Results are written to the output file and reported on stdout.
    '''
    for entry in tqdm(bibtex_library.entries, desc='Processing bibtex', unit='entry'):
        old_title = entry.get('title')
        if not old_title:
            # Nothing to search for; skip instead of failing on the missing key.
            if args.debug:
                print("[Debug] Entry \"" + str(entry.get('ID', '')) + "\" has no title and is skipped.")
            continue

        bibtex_matched = dblp_search(old_title)
        if len(bibtex_matched) == 0:
            if args.debug:
                print("[Debug] \"" + entry['title'] + "\" is not found in the DBLP database.")
            continue

        bibtex_best_match = bibtex_prioritize(bibtex_matched, old_title)[0]
        _merge_dblp_record(entry, bibtex_best_match['entry'])

        # Reporting results
        similarity = bibtex_best_match['similarity']
        if similarity < 0.5 and args.debug:
            print("[Debug] \"" + entry['title'] + "\" has no similar entires.")
        elif similarity < 0.8:
            print("[Warning] Suspicious update: \"" + old_title + "\" -> \"" + entry['title'] + "\". Please check whether it is correct.")
        elif similarity < 1 and args.debug:
            print("[Debug] \"" + old_title + "\" -> \"" + entry['title'] + "\".")
        write_bibtex(entry, args.file + "_revised.bib")
91 |
def main():
    """Command-line entry point: check one BibTeX file against DBLP.

    Reads the file given on the command line, corrects its entries and writes
    them to ``<file>_revised.bib``. Refuses to run when that output file
    already exists. The shared BaseX session is closed on every exit path.
    """
    parser = argparse.ArgumentParser(description='bibtidy: Make your research easier with correct citations!')
    parser.add_argument('file')
    # NOTE(review): --output is parsed but not used below; results always go
    # to "<file>_revised.bib".
    parser.add_argument('-o', '--output', type=str, default='stdout', help='the file path of the output')
    parser.add_argument('-d', '--debug', action='store_true', default=False, help='enable debug mode')

    try:
        args = parser.parse_args()

        if os.path.isfile(args.file + "_revised.bib"):
            print("The output file already exists! Please delete it first.")
            return

        with open(args.file) as bibtex_file:
            bib_parser = BibTexParser()
            bib_parser.ignore_nonstandard_types = False
            bibtex_library = bibtexparser.load(bibtex_file, bib_parser)
        bibtex_checking(bibtex_library, args)
    finally:
        # close session, also on the early return above and on errors
        if session:
            session.close()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/BaseXClient.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Python 2.7.3 and 3.x client for BaseX.
4 | Works with BaseX 7.0 and later
5 |
6 | Requires Python 3.x or Python 2.x having some backports like bytearray.
7 | (I've tested Python 3.2.3, and Python 2.7.3 on Fedora 16 linux x86_64.)
8 |
9 | LIMITATIONS:
10 |
11 | * binary content would corrupt, maybe. (I didn't test it)
12 | * also, will fail to extract stored binary content, maybe.
13 | (both my code, and original don't care escaped 0xff.)
14 |
15 | Documentation: http://docs.basex.org/wiki/Clients
16 |
17 | (C) 2012, Hiroaki Itoh. BSD License
18 | updated 2014 by Marc van Grootel
19 |
20 | """
21 |
22 | import hashlib
23 | import socket
24 | import threading
25 |
26 | # ---------------------------------
27 | #
28 |
29 |
class SocketWrapper(object):
    """Buffered wrapper around a connected socket.

    Adds NUL-terminated string reads, single-byte reads and encoding-aware
    sends. Any other attribute access is forwarded to the wrapped socket as a
    method call (e.g. ``connect`` and ``close``).
    """

    def __init__(self, sock,
                 receive_bytes_encoding='utf-8',
                 send_bytes_encoding='utf-8'):
        """Wrap *sock*.

        Args:
            sock: The socket object to read from and write to.
            receive_bytes_encoding: Encoding used to decode received strings.
            send_bytes_encoding: Encoding used for non-bytes data in sendall().
        """
        self.receive_bytes_encoding = receive_bytes_encoding
        self.send_bytes_encoding = send_bytes_encoding

        self.terminator = bytearray(chr(0), self.receive_bytes_encoding)
        self.__s = sock
        # Fixed 4 KiB receive buffer; __bpos is the read cursor and __bsize
        # the number of valid bytes currently in the buffer.
        self.__buf = bytearray(chr(0) * 0x1000, self.receive_bytes_encoding)
        self.__bpos = 0
        self.__bsize = 0

    def clear_buffer(self):
        """reset buffer status for next invocation ``recv_until_terminator()``
        or ``recv_single_byte()``."""
        self.__bpos = 0
        self.__bsize = 0

    def __fill_buffer(self):
        """Fetch the next bytes from the socket once the buffer is used up.

        Raises:
            IOError: if the peer closed the connection. Without this check a
                closed connection makes the readers spin forever or return
                stale buffer content.
        """
        if self.__bpos >= self.__bsize:
            received = self.__s.recv_into(self.__buf)
            self.__bsize = received
            self.__bpos = 0
            if received == 0:
                raise IOError('Connection closed by peer.')

    # Returns a single byte from the socket.
    def recv_single_byte(self):
        """Return the next byte (as an int) from the buffer, refilling it if needed."""
        self.__fill_buffer()
        result_byte = self.__buf[self.__bpos]
        self.__bpos += 1
        return result_byte

    # Reads until terminator byte is found.
    def recv_until_terminator(self):
        """Read up to the next terminator byte and return the decoded string.

        The terminator itself is consumed but not included in the result.
        """
        result_bytes = bytearray()
        while True:
            self.__fill_buffer()
            pos = self.__buf.find(self.terminator, self.__bpos, self.__bsize)
            if pos >= 0:
                result_bytes.extend(self.__buf[self.__bpos:pos])
                self.__bpos = pos + 1
                break
            # No terminator yet: keep what we have and fetch more.
            result_bytes.extend(self.__buf[self.__bpos:self.__bsize])
            self.__bpos = self.__bsize
        return result_bytes.decode(self.receive_bytes_encoding)

    def sendall(self, data):
        """Send *data* completely.

        bytes/bytearray are passed to the socket unchanged; anything else
        (normally str) is encoded with ``send_bytes_encoding`` first.
        """
        if isinstance(data, (bytearray, bytes)):
            return self.__s.sendall(data)
        return self.__s.sendall(bytearray(data, self.send_bytes_encoding))

    def __getattr__(self, name):
        # Forward unknown attributes to the wrapped socket, resolved at call time.
        return lambda *arg, **kw: getattr(self.__s, name)(*arg, **kw)
93 |
94 |
95 | # ---------------------------------
96 | #
class Session(object):
    """Client session for a BaseX server over a single TCP connection.

    Can be used as a context manager; the session is closed on exit.

    see http://docs.basex.org/wiki/Server_Protocol
    """

    def __init__(self, host, port, user, password,
                 receive_bytes_encoding='utf-8',
                 send_bytes_encoding='utf-8'):
        """Connect to ``host:port`` and log in as *user*.

        Raises:
            IOError: 'Access Denied.' when the server rejects the login.
                Errors raised while connecting are propagated as well. In
                both cases the socket is closed before the exception leaves.
        """

        self.__info = None

        # create server connection
        self.__swrapper = SocketWrapper(
            socket.socket(socket.AF_INET, socket.SOCK_STREAM),
            receive_bytes_encoding=receive_bytes_encoding,
            send_bytes_encoding=send_bytes_encoding)

        try:
            self.__swrapper.connect((host, port))

            # receive timestamp
            response = self.recv_c_str().split(':')

            # send username and hashed password/timestamp
            # (MD5 is what the server protocol exchanges here, not a free choice.)
            hfun = hashlib.md5()

            if len(response) > 1:
                # Greeting has two ':'-separated fields: the first is hashed
                # together with user and password, the second is the nonce.
                code = "%s:%s:%s" % (user, response[0], password)
                nonce = response[1]
            else:
                # Single-field greeting: hash the password alone.
                code = password
                nonce = response[0]

            hfun.update(hashlib.md5(code.encode('us-ascii')).hexdigest().encode('us-ascii'))
            hfun.update(nonce.encode('us-ascii'))
            self.send(user + chr(0) + hfun.hexdigest())

            # evaluate success flag
            if not self.server_response_success():
                raise IOError('Access Denied.')
        except Exception:
            # Do not leak the socket when connecting or logging in fails.
            self.__swrapper.close()
            raise

    def __enter__(self):
        """Return the session itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the session; exceptions from the ``with`` body are not suppressed."""
        self.close()
        return False

    def execute(self, com):
        """Execute a command and return its result string.

        Raises:
            IOError: with the server's info text when the command fails.
        """
        # send command to server
        self.send(com)

        # receive result
        result = self.receive()
        self.__info = self.recv_c_str()
        if not self.server_response_success():
            raise IOError(self.__info)
        return result

    def query(self, querytxt):
        """Creates a new query instance (having id returned from server)."""
        return Query(self, querytxt)

    def create(self, name, content):
        """Creates a new database with the specified input (may be empty)."""
        self.__send_input(8, name, content)

    def add(self, path, content):
        """Adds a new resource to the opened database."""
        self.__send_input(9, path, content)

    def replace(self, path, content):
        """Replaces a resource with the specified input."""
        self.__send_input(12, path, content)

    def store(self, path, content):
        """Stores a binary resource in the opened database.

        api won't escape 0x00, 0xff automatically, so you must do it
        yourself explicitly."""
        # ------------------------------------------
        # chr(13) + path + chr(0) + content + chr(0)
        self.__send_binary_input(13, path, content)
        #
        # ------------------------------------------

    def info(self):
        """Return the info string of the most recent command (None before any)."""
        return self.__info

    def close(self):
        """Send ``exit`` to the server and close the socket."""
        self.send('exit')
        self.__swrapper.close()

    def recv_c_str(self):
        """Retrieve a NUL-terminated string from the socket"""
        return self.__swrapper.recv_until_terminator()

    def send(self, value):
        """Send the string *value* followed by a NUL terminator"""
        self.__swrapper.sendall(value + chr(0))

    def __send_input(self, code, arg, content):
        """Send command byte *code* with a text argument and text content.

        Raises:
            IOError: with the server's info text when the command fails.
        """
        self.__swrapper.sendall(chr(code) + arg + chr(0) + content + chr(0))
        self.__info = self.recv_c_str()
        if not self.server_response_success():
            raise IOError(self.info())

    def __send_binary_input(self, code, path, content):
        """Send command byte *code* with a path and raw binary content.

        Raises:
            ValueError: if *content* is not bytes or bytearray.
            IOError: with the server's info text when the command fails.
        """
        # at this time, we can't use __send_input itself because of encoding
        # problem. we have to build bytearray directly.
        if not isinstance(content, (bytearray, bytes)):
            raise ValueError("Sorry, content must be bytearray or bytes, not " +
                             str(type(content)))

        # ------------------------------------------
        # chr(code) + path + chr(0) + content + chr(0)
        data = bytearray([code])
        try:
            data.extend(path)
        except TypeError:
            # A text path cannot be appended to a bytearray directly; encode it.
            data.extend(path.encode('utf-8'))
        data.extend([0])
        data.extend(content)
        data.extend([0])
        #
        # ------------------------------------------
        self.__swrapper.sendall(data)
        self.__info = self.recv_c_str()
        if not self.server_response_success():
            raise IOError(self.info())

    def server_response_success(self):
        """Read the one-byte status flag; True when it is 0 (success)"""
        return self.__swrapper.recv_single_byte() == 0

    def receive(self):
        """Discard buffered bytes, then return the next received string"""
        self.__swrapper.clear_buffer()
        return self.recv_c_str()

    def iter_receive(self):
        """iter_receive() -> (typecode, item)

        iterate while the query returns items.
        typecode list is in http://docs.basex.org/wiki/Server_Protocol:_Types

        Raises:
            IOError: with the server's message when the final status flag
                reports a failure.
        """
        self.__swrapper.clear_buffer()
        typecode = self.__swrapper.recv_single_byte()
        # A typecode of 0 marks the end of the item stream.
        while typecode > 0:
            string = self.recv_c_str()
            yield (typecode, string)
            typecode = self.__swrapper.recv_single_byte()
        if not self.server_response_success():
            raise IOError(self.recv_c_str())
250 |
251 | # ---------------------------------
252 | #
253 |
254 |
class Query():
    """Server-side query handle created through a session.

    Each method sends one protocol command (a single command byte followed by
    NUL-separated arguments) and returns the server's reply.

    see http://docs.basex.org/wiki/Server_Protocol
    """

    def __init__(self, session, querytxt):
        """Register *querytxt* on the server and remember the returned id."""
        self.__session = session
        self.__id = self.__exc("\x00", querytxt)

    def bind(self, name, value, datatype=''):
        """Bind *value* to the variable *name*.

        *datatype* may be left as an empty string."""
        payload = "\x00".join((self.__id, name, value, datatype))
        self.__exc("\x03", payload)

    def context(self, value, datatype=''):
        """Bind *value* as the context item of the query."""
        payload = "\x00".join((self.__id, value, datatype))
        self.__exc("\x0e", payload)

    def iter(self):
        """Start the query and return an iterator over its result items."""
        session = self.__session
        session.send("\x04" + self.__id)
        return session.iter_receive()

    def execute(self):
        """Run the query and return the whole result."""
        return self.__exc("\x05", self.__id)

    def info(self):
        """Return the query information."""
        return self.__exc("\x06", self.__id)

    def options(self):
        """Return the serialization parameters."""
        return self.__exc("\x07", self.__id)

    def updating(self):
        """Return the server's answer on whether the query may perform updates."""
        return self.__exc("\x1e", self.__id)

    def full(self):
        """Return all result items as strings, prefixed by XDM meta data."""
        return self.__exc("\x1f", self.__id)

    def close(self):
        """Close the query on the server."""
        self.__exc("\x02", self.__id)

    def __exc(self, cmd, arg):
        """Send command *cmd* with *arg* and return the reply.

        Raises:
            IOError: with the server's message when the status flag reports failure.
        """
        session = self.__session
        session.send(cmd + arg)
        reply = session.receive()
        if session.server_response_success():
            return reply
        raise IOError(session.recv_c_str())
313 |
--------------------------------------------------------------------------------
/overleaf-bibtidy.js:
--------------------------------------------------------------------------------
1 | // ==UserScript==
2 | // @name BibTidy plugin for Overleaf Editor
3 | // @namespace http://tampermonkey.net/
4 | // @version 1.1
5 | // @description Simple BibTeX validation in Overleaf
6 | // @author Jinsheng BA
7 | // @match https://www.overleaf.com/project/*
8 | // @grant GM_xmlhttpRequest
9 | // @grant GM_addStyle
10 | // @license MIT
11 | // @homepage https://github.com/bajinsheng/bibtidy
12 | // ==/UserScript==
13 |
14 | (function() {
15 | 'use strict';
16 |
17 | // Grammarly-like styles
18 | GM_addStyle(`
19 | .bibtex-error-icon {
20 | position: absolute;
21 | width: 14px;
22 | height: 14px;
23 | background: #ff4d4f;
24 | border-radius: 50%;
25 | color: white;
26 | font-size: 9px;
27 | display: flex;
28 | align-items: center;
29 | justify-content: center;
30 | cursor: pointer;
31 | z-index: 1000;
32 | font-weight: bold;
33 | box-shadow: 0 1px 3px rgba(0,0,0,0.3);
34 | pointer-events: auto;
35 | }
36 |
37 | .bibtex-error-icon:hover {
38 | background: #ff7875;
39 | transform: scale(1.1);
40 | }
41 |
42 | .bibtex-popup {
43 | position: fixed;
44 | background: white;
45 | border: 1px solid #d9d9d9;
46 | border-radius: 6px;
47 | box-shadow: 0 6px 16px rgba(0,0,0,0.12);
48 | padding: 12px;
49 | z-index: 10000;
50 | max-width: 450px;
51 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
52 | font-size: 13px;
53 | display: none;
54 | }
55 |
56 | .bibtex-popup-header {
57 | font-weight: 600;
58 | color: #ff4d4f;
59 | margin-bottom: 8px;
60 | display: flex;
61 | align-items: center;
62 | gap: 6px;
63 | }
64 |
65 | .bibtex-popup-body {
66 | margin-bottom: 12px;
67 | color: #262626;
68 | line-height: 1.4;
69 | }
70 |
71 | .bibtex-suggestion {
72 | background: #f6ffed;
73 | border-left: 3px solid #52c41a;
74 | padding: 8px;
75 | margin: 8px 0;
76 | font-family: monospace;
77 | font-size: 11px;
78 | white-space: pre-wrap;
79 | max-height: 150px;
80 | overflow-y: auto;
81 | }
82 |
83 | .bibtex-popup-actions {
84 | display: flex;
85 | gap: 8px;
86 | justify-content: flex-end;
87 | }
88 |
89 | .bibtex-btn {
90 | padding: 4px 12px;
91 | border: 1px solid #d9d9d9;
92 | border-radius: 4px;
93 | background: white;
94 | cursor: pointer;
95 | font-size: 12px;
96 | }
97 |
98 | .bibtex-btn-apply {
99 | background: #1890ff;
100 | color: white;
101 | border-color: #1890ff;
102 | }
103 |
104 | .bibtex-btn-apply:hover {
105 | background: #40a9ff;
106 | }
107 |
108 | .bibtex-btn:hover {
109 | border-color: #40a9ff;
110 | }
111 |
112 | .bibtex-overlay {
113 | position: fixed;
114 | top: 0;
115 | left: 0;
116 | width: 100vw;
117 | height: 100vh;
118 | pointer-events: none;
119 | z-index: 999;
120 | }
121 | `);
122 |
123 | class BibTeXChecker {
124 | constructor() {
125 | this.overlay = null;
126 | this.editor = null;
127 | this.errors = [];
128 | this.titleSimilarityThreshold = 0.6; // Minimum similarity threshold (0-1)
129 | }
130 |
131 | init() {
132 | this.createOverlay();
133 | this.watchForEditor();
134 | }
135 | // Note: class methods are declared one after another, without separating commas.
136 |
137 | createOverlay() {
138 | this.overlay = document.createElement('div');
139 | this.overlay.className = 'bibtex-overlay';
140 | document.body.appendChild(this.overlay);
141 | }
142 |
143 | watchForEditor() { // Watch the DOM for the BibTeX editor and build the floating "Check BibTeX" button.
144 | const observer = new MutationObserver(() => {
145 | // Find selected file in file tree
146 | const selectedFile = document.querySelector('li[aria-selected="true"][aria-label$=".bib"]');
147 | // Find editor for bibtex
148 | const editor = document.querySelector('.cm-content[contenteditable="true"][data-language="bibtex"]');
149 | if (selectedFile && editor && editor !== this.editor) { // a different editor element than the one already tracked
150 | this.editor = editor;
151 | this.setupEditorWatcher();
152 | this.checkBibTeX(); // Run BibTeX check immediately when editor loads
153 | } else if ((!selectedFile || !editor) && this.editor) {
154 | // If bib file/editor is no longer selected, clear overlay and editor reference
155 | this.editor = null;
156 | this.clearErrors();
157 | }
158 | });
159 |
160 | observer.observe(document.body, { // fires on any child-list change anywhere under <body>
161 | childList: true,
162 | subtree: true
163 | });
164 |
165 | // Manual check button (only visible when .bib file is selected)
166 | // Create a draggable, beautiful button in the left bottom
167 | const btn = document.createElement('button');
168 | btn.setAttribute('id', 'bibtidy-check-btn');
169 | btn.style.cssText = `
170 | position:fixed;
171 | left:24px;
172 | bottom:24px;
173 | z-index:10000;
174 | background:linear-gradient(90deg,#1890ff 0%,#52c41a 100%);
175 | color:white;
176 | border:none;
177 | box-shadow:0 2px 8px rgba(0,0,0,0.15);
178 | padding:12px 28px;
179 | border-radius:24px;
180 | cursor:pointer;
181 | font-size:16px;
182 | font-weight:600;
183 | letter-spacing:0.5px;
184 | display:none;
185 | transition:box-shadow 0.2s,transform 0.2s;
186 | display: flex;
187 | align-items: center;
188 | gap: 10px;
189 | `; // NOTE(review): `display` is declared twice above; the later `flex` wins until updateBtnVisibility() sets it.
190 | // Button content: text and cycle icon
191 | btn.innerHTML = `
192 | Check BibTeX
193 |
194 | ⟳
195 |
196 | `; // NOTE(review): the markup tags appear to be missing in this copy; the code below expects an element with id "bibtidy-cycle-icon" - confirm against the original file.
197 | document.body.appendChild(btn);
198 |
199 | // Cycle icon click: call checkBibTeX
200 | const cycleIcon = btn.querySelector('#bibtidy-cycle-icon'); // null when the button markup contains no such element
201 | cycleIcon.onclick = (e) => {
202 | e.stopPropagation(); // keep the click from also triggering the button's own click handler
203 | const editor = document.querySelector('.cm-content[contenteditable="true"][data-language="bibtex"]');
204 | if (editor) {
205 | this.editor = editor;
206 | this.checkBibTeX();
207 | } else {
208 | alert('Please select a .bib file in the file tree and open it in the editor.');
209 | }
210 | };
211 |
212 | // Main button click (not cycle icon): open DBLP search popup
213 | btn.addEventListener('click', (e) => {
214 | if (e.target === cycleIcon) return; // already handled
215 | this.showDBLPSearchPopup();
216 | });
217 |
218 | // Make button draggable
219 | let isDragging = false, dragOffsetX = 0, dragOffsetY = 0; // offsets: pointer position relative to the button's top-left corner
220 | btn.addEventListener('mousedown', function(e) {
221 | if (e.target === cycleIcon) return; // pressing the icon never starts a drag
222 | isDragging = true;
223 | dragOffsetX = e.clientX - btn.getBoundingClientRect().left;
224 | dragOffsetY = e.clientY - btn.getBoundingClientRect().top;
225 | btn.style.transition = 'none'; // no transition while dragging
226 | document.body.style.userSelect = 'none'; // disable text selection while dragging
227 | });
228 | document.addEventListener('mousemove', function(e) {
229 | if (isDragging) {
230 | btn.style.left = (e.clientX - dragOffsetX) + 'px';
231 | btn.style.top = (e.clientY - dragOffsetY) + 'px';
232 | btn.style.bottom = 'auto'; // drop the initial bottom anchoring so `top` positions the button
233 | }
234 | });
235 | document.addEventListener('mouseup', function() {
236 | if (isDragging) {
237 | isDragging = false;
238 | btn.style.transition = 'box-shadow 0.2s,transform 0.2s'; // restore the transition set in cssText
239 | document.body.style.userSelect = '';
240 | }
241 | });
242 |
243 | // Show/hide button based on .bib file selection
244 | const updateBtnVisibility = () => {
245 | const selectedFile = document.querySelector('li[aria-selected="true"][aria-label$=".bib"]');
246 | btn.style.display = selectedFile ? 'flex' : 'none';
247 | };
248 | // Initial check
249 | updateBtnVisibility();
250 | // Observe file tree selection changes
251 | const fileTreeObserver = new MutationObserver(updateBtnVisibility); // second observer on <body>, separate from the editor observer above
252 | fileTreeObserver.observe(document.body, { childList: true, subtree: true });
253 | }
254 |
255 | // Show DBLP search popup for keyword search
256 | showDBLPSearchPopup() { // Open a popup that searches DBLP by keyword and lists the BibTeX results with copy buttons.
257 | document.querySelectorAll('.bibtidy-dblp-popup').forEach(p => p.remove()); // at most one popup: remove any existing ones first
258 | const popup = document.createElement('div');
259 | popup.className = 'bibtidy-dblp-popup';
260 | popup.style.cssText = `
261 | position: fixed;
262 | left: 50%;
263 | top: 20%;
264 | transform: translate(-50%, 0);
265 | background: #fff;
266 | border: 1px solid #d9d9d9;
267 | border-radius: 8px;
268 | box-shadow: 0 6px 16px rgba(0,0,0,0.12);
269 | padding: 24px 24px 16px 24px;
270 | z-index: 10001;
271 | min-width: 400px;
272 | max-width: 90vw;
273 | `;
274 | popup.innerHTML = `
275 |
276 | 🔎 Search DBLP
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 | `; // NOTE(review): the markup tags appear to be missing in this copy; the code below expects #bibtidy-dblp-close-btn, #bibtidy-dblp-search-btn, #bibtidy-dblp-keyword and #bibtidy-dblp-results - confirm against the original file.
287 | document.body.appendChild(popup);
288 |
289 | // Close button
290 | popup.querySelector('#bibtidy-dblp-close-btn').onclick = () => popup.remove();
291 |
292 | // Search button
293 | popup.querySelector('#bibtidy-dblp-search-btn').onclick = async () => {
294 | const keyword = popup.querySelector('#bibtidy-dblp-keyword').value.trim();
295 | const resultsDiv = popup.querySelector('#bibtidy-dblp-results');
296 | resultsDiv.innerHTML = 'Searching...
';
297 | if (!keyword) {
298 | resultsDiv.innerHTML = 'Please enter a keyword.
';
299 | return;
300 | }
301 | // Query DBLP
302 | const bibs = await this.searchDBLPByKeyword(keyword, 3); // ask for at most 3 entries
303 | if (!bibs || bibs.length === 0) {
304 | resultsDiv.innerHTML = 'No results found.
';
305 | return;
306 | }
307 | resultsDiv.innerHTML = bibs.map((bib, idx) => `
308 |
309 |
310 | ${bib}
311 |
312 | `).join(''); // NOTE(review): ${bib} is interpolated into innerHTML as-is - confirm whether it needs HTML-escaping.
313 |
314 | // Attach copy event listeners after rendering
315 | setTimeout(() => {
316 | const copyBtns = popup.querySelectorAll('.bibtidy-copy-btn');
317 | copyBtns.forEach(btn => {
318 | btn.onclick = function() {
319 | const bibDiv = btn.closest('div'); // nearest enclosing <div> of the clicked button
320 | const bibText = bibDiv.querySelector('.bibtidy-bibtex-text').innerText;
321 | navigator.clipboard.writeText(bibText).then(() => {
322 | btn.textContent = 'Copied!';
323 | setTimeout(() => { btn.textContent = 'Copy'; }, 1200); // restore the label after 1.2 s
324 | });
325 | };
326 | });
327 | }, 0);
328 | };
329 | }
330 |
331 | // Query DBLP for top-N BibTeX entries by keyword
332 | async searchDBLPByKeyword(keyword, n = 3) {
333 | return new Promise((resolve) => {
334 | const query = encodeURIComponent(keyword);
335 | const url = `https://dblp.org/search/publ/api?q=${query}&format=bib&h=${n}`;
336 | GM_xmlhttpRequest({
337 | method: 'GET',
338 | url: url,
339 | timeout: 5000,
340 | onload: (response) => {
341 | try {
342 | if (response.status === 200) {
343 | const bibText = response.responseText.trim();
344 | if (bibText && bibText.includes('@')) {
345 | // Split into entries
346 | const entries = bibText.split(/(?=@\w+\s*\{)/g).map(e => e.trim()).filter(e => e);
347 | resolve(entries.slice(0, n));
348 | } else {
349 | resolve([]);
350 | }
351 | } else {
352 | resolve([]);
353 | }
354 | } catch (e) {
355 | resolve([]);
356 | }
357 | },
358 | onerror: () => resolve([]),
359 | ontimeout: () => resolve([])
360 | });
361 | });
362 | }
363 |
364 | setupEditorWatcher() {
365 | let timeout;
366 | this.editor.addEventListener('input', () => {
367 | clearTimeout(timeout);
368 | timeout = setTimeout(() => this.checkBibTeX(), 2000);
369 | });
370 |
371 | // Update error icon positions and check for new entries on scroll
372 | this.editor.parentElement.addEventListener('scroll', () => {
373 | this.updateErrorIconPositions();
374 | // Debounced check for new entries
375 | if (this._scrollCheckTimeout) clearTimeout(this._scrollCheckTimeout);
376 | this._scrollCheckTimeout = setTimeout(() => {
377 | this.checkForNewEntriesOnScroll();
378 | }, 300);
379 | });
380 | }
381 | checkForNewEntriesOnScroll() {
382 | // Get current BibTeX entries
383 | const content = this.getEditorContent();
384 | const entries = this.parseBibTeX(content);
385 | // Get keys of entries already shown as errors
386 | const shownKeys = new Set(this.errors.map(e => e.entry.key));
387 | // Find new entries not yet shown
388 | const newEntries = entries.filter(e => !shownKeys.has(e.key));
389 | if (newEntries.length > 0) {
390 | // For each new entry, check DBLP and show icon
391 | newEntries.forEach(async entry => {
392 | const { issues, correctBibTeX, status } = await this.getEntryIssuesAndCorrection(entry);
393 | this.showError(entry, issues, correctBibTeX, status);
394 | });
395 | }
396 | }
397 | /**
398 | * Shared logic for BibTeX entry validation and DBLP comparison.
399 | * Returns { issues, correctBibTeX }
400 | */
401 | /**
402 | * Returns { issues, correctBibTeX, status }
403 | * status: 'pass', 'not_found', 'mismatch', 'error'
404 | */
405 | async getEntryIssuesAndCorrection(entry) {
406 | let issues = [];
407 | let correctBibTeX = null;
408 | let status = 'not_found';
409 | if (entry.fields.title) {
410 | try {
411 | const dblpEntry = await this.searchDBLP(entry.fields.title);
412 | if (dblpEntry) {
413 | correctBibTeX = this.formatDBLPEntry(dblpEntry, entry.key);
414 | issues = this.compareWithDBLP(entry, dblpEntry);
415 | if (issues.length === 0) {
416 | status = 'pass';
417 | } else {
418 | status = 'mismatch';
419 | }
420 | } else {
421 | status = 'not_found';
422 | }
423 | } catch (error) {
424 | issues = ['Error fetching DBLP data'];
425 | status = 'error';
426 | }
427 | } else {
428 | status = 'not_found';
429 | }
430 | return { issues, correctBibTeX, status };
431 | }
432 |
433 | updateErrorIconPositions() {
434 | if (!this.errors || !this.editor) return;
435 | this.errors.forEach(({ entry, icon }) => {
436 | const range = this.getEntryRange(entry);
437 | if (range) {
438 | const rect = range.getBoundingClientRect();
439 | icon.style.left = (rect.right + 5) + 'px';
440 | icon.style.top = (rect.top + 2) + 'px';
441 | }
442 | });
443 | }
444 |
445 | getEditorContent() {
446 | if (!this.editor) return '';
447 |
448 | // Use innerText to preserve line breaks, or traverse DOM nodes
449 | const walker = document.createTreeWalker(
450 | this.editor,
451 | NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT,
452 | {
453 | acceptNode: (node) => {
454 | if (node.nodeType === Node.TEXT_NODE) {
455 | return NodeFilter.FILTER_ACCEPT;
456 | }
457 | if (node.nodeName === 'BR' || node.nodeName === 'DIV') {
458 | return NodeFilter.FILTER_ACCEPT;
459 | }
460 | return NodeFilter.FILTER_SKIP;
461 | }
462 | }
463 | );
464 |
465 | let content = '';
466 | let node;
467 |
468 | while (node = walker.nextNode()) {
469 | if (node.nodeType === Node.TEXT_NODE) {
470 | content += node.textContent;
471 | } else if (node.nodeName === 'BR') {
472 | content += '\n';
473 | } else if (node.nodeName === 'DIV' && content && !content.endsWith('\n')) {
474 | content += '\n';
475 | }
476 | }
477 |
478 | return content;
479 | }
480 |
481 | setEditorContent(newContent) {
482 | if (!this.editor) return;
483 |
484 | // For CodeMirror editors, we need to set content properly
485 | // Try multiple approaches to ensure compatibility
486 |
487 | // Method 1: Direct textContent (may lose formatting)
488 | this.editor.textContent = newContent;
489 |
490 | // Method 2: Try to trigger CodeMirror updates if available
491 | if (this.editor.CodeMirror) {
492 | this.editor.CodeMirror.setValue(newContent);
493 | } else {
494 | // Method 3: Use innerText to preserve some formatting
495 | this.editor.innerText = newContent;
496 | }
497 | }
498 |
499 | async checkBibTeX() {
500 | if (!this.editor) return;
501 |
502 | const content = this.getEditorContent();
503 | if (!content.includes('@')) {
504 | this.clearErrors();
505 | return;
506 | }
507 |
508 | this.clearErrors();
509 | const entries = this.parseBibTeX(content);
510 |
511 | for (const entry of entries) {
512 | const { issues, correctBibTeX, status } = await this.getEntryIssuesAndCorrection(entry);
513 | this.showError(entry, issues, correctBibTeX, status);
514 | }
515 | }
516 |
517 | parseBibTeX(content) {
518 | const entries = [];
519 | const entryRegex = /@(\w+)\s*\{\s*([^,}]+)/g;
520 | let match;
521 |
522 | while ((match = entryRegex.exec(content)) !== null) {
523 | const type = match[1].toLowerCase();
524 | const key = match[2].trim();
525 | const startPos = match.index;
526 |
527 | // Find entry end (only accept if braces are balanced)
528 | let braces = 0;
529 | let endPos = -1;
530 | let foundFirstBrace = false;
531 |
532 | for (let i = startPos; i < content.length; i++) {
533 | if (content[i] === '{') {
534 | braces++;
535 | foundFirstBrace = true;
536 | } else if (content[i] === '}') {
537 | braces--;
538 | if (foundFirstBrace && braces === 0) {
539 | endPos = i;
540 | break;
541 | }
542 | }
543 | }
544 |
545 | // Only parse if a complete entry (balanced braces) was found
546 | if (endPos !== -1) {
547 | const entryText = content.substring(startPos, endPos + 1);
548 | const fields = this.parseFields(entryText);
549 |
550 | entries.push({
551 | type,
552 | key,
553 | startPos,
554 | endPos,
555 | text: entryText,
556 | fields
557 | });
558 | }
559 | }
560 |
561 | return entries;
562 | }
563 |
564 | parseFields(entryText) {
565 | const fields = {};
566 | let i = 0;
567 |
568 | while (i < entryText.length) {
569 | // Find field name
570 | const fieldMatch = entryText.substring(i).match(/(\w+)\s*=\s*/);
571 | if (!fieldMatch) {
572 | i++;
573 | continue;
574 | }
575 |
576 | const fieldName = fieldMatch[1].toLowerCase();
577 | i += fieldMatch.index + fieldMatch[0].length;
578 |
579 | // Parse field value
580 | if (i < entryText.length && entryText[i] === '{') {
581 | // Braced value - handle nested braces
582 | let braceCount = 0;
583 | let start = i;
584 |
585 | while (i < entryText.length) {
586 | if (entryText[i] === '{') braceCount++;
587 | else if (entryText[i] === '}') braceCount--;
588 | i++;
589 | if (braceCount === 0) break;
590 | }
591 |
592 | const value = entryText.substring(start + 1, i - 1); // Remove outer braces
593 | fields[fieldName] = value;
594 | } else {
595 | // Unbraced value - read until comma or end
596 | let start = i;
597 | while (i < entryText.length && entryText[i] !== ',' && entryText[i] !== '}') {
598 | i++;
599 | }
600 | const value = entryText.substring(start, i).trim();
601 | fields[fieldName] = value;
602 | }
603 | }
604 |
605 | return fields;
606 | }
607 |
608 | calculateTitleSimilarity(title1, title2) {
609 | if (!title1 || !title2) return 0;
610 |
611 | // Normalize titles for comparison
612 | const normalize = (title) => title.toLowerCase()
613 | .replace(/[{}]/g, '')
614 | .replace(/[^\w\s]/g, ' ')
615 | .replace(/\s+/g, ' ')
616 | .trim();
617 |
618 | const norm1 = normalize(title1);
619 | const norm2 = normalize(title2);
620 |
621 | // Simple word-based similarity using Jaccard coefficient
622 | const words1 = new Set(norm1.split(' '));
623 | const words2 = new Set(norm2.split(' '));
624 |
625 | const intersection = new Set([...words1].filter(x => words2.has(x)));
626 | const union = new Set([...words1, ...words2]);
627 |
628 | return intersection.size / union.size;
629 | }
630 |
631 | cleanFieldValue(value) {
632 | if (!value) return '';
633 |
634 | return value
635 | .replace(/[\n\r\t]/g, ' ') // Replace newlines, carriage returns, tabs with spaces
636 | .replace(/\s+/g, ' ') // Replace multiple spaces with single space
637 | .trim(); // Remove leading/trailing whitespace
638 | }
639 |
640 | async searchDBLP(title) {
641 | return new Promise((resolve) => {
642 | const query = encodeURIComponent(title.replace(/[{}]/g, ''));
643 | const url = `https://dblp.org/search/publ/api?q=${query}&format=bib&h=5`;
644 |
645 | GM_xmlhttpRequest({
646 | method: 'GET',
647 | url: url,
648 | timeout: 5000,
649 | onload: (response) => {
650 | try {
651 | if (response.status === 200) {
652 | const bibText = response.responseText.trim();
653 | if (bibText && bibText.includes('@')) {
654 | const entries = this.parseBibTeX(bibText);
655 | if (entries.length === 0) {
656 | resolve(null);
657 | return;
658 | }
659 | let bestEntry = null;
660 | let bestSimilarity = 0;
661 | for (const entry of entries) {
662 | if (entry.fields.title) {
663 | for (const [fieldName, fieldValue] of Object.entries(entry.fields)) {
664 | entry.fields[fieldName] = this.cleanFieldValue(fieldValue);
665 | }
666 | const similarity = this.calculateTitleSimilarity(title, entry.fields.title);
667 | if (similarity > bestSimilarity && similarity >= this.titleSimilarityThreshold) {
668 | bestSimilarity = similarity;
669 | bestEntry = entry;
670 | }
671 | }
672 | }
673 | resolve(bestEntry);
674 | } else {
675 | resolve(null);
676 | }
677 | } else {
678 | resolve(null);
679 | }
680 | } catch (e) {
681 | resolve(null);
682 | }
683 | },
684 | onerror: () => resolve(null),
685 | ontimeout: () => resolve(null)
686 | });
687 | });
688 | }
689 |
690 | formatDBLPEntry(dblpEntry, originalKey) {
691 | let bibtex = `@${dblpEntry.type}{${originalKey},\n`;
692 |
693 | const fieldOrder = ['title', 'author', 'booktitle', 'journal', 'year', 'volume', 'pages', 'doi'];
694 |
695 | for (const field of fieldOrder) {
696 | if (dblpEntry.fields[field]) {
697 | bibtex += ` ${field} = {${dblpEntry.fields[field]}},\n`;
698 | }
699 | }
700 |
701 | bibtex = bibtex.replace(/,\n$/, '\n');
702 | bibtex += '}';
703 |
704 | return bibtex;
705 | }
706 |
707 | compareWithDBLP(entry, dblpEntry) {
708 | const issues = [];
709 |
710 | // Compare key fields
711 | const fieldsToCheck = ['title', 'author', 'year', 'journal', 'booktitle'];
712 |
713 | for (const field of fieldsToCheck) {
714 | const originalValue = entry.fields[field];
715 | const dblpValue = dblpEntry.fields[field];
716 |
717 | if (dblpValue && (!originalValue ||
718 | this.normalizeForComparison(originalValue) !== this.normalizeForComparison(dblpValue))) {
719 |
720 | const originalDisplay = originalValue ? `"${originalValue}"` : "(missing)";
721 | const dblpDisplay = `"${dblpValue}"`;
722 | issues.push(`${field}: ${originalDisplay} → ${dblpDisplay}`);
723 | }
724 | }
725 |
726 | if (entry.type !== dblpEntry.type) {
727 | issues.push(`Entry type: ${entry.type} → ${dblpEntry.type}`);
728 | }
729 |
730 | return issues;
731 | }
732 |
733 | normalizeForComparison(value) {
734 | if (!value) return '';
735 | return value.toLowerCase()
736 | .replace(/\s+/g, ' ') // Replace all whitespace (including \n, \t) with single space
737 | .trim();
738 | }
739 |
740 | validateEntry(entry) {
741 | const issues = [];
742 |
743 | if (!entry.fields.title || entry.fields.title.trim() === '') {
744 | issues.push('Missing title');
745 | }
746 |
747 | if (!entry.fields.author || entry.fields.author.trim() === '') {
748 | issues.push('Missing author');
749 | }
750 |
751 | if (!entry.fields.year || entry.fields.year.trim() === '') {
752 | issues.push('Missing year');
753 | }
754 |
755 | if (entry.type === 'article' && !entry.fields.journal) {
756 | issues.push('Missing journal for article');
757 | }
758 |
759 | if (entry.type === 'inproceedings' && !entry.fields.booktitle) {
760 | issues.push('Missing booktitle for conference paper');
761 | }
762 |
763 | return issues;
764 | }
765 |
766 | /**
767 | * Show an icon for the entry based on status: 'pass', 'not_found', 'mismatch', 'error'.
768 | * status: 'pass' (green check), 'not_found' (gray question), 'mismatch' (red !), 'error' (red X)
769 | */
770 | showError(entry, issues, correctBibTeX = null, status = 'mismatch') {
771 | const range = this.getEntryRange(entry);
772 | if (!range) return;
773 |
774 | const rect = range.getBoundingClientRect();
775 |
776 | const icon = document.createElement('div');
777 | icon.className = 'bibtex-error-icon';
778 | icon.style.position = 'fixed';
779 | icon.style.left = (rect.right + 5) + 'px';
780 | icon.style.top = (rect.top + 2) + 'px';
781 | icon.style.pointerEvents = 'auto';
782 |
783 | // Set icon style and content based on status
784 | if (status === 'pass') {
785 | icon.style.background = '#52c41a'; // green
786 | icon.textContent = '✓';
787 | icon.title = 'Entry matches DBLP';
788 | } else if (status === 'not_found') {
789 | icon.style.background = '#bfbfbf'; // gray
790 | icon.textContent = '?';
791 | icon.title = 'Entry not found in DBLP';
792 | } else if (status === 'error') {
793 | icon.style.background = '#d32f2f'; // red
794 | icon.textContent = '✗';
795 | icon.title = 'Error fetching DBLP data';
796 | } else {
797 | // mismatch or default
798 | icon.style.background = '#ff4d4f'; // red
799 | icon.textContent = '!';
800 | icon.title = 'Entry differs from DBLP';
801 | }
802 |
803 | icon.onclick = (e) => {
804 | e.stopPropagation();
805 | this.showPopup(entry, issues, icon, correctBibTeX, status);
806 | };
807 |
808 | this.overlay.appendChild(icon);
809 | this.errors.push({ entry, icon, issues, correctBibTeX, status });
810 | }
811 |
812 | getEntryRange(entry) {
813 | const walker = document.createTreeWalker(
814 | this.editor,
815 | NodeFilter.SHOW_TEXT
816 | );
817 |
818 | let node;
819 | while (node = walker.nextNode()) {
820 | if (node.textContent.includes(entry.key)) {
821 | const range = document.createRange();
822 | range.selectNode(node);
823 | return range;
824 | }
825 | }
826 | return null;
827 | }
828 |
// Shows the detail popup for one entry next to its status icon.
// Parameters:
//   entry         - parsed entry; entry.key is interpolated into the popup text
//   issues        - issue strings, each rendered through diffHighlight()
//   icon          - the icon element the popup is positioned against
//   correctBibTeX - suggested replacement text, or null (default) for none
//   status        - 'pass', 'not_found', 'error' or anything else ('mismatch' default)
// Any existing `.bibtex-popup` is removed first. The popup is removed again by the
// dismiss button, by a click outside both popup and icon, or after 15000 ms.
// NOTE(review): the template literals below (diffHighlight results, suggestionHtml,
// popup.innerHTML) look truncated in this listing, and the listing's own numbering
// jumps from 883 to 887 and from 894 to 907, yet the code queries
// .bibtex-btn-dismiss and .bibtex-btn-apply (with a data-entry-key attribute).
// Presumably the HTML markup was lost when this listing was produced; confirm
// against the original script before editing.
829 | showPopup(entry, issues, icon, correctBibTeX = null, status = 'mismatch') {
830 | document.querySelectorAll('.bibtex-popup').forEach(p => p.remove());
831 |
832 | const popup = document.createElement('div');
833 | popup.className = 'bibtex-popup';
834 |
835 | // Diff-style highlight for issues
// Maps one issue string to its popup representation: a removed/added pair for
// `field: "old" → "new"` and for `Entry type: a → b`, a single removed line for
// strings starting with 'Missing', and the unchanged string otherwise.
836 | function diffHighlight(issue) {
837 | // Try to parse: field: "old" → "new"
838 | const match = issue.match(/^(\w+): (".*?") → (".*?")$/);
839 | if (match) {
840 | const field = match[1];
841 | const oldVal = match[2];
842 | const newVal = match[3];
843 | return `- ${field}: ${oldVal}
+ ${field}: ${newVal}`;
844 | }
845 | // Entry type change
846 | const typeMatch = issue.match(/^Entry type: (\w+) → (\w+)$/);
847 | if (typeMatch) {
848 | return `- type: ${typeMatch[1]}
+ type: ${typeMatch[2]}`;
849 | }
850 | // Missing field
851 | if (issue.startsWith('Missing')) {
852 | return `- ${issue}`;
853 | }
854 | // Default
855 | return `${issue}`;
856 | }
857 |
// The suggestion block is only built when a corrected entry is available.
858 | let suggestionHtml = '';
859 | if (correctBibTeX) {
860 | suggestionHtml = `
861 | ${correctBibTeX}
862 | `;
863 | }
864 |
865 | // Choose popup icon and header based on status
// NOTE(review): popupIcon and popupHeader are not referenced in the visible part
// of the innerHTML template below; presumably they were used in the lost markup.
866 | let popupIcon = '⚠️';
867 | let popupHeader = 'BibTeX Issues Found';
868 | if (status === 'pass') {
869 | popupIcon = '✅';
870 | popupHeader = 'Entry matches DBLP';
871 | } else if (status === 'not_found') {
872 | popupIcon = '❓';
873 | popupHeader = 'Entry not found in DBLP';
874 | } else if (status === 'error') {
875 | popupIcon = '❌';
876 | popupHeader = 'Error fetching DBLP data';
877 | } else if (correctBibTeX) {
878 | popupIcon = '🔍';
879 | popupHeader = 'DBLP Correction Available';
880 | }
881 |
// NOTE(review): entry.key, the issue strings and correctBibTeX are interpolated
// into innerHTML as-is; confirm whether they need HTML-escaping.
882 | popup.innerHTML = `
883 |
887 |
888 |
Entry "${entry.key}" ${status === 'pass' ? 'matches DBLP.' : status === 'not_found' ? 'not found in DBLP.' : status === 'error' ? 'error fetching DBLP data.' : correctBibTeX ? 'differs from DBLP:' : 'has issues:'}
889 |
890 | ${issues.map(diffHighlight).join('')}
891 |
892 | ${suggestionHtml}
893 |
894 |
907 | `;
908 |
909 | document.body.appendChild(popup);
910 |
911 | // Add event listener for the dismiss button
912 | const dismissBtn = popup.querySelector('.bibtex-btn-dismiss');
913 | if (dismissBtn) {
914 | dismissBtn.addEventListener('click', () => {
915 | popup.remove();
916 | });
917 | }
918 |
919 | // Add event listener for the apply button
// The key is read back from the button's data-entry-key attribute and handed to
// applyCorrection().
920 | const applyBtn = popup.querySelector('.bibtex-btn-apply');
921 | if (applyBtn) {
922 | applyBtn.addEventListener('click', () => {
923 | const entryKey = applyBtn.getAttribute('data-entry-key');
924 | this.applyCorrection(entryKey);
925 | });
926 | }
927 |
// Place the popup 10px to the right of the icon, aligned with the icon's top edge.
928 | const iconRect = icon.getBoundingClientRect();
929 | popup.style.left = (iconRect.right + 10) + 'px';
930 | popup.style.top = iconRect.top + 'px';
931 | popup.style.display = 'block';
932 |
// Auto-close after 15 seconds if the popup is still attached.
933 | setTimeout(() => {
934 | if (popup.parentElement) popup.remove();
935 | }, 15000);
936 |
// Close on a click outside both popup and icon. The listener is registered after
// 100 ms, presumably so the click that opened the popup does not close it again.
937 | setTimeout(() => {
938 | const hidePopup = (e) => {
939 | if (!popup.contains(e.target) && !icon.contains(e.target)) {
940 | popup.remove();
941 | document.removeEventListener('click', hidePopup);
942 | }
943 | };
944 | document.addEventListener('click', hidePopup);
945 | }, 100);
946 | }
947 |
948 | applyCorrection(entryKey) {
949 | const errorData = this.errors.find(e => e.entry.key === entryKey);
950 | if (!errorData || !errorData.correctBibTeX) return;
951 | const newEntry = errorData.correctBibTeX;
952 |
953 | // Locate the entry key position in the editor nodes
954 | let walker = document.createTreeWalker(this.editor, NodeFilter.SHOW_TEXT);
955 | let nodes = [], fullText = '', keyIdx = -1, keyNodeIdx = -1, keyOffset = -1;
956 | while (true) {
957 | let node = walker.nextNode();
958 | if (!node) break;
959 | nodes.push(node);
960 | }
961 | fullText = nodes.map(n => n.textContent).join('');
962 | keyIdx = fullText.indexOf(entryKey);
963 | if (keyIdx !== -1) {
964 | // Find node and offset for entry key
965 | let count = 0;
966 | for (let i = 0; i < nodes.length; i++) {
967 | let nodeLen = nodes[i].textContent.length;
968 | if (count + nodeLen > keyIdx) {
969 | keyNodeIdx = i;
970 | keyOffset = keyIdx - count;
971 | break;
972 | }
973 | count += nodeLen;
974 | }
975 | // Expand range to cover the full BibTeX entry
976 | // Scan forward from keyIdx to find the opening '{' and then match braces to find closing '}'
977 | let entryStartIdx = fullText.lastIndexOf('@', keyIdx);
978 | let braceCount = 0, entryEndIdx = -1;
979 | let foundFirstBrace = false;
980 | for (let i = keyIdx - 1; i < fullText.length; i++) {
981 | if (fullText[i] === '{') {
982 | braceCount++;
983 | foundFirstBrace = true;
984 | } else if (fullText[i] === '}') {
985 | braceCount--;
986 | if (foundFirstBrace && braceCount === 0) {
987 | entryEndIdx = i;
988 | break;
989 | }
990 | }
991 | }
992 | if (entryStartIdx !== -1 && entryEndIdx !== -1) {
993 | // Map entryStartIdx and entryEndIdx to node/offsets
994 | let startNode = null, startOffset = 0, endNode = null, endOffset = 0;
995 | let count = 0;
996 | for (let i = 0; i < nodes.length; i++) {
997 | let nodeLen = nodes[i].textContent.length;
998 | if (!startNode && count + nodeLen > entryStartIdx) {
999 | startNode = nodes[i];
1000 | startOffset = entryStartIdx - count;
1001 | }
1002 | if (!endNode && count + nodeLen > entryEndIdx) {
1003 | endNode = nodes[i];
1004 | endOffset = entryEndIdx - count + 1;
1005 | break;
1006 | }
1007 | count += nodeLen;
1008 | }
1009 | if (startNode && endNode) {
1010 | // Select the entry across nodes
1011 | const range = document.createRange();
1012 | range.setStart(startNode, startOffset);
1013 | range.setEnd(endNode, endOffset);
1014 | const sel = window.getSelection();
1015 | sel.removeAllRanges();
1016 | sel.addRange(range);
1017 | // Remove the old entry
1018 | document.execCommand('delete');
1019 | // Insert the new entry at the same position
1020 | document.execCommand('insertText', false, newEntry);
1021 | }
1022 | }
1023 | } else {
1024 | // Fallback: use regex to find and replace
1025 | const content = this.getEditorContent();
1026 | const entryRegex = new RegExp(`@\\w+\\s*\\{\\s*${entryKey}[^@]*?\\}`, 's');
1027 | const updatedContent = content.replace(entryRegex, newEntry);
1028 | this.setEditorContent(updatedContent);
1029 | }
1030 |
1031 | // Simulate user input
1032 | const inputEvent = new Event('input', { bubbles: true });
1033 | const changeEvent = new Event('change', { bubbles: true });
1034 | this.editor.dispatchEvent(inputEvent);
1035 | this.editor.dispatchEvent(changeEvent);
1036 | document.querySelectorAll('.bibtex-popup').forEach(p => p.remove());
1037 | this.checkBibTeX();
1038 | }
1039 |
1040 | clearErrors() {
1041 | if (this.overlay) {
1042 | this.overlay.innerHTML = '';
1043 | }
1044 | this.errors = [];
1045 | document.querySelectorAll('.bibtex-popup').forEach(p => p.remove());
1046 | }
1047 | }
1048 |
// Initialize: create the single checker instance and expose it on window so it
// can be reached from outside this script's scope.
const checker = new BibTeXChecker();
window.bibTeXChecker = checker; // Make globally accessible

// Start right away when the DOM is already parsed; otherwise wait for it.
const startChecker = () => checker.init();
if (document.readyState !== 'loading') {
    startChecker();
} else {
    document.addEventListener('DOMContentLoaded', () => startChecker());
}
1058 |
1059 | })();
1060 |
--------------------------------------------------------------------------------