├── __init__.py ├── .gitignore ├── init.sh ├── setup.py ├── LICENSE ├── README.md ├── bibtidy.py ├── BaseXClient.py └── overleaf-bibtidy.js /__init__.py: -------------------------------------------------------------------------------- 1 | from . import bibtidy 2 | 3 | __all__ = ["bibtidy"] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | bibtidy.egg-info 3 | __pycache__ 4 | .vscode -------------------------------------------------------------------------------- /init.sh: -------------------------------------------------------------------------------- 1 | sudo apt install basex -y 2 | wget https://dblp.org/xml/dblp.xml.gz 3 | gunzip dblp.xml.gz 4 | wget https://dblp.org/xml/dblp.dtd 5 | 6 | basex -c "ALTER PASSWORD admin admin" 7 | basex -c "SET INTPARSE true; SET DTD true; SET TEXTINDEX true; SET TOKENINDEX true; SET FTINDEX true; CREATE DB dblp dblp.xml" 8 | 9 | basexserver -S -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | version = "0.0.2" 4 | 5 | def load_readme(): 6 | with open("README.md") as f: 7 | return f.read() 8 | 9 | setuptools.setup( 10 | name="bibtidy", 11 | version=version, 12 | author='Jinsheng Ba', 13 | author_email='bajinsheng@gmail.com', 14 | description="A tool for simplifying BiBTex creation.", 15 | long_description=load_readme(), 16 | long_description_content_type="text/markdown", 17 | url="https://github.com/bajinsheng/bibtidy", 18 | py_modules=["bibtidy"], 19 | packages=setuptools.find_packages(), 20 | install_requires=['bibtexparser==1.4.0', 21 | 'requests', 22 | ], 23 | classifiers=[ 24 | "Programming Language :: Python :: 3", 25 | "License :: OSI Approved :: MIT License", 26 | "Operating System :: OS Independent", 27 | ], 28 | python_requires='>=3.6', 29 | entry_points={ 30 | "console_scripts": [ 31 | "bibtidy = bibtidy:main" 32 | ] 33 | } 34 | ) 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 (Bill) Yuchen Lin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bibtidy 2 | 3 | Welcome to bibtidy, a simple tool for simplifying the creation of your BibTeX files! 4 | 5 | ## Welcome Stars 6 | If you like this tool, don't forget to star it! Your support is my motivation to keep updating this tool. 7 | 8 | ## Features 9 | 1. Searching: Search a keyword from DBLP and show all relevant BibTex entries in order. 10 | 2. Checking: Automatically fix incorrect and fill incomplete entries with reference to DBLP. 11 | 3. **Overleaf BibTeX Checker [NEW]:** 12 | - Real-time BibTeX error detection and highlights in Overleaf 13 | - DBLP integration for authoritative corrections 14 | - Error icons and popups with diff-style suggestions 15 | - Works only when a `.bib` file is selected in Overleaf 16 | 17 | ## Requirements 18 | Python 3.10.0 19 | (Other versions are not tested. Welcome to test and report.) 20 | 21 | ## Installation 22 | ```shell 23 | pip3 install git+https://github.com/bajinsheng/bibtidy@release 24 | ``` 25 | If you want to install it in an isolated virtualenv, you can use `pipx` instead: 26 | ```shell 27 | pipx install git+https://github.com/bajinsheng/bibtidy@release 28 | ``` 29 | 30 | ## Usage 1: Searching for a paper 31 | ```python 32 | bibtidy --keyword "How Good Are Query Optimizers" 33 | ``` 34 | The output will be: 35 | ```bibtex 36 | @article{viktor2015how, 37 | author = {Viktor Leis and 38 | Andrey Gubichev and 39 | Atanas Mirchev and 40 | Peter A. Boncz and 41 | Alfons Kemper and 42 | Thomas Neumann}, 43 | bibsource = {dblp computer science bibliography, https://dblp.org}, 44 | biburl = {https://dblp.org/rec/journals/pvldb/LeisGMBK015.bib}, 45 | doi = {10.14778/2850583.2850594}, 46 | journal = {Proc. {VLDB} Endow.}, 47 | number = {3}, 48 | pages = {204--215}, 49 | timestamp = {Sat, 25 Apr 2020 01:00:00 +0200}, 50 | title = {How Good Are Query Optimizers, Really?}, 51 | url = {http://www.vldb.org/pvldb/vol9/p204-leis.pdf}, 52 | volume = {9}, 53 | year = {2015} 54 | } 55 | ``` 56 | 57 | ## Usage 2: Checking a BibTeX file 58 | Suppose we have a file named "test.bib" with some BibTex entries from unknown sources: 59 | ```bibtex 60 | @misc{ba2022efficient, 61 | title={Efficient Greybox Fuzzing to Detect Memory Errors}, 62 | author={Jinsheng Ba and Gregory J. Duck and Abhik Roychoudhury}, 63 | year={2022}, 64 | eprint={2204.02773}, 65 | archivePrefix={arXiv}, 66 | primaryClass={cs.CR} 67 | } 68 | 69 | ``` 70 | We can check and autofix it by running the following command: 71 | ```python 72 | bibtidy --file "test.bib" 73 | ``` 74 | 75 | The output will be: 76 | ```bibtex 77 | @inproceedings{ba2022efficient, 78 | author = {Jinsheng Ba and 79 | Gregory J. Duck and 80 | Abhik Roychoudhury}, 81 | bibsource = {dblp computer science bibliography, https://dblp.org}, 82 | biburl = {https://dblp.org/rec/conf/kbse/BaDR22.bib}, 83 | booktitle = {37th {IEEE/ACM} International Conference on Automated Software Engineering, 84 | {ASE} 2022, Rochester, MI, USA, October 10-14, 2022}, 85 | doi = {10.1145/3551349.3561161}, 86 | pages = {37:1--37:12}, 87 | publisher = {{ACM}}, 88 | timestamp = {Sun, 15 Jan 2023 00:00:00 +0100}, 89 | title = {Efficient Greybox Fuzzing to Detect Memory Errors}, 90 | url = {https://doi.org/10.1145/3551349.3561161}, 91 | year = {2022} 92 | } 93 | ``` 94 | 95 | ``Note: due to the rate limitation of DBLP, the checking process may be failed for large BibTeX files. In this case, you can split the file into smaller parts and check them separately.`` 96 | 97 | ## Usage 3: Overleaf BibTeX Checker 98 | 99 | This is a Tampermonkey/Greasemonkey userscript for Overleaf. It provides real-time BibTeX validation and DBLP-powered corrections directly in the Overleaf editor. 100 | 101 | **How to use:** 102 | 1. Install [Tampermonkey](https://www.tampermonkey.net/) or [Greasemonkey](https://www.greasespot.net/) in your browser. 103 | 2. Add the script from [`overleaf-bibtidy.user.js`](./overleaf-bibtidy.user.js) or Greasy Fork. 104 | 3. Open your Overleaf project and select a `.bib` file in the file tree. 105 | 4. Use the "Check BibTeX" button and enjoy instant feedback and corrections! 106 | 107 | **Source & Issues:** 108 | - [GitHub Repo](https://github.com/bajinsheng/bibtidy) 109 | - [Greasy Fork Script](https://greasyfork.org/en/scripts/544119-bibtidy-plugin-for-overleaf-editor) 110 | -------------------------------------------------------------------------------- /bibtidy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import argparse 3 | import os, re 4 | import bibtexparser 5 | from difflib import SequenceMatcher 6 | from bibtexparser.bparser import BibTexParser 7 | import BaseXClient 8 | from lxml import etree 9 | from tqdm import tqdm 10 | 11 | session = BaseXClient.Session('localhost', 1984, 'admin', 'admin') 12 | 13 | def dblp_search(title): 14 | data = session.execute("xquery for $x in doc('dblp')/dblp/* where $x/title contains text '" + title + "' return $x") 15 | data = "\n" + data + "\n" 16 | root = etree.fromstring(data) 17 | return root 18 | 19 | def bibtex_prioritize(entries, title): 20 | """ 21 | Prioritize the entries according to the similarity of the title. 22 | """ 23 | selected_entries = [] 24 | selected_eprint_entries = [] 25 | for entry in entries: 26 | similarity = SequenceMatcher(None, entry.find('title').text, title).ratio() 27 | if entry.find('journal') is not None and entry.find('journal').text == 'CoRR': # Lower the priorities of the Arxiv papers 28 | selected_eprint_entries.append({"entry": entry, "similarity": similarity}) 29 | else: 30 | selected_entries.append({"entry": entry, "similarity": similarity}) 31 | 32 | sorted_selected_entries = sorted(selected_entries, key=lambda x: x["similarity"], reverse=True) 33 | sorted_selected_eprint_entries = sorted(selected_eprint_entries, key=lambda x: x["similarity"], reverse=True) 34 | sorted_selected_entries.extend(sorted_selected_eprint_entries) 35 | return sorted_selected_entries 36 | 37 | def write_bibtex(entry, output): 38 | """ 39 | Write the bibtex entry to the file. 40 | """ 41 | dblp_library = bibtexparser.bibdatabase.BibDatabase() 42 | dblp_library.entries = [entry] 43 | writer = bibtexparser.bwriter.BibTexWriter() 44 | writer.order_entries_by = None 45 | result = bibtexparser.dumps(dblp_library, writer=writer) 46 | with open(output, "a") as f: 47 | f.write(result) 48 | 49 | 50 | def bibtex_checking(bibtex_library, args): 51 | ''' 52 | correct the bibtex file with the dblp database and return the results. 53 | ''' 54 | for entry in tqdm(bibtex_library.entries, desc='Processing bibtex', unit='entry'): 55 | old_title = entry['title'] 56 | bibtex_matched = dblp_search(old_title) 57 | if (len(bibtex_matched) > 0): 58 | bibtex_best_match = bibtex_prioritize(bibtex_matched, entry['title'])[0] 59 | authors = "" 60 | for key in bibtex_best_match['entry']: 61 | if key.tag == 'author': 62 | if authors != "": 63 | authors += " and " 64 | if key.text.split(" ")[-1].isdigit(): # Remove the number at the end of the author name 65 | authors += key.text.rsplit(' ', 1)[0] 66 | authors += re.sub(r'\s*\d+$', '', key.text) 67 | elif key.tag == 'url' or key.tag == 'crossref': # Ignore interrnal tags 68 | continue 69 | elif key.tag == 'ee': # Convert DOI 70 | entry['url'] = key.text 71 | if key.text.startswith('https://doi.org/'): # Some publishers, such as USENIX, do not provide DOI 72 | entry['doi'] = key.text.split('https://doi.org/')[1].rstrip('}') 73 | else: 74 | entry[key.tag] = key.text 75 | if authors != "": 76 | entry['author'] = authors 77 | entry['ENTRYTYPE'] = bibtex_best_match['entry'].tag # Update the entry type 78 | 79 | # Reporting results 80 | if bibtex_best_match['similarity'] < 0.5 and args.debug: 81 | print("[Debug] \"" + entry['title'] + "\" has no similar entires.") 82 | elif bibtex_best_match['similarity'] < 0.8: 83 | print("[Warning] Suspicious update: \"" + old_title + "\" -> \"" + entry['title'] + "\". Please check whether it is correct.") 84 | elif bibtex_best_match['similarity'] < 1 and args.debug: 85 | print("[Debug] \"" + old_title + "\" -> \"" + entry['title'] + "\".") 86 | write_bibtex(entry, args.file + "_revised.bib") 87 | else: 88 | if args.debug: 89 | print("[Debug] \"" + entry['title'] + "\" is not found in the DBLP database.") 90 | return 91 | 92 | def main(): 93 | parser = argparse.ArgumentParser(description='bibtidy: Make your research easier with correct citations!') 94 | parser.add_argument('file') 95 | parser.add_argument('-o', '--output', type=str, default='stdout', help='the file path of the output') 96 | parser.add_argument('-d', '--debug', action='store_true', default=False, help='enable debug mode') 97 | args = parser.parse_args() 98 | 99 | if os.path.isfile(args.file + "_revised.bib") == True: 100 | print("The output file already exists! Please delete it first.") 101 | return 102 | 103 | with open(args.file) as bibtex_file: 104 | parser = BibTexParser() 105 | parser.ignore_nonstandard_types = False 106 | bibtex_library = bibtexparser.load(bibtex_file, parser) 107 | bibtex_checking(bibtex_library, args) 108 | 109 | # close session 110 | if session: 111 | session.close() 112 | 113 | 114 | if __name__ == "__main__": 115 | main() -------------------------------------------------------------------------------- /BaseXClient.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Python 2.7.3 and 3.x client for BaseX. 4 | Works with BaseX 7.0 and later 5 | 6 | Requires Python 3.x or Python 2.x having some backports like bytearray. 7 | (I've tested Python 3.2.3, and Python 2.7.3 on Fedora 16 linux x86_64.) 8 | 9 | LIMITATIONS: 10 | 11 | * binary content would corrupt, maybe. (I didn't test it) 12 | * also, will fail to extract stored binary content, maybe. 13 | (both my code, and original don't care escaped 0xff.) 14 | 15 | Documentation: http://docs.basex.org/wiki/Clients 16 | 17 | (C) 2012, Hiroaki Itoh. BSD License 18 | updated 2014 by Marc van Grootel 19 | 20 | """ 21 | 22 | import hashlib 23 | import socket 24 | import threading 25 | 26 | # --------------------------------- 27 | # 28 | 29 | 30 | class SocketWrapper(object): 31 | """a wrapper to python native socket module.""" 32 | 33 | def __init__(self, sock, 34 | receive_bytes_encoding='utf-8', 35 | send_bytes_encoding='utf-8'): 36 | 37 | self.receive_bytes_encoding = receive_bytes_encoding 38 | self.send_bytes_encoding = send_bytes_encoding 39 | 40 | self.terminator = bytearray(chr(0), self.receive_bytes_encoding) 41 | self.__s = sock 42 | self.__buf = bytearray(chr(0) * 0x1000, self.receive_bytes_encoding) 43 | self.__bpos = 0 44 | self.__bsize = 0 45 | 46 | def clear_buffer(self): 47 | """reset buffer status for next invocation ``recv_until_terminator()`` 48 | or ``recv_single_byte()``.""" 49 | self.__bpos = 0 50 | self.__bsize = 0 51 | 52 | def __fill_buffer(self): 53 | """cache next bytes""" 54 | if self.__bpos >= self.__bsize: 55 | self.__bsize = self.__s.recv_into(self.__buf) 56 | self.__bpos = 0 57 | 58 | # Returns a single byte from the socket. 59 | def recv_single_byte(self): 60 | """recv a single byte from previously fetched buffer.""" 61 | self.__fill_buffer() 62 | result_byte = self.__buf[self.__bpos] 63 | self.__bpos += 1 64 | return result_byte 65 | 66 | # Reads until terminator byte is found. 67 | def recv_until_terminator(self): 68 | """recv a nul(or specified as terminator_byte)-terminated whole string 69 | from previously fetched buffer.""" 70 | result_bytes = bytearray() 71 | while True: 72 | self.__fill_buffer() 73 | pos = self.__buf.find(self.terminator, self.__bpos, self.__bsize) 74 | if pos >= 0: 75 | result_bytes.extend(self.__buf[self.__bpos:pos]) 76 | self.__bpos = pos + 1 77 | break 78 | else: 79 | result_bytes.extend(self.__buf[self.__bpos:self.__bsize]) 80 | self.__bpos = self.__bsize 81 | return result_bytes.decode(self.receive_bytes_encoding) 82 | 83 | def sendall(self, data): 84 | """sendall with specified byte encoding if data is not bytearray, bytes 85 | (maybe str). if data is bytearray or bytes, it will be passed to native sendall API 86 | directly.""" 87 | if isinstance(data, (bytearray, bytes)): 88 | return self.__s.sendall(data) 89 | return self.__s.sendall(bytearray(data, self.send_bytes_encoding)) 90 | 91 | def __getattr__(self, name): 92 | return lambda *arg, **kw: getattr(self.__s, name)(*arg, **kw) 93 | 94 | 95 | # --------------------------------- 96 | # 97 | class Session(object): 98 | """class Session. 99 | 100 | see http://docs.basex.org/wiki/Server_Protocol 101 | """ 102 | 103 | def __init__(self, host, port, user, password, 104 | receive_bytes_encoding='utf-8', 105 | send_bytes_encoding='utf-8'): 106 | """Create and return session with host, port, user name and password""" 107 | 108 | self.__info = None 109 | 110 | # create server connection 111 | self.__swrapper = SocketWrapper( 112 | socket.socket(socket.AF_INET, socket.SOCK_STREAM), 113 | receive_bytes_encoding=receive_bytes_encoding, 114 | send_bytes_encoding=send_bytes_encoding) 115 | 116 | self.__swrapper.connect((host, port)) 117 | 118 | # receive timestamp 119 | response = self.recv_c_str().split(':') 120 | 121 | # send username and hashed password/timestamp 122 | hfun = hashlib.md5() 123 | 124 | if len(response) > 1: 125 | code = "%s:%s:%s" % (user, response[0], password) 126 | nonce = response[1] 127 | else: 128 | code = password 129 | nonce = response[0] 130 | 131 | hfun.update(hashlib.md5(code.encode('us-ascii')).hexdigest().encode('us-ascii')) 132 | hfun.update(nonce.encode('us-ascii')) 133 | self.send(user + chr(0) + hfun.hexdigest()) 134 | 135 | # evaluate success flag 136 | if not self.server_response_success(): 137 | raise IOError('Access Denied.') 138 | 139 | def execute(self, com): 140 | """Execute a command and return the result""" 141 | # send command to server 142 | self.send(com) 143 | 144 | # receive result 145 | result = self.receive() 146 | self.__info = self.recv_c_str() 147 | if not self.server_response_success(): 148 | raise IOError(self.__info) 149 | return result 150 | 151 | def query(self, querytxt): 152 | """Creates a new query instance (having id returned from server).""" 153 | return Query(self, querytxt) 154 | 155 | def create(self, name, content): 156 | """Creates a new database with the specified input (may be empty).""" 157 | self.__send_input(8, name, content) 158 | 159 | def add(self, path, content): 160 | """Adds a new resource to the opened database.""" 161 | self.__send_input(9, path, content) 162 | 163 | def replace(self, path, content): 164 | """Replaces a resource with the specified input.""" 165 | self.__send_input(12, path, content) 166 | 167 | def store(self, path, content): 168 | """Stores a binary resource in the opened database. 169 | 170 | api won't escape 0x00, 0xff automatically, so you must do it 171 | yourself explicitly.""" 172 | # ------------------------------------------ 173 | # chr(13) + path + chr(0) + content + chr(0) 174 | self.__send_binary_input(13, path, content) 175 | # 176 | # ------------------------------------------ 177 | 178 | def info(self): 179 | """Return process information""" 180 | return self.__info 181 | 182 | def close(self): 183 | """Close the session""" 184 | self.send('exit') 185 | self.__swrapper.close() 186 | 187 | def recv_c_str(self): 188 | """Retrieve a string from the socket""" 189 | return self.__swrapper.recv_until_terminator() 190 | 191 | def send(self, value): 192 | """Send the defined string""" 193 | self.__swrapper.sendall(value + chr(0)) 194 | 195 | def __send_input(self, code, arg, content): 196 | """internal. don't care.""" 197 | self.__swrapper.sendall(chr(code) + arg + chr(0) + content + chr(0)) 198 | self.__info = self.recv_c_str() 199 | if not self.server_response_success(): 200 | raise IOError(self.info()) 201 | 202 | def __send_binary_input(self, code, path, content): 203 | """internal. don't care.""" 204 | # at this time, we can't use __send_input itself because of encoding 205 | # problem. we have to build bytearray directly. 206 | if not isinstance(content, (bytearray, bytes)): 207 | raise ValueError("Sorry, content must be bytearray or bytes, not " + 208 | str(type(content))) 209 | 210 | # ------------------------------------------ 211 | # chr(code) + path + chr(0) + content + chr(0) 212 | data = bytearray([code]) 213 | try: 214 | data.extend(path) 215 | except: 216 | data.extend(path.encode('utf-8')) 217 | data.extend([0]) 218 | data.extend(content) 219 | data.extend([0]) 220 | # 221 | # ------------------------------------------ 222 | self.__swrapper.sendall(data) 223 | self.__info = self.recv_c_str() 224 | if not self.server_response_success(): 225 | raise IOError(self.info()) 226 | 227 | def server_response_success(self): 228 | """Return success check""" 229 | return self.__swrapper.recv_single_byte() == 0 230 | 231 | def receive(self): 232 | """Return received string""" 233 | self.__swrapper.clear_buffer() 234 | return self.recv_c_str() 235 | 236 | def iter_receive(self): 237 | """iter_receive() -> (typecode, item) 238 | 239 | iterate while the query returns items. 240 | typecode list is in http://docs.basex.org/wiki/Server_Protocol:_Types 241 | """ 242 | self.__swrapper.clear_buffer() 243 | typecode = self.__swrapper.recv_single_byte() 244 | while typecode > 0: 245 | string = self.recv_c_str() 246 | yield (typecode, string) 247 | typecode = self.__swrapper.recv_single_byte() 248 | if not self.server_response_success(): 249 | raise IOError(self.recv_c_str()) 250 | 251 | # --------------------------------- 252 | # 253 | 254 | 255 | class Query(): 256 | """class Query. 257 | 258 | see http://docs.basex.org/wiki/Server_Protocol 259 | """ 260 | 261 | def __init__(self, session, querytxt): 262 | """Create query object with session and query""" 263 | self.__session = session 264 | self.__id = self.__exc(chr(0), querytxt) 265 | 266 | def bind(self, name, value, datatype=''): 267 | """Binds a value to a variable. 268 | An empty string can be specified as data type.""" 269 | self.__exc(chr(3), self.__id + chr(0) + name + chr(0) + value + chr(0) + datatype) 270 | 271 | def context(self, value, datatype=''): 272 | """Bind the context item""" 273 | self.__exc(chr(14), self.__id + chr(0) + value + chr(0) + datatype) 274 | 275 | def iter(self): 276 | """iterate while the query returns items""" 277 | self.__session.send(chr(4) + self.__id) 278 | return self.__session.iter_receive() 279 | 280 | def execute(self): 281 | """Execute the query and return the result""" 282 | return self.__exc(chr(5), self.__id) 283 | 284 | def info(self): 285 | """Return query information""" 286 | return self.__exc(chr(6), self.__id) 287 | 288 | def options(self): 289 | """Return serialization parameters""" 290 | return self.__exc(chr(7), self.__id) 291 | 292 | def updating(self): 293 | """Returns true if the query may perform updates; false otherwise.""" 294 | return self.__exc(chr(30), self.__id) 295 | 296 | def full(self): 297 | """Returns all resulting items as strings, prefixed by XDM Meta Data.""" 298 | return self.__exc(chr(31), self.__id) 299 | 300 | def close(self): 301 | """Close the query""" 302 | self.__exc(chr(2), self.__id) 303 | 304 | def __exc(self, cmd, arg): 305 | """internal. don't care.""" 306 | # should we expose this? 307 | # (this makes sense only when mismatch between C/S is existing.) 308 | self.__session.send(cmd + arg) 309 | result = self.__session.receive() 310 | if not self.__session.server_response_success(): 311 | raise IOError(self.__session.recv_c_str()) 312 | return result 313 | -------------------------------------------------------------------------------- /overleaf-bibtidy.js: -------------------------------------------------------------------------------- 1 | // ==UserScript== 2 | // @name BibTidy plugin for Overleaf Editor 3 | // @namespace http://tampermonkey.net/ 4 | // @version 1.1 5 | // @description Simple BibTeX validation in Overleaf 6 | // @author Jinsheng BA 7 | // @match https://www.overleaf.com/project/* 8 | // @grant GM_xmlhttpRequest 9 | // @grant GM_addStyle 10 | // @license MIT 11 | // @homepage https://github.com/bajinsheng/bibtidy 12 | // ==/UserScript== 13 | 14 | (function() { 15 | 'use strict'; 16 | 17 | // Grammarly-like styles 18 | GM_addStyle(` 19 | .bibtex-error-icon { 20 | position: absolute; 21 | width: 14px; 22 | height: 14px; 23 | background: #ff4d4f; 24 | border-radius: 50%; 25 | color: white; 26 | font-size: 9px; 27 | display: flex; 28 | align-items: center; 29 | justify-content: center; 30 | cursor: pointer; 31 | z-index: 1000; 32 | font-weight: bold; 33 | box-shadow: 0 1px 3px rgba(0,0,0,0.3); 34 | pointer-events: auto; 35 | } 36 | 37 | .bibtex-error-icon:hover { 38 | background: #ff7875; 39 | transform: scale(1.1); 40 | } 41 | 42 | .bibtex-popup { 43 | position: fixed; 44 | background: white; 45 | border: 1px solid #d9d9d9; 46 | border-radius: 6px; 47 | box-shadow: 0 6px 16px rgba(0,0,0,0.12); 48 | padding: 12px; 49 | z-index: 10000; 50 | max-width: 450px; 51 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; 52 | font-size: 13px; 53 | display: none; 54 | } 55 | 56 | .bibtex-popup-header { 57 | font-weight: 600; 58 | color: #ff4d4f; 59 | margin-bottom: 8px; 60 | display: flex; 61 | align-items: center; 62 | gap: 6px; 63 | } 64 | 65 | .bibtex-popup-body { 66 | margin-bottom: 12px; 67 | color: #262626; 68 | line-height: 1.4; 69 | } 70 | 71 | .bibtex-suggestion { 72 | background: #f6ffed; 73 | border-left: 3px solid #52c41a; 74 | padding: 8px; 75 | margin: 8px 0; 76 | font-family: monospace; 77 | font-size: 11px; 78 | white-space: pre-wrap; 79 | max-height: 150px; 80 | overflow-y: auto; 81 | } 82 | 83 | .bibtex-popup-actions { 84 | display: flex; 85 | gap: 8px; 86 | justify-content: flex-end; 87 | } 88 | 89 | .bibtex-btn { 90 | padding: 4px 12px; 91 | border: 1px solid #d9d9d9; 92 | border-radius: 4px; 93 | background: white; 94 | cursor: pointer; 95 | font-size: 12px; 96 | } 97 | 98 | .bibtex-btn-apply { 99 | background: #1890ff; 100 | color: white; 101 | border-color: #1890ff; 102 | } 103 | 104 | .bibtex-btn-apply:hover { 105 | background: #40a9ff; 106 | } 107 | 108 | .bibtex-btn:hover { 109 | border-color: #40a9ff; 110 | } 111 | 112 | .bibtex-overlay { 113 | position: fixed; 114 | top: 0; 115 | left: 0; 116 | width: 100vw; 117 | height: 100vh; 118 | pointer-events: none; 119 | z-index: 999; 120 | } 121 | `); 122 | 123 | class BibTeXChecker { 124 | constructor() { 125 | this.overlay = null; 126 | this.editor = null; 127 | this.errors = []; 128 | this.titleSimilarityThreshold = 0.6; // Minimum similarity threshold (0-1) 129 | } 130 | 131 | init() { 132 | this.createOverlay(); 133 | this.watchForEditor(); 134 | } 135 | // Ensure method separation with commas 136 | 137 | createOverlay() { 138 | this.overlay = document.createElement('div'); 139 | this.overlay.className = 'bibtex-overlay'; 140 | document.body.appendChild(this.overlay); 141 | } 142 | 143 | watchForEditor() { 144 | const observer = new MutationObserver(() => { 145 | // Find selected file in file tree 146 | const selectedFile = document.querySelector('li[aria-selected="true"][aria-label$=".bib"]'); 147 | // Find editor for bibtex 148 | const editor = document.querySelector('.cm-content[contenteditable="true"][data-language="bibtex"]'); 149 | if (selectedFile && editor && editor !== this.editor) { 150 | this.editor = editor; 151 | this.setupEditorWatcher(); 152 | this.checkBibTeX(); // Run BibTeX check immediately when editor loads 153 | } else if ((!selectedFile || !editor) && this.editor) { 154 | // If bib file/editor is no longer selected, clear overlay and editor reference 155 | this.editor = null; 156 | this.clearErrors(); 157 | } 158 | }); 159 | 160 | observer.observe(document.body, { 161 | childList: true, 162 | subtree: true 163 | }); 164 | 165 | // Manual check button (only visible when .bib file is selected) 166 | // Create a draggable, beautiful button in the left bottom 167 | const btn = document.createElement('button'); 168 | btn.setAttribute('id', 'bibtidy-check-btn'); 169 | btn.style.cssText = ` 170 | position:fixed; 171 | left:24px; 172 | bottom:24px; 173 | z-index:10000; 174 | background:linear-gradient(90deg,#1890ff 0%,#52c41a 100%); 175 | color:white; 176 | border:none; 177 | box-shadow:0 2px 8px rgba(0,0,0,0.15); 178 | padding:12px 28px; 179 | border-radius:24px; 180 | cursor:pointer; 181 | font-size:16px; 182 | font-weight:600; 183 | letter-spacing:0.5px; 184 | display:none; 185 | transition:box-shadow 0.2s,transform 0.2s; 186 | display: flex; 187 | align-items: center; 188 | gap: 10px; 189 | `; 190 | // Button content: text and cycle icon 191 | btn.innerHTML = ` 192 | Check BibTeX 193 | 194 | ⟳ 195 | 196 | `; 197 | document.body.appendChild(btn); 198 | 199 | // Cycle icon click: call checkBibTeX 200 | const cycleIcon = btn.querySelector('#bibtidy-cycle-icon'); 201 | cycleIcon.onclick = (e) => { 202 | e.stopPropagation(); 203 | const editor = document.querySelector('.cm-content[contenteditable="true"][data-language="bibtex"]'); 204 | if (editor) { 205 | this.editor = editor; 206 | this.checkBibTeX(); 207 | } else { 208 | alert('Please select a .bib file in the file tree and open it in the editor.'); 209 | } 210 | }; 211 | 212 | // Main button click (not cycle icon): open DBLP search popup 213 | btn.addEventListener('click', (e) => { 214 | if (e.target === cycleIcon) return; // already handled 215 | this.showDBLPSearchPopup(); 216 | }); 217 | 218 | // Make button draggable 219 | let isDragging = false, dragOffsetX = 0, dragOffsetY = 0; 220 | btn.addEventListener('mousedown', function(e) { 221 | if (e.target === cycleIcon) return; 222 | isDragging = true; 223 | dragOffsetX = e.clientX - btn.getBoundingClientRect().left; 224 | dragOffsetY = e.clientY - btn.getBoundingClientRect().top; 225 | btn.style.transition = 'none'; 226 | document.body.style.userSelect = 'none'; 227 | }); 228 | document.addEventListener('mousemove', function(e) { 229 | if (isDragging) { 230 | btn.style.left = (e.clientX - dragOffsetX) + 'px'; 231 | btn.style.top = (e.clientY - dragOffsetY) + 'px'; 232 | btn.style.bottom = 'auto'; 233 | } 234 | }); 235 | document.addEventListener('mouseup', function() { 236 | if (isDragging) { 237 | isDragging = false; 238 | btn.style.transition = 'box-shadow 0.2s,transform 0.2s'; 239 | document.body.style.userSelect = ''; 240 | } 241 | }); 242 | 243 | // Show/hide button based on .bib file selection 244 | const updateBtnVisibility = () => { 245 | const selectedFile = document.querySelector('li[aria-selected="true"][aria-label$=".bib"]'); 246 | btn.style.display = selectedFile ? 'flex' : 'none'; 247 | }; 248 | // Initial check 249 | updateBtnVisibility(); 250 | // Observe file tree selection changes 251 | const fileTreeObserver = new MutationObserver(updateBtnVisibility); 252 | fileTreeObserver.observe(document.body, { childList: true, subtree: true }); 253 | } 254 | 255 | // Show DBLP search popup for keyword search 256 | showDBLPSearchPopup() { 257 | document.querySelectorAll('.bibtidy-dblp-popup').forEach(p => p.remove()); 258 | const popup = document.createElement('div'); 259 | popup.className = 'bibtidy-dblp-popup'; 260 | popup.style.cssText = ` 261 | position: fixed; 262 | left: 50%; 263 | top: 20%; 264 | transform: translate(-50%, 0); 265 | background: #fff; 266 | border: 1px solid #d9d9d9; 267 | border-radius: 8px; 268 | box-shadow: 0 6px 16px rgba(0,0,0,0.12); 269 | padding: 24px 24px 16px 24px; 270 | z-index: 10001; 271 | min-width: 400px; 272 | max-width: 90vw; 273 | `; 274 | popup.innerHTML = ` 275 |
276 | 🔎 Search DBLP 277 |
278 |
279 | 280 | 281 |
282 |
283 |
284 | 285 |
286 | `; 287 | document.body.appendChild(popup); 288 | 289 | // Close button 290 | popup.querySelector('#bibtidy-dblp-close-btn').onclick = () => popup.remove(); 291 | 292 | // Search button 293 | popup.querySelector('#bibtidy-dblp-search-btn').onclick = async () => { 294 | const keyword = popup.querySelector('#bibtidy-dblp-keyword').value.trim(); 295 | const resultsDiv = popup.querySelector('#bibtidy-dblp-results'); 296 | resultsDiv.innerHTML = '
Searching...
'; 297 | if (!keyword) { 298 | resultsDiv.innerHTML = '
Please enter a keyword.
'; 299 | return; 300 | } 301 | // Query DBLP 302 | const bibs = await this.searchDBLPByKeyword(keyword, 3); 303 | if (!bibs || bibs.length === 0) { 304 | resultsDiv.innerHTML = '
No results found.
'; 305 | return; 306 | } 307 | resultsDiv.innerHTML = bibs.map((bib, idx) => ` 308 |
309 | 310 | ${bib} 311 |
312 | `).join(''); 313 | 314 | // Attach copy event listeners after rendering 315 | setTimeout(() => { 316 | const copyBtns = popup.querySelectorAll('.bibtidy-copy-btn'); 317 | copyBtns.forEach(btn => { 318 | btn.onclick = function() { 319 | const bibDiv = btn.closest('div'); 320 | const bibText = bibDiv.querySelector('.bibtidy-bibtex-text').innerText; 321 | navigator.clipboard.writeText(bibText).then(() => { 322 | btn.textContent = 'Copied!'; 323 | setTimeout(() => { btn.textContent = 'Copy'; }, 1200); 324 | }); 325 | }; 326 | }); 327 | }, 0); 328 | }; 329 | } 330 | 331 | // Query DBLP for top-N BibTeX entries by keyword 332 | async searchDBLPByKeyword(keyword, n = 3) { 333 | return new Promise((resolve) => { 334 | const query = encodeURIComponent(keyword); 335 | const url = `https://dblp.org/search/publ/api?q=${query}&format=bib&h=${n}`; 336 | GM_xmlhttpRequest({ 337 | method: 'GET', 338 | url: url, 339 | timeout: 5000, 340 | onload: (response) => { 341 | try { 342 | if (response.status === 200) { 343 | const bibText = response.responseText.trim(); 344 | if (bibText && bibText.includes('@')) { 345 | // Split into entries 346 | const entries = bibText.split(/(?=@\w+\s*\{)/g).map(e => e.trim()).filter(e => e); 347 | resolve(entries.slice(0, n)); 348 | } else { 349 | resolve([]); 350 | } 351 | } else { 352 | resolve([]); 353 | } 354 | } catch (e) { 355 | resolve([]); 356 | } 357 | }, 358 | onerror: () => resolve([]), 359 | ontimeout: () => resolve([]) 360 | }); 361 | }); 362 | } 363 | 364 | setupEditorWatcher() { 365 | let timeout; 366 | this.editor.addEventListener('input', () => { 367 | clearTimeout(timeout); 368 | timeout = setTimeout(() => this.checkBibTeX(), 2000); 369 | }); 370 | 371 | // Update error icon positions and check for new entries on scroll 372 | this.editor.parentElement.addEventListener('scroll', () => { 373 | this.updateErrorIconPositions(); 374 | // Debounced check for new entries 375 | if (this._scrollCheckTimeout) clearTimeout(this._scrollCheckTimeout); 376 | this._scrollCheckTimeout = setTimeout(() => { 377 | this.checkForNewEntriesOnScroll(); 378 | }, 300); 379 | }); 380 | } 381 | checkForNewEntriesOnScroll() { 382 | // Get current BibTeX entries 383 | const content = this.getEditorContent(); 384 | const entries = this.parseBibTeX(content); 385 | // Get keys of entries already shown as errors 386 | const shownKeys = new Set(this.errors.map(e => e.entry.key)); 387 | // Find new entries not yet shown 388 | const newEntries = entries.filter(e => !shownKeys.has(e.key)); 389 | if (newEntries.length > 0) { 390 | // For each new entry, check DBLP and show icon 391 | newEntries.forEach(async entry => { 392 | const { issues, correctBibTeX, status } = await this.getEntryIssuesAndCorrection(entry); 393 | this.showError(entry, issues, correctBibTeX, status); 394 | }); 395 | } 396 | } 397 | /** 398 | * Shared logic for BibTeX entry validation and DBLP comparison. 399 | * Returns { issues, correctBibTeX } 400 | */ 401 | /** 402 | * Returns { issues, correctBibTeX, status } 403 | * status: 'pass', 'not_found', 'mismatch', 'error' 404 | */ 405 | async getEntryIssuesAndCorrection(entry) { 406 | let issues = []; 407 | let correctBibTeX = null; 408 | let status = 'not_found'; 409 | if (entry.fields.title) { 410 | try { 411 | const dblpEntry = await this.searchDBLP(entry.fields.title); 412 | if (dblpEntry) { 413 | correctBibTeX = this.formatDBLPEntry(dblpEntry, entry.key); 414 | issues = this.compareWithDBLP(entry, dblpEntry); 415 | if (issues.length === 0) { 416 | status = 'pass'; 417 | } else { 418 | status = 'mismatch'; 419 | } 420 | } else { 421 | status = 'not_found'; 422 | } 423 | } catch (error) { 424 | issues = ['Error fetching DBLP data']; 425 | status = 'error'; 426 | } 427 | } else { 428 | status = 'not_found'; 429 | } 430 | return { issues, correctBibTeX, status }; 431 | } 432 | 433 | updateErrorIconPositions() { 434 | if (!this.errors || !this.editor) return; 435 | this.errors.forEach(({ entry, icon }) => { 436 | const range = this.getEntryRange(entry); 437 | if (range) { 438 | const rect = range.getBoundingClientRect(); 439 | icon.style.left = (rect.right + 5) + 'px'; 440 | icon.style.top = (rect.top + 2) + 'px'; 441 | } 442 | }); 443 | } 444 | 445 | getEditorContent() { 446 | if (!this.editor) return ''; 447 | 448 | // Use innerText to preserve line breaks, or traverse DOM nodes 449 | const walker = document.createTreeWalker( 450 | this.editor, 451 | NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT, 452 | { 453 | acceptNode: (node) => { 454 | if (node.nodeType === Node.TEXT_NODE) { 455 | return NodeFilter.FILTER_ACCEPT; 456 | } 457 | if (node.nodeName === 'BR' || node.nodeName === 'DIV') { 458 | return NodeFilter.FILTER_ACCEPT; 459 | } 460 | return NodeFilter.FILTER_SKIP; 461 | } 462 | } 463 | ); 464 | 465 | let content = ''; 466 | let node; 467 | 468 | while (node = walker.nextNode()) { 469 | if (node.nodeType === Node.TEXT_NODE) { 470 | content += node.textContent; 471 | } else if (node.nodeName === 'BR') { 472 | content += '\n'; 473 | } else if (node.nodeName === 'DIV' && content && !content.endsWith('\n')) { 474 | content += '\n'; 475 | } 476 | } 477 | 478 | return content; 479 | } 480 | 481 | setEditorContent(newContent) { 482 | if (!this.editor) return; 483 | 484 | // For CodeMirror editors, we need to set content properly 485 | // Try multiple approaches to ensure compatibility 486 | 487 | // Method 1: Direct textContent (may lose formatting) 488 | this.editor.textContent = newContent; 489 | 490 | // Method 2: Try to trigger CodeMirror updates if available 491 | if (this.editor.CodeMirror) { 492 | this.editor.CodeMirror.setValue(newContent); 493 | } else { 494 | // Method 3: Use innerText to preserve some formatting 495 | this.editor.innerText = newContent; 496 | } 497 | } 498 | 499 | async checkBibTeX() { 500 | if (!this.editor) return; 501 | 502 | const content = this.getEditorContent(); 503 | if (!content.includes('@')) { 504 | this.clearErrors(); 505 | return; 506 | } 507 | 508 | this.clearErrors(); 509 | const entries = this.parseBibTeX(content); 510 | 511 | for (const entry of entries) { 512 | const { issues, correctBibTeX, status } = await this.getEntryIssuesAndCorrection(entry); 513 | this.showError(entry, issues, correctBibTeX, status); 514 | } 515 | } 516 | 517 | parseBibTeX(content) { 518 | const entries = []; 519 | const entryRegex = /@(\w+)\s*\{\s*([^,}]+)/g; 520 | let match; 521 | 522 | while ((match = entryRegex.exec(content)) !== null) { 523 | const type = match[1].toLowerCase(); 524 | const key = match[2].trim(); 525 | const startPos = match.index; 526 | 527 | // Find entry end (only accept if braces are balanced) 528 | let braces = 0; 529 | let endPos = -1; 530 | let foundFirstBrace = false; 531 | 532 | for (let i = startPos; i < content.length; i++) { 533 | if (content[i] === '{') { 534 | braces++; 535 | foundFirstBrace = true; 536 | } else if (content[i] === '}') { 537 | braces--; 538 | if (foundFirstBrace && braces === 0) { 539 | endPos = i; 540 | break; 541 | } 542 | } 543 | } 544 | 545 | // Only parse if a complete entry (balanced braces) was found 546 | if (endPos !== -1) { 547 | const entryText = content.substring(startPos, endPos + 1); 548 | const fields = this.parseFields(entryText); 549 | 550 | entries.push({ 551 | type, 552 | key, 553 | startPos, 554 | endPos, 555 | text: entryText, 556 | fields 557 | }); 558 | } 559 | } 560 | 561 | return entries; 562 | } 563 | 564 | parseFields(entryText) { 565 | const fields = {}; 566 | let i = 0; 567 | 568 | while (i < entryText.length) { 569 | // Find field name 570 | const fieldMatch = entryText.substring(i).match(/(\w+)\s*=\s*/); 571 | if (!fieldMatch) { 572 | i++; 573 | continue; 574 | } 575 | 576 | const fieldName = fieldMatch[1].toLowerCase(); 577 | i += fieldMatch.index + fieldMatch[0].length; 578 | 579 | // Parse field value 580 | if (i < entryText.length && entryText[i] === '{') { 581 | // Braced value - handle nested braces 582 | let braceCount = 0; 583 | let start = i; 584 | 585 | while (i < entryText.length) { 586 | if (entryText[i] === '{') braceCount++; 587 | else if (entryText[i] === '}') braceCount--; 588 | i++; 589 | if (braceCount === 0) break; 590 | } 591 | 592 | const value = entryText.substring(start + 1, i - 1); // Remove outer braces 593 | fields[fieldName] = value; 594 | } else { 595 | // Unbraced value - read until comma or end 596 | let start = i; 597 | while (i < entryText.length && entryText[i] !== ',' && entryText[i] !== '}') { 598 | i++; 599 | } 600 | const value = entryText.substring(start, i).trim(); 601 | fields[fieldName] = value; 602 | } 603 | } 604 | 605 | return fields; 606 | } 607 | 608 | calculateTitleSimilarity(title1, title2) { 609 | if (!title1 || !title2) return 0; 610 | 611 | // Normalize titles for comparison 612 | const normalize = (title) => title.toLowerCase() 613 | .replace(/[{}]/g, '') 614 | .replace(/[^\w\s]/g, ' ') 615 | .replace(/\s+/g, ' ') 616 | .trim(); 617 | 618 | const norm1 = normalize(title1); 619 | const norm2 = normalize(title2); 620 | 621 | // Simple word-based similarity using Jaccard coefficient 622 | const words1 = new Set(norm1.split(' ')); 623 | const words2 = new Set(norm2.split(' ')); 624 | 625 | const intersection = new Set([...words1].filter(x => words2.has(x))); 626 | const union = new Set([...words1, ...words2]); 627 | 628 | return intersection.size / union.size; 629 | } 630 | 631 | cleanFieldValue(value) { 632 | if (!value) return ''; 633 | 634 | return value 635 | .replace(/[\n\r\t]/g, ' ') // Replace newlines, carriage returns, tabs with spaces 636 | .replace(/\s+/g, ' ') // Replace multiple spaces with single space 637 | .trim(); // Remove leading/trailing whitespace 638 | } 639 | 640 | async searchDBLP(title) { 641 | return new Promise((resolve) => { 642 | const query = encodeURIComponent(title.replace(/[{}]/g, '')); 643 | const url = `https://dblp.org/search/publ/api?q=${query}&format=bib&h=5`; 644 | 645 | GM_xmlhttpRequest({ 646 | method: 'GET', 647 | url: url, 648 | timeout: 5000, 649 | onload: (response) => { 650 | try { 651 | if (response.status === 200) { 652 | const bibText = response.responseText.trim(); 653 | if (bibText && bibText.includes('@')) { 654 | const entries = this.parseBibTeX(bibText); 655 | if (entries.length === 0) { 656 | resolve(null); 657 | return; 658 | } 659 | let bestEntry = null; 660 | let bestSimilarity = 0; 661 | for (const entry of entries) { 662 | if (entry.fields.title) { 663 | for (const [fieldName, fieldValue] of Object.entries(entry.fields)) { 664 | entry.fields[fieldName] = this.cleanFieldValue(fieldValue); 665 | } 666 | const similarity = this.calculateTitleSimilarity(title, entry.fields.title); 667 | if (similarity > bestSimilarity && similarity >= this.titleSimilarityThreshold) { 668 | bestSimilarity = similarity; 669 | bestEntry = entry; 670 | } 671 | } 672 | } 673 | resolve(bestEntry); 674 | } else { 675 | resolve(null); 676 | } 677 | } else { 678 | resolve(null); 679 | } 680 | } catch (e) { 681 | resolve(null); 682 | } 683 | }, 684 | onerror: () => resolve(null), 685 | ontimeout: () => resolve(null) 686 | }); 687 | }); 688 | } 689 | 690 | formatDBLPEntry(dblpEntry, originalKey) { 691 | let bibtex = `@${dblpEntry.type}{${originalKey},\n`; 692 | 693 | const fieldOrder = ['title', 'author', 'booktitle', 'journal', 'year', 'volume', 'pages', 'doi']; 694 | 695 | for (const field of fieldOrder) { 696 | if (dblpEntry.fields[field]) { 697 | bibtex += ` ${field} = {${dblpEntry.fields[field]}},\n`; 698 | } 699 | } 700 | 701 | bibtex = bibtex.replace(/,\n$/, '\n'); 702 | bibtex += '}'; 703 | 704 | return bibtex; 705 | } 706 | 707 | compareWithDBLP(entry, dblpEntry) { 708 | const issues = []; 709 | 710 | // Compare key fields 711 | const fieldsToCheck = ['title', 'author', 'year', 'journal', 'booktitle']; 712 | 713 | for (const field of fieldsToCheck) { 714 | const originalValue = entry.fields[field]; 715 | const dblpValue = dblpEntry.fields[field]; 716 | 717 | if (dblpValue && (!originalValue || 718 | this.normalizeForComparison(originalValue) !== this.normalizeForComparison(dblpValue))) { 719 | 720 | const originalDisplay = originalValue ? `"${originalValue}"` : "(missing)"; 721 | const dblpDisplay = `"${dblpValue}"`; 722 | issues.push(`${field}: ${originalDisplay} → ${dblpDisplay}`); 723 | } 724 | } 725 | 726 | if (entry.type !== dblpEntry.type) { 727 | issues.push(`Entry type: ${entry.type} → ${dblpEntry.type}`); 728 | } 729 | 730 | return issues; 731 | } 732 | 733 | normalizeForComparison(value) { 734 | if (!value) return ''; 735 | return value.toLowerCase() 736 | .replace(/\s+/g, ' ') // Replace all whitespace (including \n, \t) with single space 737 | .trim(); 738 | } 739 | 740 | validateEntry(entry) { 741 | const issues = []; 742 | 743 | if (!entry.fields.title || entry.fields.title.trim() === '') { 744 | issues.push('Missing title'); 745 | } 746 | 747 | if (!entry.fields.author || entry.fields.author.trim() === '') { 748 | issues.push('Missing author'); 749 | } 750 | 751 | if (!entry.fields.year || entry.fields.year.trim() === '') { 752 | issues.push('Missing year'); 753 | } 754 | 755 | if (entry.type === 'article' && !entry.fields.journal) { 756 | issues.push('Missing journal for article'); 757 | } 758 | 759 | if (entry.type === 'inproceedings' && !entry.fields.booktitle) { 760 | issues.push('Missing booktitle for conference paper'); 761 | } 762 | 763 | return issues; 764 | } 765 | 766 | /** 767 | * Show an icon for the entry based on status: 'pass', 'not_found', 'mismatch', 'error'. 768 | * status: 'pass' (green check), 'not_found' (gray question), 'mismatch' (red !), 'error' (red X) 769 | */ 770 | showError(entry, issues, correctBibTeX = null, status = 'mismatch') { 771 | const range = this.getEntryRange(entry); 772 | if (!range) return; 773 | 774 | const rect = range.getBoundingClientRect(); 775 | 776 | const icon = document.createElement('div'); 777 | icon.className = 'bibtex-error-icon'; 778 | icon.style.position = 'fixed'; 779 | icon.style.left = (rect.right + 5) + 'px'; 780 | icon.style.top = (rect.top + 2) + 'px'; 781 | icon.style.pointerEvents = 'auto'; 782 | 783 | // Set icon style and content based on status 784 | if (status === 'pass') { 785 | icon.style.background = '#52c41a'; // green 786 | icon.textContent = '✓'; 787 | icon.title = 'Entry matches DBLP'; 788 | } else if (status === 'not_found') { 789 | icon.style.background = '#bfbfbf'; // gray 790 | icon.textContent = '?'; 791 | icon.title = 'Entry not found in DBLP'; 792 | } else if (status === 'error') { 793 | icon.style.background = '#d32f2f'; // red 794 | icon.textContent = '✗'; 795 | icon.title = 'Error fetching DBLP data'; 796 | } else { 797 | // mismatch or default 798 | icon.style.background = '#ff4d4f'; // red 799 | icon.textContent = '!'; 800 | icon.title = 'Entry differs from DBLP'; 801 | } 802 | 803 | icon.onclick = (e) => { 804 | e.stopPropagation(); 805 | this.showPopup(entry, issues, icon, correctBibTeX, status); 806 | }; 807 | 808 | this.overlay.appendChild(icon); 809 | this.errors.push({ entry, icon, issues, correctBibTeX, status }); 810 | } 811 | 812 | getEntryRange(entry) { 813 | const walker = document.createTreeWalker( 814 | this.editor, 815 | NodeFilter.SHOW_TEXT 816 | ); 817 | 818 | let node; 819 | while (node = walker.nextNode()) { 820 | if (node.textContent.includes(entry.key)) { 821 | const range = document.createRange(); 822 | range.selectNode(node); 823 | return range; 824 | } 825 | } 826 | return null; 827 | } 828 | 829 | showPopup(entry, issues, icon, correctBibTeX = null, status = 'mismatch') { 830 | document.querySelectorAll('.bibtex-popup').forEach(p => p.remove()); 831 | 832 | const popup = document.createElement('div'); 833 | popup.className = 'bibtex-popup'; 834 | 835 | // Diff-style highlight for issues 836 | function diffHighlight(issue) { 837 | // Try to parse: field: "old" → "new" 838 | const match = issue.match(/^(\w+): (".*?") → (".*?")$/); 839 | if (match) { 840 | const field = match[1]; 841 | const oldVal = match[2]; 842 | const newVal = match[3]; 843 | return `
  • - ${field}: ${oldVal}
    + ${field}: ${newVal}
  • `; 844 | } 845 | // Entry type change 846 | const typeMatch = issue.match(/^Entry type: (\w+) → (\w+)$/); 847 | if (typeMatch) { 848 | return `
  • - type: ${typeMatch[1]}
    + type: ${typeMatch[2]}
  • `; 849 | } 850 | // Missing field 851 | if (issue.startsWith('Missing')) { 852 | return `
  • - ${issue}
  • `; 853 | } 854 | // Default 855 | return `
  • ${issue}
  • `; 856 | } 857 | 858 | let suggestionHtml = ''; 859 | if (correctBibTeX) { 860 | suggestionHtml = ` 861 |
    ${correctBibTeX}
    862 | `; 863 | } 864 | 865 | // Choose popup icon and header based on status 866 | let popupIcon = '⚠️'; 867 | let popupHeader = 'BibTeX Issues Found'; 868 | if (status === 'pass') { 869 | popupIcon = '✅'; 870 | popupHeader = 'Entry matches DBLP'; 871 | } else if (status === 'not_found') { 872 | popupIcon = '❓'; 873 | popupHeader = 'Entry not found in DBLP'; 874 | } else if (status === 'error') { 875 | popupIcon = '❌'; 876 | popupHeader = 'Error fetching DBLP data'; 877 | } else if (correctBibTeX) { 878 | popupIcon = '🔍'; 879 | popupHeader = 'DBLP Correction Available'; 880 | } 881 | 882 | popup.innerHTML = ` 883 |
    884 | ${popupIcon} 885 | ${popupHeader} 886 |
    887 |
    888 | Entry "${entry.key}" ${status === 'pass' ? 'matches DBLP.' : status === 'not_found' ? 'not found in DBLP.' : status === 'error' ? 'error fetching DBLP data.' : correctBibTeX ? 'differs from DBLP:' : 'has issues:'} 889 | 892 | ${suggestionHtml} 893 |
    894 |
    895 | 898 | ${correctBibTeX && status === 'mismatch' ? 899 | `` : 902 | `` 905 | } 906 |
    907 | `; 908 | 909 | document.body.appendChild(popup); 910 | 911 | // Add event listener for the dismiss button 912 | const dismissBtn = popup.querySelector('.bibtex-btn-dismiss'); 913 | if (dismissBtn) { 914 | dismissBtn.addEventListener('click', () => { 915 | popup.remove(); 916 | }); 917 | } 918 | 919 | // Add event listener for the apply button 920 | const applyBtn = popup.querySelector('.bibtex-btn-apply'); 921 | if (applyBtn) { 922 | applyBtn.addEventListener('click', () => { 923 | const entryKey = applyBtn.getAttribute('data-entry-key'); 924 | this.applyCorrection(entryKey); 925 | }); 926 | } 927 | 928 | const iconRect = icon.getBoundingClientRect(); 929 | popup.style.left = (iconRect.right + 10) + 'px'; 930 | popup.style.top = iconRect.top + 'px'; 931 | popup.style.display = 'block'; 932 | 933 | setTimeout(() => { 934 | if (popup.parentElement) popup.remove(); 935 | }, 15000); 936 | 937 | setTimeout(() => { 938 | const hidePopup = (e) => { 939 | if (!popup.contains(e.target) && !icon.contains(e.target)) { 940 | popup.remove(); 941 | document.removeEventListener('click', hidePopup); 942 | } 943 | }; 944 | document.addEventListener('click', hidePopup); 945 | }, 100); 946 | } 947 | 948 | applyCorrection(entryKey) { 949 | const errorData = this.errors.find(e => e.entry.key === entryKey); 950 | if (!errorData || !errorData.correctBibTeX) return; 951 | const newEntry = errorData.correctBibTeX; 952 | 953 | // Locate the entry key position in the editor nodes 954 | let walker = document.createTreeWalker(this.editor, NodeFilter.SHOW_TEXT); 955 | let nodes = [], fullText = '', keyIdx = -1, keyNodeIdx = -1, keyOffset = -1; 956 | while (true) { 957 | let node = walker.nextNode(); 958 | if (!node) break; 959 | nodes.push(node); 960 | } 961 | fullText = nodes.map(n => n.textContent).join(''); 962 | keyIdx = fullText.indexOf(entryKey); 963 | if (keyIdx !== -1) { 964 | // Find node and offset for entry key 965 | let count = 0; 966 | for (let i = 0; i < nodes.length; i++) { 967 | let nodeLen = nodes[i].textContent.length; 968 | if (count + nodeLen > keyIdx) { 969 | keyNodeIdx = i; 970 | keyOffset = keyIdx - count; 971 | break; 972 | } 973 | count += nodeLen; 974 | } 975 | // Expand range to cover the full BibTeX entry 976 | // Scan forward from keyIdx to find the opening '{' and then match braces to find closing '}' 977 | let entryStartIdx = fullText.lastIndexOf('@', keyIdx); 978 | let braceCount = 0, entryEndIdx = -1; 979 | let foundFirstBrace = false; 980 | for (let i = keyIdx - 1; i < fullText.length; i++) { 981 | if (fullText[i] === '{') { 982 | braceCount++; 983 | foundFirstBrace = true; 984 | } else if (fullText[i] === '}') { 985 | braceCount--; 986 | if (foundFirstBrace && braceCount === 0) { 987 | entryEndIdx = i; 988 | break; 989 | } 990 | } 991 | } 992 | if (entryStartIdx !== -1 && entryEndIdx !== -1) { 993 | // Map entryStartIdx and entryEndIdx to node/offsets 994 | let startNode = null, startOffset = 0, endNode = null, endOffset = 0; 995 | let count = 0; 996 | for (let i = 0; i < nodes.length; i++) { 997 | let nodeLen = nodes[i].textContent.length; 998 | if (!startNode && count + nodeLen > entryStartIdx) { 999 | startNode = nodes[i]; 1000 | startOffset = entryStartIdx - count; 1001 | } 1002 | if (!endNode && count + nodeLen > entryEndIdx) { 1003 | endNode = nodes[i]; 1004 | endOffset = entryEndIdx - count + 1; 1005 | break; 1006 | } 1007 | count += nodeLen; 1008 | } 1009 | if (startNode && endNode) { 1010 | // Select the entry across nodes 1011 | const range = document.createRange(); 1012 | range.setStart(startNode, startOffset); 1013 | range.setEnd(endNode, endOffset); 1014 | const sel = window.getSelection(); 1015 | sel.removeAllRanges(); 1016 | sel.addRange(range); 1017 | // Remove the old entry 1018 | document.execCommand('delete'); 1019 | // Insert the new entry at the same position 1020 | document.execCommand('insertText', false, newEntry); 1021 | } 1022 | } 1023 | } else { 1024 | // Fallback: use regex to find and replace 1025 | const content = this.getEditorContent(); 1026 | const entryRegex = new RegExp(`@\\w+\\s*\\{\\s*${entryKey}[^@]*?\\}`, 's'); 1027 | const updatedContent = content.replace(entryRegex, newEntry); 1028 | this.setEditorContent(updatedContent); 1029 | } 1030 | 1031 | // Simulate user input 1032 | const inputEvent = new Event('input', { bubbles: true }); 1033 | const changeEvent = new Event('change', { bubbles: true }); 1034 | this.editor.dispatchEvent(inputEvent); 1035 | this.editor.dispatchEvent(changeEvent); 1036 | document.querySelectorAll('.bibtex-popup').forEach(p => p.remove()); 1037 | this.checkBibTeX(); 1038 | } 1039 | 1040 | clearErrors() { 1041 | if (this.overlay) { 1042 | this.overlay.innerHTML = ''; 1043 | } 1044 | this.errors = []; 1045 | document.querySelectorAll('.bibtex-popup').forEach(p => p.remove()); 1046 | } 1047 | } 1048 | 1049 | // Initialize 1050 | const checker = new BibTeXChecker(); 1051 | window.bibTeXChecker = checker; // Make globally accessible 1052 | 1053 | if (document.readyState === 'loading') { 1054 | document.addEventListener('DOMContentLoaded', () => checker.init()); 1055 | } else { 1056 | checker.init(); 1057 | } 1058 | 1059 | })(); 1060 | --------------------------------------------------------------------------------