├── .gitignore
├── LICENSE
├── Readme.md
├── screenshot.png
└── zettelcon.py

/.gitignore:
--------------------------------------------------------------------------------
private_*
notes*
.DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 max (whateverforever)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
# zettelcon

> An external CLI tool for Zettlr note collections to **automatically add backlinks** to your note files.
> It edits files in place, so be careful and try it on a copy of your files first. Run it whenever you feel an update is worth it.
>
> Also check out [zettelwarmer](https://github.com/whateverforever/zettelwarmer) for finding new interconnections between your notes.

![](screenshot.png)

## Assumptions

- Zettlr-standard wiki-links are used (`[[...]]`)
- Single-line Markdown syntax is used for headings (`# some heading`)
- Note IDs are unique, also relative to the names of the notes
- The backlink section is the last section of a page
- Two spaces are used for list indentation

## Features

- Single Python file, no dependencies, under 250 SLOC
- Can run multi-core for large collections
- Supports any note ID syntax without needing an explicit regex
- Supports collections that contain more than one note ID style

```
$ python3 zettelcon.py --help
usage: zettelcon.py [-h] -f FOLDER [-s SUFFIX] [-c] [-n NPROCS] [-ic]

Tool to insert automatic backlinks into Zettlr note collections or other
interlinked (markdown) files.

optional arguments:
  -h, --help            show this help message and exit
  -f FOLDER, --folder FOLDER
                        Path to folder with all the zettels in it.
  -s SUFFIX, --suffix SUFFIX
                        Suffix for the files to consider. Defaults to .md
  -c, --clear-backlinks
                        Instead of generating backlinks, revert all files to a
                        no-backlinks state
  -n NPROCS, --nprocs NPROCS
                        Number of worker processes to run for file reading and
                        writing.
  -ic, --ignore-cache   Don't use zettelcon's cache, force writing to _all_
                        Zettel files (even the ones where backlinks haven't
                        changed).
```
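
For orientation, the section that zettelcon appends to each linked-to note looks roughly like this (titles, IDs, and paths below are made up; see `write_backlinks_to_file` in `zettelcon.py` for the exact format):

```markdown
## Backlinks

> - [Some Other Note](202101011200 some-other-note.md)
>   - The line in that note which links to [[202101021111]]
>
> - [A Third Note](subfolder/202101031300 a-third-note.md)
>   - Its line of context around the link

_Backlinks last generated 2021-01-04 12:00:00_
```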

---

## Future Work

- [ ] Only cite a few words before and after the citation
- [ ] Add a horizontal break before the backlinks
- [ ] Output additional info such as islands, sinks, sources, etc.
- [ ] Check what happens if a file that links to another doesn't have a title
- [x] Somehow reduce the number of files that are written
  - Zettlr takes quite a while to update its indices after the files get changed,
    so it would be beneficial to restrict write operations to the files that actually
    get new backlinks
- [x] Add option to clear all backlinks
- [x] Make file writing multi-core
- [x] Add "last edited XXX" info field to the markdown
- [x] Do an analysis of computational complexity as a function of the number of files (or links)
  - As expected, it's linear in the number of files
  - Doubling the cores increases performance by ~1.5x
  - Ca. 1500 notes per second on two cores, 2300 n/s on four

## Discarded Ideas

- [ ] ~~Add option to commit everything to git before updating links~~
  - Not needed, because you can simply define a shell function to do that
  - ```bash
    gen-backlinks() {
        cd $ZETTELPATH && git add -A && git commit -m "pre-zettelcon backup" ;\
        python ~/...path.../zettelcon.py -f $ZETTELPATH
    }
    ```
--------------------------------------------------------------------------------
/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/whateverforever/zettelcon/a4fbd7d579c9a45bd8ae316edb370392bc918da0/screenshot.png
--------------------------------------------------------------------------------
/zettelcon.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
import datetime
import glob
import os
import pickle
import re
import textwrap
import time
from argparse import ArgumentParser
from collections import defaultdict
from multiprocessing import Pool
from pprint import pformat

BACKLINK_START = "## Backlinks"
CACHEFILENAME = ".zettelcon_cache.pickle"
# ASSUMES the standard zettlr wikilink syntax for links
REX_LINK = re.compile(r"\[\[(?P<linktarget>.+?)\]\]")
# ASSUMES the standard single-line hashtag syntax for titles
REX_TITLE = re.compile(r"^#\s+(.+)")
REX_LINECLEANER = re.compile(r"^\s*(\*|-|\+|\d+\.|>) (\[ \]|\[x\])? *")
REX_TRAILINGNEWLINES = re.compile(r"(\n*)\Z", re.MULTILINE)

NOWSTR = datetime.datetime.now().isoformat(sep=" ", timespec="seconds")
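
# Illustrative matches for the patterns above (example values are made up):
#   REX_LINK:        "see [[202101021111]] for details"  ->  linktarget = "202101021111"
#   REX_TITLE:       "# My Note Title"                   ->  group(1) = "My Note Title"
#   REX_LINECLEANER: strips leading list/quote/task markers, e.g.
#                    "- [x] see [[202101021111]]"        ->  "see [[202101021111]]"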


def main():
    parser = ArgumentParser(
        description="Tool to insert automatic backlinks into Zettlr note collections or other interlinked (markdown) files."
    )
    parser.add_argument(
        "-f",
        "--folder",
        help="Path to folder with all the zettels in it.",
        required=True
    )
    parser.add_argument(
        "-s",
        "--suffix",
        help="Suffix for the files to consider. Defaults to .md",
        default=".md",
    )
    parser.add_argument(
        "-c",
        "--clear-backlinks",
        help="Instead of generating backlinks, revert all files to a no-backlinks state",
        action="store_true",
    )
    parser.add_argument(
        "-n",
        "--nprocs",
        help="Number of worker processes to run for file reading and writing.",
        default=2,
        type=int,
    )
    parser.add_argument(
        "-ic",
        "--ignore-cache",
        help="Don't use zettelcon's cache, force writing to _all_ Zettel files (even the ones where backlinks haven't changed).",
        action="store_true",
    )

    args = parser.parse_args()
    params = vars(args)

    process_directory(**params)


def process_directory(
    folder, suffix, nprocs, clear_backlinks=False, ignore_cache=False
):
    t_start = time.time()
    files = glob.glob(os.path.join(folder, f"**/*{suffix}"), recursive=True)

    pool = Pool(processes=nprocs)

    if clear_backlinks:
        pool.map(clear_backlinks_from_file, files)
        print("Cleared backlinks from all files")
        return

    links = []
    res = pool.map(get_file_outlinks, files)
    for outlinks in res:
        links.extend(outlinks)
    links = change_ids_to_filepaths(links, files)

    bundled_links_current = bundle_backlinks_per_targetfile(links)
    bundled_links_to_write = {**bundled_links_current}

    cachefile = os.path.join(folder, CACHEFILENAME)

    if not ignore_cache and os.path.isfile(cachefile):
        with open(cachefile, "rb") as fh:
            bundled_links_cached = pickle.load(fh)

        for targetfile, links_current in bundled_links_current.items():
            links_cached = None
            if targetfile in bundled_links_cached:
                links_cached = bundled_links_cached[targetfile]

            if links_cached == links_current:
                del bundled_links_to_write[targetfile]

    with open(cachefile, "wb") as fh:
        pickle.dump(bundled_links_current, fh)

    unreferenced_files = set(files) - set(bundled_links_current.keys())
    pool.map(clear_backlinks_from_file, unreferenced_files)

    print(f"\nFound {len(unreferenced_files)} files with no links to them")
    for file in sorted(unreferenced_files):
        print(f" - {os.path.basename(file)}")

    print(f"\nUpdating {len(bundled_links_to_write)} files in place...")

    if len(bundled_links_to_write) == 0:
        print(" - No new links to write.")

    for target in bundled_links_to_write.keys():
        print(" - Updating {}".format(os.path.basename(target)))

    pool.map(write_backlinks_to_file, bundled_links_to_write.values())

    t_end = time.time()
    duration = t_end - t_start
    print(
        f"\nWrote backlinks to {len(bundled_links_to_write)} files in {duration:.3f}s"
    )
    print(NOWSTR)


def clear_backlinks_from_file(filepath):
    write_backlink_section_to_file("", filepath)
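

# Shape of a single link entry, as produced by get_file_outlinks() and consumed
# by the bundling/writing functions below (illustrative values):
#   {
#       "link_source": "/notes/202101011200 some-other-note.md",
#       "link_source_title": "Some Other Note",
#       "link_target": "202101021111",  # a note ID; resolved to a file path later
#       "link_context": "The line that links to [[202101021111]]",
#       "context_pos_start": 23,
#       "context_pos_end": 39,
#   }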


def bundle_backlinks_per_targetfile(links):
    """
    Takes a list of backlinks that contain metadata about source and target file.
    Returns a dict that maps each target file name to the backlinks pointing to it.
    """
    backlinks_for_file = defaultdict(list)

    for link_i in links:
        filename = link_i["link_target"]
        backlinks_for_file[filename].append(link_i)

    return backlinks_for_file


def write_backlinks_to_file(backlinks):
    """
    ASSUMES all the backlinks point to the same file
    """

    target_file = backlinks[0]["link_target"]
    backlinks_by_src = defaultdict(list)

    for backlink in backlinks:
        backlinks_by_src[backlink["link_source"]].append(backlink)

    entries = []
    for source_file, src_backlinks in backlinks_by_src.items():
        source_file_title = src_backlinks[0]["link_source_title"]
        source_file_relative = os.path.relpath(
            source_file, start=os.path.dirname(target_file)
        )
        entry = "> - [{}]({})\n".format(source_file_title, source_file_relative)

        for backlink in src_backlinks:
            # ASSUMES two spaces are used for list indentation
            entry += ">   - {}\n".format(backlink["link_context"])

        entries.append(entry)

    backlink_section = f"{BACKLINK_START}\n\n"
    backlink_section += "> \n".join(entries)

    backlink_section += f"\n_Backlinks last generated {NOWSTR}_\n"

    write_backlink_section_to_file(backlink_section, target_file)


def write_backlink_section_to_file(section_text, filepath):
    with open(filepath, "r", encoding="utf-8") as fh:
        contents = fh.read()

    with open(filepath, "w", encoding="utf-8") as fh:
        try:
            backlink_sec_idx = contents.index(BACKLINK_START)
        except ValueError:
            # no backlink section in file
            backlink_sec_idx = None

        main_content = contents[:backlink_sec_idx]
        res = REX_TRAILINGNEWLINES.search(main_content)

        num_existing_newlines = len(res.group(1))
        num_needed_newlines = max(2 - num_existing_newlines, 0)

        backlink_section = "\n" * num_needed_newlines
        backlink_section += section_text

        # ASSUMES backlink section is last part of page
        contents_backlinked = main_content + backlink_section
        fh.write(contents_backlinked)


def change_ids_to_filepaths(links, all_filenames):
    out = []

    for entry in links:
        target_candidates = []

        for filename in all_filenames:
            if entry["link_target"] in filename:
                target_candidates.append(filename)

        # ASSUMES note IDs are unique, also among rest of file names
        if len(target_candidates) == 1:
            entry["link_target_orig"] = entry["link_target"]
            entry["link_target"] = target_candidates[0]
            out.append(entry)
        elif len(target_candidates) == 0:
            print(
                "\nTARGET '{}' NOT FOUND (linked from {})".format(
                    entry["link_target"], os.path.basename(entry["link_source"])
                )
            )
            print(" - {}".format(textwrap.fill(entry["link_context"])))
        elif len(target_candidates) > 1:
            print(
                "\nMULTIPLE TARGETS FOUND FOR {}: {}".format(
                    entry, pformat(target_candidates)
                )
            )

    return out
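

# Resolution example (hypothetical paths): a link target such as "202101021111"
# is matched against all file paths by substring, so it resolves to a file like
# "/notes/202101021111 some note.md". Links with zero or more than one candidate
# path are reported above and dropped from the result.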


def get_file_outlinks(path):
    with open(path, "r", encoding="utf-8") as fh:
        contents = fh.read()

    # NOTE: splits on single newlines, so each "paragraph" is really one line
    paragraphs = [para.strip() for para in contents.split("\n")]

    outlinks = []
    first_header = ""

    for para in paragraphs:
        reached_backlink_section = BACKLINK_START in para
        if reached_backlink_section:
            break

        if first_header == "":
            res = REX_TITLE.match(para)
            if res:
                first_header = res.group(1)

        links = find_links_in_text(para)
        links = [
            {"link_source_title": first_header, "link_source": path, **entry}
            for entry in links
        ]
        outlinks.extend(links)

    return outlinks


def find_links_in_text(paragraph):
    clean_para = REX_LINECLEANER.sub("", paragraph)

    out = []
    # search the cleaned line, so the reported positions match link_context
    for res in REX_LINK.finditer(clean_para):
        link = {
            "link_target": res.group("linktarget"),
            "link_context": clean_para,
            "context_pos_start": res.start(),
            "context_pos_end": res.end(),
        }

        out.append(link)

    return out


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------