├── .gitignore ├── LICENSE.txt ├── README.md ├── requirements.txt ├── zettel_link_rewriter.ini └── zettel_link_rewriter.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 2 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 3 | 4 | # User-specific stuff 5 | .idea/**/workspace.xml 6 | .idea/**/tasks.xml 7 | .idea/**/usage.statistics.xml 8 | .idea/**/dictionaries 9 | .idea/**/shelf 10 | 11 | # Generated files 12 | .idea/**/contentModel.xml 13 | 14 | # Sensitive or high-churn files 15 | .idea/**/dataSources/ 16 | .idea/**/dataSources.ids 17 | .idea/**/dataSources.local.xml 18 | .idea/**/sqlDataSources.xml 19 | .idea/**/dynamic.xml 20 | .idea/**/uiDesigner.xml 21 | .idea/**/dbnavigator.xml 22 | 23 | # Gradle 24 | .idea/**/gradle.xml 25 | .idea/**/libraries 26 | 27 | # Gradle and Maven with auto-import 28 | # When using Gradle or Maven with auto-import, you should exclude module files, 29 | # since they will be recreated, and may cause churn. Uncomment if using 30 | # auto-import. 
31 | # .idea/artifacts 32 | # .idea/compiler.xml 33 | # .idea/jarRepositories.xml 34 | .idea/modules.xml 35 | .idea/*.iml 36 | # .idea/modules 37 | # *.iml 38 | # *.ipr 39 | 40 | # CMake 41 | cmake-build-*/ 42 | 43 | # Mongo Explorer plugin 44 | .idea/**/mongoSettings.xml 45 | 46 | # File-based project format 47 | *.iws 48 | 49 | # IntelliJ 50 | out/ 51 | 52 | # mpeltonen/sbt-idea plugin 53 | .idea_modules/ 54 | 55 | # JIRA plugin 56 | atlassian-ide-plugin.xml 57 | 58 | # Cursive Clojure plugin 59 | .idea/replstate.xml 60 | 61 | # Crashlytics plugin (for Android Studio and IntelliJ) 62 | com_crashlytics_export_strings.xml 63 | crashlytics.properties 64 | crashlytics-build.properties 65 | fabric.properties 66 | 67 | # Editor-based Rest Client 68 | .idea/httpRequests 69 | 70 | # Android studio 3.1+ serialized cache file 71 | .idea/caches/build_file_checksums.ser 72 | 73 | # Ignore PyCharm misc 74 | .idea/misc.xml 75 | 76 | # Other .idea files 77 | .idea/inspectionProfiles/** 78 | .idea/vcs.xml 79 | 80 | # Ignore test files 81 | source/** 82 | dest/** 83 | writer.ini 84 | 85 | #Ignore Python venv 86 | venv/** 87 | .venv/** -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Balaji Dutt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Rewrite Wikilinks in your Zettelkasten as Markdown Links 2 | 3 | This repository contains a Python script that can take a folder of Markdown files (or any other compatible format) and 4 | convert any links that are in `[[wikilink]]` format into the standard Markdown link format. 5 | 6 | ### Why? 7 | Most Zettelkasten software today will handle linking between different notes by adding a link using the `[[wikilink]]` 8 | syntax. This is great for reading and writing within the Zettelkasten ecosystem but almost no standard Markdown renderer 9 | will automatically recognize the `[[wikilink]]` syntax. This script attempts to give you more inter-operability with 10 | other Markdown software by converting `[[wikilinks]]` into standard Markdown `[wikilink](wikilink)` syntax. 11 | 12 | ### Features 13 | - Fully cross-platform. Script was developed in a Windows environment and is extensively tested to work in 14 | Windows. 15 | - Provides a number of parameters to configure the script. Parameters can be specified either on the command-line or 16 | via a configuration file. Refer to [zettel_link_rewriter.ini](zettel_link_rewriter.ini) to see an example. 17 | - Handles links within Markdown code blocks correctly, i.e., does not rewrite them. 
This includes fenced code blocks, 18 | inline code snippets and code blocks indented with four spaces. Do note the [Caveats](#caveats) though. 19 | - Minimal dependencies. The script requires only one additional package to be installed (which is because Python's 20 | built-in `argparse` module is _terrible_.) 21 | 22 | ### Dependencies 23 | - Python 3.4 or higher (Script has only been tested with Python 3.8) 24 | - List of packages specified in `requirements.txt` 25 | 26 | ### Getting Started 27 | You can use this script by cloning the repo and installing Python and the script dependencies in a Python venv. 28 | ```shell 29 | git clone https://github.com/balaji-dutt/zettel-link-rewriter.git 30 | python -m venv .venv 31 | ./.venv/scripts/activate 32 | pip install -r requirements.txt 33 | python zettel_link_rewriter.py 34 | ``` 35 | Running the script as shown above will make the script run with a set of built-in defaults. But you can configure 36 | the script either by supplying a list of parameters on the command-line: 37 | ```shell 38 | python zettel_link_rewriter.py -v debug -p all --target_files ./dest/ 39 | ``` 40 | 41 | Or you can configure the script by passing a path to a configuration file: 42 | 43 | ```shell 44 | python zettel_link_rewriter.py -c myconfig.ini 45 | ``` 46 | 47 | An explanation of the different parameters the script recognizes is provided below. 48 | 49 | ### Parameters 50 | 51 | |Parameter|Mandatory|Description| 52 | |---------|---------|-----------| 53 | |`-h`|No|Show a help message| 54 | |`-c` / `--config`|No|Specify path to Configuration file.
By default, the script will look for a configuration file named zettel_link_rewriter.ini in the same directory| 55 | |`-v`|No|Verbosity option. Configures the logging level for the script. You can specify 3 levels of verbosity - `warning/info/debug`. The default is `warning`| 56 | |`-f` / `--file`|No|Write log messages to a file in addition to the console.| 57 | |`--source_files`|No|Specify path to directory containing source markdown files to be processed.
Default is to use a "source" folder in the current directory.| 58 | |`--target_files`|No|Specify path to directory where processed markdown files should be saved.
Default is to use a "dest" folder in the current directory.
The folder where markdown files should be saved after processing will be created if it does not exist.| 59 | |`-p` / `--process`|No|Flag to tell the script whether it should process all files in the source directory or only recently modified files.
The parameter supports two values - `all` or `modified`| 60 | |`-m` / `--modified`|No|Specify in minutes the time-limit for finding recently modified files. Can be used with `-p modified` option.
# NOTE: in the dump this script was taken from, the end of README.md, requirements.txt and
# zettel_link_rewriter.ini share a physical line with the start of the script. That text is kept
# verbatim below (as comments) so that it is not lost when this block is replaced.
#
# If this is not specified, the script will use a default value of `60` minutes.|
# 61 |
# 62 |
# 63 | ### Caveats
# 64 | - In order to avoid processing wikilinks inside code blocks, the script will ignore lines beginning with 4 spaces.
# 65 | However, this means that a wikilink in a list that is on the 3rd level or deeper will not be converted. In other words:
# 66 | ```
# 67 | - [[Level 1 wikilink]] # Will be converted
# 68 | - [[Level 2 wikilink]] # Will be converted
# 69 | - [[Level 3 wikilink]] # Will *NOT BE* converted
# 70 | - Any wikilinks in this level or deeper will also not be converted.
# 71 | ```
# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# 1 | ConfigArgParse>=1.2.3
# 2 | regex>=2020.6.8
# 3 |
# --------------------------------------------------------------------------------
# /zettel_link_rewriter.ini:
# --------------------------------------------------------------------------------
# 1 | [Locations]
# 2 | ;source_files =
# 3 | ;target_files =
# 4 |
# 5 | [Config]
# 6 | ;verbosity = DEBUG
# 7 | ;process = all
# 8 | ;modified = 60
# --------------------------------------------------------------------------------
# /zettel_link_rewriter.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
"""Rewrite ``[[wikilinks]]`` in a folder of Markdown files as standard Markdown links.

Every file matching ``*.*`` in the source directory is read as UTF-8, rewritten and saved under
the same name in the target directory. Text inside fenced code blocks and inline code spans, and
anything that follows four consecutive spaces on a line, is left untouched.
"""

import logging
import pathlib
import sys
import time

_LOG_FORMAT = "%(asctime)s %(levelname)-8s %(funcName)s():%(lineno)i: %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Configure logging early
logging.basicConfig(level=logging.DEBUG, format=_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT)

# Alternatives shared by both patterns. Fenced and inline code is matched and then thrown away with
# (*SKIP)(*FAIL) so that links inside it are never rewritten. The V1 engine allows in-line flags, so
# newline matching ("s") is enabled only for these two alternatives.
_SKIP_CODE_SPANS = (r"(?V1)"
                    r"(?s)```.*?```(*SKIP)(*FAIL)(?-s)|(?s)`.*?`(*SKIP)(*FAIL)(?-s)")

# Finds references in style [[foo]] only, by excluding [[foo]](bar) and [[foo]] (bar).
# Capture group 3 is just "foo". The title may not run across a closing "]]", which keeps
# several links on one line ("[[a]] and [[b]]") from being merged into a single match.
_STANDALONE_LINK = (_SKIP_CODE_SPANS
                    # Ignore the rest of a line after 4 spaces or 1 tab (indented code blocks).
                    + r"|(\ {4}|\t).*(*SKIP)(*FAIL)"
                    + r"|(\[\[((?:(?!\]\]).)*)\]\](?!\s\(|\())")

# Finds only references in style [[123]](bar) or [[123]] (bar). Capture group 3 is "123" and
# capture group 5 is "bar".
_INLINE_LINK = (_SKIP_CODE_SPANS
                # Only 4 spaces are ignored here. Tabs are not supported since Note-Link-Janitor
                # uses tabs for inline references.
                + r"|(\ {4}).*(*SKIP)(*FAIL)"
                + r"|(\[\[(\d+)\]\](\s\(|\()(.*)(?=\))\))")


def parse_config():
    """
    Read script options from the command line and/or a configuration file and set up logging.

    :return: Tuple of (config_file, source_files, target_files, log_file, process_type, modified_time)
    :raises FileNotFoundError: If the specified configuration file does not exist.
    :raises ValueError: If only modified files should be processed but the time limit is not a positive number.
    """
    # Imported here rather than at module level so that the file-handling helpers in this module
    # stay importable when the third-party package is not installed.
    import configargparse

    script_path = pathlib.Path(__file__)
    default_config = script_path.stem + ".ini"
    config = configargparse.ArgParser(default_config_files=[default_config])
    config.add_argument('-c', '--config', is_config_file=True,
                        help="Specify path to Configuration file. Default is {0}".format(default_config),
                        metavar='CONFIG')
    # type=str.lower lets values such as "DEBUG" (as used in the sample .ini file) pass the choices check.
    config.add_argument('-v', '--verbosity', action='store', type=str.lower,
                        help="Specify logging level for script. Default is %(default)s.",
                        choices=['warning', 'info', 'debug'],
                        default='warning')
    config.add_argument('-f', '--file', action='store',
                        help='Write log messages to a file', metavar='LOGFILE')
    config.add_argument('--source_files', action='store',
                        help="Specify path to directory containing source markdown files. Default is to use a "
                             "\"source\" folder in the current directory. ",
                        default=script_path.parent / "source",
                        metavar='DIRECTORY')
    config.add_argument('--target_files', action='store',
                        help="Specify path to directory where processed markdown files should be saved. Default is to "
                             "use a \"dest\" folder in the current directory. ",
                        default=script_path.parent / "dest", metavar='DIRECTORY')
    config.add_argument('-p', '--process', action='store',
                        help="Determine whether to process all source files or only recently modified files. Default "
                             "is %(default)s.",
                        choices=['all', 'modified'],
                        default='all')
    # "--minutes" is accepted as an alias because that is the name the README documents.
    config.add_argument('-m', '--modified', '--minutes', dest='modified', action='store', type=int,
                        help="Specify in minutes what is the time limit for recently modified files. Default is "
                             "%(default)s.",
                        default=60, metavar='MINUTES')
    if len(sys.argv) == 1:
        print('Script is being executed without any parameters and will use built-in defaults. Re-run script with -h '
              'parameter to understand options available.')

    # parse_known_args() returns (recognized arguments, unrecognized arguments); only the former are used.
    options, _unrecognized = config.parse_known_args()
    option_values = vars(options)
    config_file = option_values.get("config")
    source_files = option_values.get("source_files")
    target_files = option_values.get("target_files")
    logging_level = option_values.get("verbosity")
    log_file = option_values.get("file")
    process_type = option_values.get("process")
    modified_time = option_values.get("modified")

    # Reset logging levels as per config
    logger = logging.getLogger()
    logger.setLevel(logging_level.upper())

    # Configure file-based logging
    if log_file is None:
        logging.debug("No log file set. All log messages will print to Console only")
    else:
        filelogger = logging.FileHandler(str(log_file))
        filelogger.setFormatter(logging.Formatter(_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT))
        logger.addHandler(filelogger)
        logging.warning("Outputting to log file")

    # Check if specified config file exists else bail
    if config_file is None:
        config_file = default_config
        logging.debug("No configuration file specified. Using the default configuration file %s", default_config)
    elif pathlib.Path(config_file).exists():
        logging.debug("Found configuration file %s", config_file)
    else:
        # logging.error, not logging.exception: there is no active exception to attach here.
        logging.error('Did not find the specified configuration file %s', config_file)
        raise FileNotFoundError(config_file)

    # A missing, zero or negative time limit cannot describe "recently modified" files.
    if process_type == 'modified' and (modified_time is None or modified_time <= 0):
        raise ValueError("Script is set to process only recently modified files. But the modified time parameter is "
                         "incorrectly defined.")

    # Print values of other parameters in debug mode
    if process_type == 'all' and modified_time:
        logging.debug("Script is set to process all files. Modified time parameter (if any) will have no effect.")
    elif process_type == 'modified':
        logging.debug("Script is set to only process files modified in last %s minutes", modified_time)

    return config_file, source_files, target_files, log_file, process_type, modified_time


def check_dirs(source_dir, target_dir):
    """
    Function to check if specified directories exist. The function will create the destination directory
    (including missing parent directories) if it does not exist.

    :param source_dir: Directory containing files to be processed.
    :param target_dir: Directory to store files after they are processed.
    :return: Directory paths as a list [source_dir, target_dir]
    :raises NotADirectoryError: If the source is not an existing directory, or the target exists but is not a
        directory.
    """
    if not pathlib.Path(source_dir).is_dir():
        logging.error('Did not find the directory %s', source_dir)
        raise NotADirectoryError(str(source_dir))

    target_path = pathlib.Path(target_dir)
    if not target_path.exists():
        default_target = str(pathlib.Path(__file__).parent / "dest")
        if str(target_dir) == default_target:
            print('No target directory found in specified configuration file. Using default {} instead'.format(
                target_dir))
        else:
            print('Did not find the target directory {}. Will try create it now'.format(target_dir))
        # parents=True together with exist_ok=True is what makes this behave like "mkdir -p".
        target_path.mkdir(parents=True, exist_ok=True)
    elif not target_path.is_dir():
        logging.error('Target %s exists but is not a directory', target_dir)
        raise NotADirectoryError(str(target_dir))

    return [source_dir, target_dir]


def _read_text(path):
    """Return the contents of *path* decoded as UTF-8, or None (after logging) if it cannot be read."""
    try:
        with open(path, encoding="utf8") as infile:
            # Read the entire file as a single string
            return infile.read()
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError covers non-text files (e.g. images) that also match the "*.*" glob.
        logging.exception("Unable to read file %s as UTF-8 text", path)
        return None


def _rewrite_wikilinks(text):
    """Return *text* with stand-alone and in-line wikilinks rewritten as Markdown links."""
    import regex

    # [[foo]] -> [foo](foo.md)
    text = regex.sub(_STANDALONE_LINK, r"[\3](\3.md)", text)
    # [[123]](bar) or [[123]] (bar) -> [123](123 bar.md)
    return regex.sub(_INLINE_LINK, r"[\3](\3 \5.md)", text)


def modify_links(file_obj):
    """
    Function will parse file contents (opened in utf-8 mode) and modify standalone [[wikilinks]] and in-line
    [[wikilinks]](wikilinks) into traditional Markdown link syntax.

    :param file_obj: Path to file
    :return: String containing modified text, or None if the file could not be read. Newlines will be returned
        as '\\n' in the string.
    """
    logging.debug("Going to start processing %s.", file_obj)
    text = _read_text(file_obj)
    if text is None:
        return None
    modified_text = _rewrite_wikilinks(text)
    logging.debug("Finished processing %s", file_obj)
    return modified_text


def write_file(file_contents, file, target_dir):
    """
    Function will take modified contents of file from modify_links() function and output to target directory. File
    extensions are preserved and file is written in utf-8 mode.

    :param file_contents: String (or iterable of strings) containing modified text.
    :param file: Path to source file. Will be used to construct target file name.
    :param target_dir: Path to destination directory
    :return: Full path to file that was written to target directory. A failed write is logged, not raised.
    """
    fullpath = pathlib.Path(target_dir).joinpath(pathlib.Path(file).name)
    logging.debug("Going to write file %s now.", fullpath)
    try:
        with open(fullpath, 'w', encoding="utf8") as outfile:
            if isinstance(file_contents, str):
                # One write call instead of one call per character.
                outfile.write(file_contents)
            else:
                outfile.writelines("%s" % item for item in file_contents)
    except OSError:
        logging.exception("Unable to write contents to %s", fullpath)
    else:
        logging.debug("Finished writing file %s now.", fullpath)

    return fullpath


def process_files(source_dir, target_dir, process_type, modified_time):
    """
    Function to process input files. Will operate in a loop on all files (process "all")
    or recently modified files (process "modified"). Sub-directories are not entered.

    :param source_dir: Path to directory containing files to be processed.
    :param target_dir: Path to directory where files should be written to after processing.
    :param process_type: Flag to process all or only modified files.
    :param modified_time: Time window (in minutes) for finding recently modified files.
    :return: Number of files that were read, rewritten and handed to write_file().
    """
    count = 0

    if process_type == 'all':
        logging.info("Start processing all files in %s", source_dir)
        oldest_allowed = None
    elif process_type == 'modified':
        logging.info("Start processing recently modified files in %s", source_dir)
        oldest_allowed = time.time() - modified_time * 60
    else:
        logging.warning("Unknown process type %r. No files were processed.", process_type)
        return count

    for file in pathlib.Path(source_dir).glob('*.*'):
        # The glob also matches directories that have a dot in their name.
        if not file.is_file():
            continue
        if oldest_allowed is not None and file.stat().st_mtime <= oldest_allowed:
            continue
        modified_text = modify_links(file)
        if modified_text is None:
            # Unreadable file: already logged, and nothing sensible could be written for it.
            continue
        write_file(modified_text, file, target_dir)
        count += 1
    logging.info("Finished processing all files in %s", source_dir)

    return count


def main():
    """Parse the configuration, process the files and print how long the run took."""
    start_time = time.perf_counter()
    _config_file, source_files, target_files, _log_file, process_type, modified_time = parse_config()
    source_dir, target_dir = check_dirs(source_dir=str(source_files), target_dir=str(target_files))
    count = process_files(source_dir=source_dir, target_dir=target_dir, process_type=process_type,
                          modified_time=modified_time)
    elapsed_seconds = int(time.perf_counter() - start_time)
    hours, remainder = divmod(elapsed_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    print("Script took {:02}:{:02}:{:02} to process {} files".format(hours, minutes, seconds, count))


if __name__ == '__main__':
    main()