├── img ├── readme_screenshots.png ├── readme_screenshot_before.md └── readme_screenshot_after.md ├── .claude ├── settings.local.json └── CLAUDE.md ├── run_normalization.py ├── src └── zettelkasten_normalizer │ ├── __init__.py │ ├── config.py │ ├── file_operations.py │ ├── utils.py │ ├── normalization_zettel.py │ ├── link_processor.py │ ├── frontmatter_parser.py │ └── yfm_processor.py ├── setup.py ├── .gitignore ├── README.md └── tests └── test_normalization_zettel.py /img/readme_screenshots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmatsuzaki/note-normalization-for-zettelkasten/HEAD/img/readme_screenshots.png -------------------------------------------------------------------------------- /.claude/settings.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "permissions": { 3 | "allow": [ 4 | "Bash(mkdir:*)", 5 | "Bash(mv:*)", 6 | "Bash(python -m pytest test_normalization_zettel.py -v)", 7 | "Bash(python:*)", 8 | "Bash(ls:*)", 9 | "Bash(cat:*)", 10 | "Bash(rm:*)", 11 | "Bash(find:*)", 12 | "Bash(tree:*)" 13 | ], 14 | "deny": [] 15 | } 16 | } -------------------------------------------------------------------------------- /run_normalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Entry point for running the normalization script from the project root. 
4 | """ 5 | 6 | import sys 7 | import os 8 | 9 | # Add the src directory to the Python path 10 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) 11 | 12 | # Import and run the main normalization script 13 | from zettelkasten_normalizer.normalization_zettel import main 14 | 15 | if __name__ == "__main__": 16 | main() -------------------------------------------------------------------------------- /src/zettelkasten_normalizer/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Zettelkasten Note Normalization Package 3 | 4 | This package provides tools for normalizing Markdown notes in Zettelkasten systems. 5 | """ 6 | 7 | __version__ = "1.0.0" 8 | __author__ = "jMatsuzaki" 9 | 10 | # Import main functions for easier access 11 | from .yfm_processor import check_and_create_yfm 12 | from .link_processor import rename_notes_with_links, rename_images_with_links 13 | from .file_operations import get_files 14 | from .utils import setup_logger, query_yes_no -------------------------------------------------------------------------------- /img/readme_screenshot_before.md: -------------------------------------------------------------------------------- 1 | Front Matter will be added to the beginning of the file. 2 | 3 | ## Adding Yaml Front Matter 4 | 5 | All headings and body text will be retained, including line breaks and text decoration. 6 | 7 | Note files in subdirectories will be moved to the Root directory (excluding image files). 8 | 9 | The note in the Inbox will be `draft: true` in Yaml Front Matter. The default is `draft: false`. 10 | 11 | ## Replace file name and links 12 | 13 | The file name of all notes and images will be replaced by the UUID (41239e884b89465aa206a1cbc24e7166.md). 14 | 15 | And the link will be replaced. Wikilink will be replaced by Markdown link. The link text will retain 16 | the original text; if an Alias is set, the Alias will be adopted as the link text. 
As shown below: 17 | 18 | [[linked_file]] 19 | [[linked_file_with_alias | alias text]] 20 | ![[image_link.png]] 21 | 22 | Markdown link will only replace the link to the note. 23 | 24 | [markdown_link](markdown_link.md) 25 | 26 | ## Moving Hashtags 27 | 28 | ### All hashtags to Yaml Front Matter 29 | 30 | All headings will remain intact. Headlines are not considered hashtags. 31 | 32 | The hashtag will be moved to Yaml Front Matter and the original hashtag will have its line removed. 33 | 34 | tags: #hashtag1 #hashtag2 #hashtag3 35 | -------------------------------------------------------------------------------- /img/readme_screenshot_after.md: -------------------------------------------------------------------------------- 1 | --- 2 | uid: 9787f826661647389d0ed859f9945d7d 3 | title: readme_screenshot_after 4 | aliases: [] 5 | date: 2025-09-07 05:40:19 6 | update: 2025-09-07 05:41:42 7 | tags: [hashtag1, hashtag2, hashtag3] 8 | draft: false 9 | --- 10 | 11 | Front Matter will be added to the beginning of the file. 12 | 13 | ## Adding Yaml Front Matter 14 | 15 | All headings and body text will be retained, including line breaks and text decoration. 16 | 17 | Note files in subdirectories will be moved to the Root directory (excluding image files). 18 | 19 | The note in the Inbox will be `draft: true` in Yaml Front Matter. The default is `draft: false`. 20 | 21 | ## Replace file name and links 22 | 23 | The file name of all notes and images will be replaced by the UUID (41239e884b89465aa206a1cbc24e7166.md). 24 | 25 | And the link will be replaced. Wikilink will be replaced by Markdown link. The link text will retain 26 | the original text; if an Alias is set, the Alias will be adopted as the link text. As shown below: 27 | 28 | [linked_file](linked_file.md) 29 | [alias text](linked_file_with_alias.md) 30 | ![image_link.png](image_link.png.md) 31 | 32 | Markdown link will only replace the link to the note. 
33 | 34 | [markdown_link](markdown_link.md) 35 | 36 | ## Moving Hashtags 37 | 38 | ### All hashtags to Yaml Front Matter 39 | 40 | All headings will remain intact. Headlines are not considered hashtags. 41 | 42 | The hashtag will be moved to Yaml Front Matter and the original hashtag will have its line removed. 43 | 44 | tags: #hashtag1 #hashtag2 #hashtag3 45 | -------------------------------------------------------------------------------- /src/zettelkasten_normalizer/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration settings for Zettelkasten note normalization. 3 | """ 4 | 5 | # Directory and file settings 6 | INBOX_DIR = [ 7 | "Inbox", 8 | "Draft", 9 | "Pending", 10 | ] # The files in this folder will have the YFM draft key set to true 11 | 12 | EXCLUDE_DIR = { 13 | "Backup", 14 | "Template", 15 | "tmp", 16 | "node_modules", 17 | } # Folders not to be processed (Hidden folders and files are not covered by default) 18 | 19 | EXCLUDE_FILE = { 20 | "tags" 21 | } # Files not to be processed (Hidden folders and files are not covered by default) 22 | 23 | # File extension settings 24 | NOTE_EXT = [".md", ".txt"] # Note file extension 25 | IMG_EXT = [".png", ".jpg", ".jpeg", ".svg", ".gif"] # image file extension 26 | 27 | # YFM default settings 28 | YFM = { 29 | "uid": "", # It will be replaced by the uuid 30 | "title": "", # It will be replaced by the file name 31 | "aliases": "[]", 32 | "date": "", # Replaced by the file creation date 33 | "update": "", # Replaced by the file modification date 34 | "tags": "[]", # If you have a hashtag, it will be generated automatically 35 | "draft": "false", # The following note will be true for the folder specified as INBOX_DIR 36 | } 37 | 38 | # Front matter format settings 39 | FRONT_MATTER_FORMAT = "yaml" # Supported formats: "yaml", "toml", "json" 40 | 41 | # Function execution settings 42 | EXECUTION_FUNCTION_LIST = { 43 | "function_create_yfm": True, # If 
there is no Yaml FrontMatter at the beginning of the note, it will be generated 44 | "function_rename_notes": True, # Replace the file name of the note with the UID and replace the linked parts from other notes 45 | "function_rename_images": True, # Replace the file name of the image with the UID and replace the linked part from the other note 46 | "function_convert_wikilinks": True, # Convert WikiLinks [[link]] to Markdown links [link](link.md) 47 | } 48 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Setup script for Zettelkasten Note Normalization Tool 4 | """ 5 | 6 | from setuptools import setup, find_packages 7 | import os 8 | 9 | # Read version from __init__.py 10 | here = os.path.abspath(os.path.dirname(__file__)) 11 | init_path = os.path.join(here, 'src', 'zettelkasten_normalizer', '__init__.py') 12 | version = "0.0.0" # Default version if not found 13 | with open(init_path, 'r', encoding='utf-8') as f: 14 | for line in f: 15 | if line.startswith('__version__'): 16 | version = line.split('=')[1].strip().strip('"\'') 17 | break 18 | 19 | # Read README 20 | readme_path = os.path.join(here, 'README.md') 21 | if os.path.exists(readme_path): 22 | with open(readme_path, 'r', encoding='utf-8') as f: 23 | long_description = f.read() 24 | else: 25 | long_description = "A tool for normalizing Markdown notes for Zettelkasten systems" 26 | 27 | setup( 28 | name="zettelkasten-normalizer", 29 | version=version, 30 | author="jMatsuzaki", 31 | author_email="", 32 | description="A tool for normalizing Markdown notes for Zettelkasten systems", 33 | long_description=long_description, 34 | long_description_content_type="text/markdown", 35 | url="https://github.com/jmatsuzaki/note-normalization-for-zettelkasten", 36 | package_dir={"": "src"}, 37 | packages=find_packages(where="src"), 38 | classifiers=[ 39 | 
"Development Status :: 4 - Beta", 40 | "Intended Audience :: End Users/Desktop", 41 | "License :: OSI Approved :: MIT License", 42 | "Operating System :: OS Independent", 43 | "Programming Language :: Python :: 3", 44 | "Programming Language :: Python :: 3.9", 45 | "Programming Language :: Python :: 3.10", 46 | "Programming Language :: Python :: 3.11", 47 | "Programming Language :: Python :: 3.12", 48 | "Topic :: Text Processing :: Markup", 49 | "Topic :: Office/Business :: Groupware", 50 | ], 51 | python_requires=">=3.9", 52 | entry_points={ 53 | "console_scripts": [ 54 | "zettelkasten-normalizer=zettelkasten_normalizer.normalization_zettel:main", 55 | ], 56 | }, 57 | install_requires=[ 58 | # No external dependencies - uses only standard library 59 | ], 60 | extras_require={ 61 | "dev": [ 62 | "pytest>=6.0", 63 | "black", 64 | "flake8", 65 | "mypy", 66 | ], 67 | }, 68 | ) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | test_data/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # ctags 133 | tags 134 | -------------------------------------------------------------------------------- /src/zettelkasten_normalizer/file_operations.py: -------------------------------------------------------------------------------- 1 | """ 2 | File operations for Zettelkasten note normalization. 3 | """ 4 | 5 | import os 6 | import uuid 7 | import re 8 | import logging 9 | from .config import EXCLUDE_DIR, EXCLUDE_FILE, NOTE_EXT, IMG_EXT 10 | from .utils import get_file_name 11 | 12 | # Get logger 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def get_files(start_path, type): 17 | """Retrieves a file of the specified path and type""" 18 | files = [] 19 | if os.path.isfile(start_path): 20 | if check_note_type(start_path, type): 21 | files.append(start_path) 22 | else: 23 | # get all files 24 | for pathname, dirnames, filenames in os.walk(start_path, topdown=True): 25 | # exclude dir and files 26 | dirnames[:] = list(filter(lambda d: not d in EXCLUDE_DIR, dirnames)) 27 | filenames[:] = list(filter(lambda f: not f in EXCLUDE_FILE, filenames)) 28 | dirnames[:] = list( 29 | filter(lambda d: not d[0] == ".", dirnames) 30 | ) # Hidden directory beginning with "." 31 | filenames[:] = list( 32 | filter(lambda f: not f[0] == ".", filenames) 33 | ) # Hidden files beginning with "." 
34 | for filename in filenames: 35 | file_path = os.path.join(pathname, filename) 36 | if check_note_type(file_path, type): 37 | # append target notes to array 38 | files.append(file_path) 39 | return files 40 | 41 | 42 | def check_note_type(file_path, type): 43 | """Check if the specified file has an extension of the specified type""" 44 | if type == "note": 45 | target_ext = tuple(NOTE_EXT) 46 | elif type == "image": 47 | target_ext = tuple(IMG_EXT) 48 | # Filtering files 49 | if file_path.endswith(target_ext): 50 | return True 51 | else: 52 | return False 53 | 54 | 55 | def check_note_has_uid(file): 56 | """Check if a note file already has a UID as filename""" 57 | file_title = get_file_name(file)[1] 58 | return re.match("^[a-f0-9]{32}$", file_title) 59 | 60 | 61 | def get_new_filepath_with_uid(file, root_path): 62 | """get new filepath with uid""" 63 | # UID is UUID v4 (32-digit hexadecimal without hyphens) 64 | uid = uuid.uuid4().hex 65 | ext = get_file_name(file)[2] 66 | # Target path to check for duplicate UID 67 | if ext == ".md": 68 | path = root_path 69 | else: 70 | path = os.path.dirname(file) 71 | # Generate new UUID if duplicated (very unlikely but possible) 72 | while os.path.exists(build_filepath_by_uid(uid, path, ext)): 73 | uid = uuid.uuid4().hex 74 | return build_filepath_by_uid(uid, path, ext) 75 | 76 | 77 | def build_filepath_by_uid(uid, path, ext=".md"): 78 | """Build file path using UID""" 79 | return path + "/" + str(uid) + ext -------------------------------------------------------------------------------- /.claude/CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Overview 6 | 7 | This is a modular Python tool for normalizing Markdown notes for Zettelkasten systems. 
The tool processes notes to add front matter (YAML, TOML, or JSON), rename files with UIDs, and convert WikiLinks to Markdown links. 8 | 9 | ### Project Structure 10 | 11 | The codebase follows the standard Python `src` layout: 12 | 13 | ``` 14 | . 15 | ├── src/ 16 | │ └── zettelkasten_normalizer/ 17 | │ ├── __init__.py 18 | │ ├── config.py # Configuration settings 19 | │ ├── utils.py # Utility functions 20 | │ ├── file_operations.py # File discovery and validation 21 | │ ├── frontmatter_parser.py # Front matter parsing (YAML/TOML/JSON) 22 | │ ├── yfm_processor.py # Front Matter processing 23 | │ ├── link_processor.py # Link substitution and file renaming 24 | │ └── normalization_zettel.py # Main entry point 25 | ├── tests/ 26 | │ └── test_normalization_zettel.py # Comprehensive test suite 27 | ├── run_normalization.py # Command line entry point 28 | └── setup.py # Package configuration 29 | ``` 30 | 31 | ## Running the Tool 32 | 33 | ### Basic Usage 34 | 35 | ```bash 36 | python run_normalization.py /path/to/your/zettelkasten_root_folder 37 | ``` 38 | 39 | ### With Options 40 | 41 | ```bash 42 | # Target specific folder/file 43 | python run_normalization.py /path/to/root -t /path/to/target 44 | 45 | # Use TOML front matter 46 | python run_normalization.py /path/to/root -f toml 47 | 48 | # Use JSON front matter 49 | python run_normalization.py /path/to/root -f json 50 | 51 | # Skip front matter processing (only rename files) 52 | python run_normalization.py /path/to/root --skip-frontmatter 53 | 54 | # Only process front matter (skip renaming) 55 | python run_normalization.py /path/to/root --skip-rename-notes --skip-rename-images 56 | 57 | # Auto-answer yes to all prompts 58 | python run_normalization.py /path/to/root -y 59 | 60 | # Combine options 61 | python run_normalization.py /path/to/root -t /path/to/target -f toml --skip-rename-images -y 62 | ``` 63 | 64 | ### Git Hook Integration 65 | 66 | For automated processing of changed files: 67 | 68 | ```bash 69 | # 
In .git/hooks/pre-commit 70 | git diff --cached --name-status | grep -e "^M" -e "^A" | while read a b; do 71 | python /path/to/run_normalization.py /path/to/zettelkasten_root -t "$b" -y 72 | git add "$b" 73 | done 74 | ``` 75 | 76 | ## Configuration 77 | 78 | ### Key Settings (in `src/zettelkasten_normalizer/config.py`) 79 | 80 | - `INBOX_DIR`: Folders where files get `draft: true` in YAML front matter 81 | - `EXCLUDE_DIR`: Folders to skip during processing 82 | - `EXCLUDE_FILE`: Files to skip during processing 83 | - `NOTE_EXT`: Supported note file extensions (.md, .txt) 84 | - `IMG_EXT`: Supported image extensions (.png, .jpg, .jpeg, .svg, .gif) 85 | 86 | ### Function Controls (`EXECUTION_FUNCTION_LIST` in `config.py`) 87 | 88 | - `function_create_yfm`: Add/update YAML front matter 89 | - `function_rename_notes`: Rename notes to UUID and update links 90 | - `function_rename_images`: Rename images to UUID and update links 91 | 92 | ## Architecture 93 | 94 | ### Core Functions 95 | 96 | - `check_and_create_yfm()`: Creates/updates YAML front matter with title, date, tags, etc. 97 | - `rename_notes_with_links()`: Renames note files to UUID format and updates all references 98 | - `rename_images_with_links()`: Renames image files to UUID format and updates all references 99 | - `substitute_wikilinks_to_markdown_links()`: Converts `[[wikilinks]]` to `[markdown](links)` 100 | 101 | ### File Processing Flow 102 | 103 | 1. Parse command line arguments for root path and options 104 | 2. Collect target files based on extensions and exclusion rules 105 | 3. Process YAML front matter (if enabled) 106 | 4. Rename notes to UUID and update links (if enabled) 107 | 5. Rename images to UUID and update links (if enabled) 108 | 6.
Log all operations to `normalization_zettel.log` 109 | 110 | ### UUID Generation 111 | 112 | - Uses UUID v4 (32-character hex without hyphens) 113 | - Automatically checks for duplicates and regenerates if needed 114 | - Markdown files move to root directory, images stay in place 115 | 116 | ## Requirements 117 | 118 | - Python 3.9.1 or above 119 | - Standard library only (no external dependencies) 120 | - Cross-platform support: Windows, macOS, and Linux 121 | 122 | ## Testing 123 | 124 | The tool includes a comprehensive test suite with automated tests covering: 125 | 126 | - Cross-platform compatibility (line endings, file operations) 127 | - Multiple front matter formats (YAML, TOML, JSON) 128 | - Unicode handling and file encoding 129 | - All core functionality modules 130 | 131 | Run tests with: 132 | 133 | ```bash 134 | python tests/test_normalization_zettel.py 135 | ``` 136 | 137 | It's still recommended to test on a copy of your Zettelkasten before running on production data. 138 | -------------------------------------------------------------------------------- /src/zettelkasten_normalizer/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for Zettelkasten note normalization. 
3 | """ 4 | 5 | import os 6 | import datetime 7 | import platform 8 | import unicodedata 9 | import logging 10 | import sys 11 | from logging import Formatter 12 | from logging.handlers import RotatingFileHandler 13 | 14 | 15 | def setup_logger(log_dir): 16 | """setup logger""" 17 | if os.path.isdir(log_dir): 18 | # Normalize path for cross-platform compatibility 19 | log_dir = normalize_path(log_dir) 20 | else: 21 | print("The specified root folder does not exist") 22 | print("Abort the process") 23 | print("You can see how to use it with the -h option") 24 | sys.exit() 25 | log_file_format = "%(asctime)s [%(levelname)s] %(message)s" 26 | log_console_format = "%(message)s" 27 | # main logger 28 | logger = logging.getLogger(__name__) 29 | # console handler 30 | console_handler = logging.StreamHandler() 31 | console_handler.setLevel(logging.INFO) 32 | console_handler.setFormatter(Formatter(log_console_format)) 33 | # logger.addHandler(console_handler) 34 | 35 | # Create log file path with proper separator 36 | log_file_path = os.path.join(log_dir, "normalization_zettel.log") 37 | file_handler = RotatingFileHandler( 38 | log_file_path, maxBytes=1000000, backupCount=3 39 | ) 40 | file_handler.setLevel(logging.DEBUG) 41 | file_handler.setFormatter(Formatter(log_file_format)) 42 | 43 | # common config 44 | logging.basicConfig( 45 | level=logging.DEBUG, 46 | format="%(asctime)s [%(levelname)s] %(message)s", 47 | handlers=[console_handler, file_handler], 48 | ) 49 | return logger 50 | 51 | 52 | def get_file_name(file_path): 53 | """Retrieves a file name from the specified path. The format of the return value is as below: 54 | ('filename.ext', 'filename', '.ext')""" 55 | fullname = unicodedata.normalize("NFC", os.path.basename(file_path)) 56 | name = os.path.splitext(fullname)[0] 57 | ext = os.path.splitext(fullname)[1] 58 | return (fullname, name, ext) 59 | 60 | 61 | def get_dir_name(file_path): 62 | """Retrieves a folder name from the specified path. 
The format of the return value is as below: 63 | ('fullpath', 'basepath')""" 64 | fullpath = unicodedata.normalize("NFC", os.path.dirname(file_path)) 65 | basepath = os.path.basename(fullpath) 66 | return (fullpath, basepath) 67 | 68 | 69 | def format_date(unix_time): 70 | """format unix time to %Y-%m-%d %H:%M:%S""" 71 | date_value = datetime.datetime.fromtimestamp(unix_time) 72 | return date_value.strftime("%Y-%m-%d %H:%M:%S") 73 | 74 | 75 | def format_uid_from_date(unix_time): 76 | """format unix time to yyyymmddhhmmss""" 77 | date_value = datetime.datetime.fromtimestamp(unix_time) 78 | return date_value.strftime("%Y%m%d%H%M%S") 79 | 80 | 81 | def get_creation_date(file): 82 | """Try to get the date that a file was created, falling back to when it was 83 | last modified if that isn't possible.""" 84 | if platform.system() == "Windows": 85 | return os.path.getctime(file) 86 | else: 87 | stat = os.stat(file) 88 | try: 89 | return stat.st_birthtime 90 | except AttributeError: 91 | # On Linux, the file creation date is not available, so use the modification date 92 | return stat.st_mtime 93 | 94 | 95 | def get_modification_date(unix_time): 96 | """try to get the date that a file was changed""" 97 | return os.path.getmtime(unix_time) 98 | 99 | 100 | def query_yes_no(question, default="yes"): 101 | """Ask a yes/no question""" 102 | # Acceptable responses 103 | valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} 104 | # set default Value 105 | if default is None: 106 | prompt = " [y/n] " 107 | elif default == "yes": 108 | prompt = " [Y/n] " 109 | elif default == "no": 110 | prompt = " [y/N] " 111 | else: 112 | raise ValueError("invalid default answer: '%s'" % default) 113 | # check input process 114 | while True: 115 | sys.stdout.write(question + prompt) 116 | choice = input().lower() 117 | if default is not None and choice == "": 118 | return valid[default] 119 | elif choice in valid: 120 | return valid[choice] 121 | else: 122 | 
sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').") 123 | 124 | 125 | def normalize_line_endings(content): 126 | """Normalize line endings to Unix style (LF) regardless of platform""" 127 | # Replace Windows CRLF and old Mac CR with Unix LF 128 | content = content.replace('\r\n', '\n') # Windows CRLF -> LF 129 | content = content.replace('\r', '\n') # Old Mac CR -> LF 130 | return content 131 | 132 | 133 | def read_file_cross_platform(file_path, encoding='utf-8'): 134 | """Read file with cross-platform line ending normalization""" 135 | try: 136 | with open(file_path, 'r', encoding=encoding, newline='') as f: 137 | content = f.read() 138 | # Normalize line endings 139 | return normalize_line_endings(content) 140 | except UnicodeDecodeError: 141 | # Fallback to different encoding if UTF-8 fails 142 | with open(file_path, 'r', encoding='latin-1', newline='') as f: 143 | content = f.read() 144 | return normalize_line_endings(content) 145 | 146 | 147 | def write_file_cross_platform(file_path, content, encoding='utf-8'): 148 | """Write file with cross-platform line ending handling""" 149 | # Ensure content uses Unix line endings 150 | content = normalize_line_endings(content) 151 | 152 | # On Windows, let Python handle the conversion to CRLF automatically 153 | # On Unix systems, keep LF as is 154 | with open(file_path, 'w', encoding=encoding, newline='\n') as f: 155 | f.write(content) 156 | 157 | 158 | def get_platform_path_separator(): 159 | """Get the correct path separator for the current platform""" 160 | return os.sep 161 | 162 | 163 | def normalize_path(path): 164 | """Normalize path separators for the current platform""" 165 | return os.path.normpath(path) -------------------------------------------------------------------------------- /src/zettelkasten_normalizer/normalization_zettel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Zettelkasten Note Normalization Tool 
4 | 5 | This tool normalizes Markdown notes for Zettelkasten systems by: 6 | - Adding YAML front matter 7 | - Renaming files with UIDs 8 | - Converting WikiLinks to Markdown links 9 | 10 | Author: jMatsuzaki 11 | Repository: https://github.com/jmatsuzaki/note-normalization-for-zettelkasten 12 | """ 13 | 14 | import sys 15 | import os 16 | import argparse 17 | 18 | # Import our modules 19 | from .config import EXECUTION_FUNCTION_LIST 20 | from .utils import setup_logger, query_yes_no 21 | from .file_operations import get_files 22 | from .yfm_processor import check_and_create_yfm 23 | from .link_processor import rename_notes_with_links, rename_images_with_links, convert_wikilinks_to_markdown 24 | 25 | 26 | def parse_arguments(): 27 | """Parse command line arguments""" 28 | parser = argparse.ArgumentParser( 29 | description="This program will normalize Markdown notes for Zettelkasten", 30 | epilog="This program will add Front Matter, add UIDs and rename files, replace Wikilink with Markdown link, etc.\nFurther details can be found in the repository. 
def validate_paths(args):
    """Validate the root/target paths given on the command line.

    Exits with status 1 (after printing a diagnostic) when the root folder
    or the optional target path does not exist.

    Args:
        args: Parsed argparse namespace with ``root`` and ``target``.

    Returns:
        tuple[str, str]: ``(root_path, target_path)``; the target falls back
        to the root folder when ``-t/--target`` was not given.
    """
    # Validate root path. Diagnostics go to stderr so they do not pollute
    # stdout when the tool is used in a pipeline (e.g. the git hook).
    if not os.path.isdir(args.root):
        print("The specified root folder does not exist", file=sys.stderr)
        print("Abort the process", file=sys.stderr)
        print("You can see how to use it with the -h option", file=sys.stderr)
        sys.exit(1)

    root_path = args.root

    # Validate the optional target path (folder or single file)
    if args.target:
        if not os.path.exists(args.target):
            print("The specified target folder or file does not seem to exist.", file=sys.stderr)
            print("Abort the process", file=sys.stderr)
            sys.exit(1)
        target_path = args.target
    else:
        target_path = args.root

    return root_path, target_path


def confirm_execution(args, logger):
    """Ask the user to confirm the normalization run.

    Returns:
        bool: True when processing should continue (``--yes`` given or the
        user answered yes), False otherwise.
    """
    if args.yes:
        logger.info("--yes option has been specified, continue processing automatically")
        return True

    # query_yes_no already presents the question, so don't log it a second
    # time beforehand (the original emitted the prompt twice).
    if not query_yes_no("Can I normalize these notes?"):
        logger.info("okay. Abort the process")
        return False

    logger.info("okay. Continue processing")
    return True


def get_execution_functions(args):
    """Build the execution-function flags from config defaults and CLI flags.

    Priority: defaults from ``config.EXECUTION_FUNCTION_LIST`` first, then
    any ``--skip-*`` command line option forces the matching function off.
    """
    # Start with default config settings
    execution_functions = {
        "function_create_yfm": EXECUTION_FUNCTION_LIST["function_create_yfm"],
        "function_rename_notes": EXECUTION_FUNCTION_LIST["function_rename_notes"],
        "function_rename_images": EXECUTION_FUNCTION_LIST["function_rename_images"],
        # Older config files may not define this key yet, so default to True.
        "function_convert_wikilinks": EXECUTION_FUNCTION_LIST.get("function_convert_wikilinks", True),
    }

    # Override with command line arguments if specified
    if args.skip_frontmatter:
        execution_functions["function_create_yfm"] = False
    if args.skip_rename_notes:
        execution_functions["function_rename_notes"] = False
    if args.skip_rename_images:
        execution_functions["function_rename_images"] = False
    # getattr guard: the option may be absent from an older parser setup.
    if getattr(args, "skip_wikilinks", False):
        execution_functions["function_convert_wikilinks"] = False

    return execution_functions


def show_function_status(logger, execution_functions, format_type="yaml"):
    """Log one ON/OFF status line per function that is known to the tool."""
    function_desc = {
        "function_create_yfm": f"- {format_type.upper()} FrontMatter formatting\t\t\t......\t",
        "function_rename_notes": "- Rename the note to UID and update the link\t.......\t",
        "function_rename_images": "- Rename the image to UID and update the link\t.......\t",
        "function_convert_wikilinks": "- Convert WikiLinks to Markdown links\t\t.......\t",
    }

    for key, enabled in execution_functions.items():
        # Silently skip unknown keys so a future config entry can't crash us.
        if key in function_desc:
            logger.info(function_desc[key] + ("ON" if enabled else "OFF"))


def confirm_functions(args, logger):
    """Ask the user to confirm the displayed function list; True to continue."""
    if args.yes:
        logger.info("--yes option has been specified, continue processing automatically")
        return True

    if not query_yes_no("\nAre you sure you want to perform the above functions?"):
        logger.info("okay. Abort the process")
        return False

    logger.info("okay. Continue processing")
    return True


def execute_normalization(target_path, root_path, logger, execution_functions, format_type="yaml"):
    """Run each enabled normalization step over the target path.

    The file list is re-read (``get_files``) before every step because an
    earlier step may have renamed or moved files.
    """
    # Execute Front Matter processing
    if execution_functions["function_create_yfm"]:
        check_and_create_yfm(get_files(target_path, "note"), format_type)

    # Execute WikiLinks conversion
    if execution_functions.get("function_convert_wikilinks", False):
        convert_wikilinks_to_markdown(get_files(target_path, "note"), root_path)

    # Execute note renaming
    if execution_functions["function_rename_notes"]:
        rename_notes_with_links(get_files(target_path, "note"), root_path)

    # Execute image renaming
    if execution_functions["function_rename_images"]:
        rename_images_with_links(get_files(target_path, "image"), root_path)


def main():
    """Command line entry point: parse, validate, confirm, then normalize."""
    # Parse command line arguments
    args = parse_arguments()

    # Validate paths
    root_path, target_path = validate_paths(args)

    # Setup logger
    logger = setup_logger(root_path)

    # Welcome message
    logger.info("=================================================")
    logger.info("Welcome to Note normalization for Zettelkasten!")
    logger.info("=================================================")

    # Log paths
    logger.debug("Folder has been specified")
    logger.debug("The existence of the folder has been confirmed!")
    logger.info("Set the specified folder as the root folder of Zettelkasten and process all files under it")
    logger.info("Zettelkasten ROOT PATH is: " + root_path)
    logger.info("Normalize TARGET PATH is: " + target_path)

    # Get execution functions based on command line arguments
    execution_functions = get_execution_functions(args)

    # Confirm execution
    if not confirm_execution(args, logger):
        sys.exit(0)

    # Show function status
    logger.debug("Checking the process to be executed")
    show_function_status(logger, execution_functions, args.format)

    # Confirm functions
    if not confirm_functions(args, logger):
        sys.exit(0)

    # Execute normalization
    execute_normalization(target_path, root_path, logger, execution_functions, args.format)

    # Completion message
    logger.info("All processing is complete!")
    logger.info("The execution log was saved to a log file. please see /path/to/your/zettelkasten_root_folder/normalization_zettel.log files.")
    logger.info("=================================================")
    logger.info("Enjoy building your SECOND BRAIN!")
    logger.info("=================================================")


if __name__ == "__main__":
    main()
[Screenshots](#screenshots) 8 | 2. [Features](#features) 9 | 3. [Project Structure](#project-structure) 10 | 4. [Requirements](#requirements) 11 | 5. [Installation](#installation) 12 | 6. [Usage](#usage) 13 | 7. [Note](#note) 14 | 8. [Development](#development) 15 | 9. [Future works (TODO)](#future-works-todo) 16 | 10. [Author](#author) 17 | 11. [Preview images](#preview-images) 18 | 19 | ## Screenshots 20 | 21 | ![Screenshots](img/readme_screenshots.png) 22 | 23 | ## Features 24 | 25 | - Automatically generate Front Matter from the note information and insert it into the header 26 | - Support for multiple front matter formats: **YAML**, **TOML**, and **JSON** 27 | - Move hashtags to Front Matter 28 | - Rename the file to UUID 29 | - Move the Markdown file to the Zettelkasten's root folder 30 | - Replace link (filename and folder) 31 | - Change Wikilinks to Markdown links (with Relative Paths and Extensions) 32 | - Support for Markdown files and images 33 | 34 | ## Project Structure 35 | 36 | The project follows the standard Python `src` layout: 37 | 38 | ``` 39 | . 40 | ├── src/ 41 | │ └── zettelkasten_normalizer/ 42 | │ ├── __init__.py 43 | │ ├── config.py # Configuration settings 44 | │ ├── utils.py # Utility functions 45 | │ ├── file_operations.py # File discovery and validation 46 | │ ├── frontmatter_parser.py # Front matter parsing (YAML/TOML/JSON) 47 | │ ├── yfm_processor.py # Front Matter processing 48 | │ ├── link_processor.py # Link substitution and file renaming 49 | │ └── normalization_zettel.py # Main entry point 50 | ├── tests/ 51 | │ └── test_normalization_zettel.py # Comprehensive test suite 52 | ├── run_normalization.py # Command line entry point 53 | └── setup.py # Package configuration 54 | ``` 55 | 56 | ## Requirements 57 | 58 | - Python 3.9.1 or above 59 | - **Cross-platform support**: Windows, macOS, and Linux 60 | 61 | ## Installation 62 | 63 | Download or clone this repository. 
64 | 65 | ```bash 66 | git clone https://github.com/jmatsuzaki/note-normalization-for-zettelkasten.git 67 | cd note-normalization-for-zettelkasten 68 | ``` 69 | 70 | ### Option 1: Direct Usage (Recommended) 71 | 72 | No installation required. Just use the `run_normalization.py` script directly: 73 | 74 | ```bash 75 | python run_normalization.py /path/to/your/zettelkasten_root_folder 76 | ``` 77 | 78 | ### Option 2: Install as Package 79 | 80 | If you want to install it as a package: 81 | 82 | ```bash 83 | pip install -e . 84 | ``` 85 | 86 | After installation, you can use: 87 | 88 | ```bash 89 | zettelkasten-normalizer /path/to/your/zettelkasten_root_folder 90 | ``` 91 | 92 | ## Usage 93 | 94 | ### Basic Usage 95 | 96 | ```bash 97 | python run_normalization.py /path/to/your/zettelkasten_root_folder 98 | ``` 99 | 100 | ### Command Line Options 101 | 102 | - **Positional arguments:** 103 | 104 | - `root`: Zettelkasten's root folder 105 | 106 | - **Optional arguments:** 107 | - `-h, --help`: Show help message and exit 108 | - `-t TARGET, --target TARGET`: Normalization target folder or file 109 | - `-y, --yes`: Automatically answer yes to all questions 110 | - `-f FORMAT, --format FORMAT`: Front matter format (yaml, toml, json). 
Default: yaml 111 | - `--skip-frontmatter`: Skip front matter processing 112 | - `--skip-rename-notes`: Skip note renaming and link updating 113 | - `--skip-rename-images`: Skip image renaming and link updating 114 | 115 | ### Examples 116 | 117 | ```bash 118 | # Process entire Zettelkasten (default: all functions enabled, YAML format) 119 | python run_normalization.py ~/Documents/MyZettelkasten 120 | 121 | # Process with TOML front matter 122 | python run_normalization.py ~/Documents/MyZettelkasten -f toml 123 | 124 | # Process with JSON front matter 125 | python run_normalization.py ~/Documents/MyZettelkasten -f json 126 | 127 | # Skip front matter processing (only rename files and update links) 128 | python run_normalization.py ~/Documents/MyZettelkasten --skip-frontmatter 129 | 130 | # Only process front matter (skip file renaming) 131 | python run_normalization.py ~/Documents/MyZettelkasten --skip-rename-notes --skip-rename-images 132 | 133 | # Skip image renaming but process notes and front matter 134 | python run_normalization.py ~/Documents/MyZettelkasten --skip-rename-images 135 | 136 | # Process specific file without confirmation prompts 137 | python run_normalization.py ~/Documents/MyZettelkasten -t ~/Documents/MyZettelkasten/new-note.md -y 138 | 139 | # Combine multiple options 140 | python run_normalization.py ~/Documents/MyZettelkasten -f toml --skip-rename-images -y 141 | ``` 142 | 143 | ### Git Hook Integration 144 | 145 | To automatically process changed files, add this to your pre-commit hook (`.git/hooks/pre-commit`): 146 | 147 | ```bash 148 | #!/bin/bash 149 | git diff --cached --name-status | grep -e "^M" -e "^A" | while read a b; do 150 | python /path/to/run_normalization.py /path/to/your/zettelkasten_root_folder -t "$b" -y 151 | git add "$b" 152 | done 153 | ``` 154 | 155 | ### Logging 156 | 157 | The execution log is saved to `normalization_zettel.log` in the current directory. 
158 | 159 | ## Note 160 | 161 | This program is mainly designed to fix my Zettelkasten, so if you use it, please test it beforehand to make sure it fits your Zettelkasten well. 162 | 163 | ### Testing Recommendations 164 | 165 | 1. **Test on a Copy First**: Create a copy of your Zettelkasten and test the tool on the copy before running it on your actual data. 166 | 167 | 2. **Check Logs**: Review the execution results in `normalization_zettel.log`. 168 | 169 | 3. **Use Version Control**: It is strongly recommended to use Git or another version control system with your Zettelkasten. This allows you to: 170 | - Revert changes if needed 171 | - Review differences with `git diff` 172 | - Repair specific changes if necessary 173 | 174 | ### Configuration 175 | 176 | You can modify the behavior by editing `src/zettelkasten_normalizer/config.py`: 177 | 178 | - `FRONT_MATTER_FORMAT`: Default front matter format ("yaml", "toml", "json") 179 | - `EXECUTION_FUNCTION_LIST`: Default function execution settings 180 | - `INBOX_DIR`: Folders where files get `draft: true` in front matter 181 | - `EXCLUDE_DIR`: Folders to skip during processing 182 | - `EXCLUDE_FILE`: Files to skip during processing 183 | - `NOTE_EXT`: Supported note file extensions 184 | - `IMG_EXT`: Supported image extensions 185 | 186 | ### Function Control Priority 187 | 188 | The tool uses the following priority order for function control: 189 | 190 | 1. **Default**: Configuration in `config.py` (`EXECUTION_FUNCTION_LIST`) 191 | 2. 
**Override**: Command line arguments (`--skip-*` options)

**Example scenarios:**

```python
# In config.py
EXECUTION_FUNCTION_LIST = {
    "function_create_yfm": True,  # Enabled by default
    "function_rename_notes": True,  # Enabled by default
    "function_rename_images": True,  # Enabled by default
}
```

```bash
# No command line options → Uses config settings
# Result: frontmatter=ON, rename_notes=ON, rename_images=ON
python run_normalization.py ~/zettelkasten

# With --skip-rename-notes → Command line overrides config
# Result: frontmatter=ON (config), rename_notes=OFF (command), rename_images=ON (config)
python run_normalization.py ~/zettelkasten --skip-rename-notes
```

### Front Matter Formats

The tool supports three front matter formats:

**YAML (default)**

```yaml
---
title: Note Title
tags: [tag1, tag2]
draft: false
---
```

**TOML**

```toml
+++
title = "Note Title"
tags = ["tag1", "tag2"]
draft = false
+++
```

**JSON**

```json
{
  "title": "Note Title",
  "tags": ["tag1", "tag2"],
  "draft": false
}
```

### Cross-Platform Compatibility

The tool handles cross-platform compatibility automatically:

- **Line Endings**: Automatically normalizes different line ending styles (CRLF, LF, CR)
- **Path Separators**: Uses appropriate path separators for each platform
- **Character Encoding**: Supports UTF-8 encoding with fallback handling
- **File Operations**: Cross-platform file reading and writing with proper encoding

## Development

### Running Tests

```bash
python -m pytest tests/
# or
python tests/test_normalization_zettel.py
```

### 
Development Installation 268 | 269 | ```bash 270 | pip install -e ".[dev]" 271 | ``` 272 | 273 | This installs development dependencies including pytest, black, flake8, and mypy. 274 | 275 | ## Future works (TODO) 276 | 277 | - Performance optimizations for large repositories 278 | 279 | ## Author 280 | 281 | - [jMatsuzaki](https://jmatsuzaki.com/) 282 | - [jMatsuzaki Inc.](https://jmatsuzaki.com/company) 283 | - [@jmatsuzaki](https://twitter.com/jmatsuzaki) 284 | 285 | ## Preview images 286 | 287 | Preview images were taken using: 288 | 289 | - [iTerm2](https://iterm2.com/) terminal emulator on macOS 290 | - [onedark.vim](https://github.com/joshdick/onedark.vim) on [Neovim](https://github.com/neovim/neovim) 291 | -------------------------------------------------------------------------------- /src/zettelkasten_normalizer/link_processor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Link processing functions for Zettelkasten note normalization. 
3 | """ 4 | 5 | import re 6 | import os 7 | import shutil 8 | import logging 9 | from .utils import get_file_name, read_file_cross_platform, write_file_cross_platform 10 | from .file_operations import get_files, check_note_has_uid, get_new_filepath_with_uid 11 | from .frontmatter_parser import FrontMatterParser 12 | 13 | # Get logger 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | def substitute_wikilinks_to_markdown_links(old_file_path, new_file_path, root_path): 18 | """substitute wikilinks to markdown links""" 19 | # build file info 20 | old_file_names = get_file_name(old_file_path) 21 | new_file_link = get_file_name(new_file_path)[0] 22 | logger.debug("substitute Wikilinks...") 23 | update_link_files = get_files(root_path, "note") 24 | check_substitute_flg = False # Whether it has been replaced or not 25 | # check all notes links 26 | logger.debug("checking " + str(len(update_link_files)) + " files...") 27 | substitute_file_cnt = 0 # For counting the number of replaced files 28 | substitute_line_cnt = 0 29 | 30 | for update_link_file in update_link_files: 31 | substitute_flg = False # For counting the number of replaced files 32 | 33 | # Use cross-platform file reading 34 | content = read_file_cross_platform(update_link_file) 35 | lines = content.split('\n') 36 | 37 | for i, line in enumerate(lines): 38 | # Replace the target Wikilinks if any 39 | match = re.search( 40 | "\[\[(" 41 | + re.escape(old_file_names[1]) 42 | + "(" 43 | + re.escape(old_file_names[2]) 44 | + ")?" 
45 | + "(\s\|\s(.+))?)?\]\]", 46 | line, 47 | ) 48 | if match: 49 | logger.debug("Wikilink match: " + update_link_file) 50 | logger.debug("substitute: " + match.group(0)) 51 | if not check_substitute_flg: 52 | check_substitute_flg = True 53 | if not substitute_flg: 54 | substitute_flg = True 55 | substitute_line_cnt += 1 56 | # If Alias is set in the Link, use Alias as the Link Text 57 | if match.group(4): 58 | lines[i] = line.replace( 59 | match.group(0), 60 | "[" + match.group(4) + "](" + new_file_link + ")", 61 | ) 62 | else: 63 | lines[i] = line.replace( 64 | match.group(0), 65 | "[" + match.group(1) + "](" + new_file_link + ")", 66 | ) 67 | logger.debug(lines[i]) 68 | 69 | # Replace the target Markdownlinks if any 70 | match = re.search( 71 | "\[.+\]\(((?!http.*).*" + re.escape(old_file_names[0]) + ")\)", line 72 | ) 73 | if match: 74 | logger.debug("Markdown link match: " + update_link_file) 75 | logger.debug("substitute: " + match.group(0)) 76 | if not check_substitute_flg: 77 | check_substitute_flg = True 78 | if not substitute_flg: 79 | substitute_flg = True 80 | substitute_line_cnt += 1 81 | lines[i] = line.replace(match.group(1), new_file_link) 82 | logger.debug(lines[i]) 83 | 84 | # Write back the modified content using cross-platform function 85 | if substitute_flg: 86 | modified_content = '\n'.join(lines) 87 | write_file_cross_platform(update_link_file, modified_content) 88 | substitute_file_cnt += 1 89 | 90 | logger.debug(str(substitute_line_cnt) + " lines replaced!") 91 | logger.debug( 92 | "The link that existed in file " 93 | + str(substitute_file_cnt) 94 | + " has been updated!" 
95 | ) 96 | logger.debug("done!") 97 | return check_substitute_flg 98 | 99 | 100 | def rename_notes_with_links(files, root_path): 101 | """Rename the all file names to UID and update wikilinks to Markdownlinks""" 102 | logger.info("====== Start Rename Notes And Substitute Wikilinks ======") 103 | logger.info("the target is: " + str(len(files)) + " files") 104 | rename_file_cnt = 0 # Counting the number of files processed 105 | substitute_file_cnt = 0 # Number of files with links 106 | 107 | for i, file in enumerate(files): 108 | logger.debug("target: " + file) 109 | if check_note_has_uid(file): 110 | logger.debug("It seems that this file already has a UID") 111 | continue 112 | else: 113 | new_file_path = get_new_filepath_with_uid(file, root_path) 114 | uid = get_file_name(new_file_path)[1] 115 | logger.debug("uid: " + uid) 116 | logger.debug("rename: " + new_file_path) 117 | # rename and move ROOT PATH 118 | new_file_path_result = shutil.move(file, new_file_path) 119 | logger.info("rename done: " + new_file_path_result) 120 | rename_file_cnt += 1 121 | # add or update UID in front matter 122 | logger.debug("Insert or update UID in Front Matter") 123 | content = read_file_cross_platform(new_file_path_result) 124 | 125 | # Detect front matter format and parse it 126 | parser = FrontMatterParser() 127 | detected_format = parser.detect_format(content) 128 | 129 | if detected_format: 130 | # Parse existing front matter 131 | metadata, body_content = parser.parse_frontmatter(content) 132 | if metadata is not None: 133 | # Update or add the uid property 134 | metadata['uid'] = uid 135 | 136 | # Use the detected format to serialize back 137 | parser_with_format = FrontMatterParser(detected_format) 138 | modified_content = parser_with_format.serialize_frontmatter(metadata, body_content) 139 | write_file_cross_platform(new_file_path_result, modified_content) 140 | else: 141 | # Failed to parse, fallback to simple insertion 142 | logger.warning(f"Failed to parse frontmatter 
for {new_file_path_result}, using fallback method") 143 | lines = content.split('\n') 144 | lines.insert(1, "uid: " + uid) 145 | modified_content = '\n'.join(lines) 146 | write_file_cross_platform(new_file_path_result, modified_content) 147 | else: 148 | # No front matter detected, use original logic 149 | lines = content.split('\n') 150 | lines.insert(1, "uid: " + uid) 151 | modified_content = '\n'.join(lines) 152 | write_file_cross_platform(new_file_path_result, modified_content) 153 | # Replace backlinks 154 | if substitute_wikilinks_to_markdown_links(file, new_file_path_result, root_path): 155 | substitute_file_cnt += 1 156 | logger.debug("processing done! [" + str(i + 1) + "/" + str(len(files)) + "]") 157 | 158 | logger.info(str(rename_file_cnt) + " files have been renamed!") 159 | logger.info(str(substitute_file_cnt) + " linked files have been updated!") 160 | 161 | 162 | def convert_wikilinks_to_markdown(files, root_path): 163 | """Convert all WikiLinks to Markdown links in the given files""" 164 | logger.info("====== Start Converting WikiLinks to Markdown Links ======") 165 | logger.info("the target is: " + str(len(files)) + " files") 166 | total_files_modified = 0 167 | total_links_converted = 0 168 | 169 | for file in files: 170 | logger.debug("Processing: " + file) 171 | content = read_file_cross_platform(file) 172 | lines = content.split('\n') 173 | file_modified = False 174 | links_in_file = 0 175 | 176 | for i, line in enumerate(lines): 177 | # Convert WikiLinks [[target]] or [[target|alias]] to Markdown links 178 | # Pattern matches [[filename]] or [[filename|alias text]] 179 | pattern = r'\[\[([^\]\|]+)(\s*\|\s*([^\]]+))?\]\]' 180 | 181 | def replace_wikilink(match): 182 | nonlocal file_modified, links_in_file 183 | file_modified = True 184 | links_in_file += 1 185 | 186 | target = match.group(1).strip() 187 | alias = match.group(3).strip() if match.group(3) else None 188 | 189 | # Remove .md extension if present in the target 190 | if 
target.endswith('.md'): 191 | target_without_ext = target[:-3] 192 | else: 193 | target_without_ext = target 194 | 195 | # Create the markdown link 196 | link_text = alias if alias else target_without_ext 197 | link_target = target_without_ext + '.md' 198 | 199 | return f'[{link_text}]({link_target})' 200 | 201 | # Replace all WikiLinks in the line 202 | new_line = re.sub(pattern, replace_wikilink, line) 203 | if new_line != line: 204 | lines[i] = new_line 205 | logger.debug(f"Converted in {file}: {line.strip()} -> {new_line.strip()}") 206 | 207 | # Write back the modified content 208 | if file_modified: 209 | modified_content = '\n'.join(lines) 210 | write_file_cross_platform(file, modified_content) 211 | total_files_modified += 1 212 | total_links_converted += links_in_file 213 | logger.info(f"Modified {file}: converted {links_in_file} WikiLinks") 214 | 215 | logger.info(f"Converted {total_links_converted} WikiLinks in {total_files_modified} files") 216 | logger.info("====== WikiLinks Conversion Complete ======") 217 | 218 | 219 | def rename_images_with_links(files, root_path): 220 | """Rename image files to UID and update links""" 221 | logger.info("====== Start Rename Images And Substitute Wikilinks ======") 222 | logger.info("the target is: " + str(len(files)) + " files") 223 | rename_file_cnt = 0 # Counting the number of files processed 224 | substitute_file_cnt = 0 # Number of files with links 225 | 226 | for i, file in enumerate(files): 227 | logger.debug("target: " + file) 228 | if check_note_has_uid(file): 229 | logger.debug("It seems that this file already has a UID") 230 | continue 231 | else: 232 | # rename image 233 | new_file_path = get_new_filepath_with_uid(file, root_path) 234 | uid = get_file_name(new_file_path)[1] 235 | logger.debug("uid: " + uid) 236 | os.rename(file, new_file_path) 237 | rename_file_cnt += 1 238 | logger.info("rename done: " + new_file_path) 239 | # Replace backlinks 240 | if substitute_wikilinks_to_markdown_links(file, 
new_file_path, root_path): 241 | substitute_file_cnt += 1 242 | logger.debug("processing done! [" + str(i + 1) + "/" + str(len(files)) + "]") 243 | 244 | logger.info(str(rename_file_cnt) + " files have been renamed!") 245 | logger.info(str(substitute_file_cnt) + " linked files have been updated!") -------------------------------------------------------------------------------- /src/zettelkasten_normalizer/frontmatter_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Front matter parsing utilities for YAML, TOML, and JSON formats. 3 | """ 4 | 5 | import json 6 | import re 7 | import logging 8 | from typing import Dict, Tuple, Optional, List 9 | 10 | # Get logger 11 | logger = logging.getLogger(__name__) 12 | 13 | # Try to import TOML parser 14 | try: 15 | import tomllib # Python 3.11+ 16 | except ImportError: 17 | try: 18 | import tomli as tomllib # Fallback for older Python versions 19 | except ImportError: 20 | tomllib = None 21 | 22 | 23 | class FrontMatterParser: 24 | """Parser for different front matter formats.""" 25 | 26 | def __init__(self, format_type: str = "yaml"): 27 | """Initialize parser with specified format.""" 28 | self.format_type = format_type.lower() 29 | if self.format_type not in ["yaml", "toml", "json"]: 30 | raise ValueError(f"Unsupported format: {format_type}") 31 | 32 | if self.format_type == "toml" and tomllib is None: 33 | logger.warning("TOML parser not available. 
Install 'tomli' package for Python < 3.11") 34 | raise ImportError("TOML parser not available") 35 | 36 | def detect_format(self, content: str) -> Optional[str]: 37 | """Detect front matter format from content.""" 38 | lines = content.split('\n') 39 | if not lines: 40 | return None 41 | 42 | first_line = lines[0].strip() 43 | 44 | # YAML format 45 | if first_line == "---": 46 | return "yaml" 47 | 48 | # TOML format 49 | if first_line == "+++": 50 | return "toml" 51 | 52 | # JSON format 53 | if first_line == "{": 54 | return "json" 55 | 56 | return None 57 | 58 | def parse_frontmatter(self, content: str) -> Tuple[Optional[Dict], str]: 59 | """Parse front matter from content and return metadata and remaining content.""" 60 | detected_format = self.detect_format(content) 61 | if not detected_format: 62 | return None, content 63 | 64 | if detected_format == "yaml": 65 | return self._parse_yaml(content) 66 | elif detected_format == "toml": 67 | return self._parse_toml(content) 68 | elif detected_format == "json": 69 | return self._parse_json(content) 70 | 71 | return None, content 72 | 73 | def _parse_yaml(self, content: str) -> Tuple[Optional[Dict], str]: 74 | """Parse YAML front matter.""" 75 | lines = content.split('\n') 76 | if not lines or lines[0].strip() != "---": 77 | return None, content 78 | 79 | # Find the closing --- 80 | end_index = -1 81 | for i, line in enumerate(lines[1:], 1): 82 | if line.strip() == "---": 83 | end_index = i 84 | break 85 | 86 | if end_index == -1: 87 | return None, content 88 | 89 | # Extract YAML content 90 | yaml_lines = lines[1:end_index] 91 | yaml_content = '\n'.join(yaml_lines) 92 | 93 | # Simple YAML parser (basic key: value pairs) 94 | metadata = {} 95 | for line in yaml_lines: 96 | line = line.strip() 97 | if ':' in line and not line.startswith('#'): 98 | key, value = line.split(':', 1) 99 | key = key.strip() 100 | value = value.strip() 101 | # Remove quotes if present 102 | if value.startswith('"') and value.endswith('"'): 103 
| value = value[1:-1] 104 | elif value.startswith("'") and value.endswith("'"): 105 | value = value[1:-1] 106 | metadata[key] = value 107 | 108 | # Return remaining content 109 | remaining_content = '\n'.join(lines[end_index + 1:]) 110 | return metadata, remaining_content 111 | 112 | def _parse_toml(self, content: str) -> Tuple[Optional[Dict], str]: 113 | """Parse TOML front matter.""" 114 | if tomllib is None: 115 | logger.error("TOML parser not available") 116 | return None, content 117 | 118 | lines = content.split('\n') 119 | if not lines or lines[0].strip() != "+++": 120 | return None, content 121 | 122 | # Find the closing +++ 123 | end_index = -1 124 | for i, line in enumerate(lines[1:], 1): 125 | if line.strip() == "+++": 126 | end_index = i 127 | break 128 | 129 | if end_index == -1: 130 | return None, content 131 | 132 | # Extract TOML content 133 | toml_lines = lines[1:end_index] 134 | toml_content = '\n'.join(toml_lines) 135 | 136 | try: 137 | metadata = tomllib.loads(toml_content) 138 | except Exception as e: 139 | logger.error(f"Failed to parse TOML: {e}") 140 | return None, content 141 | 142 | # Return remaining content 143 | remaining_content = '\n'.join(lines[end_index + 1:]) 144 | return metadata, remaining_content 145 | 146 | def _parse_json(self, content: str) -> Tuple[Optional[Dict], str]: 147 | """Parse JSON front matter.""" 148 | lines = content.split('\n') 149 | if not lines or not lines[0].strip().startswith('{'): 150 | return None, content 151 | 152 | # Find the closing } 153 | brace_count = 0 154 | end_index = -1 155 | json_lines = [] 156 | 157 | for i, line in enumerate(lines): 158 | json_lines.append(line) 159 | for char in line: 160 | if char == '{': 161 | brace_count += 1 162 | elif char == '}': 163 | brace_count -= 1 164 | if brace_count == 0: 165 | end_index = i 166 | break 167 | if end_index != -1: 168 | break 169 | 170 | if end_index == -1: 171 | return None, content 172 | 173 | # Extract JSON content 174 | json_content = 
'\n'.join(json_lines) 175 | 176 | try: 177 | metadata = json.loads(json_content) 178 | except Exception as e: 179 | logger.error(f"Failed to parse JSON: {e}") 180 | return None, content 181 | 182 | # Return remaining content 183 | remaining_content = '\n'.join(lines[end_index + 1:]) 184 | return metadata, remaining_content 185 | 186 | def serialize_frontmatter(self, metadata: Dict, content: str) -> str: 187 | """Serialize metadata and content into the specified format.""" 188 | if self.format_type == "yaml": 189 | return self._serialize_yaml(metadata, content) 190 | elif self.format_type == "toml": 191 | return self._serialize_toml(metadata, content) 192 | elif self.format_type == "json": 193 | return self._serialize_json(metadata, content) 194 | 195 | return content 196 | 197 | def _serialize_yaml(self, metadata: Dict, content: str) -> str: 198 | """Serialize to YAML format.""" 199 | yaml_lines = ["---"] 200 | 201 | # Define the order of fields 202 | field_order = ["uid", "title", "aliases", "date", "update", "tags", "draft"] 203 | 204 | # Add fields in the specified order 205 | for key in field_order: 206 | if key in metadata: 207 | value = metadata[key] 208 | if isinstance(value, str) and (value.startswith('[') or ' ' in value): 209 | yaml_lines.append(f"{key}: {value}") 210 | else: 211 | yaml_lines.append(f"{key}: {value}") 212 | 213 | # Add any remaining fields not in the order list 214 | for key, value in metadata.items(): 215 | if key not in field_order: 216 | if isinstance(value, str) and (value.startswith('[') or ' ' in value): 217 | yaml_lines.append(f"{key}: {value}") 218 | else: 219 | yaml_lines.append(f"{key}: {value}") 220 | 221 | yaml_lines.append("---") 222 | yaml_lines.append("") # Empty line after front matter 223 | 224 | # Join frontmatter lines and ensure blank line before content 225 | frontmatter = '\n'.join(yaml_lines) 226 | 227 | # Ensure content doesn't start with extra newlines 228 | if content: 229 | content = content.lstrip('\n') 230 | 
return frontmatter + '\n' + content 231 | else: 232 | return frontmatter 233 | 234 | def _serialize_toml(self, metadata: Dict, content: str) -> str: 235 | """Serialize to TOML format.""" 236 | toml_lines = ["+++"] 237 | 238 | # Define the order of fields 239 | field_order = ["uid", "title", "aliases", "date", "update", "tags", "draft"] 240 | 241 | # Add fields in the specified order 242 | for key in field_order: 243 | if key in metadata: 244 | value = metadata[key] 245 | if isinstance(value, str): 246 | # Handle arrays and strings 247 | if value.startswith('[') and value.endswith(']'): 248 | toml_lines.append(f"{key} = {value}") 249 | elif value in ['true', 'false']: 250 | toml_lines.append(f"{key} = {value}") 251 | else: 252 | toml_lines.append(f'{key} = "{value}"') 253 | else: 254 | toml_lines.append(f"{key} = {json.dumps(value)}") 255 | 256 | # Add any remaining fields not in the order list 257 | for key, value in metadata.items(): 258 | if key not in field_order: 259 | if isinstance(value, str): 260 | # Handle arrays and strings 261 | if value.startswith('[') and value.endswith(']'): 262 | toml_lines.append(f"{key} = {value}") 263 | elif value in ['true', 'false']: 264 | toml_lines.append(f"{key} = {value}") 265 | else: 266 | toml_lines.append(f'{key} = "{value}"') 267 | else: 268 | toml_lines.append(f"{key} = {json.dumps(value)}") 269 | 270 | toml_lines.append("+++") 271 | toml_lines.append("") # Empty line after front matter 272 | 273 | # Join frontmatter lines and ensure blank line before content 274 | frontmatter = '\n'.join(toml_lines) 275 | 276 | # Ensure content doesn't start with extra newlines 277 | if content: 278 | content = content.lstrip('\n') 279 | return frontmatter + '\n' + content 280 | else: 281 | return frontmatter 282 | 283 | def _serialize_json(self, metadata: Dict, content: str) -> str: 284 | """Serialize to JSON format.""" 285 | # Define the order of fields 286 | field_order = ["uid", "title", "aliases", "date", "update", "tags", "draft"] 
def get_frontmatter_delimiters(format_type: str) -> Tuple[str, str]:
    """Return the (opening, closing) front matter delimiters for a format.

    Supported formats are ``yaml`` (``---``/``---``), ``toml``
    (``+++``/``+++``) and ``json`` (``{``/``}``).

    Raises:
        ValueError: If *format_type* is not one of the supported formats.
    """
    delimiters = {
        "yaml": ("---", "---"),
        "toml": ("+++", "+++"),
        "json": ("{", "}"),
    }
    try:
        return delimiters[format_type]
    except KeyError:
        raise ValueError(f"Unsupported format: {format_type}") from None
4 | """ 5 | 6 | import re 7 | import logging 8 | from .config import YFM, INBOX_DIR, FRONT_MATTER_FORMAT 9 | from .utils import get_file_name, get_dir_name, format_date, get_creation_date, get_modification_date, read_file_cross_platform, write_file_cross_platform 10 | from .frontmatter_parser import FrontMatterParser, get_frontmatter_delimiters 11 | 12 | # Get logger 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def create_tag_line_from_lines(lines): 17 | """create tag line for YFM from hashtags""" 18 | logger.debug("checking tags...") 19 | tag_line = "" 20 | for line in lines: 21 | for tag in re.findall("(\s|^)\#([^\s|^\#]+)", line): 22 | if tag_line == "": 23 | tag_line += str(tag[1]) 24 | else: 25 | tag_line += ", " + str(tag[1]) 26 | tag_line = "[" + tag_line + "]" 27 | return tag_line 28 | 29 | 30 | def writing_lines_without_hashtags(target, lines): 31 | """writing lines without hashtags""" 32 | # Convert string to lines if necessary 33 | if isinstance(lines, str): 34 | lines = lines.split('\n') 35 | 36 | logger.debug("writing file...") 37 | content_lines = [] 38 | frontmatter_end_found = False 39 | 40 | for i, line in enumerate(lines): 41 | # Check if this is the closing frontmatter delimiter 42 | if not frontmatter_end_found and line.strip() in ['---', '+++', '}']: 43 | # Check if the next line would be the empty line after frontmatter 44 | if i + 1 < len(lines) and i > 0: 45 | frontmatter_end_found = True 46 | 47 | # Delete the hashtag line (but preserve the line after frontmatter) 48 | if not re.match("^\#[^\#|^\s].+", line): 49 | content_lines.append(line) 50 | 51 | # Join lines and ensure proper line endings 52 | content = '\n'.join(content_lines) 53 | 54 | # Remove excessive trailing newlines but keep at least one 55 | content = content.rstrip('\n') + '\n' 56 | 57 | # Use cross-platform write function 58 | write_file_cross_platform(target, content) 59 | logger.debug("done!") 60 | 61 | 62 | def check_and_create_yfm(files, format_type=None): 63 
| """If there is no Front Matter, create one.""" 64 | if format_type is None: 65 | format_type = FRONT_MATTER_FORMAT 66 | 67 | logger.info("====== Start Check Front Matter ======") 68 | logger.info(f"Format: {format_type}") 69 | logger.info("the target is: " + str(len(files)) + " files") 70 | 71 | # Initialize parser 72 | try: 73 | parser = FrontMatterParser(format_type) 74 | except (ValueError, ImportError) as e: 75 | logger.error(f"Failed to initialize parser: {e}") 76 | return 77 | 78 | update_yfm_files = [] # if note have front matter 79 | create_yfm_files = [] # if note doesn't have front matter 80 | 81 | # check and classify files by exists front matter 82 | for i, file in enumerate(files): 83 | logger.debug("Checking Front Matter...") 84 | logger.debug("target: " + file) 85 | 86 | try: 87 | # Use cross-platform file reading 88 | content = read_file_cross_platform(file) 89 | 90 | # Detect front matter format 91 | detected_format = parser.detect_format(content) 92 | if detected_format: 93 | update_yfm_files.append(file) 94 | logger.debug(f"Have already Front Matter ({detected_format})") 95 | else: 96 | create_yfm_files.append(file) 97 | logger.debug("No Front Matter yet") 98 | 99 | except Exception as e: 100 | logger.error(f"Error reading file {file}: {e}") 101 | continue 102 | 103 | logger.info("check done! 
[" + str(i + 1) + "/" + str(len(files)) + "]") 104 | 105 | # Update existing front matter files 106 | _update_existing_yfm(update_yfm_files, parser) 107 | 108 | # Create new front matter for files without it 109 | _create_new_yfm(create_yfm_files, parser) 110 | 111 | 112 | def _update_existing_yfm(update_yfm_files, parser): 113 | """Update existing Front Matter files""" 114 | logger.info("====== Start Update Front Matter ======") 115 | logger.info("the target is: " + str(len(update_yfm_files)) + " files") 116 | processing_file_cnt = 0 # Counting the number of files processed 117 | 118 | for j, update_yfm_file in enumerate(update_yfm_files): 119 | logger.debug("Updating Front Matter...") 120 | logger.info("target: " + update_yfm_file) 121 | 122 | try: 123 | # Use cross-platform file reading 124 | content = read_file_cross_platform(update_yfm_file) 125 | 126 | # Parse existing front matter 127 | metadata, body_content = parser.parse_frontmatter(content) 128 | if metadata is None: 129 | logger.debug("Failed to parse front matter, skipping") 130 | continue 131 | 132 | # Check for missing fields and update 133 | update_flg = False 134 | 135 | # Generate uid if not present 136 | import hashlib 137 | if "uid" not in metadata: 138 | file_hash = hashlib.md5(update_yfm_file.encode()).hexdigest() 139 | metadata["uid"] = file_hash 140 | update_flg = True 141 | 142 | required_fields = { 143 | "title": get_file_name(update_yfm_file)[1], 144 | "aliases": "[]", 145 | "date": format_date(get_creation_date(update_yfm_file)), 146 | "update": format_date(get_modification_date(update_yfm_file)), 147 | "tags": create_tag_line_from_lines(content.split('\n')), 148 | "draft": "true" if get_dir_name(update_yfm_file)[1] in INBOX_DIR else "false" 149 | } 150 | 151 | # Add missing fields 152 | for key, default_value in required_fields.items(): 153 | if key not in metadata: 154 | metadata[key] = default_value 155 | update_flg = True 156 | logger.debug(f"Added missing field: {key}") 157 | 158 | 
# Always update the 'update' field 159 | if "update" in metadata: 160 | old_update = metadata["update"] 161 | new_update = format_date(get_modification_date(update_yfm_file)) 162 | if old_update != new_update: 163 | metadata["update"] = new_update 164 | update_flg = True 165 | logger.debug(f"Updated 'update' field: {old_update} -> {new_update}") 166 | 167 | if update_flg: 168 | # Regenerate content with updated metadata 169 | updated_content = parser.serialize_frontmatter(metadata, body_content) 170 | 171 | # Remove hashtag lines from body while preserving frontmatter format 172 | lines = updated_content.split('\n') 173 | 174 | # Find where frontmatter ends 175 | frontmatter_end_idx = -1 176 | for i, line in enumerate(lines): 177 | if i > 0 and line.strip() == '---': # Found closing delimiter 178 | frontmatter_end_idx = i 179 | break 180 | 181 | # Process only the content after frontmatter for hashtag removal 182 | if frontmatter_end_idx > 0: 183 | frontmatter_lines = lines[:frontmatter_end_idx + 2] # Include the blank line 184 | content_lines = lines[frontmatter_end_idx + 2:] 185 | 186 | # Remove hashtag lines from content only 187 | filtered_content = [] 188 | for line in content_lines: 189 | if not re.match("^\#[^\#|^\s].+", line): 190 | filtered_content.append(line) 191 | 192 | # Combine frontmatter with filtered content 193 | final_lines = frontmatter_lines + filtered_content 194 | final_content = '\n'.join(final_lines) 195 | 196 | # Write the final content 197 | final_content = final_content.rstrip('\n') + '\n' 198 | write_file_cross_platform(update_yfm_file, final_content) 199 | else: 200 | # Fallback to original behavior if no frontmatter found 201 | writing_lines_without_hashtags(update_yfm_file, lines) 202 | 203 | processing_file_cnt += 1 204 | logger.debug("Updated Front Matter!") 205 | else: 206 | logger.debug("There is no Front Matter to update") 207 | 208 | except Exception as e: 209 | logger.error(f"Error updating front matter for {update_yfm_file}: 
{e}") 210 | continue 211 | 212 | logger.debug( 213 | "processing done! [" + str(j + 1) + "/" + str(len(update_yfm_files)) + "]" 214 | ) 215 | 216 | logger.info(str(processing_file_cnt) + " files have been updated!") 217 | 218 | 219 | def _create_new_yfm(create_yfm_files, parser): 220 | """Create new Front Matter for files without it""" 221 | logger.info("====== Start Add New Front Matter ======") 222 | logger.info("the target is: " + str(len(create_yfm_files)) + " files") 223 | processing_file_cnt = 0 # Counting the number of files processed 224 | 225 | for i, create_yfm_file in enumerate(create_yfm_files): 226 | logger.debug("Creating Front Matter...") 227 | logger.info("target: " + create_yfm_file) 228 | 229 | try: 230 | # Use cross-platform file reading 231 | content = read_file_cross_platform(create_yfm_file) 232 | 233 | lines = content.split('\n') 234 | tag_line = create_tag_line_from_lines(lines) 235 | 236 | logger.debug("insert Front Matter...") 237 | 238 | # Create metadata dictionary with uid first 239 | # Generate a uid from the filename (will be updated if file is renamed) 240 | from .utils import format_uid_from_date 241 | from .file_operations import get_file_name 242 | import hashlib 243 | 244 | # Generate a unique uid based on file path 245 | file_hash = hashlib.md5(create_yfm_file.encode()).hexdigest() 246 | 247 | metadata = { 248 | "uid": file_hash, 249 | "title": get_file_name(create_yfm_file)[1], 250 | "aliases": "[]", 251 | "date": format_date(get_creation_date(create_yfm_file)), 252 | "update": format_date(get_modification_date(create_yfm_file)), 253 | "tags": tag_line, 254 | "draft": "true" if get_dir_name(create_yfm_file)[1] in INBOX_DIR else "false" 255 | } 256 | 257 | # Serialize front matter with content 258 | updated_content = parser.serialize_frontmatter(metadata, content) 259 | 260 | # Remove hashtag lines from body while preserving frontmatter format 261 | lines = updated_content.split('\n') 262 | 263 | # Find where frontmatter ends 
264 | frontmatter_end_idx = -1 265 | for i, line in enumerate(lines): 266 | if i > 0 and line.strip() == '---': # Found closing delimiter 267 | frontmatter_end_idx = i 268 | break 269 | 270 | # Process only the content after frontmatter for hashtag removal 271 | if frontmatter_end_idx > 0: 272 | frontmatter_lines = lines[:frontmatter_end_idx + 2] # Include the blank line 273 | content_lines = lines[frontmatter_end_idx + 2:] 274 | 275 | # Remove hashtag lines from content only 276 | filtered_content = [] 277 | for line in content_lines: 278 | if not re.match("^\#[^\#|^\s].+", line): 279 | filtered_content.append(line) 280 | 281 | # Combine frontmatter with filtered content 282 | final_lines = frontmatter_lines + filtered_content 283 | final_content = '\n'.join(final_lines) 284 | else: 285 | # Fallback to original behavior if no frontmatter found 286 | writing_lines_without_hashtags(create_yfm_file, lines) 287 | continue 288 | 289 | # Write the final content 290 | final_content = final_content.rstrip('\n') + '\n' 291 | write_file_cross_platform(create_yfm_file, final_content) 292 | 293 | processing_file_cnt += 1 # Counting the number of files processed 294 | logger.debug(f"Created {parser.format_type.upper()} Front Matter") 295 | 296 | except Exception as e: 297 | logger.error(f"Error creating front matter for {create_yfm_file}: {e}") 298 | continue 299 | 300 | logger.debug( 301 | "processing done! 
[" + str(i + 1) + "/" + str(len(create_yfm_files)) + "]" 302 | ) 303 | 304 | logger.info(str(processing_file_cnt) + " files have been updated!") -------------------------------------------------------------------------------- /tests/test_normalization_zettel.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tempfile 3 | import shutil 4 | import os 5 | import sys 6 | import datetime 7 | import re 8 | from unittest.mock import patch, MagicMock 9 | from io import StringIO 10 | 11 | # Import the modules to test 12 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) 13 | 14 | from zettelkasten_normalizer import utils, file_operations, yfm_processor, link_processor, config, frontmatter_parser 15 | 16 | 17 | class TestUtilityFunctions(unittest.TestCase): 18 | """ユーティリティ関数のテスト""" 19 | 20 | def test_get_file_name(self): 21 | """ファイル名解析のテスト""" 22 | # 通常のファイル 23 | result = utils.get_file_name("/path/to/test.md") 24 | self.assertEqual(result, ("test.md", "test", ".md")) 25 | 26 | # 拡張子なし 27 | result = utils.get_file_name("/path/to/test") 28 | self.assertEqual(result, ("test", "test", "")) 29 | 30 | # 日本語ファイル名 31 | result = utils.get_file_name("/path/to/テスト.md") 32 | self.assertEqual(result, ("テスト.md", "テスト", ".md")) 33 | 34 | def test_get_dir_name(self): 35 | """ディレクトリ名解析のテスト""" 36 | result = utils.get_dir_name("/path/to/test.md") 37 | self.assertEqual(result, ("/path/to", "to")) 38 | 39 | result = utils.get_dir_name("/home/user/documents/file.txt") 40 | self.assertEqual(result, ("/home/user/documents", "documents")) 41 | 42 | def test_format_date(self): 43 | """日付フォーマットのテスト""" 44 | # 固定のタイムスタンプをテスト 45 | timestamp = 1609459200 # 2021-01-01 00:00:00 UTC 46 | result = utils.format_date(timestamp) 47 | # タイムゾーンによる差異を考慮し、年月日のみチェック 48 | self.assertTrue(result.startswith("2021-01-01") or result.startswith("2020-12-31")) 49 | 50 | def test_format_uid_from_date(self): 51 | """UID形式の日付フォーマットのテスト""" 
52 | timestamp = 1609459200 # 2021-01-01 00:00:00 UTC 53 | result = utils.format_uid_from_date(timestamp) 54 | # タイムゾーンによる差異を考慮 55 | self.assertTrue(result.startswith("20210101") or result.startswith("20201231")) 56 | 57 | def test_check_note_type(self): 58 | """ファイルタイプチェックのテスト""" 59 | # ノートファイル 60 | self.assertTrue(file_operations.check_note_type("/path/test.md", "note")) 61 | self.assertTrue(file_operations.check_note_type("/path/test.txt", "note")) 62 | self.assertFalse(file_operations.check_note_type("/path/test.pdf", "note")) 63 | 64 | # 画像ファイル 65 | self.assertTrue(file_operations.check_note_type("/path/test.png", "image")) 66 | self.assertTrue(file_operations.check_note_type("/path/test.jpg", "image")) 67 | self.assertFalse(file_operations.check_note_type("/path/test.md", "image")) 68 | 69 | def test_check_note_has_uid(self): 70 | """UIDチェックのテスト""" 71 | # 有効なUID(32文字の16進数) 72 | self.assertTrue(file_operations.check_note_has_uid("/path/abcdef0123456789abcdef0123456789.md")) 73 | 74 | # 無効なUID 75 | self.assertFalse(file_operations.check_note_has_uid("/path/test.md")) 76 | self.assertFalse(file_operations.check_note_has_uid("/path/short123.md")) 77 | self.assertFalse(file_operations.check_note_has_uid("/path/abcdef0123456789ABCDEF0123456789.md")) # 大文字 78 | 79 | def test_build_filepath_by_uid(self): 80 | """UIDによるファイルパス構築のテスト""" 81 | uid = "abcdef0123456789abcdef0123456789" 82 | path = "/test/path" 83 | 84 | result = file_operations.build_filepath_by_uid(uid, path, ".md") 85 | expected = "/test/path/abcdef0123456789abcdef0123456789.md" 86 | self.assertEqual(result, expected) 87 | 88 | def test_create_tag_line_from_lines(self): 89 | """ハッシュタグからタグライン作成のテスト""" 90 | lines = [ 91 | "This is a test line with #tag1\n", 92 | "Another line with #tag2 and #tag3\n", 93 | "No tags here\n", 94 | "Line with #duplicate and #tag1 again\n" 95 | ] 96 | 97 | # logger をモック 98 | mock_logger = MagicMock() 99 | yfm_processor.logger = mock_logger 100 | 101 | try: 102 | result = 
class TestFileOperations(unittest.TestCase):
    """Tests for the file operations."""

    def setUp(self):
        """Create a temporary directory for the tests."""
        self.test_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.test_dir)

    def test_get_files_single_file(self):
        """Test retrieving a single file."""
        # Create a test file
        test_file = os.path.join(self.test_dir, "test.md")
        with open(test_file, 'w') as f:
            f.write("test content")

        result = file_operations.get_files(test_file, "note")
        self.assertEqual(result, [test_file])

    def test_get_files_directory(self):
        """Test retrieving files from a directory."""
        # Create test files
        test_files = [
            os.path.join(self.test_dir, "test1.md"),
            os.path.join(self.test_dir, "test2.txt"),
            os.path.join(self.test_dir, "image.png"),
            os.path.join(self.test_dir, "ignore.pdf")
        ]

        for file_path in test_files:
            with open(file_path, 'w') as f:
                f.write("test content")

        # Retrieve only note files
        note_files = file_operations.get_files(self.test_dir, "note")
        self.assertEqual(len(note_files), 2)
        self.assertTrue(any("test1.md" in f for f in note_files))
        self.assertTrue(any("test2.txt" in f for f in note_files))

        # Retrieve only image files
        image_files = file_operations.get_files(self.test_dir, "image")
        self.assertEqual(len(image_files), 1)
        self.assertTrue(any("image.png" in f for f in image_files))

    def test_get_files_exclude_hidden(self):
        """Test that hidden files are excluded."""
        # Create a hidden file
        hidden_file = os.path.join(self.test_dir, ".hidden.md")
        normal_file = os.path.join(self.test_dir, "normal.md")

        with open(hidden_file, 'w') as f:
            f.write("hidden content")
        with open(normal_file, 'w') as f:
            f.write("normal content")

        result = file_operations.get_files(self.test_dir, "note")
        self.assertEqual(len(result), 1)
        self.assertTrue(any("normal.md" in f for f in result))

    def test_get_files_exclude_directories(self):
        """Test excluded directories."""
        # Create a directory that should be excluded
        backup_dir = os.path.join(self.test_dir, "Backup")
        os.makedirs(backup_dir)

        backup_file = os.path.join(backup_dir, "backup.md")
        normal_file = os.path.join(self.test_dir, "normal.md")

        with open(backup_file, 'w') as f:
            f.write("backup content")
        with open(normal_file, 'w') as f:
            f.write("normal content")

        result = file_operations.get_files(self.test_dir, "note")
        self.assertEqual(len(result), 1)
        self.assertTrue(any("normal.md" in f for f in result))

    def test_get_new_filepath_with_uid(self):
        """Test generating a file path with a UID."""
        test_file = os.path.join(self.test_dir, "test.md")
        with open(test_file, 'w') as f:
            f.write("test content")

        result = file_operations.get_new_filepath_with_uid(test_file, self.test_dir)

        # Verify that the result is in UUID form
        filename = os.path.basename(result)
        name, ext = os.path.splitext(filename)
        self.assertEqual(ext, ".md")
        self.assertEqual(len(name), 32)
        self.assertTrue(all(c in "0123456789abcdef" for c in name))
class TestYAMLFrontMatter(unittest.TestCase):
    """Tests for YAML Front Matter processing."""

    def setUp(self):
        """Create a temporary directory for the tests."""
        self.test_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.test_dir)

    def test_check_and_create_yfm_new_file(self):
        """Test adding YFM to a new file."""
        # Create a file without YFM
        test_file = os.path.join(self.test_dir, "test.md")
        with open(test_file, 'w') as f:
            f.write("# Test Note\n\nThis is a test note with #tag1 and #tag2.")

        # Mock the logger
        import logging
        mock_logger = MagicMock()
        yfm_processor.logger = mock_logger

        try:
            # Run the function
            yfm_processor.check_and_create_yfm([test_file])

            # Read the file and verify
            with open(test_file, 'r') as f:
                content = f.read()

            # Verify that YFM was added
            self.assertTrue(content.startswith("---\n"))
            self.assertIn("title: test", content)
            self.assertIn("aliases: []", content)
            self.assertIn("date:", content)
            self.assertIn("update:", content)
            # Check the tags flexibly (punctuation may be included)
            self.assertTrue("tag1" in content and "tag2" in content)
            self.assertIn("draft: false", content)
        finally:
            # Clean up the logger
            if hasattr(yfm_processor, 'logger'):
                delattr(yfm_processor, 'logger')

    def test_check_and_create_yfm_existing_file(self):
        """Test updating a file that already has YFM."""
        # Create a file with YFM
        test_file = os.path.join(self.test_dir, "test.md")
        with open(test_file, 'w') as f:
            f.write("""---
title: Test
date: 2021-01-01 00:00:00
---

# Test Note

Content here.""")

        # Mock the logger
        mock_logger = MagicMock()
        yfm_processor.logger = mock_logger

        try:
            # Run the function
            yfm_processor.check_and_create_yfm([test_file])
        finally:
            if hasattr(yfm_processor, 'logger'):
                delattr(yfm_processor, 'logger')

        # Read the file and verify
        with open(test_file, 'r') as f:
            content = f.read()

        # Verify that the update field was added
        self.assertIn("update:", content)

    def test_writing_lines_without_hashtags(self):
        """Test hashtag-line removal."""
        test_file = os.path.join(self.test_dir, "test.md")
        lines = [
            "---\n",
            "title: Test\n",
            "---\n",
            "\n",
            "# Title\n",
            "\n",
            "This is content with #tag1\n",
            "#standalone_hashtag\n",
            "More content\n",
            "\n"
        ]

        # Mock the logger
        mock_logger = MagicMock()
        yfm_processor.logger = mock_logger

        try:
            yfm_processor.writing_lines_without_hashtags(test_file, lines)
        finally:
            if hasattr(yfm_processor, 'logger'):
                delattr(yfm_processor, 'logger')

        with open(test_file, 'r') as f:
            content = f.read()

        # Verify that the standalone hashtag line was removed
        self.assertNotIn("#standalone_hashtag", content)
        # Verify that inline hashtags remain
        self.assertIn("This is content with #tag1", content)
"#standalone_hashtag\n", 292 | "More content\n", 293 | "\n" 294 | ] 295 | 296 | # loggerをモック 297 | mock_logger = MagicMock() 298 | yfm_processor.logger = mock_logger 299 | 300 | try: 301 | yfm_processor.writing_lines_without_hashtags(test_file, lines) 302 | finally: 303 | if hasattr(yfm_processor, 'logger'): 304 | delattr(yfm_processor, 'logger') 305 | 306 | with open(test_file, 'r') as f: 307 | content = f.read() 308 | 309 | # ハッシュタグ行が除去されていることを確認 310 | self.assertNotIn("#standalone_hashtag", content) 311 | # インラインハッシュタグは残ることを確認 312 | self.assertIn("This is content with #tag1", content) 313 | 314 | 315 | class TestLinkSubstitution(unittest.TestCase): 316 | """リンク置換機能のテスト""" 317 | 318 | def setUp(self): 319 | """テスト用の一時ディレクトリを作成""" 320 | self.test_dir = tempfile.mkdtemp() 321 | self.addCleanup(shutil.rmtree, self.test_dir) 322 | 323 | def test_substitute_wikilinks_to_markdown_links(self): 324 | """Wikilink→Markdownリンク置換のテスト""" 325 | # テストファイルを作成 326 | source_file = os.path.join(self.test_dir, "source.md") 327 | target_file = os.path.join(self.test_dir, "abcdef0123456789abcdef0123456789.md") 328 | link_file = os.path.join(self.test_dir, "linking.md") 329 | 330 | with open(source_file, 'w') as f: 331 | f.write("Source content") 332 | 333 | with open(target_file, 'w') as f: 334 | f.write("Target content") 335 | 336 | with open(link_file, 'w') as f: 337 | f.write("""# Linking Note 338 | 339 | This note links to [[source]] and [[source.md]]. 340 | Also links to [[source | alias text]]. 341 | And markdown links [source](source.md). 
class TestMainFunctions(unittest.TestCase):
    """Tests for the main features."""

    def setUp(self):
        """Create a temporary directory for the tests."""
        self.test_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.test_dir)

    def test_rename_notes_with_links(self):
        """Test the note-renaming feature."""
        # Create a test file
        test_file = os.path.join(self.test_dir, "test_note.md")
        with open(test_file, 'w') as f:
            f.write("""---
title: Test Note
---

# Test Note

This is a test note.""")

        # Mock the logger
        mock_logger = MagicMock()
        link_processor.logger = mock_logger

        try:
            # Run rename_notes_with_links
            with patch('zettelkasten_normalizer.link_processor.substitute_wikilinks_to_markdown_links') as mock_substitute:
                mock_substitute.return_value = False
                link_processor.rename_notes_with_links([test_file], self.test_dir)
        finally:
            if hasattr(link_processor, 'logger'):
                delattr(link_processor, 'logger')

        # Verify the original file no longer exists
        self.assertFalse(os.path.exists(test_file))

        # Verify a UUID file was created
        files = os.listdir(self.test_dir)
        uuid_files = [f for f in files if len(os.path.splitext(f)[0]) == 32]
        self.assertEqual(len(uuid_files), 1)

        # Verify the uid was added to the new file
        new_file = os.path.join(self.test_dir, uuid_files[0])
        with open(new_file, 'r') as f:
            content = f.read()
        self.assertIn("uid:", content)

    def test_query_yes_no(self):
        """Test the user-confirmation prompt."""
        # "yes" case
        with patch('builtins.input', return_value='yes'):
            result = utils.query_yes_no("Test question?")
            self.assertTrue(result)

        # "no" case
        with patch('builtins.input', return_value='no'):
            result = utils.query_yes_no("Test question?")
            self.assertFalse(result)

        # Default value (empty input) case
        with patch('builtins.input', return_value=''):
            result = utils.query_yes_no("Test question?", default="yes")
            self.assertTrue(result)
class TestArgumentParsing(unittest.TestCase):
    """Tests for argument parsing."""

    def test_argument_parsing(self):
        """Test argument parsing."""
        # Temporarily replace sys.argv
        original_argv = sys.argv
        try:
            sys.argv = ['normalization_zettel.py', '/test/path', '-t', '/test/target', '-y', '-f', 'toml']

            # Recreate the parser
            from zettelkasten_normalizer import normalization_zettel
            args = normalization_zettel.parse_arguments()

            # Verify the arguments are parsed correctly
            self.assertEqual(args.root, '/test/path')
            self.assertEqual(args.target, '/test/target')
            self.assertTrue(args.yes)
            self.assertEqual(args.format, 'toml')
            # All features are enabled by default (skip options are False)
            self.assertFalse(args.skip_frontmatter)
            self.assertFalse(args.skip_rename_notes)
            self.assertFalse(args.skip_rename_images)

        finally:
            sys.argv = original_argv

    def test_argument_parsing_with_skip_options(self):
        """Test argument parsing with skip options."""
        original_argv = sys.argv
        try:
            sys.argv = ['normalization_zettel.py', '/test/path', '--skip-frontmatter', '--skip-rename-images']

            from zettelkasten_normalizer import normalization_zettel
            args = normalization_zettel.parse_arguments()

            # Verify the skip options are parsed correctly
            self.assertEqual(args.root, '/test/path')
            self.assertTrue(args.skip_frontmatter)
            self.assertFalse(args.skip_rename_notes)  # Not specified, so False
            self.assertTrue(args.skip_rename_images)

        finally:
            sys.argv = original_argv

    def test_get_execution_functions(self):
        """Test the execution-function configuration."""
        from zettelkasten_normalizer import normalization_zettel

        # Create mock args
        class MockArgs:
            def __init__(self, skip_frontmatter=False, skip_rename_notes=False, skip_rename_images=False):
                self.skip_frontmatter = skip_frontmatter
                self.skip_rename_notes = skip_rename_notes
                self.skip_rename_images = skip_rename_images

        # Default settings (no command-line arguments),
        # assuming EXECUTION_FUNCTION_LIST in config.py is all True
        args = MockArgs()
        execution_functions = normalization_zettel.get_execution_functions(args)
        self.assertTrue(execution_functions["function_create_yfm"])
        self.assertTrue(execution_functions["function_rename_notes"])
        self.assertTrue(execution_functions["function_rename_images"])

        # Skip front matter processing (overridden via command-line argument)
        args = MockArgs(skip_frontmatter=True)
        execution_functions = normalization_zettel.get_execution_functions(args)
        self.assertFalse(execution_functions["function_create_yfm"])
        self.assertTrue(execution_functions["function_rename_notes"])
        self.assertTrue(execution_functions["function_rename_images"])

        # Skip note renaming and image renaming
        args = MockArgs(skip_rename_notes=True, skip_rename_images=True)
        execution_functions = normalization_zettel.get_execution_functions(args)
        self.assertTrue(execution_functions["function_create_yfm"])
        self.assertFalse(execution_functions["function_rename_notes"])
        self.assertFalse(execution_functions["function_rename_images"])

        # Skip all features
        args = MockArgs(skip_frontmatter=True, skip_rename_notes=True, skip_rename_images=True)
        execution_functions = normalization_zettel.get_execution_functions(args)
        self.assertFalse(execution_functions["function_create_yfm"])
        self.assertFalse(execution_functions["function_rename_notes"])
        self.assertFalse(execution_functions["function_rename_images"])

    def test_get_execution_functions_with_config_override(self):
        """Test combining config settings with command-line arguments."""
        from zettelkasten_normalizer import normalization_zettel, config

        # Save the original configuration
        original_config = config.EXECUTION_FUNCTION_LIST.copy()

        try:
            # Disable some features in config.py
            config.EXECUTION_FUNCTION_LIST["function_create_yfm"] = False
            config.EXECUTION_FUNCTION_LIST["function_rename_notes"] = True
            config.EXECUTION_FUNCTION_LIST["function_rename_images"] = True

            class MockArgs:
                def __init__(self, skip_frontmatter=False, skip_rename_notes=False, skip_rename_images=False):
                    self.skip_frontmatter = skip_frontmatter
                    self.skip_rename_notes = skip_rename_notes
                    self.skip_rename_images = skip_rename_images

            # No command-line arguments (use the config settings)
            args = MockArgs()
            execution_functions = normalization_zettel.get_execution_functions(args)
            self.assertFalse(execution_functions["function_create_yfm"])  # False in config
            self.assertTrue(execution_functions["function_rename_notes"])  # True in config
            self.assertTrue(execution_functions["function_rename_images"])  # True in config

            # False in config, plus skip another feature via command-line argument
            args = MockArgs(skip_rename_notes=True)
            execution_functions = normalization_zettel.get_execution_functions(args)
            self.assertFalse(execution_functions["function_create_yfm"])  # False in config
            self.assertFalse(execution_functions["function_rename_notes"])  # False via argument
            self.assertTrue(execution_functions["function_rename_images"])  # True in config, not overridden

        finally:
            # Restore the original configuration
            config.EXECUTION_FUNCTION_LIST.update(original_config)
self.assertTrue(execution_functions["function_rename_images"]) # configでTrue、引数指定なし 553 | 554 | finally: 555 | # 設定を元に戻す 556 | config.EXECUTION_FUNCTION_LIST.update(original_config) 557 | 558 | 559 | class TestFrontMatterParser(unittest.TestCase): 560 | """フロントマターパーサーのテスト""" 561 | 562 | def test_yaml_parser(self): 563 | """YAMLパーサーのテスト""" 564 | parser = frontmatter_parser.FrontMatterParser("yaml") 565 | 566 | # YAML形式のコンテンツ 567 | content = """--- 568 | title: Test Note 569 | tags: [test, example] 570 | draft: false 571 | --- 572 | 573 | # Test Content 574 | 575 | This is a test note.""" 576 | 577 | metadata, body = parser.parse_frontmatter(content) 578 | 579 | self.assertIsNotNone(metadata) 580 | self.assertEqual(metadata["title"], "Test Note") 581 | self.assertEqual(metadata["tags"], "[test, example]") 582 | self.assertEqual(metadata["draft"], "false") 583 | self.assertIn("# Test Content", body) 584 | 585 | def test_toml_parser(self): 586 | """TOMLパーサーのテスト(利用可能な場合)""" 587 | try: 588 | parser = frontmatter_parser.FrontMatterParser("toml") 589 | except ImportError: 590 | self.skipTest("TOML parser not available") 591 | 592 | # TOML形式のコンテンツ 593 | content = """+++ 594 | title = "Test Note" 595 | tags = ["test", "example"] 596 | draft = false 597 | +++ 598 | 599 | # Test Content 600 | 601 | This is a test note.""" 602 | 603 | metadata, body = parser.parse_frontmatter(content) 604 | 605 | self.assertIsNotNone(metadata) 606 | self.assertEqual(metadata["title"], "Test Note") 607 | self.assertEqual(metadata["tags"], ["test", "example"]) 608 | self.assertEqual(metadata["draft"], False) 609 | self.assertIn("# Test Content", body) 610 | 611 | def test_json_parser(self): 612 | """JSONパーサーのテスト""" 613 | parser = frontmatter_parser.FrontMatterParser("json") 614 | 615 | # JSON形式のコンテンツ 616 | content = """{ 617 | "title": "Test Note", 618 | "tags": ["test", "example"], 619 | "draft": false 620 | } 621 | 622 | # Test Content 623 | 624 | This is a test note.""" 625 | 626 | metadata, 
body = parser.parse_frontmatter(content) 627 | 628 | self.assertIsNotNone(metadata) 629 | self.assertEqual(metadata["title"], "Test Note") 630 | self.assertEqual(metadata["tags"], ["test", "example"]) 631 | self.assertEqual(metadata["draft"], False) 632 | self.assertIn("# Test Content", body) 633 | 634 | def test_format_detection(self): 635 | """フォーマット検出のテスト""" 636 | parser = frontmatter_parser.FrontMatterParser("yaml") 637 | 638 | # YAML 639 | yaml_content = "---\ntitle: test\n---\ncontent" 640 | self.assertEqual(parser.detect_format(yaml_content), "yaml") 641 | 642 | # TOML 643 | toml_content = "+++\ntitle = \"test\"\n+++\ncontent" 644 | self.assertEqual(parser.detect_format(toml_content), "toml") 645 | 646 | # JSON 647 | json_content = '{\n "title": "test"\n}\ncontent' 648 | self.assertEqual(parser.detect_format(json_content), "json") 649 | 650 | # No front matter 651 | plain_content = "# Title\nRegular content" 652 | self.assertIsNone(parser.detect_format(plain_content)) 653 | 654 | def test_yaml_serialization(self): 655 | """YAMLシリアライゼーションのテスト""" 656 | parser = frontmatter_parser.FrontMatterParser("yaml") 657 | 658 | metadata = { 659 | "title": "Test Note", 660 | "tags": "[test, example]", 661 | "draft": "false" 662 | } 663 | content = "# Test Content\n\nThis is a test note." 
664 | 665 | result = parser.serialize_frontmatter(metadata, content) 666 | 667 | self.assertIn("---", result) 668 | self.assertIn("title: Test Note", result) 669 | self.assertIn("tags: [test, example]", result) 670 | self.assertIn("draft: false", result) 671 | self.assertIn("# Test Content", result) 672 | 673 | def test_toml_serialization(self): 674 | """TOMLシリアライゼーションのテスト(利用可能な場合)""" 675 | try: 676 | parser = frontmatter_parser.FrontMatterParser("toml") 677 | except ImportError: 678 | self.skipTest("TOML parser not available") 679 | 680 | metadata = { 681 | "title": "Test Note", 682 | "tags": "[test, example]", 683 | "draft": "false" 684 | } 685 | content = "# Test Content\n\nThis is a test note." 686 | 687 | result = parser.serialize_frontmatter(metadata, content) 688 | 689 | self.assertIn("+++", result) 690 | self.assertIn('title = "Test Note"', result) 691 | self.assertIn('tags = [test, example]', result) 692 | self.assertIn('draft = false', result) 693 | self.assertIn("# Test Content", result) 694 | 695 | def test_json_serialization(self): 696 | """JSONシリアライゼーションのテスト""" 697 | parser = frontmatter_parser.FrontMatterParser("json") 698 | 699 | metadata = { 700 | "title": "Test Note", 701 | "tags": "[\"test\", \"example\"]", 702 | "draft": "false" 703 | } 704 | content = "# Test Content\n\nThis is a test note." 
705 | 706 | result = parser.serialize_frontmatter(metadata, content) 707 | 708 | self.assertIn('"title": "Test Note"', result) 709 | # JSONの配列は複数行で表示される可能性があるため、柔軟にチェック 710 | self.assertIn('"tags":', result) 711 | self.assertIn('"test"', result) 712 | self.assertIn('"example"', result) 713 | self.assertIn('"draft": false', result) 714 | self.assertIn("# Test Content", result) 715 | 716 | 717 | class TestFrontMatterIntegration(unittest.TestCase): 718 | """フロントマター統合テスト""" 719 | 720 | def setUp(self): 721 | """テスト用の一時ディレクトリを作成""" 722 | self.test_dir = tempfile.mkdtemp() 723 | self.addCleanup(shutil.rmtree, self.test_dir) 724 | 725 | def test_toml_frontmatter_creation(self): 726 | """TOMLフロントマター作成のテスト(利用可能な場合)""" 727 | try: 728 | frontmatter_parser.FrontMatterParser("toml") 729 | except ImportError: 730 | self.skipTest("TOML parser not available") 731 | 732 | # TOMLフロントマターなしのファイルを作成 733 | test_file = os.path.join(self.test_dir, "test.md") 734 | with open(test_file, 'w') as f: 735 | f.write("# Test Note\n\nThis is a test note with #tag1.") 736 | 737 | # loggerをモック 738 | mock_logger = MagicMock() 739 | yfm_processor.logger = mock_logger 740 | 741 | try: 742 | # TOMLフロントマターでフロントマター作成を実行 743 | yfm_processor.check_and_create_yfm([test_file], "toml") 744 | 745 | # ファイルを読み込んで確認 746 | with open(test_file, 'r') as f: 747 | content = f.read() 748 | 749 | # TOMLフロントマターが追加されていることを確認 750 | self.assertTrue(content.startswith("+++\n")) 751 | self.assertIn('title = "test"', content) 752 | self.assertIn('aliases = []', content) 753 | self.assertIn('draft = false', content) 754 | 755 | finally: 756 | if hasattr(yfm_processor, 'logger'): 757 | delattr(yfm_processor, 'logger') 758 | 759 | def test_json_frontmatter_creation(self): 760 | """JSONフロントマター作成のテスト""" 761 | # JSONフロントマターなしのファイルを作成 762 | test_file = os.path.join(self.test_dir, "test.md") 763 | with open(test_file, 'w') as f: 764 | f.write("# Test Note\n\nThis is a test note with #tag1.") 765 | 766 | # loggerをモック 767 | mock_logger = 
MagicMock() 768 | yfm_processor.logger = mock_logger 769 | 770 | try: 771 | # JSONフロントマターでフロントマター作成を実行 772 | yfm_processor.check_and_create_yfm([test_file], "json") 773 | 774 | # ファイルを読み込んで確認 775 | with open(test_file, 'r') as f: 776 | content = f.read() 777 | 778 | # JSONフロントマターが追加されていることを確認 779 | self.assertTrue(content.startswith("{\n")) 780 | self.assertIn('"title": "test"', content) 781 | self.assertIn('"aliases": []', content) 782 | self.assertIn('"draft": false', content) 783 | 784 | finally: 785 | if hasattr(yfm_processor, 'logger'): 786 | delattr(yfm_processor, 'logger') 787 | 788 | 789 | class TestCrossPlatformSupport(unittest.TestCase): 790 | """クロスプラットフォーム対応のテスト""" 791 | 792 | def setUp(self): 793 | """テスト用の一時ディレクトリを作成""" 794 | self.test_dir = tempfile.mkdtemp() 795 | self.addCleanup(shutil.rmtree, self.test_dir) 796 | 797 | def test_normalize_line_endings(self): 798 | """改行コード正規化のテスト""" 799 | # Windows CRLF 800 | windows_content = "line1\r\nline2\r\nline3\r\n" 801 | normalized = utils.normalize_line_endings(windows_content) 802 | self.assertEqual(normalized, "line1\nline2\nline3\n") 803 | 804 | # Old Mac CR 805 | mac_content = "line1\rline2\rline3\r" 806 | normalized = utils.normalize_line_endings(mac_content) 807 | self.assertEqual(normalized, "line1\nline2\nline3\n") 808 | 809 | # Unix LF (should remain unchanged) 810 | unix_content = "line1\nline2\nline3\n" 811 | normalized = utils.normalize_line_endings(unix_content) 812 | self.assertEqual(normalized, "line1\nline2\nline3\n") 813 | 814 | # Mixed line endings 815 | mixed_content = "line1\r\nline2\nline3\r" 816 | normalized = utils.normalize_line_endings(mixed_content) 817 | self.assertEqual(normalized, "line1\nline2\nline3\n") 818 | 819 | def test_cross_platform_file_operations(self): 820 | """クロスプラットフォームファイル操作のテスト""" 821 | test_file = os.path.join(self.test_dir, "test.md") 822 | 823 | # Windows style content with CRLF 824 | original_content = "# Test\r\n\r\nThis is a test with CRLF line 
endings.\r\n" 825 | 826 | # Write and read back 827 | utils.write_file_cross_platform(test_file, original_content) 828 | read_content = utils.read_file_cross_platform(test_file) 829 | 830 | # Content should be normalized to LF 831 | expected_content = "# Test\n\nThis is a test with CRLF line endings.\n" 832 | self.assertEqual(read_content, expected_content) 833 | 834 | def test_path_normalization(self): 835 | """パス正規化のテスト""" 836 | # Test various path separators 837 | if os.name == 'nt': # Windows 838 | # Forward slashes should be converted to backslashes on Windows 839 | path = "path/to/file.md" 840 | normalized = utils.normalize_path(path) 841 | self.assertEqual(normalized, "path\\to\\file.md") 842 | else: # Unix-like systems 843 | # Backslashes should be preserved but path should be normalized 844 | path = "path/to/../to/file.md" 845 | normalized = utils.normalize_path(path) 846 | self.assertEqual(normalized, "path/to/file.md") 847 | 848 | # Test with double separators 849 | path_with_double = "path//to//file.md" 850 | normalized = utils.normalize_path(path_with_double) 851 | expected_sep = utils.get_platform_path_separator() 852 | expected = f"path{expected_sep}to{expected_sep}file.md" 853 | self.assertEqual(normalized, expected) 854 | 855 | def test_frontmatter_with_different_line_endings(self): 856 | """異なる改行コードでのフロントマター処理テスト""" 857 | test_file = os.path.join(self.test_dir, "test.md") 858 | 859 | # Create content with Windows line endings 860 | content_crlf = "# Test Note\r\n\r\nThis is a test note with #tag1.\r\n" 861 | 862 | # Write file with Windows line endings 863 | with open(test_file, 'w', newline='\r\n') as f: 864 | f.write(content_crlf) 865 | 866 | # loggerをモック 867 | mock_logger = MagicMock() 868 | yfm_processor.logger = mock_logger 869 | 870 | try: 871 | # フロントマター作成を実行 872 | yfm_processor.check_and_create_yfm([test_file], "yaml") 873 | 874 | # ファイルを読み込んで確認 875 | result_content = utils.read_file_cross_platform(test_file) 876 | 877 | # 
YAMLフロントマターが追加されていることを確認 878 | self.assertTrue(result_content.startswith("---\n")) 879 | self.assertIn("title: test", result_content) 880 | self.assertIn("tag1", result_content) 881 | 882 | finally: 883 | if hasattr(yfm_processor, 'logger'): 884 | delattr(yfm_processor, 'logger') 885 | 886 | def test_unicode_file_handling(self): 887 | """Unicode文字を含むファイルの処理テスト""" 888 | test_file = os.path.join(self.test_dir, "日本語ファイル.md") 889 | 890 | # Unicode content with Japanese characters 891 | unicode_content = "# テストノート\n\n日本語のコンテンツです。\n" 892 | 893 | # Write and read back 894 | utils.write_file_cross_platform(test_file, unicode_content) 895 | read_content = utils.read_file_cross_platform(test_file) 896 | 897 | self.assertEqual(read_content, unicode_content) 898 | 899 | 900 | if __name__ == '__main__': 901 | # テストの実行 902 | unittest.main(verbosity=2) --------------------------------------------------------------------------------