├── .gitignore ├── .prettierignore ├── LICENSE ├── Makefile ├── Pipfile ├── Pipfile.lock ├── README.md ├── custom_pygments_style.py ├── favicon.ico ├── modd.conf ├── pyproject.toml ├── requirements.txt ├── run.py └── templates ├── atom.xml ├── dir.html ├── feed.svg ├── footer.html ├── index.html ├── lastweek.html ├── out.svg ├── page.html ├── search.html └── style.css /.gitignore: -------------------------------------------------------------------------------- 1 | output 2 | .venv 3 | .DS_Store 4 | __pycache__ 5 | .mise.toml 6 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | # these templated HTML files can break if prettier tries to format them 2 | templates/*.html 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CDN_BUCKET = obsidian_html 2 | MDPATH ?= "~/Library/Mobile Documents/iCloud~md~obsidian/Documents/personal" 3 | 4 | build: requirements 5 | .venv/bin/python run.py --path ${MDPATH} --use-git-times --feed link_blog --feed music_blog --feed blog 6 | cp favicon.ico output/ 7 | 8 | # only for use in dev, for quick iteration 9 | build-quick: 10 | .venv/bin/python run.py --path ${MDPATH} --feed link_blog --feed music_blog --feed blog 11 | 12 | requirements: 13 | if [ ! -d ".venv" ]; then python -mvenv .venv; fi 14 | .venv/bin/pip install -r requirements.txt 15 | 16 | clean: 17 | rm -rf output 18 | 19 | pull: 20 | git pull 21 | 22 | serve: 23 | modd 24 | 25 | sync: 26 | s3cmd sync --no-mime-magic --guess-mime-type --acl-public --no-preserve \ 27 | output/ s3://llimllib/${CDN_BUCKET}/ 28 | 29 | # flush the digital ocean CDN cache 30 | flush: 31 | doctl compute cdn flush \ 32 | $$(doctl compute cdn list --format ID | tail -n1) \ 33 | --files ${CDN_BUCKET}/* 34 | 35 | publish: pull build sync flush 36 | 37 | .PHONY: build build-quick clean pull requirements serve sync flush publish 38 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | mdx-linkify = "*" 8 | python-markdown-math = "*" 9 | pygments = "*" 10 | jinja2 = "*" 11 | strict-rfc3339 = "*" 12 | markdown-it-py 
= "*" 13 | mdit-py-plugins = "*" 14 | linkify-it-py = "*" 15 | 16 | [dev-packages] 17 | 18 | [requires] 19 | python_version = "3.10" 20 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "5981f8ede38e1d355626505b39514eb69fcb3ab575f54e1b2c1137fc1a233393" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.10" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "bleach": { 20 | "hashes": [ 21 | "sha256:1a1a85c1595e07d8db14c5f09f09e6433502c51c595970edc090551f0db99414", 22 | "sha256:33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4" 23 | ], 24 | "markers": "python_version >= '3.7'", 25 | "version": "==6.0.0" 26 | }, 27 | "jinja2": { 28 | "hashes": [ 29 | "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852", 30 | "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61" 31 | ], 32 | "index": "pypi", 33 | "version": "==3.1.2" 34 | }, 35 | "linkify-it-py": { 36 | "hashes": [ 37 | "sha256:19f3060727842c254c808e99d465c80c49d2c7306788140987a1a7a29b0d6ad2", 38 | "sha256:a3a24428f6c96f27370d7fe61d2ac0be09017be5190d68d8658233171f1b6541" 39 | ], 40 | "index": "pypi", 41 | "version": "==2.0.2" 42 | }, 43 | "markdown": { 44 | "hashes": [ 45 | "sha256:065fd4df22da73a625f14890dd77eb8040edcbd68794bcd35943be14490608b2", 46 | "sha256:8bf101198e004dc93e84a12a7395e31aac6a9c9942848ae1d99b9d72cf9b3520" 47 | ], 48 | "markers": "python_version >= '3.7'", 49 | "version": "==3.4.3" 50 | }, 51 | "markdown-it-py": { 52 | "hashes": [ 53 | "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", 54 | "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb" 55 | ], 56 | "index": "pypi", 57 | "version": "==3.0.0" 
58 | }, 59 | "markupsafe": { 60 | "hashes": [ 61 | "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e", 62 | "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e", 63 | "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431", 64 | "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686", 65 | "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559", 66 | "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc", 67 | "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c", 68 | "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0", 69 | "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4", 70 | "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9", 71 | "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575", 72 | "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba", 73 | "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d", 74 | "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3", 75 | "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00", 76 | "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155", 77 | "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac", 78 | "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52", 79 | "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f", 80 | "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8", 81 | "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b", 82 | "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24", 83 | "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea", 84 | "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198", 85 | 
"sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0", 86 | "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee", 87 | "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be", 88 | "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2", 89 | "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707", 90 | "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6", 91 | "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58", 92 | "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779", 93 | "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636", 94 | "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c", 95 | "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad", 96 | "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee", 97 | "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc", 98 | "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2", 99 | "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48", 100 | "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7", 101 | "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e", 102 | "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b", 103 | "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa", 104 | "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5", 105 | "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e", 106 | "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb", 107 | "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9", 108 | "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57", 109 | 
"sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc", 110 | "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2" 111 | ], 112 | "markers": "python_version >= '3.7'", 113 | "version": "==2.1.3" 114 | }, 115 | "mdit-py-plugins": { 116 | "hashes": [ 117 | "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9", 118 | "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b" 119 | ], 120 | "index": "pypi", 121 | "version": "==0.4.0" 122 | }, 123 | "mdurl": { 124 | "hashes": [ 125 | "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", 126 | "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba" 127 | ], 128 | "markers": "python_version >= '3.7'", 129 | "version": "==0.1.2" 130 | }, 131 | "mdx-linkify": { 132 | "hashes": [ 133 | "sha256:e09278e43e5076b63398238b069a361913779683183481e9206235667cd89f54" 134 | ], 135 | "index": "pypi", 136 | "version": "==2.1" 137 | }, 138 | "pygments": { 139 | "hashes": [ 140 | "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c", 141 | "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1" 142 | ], 143 | "index": "pypi", 144 | "version": "==2.15.1" 145 | }, 146 | "python-markdown-math": { 147 | "hashes": [ 148 | "sha256:8564212af679fc18d53f38681f16080fcd3d186073f23825c7ce86fadd3e3635", 149 | "sha256:c685249d84b5b697e9114d7beb352bd8ca2e07fd268fd4057ffca888c14641e5" 150 | ], 151 | "index": "pypi", 152 | "version": "==0.8" 153 | }, 154 | "six": { 155 | "hashes": [ 156 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", 157 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" 158 | ], 159 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 160 | "version": "==1.16.0" 161 | }, 162 | "strict-rfc3339": { 163 | "hashes": [ 164 | "sha256:5cad17bedfc3af57b399db0fed32771f18fc54bbd917e85546088607ac5e1277" 165 | ], 
166 | "index": "pypi", 167 | "version": "==0.7" 168 | }, 169 | "uc-micro-py": { 170 | "hashes": [ 171 | "sha256:30ae2ac9c49f39ac6dce743bd187fcd2b574b16ca095fa74cd9396795c954c54", 172 | "sha256:8c9110c309db9d9e87302e2f4ad2c3152770930d88ab385cd544e7a7e75f3de0" 173 | ], 174 | "markers": "python_version >= '3.7'", 175 | "version": "==1.0.2" 176 | }, 177 | "webencodings": { 178 | "hashes": [ 179 | "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", 180 | "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" 181 | ], 182 | "version": "==0.5.1" 183 | } 184 | }, 185 | "develop": {} 186 | } 187 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # obsidian_notes 2 | 3 | My personal code for turning my [obsidian](https://obsidian.md/) notes into a website. Extremely incomplete and hacked together, but feel free to take anything useful from it. 4 | 5 | You can visit the site at http://notes.billmill.org 6 | 7 | I deploy this with `make publish`, run via a github action in my notes repo. 8 | -------------------------------------------------------------------------------- /custom_pygments_style.py: -------------------------------------------------------------------------------- 1 | """ 2 | pygments.styles.default 3 | ~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | The default highlighting style. 6 | 7 | :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS. 8 | :license: BSD, see LICENSE for details. 
9 | """ 10 | 11 | from pygments.style import Style 12 | from pygments.token import ( 13 | Keyword, 14 | Name, 15 | Comment, 16 | String, 17 | Error, 18 | Number, 19 | Operator, 20 | Generic, 21 | Whitespace, 22 | ) 23 | 24 | 25 | class LlimllibStyle(Style): 26 | """ 27 | llimllib's custom style 28 | """ 29 | 30 | name = "default" 31 | 32 | background_color = "#f8f8f8" 33 | 34 | styles = { 35 | Whitespace: "#bbbbbb", 36 | Comment: "italic #3D7B7B", 37 | Comment.Preproc: "noitalic #9C6500", 38 | Keyword: "bold #C500CC", 39 | Keyword.Pseudo: "nobold", 40 | Keyword.Type: "nobold #B00040", 41 | Operator: "#666666", 42 | Operator.Word: "bold #AA22FF", 43 | Name.Builtin: "#007093", 44 | Name.Function: "#4D44BB", 45 | Name.Class: "bold #4D44BB", 46 | Name.Namespace: "bold #4D44BB", 47 | Name.Exception: "bold #CB3F38", 48 | Name.Variable: "#19177C", 49 | Name.Constant: "#880000", 50 | Name.Label: "#767600", 51 | Name.Entity: "bold #717171", 52 | Name.Attribute: "#687822", 53 | Name.Tag: "bold #C500CC", 54 | Name.Decorator: "#AA22FF", 55 | String: "#4D44BB", 56 | String.Doc: "italic", 57 | String.Interpol: "bold #A45A77", 58 | String.Escape: "bold #AA5D1F", 59 | String.Regex: "#A45A77", 60 | # String.Symbol: "#B8860B", 61 | String.Symbol: "#19177C", 62 | String.Other: "#C500CC", 63 | Number: "#666666", 64 | Generic.Heading: "bold #000080", 65 | Generic.Subheading: "bold #800080", 66 | Generic.Deleted: "#A00000", 67 | Generic.Inserted: "#008400", 68 | Generic.Error: "#E40000", 69 | Generic.Emph: "italic", 70 | Generic.Strong: "bold", 71 | Generic.EmphStrong: "bold italic", 72 | Generic.Prompt: "bold #000080", 73 | Generic.Output: "#717171", 74 | Generic.Traceback: "#04D", 75 | Error: "border:#FF0000", 76 | } 77 | -------------------------------------------------------------------------------- /favicon.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/llimllib/obsidian_notes/c93b9b5c46fe4f08d51a0cbeddb588d4f3682969/favicon.ico -------------------------------------------------------------------------------- /modd.conf: -------------------------------------------------------------------------------- 1 | **/*.py templates/**/*.html templates/**/*.css templates/**/*.xml { 2 | prep: make clean build-quick 3 | daemon +sigterm: devd -m ./output 4 | } 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # https://microsoft.github.io/pyright/#/configuration 2 | [tool.pyright] 3 | include = ["."] 4 | exclude = [] 5 | ignore = [] 6 | # defineConstant = { DEBUG = true } 7 | # stubPath = "src/stubs" 8 | venv = ".venv" 9 | 10 | reportMissingImports = true 11 | reportMissingTypeStubs = false 12 | 13 | pythonVersion = "3.10" 14 | pythonPlatform = "Darwin" 15 | 16 | executionEnvironments = [ 17 | { root = "src" } 18 | ] 19 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | Jinja2 3 | linkify-it-py 4 | markdown-it-py 5 | mdit-py-plugins 6 | pygments 7 | python-magic 8 | pyyaml 9 | strict-rfc3339 10 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from custom_pygments_style import LlimllibStyle 3 | 4 | import argparse 5 | from collections import defaultdict 6 | from dataclasses import dataclass 7 | from datetime import datetime 8 | from html import escape 9 | import os 10 | from pathlib import Path 11 | import re 12 | import subprocess 13 | import shutil 14 | import sys 15 | from time import strftime, localtime, time 16 | from typing import Any, Callable, 
DefaultDict, Generator, Optional 17 | 18 | from bs4 import BeautifulSoup 19 | from jinja2 import Environment, FileSystemLoader 20 | from markdown_it import MarkdownIt 21 | import yaml 22 | 23 | from mdit_py_plugins.anchors import anchors_plugin 24 | from mdit_py_plugins.footnote import footnote_plugin 25 | from mdit_py_plugins.front_matter import front_matter_plugin 26 | from pygments import highlight as pygmentize 27 | from pygments.lexers import get_lexer_by_name 28 | from pygments.formatters import HtmlFormatter 29 | 30 | from strict_rfc3339 import timestamp_to_rfc3339_utcoffset, rfc3339_to_timestamp 31 | 32 | JINJA = Environment(loader=FileSystemLoader("templates")) 33 | 34 | 35 | @dataclass 36 | class GitStat: 37 | st_mtime: float 38 | st_ctime: float 39 | 40 | 41 | @dataclass 42 | class FileStat: 43 | st_mtime: float 44 | st_ctime: float 45 | 46 | 47 | @dataclass 48 | class Attachment: 49 | title: str 50 | canon_title: str 51 | fullpath: str 52 | link_path: str 53 | file: str 54 | relpath: str 55 | links: list[str] 56 | backlinks: set["Page | Attachment"] 57 | 58 | def __eq__(self, b) -> bool: 59 | return b.title == self.title 60 | 61 | def __hash__(self) -> int: 62 | return hash(("title", self.title)) 63 | 64 | 65 | @dataclass 66 | class Page: 67 | # the page's title 68 | title: str 69 | canon_title: str 70 | 71 | # the file path 72 | file: str 73 | 74 | # the relative path to the markdown source 75 | relpath: str 76 | 77 | # the absolute path to the markdown source 78 | fullpath: str 79 | 80 | # titlepath is the relative path and the file name of the page stripped of 81 | # its extension, for example it might be 'visualization/bar_charts' 82 | titlepath: str 83 | link_path: str 84 | 85 | # links is a list of (what, exactly?) 
86 | links: list[str] 87 | 88 | backlinks: set["Page | Attachment"] 89 | 90 | frontmatter: dict[str, Any] 91 | 92 | # the mtime of the file 93 | mtime: float 94 | ctime: float 95 | 96 | # file creation and modification time, ISO timestamped 97 | rfc3339_ctime: str 98 | rfc3339_mtime: str 99 | 100 | # created and updated dates, formatted for humans 101 | created_date: str 102 | updated_date: str 103 | 104 | # the contents of the page, in markdown 105 | source: str 106 | 107 | # the contents of the page, rendered to HTML 108 | html: str = "" 109 | 110 | # the same HTML, escaped for use in the atom feed 111 | html_escaped_content: str = "" 112 | 113 | def __eq__(self, b) -> bool: 114 | return b.title == self.title 115 | 116 | def __hash__(self) -> int: 117 | return hash(("title", self.title)) 118 | 119 | def dirlinks(self) -> Generator[str, None, None]: 120 | """yield a link to each dir page, all the way back to the root""" 121 | pathparts = self.titlepath.split("/")[:-1] 122 | for i in range(len(pathparts)): 123 | link = "/" + "/".join(pathparts[: i + 1]) 124 | yield f'{ pathparts[i] }' 125 | 126 | 127 | def info(msg: str, *args) -> None: 128 | yellow = "\033[0;33m" 129 | reset = "\033[0m" 130 | print(f"{yellow}{msg}{reset}", *args) 131 | 132 | 133 | def err(msg: str, *args) -> None: 134 | red = "\033[0;31m" 135 | reset = "\033[0m" 136 | print(f"{red}{msg}{reset}", *args) 137 | 138 | 139 | FRONT_MATTER_RE = re.compile(r"^\s*---(.*?)\n---\n", re.S) 140 | 141 | 142 | def parse_frontmatter(raw_fm: str) -> dict[str, Any]: 143 | """parse yaml frontmatter and return a dictionary. 
yaml could be any data 144 | type, but in this context we're expecting to get a dict out of this, so 145 | throw an exception if we find anything else""" 146 | anything = yaml.safe_load(raw_fm) 147 | if not anything: 148 | return {} 149 | elif type(anything) != dict: 150 | raise Exception(f"Expected dict, got {type(anything)}") 151 | return anything 152 | 153 | 154 | def split_front_matter(buf: str) -> tuple[dict[str, Any], str]: 155 | """split the front matter from the rest of the markdown document's content. 156 | Parse the front matter if present.""" 157 | parts = FRONT_MATTER_RE.split(buf, 1) 158 | if len(parts) == 1: 159 | return ({}, parts[0]) 160 | else: 161 | return (parse_frontmatter(parts[1]), parts[2]) 162 | 163 | 164 | WHITESPACE_RE = re.compile(r"[^\w\-\._~]") 165 | MARKDOWN_RE = re.compile(r"\.md$") 166 | 167 | 168 | def outname(fname: str) -> str: 169 | """Turn a markdown filename into an html file name""" 170 | clean = WHITESPACE_RE.sub("_", fname) 171 | return MARKDOWN_RE.sub(".html", clean) 172 | 173 | 174 | SANITIZE_PATH = re.compile(r"[^\w\-\._~\\\/]") 175 | 176 | 177 | def pathname(dname: str) -> str: 178 | """Sanitize a name""" 179 | return SANITIZE_PATH.sub("_", dname) 180 | 181 | 182 | def mkdir(dir_: str | Path) -> str | Path: 183 | """Recursively make dir_ if it does not exist""" 184 | if not os.path.isdir(dir_): 185 | os.makedirs(dir_, exist_ok=True) 186 | return dir_ 187 | 188 | 189 | def formatted_time(t: float) -> str: 190 | return strftime("%b %d, %Y", localtime(t)) 191 | 192 | 193 | def rfc3339_time(t: float) -> str: 194 | return timestamp_to_rfc3339_utcoffset(t) 195 | 196 | 197 | def render(template: str, **kwargs) -> str: 198 | return JINJA.get_template(template).render(**kwargs) 199 | 200 | 201 | def generate_stylesheet() -> None: 202 | """Use pygments to generate a stylesheet""" 203 | with open(f"output/pygments.css", "w") as f: 204 | f.write(HtmlFormatter(style=LlimllibStyle).get_style_defs()) 205 | 206 | 207 | def 
copy_static(from_dir: Path, to_dir: Path) -> None: 208 | """Copy stylesheet from the templates dir to output""" 209 | for ext in ["*.css", "*.svg"]: 210 | for f in from_dir.glob(ext): 211 | shutil.copy(f, to_dir) 212 | 213 | 214 | def strip_fancy_name(link: str) -> str: 215 | """Return the name of a link 216 | 217 | Strip a pipe, which is used in a link like [[Some long title|link text]] 218 | 219 | or a hash, which is used like [[Somepage#anchor]] 220 | """ 221 | if "|" in link: 222 | return link.split("|")[0] 223 | if "#" in link: 224 | return link.split("#")[0] 225 | return link 226 | 227 | 228 | LINK_RE = re.compile(r"\[\[(.*?)\]\]") 229 | 230 | 231 | def findlinks(md: str) -> list[str]: 232 | """Find all links in a markdown document""" 233 | # XXX: right now this grabs some "links" from code blocks; i.e. pandas lets 234 | # you do stuff like df[["columnA", "columnB"]]. Fix the regex so it 235 | # doesn't match that 236 | return list(map(strip_fancy_name, LINK_RE.findall(md))) 237 | 238 | 239 | def canonicalize(title: str) -> str: 240 | """return the canonical form of a title""" 241 | return title.lower() 242 | 243 | 244 | def canonical_path(relative_path: str) -> str: 245 | """Given a relative path, return the canonical form 246 | 247 | For example, if you pass "Data Analytics/Duckdb", this returns 248 | "Data_Analytics/duckdb". 
The value returned by this will match the 249 | "titlepath" attribute of a page 250 | """ 251 | path, page = os.path.split(relative_path) 252 | if path: 253 | return pathname(path) + "/" + canonicalize(page) 254 | return page.lower() 255 | 256 | 257 | def find( 258 | pages: dict[str, Page], attachments: dict[str, Attachment], link: str 259 | ) -> Optional[Page | Attachment]: 260 | """find a page referred to by `link` 261 | 262 | Pages can be linked in two ways: 263 | - By their title 264 | - Titles may not be unique and this function will just return the 265 | first result in our random search if there are multiple pages 266 | with the same title 267 | - By their path+title 268 | """ 269 | clink = canonical_path(link) 270 | for relpath, page in pages.items(): 271 | if page.canon_title == clink or relpath == clink: 272 | return page 273 | for link_path, attach in attachments.items(): 274 | if attach.file == link or link_path == link: 275 | return attach 276 | 277 | 278 | def split_files(files: list[str]) -> tuple[list[str], list[str]]: 279 | """Split a file list into markdown and non-markdown files""" 280 | return ( 281 | [f for f in files if f.endswith(".md")], 282 | [f for f in files if not f.endswith(".md")], 283 | ) 284 | 285 | 286 | class FileTree: 287 | def __init__( 288 | self, dir: Optional[str] = None, page: Optional[Page | Attachment] = None 289 | ): 290 | self.dir = dir 291 | if isinstance(self.dir, str): 292 | self.basename = os.path.basename(self.dir) 293 | self.reldir = pathname(self.dir) 294 | self.dirparts = self.dir.split("/") 295 | self.reldirparts = self.reldir.split("/") 296 | self.page = page 297 | self.children: list[FileTree] = [] 298 | 299 | def child_pages(self, idx=None) -> dict[str, Page]: 300 | if not isinstance(idx, dict): 301 | idx = {} 302 | for c in self.children: 303 | if isinstance(c.page, Page): 304 | idx[c.page.titlepath] = c.page 305 | elif c.dir: 306 | c.child_pages(idx) 307 | return idx 308 | 309 | def find_dir(self, dir: str) 
-> Optional["FileTree"]: 310 | if self.reldirparts[-1] == dir: 311 | return self 312 | for subdir in (c for c in self.children if c.dir): 313 | d = subdir.find_dir(dir) 314 | if d: 315 | return d 316 | 317 | def dir_backlinks(self) -> set[Page | Attachment]: 318 | """return backlinks for all direct children of this node""" 319 | return set( 320 | backlink 321 | for child in self.children 322 | if child.page 323 | for backlink in child.page.backlinks 324 | ) 325 | 326 | def has_child_dirs(self) -> bool: 327 | """return true if there are child dirs""" 328 | return any(child.dir for child in self.children) 329 | 330 | def dirlinks(self) -> Generator[str, None, None]: 331 | """yield a link to each dir page, all the way back to the root""" 332 | assert self.dir 333 | 334 | for i in range(len(self.dirparts)): 335 | link = "/" + "/".join(self.reldirparts[: i + 1]) 336 | yield f'{ self.dirparts[i] }' 337 | 338 | def dirlink(self) -> str: 339 | """return a link to this directory""" 340 | assert self.dir 341 | href = "/" + "/".join(self.reldirparts) + ".html" 342 | return f'🔗' 343 | 344 | def __str__(self) -> str: 345 | if self.dir: 346 | return os.path.basename(self.dir) 347 | elif self.page: 348 | return self.page.title 349 | raise AssertionError("either page or dir must be true") 350 | 351 | def __repr__(self) -> str: 352 | return self.__str__() 353 | 354 | 355 | def gitstat(dir: str, path: str) -> GitStat: 356 | """return the created and modified times for a file from git""" 357 | # Here's an example of what the output looks like for this: 358 | # 359 | # $ git -C . 
log --pretty="format:%cI %aI" README.md 360 | # 2023-10-30T08:18:52-04:00 2023-10-30T08:18:52-04:00 361 | # 2022-04-28T21:54:00-04:00 2022-04-28T21:54:00-04:00 362 | # 2022-04-09T22:09:23-04:00 2022-04-09T22:09:23-04:00 363 | # 2022-04-08T21:36:48-04:00 2022-04-08T21:36:48-04:00 364 | # 365 | # we need to use the -C argument to tell git to look in the notes 366 | # repository instead of this repository 367 | # 368 | # possibly I should add --follow so that this persists through renames? 369 | # though maybe I ought to consider a rename a recreation of a file? not 370 | # clear to me whether it's worth it or not. 371 | # 372 | # this is really slow, and I don't see a path to speeding it up a 373 | # tremendous amount 374 | times = ( 375 | subprocess.check_output( 376 | ["git", "-C", dir, "log", "--pretty=format:%aI %cI", "--", path] 377 | ) 378 | .decode("utf8") 379 | .split("\n") 380 | ) 381 | 382 | # The modified time is the second timestamp on the first line 383 | mtime = rfc3339_to_timestamp(times[0].split(" ")[1]) 384 | 385 | # ctime is not useful, I should remove this from here 386 | ctime = rfc3339_to_timestamp(times[-1].split(" ")[0]) 387 | 388 | return GitStat(st_mtime=mtime, st_ctime=ctime) 389 | 390 | 391 | def parse_fm_datetime(d: str | datetime) -> float: 392 | """ 393 | parse a datetime from frontmatter, which may be a string or datetime, into 394 | a timestamp 395 | 396 | I don't quite understand why the yaml lib seems to be converting it 397 | sometimes, but not others 398 | """ 399 | if isinstance(d, str): 400 | return rfc3339_to_timestamp(d) 401 | return d.timestamp() 402 | 403 | 404 | def handle_file(path: str, root: str, use_git_times: bool) -> Page | Attachment: 405 | """given a full path and the root of the tree, return a page dict 406 | 407 | path: full path to a file 408 | root: the root of the tree we're building 409 | """ 410 | _, extension = os.path.splitext(path) 411 | if extension == ".md": 412 | with open(path) as f: 413 | buf = 
f.read() 414 | frontmatter, source = split_front_matter(buf) 415 | links = findlinks(source) 416 | dir, filename = os.path.split(path) 417 | relpath = pathname(dir.removeprefix(root).lstrip("/")) 418 | title, _ = os.path.splitext(filename) 419 | titlepath = os.path.join(relpath, canonicalize(title)) 420 | 421 | # get created and modified times for the file. Preference order: 422 | # - updated time in frontmatter 423 | # - file info from git (if gistat is enabled) 424 | # - file modified time from the file system 425 | if "updated" in frontmatter and "created" in frontmatter: 426 | t = FileStat( 427 | parse_fm_datetime(frontmatter["updated"]), 428 | parse_fm_datetime(frontmatter["created"]), 429 | ) 430 | # if use_git_times is true, assume that the file is stored in git, 431 | # and get ctime and mtime from git. 432 | elif use_git_times: 433 | try: 434 | t = gitstat(dir, path) 435 | # if the file is not in git, the function currently throws an 436 | # IndexError. Take the mtime in that case instead, rather 437 | # than failing 438 | except IndexError: 439 | t = os.stat(path) 440 | else: 441 | t = os.stat(path) 442 | 443 | return Page( 444 | # `title` contains the title, cased as the author cased it 445 | title=title, 446 | # `canon_title` contains the canonicalized title 447 | canon_title=canonicalize(title), 448 | links=links, 449 | fullpath=path, 450 | link_path=os.path.join(relpath, outname(filename)), 451 | file=filename, 452 | relpath=relpath, 453 | titlepath=titlepath, 454 | source=source, 455 | backlinks=set(), 456 | frontmatter=frontmatter, 457 | ctime=t.st_ctime, 458 | mtime=t.st_mtime, 459 | # would be better to put file creation time in front matter 460 | # at file create time and pull it from there, but this will 461 | # do for now 462 | rfc3339_ctime=rfc3339_time(t.st_ctime), 463 | rfc3339_mtime=rfc3339_time(t.st_mtime), 464 | created_date=formatted_time(t.st_ctime), 465 | updated_date=formatted_time(t.st_mtime), 466 | ) 467 | 468 | # if it's not a 
markdown file, parse it as an attachment 469 | dir, filename = os.path.split(path) 470 | title, _ = os.path.splitext(filename) 471 | relpath = pathname(dir.removeprefix(root).lstrip("/")) 472 | return Attachment( 473 | title=title, 474 | canon_title=canonicalize(title), 475 | fullpath=path, 476 | link_path=os.path.join(relpath, filename), 477 | file=filename, 478 | relpath=relpath, 479 | links=[], 480 | backlinks=set(), 481 | ) 482 | 483 | 484 | def build_file_tree( 485 | dir: str, ignore: set[str], use_git_times: bool 486 | ) -> tuple[FileTree, dict[str, Page], dict[str, Attachment]]: 487 | """build a file tree from a given directory 488 | 489 | dir: the directory to build the file tree from 490 | ignore: a set of directory names to ignore 491 | 492 | returns a FileTree, an index of pages, and an index of attachments. 493 | 494 | The page index is keyed on titlepath, which is the relative path plus the 495 | canonicalized title, so something like 'visualization/bar_charts'. The 496 | index contains only content pages. 
497 | 498 | The attachments dictionary is keyed on relative path + filename, so 499 | something like 'images/some image 2928984588.png' 500 | """ 501 | index = {} 502 | attachments = {} 503 | return ( 504 | build_file_tree_helper( 505 | FileTree(dir=dir), ignore, dir, index, attachments, use_git_times 506 | ), 507 | index, 508 | attachments, 509 | ) 510 | 511 | 512 | def isEmptyFile(path: str) -> bool: 513 | """return true if a file is empty 514 | 515 | more precisely, if it doesn't have any non-whitespace characters in the 516 | first 16 bytes 517 | 518 | must open the file in binary mode because if it's a binary file it may not 519 | be decodable to unicode 520 | """ 521 | return not open(path, "rb").read(16).strip() 522 | 523 | 524 | def build_file_tree_helper( 525 | node: FileTree, 526 | ignore: set[str], 527 | root_path: str, 528 | index: dict[str, Page], 529 | attachments: dict[str, Attachment], 530 | use_git_times: bool, 531 | ) -> FileTree: 532 | assert node.dir 533 | for de in sorted( 534 | os.scandir(os.path.join(root_path, node.dir)), 535 | key=lambda x: x.path.lower(), 536 | ): 537 | if de.name in ignore: 538 | info(f"Ignoring file", de) 539 | continue 540 | 541 | # ignore untitled files or directories 542 | if de.name == "Untitled.md" or de.name == "Untitled": 543 | info(f"Ignoring untitled object", de) 544 | continue 545 | 546 | if de.is_dir(): 547 | path = de.path.removeprefix(root_path).lstrip("/") 548 | node.children.append( 549 | build_file_tree_helper( 550 | FileTree(dir=path), 551 | ignore, 552 | root_path, 553 | index, 554 | attachments, 555 | use_git_times, 556 | ) 557 | ) 558 | else: 559 | if isEmptyFile(de.path): 560 | info(f"Ignoring empty file", de) 561 | continue 562 | page = handle_file(de.path, root_path, use_git_times) 563 | 564 | # we want to index each page by its titlepath, which is something 565 | # like 'visualization/bar_charts'. 
If the page does not have a 566 | # titlepath attribute, assume that it's not a content page 567 | if isinstance(page, Page): 568 | # if a page has frontmatter, and that frontmatter contains a 569 | # "draft" key that is non-empty, consider it a draft and don't 570 | # render it 571 | if type(page) == Page and page.frontmatter.get("draft"): 572 | continue 573 | 574 | index[page.titlepath] = page 575 | else: 576 | attachments[page.link_path] = page 577 | 578 | node.children.append(FileTree(page=page)) 579 | 580 | return node 581 | 582 | 583 | def calculate_backlinks( 584 | pages: dict[str, Page], attachments: dict[str, Attachment] 585 | ) -> None: 586 | for page in pages.values(): 587 | for link in page.links: 588 | linked_page = find(pages, attachments, link) 589 | if not linked_page: 590 | info(f"unable to find link", link, page.title) 591 | continue 592 | linked_page.backlinks.add(page) 593 | 594 | 595 | def generate_lastweek_page(pages: dict[str, Page], outdir: Path) -> None: 596 | today = datetime.today() 597 | pages_by_weeks_ago: DefaultDict[int, list[Page]] = defaultdict(list) 598 | for p in reversed(sorted(pages.values(), key=lambda x: x.mtime)): 599 | daysago = (today - datetime.fromtimestamp(p.mtime)).days 600 | pages_by_weeks_ago[(daysago - 1) // 7].append(p) 601 | if daysago > 21: 602 | break 603 | 604 | open(outdir / "lastweek.html", "w").write( 605 | render("lastweek.html", pages_by_weeks_ago=pages_by_weeks_ago) 606 | ) 607 | 608 | 609 | def generate_search(pages: dict[str, Page], outdir: Path) -> None: 610 | index = [ 611 | { 612 | "id": i, 613 | "title": page.title, 614 | "contents": BeautifulSoup(page.html, features="html.parser").get_text(), 615 | "title_path": page.titlepath, 616 | "link_path": page.link_path, 617 | } 618 | for (i, page) in enumerate(pages.values()) 619 | ] 620 | 621 | open(outdir / "search.html", "w").write(render("search.html", index=index)) 622 | 623 | 624 | def generate_index_page( 625 | tree: FileTree, pages: dict[str, Page], 
outdir: Path, recent: int 626 | ) -> None: 627 | # get the most recently created files 628 | by_ctime = list(reversed(sorted(pages.values(), key=lambda x: x.ctime)))[:recent] 629 | recently_created = [p.link_path for p in by_ctime] 630 | by_mtime = list( 631 | reversed( 632 | sorted( 633 | ( 634 | p 635 | for p in pages.values() 636 | if p.link_path not in recently_created and p.mtime != p.ctime 637 | ), 638 | key=lambda x: x.mtime, 639 | ) 640 | ) 641 | )[:recent] 642 | open(outdir / "index.html", "w").write( 643 | render( 644 | "index.html", 645 | recently_created=by_ctime, 646 | recently_updated=by_mtime, 647 | tree=tree, 648 | ) 649 | ) 650 | 651 | 652 | def generate_feed(pages: dict[str, Page], outfile: Path, recent: int) -> None: 653 | """generate a single feed file from the atom.xml template""" 654 | by_mtime = list(reversed(sorted(pages.values(), key=lambda x: x.mtime))) 655 | 656 | posts = by_mtime[:recent] 657 | for p in posts: 658 | if not p.html_escaped_content: 659 | p.html_escaped_content = escape(render_content(p)) 660 | 661 | open(outfile, "w").write( 662 | render("atom.xml", posts=by_mtime[:recent], timestamp=rfc3339_time(time())) 663 | ) 664 | 665 | 666 | def generate_feeds( 667 | tree: FileTree, pages: dict[str, Page], feeds: list[str], outdir: Path, recent: int 668 | ) -> None: 669 | """ 670 | generate an atom feed for the root tree, then go through each sub-feed in 671 | the feeds array, find the directory with that name, and generate a feed for 672 | it 673 | """ 674 | generate_feed(pages, outdir / "atom.xml", recent) 675 | 676 | for feed in feeds: 677 | subtree = tree.find_dir(feed) 678 | if not subtree: 679 | raise ValueError(f"unable to find {feed}") 680 | generate_feed(subtree.child_pages(), outdir / f"{feed}.atom.xml", recent) 681 | 682 | 683 | def generate_dir_pages(root: FileTree, pages: dict[str, Page], outdir: Path) -> None: 684 | for child in root.children: 685 | if child.dir: 686 | open(outdir / f"{child.reldir}.html", "w").write( 
687 | render("dir.html", tree=child) 688 | ) 689 | generate_dir_pages(child, pages, outdir) 690 | 691 | 692 | def highlight(code, name, _) -> str: 693 | """Highlight a block of code""" 694 | if not name: 695 | return f'
{escape(code)}
' 696 | 697 | # admonishment adapted from mkdocs-material 698 | # https://squidfunk.github.io/mkdocs-material/reference/admonitions/ 699 | if name == "warning": 700 | return f'

{escape(code)}

' 701 | 702 | try: 703 | lexer = get_lexer_by_name(name) 704 | except: 705 | print(f"failed to get lexer for {name}") 706 | return f'
{escape(code)}
' 707 | formatter = HtmlFormatter() 708 | 709 | return pygmentize(code, lexer, formatter)  # Pygments HTML for the fenced block 710 | 711 | 712 | MD = ( 713 | MarkdownIt("gfm-like", {"breaks": True, "html": True, "highlight": highlight}) 714 | .use(anchors_plugin, max_level=6) 715 | .use(front_matter_plugin) 716 | .use(footnote_plugin) 717 | )  # shared markdown renderer used by render_content below 718 | 719 | 720 | def render_link(self, tokens, idx, options, env):  # custom renderer for link_open tokens 721 | # don't count anchor links as external 722 | if not tokens[idx].attrs["href"].startswith("#"): 723 | tokens[idx].attrSet("class", "external-link") 724 | return self.renderToken(tokens, idx, options, env) 725 | 726 | 727 | MD.add_render_rule("link_open", render_link) 728 | 729 | 730 | def render_content(page: Page) -> str: 731 | """ 732 | Given a "page" object, render the markdown within to escaped HTML 733 | """ 734 | return MD.render(page.source) 735 | 736 | 737 | def generate_html_pages(pages: dict[str, Page], outdir: Path) -> None: 738 | for page in pages.values(): 739 | output_path = outdir / page.link_path 740 | 741 | # Optimization: If the file has already been converted to HTML and the 742 | # HTML is newer than the source, don't regenerate the file.
Do convert 743 | # the markdown to HTML because we'll need that 744 | if os.path.isfile(output_path) and page.mtime < os.stat(output_path).st_mtime: 745 | page.html = render_content(page)  # kept in memory for the search index 746 | continue 747 | 748 | page.html = render_content(page) 749 | page.html_escaped_content = escape(page.html) 750 | 751 | mkdir(str(outdir / page.relpath)) 752 | with open(output_path, "w") as fout: 753 | text = render("page.html", page=page) 754 | fout.write(text) 755 | 756 | 757 | def copy_attachments(attachments: dict[str, Attachment], outdir: Path) -> None:  # copy binary attachments verbatim into the output tree 758 | for page in attachments.values(): 759 | mkdir(outdir / page.relpath) 760 | shutil.copy(page.fullpath, outdir / page.link_path) 761 | 762 | 763 | def attachment_replacer(pages: dict[str, Page], attachments: dict[str, Attachment]):  # returns a re.sub callback that expands ![[file]] embeds 764 | def _attachment_replacer(m: re.Match) -> str: 765 | filename = m.group(1) 766 | linked_attch = find(pages, attachments, filename) 767 | if not linked_attch: 768 | err(f"Unable to find attachment", filename)  # NOTE(review): f-string has no placeholders 769 | return "" 770 | path = linked_attch.link_path 771 | # assume it's an image unless it ends with pdf, mov, mp4 or webm 772 | if filename.endswith(".pdf"): 773 | return f''  # NOTE(review): the object/video/img markup in these returns appears stripped from this copy — verify against upstream 774 | elif filename.endswith(".mov"): 775 | return f'' 776 | elif filename.endswith(".mp4"): 777 | return f'' 778 | elif filename.endswith(".webm"): 779 | return f'' 780 | return f''  # NOTE(review): image fallback markup also stripped 781 | 782 | return _attachment_replacer 783 | 784 | 785 | IMAGE_LINK_RE = re.compile(r"!\[\[(.*?)\]\]")  # obsidian-style embed syntax 786 | 787 | 788 | def substitute_images( 789 | pages: dict[str, Page], attachments: dict[str, Attachment] 790 | ) -> None:  # expand every ![[...]] embed in every page's source 791 | replacer = attachment_replacer(pages, attachments) 792 | for page in pages.values(): 793 | page.source = IMAGE_LINK_RE.sub(replacer, page.source) 794 | 795 | 796 | def sanitize(s: str) -> str: 797 | """Replace non-alphanum-characters with dash and lowercase 798 | 799 | I'm sure this doesn't quite match the ids we're giving to section headers, 800 | but it seems to be close enough for now.
Ultimately, I should look into the 801 | generation function and figure out exactly how it's turning: 802 | 803 | ## Day 2 804 | 805 | into 806 | 807 |

Day 2

808 | 809 | so that I can match it precisely. 810 | """  # NOTE(review): the HTML example above was stripped from this copy 811 | return re.sub(r"[^\w]", "-", s.rstrip()).lower() 812 | 813 | 814 | # maybe move to https://github.com/jsepia/markdown-it-wikilinks eventually? 815 | def crosslink_replacer(pages: dict[str, Page]) -> Callable[[re.Match], str]:  # returns a re.sub callback that expands [[wikilinks]] 816 | def _crosslink_replacer(m: re.Match) -> str: 817 | rawlink = m.group(1) 818 | title = rawlink 819 | nicetitle = None 820 | anchor = None 821 | 822 | # [[page|nice title]] -> title: page, nicetitle: nice title 823 | if "|" in rawlink:  # NOTE(review): split() raises if the link contains two "|"; consider split("|", 1) 824 | title, nicetitle = rawlink.split("|") 825 | 826 | # [[page#anchor]] -> title: page, anchor: anchor 827 | if "#" in title: 828 | title, anchor = title.split("#") 829 | 830 | linked_page = find(pages, {}, title) 831 | 832 | # if we don't find the linked page, assume that the group is not in 833 | # fact a link. There are several places in my notes where we use the 834 | # string `[[` but it's not a link; leave them be 835 | if not linked_page: 836 | err(f"Unable to find page", title)  # NOTE(review): f-string has no placeholders 837 | return m.group(0) 838 | 839 | linktitle = nicetitle if nicetitle else title 840 | anchor = f"#{sanitize(anchor)}" if anchor else "" 841 | 842 | return f'{linktitle}'  # NOTE(review): the anchor-tag markup appears stripped from this copy — verify against upstream 843 | 844 | return _crosslink_replacer 845 | 846 | 847 | # match: 848 | # - two open square brackets [[ 849 | # - capture anything up to the closing square bracket pair ]] 850 | CROSSLINK_RE = re.compile(r"\[\[(.*?)\]\]") 851 | 852 | 853 | def substitute_crosslinks(pages: dict[str, Page]) -> None:  # expand every [[...]] crosslink in every page's source 854 | replacer = crosslink_replacer(pages) 855 | for page in pages.values(): 856 | page.source = CROSSLINK_RE.sub(replacer, page.source) 857 | 858 | 859 | def parse( 860 | mddir: str, 861 | recent: int, 862 | use_git_times: bool, 863 | ignore: Optional[set[str]] = None, 864 | feeds: list[str] = [],  # NOTE(review): mutable default; never mutated here, but prefer None 865 | ) -> None: 866 | """parse a directory of markdown files, ignoring a list of folder names 867 | 868 | mddir: the name of the directory to parse files in 869 | recent: how many posts to show in the
"recently updated" section 870 | ignore: an optional list of directory names to ignore. Will be ignored at 871 | any level in the tree. 872 | """ 873 | # make the type checker happy 874 | ignore = ignore if ignore else set() 875 | dir = os.path.normpath(os.path.expanduser(mddir)) 876 | 877 | tree, pages, attachments = build_file_tree(dir, ignore, use_git_times) 878 | calculate_backlinks(pages, attachments) 879 | 880 | outdir = Path(mkdir("./output")) 881 | 882 | generate_stylesheet() 883 | copy_static(Path("./templates"), outdir) 884 | copy_attachments(attachments, outdir) 885 | 886 | substitute_images(pages, attachments) 887 | substitute_crosslinks(pages) 888 | 889 | # should come before generate_index_page because it generates the HTML that 890 | # is necessary for the atom file output 891 | generate_html_pages(pages, outdir) 892 | generate_search(pages, outdir) 893 | generate_index_page(tree, pages, outdir, recent) 894 | generate_feeds(tree, pages, feeds, outdir, recent) 895 | generate_dir_pages(tree, pages, outdir) 896 | generate_lastweek_page(pages, outdir) 897 | 898 | 899 | if __name__ == "__main__": 900 | parser = argparse.ArgumentParser() 901 | parser.add_argument( 902 | "--recent", 903 | help="number of recent entries to show", 904 | type=int, 905 | default=15, 906 | ) 907 | parser.add_argument( 908 | "--path", 909 | help="path to folder of .md files to publish. This must be an absolute path", 910 | type=str, 911 | default="~/Library/Mobile Documents/iCloud~md~obsidian/Documents/personal", 912 | ) 913 | parser.add_argument( 914 | "--use-git-times", 915 | action="store_true", 916 | help="use git modified time instead of mtime for file timestamps", 917 | ) 918 | parser.add_argument( 919 | "--feed", 920 | action="append", 921 | default=[], 922 | help="a directory to generate a separate feed for. 
Can be given multiple times", 923 | ) 924 | args = parser.parse_args(sys.argv[1:]) 925 | 926 | default_ignores = { 927 | ".DS_Store", 928 | "private", 929 | ".obsidian", 930 | ".github", 931 | ".git", 932 | ".gitignore", 933 | } 934 | parse( 935 | args.path, 936 | args.recent, 937 | args.use_git_times, 938 | ignore=default_ignores, 939 | feeds=args.feed, 940 | ) 941 | -------------------------------------------------------------------------------- /templates/atom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | llimllib's notes 4 | http://notes.billmill.org/ 5 | 6 | {{ timestamp }} 7 | 8 | Obsidian Notes 9 | 10 | Bill Mill 11 | 12 | {%- for post in posts %} 13 | 14 | https://notes.billmill.org/{{ post.link_path }} 15 | 16 | {{ post.title }} 17 | {{ post.rfc3339_ctime }} 18 | {{ post.rfc3339_mtime }} 19 | {{ post.html_escaped_content }} 20 | 21 | {%- endfor %} 22 | 23 | -------------------------------------------------------------------------------- /templates/dir.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 17 | 19 | 20 | 21 | 22 | llimllib notes 23 | 24 | 25 | 26 |
27 | 33 | {%- if tree.has_child_dirs() %} 34 | {# for now just expand all child pages, eventually might want to do 35 | something smarter? #} 36 | collapse all 37 | 53 | {%- else %} 54 | {# if there aren't any child dirs, we want to show a ul with regular 55 | bullets instead of the collapsible tree we normally show #} 56 | 65 |
66 | {%- endif %} 67 | {%- if tree.dir_backlinks()%} 68 | 78 | {%- endif %} 79 |
80 | {% include 'footer.html' %} 81 | 82 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /templates/feed.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /templates/footer.html: -------------------------------------------------------------------------------- 1 | 14 | -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | 12 | 22 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 32 | llimllib notes 33 | 34 | 35 | 36 |
37 | 40 | feeds: 41 | everything ⁜ 42 | link blog ⁜ 43 | music blog ⁜ 44 | blog blog 45 |

Recently Created

46 | 56 |

Recently Updated

57 | 67 |
68 | 🔍 search notes 69 |
70 |

All Notes

71 | expand all 72 | 88 |
89 | {% include 'footer.html' %} 90 | 91 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /templates/lastweek.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 10 | 11 | 12 | llimllib notes 13 | 14 | 15 | 16 |
17 |

Last Week's Links

18 | 28 |

Two weeks ago

29 | 39 |

Three weeks ago

40 | 50 |
51 | {% include 'footer.html' %} 52 | 53 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /templates/out.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | external link 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /templates/page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 22 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 32 | {{ page.title }} - llimllib notes 33 | 34 | 35 | 36 |
37 | 43 |

{{ page.title }}

44 | 48 | 49 |
last updated: {{ page.updated_date }}
50 | {{ page.html }} 51 | {% if page.backlinks %} 52 | 62 | 63 | {% endif %} 64 | 65 |
66 | {% include 'footer.html' %} 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /templates/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 20 | 22 | 100 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 139 | llimllib notes search 140 | 141 | 142 | 143 |
144 |
145 | 146 | 147 |
148 |
149 |
    150 |
151 |
152 |
153 | {% include 'footer.html' %} 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /templates/style.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --body-width: 800px; 3 | } 4 | 5 | html { 6 | /* fixes font size on iOS */ 7 | text-size-adjust: none; 8 | -webkit-text-size-adjust: none; 9 | } 10 | 11 | body { 12 | font-family: Georgia, "Book Antiqua", serif; 13 | font-size: 1.2rem; 14 | background-color: #fffff8; 15 | margin: 20px; 16 | padding: 0; 17 | } 18 | 19 | p > code, 20 | li > code { 21 | background-color: #f6fcff; 22 | padding: 2px; 23 | font-size: 1rem; 24 | } 25 | 26 | main { 27 | color: #111; 28 | max-width: var(--body-width); 29 | } 30 | 31 | @media screen and (max-width: 600px) { 32 | body { 33 | font-size: 1rem; 34 | } 35 | 36 | .recent { 37 | display: none; 38 | } 39 | 40 | p > code, 41 | li > code { 42 | background-color: #f6fcff; 43 | padding: 1px; 44 | font-size: 1rem; 45 | } 46 | 47 | main { 48 | line-height: 1.5rem; 49 | margin: 1.5em auto; 50 | } 51 | } 52 | 53 | @media screen and (min-width: 600px) { 54 | main { 55 | line-height: 2rem; 56 | margin: 2em auto; 57 | } 58 | } 59 | 60 | footer { 61 | font-size: 1rem; 62 | margin-top: 20px; 63 | margin-bottom: 100px; 64 | text-align: center; 65 | } 66 | 67 | blockquote { 68 | border-left: 1px solid hsl(298 100% 70%); 69 | padding-left: 12px; 70 | } 71 | 72 | div.highlight { 73 | font-size: 1rem; 74 | padding-left: 1rem; 75 | padding-top: 0.05rem; 76 | padding-bottom: 0.05rem; 77 | background-color: #f9f9ef; 78 | /* This is a hack to get the code boxes wider than the parents, but still 79 | in the document flow... 
I would like to figure out how to get the width 80 | to be auto-figured-out, but for now I'm just going to set it wide 81 | because it works-ish */ 82 | position: relative; 83 | overflow: auto; 84 | } 85 | 86 | a { 87 | color: hsl(298 100% 40%); 88 | } 89 | 90 | a:visited { 91 | color: hsl(298 100% 70%); 92 | } 93 | 94 | h1 { 95 | font-size: 2rem; 96 | line-height: 3rem; 97 | } 98 | 99 | h2 { 100 | font-size: 1.8rem; 101 | } 102 | 103 | table { 104 | width: 800px; 105 | border-collapse: collapse; 106 | } 107 | 108 | thead th { 109 | border-bottom: solid 1px #aaa; 110 | } 111 | 112 | tbody tr td { 113 | border-bottom: solid 1px #aaa; 114 | } 115 | 116 | td { 117 | padding: 8px; 118 | } 119 | 120 | .recent { 121 | display: inline-block; 122 | width: 130px; 123 | } 124 | 125 | /* https://iamkate.com/code/tree-views/ */ 126 | .tree { 127 | --spacing: 1.5rem; 128 | --radius: 10px; 129 | /* shove the tree closer to the margin - not sure how to do this better */ 130 | margin-left: -40px; 131 | } 132 | 133 | .tree li { 134 | display: block; 135 | position: relative; 136 | padding-left: calc(2 * var(--spacing) - var(--radius) - 2px); 137 | } 138 | 139 | .tree ul { 140 | margin-left: calc(var(--radius) - var(--spacing)); 141 | padding-left: 0; 142 | } 143 | 144 | .tree ul li { 145 | border-left: 2px solid #ddd; 146 | } 147 | 148 | .tree ul li:last-child { 149 | border-color: transparent; 150 | } 151 | 152 | .tree ul li::before { 153 | content: ""; 154 | display: block; 155 | position: absolute; 156 | top: calc(var(--spacing) / -2); 157 | left: -2px; 158 | width: calc(var(--spacing) + 2px); 159 | height: calc(var(--spacing) + 1px); 160 | border: solid #ddd; 161 | border-width: 0 0 2px 2px; 162 | } 163 | 164 | .tree summary { 165 | /* to remove the disclosure arrow, uncomment this and the following rule */ 166 | /* display: block; */ 167 | cursor: pointer; 168 | } 169 | 170 | .time { 171 | font-size: 0.9em; 172 | } 173 | 174 | .time { 175 | font-size: 0.9em; 176 | } 177 | 178 
| /* I think I prefer keeping these? 179 | .tree summary::marker, 180 | .tree summary::-webkit-details-marker{ 181 | display : none; 182 | } */ 183 | 184 | .tree summary:focus { 185 | outline: none; 186 | } 187 | 188 | .tree summary:focus-visible { 189 | outline: 1px dotted #000; 190 | } 191 | 192 | #collapseall { 193 | font-size: 1rem; 194 | } 195 | 196 | video { 197 | width: 100%; 198 | } 199 | 200 | .dirlink { 201 | text-decoration: none; 202 | font-size: 14px; 203 | } 204 | 205 | a.external-link { 206 | background-image: url(/out.svg); 207 | background-position: center right; 208 | background-repeat: no-repeat; 209 | background-size: 0.7em; 210 | padding-right: 0.8em; 211 | } 212 | 213 | /* adapted from mkdocs-material 214 | * https://squidfunk.github.io/mkdocs-material/reference/admonitions/ 215 | * */ 216 | .admon-warning { 217 | position: relative; 218 | border: 0.075rem solid #ff9100; 219 | border-radius: 0.2rem; 220 | display: flow-root; 221 | margin: 1.5625em 0; 222 | font-size: 1rem; 223 | padding: 0 0.8rem; 224 | page-break-inside: avoid; 225 | } 226 | 227 | p.warning::before { 228 | content: ""; 229 | height: 2rem; 230 | -webkit-mask-image: var(--md-admonition-icon--note); 231 | mask-image: var(--md-admonition-icon--note); 232 | -webkit-mask-position: center; 233 | mask-position: center; 234 | -webkit-mask-repeat: no-repeat; 235 | mask-repeat: no-repeat; 236 | -webkit-mask-size: contain; 237 | mask-size: contain; 238 | position: absolute; 239 | top: -0.35em; 240 | left: 0; 241 | width: 2rem; 242 | background-color: #ff9100; 243 | -webkit-mask-image: var(--md-admonition-icon--warning); 244 | mask-image: var(--md-admonition-icon--warning); 245 | } 246 | 247 | p.warning { 248 | padding-left: 3rem; 249 | position: relative; 250 | } 251 | 252 | .bodyimg { 253 | max-width: min(100%, var(--body-width)); 254 | } 255 | 256 | /* copied from mkdocs-material 257 | * https://squidfunk.github.io/mkdocs-material/reference/admonitions/ 258 | * */ 259 | :root { 260 | 
--md-admonition-icon--note: url('data:image/svg+xml;charset=utf-8,'); 261 | --md-admonition-icon--abstract: url('data:image/svg+xml;charset=utf-8,'); 262 | --md-admonition-icon--info: url('data:image/svg+xml;charset=utf-8,'); 263 | --md-admonition-icon--tip: url('data:image/svg+xml;charset=utf-8,'); 264 | --md-admonition-icon--success: url('data:image/svg+xml;charset=utf-8,'); 265 | --md-admonition-icon--question: url('data:image/svg+xml;charset=utf-8,'); 266 | --md-admonition-icon--warning: url('data:image/svg+xml;charset=utf-8,'); 267 | --md-admonition-icon--failure: url('data:image/svg+xml;charset=utf-8,'); 268 | --md-admonition-icon--danger: url('data:image/svg+xml;charset=utf-8,'); 269 | --md-admonition-icon--bug: url('data:image/svg+xml;charset=utf-8,'); 270 | --md-admonition-icon--example: url('data:image/svg+xml;charset=utf-8,'); 271 | --md-admonition-icon--quote: url('data:image/svg+xml;charset=utf-8,'); 272 | } 273 | --------------------------------------------------------------------------------