├── .gitignore
├── .prettierignore
├── LICENSE
├── Makefile
├── Pipfile
├── Pipfile.lock
├── README.md
├── custom_pygments_style.py
├── favicon.ico
├── modd.conf
├── pyproject.toml
├── requirements.txt
├── run.py
└── templates
├── atom.xml
├── dir.html
├── feed.svg
├── footer.html
├── index.html
├── lastweek.html
├── out.svg
├── page.html
├── search.html
└── style.css
/.gitignore:
--------------------------------------------------------------------------------
1 | output
2 | .venv
3 | .DS_Store
4 | __pycache__
5 | .mise.toml
6 |
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | # these templated HTML files can break if prettier tries to format them
2 | templates/*.html
3 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | CDN_BUCKET = obsidian_html
2 | MDPATH ?= "~/Library/Mobile Documents/iCloud~md~obsidian/Documents/personal"
3 |
4 | build: requirements
5 | .venv/bin/python run.py --path ${MDPATH} --use-git-times --feed link_blog --feed music_blog --feed blog
6 | cp favicon.ico output/
7 |
8 | # only for use in dev, for quick iteration
9 | build-quick:
10 | .venv/bin/python run.py --path ${MDPATH} --feed link_blog --feed music_blog --feed blog
11 |
12 | requirements:
13 | if [ ! -d ".venv" ]; then python -mvenv .venv; fi
14 | .venv/bin/pip install -r requirements.txt
15 |
16 | clean:
17 | rm -rf output
18 |
19 | pull:
20 | git pull
21 |
22 | serve:
23 | modd
24 |
25 | sync:
26 | s3cmd sync --no-mime-magic --guess-mime-type --acl-public --no-preserve \
27 | output/ s3://llimllib/${CDN_BUCKET}/
28 |
29 | # flush the digital ocean CDN cache
30 | flush:
31 | doctl compute cdn flush \
32 | $$(doctl compute cdn list --format ID | tail -n1) \
33 | --files ${CDN_BUCKET}/*
34 |
35 | publish: pull build sync flush
36 |
37 | .PHONY: build build-quick clean pull requirements serve sync flush publish
38 |
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | mdx-linkify = "*"
8 | python-markdown-math = "*"
9 | pygments = "*"
10 | jinja2 = "*"
11 | strict-rfc3339 = "*"
12 | markdown-it-py = "*"
13 | mdit-py-plugins = "*"
14 | linkify-it-py = "*"
15 |
16 | [dev-packages]
17 |
18 | [requires]
19 | python_version = "3.10"
20 |
--------------------------------------------------------------------------------
/Pipfile.lock:
--------------------------------------------------------------------------------
1 | {
2 | "_meta": {
3 | "hash": {
4 | "sha256": "5981f8ede38e1d355626505b39514eb69fcb3ab575f54e1b2c1137fc1a233393"
5 | },
6 | "pipfile-spec": 6,
7 | "requires": {
8 | "python_version": "3.10"
9 | },
10 | "sources": [
11 | {
12 | "name": "pypi",
13 | "url": "https://pypi.org/simple",
14 | "verify_ssl": true
15 | }
16 | ]
17 | },
18 | "default": {
19 | "bleach": {
20 | "hashes": [
21 | "sha256:1a1a85c1595e07d8db14c5f09f09e6433502c51c595970edc090551f0db99414",
22 | "sha256:33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4"
23 | ],
24 | "markers": "python_version >= '3.7'",
25 | "version": "==6.0.0"
26 | },
27 | "jinja2": {
28 | "hashes": [
29 | "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852",
30 | "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"
31 | ],
32 | "index": "pypi",
33 | "version": "==3.1.2"
34 | },
35 | "linkify-it-py": {
36 | "hashes": [
37 | "sha256:19f3060727842c254c808e99d465c80c49d2c7306788140987a1a7a29b0d6ad2",
38 | "sha256:a3a24428f6c96f27370d7fe61d2ac0be09017be5190d68d8658233171f1b6541"
39 | ],
40 | "index": "pypi",
41 | "version": "==2.0.2"
42 | },
43 | "markdown": {
44 | "hashes": [
45 | "sha256:065fd4df22da73a625f14890dd77eb8040edcbd68794bcd35943be14490608b2",
46 | "sha256:8bf101198e004dc93e84a12a7395e31aac6a9c9942848ae1d99b9d72cf9b3520"
47 | ],
48 | "markers": "python_version >= '3.7'",
49 | "version": "==3.4.3"
50 | },
51 | "markdown-it-py": {
52 | "hashes": [
53 | "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1",
54 | "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"
55 | ],
56 | "index": "pypi",
57 | "version": "==3.0.0"
58 | },
59 | "markupsafe": {
60 | "hashes": [
61 | "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e",
62 | "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e",
63 | "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431",
64 | "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686",
65 | "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559",
66 | "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc",
67 | "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c",
68 | "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0",
69 | "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4",
70 | "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9",
71 | "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575",
72 | "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba",
73 | "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d",
74 | "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3",
75 | "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00",
76 | "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155",
77 | "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac",
78 | "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52",
79 | "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f",
80 | "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8",
81 | "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b",
82 | "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24",
83 | "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea",
84 | "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198",
85 | "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0",
86 | "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee",
87 | "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be",
88 | "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2",
89 | "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707",
90 | "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6",
91 | "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58",
92 | "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779",
93 | "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636",
94 | "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c",
95 | "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad",
96 | "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee",
97 | "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc",
98 | "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2",
99 | "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48",
100 | "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7",
101 | "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e",
102 | "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b",
103 | "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa",
104 | "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5",
105 | "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e",
106 | "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb",
107 | "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9",
108 | "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57",
109 | "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc",
110 | "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"
111 | ],
112 | "markers": "python_version >= '3.7'",
113 | "version": "==2.1.3"
114 | },
115 | "mdit-py-plugins": {
116 | "hashes": [
117 | "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9",
118 | "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b"
119 | ],
120 | "index": "pypi",
121 | "version": "==0.4.0"
122 | },
123 | "mdurl": {
124 | "hashes": [
125 | "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8",
126 | "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"
127 | ],
128 | "markers": "python_version >= '3.7'",
129 | "version": "==0.1.2"
130 | },
131 | "mdx-linkify": {
132 | "hashes": [
133 | "sha256:e09278e43e5076b63398238b069a361913779683183481e9206235667cd89f54"
134 | ],
135 | "index": "pypi",
136 | "version": "==2.1"
137 | },
138 | "pygments": {
139 | "hashes": [
140 | "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c",
141 | "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"
142 | ],
143 | "index": "pypi",
144 | "version": "==2.15.1"
145 | },
146 | "python-markdown-math": {
147 | "hashes": [
148 | "sha256:8564212af679fc18d53f38681f16080fcd3d186073f23825c7ce86fadd3e3635",
149 | "sha256:c685249d84b5b697e9114d7beb352bd8ca2e07fd268fd4057ffca888c14641e5"
150 | ],
151 | "index": "pypi",
152 | "version": "==0.8"
153 | },
154 | "six": {
155 | "hashes": [
156 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
157 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
158 | ],
159 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
160 | "version": "==1.16.0"
161 | },
162 | "strict-rfc3339": {
163 | "hashes": [
164 | "sha256:5cad17bedfc3af57b399db0fed32771f18fc54bbd917e85546088607ac5e1277"
165 | ],
166 | "index": "pypi",
167 | "version": "==0.7"
168 | },
169 | "uc-micro-py": {
170 | "hashes": [
171 | "sha256:30ae2ac9c49f39ac6dce743bd187fcd2b574b16ca095fa74cd9396795c954c54",
172 | "sha256:8c9110c309db9d9e87302e2f4ad2c3152770930d88ab385cd544e7a7e75f3de0"
173 | ],
174 | "markers": "python_version >= '3.7'",
175 | "version": "==1.0.2"
176 | },
177 | "webencodings": {
178 | "hashes": [
179 | "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78",
180 | "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"
181 | ],
182 | "version": "==0.5.1"
183 | }
184 | },
185 | "develop": {}
186 | }
187 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # obsidian_notes
2 |
3 | My personal code for turning my [obsidian](https://obsidian.md/) notes into a website. Extremely incomplete and hacked together, but feel free to take anything useful from it.
4 |
5 | You can visit the site at http://notes.billmill.org
6 |
7 | I deploy this with `make publish`, run via a github action in my notes repo.
8 |
--------------------------------------------------------------------------------
/custom_pygments_style.py:
--------------------------------------------------------------------------------
1 | """
2 | pygments.styles.default
3 | ~~~~~~~~~~~~~~~~~~~~~~~
4 |
5 | The default highlighting style.
6 |
7 | :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
8 | :license: BSD, see LICENSE for details.
9 | """
10 |
11 | from pygments.style import Style
12 | from pygments.token import (
13 | Keyword,
14 | Name,
15 | Comment,
16 | String,
17 | Error,
18 | Number,
19 | Operator,
20 | Generic,
21 | Whitespace,
22 | )
23 |
24 |
class LlimllibStyle(Style):
    """
    llimllib's custom style

    A light color scheme tweaked from Pygments' stock "default" style (see
    the module docstring); only the token -> style mapping below differs.
    """

    # NOTE(review): pygments identifies styles by this attribute and it still
    # says "default" rather than e.g. "llimllib" -- confirm intentional.
    name = "default"

    # background behind highlighted code blocks
    background_color = "#f8f8f8"

    # token -> style-string mapping; format is pygments' usual
    # "[bold|italic|...] #rrggbb" shorthand
    styles = {
        Whitespace: "#bbbbbb",
        Comment: "italic #3D7B7B",
        Comment.Preproc: "noitalic #9C6500",
        Keyword: "bold #C500CC",
        Keyword.Pseudo: "nobold",
        Keyword.Type: "nobold #B00040",
        Operator: "#666666",
        Operator.Word: "bold #AA22FF",
        Name.Builtin: "#007093",
        Name.Function: "#4D44BB",
        Name.Class: "bold #4D44BB",
        Name.Namespace: "bold #4D44BB",
        Name.Exception: "bold #CB3F38",
        Name.Variable: "#19177C",
        Name.Constant: "#880000",
        Name.Label: "#767600",
        Name.Entity: "bold #717171",
        Name.Attribute: "#687822",
        Name.Tag: "bold #C500CC",
        Name.Decorator: "#AA22FF",
        String: "#4D44BB",
        String.Doc: "italic",
        String.Interpol: "bold #A45A77",
        String.Escape: "bold #AA5D1F",
        String.Regex: "#A45A77",
        # String.Symbol: "#B8860B",
        String.Symbol: "#19177C",
        String.Other: "#C500CC",
        Number: "#666666",
        Generic.Heading: "bold #000080",
        Generic.Subheading: "bold #800080",
        Generic.Deleted: "#A00000",
        Generic.Inserted: "#008400",
        Generic.Error: "#E40000",
        Generic.Emph: "italic",
        Generic.Strong: "bold",
        Generic.EmphStrong: "bold italic",
        Generic.Prompt: "bold #000080",
        Generic.Output: "#717171",
        Generic.Traceback: "#04D",
        Error: "border:#FF0000",
    }
77 |
--------------------------------------------------------------------------------
/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/llimllib/obsidian_notes/c93b9b5c46fe4f08d51a0cbeddb588d4f3682969/favicon.ico
--------------------------------------------------------------------------------
/modd.conf:
--------------------------------------------------------------------------------
1 | **/*.py templates/**/*.html templates/**/*.css templates/**/*.xml {
2 | prep: make clean build-quick
3 | daemon +sigterm: devd -m ./output
4 | }
5 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # https://microsoft.github.io/pyright/#/configuration
2 | [tool.pyright]
3 | include = ["."]
4 | exclude = []
5 | ignore = []
6 | # defineConstant = { DEBUG = true }
7 | # stubPath = "src/stubs"
8 | venv = ".venv"
9 |
10 | reportMissingImports = true
11 | reportMissingTypeStubs = false
12 |
13 | pythonVersion = "3.10"
14 | pythonPlatform = "Darwin"
15 |
16 | executionEnvironments = [
17 | { root = "src" }
18 | ]
19 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4
2 | Jinja2
3 | linkify-it-py
4 | markdown-it-py
5 | mdit-py-plugins
6 | pygments
7 | python-magic
8 | pyyaml
9 | strict-rfc3339
10 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from custom_pygments_style import LlimllibStyle
3 |
4 | import argparse
5 | from collections import defaultdict
6 | from dataclasses import dataclass
7 | from datetime import datetime
8 | from html import escape
9 | import os
10 | from pathlib import Path
11 | import re
12 | import subprocess
13 | import shutil
14 | import sys
15 | from time import strftime, localtime, time
16 | from typing import Any, Callable, DefaultDict, Generator, Optional
17 |
18 | from bs4 import BeautifulSoup
19 | from jinja2 import Environment, FileSystemLoader
20 | from markdown_it import MarkdownIt
21 | import yaml
22 |
23 | from mdit_py_plugins.anchors import anchors_plugin
24 | from mdit_py_plugins.footnote import footnote_plugin
25 | from mdit_py_plugins.front_matter import front_matter_plugin
26 | from pygments import highlight as pygmentize
27 | from pygments.lexers import get_lexer_by_name
28 | from pygments.formatters import HtmlFormatter
29 |
30 | from strict_rfc3339 import timestamp_to_rfc3339_utcoffset, rfc3339_to_timestamp
31 |
32 | JINJA = Environment(loader=FileSystemLoader("templates"))
33 |
34 |
@dataclass
class GitStat:
    """File timestamps derived from git history.

    Field names mirror os.stat_result so callers (see handle_file) can use a
    GitStat and an os.stat() result interchangeably.
    """

    # most recent committer timestamp (modification time)
    st_mtime: float
    # oldest author timestamp (treated as creation time)
    st_ctime: float
39 |
40 |
@dataclass
class FileStat:
    """File timestamps taken from a page's front matter.

    Mirrors os.stat_result field names, same as GitStat, so handle_file can
    treat all three timestamp sources uniformly.
    """

    # the front matter "updated" timestamp
    st_mtime: float
    # the front matter "created" timestamp
    st_ctime: float
45 |
46 |
@dataclass
class Attachment:
    """A non-markdown file (image, pdf, ...) that pages may link to."""

    # file name without its extension
    title: str
    # lowercased title (see canonicalize)
    canon_title: str
    # absolute path to the source file
    fullpath: str
    # relative path used when linking to the copied file
    link_path: str
    # the file name, extension included
    file: str
    # sanitized relative directory (see pathname)
    relpath: str
    # always [] for attachments (see handle_file); kept for interface
    # parity with Page
    links: list[str]
    # pages/attachments that link to this attachment
    backlinks: set["Page | Attachment"]

    def __eq__(self, b) -> bool:
        # attachments are identified by title alone
        return b.title == self.title

    def __hash__(self) -> int:
        return hash(("title", self.title))
63 |
64 |
@dataclass
class Page:
    """A markdown content page plus everything needed to render it."""

    # the page's title, cased as the author cased it
    title: str
    # the canonicalized (lowercased) title
    canon_title: str

    # the file name of the markdown source
    file: str

    # the relative path to the markdown source
    relpath: str

    # the absolute path to the markdown source
    fullpath: str

    # titlepath is the relative path and the file name of the page stripped of
    # its extension, for example it might be 'visualization/bar_charts'
    titlepath: str
    # relative path to the rendered html file
    link_path: str

    # raw wiki-link targets ([[...]]) found in the source, pipe/anchor
    # suffixes stripped (see findlinks)
    links: list[str]

    # pages/attachments that link to this page
    backlinks: set["Page | Attachment"]

    # parsed yaml front matter (may be empty)
    frontmatter: dict[str, Any]

    # the mtime/ctime of the file, as unix timestamps
    mtime: float
    ctime: float

    # file creation and modification time, ISO timestamped
    rfc3339_ctime: str
    rfc3339_mtime: str

    # created and updated dates, formatted for humans
    created_date: str
    updated_date: str

    # the contents of the page, in markdown
    source: str

    # the contents of the page, rendered to HTML
    html: str = ""

    # the same HTML, escaped for use in the atom feed
    html_escaped_content: str = ""

    def __eq__(self, b) -> bool:
        # pages are identified by title alone
        return b.title == self.title

    def __hash__(self) -> int:
        return hash(("title", self.title))

    def dirlinks(self) -> Generator[str, None, None]:
        """yield an HTML link to each ancestor dir page, back to the root"""
        pathparts = self.titlepath.split("/")[:-1]
        for i in range(len(pathparts)):
            link = "/" + "/".join(pathparts[: i + 1])
            # BUG FIX: `link` was computed but never used and the yield
            # emitted bare text -- the anchor markup had evidently been
            # lost. Restored to match FileTree.dirlink, which targets
            # "<dir>.html". TODO(review): confirm against the repo.
            yield f'<a href="{link}.html">{pathparts[i]}</a>'
125 |
126 |
def info(msg: str, *args) -> None:
    """Print msg to stdout in yellow, followed by any extra args."""
    YELLOW, RESET = "\033[0;33m", "\033[0m"
    print(YELLOW + msg + RESET, *args)
131 |
132 |
def err(msg: str, *args) -> None:
    """Print msg to stdout in red, followed by any extra args."""
    RED, RESET = "\033[0;31m", "\033[0m"
    print(RED + msg + RESET, *args)
137 |
138 |
139 | FRONT_MATTER_RE = re.compile(r"^\s*---(.*?)\n---\n", re.S)
140 |
141 |
def parse_frontmatter(raw_fm: str) -> dict[str, Any]:
    """parse yaml frontmatter and return a dictionary. yaml could be any data
    type, but in this context we're expecting to get a dict out of this, so
    throw an exception if we find anything else

    raw_fm: the text captured between the --- markers

    returns {} when the front matter is empty
    """
    anything = yaml.safe_load(raw_fm)
    if not anything:
        return {}
    # isinstance is the idiomatic check and, unlike `type(x) != dict`,
    # also accepts dict subclasses
    elif not isinstance(anything, dict):
        raise Exception(f"Expected dict, got {type(anything)}")
    return anything
152 |
153 |
def split_front_matter(buf: str) -> tuple[dict[str, Any], str]:
    """split the front matter from the rest of the markdown document's
    content, parsing the front matter when it is present

    returns (frontmatter_dict, markdown_body)
    """
    pieces = FRONT_MATTER_RE.split(buf, 1)
    # a match yields [prefix, captured front matter, remainder]; with no
    # match the whole document comes back as a single element
    if len(pieces) > 1:
        return (parse_frontmatter(pieces[1]), pieces[2])
    return ({}, pieces[0])
162 |
163 |
# characters NOT in the URL-safe set [A-Za-z0-9_\-._~]
WHITESPACE_RE = re.compile(r"[^\w\-\._~]")
MARKDOWN_RE = re.compile(r"\.md$")


def outname(fname: str) -> str:
    """Turn a markdown filename into an html file name

    URL-unsafe characters become underscores and a trailing .md extension
    becomes .html; non-markdown names only get the character substitution.
    """
    sanitized = WHITESPACE_RE.sub("_", fname)
    return MARKDOWN_RE.sub(".html", sanitized)
172 |
173 |
# like WHITESPACE_RE but additionally allows path separators (/ and \)
SANITIZE_PATH = re.compile(r"[^\w\-\._~\\\/]")


def pathname(dname: str) -> str:
    """Sanitize a path name: URL-unsafe characters become underscores,
    path separators are preserved"""
    return SANITIZE_PATH.sub("_", dname)
180 |
181 |
182 | def mkdir(dir_: str | Path) -> str | Path:
183 | """Recursively make dir_ if it does not exist"""
184 | if not os.path.isdir(dir_):
185 | os.makedirs(dir_, exist_ok=True)
186 | return dir_
187 |
188 |
189 | def formatted_time(t: float) -> str:
190 | return strftime("%b %d, %Y", localtime(t))
191 |
192 |
def rfc3339_time(t: float) -> str:
    """Format a unix timestamp as an RFC 3339 string (used in the atom feed
    and the rfc3339_* Page fields); per the helper's name it carries the
    local UTC offset rather than Z."""
    return timestamp_to_rfc3339_utcoffset(t)
195 |
196 |
def render(template: str, **kwargs) -> str:
    """Render the named template from the templates/ dir (via the module
    level JINJA environment), passing kwargs as the template context."""
    return JINJA.get_template(template).render(**kwargs)
199 |
200 |
def generate_stylesheet() -> None:
    """Use pygments to generate a stylesheet at output/pygments.css.

    Assumes the output/ directory already exists (it is created earlier in
    the build).
    """
    # plain string literal: the previous f-string had no placeholders
    with open("output/pygments.css", "w") as f:
        f.write(HtmlFormatter(style=LlimllibStyle).get_style_defs())
205 |
206 |
def copy_static(from_dir: Path, to_dir: Path) -> None:
    """Copy static assets (stylesheets and svg images) from the templates
    directory into the output directory"""
    for pattern in ("*.css", "*.svg"):
        for asset in from_dir.glob(pattern):
            shutil.copy(asset, to_dir)
212 |
213 |
def strip_fancy_name(link: str) -> str:
    """Return the bare page name of a wiki link

    Strips a pipe, used in a link like [[Some long title|link text]], or a
    hash, used like [[Somepage#anchor]]. The pipe is checked first, so it
    wins when both are present.
    """
    for sep in ("|", "#"):
        if sep in link:
            return link.split(sep)[0]
    return link


LINK_RE = re.compile(r"\[\[(.*?)\]\]")


def findlinks(md: str) -> list[str]:
    """Find all wiki links ([[...]]) in a markdown document

    XXX: right now this grabs some "links" from code blocks; i.e. pandas lets
    you do stuff like df[["columnA", "columnB"]]. Fix the regex so it
    doesn't match that
    """
    return [strip_fancy_name(link) for link in LINK_RE.findall(md)]
237 |
238 |
def canonicalize(title: str) -> str:
    """return the canonical form of a title: lowercased, so that titles
    compare case-insensitively throughout the site"""
    return title.lower()
242 |
243 |
def canonical_path(relative_path: str) -> str:
    """Given a relative path, return the canonical form

    For example, if you pass "Data Analytics/Duckdb", this returns
    "Data_Analytics/duckdb". The value returned by this will match the
    "titlepath" attribute of a page
    """
    path, page = os.path.split(relative_path)
    if path:
        # sanitize the directory part, canonicalize the page part
        return pathname(path) + "/" + canonicalize(page)
    # bare page name: no directory component to sanitize
    return page.lower()
255 |
256 |
def find(
    pages: dict[str, Page], attachments: dict[str, Attachment], link: str
) -> Optional[Page | Attachment]:
    """find a page referred to by `link`

    Pages can be linked in two ways:
    - By their title
        - Titles may not be unique and this function will just return the
          first result in our random search if there are multiple pages
          with the same title
    - By their path+title

    Returns None (implicitly) when nothing matches.
    """
    clink = canonical_path(link)
    for relpath, page in pages.items():
        if page.canon_title == clink or relpath == clink:
            return page
    # NOTE(review): attachments are matched on the raw link rather than the
    # canonicalized one, unlike pages -- presumably because attachment file
    # names appear verbatim in links; confirm before changing
    for link_path, attach in attachments.items():
        if attach.file == link or link_path == link:
            return attach
276 |
277 |
def split_files(files: list[str]) -> tuple[list[str], list[str]]:
    """Split a file list into (markdown, non-markdown) files, preserving
    the input order within each list"""
    markdown: list[str] = []
    other: list[str] = []
    for f in files:
        (markdown if f.endswith(".md") else other).append(f)
    return (markdown, other)
284 |
285 |
class FileTree:
    """A node in the site tree: either a directory (`dir` set) or a leaf
    holding a page or attachment (`page` set)."""

    def __init__(
        self, dir: Optional[str] = None, page: Optional[Page | Attachment] = None
    ):
        self.dir = dir
        if isinstance(self.dir, str):
            # display name of the directory
            self.basename = os.path.basename(self.dir)
            # sanitized relative path, used in URLs
            self.reldir = pathname(self.dir)
            self.dirparts = self.dir.split("/")
            self.reldirparts = self.reldir.split("/")
        self.page = page
        self.children: list[FileTree] = []

    def child_pages(self, idx=None) -> dict[str, Page]:
        """return an index of every Page beneath this node, keyed on
        titlepath"""
        if not isinstance(idx, dict):
            idx = {}
        for c in self.children:
            if isinstance(c.page, Page):
                idx[c.page.titlepath] = c.page
            elif c.dir:
                c.child_pages(idx)
        return idx

    def find_dir(self, dir: str) -> Optional["FileTree"]:
        """depth-first search for the subtree whose final path part is
        `dir`; returns None (implicitly) when not found"""
        if self.reldirparts[-1] == dir:
            return self
        for subdir in (c for c in self.children if c.dir):
            d = subdir.find_dir(dir)
            if d:
                return d

    def dir_backlinks(self) -> set[Page | Attachment]:
        """return backlinks for all direct children of this node"""
        return set(
            backlink
            for child in self.children
            if child.page
            for backlink in child.page.backlinks
        )

    def has_child_dirs(self) -> bool:
        """return true if there are child dirs"""
        return any(child.dir for child in self.children)

    def dirlinks(self) -> Generator[str, None, None]:
        """yield an HTML link to each dir page, all the way back to the root"""
        assert self.dir

        for i in range(len(self.dirparts)):
            link = "/" + "/".join(self.reldirparts[: i + 1])
            # BUG FIX: `link` was computed but never used and the yield
            # emitted bare text -- the anchor markup had evidently been
            # stripped. Restored to match dirlink() below, which targets
            # "<reldir>.html". TODO(review): confirm against the repo.
            yield f'<a href="{link}.html">{self.dirparts[i]}</a>'

    def dirlink(self) -> str:
        """return an HTML link to this directory's index page"""
        assert self.dir
        href = "/" + "/".join(self.reldirparts) + ".html"
        # BUG FIX: `href` was computed but never used; anchor restored
        return f'<a href="{href}">🔗</a>'

    def __str__(self) -> str:
        if self.dir:
            return os.path.basename(self.dir)
        elif self.page:
            return self.page.title
        raise AssertionError("either page or dir must be true")

    def __repr__(self) -> str:
        return self.__str__()
353 |
354 |
def gitstat(dir: str, path: str) -> GitStat:
    """return the created and modified times for a file from git

    dir: the git repository to inspect (passed to `git -C`)
    path: the file whose history we want

    raises IndexError when the file has no git history (the .split(" ")[1]
    below finds no second field on the empty output line)
    """
    # Here's an example of what the output looks like for this:
    #
    # $ git -C . log --pretty="format:%cI %aI" README.md
    # 2023-10-30T08:18:52-04:00 2023-10-30T08:18:52-04:00
    # 2022-04-28T21:54:00-04:00 2022-04-28T21:54:00-04:00
    # 2022-04-09T22:09:23-04:00 2022-04-09T22:09:23-04:00
    # 2022-04-08T21:36:48-04:00 2022-04-08T21:36:48-04:00
    #
    # we need to use the -C argument to tell git to look in the notes
    # repository instead of this repository
    #
    # possibly I should add --follow so that this persists through renames?
    # though maybe I ought to consider a rename a recreation of a file? not
    # clear to me whether it's worth it or not.
    #
    # this is really slow, and I don't see a path to speeding it up a
    # tremendous amount
    times = (
        subprocess.check_output(
            ["git", "-C", dir, "log", "--pretty=format:%aI %cI", "--", path]
        )
        .decode("utf8")
        .split("\n")
    )

    # The modified time is the second timestamp (%cI, the committer date)
    # on the first -- i.e. newest -- line
    mtime = rfc3339_to_timestamp(times[0].split(" ")[1])

    # the first timestamp (%aI, the author date) on the last -- oldest --
    # line; ctime is not useful, I should remove this from here
    ctime = rfc3339_to_timestamp(times[-1].split(" ")[0])

    return GitStat(st_mtime=mtime, st_ctime=ctime)
389 |
390 |
391 | def parse_fm_datetime(d: str | datetime) -> float:
392 | """
393 | parse a datetime from frontmatter, which may be a string or datetime, into
394 | a timestamp
395 |
396 | I don't quite understand why the yaml lib seems to be converting it
397 | sometimes, but not others
398 | """
399 | if isinstance(d, str):
400 | return rfc3339_to_timestamp(d)
401 | return d.timestamp()
402 |
403 |
def handle_file(path: str, root: str, use_git_times: bool) -> Page | Attachment:
    """given a full path and the root of the tree, return a Page (for
    markdown files) or an Attachment (for anything else)

    path: full path to a file
    root: the root of the tree we're building
    use_git_times: if true, prefer timestamps from git history (see gitstat)
        over the file system when front matter doesn't provide them
    """
    _, extension = os.path.splitext(path)
    if extension == ".md":
        with open(path) as f:
            buf = f.read()
        frontmatter, source = split_front_matter(buf)
        links = findlinks(source)
        dir, filename = os.path.split(path)
        # directory path relative to the tree root, sanitized for URLs
        relpath = pathname(dir.removeprefix(root).lstrip("/"))
        title, _ = os.path.splitext(filename)
        titlepath = os.path.join(relpath, canonicalize(title))

        # get created and modified times for the file. Preference order:
        # - updated/created times in frontmatter
        # - file info from git (if use_git_times is enabled)
        # - file modified time from the file system
        if "updated" in frontmatter and "created" in frontmatter:
            # FileStat mirrors os.stat_result's field names, so `t` can be
            # used uniformly below regardless of which branch produced it
            t = FileStat(
                parse_fm_datetime(frontmatter["updated"]),
                parse_fm_datetime(frontmatter["created"]),
            )
        # if use_git_times is true, assume that the file is stored in git,
        # and get ctime and mtime from git.
        elif use_git_times:
            try:
                t = gitstat(dir, path)
            # if the file is not in git, the function currently throws an
            # IndexError. Take the mtime in that case instead, rather
            # than failing
            except IndexError:
                t = os.stat(path)
        else:
            t = os.stat(path)

        return Page(
            # `title` contains the title, cased as the author cased it
            title=title,
            # `canon_title` contains the canonicalized title
            canon_title=canonicalize(title),
            links=links,
            fullpath=path,
            link_path=os.path.join(relpath, outname(filename)),
            file=filename,
            relpath=relpath,
            titlepath=titlepath,
            source=source,
            backlinks=set(),
            frontmatter=frontmatter,
            ctime=t.st_ctime,
            mtime=t.st_mtime,
            # would be better to put file creation time in front matter
            # at file create time and pull it from there, but this will
            # do for now
            rfc3339_ctime=rfc3339_time(t.st_ctime),
            rfc3339_mtime=rfc3339_time(t.st_mtime),
            created_date=formatted_time(t.st_ctime),
            updated_date=formatted_time(t.st_mtime),
        )

    # if it's not a markdown file, parse it as an attachment
    dir, filename = os.path.split(path)
    title, _ = os.path.splitext(filename)
    relpath = pathname(dir.removeprefix(root).lstrip("/"))
    return Attachment(
        title=title,
        canon_title=canonicalize(title),
        fullpath=path,
        # attachments keep their original file name in links
        link_path=os.path.join(relpath, filename),
        file=filename,
        relpath=relpath,
        links=[],
        backlinks=set(),
    )
482 |
483 |
def build_file_tree(
    dir: str, ignore: set[str], use_git_times: bool
) -> tuple[FileTree, dict[str, Page], dict[str, Attachment]]:
    """walk `dir` and build a file tree plus lookup indexes

    dir: the directory to build the file tree from
    ignore: a set of directory names to ignore
    use_git_times: passed through to the tree walk for file timestamps

    Returns a (FileTree, page index, attachment index) triple.

    The page index is keyed on titlepath, which is the relative path plus
    the canonicalized title, so something like 'visualization/bar_charts'.
    It contains only content pages.

    The attachment index is keyed on relative path + filename, so
    something like 'images/some image 2928984588.png'
    """
    page_index: dict[str, Page] = {}
    attachment_index: dict[str, Attachment] = {}
    tree = build_file_tree_helper(
        FileTree(dir=dir), ignore, dir, page_index, attachment_index, use_git_times
    )
    return tree, page_index, attachment_index
510 |
511 |
def isEmptyFile(path: str) -> bool:
    """return true if a file is empty

    more precisely, if it doesn't have any non-whitespace characters in the
    first 16 bytes

    must open the file in binary mode because if it's a binary file it may not
    be decodable to unicode
    """
    # use a context manager so the file handle is closed deterministically
    # instead of leaking until garbage collection (this function runs once
    # per file in the tree walk)
    with open(path, "rb") as f:
        return not f.read(16).strip()
522 |
523 |
def build_file_tree_helper(
    node: FileTree,
    ignore: set[str],
    root_path: str,
    index: dict[str, Page],
    attachments: dict[str, Attachment],
    use_git_times: bool,
) -> FileTree:
    """recursively populate `node` with the contents of its directory

    node: the FileTree node to fill; node.dir must be set
    ignore: file/directory names to skip, at any depth
    root_path: the root directory of the whole tree walk
    index: out-parameter mapping titlepath -> Page, filled as we walk
    attachments: out-parameter mapping link_path -> Attachment
    use_git_times: passed through to handle_file for file timestamps

    Returns `node`, with node.children filled in.
    """
    assert node.dir
    # sort entries by lowercased path so sibling ordering is
    # case-insensitive and deterministic
    for de in sorted(
        os.scandir(os.path.join(root_path, node.dir)),
        key=lambda x: x.path.lower(),
    ):
        if de.name in ignore:
            info(f"Ignoring file", de)
            continue

        # ignore untitled files or directories
        if de.name == "Untitled.md" or de.name == "Untitled":
            info(f"Ignoring untitled object", de)
            continue

        if de.is_dir():
            # recurse into the subdirectory; its FileTree node stores the
            # path relative to root_path
            path = de.path.removeprefix(root_path).lstrip("/")
            node.children.append(
                build_file_tree_helper(
                    FileTree(dir=path),
                    ignore,
                    root_path,
                    index,
                    attachments,
                    use_git_times,
                )
            )
        else:
            if isEmptyFile(de.path):
                info(f"Ignoring empty file", de)
                continue
            # handle_file returns a Page for markdown, an Attachment otherwise
            page = handle_file(de.path, root_path, use_git_times)

            # we want to index each page by its titlepath, which is something
            # like 'visualization/bar_charts'. If the page does not have a
            # titlepath attribute, assume that it's not a content page
            if isinstance(page, Page):
                # if a page has frontmatter, and that frontmatter contains a
                # "draft" key that is non-empty, consider it a draft and don't
                # render it
                if type(page) == Page and page.frontmatter.get("draft"):
                    continue

                index[page.titlepath] = page
            else:
                attachments[page.link_path] = page

            # leaf node: wrap the page/attachment in a FileTree entry
            node.children.append(FileTree(page=page))

    return node
581 |
582 |
def calculate_backlinks(
    pages: dict[str, Page], attachments: dict[str, Attachment]
) -> None:
    """record, on every linked page, which pages link to it

    For each outbound link on each page, resolve the target and add the
    linking page to the target's `backlinks` set. Unresolvable links are
    logged and skipped.
    """
    for page in pages.values():
        for link in page.links:
            target = find(pages, attachments, link)
            if target:
                target.backlinks.add(page)
            else:
                info(f"unable to find link", link, page.title)
593 |
594 |
def generate_lastweek_page(pages: dict[str, Page], outdir: Path) -> None:
    """render lastweek.html: recently modified pages, bucketed by week

    Buckets pages by how many whole weeks ago they were modified (bucket
    0 covers the last seven days); only pages from roughly the last three
    weeks are included.
    """
    today = datetime.today()
    pages_by_weeks_ago: DefaultDict[int, list[Page]] = defaultdict(list)
    # iterate newest-first so we can stop as soon as we hit a page older
    # than three weeks
    for p in reversed(sorted(pages.values(), key=lambda x: x.mtime)):
        daysago = (today - datetime.fromtimestamp(p.mtime)).days
        pages_by_weeks_ago[(daysago - 1) // 7].append(p)
        if daysago > 21:
            break

    # use a context manager so the output file is flushed and closed
    # deterministically instead of leaking the handle
    with open(outdir / "lastweek.html", "w") as fout:
        fout.write(render("lastweek.html", pages_by_weeks_ago=pages_by_weeks_ago))
607 |
608 |
def generate_search(pages: dict[str, Page], outdir: Path) -> None:
    """render search.html, embedding a search index built from every page

    Each index entry carries the page title, its plain-text content (HTML
    tags stripped via BeautifulSoup), and the paths needed to link to the
    result. Assumes page.html has already been rendered (see
    generate_html_pages).
    """
    index = [
        {
            "id": i,
            "title": page.title,
            "contents": BeautifulSoup(page.html, features="html.parser").get_text(),
            "title_path": page.titlepath,
            "link_path": page.link_path,
        }
        for (i, page) in enumerate(pages.values())
    ]

    # use a context manager so the output file is flushed and closed
    # deterministically instead of leaking the handle
    with open(outdir / "search.html", "w") as fout:
        fout.write(render("search.html", index=index))
622 |
623 |
def generate_index_page(
    tree: FileTree, pages: dict[str, Page], outdir: Path, recent: int
) -> None:
    """render index.html with recently created and recently updated lists

    recent: how many entries to show in each list
    """
    # get the most recently created files, newest first
    by_ctime = list(reversed(sorted(pages.values(), key=lambda x: x.ctime)))[:recent]
    # a set for O(1) membership checks below (was a list, giving
    # O(n * recent) scans)
    recently_created = {p.link_path for p in by_ctime}
    # the most recently *updated* pages: exclude pages already shown in the
    # recently-created list, and pages never modified after creation
    by_mtime = list(
        reversed(
            sorted(
                (
                    p
                    for p in pages.values()
                    if p.link_path not in recently_created and p.mtime != p.ctime
                ),
                key=lambda x: x.mtime,
            )
        )
    )[:recent]
    # use a context manager so the output file is flushed and closed
    # deterministically instead of leaking the handle
    with open(outdir / "index.html", "w") as fout:
        fout.write(
            render(
                "index.html",
                recently_created=by_ctime,
                recently_updated=by_mtime,
                tree=tree,
            )
        )
650 |
651 |
def generate_feed(pages: dict[str, Page], outfile: Path, recent: int) -> None:
    """generate a single feed file from the atom.xml template

    pages: the pages eligible for this feed
    outfile: the path of the feed file to write
    recent: how many of the most recently modified pages to include
    """
    # the `recent` most recently modified pages, newest first
    posts = list(reversed(sorted(pages.values(), key=lambda x: x.mtime)))[:recent]

    # the template embeds each post's escaped HTML; compute it for any
    # post that doesn't already have it (generate_html_pages skips pages
    # whose output file was up to date)
    for p in posts:
        if not p.html_escaped_content:
            p.html_escaped_content = escape(render_content(p))

    # reuse `posts` (the original re-sliced the full list) and use a
    # context manager so the file handle is closed deterministically
    with open(outfile, "w") as fout:
        fout.write(render("atom.xml", posts=posts, timestamp=rfc3339_time(time())))
664 |
665 |
def generate_feeds(
    tree: FileTree, pages: dict[str, Page], feeds: list[str], outdir: Path, recent: int
) -> None:
    """write the root atom feed plus one per-directory sub-feed

    First generates an atom feed for the root tree. Then, for each name
    in `feeds`, finds the directory with that name and generates a feed
    containing its child pages. Raises ValueError if a named directory
    cannot be found.
    """
    generate_feed(pages, outdir / "atom.xml", recent)

    for feed in feeds:
        subtree = tree.find_dir(feed)
        if subtree is None:
            raise ValueError(f"unable to find {feed}")
        feed_path = outdir / f"{feed}.atom.xml"
        generate_feed(subtree.child_pages(), feed_path, recent)
681 |
682 |
def generate_dir_pages(root: FileTree, pages: dict[str, Page], outdir: Path) -> None:
    """recursively render a dir.html listing page for every directory

    Each child directory of `root` gets an HTML page named after its
    relative path, then its own children are processed the same way.
    """
    for child in root.children:
        if child.dir:
            # use a context manager so the file handle is closed
            # deterministically instead of leaking until GC
            with open(outdir / f"{child.reldir}.html", "w") as fout:
                fout.write(render("dir.html", tree=child))
            generate_dir_pages(child, pages, outdir)
690 |
691 |
def highlight(code, name, _) -> str:
    """Highlight a block of code

    code: the raw text of a fenced code block
    name: the language name from the fence info string (falsy if absent)

    Falls back to an escaped, unhighlighted block when no language is
    given or no pygments lexer matches; "warning" fences get admonition
    markup instead of highlighting.

    NOTE(review): the HTML markup inside the f-string returns below
    appears to have been stripped from this copy of the file — confirm
    the literals against the original source before editing them.
    """
    if not name:
        return f'
{escape(code)}
'

    # admonishment adapted from mkdocs-material
    # https://squidfunk.github.io/mkdocs-material/reference/admonitions/
    if name == "warning":
        return f'
{escape(code)}
'

    # NOTE(review): the bare except swallows any lexer-lookup failure;
    # consider narrowing it to pygments.util.ClassNotFound
    try:
        lexer = get_lexer_by_name(name)
    except:
        print(f"failed to get lexer for {name}")
        return f'
{escape(code)}
'
    formatter = HtmlFormatter()

    return pygmentize(code, lexer, formatter)
710 |
711 |
# the shared markdown renderer: GitHub-flavored markdown with hard line
# breaks, raw HTML passthrough, and our pygments-backed `highlight` hook,
# plus header anchors (all levels), YAML frontmatter, and footnotes
MD = (
    MarkdownIt("gfm-like", {"breaks": True, "html": True, "highlight": highlight})
    .use(anchors_plugin, max_level=6)
    .use(front_matter_plugin)
    .use(footnote_plugin)
)
718 |
719 |
def render_link(self, tokens, idx, options, env):
    """custom link_open renderer: mark non-anchor links as external"""
    token = tokens[idx]
    href = token.attrs["href"]
    # don't count anchor links (in-page "#..." targets) as external
    if not href.startswith("#"):
        token.attrSet("class", "external-link")
    return self.renderToken(tokens, idx, options, env)
725 |
726 |
727 | MD.add_render_rule("link_open", render_link)
728 |
729 |
def render_content(page: Page) -> str:
    """
    Given a "page" object, render its markdown source to HTML

    (note: the result is NOT escaped; callers that need escaped HTML for
    feed embedding run it through escape() themselves)
    """
    return MD.render(page.source)
735 |
736 |
def generate_html_pages(pages: dict[str, Page], outdir: Path) -> None:
    """render every page's markdown to an HTML file under outdir

    Sets page.html (always) and page.html_escaped_content (only for pages
    whose output file is actually rewritten).
    """
    for page in pages.values():
        output_path = outdir / page.link_path

        # we always need the rendered HTML, even when the output file is
        # up to date, because the search index and feeds read page.html.
        # (hoisted: the original called render_content in both branches)
        page.html = render_content(page)

        # Optimization: If the file has already been converted to HTML and the
        # HTML is newer than the source, don't regenerate the file.
        # html_escaped_content is deliberately left unset here; generate_feed
        # computes it lazily for the pages it needs
        if os.path.isfile(output_path) and page.mtime < os.stat(output_path).st_mtime:
            continue

        page.html_escaped_content = escape(page.html)

        mkdir(str(outdir / page.relpath))
        with open(output_path, "w") as fout:
            text = render("page.html", page=page)
            fout.write(text)
755 |
756 |
def copy_attachments(attachments: dict[str, Attachment], outdir: Path) -> None:
    """copy each attachment file into its mirrored location under outdir"""
    for attachment in attachments.values():
        # ensure the destination directory exists before copying
        mkdir(outdir / attachment.relpath)
        shutil.copy(attachment.fullpath, outdir / attachment.link_path)
761 |
762 |
def attachment_replacer(pages: dict[str, Page], attachments: dict[str, Attachment]):
    """return a re.sub replacer that expands ![[filename]] embeds

    The replacer resolves the embedded filename against the page and
    attachment indexes and emits markup appropriate to the file type
    (pdf / mov / mp4 / webm / image). Unresolvable embeds are logged and
    replaced with the empty string.

    NOTE(review): the markup inside the f-string returns below appears to
    have been stripped from this copy of the file — confirm the literals
    against the original source before editing them.
    """
    def _attachment_replacer(m: re.Match) -> str:
        # group 1 is the embedded filename captured by IMAGE_LINK_RE
        filename = m.group(1)
        linked_attch = find(pages, attachments, filename)
        if not linked_attch:
            err(f"Unable to find attachment", filename)
            return ""
        path = linked_attch.link_path
        # assume it's an image unless it ends with pdf, mov, mp4 or webm
        if filename.endswith(".pdf"):
            return f''
        elif filename.endswith(".mov"):
            return f''
        elif filename.endswith(".mp4"):
            return f''
        elif filename.endswith(".webm"):
            return f''
        return f''

    return _attachment_replacer
783 |
784 |
# match an Obsidian embed — ![[filename]] — capturing the filename lazily
IMAGE_LINK_RE = re.compile(r"!\[\[(.*?)\]\]")
786 |
787 |
def substitute_images(
    pages: dict[str, Page], attachments: dict[str, Attachment]
) -> None:
    """rewrite every ![[...]] embed in each page's source into markup"""
    replace = attachment_replacer(pages, attachments)
    for p in pages.values():
        p.source = IMAGE_LINK_RE.sub(replace, p.source)
794 |
795 |
def sanitize(s: str) -> str:
    """lowercase a string, replacing each non-word character with a dash

    Approximates the ids the anchors plugin gives to section headers
    (e.g. "## Day 2" yields an element whose id is "day-2"). It may not
    match that generator exactly, but it's close enough for our anchor
    links for now; ideally we'd reproduce its algorithm precisely.
    """
    stripped = s.rstrip()
    # \W is equivalent to [^\w]: anything that isn't a word character
    return re.sub(r"\W", "-", stripped).lower()
812 |
813 |
# maybe move to https://github.com/jsepia/markdown-it-wikilinks eventually?
def crosslink_replacer(pages: dict[str, Page]) -> Callable[[re.Match], str]:
    """return a re.sub replacer that expands [[wikilinks]] into links

    Handles [[page]], [[page|nice title]], and [[page#anchor]] forms.
    When the target page can't be found, the matched text is returned
    unchanged.

    NOTE(review): the markup in the final f-string return appears to have
    been stripped from this copy of the file — confirm the literal against
    the original source before editing it.
    """
    def _crosslink_replacer(m: re.Match) -> str:
        rawlink = m.group(1)
        title = rawlink
        nicetitle = None
        anchor = None

        # [[page|nice title]] -> title: page, nicetitle: nice title
        # maxsplit=1 so a title containing a second "|" doesn't raise
        # ValueError from tuple unpacking
        if "|" in rawlink:
            title, nicetitle = rawlink.split("|", 1)

        # [[page#anchor]] -> title: page, anchor: anchor
        # maxsplit=1 again, in case the anchor itself contains "#"
        if "#" in title:
            title, anchor = title.split("#", 1)

        linked_page = find(pages, {}, title)

        # if we don't find the linked page, assume that the group is not in
        # fact a link. There are several places in my notes where we use the
        # string `[[` but it's not a link; leave them be
        if not linked_page:
            err(f"Unable to find page", title)
            return m.group(0)

        linktitle = nicetitle if nicetitle else title
        anchor = f"#{sanitize(anchor)}" if anchor else ""

        return f'{linktitle}'

    return _crosslink_replacer
845 |
846 |
# match:
# - two open square brackets [[
# - lazily capture anything up to the closing square bracket pair ]]
CROSSLINK_RE = re.compile(r"\[\[(.*?)\]\]")
851 |
852 |
def substitute_crosslinks(pages: dict[str, Page]) -> None:
    """replace [[wikilinks]] in every page's source with rendered links"""
    replace = crosslink_replacer(pages)
    for p in pages.values():
        p.source = CROSSLINK_RE.sub(replace, p.source)
857 |
858 |
def parse(
    mddir: str,
    recent: int,
    use_git_times: bool,
    ignore: Optional[set[str]] = None,
    feeds: Optional[list[str]] = None,
) -> None:
    """parse a directory of markdown files, ignoring a list of folder names

    mddir: the name of the directory to parse files in
    recent: how many posts to show in the "recently updated" section
    use_git_times: get file timestamps from git rather than the filesystem
    ignore: an optional set of directory names to ignore. Will be ignored at
        any level in the tree.
    feeds: optional subdirectory names to generate separate atom feeds for
    """
    # normalize optional arguments; `feeds` now defaults to None instead of
    # a mutable [] default, which Python shares across calls (the classic
    # mutable-default-argument pitfall)
    ignore = ignore if ignore else set()
    feeds = feeds if feeds else []
    dir = os.path.normpath(os.path.expanduser(mddir))

    tree, pages, attachments = build_file_tree(dir, ignore, use_git_times)
    calculate_backlinks(pages, attachments)

    outdir = Path(mkdir("./output"))

    generate_stylesheet()
    copy_static(Path("./templates"), outdir)
    copy_attachments(attachments, outdir)

    substitute_images(pages, attachments)
    substitute_crosslinks(pages)

    # should come before generate_index_page because it generates the HTML that
    # is necessary for the atom file output
    generate_html_pages(pages, outdir)
    generate_search(pages, outdir)
    generate_index_page(tree, pages, outdir, recent)
    generate_feeds(tree, pages, feeds, outdir, recent)
    generate_dir_pages(tree, pages, outdir)
    generate_lastweek_page(pages, outdir)
897 |
898 |
if __name__ == "__main__":
    # command-line entry point: parse flags, then publish the notes tree
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--recent",
        help="number of recent entries to show",
        type=int,
        default=15,
    )
    parser.add_argument(
        "--path",
        help="path to folder of .md files to publish. This must be an absolute path",
        type=str,
        default="~/Library/Mobile Documents/iCloud~md~obsidian/Documents/personal",
    )
    parser.add_argument(
        "--use-git-times",
        action="store_true",
        help="use git modified time instead of mtime for file timestamps",
    )
    # --feed may be given multiple times; argparse appends each value
    parser.add_argument(
        "--feed",
        action="append",
        default=[],
        help="a directory to generate a separate feed for. Can be given multiple times",
    )
    args = parser.parse_args(sys.argv[1:])

    # names skipped at any depth of the tree walk
    default_ignores = {
        ".DS_Store",
        "private",
        ".obsidian",
        ".github",
        ".git",
        ".gitignore",
    }
    parse(
        args.path,
        args.recent,
        args.use_git_times,
        ignore=default_ignores,
        feeds=args.feed,
    )
941 |
--------------------------------------------------------------------------------
/templates/atom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <feed xmlns="http://www.w3.org/2005/Atom">
3 |   <title>llimllib's notes</title>
4 |   <id>http://notes.billmill.org/</id>
5 |   <link href="http://notes.billmill.org/"/>
6 |   <updated>{{ timestamp }}</updated>
7 |
8 |   <subtitle>Obsidian Notes</subtitle>
9 |   <author>
10 |     <name>Bill Mill</name>
11 |   </author>
12 | {%- for post in posts %}
13 |   <entry>
14 |     <id>https://notes.billmill.org/{{ post.link_path }}</id>
15 |     <link href="https://notes.billmill.org/{{ post.link_path }}"/>
16 |     <title>{{ post.title }}</title>
17 |     <published>{{ post.rfc3339_ctime }}</published>
18 |     <updated>{{ post.rfc3339_mtime }}</updated>
19 |     <content type="html">{{ post.html_escaped_content }}</content>
20 |   </entry>
21 | {%- endfor %}
22 | </feed>
23 |
--------------------------------------------------------------------------------
/templates/dir.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
17 |
19 |
20 |
21 |
22 | llimllib notes
23 |
24 |
25 |
26 |
27 |
28 | notes.billmill.org /
29 | {%- for link in tree.dirlinks() %}
30 | {{ link }} /
31 | {%- endfor %}
32 |
33 | {%- if tree.has_child_dirs() %}
34 | {# for now just expand all child pages, eventually might want to do
35 | something smarter? #}
36 | collapse all
37 |
38 | {%- for node in tree.children recursive %}
39 | {%- if node.children -%}
40 |
53 | {%- else %}
54 | {# if there aren't any child dirs, we want to show a ul with regular
55 | bullets instead of the collapsible tree we normally show #}
56 |
57 | {%- for node in tree.children %}
58 | {%- if not node.page.attachment %}
59 |