├── example.pdf ├── example ├── cover.jpg ├── qrcode.jpg ├── back-cover.jpg ├── SUMMARY.md ├── chapter0-preface │ └── index.md ├── _index.md ├── chapter4-mono-code-block │ └── index.md ├── chapter3-advanced-formatting │ └── index.md ├── chapter1-basics │ └── index.md └── chapter2-code-and-tables │ └── index.md ├── example-screenshot.jpg ├── .gitignore ├── Makefile ├── filters ├── number-lines.lua ├── simple-image-attr-cleanup.lua ├── README.md ├── fix-lstinline.lua ├── minted-filter.lua ├── image-attr-cleanup.lua ├── ansi-cleanup.lua ├── cleanup-filter.lua ├── symbol-fallback-filter.lua ├── table-filter.lua ├── table-wrap.lua └── emoji-passthrough.lua ├── diagnose_env.sh ├── cli.py ├── install_pdf_dependencies.sh ├── tree.py ├── table-filter.lua ├── emoji_support.py ├── emoji-commands.tex ├── cache_utils.py ├── frontmatter.py ├── LICENSE ├── table-wrap.lua ├── validate_lua_dependencies.py ├── README.md └── image_utils.py /example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rootsongjc/pdf-book-exporter/main/example.pdf -------------------------------------------------------------------------------- /example/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rootsongjc/pdf-book-exporter/main/example/cover.jpg -------------------------------------------------------------------------------- /example/qrcode.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rootsongjc/pdf-book-exporter/main/example/qrcode.jpg -------------------------------------------------------------------------------- /example-screenshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rootsongjc/pdf-book-exporter/main/example-screenshot.jpg 
-------------------------------------------------------------------------------- /example/back-cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rootsongjc/pdf-book-exporter/main/example/back-cover.jpg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | example/image-caches/ 3 | __pycache__/ 4 | .pytest_cache/ 5 | .ipynb_checkpoints/ 6 | .DS_Store 7 | -------------------------------------------------------------------------------- /example/SUMMARY.md: -------------------------------------------------------------------------------- 1 | - [说明](chapter0-preface/index.md) 2 | - [基础格式测试](chapter1-basics/index.md) 3 | - [代码块和表格测试](chapter2-code-and-tables/index.md) 4 | - [高级格式和特殊内容测试](chapter3-advanced-formatting/index.md) 5 | - [SourceHanMono 字体测试](chapter4-mono-code-block/index.md) 6 | -------------------------------------------------------------------------------- /example/chapter0-preface/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 说明 3 | weight: 1 4 | --- 5 | 6 | 本书是专门用于测试 PDF 电子书导出功能的综合示例文档,包含了基础格式、代码表格、高级格式等各种 Markdown 内容,用于验证 PDF 导出工具的渲染效果。 7 | 8 | ## 测试目标 9 | 10 | 本测试文档涵盖以下内容: 11 | 12 | ### 基础格式测试 13 | 14 | - ✅ 中英文混排显示 15 | - ✅ Emoji 表情符号渲染 16 | - ✅ 基础文本格式(粗体、斜体、删除线等) 17 | - ✅ 列表和引用格式 18 | - ✅ 链接和特殊字符 19 | 20 | ### 代码和表格测试 21 | 22 | - ✅ 多语言代码块语法高亮 23 | - ✅ 超宽代码块处理 24 | - ✅ 各种表格格式 25 | - ✅ 代码与表格混合内容 26 | 27 | ### 高级格式测试 28 | 29 | - ✅ 数学公式渲染 30 | - ✅ 任务列表和复选框 31 | - ✅ 定义列表 32 | - ✅ 复杂嵌套结构 33 | - ✅ 特殊布局样式 34 | 35 | **开始全面测试!** 🚀 36 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for PDF book exporter tools 2 | # Contains utilities for the PDF export 
system 3 | 4 | .PHONY: help clean install diagnostics 5 | 6 | # Default target 7 | help: 8 | @echo "PDF Book Exporter - Available targets:" 9 | @echo " install - Install required dependencies" 10 | @echo " diagnostics - Run system diagnostics for emoji support" 11 | @echo " clean - Clean generated files" 12 | @echo "" 13 | @echo "Usage: python cli.py [book_directory] -o [output.pdf] [options]" 14 | @echo "Run 'python cli.py --help' for more information" 15 | 16 | # Install dependencies 17 | install: 18 | @echo "Installing PDF export dependencies..." 19 | @chmod +x install_pdf_dependencies.sh 20 | @./install_pdf_dependencies.sh 21 | 22 | # Run system diagnostics 23 | diagnostics: 24 | @echo "Running PDF export diagnostics..." 25 | @python3 cli.py --diagnostics . 26 | 27 | # Clean generated files 28 | clean: 29 | @echo "Cleaning generated files..." 30 | @rm -f *.pdf 31 | @rm -f emoji-font-config.tex 32 | @rm -rf test-output 33 | @echo "Clean complete." 34 | -------------------------------------------------------------------------------- /filters/number-lines.lua: -------------------------------------------------------------------------------- 1 | -- number-lines.lua 2 | -- A Pandoc Lua filter to add line numbers to all code blocks 3 | -- This filter adds the 'numberLines' attribute to CodeBlock elements 4 | -- so that Pandoc's built-in syntax highlighter will output a two-column table 5 | -- with line numbers in the left column 6 | 7 | function CodeBlock(elem) 8 | -- Add the numberLines attribute to enable line numbering 9 | elem.attributes.numberLines = "" 10 | 11 | -- Set the starting line number (defaults to 1 if not specified) 12 | if not elem.attributes.startFrom then 13 | elem.attributes.startFrom = "1" 14 | end 15 | 16 | -- Force table format for line numbers in HTML output 17 | elem.attributes["number-lines"] = "" 18 | 19 | -- Return the modified code block 20 | return elem 21 | end 22 | 23 | -- Post-process to ensure line numbers are displayed as a table 24 
| function Div(elem) 25 | if elem.classes and elem.classes:includes("sourceCode") then 26 | -- Add additional styling class for better CSS targeting 27 | elem.classes:insert("numbered-code") 28 | end 29 | return elem 30 | end 31 | -------------------------------------------------------------------------------- /filters/simple-image-attr-cleanup.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Simple image attribute cleanup filter 3 | This filter removes standalone paragraphs that contain only image attributes 4 | like {width=1486 height=518} 5 | ]] 6 | 7 | function Para(elem) 8 | -- Get the text content of the paragraph 9 | local content_str = pandoc.utils.stringify(elem) 10 | 11 | -- Check if this paragraph contains only image attribute syntax 12 | -- Patterns to match various formats: 13 | -- {width=123 height=456} 14 | -- {height=456 width=123} 15 | -- {width=123} 16 | -- {height=456} 17 | local patterns = { 18 | "^%s*{%s*width%s*=%s*%d+%s+height%s*=%s*%d+%s*}%s*$", 19 | "^%s*{%s*height%s*=%s*%d+%s+width%s*=%s*%d+%s*}%s*$", 20 | "^%s*{%s*width%s*=%s*%d+%s*}%s*$", 21 | "^%s*{%s*height%s*=%s*%d+%s*}%s*$" 22 | } 23 | 24 | -- Check if content matches any image attribute pattern 25 | for _, pattern in ipairs(patterns) do 26 | if content_str:match(pattern) then 27 | return {} -- Remove this paragraph completely 28 | end 29 | end 30 | 31 | -- Return unchanged if not an image attribute 32 | return elem 33 | end 34 | -------------------------------------------------------------------------------- /example/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: PDF 导出功能完整测试 3 | author: PDF Book Exporter 4 | date: '2025-08-06' 5 | description: 全面测试 PDF 导出工具的各种格式渲染效果 6 | language: zh-hans 7 | weight: 1 8 | book: 9 | title: PDF 导出功能完整测试 10 | description: 全面测试 PDF 导出工具的各种格式渲染效果 11 | language: zh-hans 12 | author: PDF Book Exporter 13 | website: 
https://jimmysong.io/book/pdf-book-exporter/ 14 | appendix: false 15 | cover: cover.jpg 16 | backcover_image: back-cover.jpg 17 | backcover_text: 「几米宋」微信公众号 18 | backcover_link_text: jimmysong.io 19 | backcover_link_url: https://jimmysong.io 20 | backcover_text_color: '#FFFFFF' 21 | backcover_link_color: '#1d09d8' 22 | cover_title_text: PDF 导出功能测试 23 | cover_author_text: PDF Book Exporter 24 | cover_subtitle_text: 完整测试版本,全面测试 PDF 导出工具的各种格式渲染效果 25 | cover_title_color: '#FFFFFF' 26 | cover_author_color: '#E0E0E0' 27 | cover_subtitle_color: '#C0C0C0' 28 | cover_title_font_size: 42 29 | cover_author_font_size: 28 30 | cover_subtitle_font_size: 20 31 | cover_title_position: center 32 | cover_author_position: bottom 33 | cover_overlay_enabled: true 34 | cover_text_shadow: false 35 | cover_background_overlay: false 36 | cover_overlay_opacity: 0 37 | body_color: '#333333' 38 | heading_color: '#2C3E50' 39 | link_color: '#3498DB' 40 | code_color: '#E74C3C' 41 | quote_color: '#7F8C8D' 42 | caption_color: '#95A5A6' 43 | lastmod: '2025-08-06' 44 | --- 45 | 46 | -------------------------------------------------------------------------------- /filters/README.md: -------------------------------------------------------------------------------- 1 | # PDF Book Exporter Filters 2 | 3 | A collection of Pandoc Lua filters for enhanced PDF book generation. 4 | 5 | ## Available Filters 6 | 7 | ### emoji-passthrough.lua 8 | 9 | Handles emoji characters for LaTeX output with proper font switching. 10 | 11 | ### minted-filter.lua 12 | 13 | Converts fenced code blocks to minted environments for better syntax highlighting. 14 | 15 | ### table-wrap.lua 16 | 17 | Converts pipe tables to longtable format for better page wrapping. 18 | 19 | ### cleanup-filter.lua 20 | 21 | Cleans up problematic characters and formatting issues. 22 | 23 | ### ansi-cleanup.lua 24 | 25 | Removes ANSI escape codes from content. 
26 | 27 | ### fix-lstinline.lua 28 | 29 | Fixes inline code styling issues with CJK characters. 30 | 31 | ### symbol-fallback-filter.lua 32 | 33 | Provides fallback handling for special symbols and characters. 34 | 35 | ## Usage 36 | 37 | These filters are automatically applied by the `cli.py` script. They are located in the `filters/` directory and are applied in the correct order during PDF generation. 38 | 39 | ## Requirements 40 | 41 | - Pandoc with Lua support 42 | - LaTeX distribution with required packages (see main documentation) 43 | 44 | ## Filter Details 45 | 46 | Each filter serves a specific purpose in the PDF generation pipeline: 47 | 48 | - **emoji-passthrough.lua**: Ensures proper emoji rendering with font switching 49 | - **minted-filter.lua**: Provides enhanced syntax highlighting for code blocks 50 | - **table-wrap.lua**: Improves table formatting and page breaks 51 | - **cleanup-filter.lua**: Removes problematic characters that can break LaTeX compilation 52 | - **ansi-cleanup.lua**: Strips ANSI escape sequences from content 53 | - **fix-lstinline.lua**: Fixes inline code rendering issues with CJK characters 54 | - **symbol-fallback-filter.lua**: Handles special symbols and provides fallbacks 55 | -------------------------------------------------------------------------------- /example/chapter4-mono-code-block/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: SourceHanMono 字体测试 3 | weight: 40 4 | --- 5 | 6 | 本文档用于验证 SourceHanMono 字体在代码块中的应用效果。 7 | 8 | ## Python 代码示例 9 | 10 | ```python 11 | def hello_world(): 12 | """ 13 | 一个简单的函数,展示 SourceHanMono 字体效果 14 | """ 15 | print("Hello, 世界!") # 中文注释 16 | print("こんにちは、世界!") # 日文注释 17 | print("안녕하세요, 세계!") # 韩文注释 18 | 19 | # 数字和符号测试 20 | numbers = [1, 2, 3, 4, 5] 21 | symbols = ['!', '@', '#', '$', '%', '^', '&', '*'] 22 | 23 | return "测试成功" 24 | ``` 25 | 26 | ## Bash 脚本示例 27 | 28 | ```bash 29 | #!/bin/bash 30 | # 这是一个包含中文的脚本 31 | 32 | echo 
"开始执行脚本..." 33 | echo "正在处理中文文件名:测试文档.txt" 34 | 35 | # 创建包含中文路径的目录 36 | mkdir -p "./测试目录/子目录" 37 | ls -la "./测试目录" 38 | 39 | # 函数定义 40 | function 显示消息() { 41 | echo "函数名也可以是中文:$1" 42 | } 43 | 44 | 显示消息 "这是一个测试消息" 45 | ``` 46 | 47 | ## JavaScript 代码示例 48 | 49 | ```javascript 50 | // JavaScript 中的中文变量和注释 51 | const 问候语 = "你好,世界!"; 52 | const 数字列表 = [1, 2, 3, 4, 5]; 53 | 54 | function 显示问候(名字) { 55 | console.log(`${问候语} ${名字}`); 56 | // 这里展示中文字符在等宽字体中的效果 57 | console.log("中文字符测试:测试"); 58 | console.log("English text: test"); 59 | console.log("混合文本:mix 测试 test"); 60 | } 61 | 62 | 显示问候("张三"); 63 | ``` 64 | 65 | ## 内联代码测试 66 | 67 | 以下是内联代码的测试: 68 | 69 | - Python 变量:`变量名 = "中文值"` 70 | - 文件路径:`/home/用户/文档/测试文件.txt` 71 | - 命令示例:`ls -la 中文目录` 72 | - 混合内容:`hello世界test` 73 | 74 | ## 等宽字符对齐测试 75 | 76 | ``` 77 | ASCII字符: ABCDEFGHIJKLMNOPQRSTUVWXYZ 78 | 中文字符: 你好世界测试字体显示效果验证 79 | 日文字符: こんにちはテストフォント 80 | 韩文字符: 안녕하세요테스트폰트 81 | 混合内容: Test测试テストテスト 82 | ``` 83 | 84 | ## 代码注释多语言测试 85 | 86 | ```go 87 | package main 88 | 89 | import "fmt" 90 | 91 | // 主函数 - 中文注释 92 | // メイン関数 - 日文注释 93 | // 메인 함수 - 韩文注释 94 | func main() { 95 | // 变量声明 96 | message := "多语言字体测试" 97 | 98 | fmt.Println(message) 99 | fmt.Println("Hello, 世界!") // 英文 + 中文 100 | fmt.Println("こんにちは、世界!") // 日文 101 | fmt.Println("안녕하세요, 세계!") // 韩文 102 | } 103 | ``` 104 | 105 | 此文档将帮助验证 SourceHanMono 字体是否正确应用于所有代码块和内联代码。 106 | -------------------------------------------------------------------------------- /filters/fix-lstinline.lua: -------------------------------------------------------------------------------- 1 | -- Enhanced fix-lstinline filter 2 | -- This filter fixes the issue where Pandoc generates \passthrough{\lstinline!...!} 3 | -- and ensures inline code uses our template's table-aware wrapping logic 4 | 5 | function RawInline(elem) 6 | -- Only process LaTeX output 7 | if not FORMAT:match 'latex' then 8 | return elem 9 | end 10 | 11 | -- Check if this is a passthrough lstinline command 12 | local content = elem.text 13 | 
if content:match("^\\passthrough{\\lstinline!.*!}$") then 14 | -- Extract the content between the exclamation marks 15 | local inline_content = content:match("\\passthrough{\\lstinline!(.-)}$") 16 | if inline_content then 17 | -- Remove the trailing exclamation mark 18 | inline_content = inline_content:gsub("!$", "") 19 | -- Use our template's texttt command which has table-aware line breaking 20 | return pandoc.RawInline('latex', '\\texttt{' .. inline_content .. '}') 21 | end 22 | end 23 | 24 | return elem 25 | end 26 | 27 | -- Function to escape LaTeX special characters for safe inclusion in texttt 28 | function escape_latex_special_chars(text) 29 | if not text then return "" end 30 | -- Escape characters that could cause issues in LaTeX 31 | text = text:gsub("\\", "\\textbackslash{}") 32 | text = text:gsub("{", "\\{") 33 | text = text:gsub("}", "\\}") 34 | text = text:gsub("%$", "\\$") 35 | text = text:gsub("&", "\\&") 36 | text = text:gsub("%%", "\\%%") 37 | text = text:gsub("#", "\\#") 38 | text = text:gsub("%^", "\\textasciicircum{}") 39 | text = text:gsub("_", "\\_") 40 | text = text:gsub("~", "\\textasciitilde{}") 41 | return text 42 | end 43 | 44 | -- Handle Code elements directly - use template's texttt command 45 | function Code(elem) 46 | -- Don't process if we're not generating LaTeX 47 | if not FORMAT:match 'latex' then 48 | return elem 49 | end 50 | 51 | -- Escape special characters and use our template's texttt command 52 | local escaped_text = escape_latex_special_chars(elem.text) 53 | return pandoc.RawInline('latex', '\\texttt{' .. escaped_text .. 
'}') 54 | end 55 | -------------------------------------------------------------------------------- /filters/minted-filter.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Pandoc Lua filter to use minted for code blocks instead of listings 3 | This filter converts fenced code blocks to minted environments 4 | ]] 5 | 6 | function CodeBlock(elem) 7 | -- Get the language from the first class 8 | local lang = elem.classes[1] or "text" 9 | 10 | -- Convert common language aliases and unsupported languages 11 | local lang_map = { 12 | ["sh"] = "bash", 13 | ["shell"] = "bash", 14 | ["js"] = "javascript", 15 | ["ts"] = "typescript", 16 | ["py"] = "python", 17 | ["yml"] = "yaml", 18 | ["dockerfile"] = "docker", 19 | ["rs"] = "rust", 20 | ["go-html-template"] = "html", 21 | ["gotemplate"] = "html", 22 | ["go-template"] = "html" 23 | } 24 | 25 | if lang_map[lang] then 26 | lang = lang_map[lang] 27 | end 28 | 29 | -- List of known supported Pygments lexers for common languages 30 | local supported_lexers = { 31 | "text", "bash", "javascript", "typescript", "python", "yaml", "docker", "rust", 32 | "html", "css", "json", "xml", "go", "java", "c", "cpp", "sql", "markdown", 33 | "latex", "php", "ruby", "perl", "r", "scala", "swift", "kotlin", "dart" 34 | } 35 | 36 | -- Check if the language is supported, fallback to text if not 37 | local function is_supported(language) 38 | for _, supported in ipairs(supported_lexers) do 39 | if supported == language then 40 | return true 41 | end 42 | end 43 | return false 44 | end 45 | 46 | if not is_supported(lang) then 47 | lang = "text" 48 | end 49 | 50 | -- Use minted with mdframed and add spacing before and after 51 | local minted_begin = "\\vspace{0.5em}\n\\begin{mdframed}[style=codeblockstyle]\n\\begin{minted}{" .. lang .. "}" 52 | local minted_end = "\\end{minted}\n\\end{mdframed}\n\\vspace{0.5em}" 53 | 54 | -- Return raw LaTeX block 55 | return pandoc.RawBlock("latex", minted_begin .. 
"\n" .. elem.text .. "\n" .. minted_end) 56 | end 57 | 58 | -- Also handle inline code (if needed) 59 | function Code(elem) 60 | -- For inline code, we can use \mintinline or just keep it as is 61 | return elem 62 | end 63 | -------------------------------------------------------------------------------- /diagnose_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "=== 终端环境诊断脚本 ===" 4 | echo "运行时间: $(date)" 5 | echo "用户: $(whoami)" 6 | echo "Shell: $0 ($SHELL)" 7 | echo "工作目录: $(pwd)" 8 | echo "" 9 | 10 | echo "=== PATH信息 ===" 11 | echo "PATH长度: ${#PATH} 字符" 12 | echo "TeX路径:" 13 | echo "$PATH" | tr ':' '\n' | grep -E "(tex|latex)" | head -5 14 | echo "" 15 | 16 | echo "=== LaTeX环境 ===" 17 | echo "LuaLaTeX位置: $(which lualatex)" 18 | echo "LuaLaTeX版本:" 19 | lualatex --version 2>/dev/null | head -2 || echo "❌ LuaLaTeX不可用" 20 | echo "" 21 | 22 | echo "=== Pandoc环境 ===" 23 | echo "Pandoc位置: $(which pandoc)" 24 | echo "Pandoc版本:" 25 | pandoc --version 2>/dev/null | head -1 || echo "❌ Pandoc不可用" 26 | echo "" 27 | 28 | echo "=== 字体环境 ===" 29 | echo "字体缓存:" 30 | fc-list 2>/dev/null | grep -E "(Source Han|PingFang|Noto)" | wc -l | awk '{print $1 " 个相关字体"}' 31 | echo "" 32 | 33 | echo "=== LaTeX包状态 ===" 34 | echo "检查关键包:" 35 | for pkg in fontspec xeCJK luatexja; do 36 | if kpsewhich $pkg.sty >/dev/null 2>&1; then 37 | echo "✅ $pkg.sty 已安装" 38 | else 39 | echo "❌ $pkg.sty 未找到" 40 | fi 41 | done 42 | echo "" 43 | 44 | echo "=== 工作目录文件 ===" 45 | echo "关键文件存在性:" 46 | for file in ../template.tex ../cli.py ../filters/emoji-passthrough.lua; do 47 | if [ -f "$file" ]; then 48 | echo "✅ $(basename $file) 存在 ($(stat -f%z "$file") bytes)" 49 | else 50 | echo "❌ $(basename $file) 缺失" 51 | fi 52 | done 53 | echo "" 54 | 55 | echo "=== 临时目录权限 ===" 56 | temp_dir=$(python3 -c "import tempfile; print(tempfile.gettempdir())") 57 | echo "临时目录: $temp_dir" 58 | echo "权限: $(ls -ld "$temp_dir" 2>/dev/null || echo '无法访问')" 59 | echo 
"" 60 | 61 | echo "=== 环境变量 ===" 62 | echo "相关环境变量:" 63 | env | grep -E "(TEXMF|LUA|PANDOC|LANG|LC_)" | sort 64 | echo "" 65 | 66 | echo "=== 测试简单命令 ===" 67 | echo "测试LaTeX字体命令:" 68 | echo '\documentclass{article}\usepackage{fontspec}\begin{document}测试\end{document}' > /tmp/test_font.tex 69 | if lualatex -interaction=nonstopmode -output-directory=/tmp /tmp/test_font.tex >/dev/null 2>&1; then 70 | echo "✅ LuaLaTeX字体测试成功" 71 | else 72 | echo "❌ LuaLaTeX字体测试失败" 73 | fi 74 | rm -f /tmp/test_font.* 75 | echo "" 76 | 77 | echo "=== 诊断建议 ===" 78 | echo "如果在新终端窗口中遇到问题,请:" 79 | echo "1. 复制以下命令到新窗口运行此诊断脚本" 80 | echo "2. 对比两个窗口的输出结果" 81 | echo "3. 特别注意PATH、字体和LaTeX包的差异" 82 | echo "" 83 | echo "命令: cd $(pwd) && ./diagnose_env.sh" 84 | -------------------------------------------------------------------------------- /cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from tree import build_tree 4 | from frontmatter import parse_front_matter, should_include, load_book_config 5 | from pdf_builder import build_pdf 6 | from cache_utils import clean_cache, show_cache_info 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser(description='Export a Hugo book to PDF') 10 | parser.add_argument('book_dir', help='Path to the book directory') 11 | parser.add_argument('-o', '--output', help='Output PDF file path') 12 | parser.add_argument('--generate-summary', action='store_true', help='Generate summary.md file') 13 | parser.add_argument('--template', default=None, help='Custom LaTeX template path (XeLaTeX only)') 14 | parser.add_argument('--clean-cache', type=int, nargs='?', const=30, help='Clean cache files older than specified days (default: 30)') 15 | parser.add_argument('--cache-info', action='store_true', help='Show cache directory information') 16 | parser.add_argument('--appendix', default=None, help='Path to appendix markdown file') 17 | parser.add_argument('--emoji', action='store_true', help='Enable 
emoji support') 18 | parser.add_argument('--include-drafts', action='store_true', help='Include draft content') 19 | parser.add_argument('--diagnostics', action='store_true', help='Diagnostics') 20 | parser.add_argument('--generate-troubleshooting-guide', action='store_true', help='Generate troubleshooting guide') 21 | parser.add_argument('--max-table-width', type=float, default=0.85, 22 | help='Maximum table width as fraction of text width (default: 0.85)') 23 | 24 | args = parser.parse_args() 25 | 26 | if args.clean_cache is not None: 27 | clean_cache(args.book_dir, args.clean_cache) 28 | return 29 | if args.cache_info: 30 | show_cache_info(args.book_dir) 31 | return 32 | 33 | root_node = build_tree(args.book_dir, args.include_drafts, parse_front_matter, should_include) 34 | if not root_node: 35 | return 36 | 37 | # Load book configuration from _index.md 38 | book_config = load_book_config(args.book_dir) 39 | 40 | output_pdf = args.output or os.path.join(args.book_dir, 'book.pdf') 41 | ok = build_pdf(args.book_dir, root_node, output_pdf, book_config, args.template, args.appendix, args.emoji, args.max_table_width) 42 | if not ok: 43 | # Non-zero exit so callers (e.g., Makefile) can detect failure 44 | raise SystemExit(1) 45 | 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /example/chapter3-advanced-formatting/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "高级格式和特殊内容测试" 3 | weight: 30 4 | --- 5 | 6 | 本章节测试更复杂的格式,包括数学公式、图表、复杂布局等。 7 | 8 | ## 数学公式测试 9 | 10 | ### 内联数学公式 11 | 12 | 在文本中,我们可以使用内联公式,比如 $E = mc^2$ 或者 $\pi \approx 3.14159$。 13 | 14 | 中文文本中的数学公式:圆的面积公式是 $A = \pi r^2$,其中 $r$ 是半径。 15 | 16 | ### 块级数学公式 17 | 18 | $$ 19 | \int_{-\infty}^{\infty} e^{-x^2} dx = \sqrt{\pi} 20 | $$ 21 | 22 | $$ 23 | \sum_{n=1}^{\infty} \frac{1}{n^2} = \frac{\pi^2}{6} 24 | $$ 25 | 26 | ### 复杂数学公式 27 | 28 | \begin{align} 29 | \nabla \times 
\vec{\mathbf{B}} -\, \frac1c\, \frac{\partial\vec{\mathbf{E}}}{\partial t} &= \frac{4\pi}{c}\vec{\mathbf{j}} \\ 30 | \nabla \cdot \vec{\mathbf{E}} &= 4 \pi \rho \\ 31 | \nabla \times \vec{\mathbf{E}}\, +\, \frac1c\, \frac{\partial\vec{\mathbf{B}}}{\partial t} &= \vec{\mathbf{0}} \\ 32 | \nabla \cdot \vec{\mathbf{B}} &= 0 33 | \end{align} 34 | 35 | ## 任务列表和复选框 36 | 37 | ### 项目进度跟踪 38 | 39 | - [x] 需求分析 ✅ 40 | - [x] 技术选型 ✅ 41 | - [x] 架构设计 ✅ 42 | - [ ] 前端开发 🔄 43 | - [x] 页面设计 44 | - [x] 组件开发 45 | - [ ] 接口对接 46 | - [ ] 测试验证 47 | - [ ] 后端开发 ⏳ 48 | - [x] 数据库设计 49 | - [ ] API 开发 50 | - [ ] 业务逻辑 51 | - [ ] 性能优化 52 | - [ ] 测试阶段 ⏳ 53 | - [ ] 部署上线 ⏳ 54 | 55 | ### 学习计划 56 | 57 | - [x] **第一周**:基础知识 58 | - [x] HTML/CSS 复习 59 | - [x] JavaScript ES6+ 60 | - [x] React 基础 61 | - [ ] **第二周**:进阶内容 62 | - [x] React Hooks 63 | - [ ] 状态管理 (Redux/Zustand) 64 | - [ ] 路由管理 65 | - [ ] **第三周**:实战项目 66 | - [ ] 项目搭建 67 | - [ ] 功能开发 68 | - [ ] 测试部署 69 | 70 | ## 定义列表 71 | 72 | ### 技术术语 73 | 74 | API 75 | : Application Programming Interface,应用程序编程接口,是不同软件组件之间通信的规范。 76 | 77 | REST 78 | : Representational State Transfer,表现层状态转换,是一种软件架构风格。 79 | 80 | GraphQL 81 | : 一种用于 API 的查询语言和运行时,由 Facebook 开发。 82 | 83 | ### 编程概念 84 | 85 | 函数式编程 86 | : 一种编程范式,将计算视为数学函数的求值,避免状态变化和可变数据。 87 | 88 | 面向对象编程 89 | : 基于"对象"概念的编程范式,对象包含数据(属性)和代码(方法)。 90 | 91 | ## 复杂嵌套结构 92 | 93 | ### 多层嵌套列表 94 | 95 | 1. **前端技术栈** 96 | 1. **框架选择** 97 | - React 98 | - 优点: 99 | - 组件化开发 100 | - 虚拟 DOM 性能优化 101 | - 丰富的生态系统 102 | - 缺点: 103 | - 学习曲线陡峭 104 | - 需要额外的状态管理库 105 | 2. **构建工具** 106 | - Webpack 107 | - Vite 108 | - Parcel 109 | 2. **后端技术栈** 110 | 1. **语言选择** 111 | - Node.js 112 | - 优点:JavaScript 全栈 113 | - 缺点:单线程限制 114 | - Python 115 | - 优点:简洁易读 116 | - 缺点:性能相对较低 117 | 118 | ## 特殊布局测试 119 | 120 | ### 警告框样式 121 | > 122 | > ⚠️ **警告** 123 | > 124 | > 这是一个警告信息,用于提醒用户注意重要事项。 125 | 126 | > ℹ️ **信息** 127 | > 128 | > 这是一个信息提示,用于提供额外的说明。 129 | 130 | > ✅ **成功** 131 | > 132 | > 操作已成功完成! 
133 | 134 | > ❌ **错误** 135 | > 136 | > 发生了错误,请检查输入并重试。 137 | 138 | ### 键盘快捷键 139 | 140 | - 复制:Ctrl + C 141 | - 粘贴:Ctrl + V 142 | - 撤销:Ctrl + Z 143 | - 保存:Ctrl + S 144 | - 查找:Ctrl + F 145 | 146 | --- 147 | 148 | ## 章节总结 149 | 150 | 本章节测试了以下高级格式: 151 | 152 | - ✅ 数学公式(内联和块级) 153 | - ✅ 任务列表和复选框 154 | - ✅ 定义列表 155 | - ✅ 复杂嵌套结构 156 | - ✅ 特殊布局和样式 157 | 158 | *测试内容涵盖了 PDF 电子书可能遇到的各种复杂格式,为导出工具提供了全面的测试用例。* 159 | -------------------------------------------------------------------------------- /filters/image-attr-cleanup.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Image attribute cleanup filter 3 | This filter removes image attribute lines like {width=1486 height=518} 4 | that appear immediately after images in markdown or LaTeX figure environments 5 | ]] 6 | 7 | -- Track the type of the previous element 8 | local previous_element_type = nil 9 | 10 | function Image(elem) 11 | -- Mark that we just processed an image 12 | previous_element_type = "image" 13 | return elem 14 | end 15 | 16 | function RawBlock(elem) 17 | -- Check if this is a LaTeX figure environment 18 | if elem.format == "latex" and (elem.text:match("\\begin{figure}") or elem.text:match("\\includegraphics")) then 19 | previous_element_type = "latex_figure" 20 | else 21 | previous_element_type = "other" 22 | end 23 | return elem 24 | end 25 | 26 | function Para(elem) 27 | -- Check if this paragraph contains only image attributes 28 | local content_str = pandoc.utils.stringify(elem) 29 | 30 | -- More comprehensive patterns to match image attributes 31 | local patterns = { 32 | "^%s*{%s*width%s*=%s*%d+%s+height%s*=%s*%d+%s*}%s*$", -- {width=123 height=456} 33 | "^%s*{%s*height%s*=%s*%d+%s+width%s*=%s*%d+%s*}%s*$", -- {height=456 width=123} 34 | "^%s*{%s*width%s*=%s*%d+%s*}%s*$", -- {width=123} 35 | "^%s*{%s*height%s*=%s*%d+%s*}%s*$", -- {height=456} 36 | "^%s*{width%s*=%s*%d+%s+height%s*=%s*%d+}%s*$", -- {width=123 height=456} (no spaces around =) 37 | 
"^%s*{height%s*=%s*%d+%s+width%s*=%s*%d+}%s*$" -- {height=456 width=123} (no spaces around =) 38 | } 39 | 40 | -- Check if content matches any of the image attribute patterns 41 | local is_image_attr = false 42 | for _, pattern in ipairs(patterns) do 43 | if content_str:match(pattern) then 44 | is_image_attr = true 45 | break 46 | end 47 | end 48 | 49 | -- If previous element was image/figure and this paragraph contains only image attributes, remove it 50 | if (previous_element_type == "image" or previous_element_type == "latex_figure") and is_image_attr then 51 | previous_element_type = nil -- Reset flag 52 | return {} -- Remove this paragraph 53 | end 54 | 55 | -- Reset flag for non-image-attribute paragraphs 56 | previous_element_type = nil 57 | return elem 58 | end 59 | 60 | -- Reset flag for other block elements 61 | function Header(elem) 62 | previous_element_type = nil 63 | return elem 64 | end 65 | 66 | function CodeBlock(elem) 67 | previous_element_type = nil 68 | return elem 69 | end 70 | 71 | function BlockQuote(elem) 72 | previous_element_type = nil 73 | return elem 74 | end 75 | 76 | function OrderedList(elem) 77 | previous_element_type = nil 78 | return elem 79 | end 80 | 81 | function BulletList(elem) 82 | previous_element_type = nil 83 | return elem 84 | end 85 | 86 | function Table(elem) 87 | previous_element_type = nil 88 | return elem 89 | end 90 | 91 | function Div(elem) 92 | previous_element_type = nil 93 | return elem 94 | end 95 | -------------------------------------------------------------------------------- /install_pdf_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install dependencies for PDF export script 4 | # This script installs the necessary tools for converting Hugo books to PDF 5 | 6 | echo "Installing PDF export dependencies..." 
7 | 8 | # --- OS Detection and Installation --- 9 | if [[ "$OSTYPE" == "darwin"* ]]; then 10 | # macOS 11 | echo "Detected macOS, installing with Homebrew..." 12 | 13 | if ! command -v brew &> /dev/null; then 14 | echo "Homebrew not found. Please install it first." 15 | exit 1 16 | fi 17 | 18 | echo "Installing system dependencies: pandoc, librsvg, imagemagick, basictex..." 19 | brew install pandoc librsvg imagemagick 20 | brew install --cask basictex 21 | 22 | echo "Installing LaTeX packages via tlmgr..." 23 | sudo tlmgr update --self 24 | sudo tlmgr install ctex fancyhdr titlesec fontspec geometry chngcntr booktabs caption float framed hyperref listings parskip fvextra 25 | 26 | elif [[ "$OSTYPE" == "linux-gnu"* ]]; then 27 | # Linux 28 | echo "Detected Linux..." 29 | 30 | if command -v apt-get &> /dev/null; then 31 | # Debian/Ubuntu 32 | echo "Using apt-get..." 33 | sudo apt-get update 34 | sudo apt-get install -y \ 35 | pandoc \ 36 | librsvg2-bin \ 37 | imagemagick \ 38 | texlive-latex-base \ 39 | texlive-latex-recommended \ 40 | texlive-latex-extra \ 41 | texlive-xetex \ 42 | texlive-lang-chinese \ 43 | texlive-font-utils \ 44 | texlive-luatex 45 | 46 | elif command -v dnf &> /dev/null || command -v yum &> /dev/null; then 47 | # Fedora/CentOS/RHEL 48 | echo "Using dnf/yum..." 49 | if command -v dnf &> /dev/null; then 50 | PKG_MANAGER=dnf 51 | else 52 | PKG_MANAGER=yum 53 | fi 54 | sudo $PKG_MANAGER install -y \ 55 | pandoc \ 56 | librsvg2-tools \ 57 | ImageMagick \ 58 | texlive-latex-base \ 59 | texlive-latex \ 60 | texlive-collection-latexrecommended \ 61 | texlive-collection-latexextra \ 62 | texlive-collection-langchinese \ 63 | texlive-xetex \ 64 | texlive-font-utils \ 65 | texlive-luatex 66 | 67 | elif command -v pacman &> /dev/null; then 68 | # Arch Linux 69 | echo "Using pacman..." 
70 | sudo pacman -S --noconfirm --needed \ 71 | pandoc \ 72 | librsvg \ 73 | imagemagick \ 74 | texlive-core \ 75 | texlive-latexextra \ 76 | texlive-langchinese \ 77 | texlive-xetex 78 | 79 | else 80 | echo "Unsupported Linux distribution. Please install dependencies manually." 81 | exit 1 82 | fi 83 | 84 | else 85 | echo "Unsupported operating system: $OSTYPE" 86 | exit 1 87 | fi 88 | 89 | echo "" 90 | echo "✅ Dependencies installation completed!" 91 | echo "" 92 | echo "To verify installation, run:" 93 | echo " pandoc --version" 94 | echo " rsvg-convert --version" 95 | echo " convert --version" 96 | echo " xelatex --version" 97 | echo "" 98 | echo "You can now use the PDF export script." 99 | -------------------------------------------------------------------------------- /example/chapter1-basics/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "基础格式测试" 3 | weight: 10 4 | --- 5 | 6 | ## 文本格式测试 7 | 8 | ### 中英文混排测试 9 | 10 | 这是一段包含**中文粗体**和*English italic*的混合文本。我们来测试一下`inline code`的效果,以及~~删除线~~的显示。 11 | 12 | Here's some English text with **bold formatting** and *italic formatting*. Let's also test `inline code` and ~~strikethrough~~ text. 
13 | 14 | 在 Hugo 中,函数是在模板动作中使用的代码片段,它们接收一个或多个参数并返回一个值。与方法不同,函数不与特定的对象关联。Hugo 作为一个用 Go 语言编写的静态站点生成器,其函数体系充分利用了 Go 语言的特性,提供了高性能和丰富的功能。 15 | 16 | 只包含中文的段落:这部电影还有一个隐而不宣的主题:我们到底是在打自己的比赛,还是在西方的评分标准中争头名?郎朗在德国、仙台、柯蒂斯拿第一名;回国却被国内老师打成第三,说他“要摆正位置”;他用西方技法打败西方人,却始终无法获得完全的文化认同。 17 | 18 | 《Creep》是英国摇滚乐队 Radiohead 的首支单曲,发行于 1992 年,后收录于专辑《Pablo Honey》中。以下是其创作背景和表达含义: 19 | 20 | - **创作背景**:根据乐队贝斯手 Colin Greenwood 回忆,这首歌由主唱 Thom Yorke 在大学时期创作。吉他手 Jonny Greenwood 曾表示,歌曲灵感来源于 Thom Yorke 喜欢的一个女孩,她突然出现在乐队的某次演出中。另有说法称,Thom Yorke 从未与那个女孩正式交谈过,只是偶尔在酒吧里看到她,当他终于喝得烂醉鼓起勇气表白时,她却被吓跑了。 21 | - **表达含义**:这首歌主要表达了爱情中的自卑、自我怀疑以及对被接纳和理解的渴望。歌曲将喜欢的人比作“天使”,形容其如羽毛般轻盈美好,凸显出对方的完美与独特,而自己则是“a creep”“a weirdo”,觉得与周围世界格格不入,强烈的自卑感油然而生。歌词“I wish I was special, you're so fuckin' special”体现了主人公对自身平凡的不满,渴望变得特别,能与心仪之人相配,却又深知彼此差距,充满了无奈与辛酸。同时,歌曲也反映了主人公内心的矛盾与挣扎,他渴望拥有完美的身体和灵魂,希望能得到对方的注意,却又因自我认知而陷入痛苦,在爱情面前犹豫不决。整首歌通过主人公的内心独白,不仅是对个人情感的深刻剖析,也是对当代社会中人们在人际关系中常感到迷失和孤独这一现象的反思。 22 | 23 | ### Emoji 表情符号测试 😊 24 | 25 | - 基础表情:😀 😃 😄 😁 😆 😅 😂 🤣 26 | - 手势表情:👍 👎 👌 🤝 👏 🙏 💪 ✊ 27 | - 心形表情:❤️ 💙 💚 💛 🧡 💜 🖤 🤍 28 | - 动物表情:🐶 🐱 🐭 🐹 🐰 🦊 🐻 🐼 29 | - 食物表情:🍎 🍌 🍇 🍓 🥝 🍒 🥑 🥕 30 | - 技术相关:💻 📱 ⌨️ 🖥️ 🖨️ 📡 🔌 💾 31 | 32 | ## 列表格式测试 33 | 34 | ### 无序列表 35 | 36 | - 第一级列表项 37 | - 第二级列表项 38 | - 第三级列表项 39 | - 第四级列表项 40 | - 另一个第一级项目 41 | - 包含**粗体**的列表项 42 | - 包含*斜体*的列表项 43 | - 包含`代码`的列表项 44 | 45 | ### 有序列表 46 | 47 | 1. 第一个有序项目 48 | 1. 嵌套的有序项目 49 | 2. 另一个嵌套项目 50 | 1. 更深层的嵌套 51 | 2. 继续嵌套测试 52 | 2. 第二个主要项目 53 | 3. 第三个主要项目 54 | 55 | ### 混合列表 56 | 57 | 1. 有序列表开始 58 | - 嵌套的无序列表 59 | - 另一个无序项目 60 | 1. 再次嵌套有序列表 61 | 2. 继续有序项目 62 | 2. 
回到主要有序列表 63 | 64 | ## 引用和分隔线测试 65 | 66 | ### 普通引用 67 | > 68 | > 这是一个普通的引用块。引用块通常用于突出显示重要的文本或者引用他人的话语。引用块通常用于突出显示重要的文本或者引用他人的话语。 69 | 70 | ### 包含格式的引用 71 | > 72 | > **重要提示**:这个引用块包含了多种格式 73 | > 74 | > - 列表项目 1 75 | > - 列表项目 2 76 | > 77 | > `代码示例`和*斜体文本*也可以在引用中使用。 78 | 79 | --- 80 | 81 | ### 分隔线测试 82 | 83 | 上面是一条分隔线,下面还有一条: 84 | 85 | *** 86 | 87 | ## 链接和图片测试 88 | 89 | ### 链接测试 90 | 91 | - 普通链接:[Google](https://www.google.com) 92 | - 中文链接:[百度搜索](https://www.baidu.com) 93 | - 邮箱链接: 94 | - 自动链接: 95 | 96 | ### 图片测试(占位符) 97 | 98 | ![示例图片](https://assets.jimmysong.io/images/blog/ni-xing-ni-shang-review/stages.webp) 99 | {width=1000 height=600} 100 | 101 | *注意:实际 PDF 导出时,网络图片可能需要特殊处理* 102 | 103 | ## 脚注测试 104 | 105 | 这里有一个脚注引用[^1],还有另一个脚注[^note2]。 106 | 107 | 脚注可以帮助提供额外的信息而不打断正文的流畅性[^3]。 108 | 109 | [^1]: 这是第一个脚注的内容 110 | [^note2]: 这是第二个脚注,使用了自定义标识符 111 | [^3]: 第三个脚注,包含**格式化文本**和`代码` 112 | 113 | ## 特殊字符测试 114 | 115 | ### 数学符号 116 | 117 | α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ σ τ υ φ χ ψ ω 118 | 119 | ∑ ∏ ∫ ∂ ∇ ∞ ± × ÷ ≤ ≥ ≠ ≈ ∝ ∈ ∉ ⊂ ⊃ ∪ ∩ 120 | 121 | ### 货币符号 122 | 123 | $ € £ ¥ ₹ ₽ ₩ ₪ ₫ ₡ ₢ ₣ ₤ ₥ ₦ ₧ ₨ 124 | 125 | ### 其他特殊符号 126 | 127 | © ® ™ § ¶ † ‡ • ‰ ′ ″ ‴ ※ ‼ ⁇ ⁈ ⁉ ⁏ 128 | 129 | --- 130 | 131 | *本章节完成了基础格式的测试,接下来的章节将测试更复杂的格式。* 132 | -------------------------------------------------------------------------------- /example/chapter2-code-and-tables/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 代码块和表格测试 3 | weight: 20 4 | lastmod: '2025-08-04' 5 | --- 6 | 7 | 本章节专门测试代码块、表格等复杂格式在 PDF 中的渲染效果。 8 | 9 | ## 代码块测试 10 | 11 | ### 内联代码 12 | 13 | 在文本中使用 `console.log()` 或者 `print()` 这样的内联代码。中文文本中的 `代码片段` 测试。 14 | 15 | ### Python 代码块 16 | 17 | ```python 18 | # Python 代码示例 - 数据处理 19 | import pandas as pd 20 | import numpy as np 21 | from datetime import datetime 22 | 23 | def process_data(filename): 24 | """ 25 | 处理 CSV 数据文件 26 | Args: 27 | filename (str): 文件名 28 | Returns: 29 | pd.DataFrame: 处理后的数据 30 | """ 31 | # 读取数据 32 | 
df = pd.read_csv(filename, encoding='utf-8') 33 | 34 | # 数据清洗 35 | df = df.dropna() 36 | df['timestamp'] = pd.to_datetime(df['timestamp']) 37 | 38 | # 数据转换 39 | df['value'] = df['value'].astype(float) 40 | 41 | return df 42 | 43 | # 使用示例 44 | if __name__ == "__main__": 45 | data = process_data('sample.csv') 46 | print(f"处理了 {len(data)} 条记录 😄") 47 | ``` 48 | 49 | 这是代码块下面的文字,用来说明代码的用途,比如 Java 代码中的 interface 的实现 theory。 50 | 51 | ### JavaScript 代码块 52 | 53 | ```javascript 54 | // JavaScript 代码示例 - React 组件 55 | import React, { useState, useEffect } from 'react'; 56 | import axios from 'axios'; 57 | 58 | const DataFetcher = ({ apiUrl }) => { 59 | const [data, setData] = useState(null); 60 | const [loading, setLoading] = useState(true); 61 | const [error, setError] = useState(null); 62 | 63 | useEffect(() => { 64 | const fetchData = async () => { 65 | try { 66 | setLoading(true); 67 | const response = await axios.get(apiUrl); 68 | setData(response.data); 69 | } catch (err) { 70 | setError(err.message); 71 | } finally { 72 | setLoading(false); 73 | } 74 | }; 75 | 76 | fetchData(); 77 | }, [apiUrl]); 78 | 79 | if (loading) return '加载中...'; 80 | if (error) return `错误:${error}`; 81 | 82 | return [ 83 | '数据展示', 84 | JSON.stringify(data, null, 2) 85 | ]; 86 | 87 | export default DataFetcher; 88 | ``` 89 | 90 | 这是代码块下方的文字,不应该与代码块重叠,并且要保证上方的代码块可以正确的分页。 91 | 92 | 弹性云服务器(Elastic Cloud Server)是一种可随时自助获取、可弹性伸缩的云服务器,可帮助您打造可靠、安全、灵活、高效的应用环境,确保服务持久稳定运行,提升运维效率。根据业务发展需要,您可以随时变更规格、切换操作系统、配置安全组规则或调整配额。除此之外,您还可以实时查看监控指标及审计日志,以便及时了解弹性云服务器的健康状态。 93 | 94 | ### 超宽代码块测试 95 | 96 | ```bash 97 | # 这是一个非常长的命令行示例,用来测试PDF中超宽代码块的处理效果 98 | docker run -d --name my-container --restart=unless-stopped -p 8080:80 -v /host/path/to/data:/container/data -e ENV_VAR_1=value1 -e ENV_VAR_2=value2 -e ENV_VAR_3=value3 --network=my-network --memory=2g --cpus=1.5 my-image:latest 99 | ``` 100 | 101 | ## 表格测试 102 | 103 | ### 基础表格 104 | 105 | | 姓名 | 年龄 | 职业 | 城市 | 106 | |------|------|------|------| 107 | | 张三 | 28 | 工程师 
| 北京 | 108 | | 李四 | 32 | 设计师 | 上海 | 109 | | 王五 | 25 | 产品经理 | 深圳 | 110 | 111 | ### 包含格式的表格 112 | 113 | | 功能 | 状态 | 优先级 | 负责人 | 备注 | 114 | |------|------|--------|--------|------| 115 | | **用户登录** | ✅ 完成 | 🔴 高 | @张三 | 已上线 | 116 | | *数据导出* | 🔄 进行中 | 🟡 中 | @李四 | 预计下周完成 | 117 | | ~~旧功能~~ | ❌ 废弃 | 🔵 低 | - | 不再维护 | 118 | | `API接口` | ⏳ 计划中 | 🟠 中 | @王五 | 需求评审中 | 119 | 120 | ### 复杂表格(包含代码和链接) 121 | 122 | | 技术栈 | 版本 | 用途 | 示例代码 | 文档链接 | 123 | |--------|------|------|----------|----------| 124 | | React | 18.2.0 | 前端框架 | `` | [官方文档](https://react.dev) | 125 | | Node.js | 18.17.0 | 后端运行时 | `require('express')` | [Node.js](https://nodejs.org) | 126 | | Python | 3.11 | 数据处理 | `import pandas` | [Python.org](https://python.org) | 127 | | Docker | 24.0 | 容器化,开发者友好,支持多平台,可以在 Linux、Windows 和 macOS 上运行,可以用 Orbstack 替代 | `docker build .` | [Docker Hub](https://hub.docker.com) | 128 | 129 | ### 包含内联代码和中文混排的表格 130 | 131 | | 匹配方式 | 描述 | 示例 | 132 | |----------|------|------| 133 | | `prefix` | 前缀必须与 `:path` 头的开头匹配 | `/hello` 匹配 `/hello`、`/helloworld`、`/hello/v1` | 134 | | `path` | 路径必须与 `:path` 头完全匹配 | `/hello` 只匹配 `/hello`,不匹配 `/helloworld` | 135 | | `safe_regex` | 使用正则表达式匹配 `:path` 头 | `/\d{3}` 匹配三位数字路径 | 136 | | `connect_matcher` | 只匹配 CONNECT 请求 | 用于 HTTP CONNECT 方法 | 137 | 138 | --- 139 | 140 | *本章节完成了代码块和表格的测试,下一章将测试更多高级格式。* 141 | -------------------------------------------------------------------------------- /filters/ansi-cleanup.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | ANSI Cleanup Filter for LaTeX PDF Generation 3 | This filter comprehensively removes ANSI escape codes from all content 4 | to prevent LaTeX compilation errors 5 | ]] 6 | 7 | -- Comprehensive ANSI escape code removal 8 | local function strip_ansi_codes(text) 9 | if not text or type(text) ~= "string" then 10 | return text 11 | end 12 | 13 | -- Remove various forms of ANSI escape sequences 14 | 15 | -- Standard ANSI color codes (ESC[...m) 16 | text = 
text:gsub("\27%[[%d;]*m", "") -- Literal ESC character 17 | text = text:gsub("\\033%[[%d;]*m", "") -- Octal escape \033 18 | text = text:gsub("\\x1[bB]%[[%d;]*m", "") -- Hex escape \x1b or \x1B 19 | text = text:gsub("\\e%[[%d;]*m", "") -- Short escape \e 20 | 21 | -- ANSI escape sequences with other terminators 22 | text = text:gsub("\27%[[%d;]*[A-Za-z]", "") -- Literal ESC 23 | text = text:gsub("\\033%[[%d;]*[A-Za-z]", "") -- Octal 24 | text = text:gsub("\\x1[bB]%[[%d;]*[A-Za-z]", "") -- Hex 25 | text = text:gsub("\\e%[[%d;]*[A-Za-z]", "") -- Short 26 | 27 | -- Handle shell variable assignments containing ANSI codes 28 | -- Patterns like: RED='\033[0;31m' or RED="\033[0;31m" 29 | text = text:gsub("='\\033%[[%d;]*m'", "=''") 30 | text = text:gsub('="\\033%[[%d;]*m"', '=""') 31 | text = text:gsub("='\\e%[[%d;]*m'", "=''") 32 | text = text:gsub('="\\e%[[%d;]*m"', '=""') 33 | text = text:gsub("='\\x1[bB]%[[%d;]*m'", "=''") 34 | text = text:gsub('="\\x1[bB]%[[%d;]*m"', '=""') 35 | 36 | -- Handle assignments without quotes 37 | text = text:gsub("=\\033%[[%d;]*m", "=") 38 | text = text:gsub("=\\e%[[%d;]*m", "=") 39 | text = text:gsub("=\\x1[bB]%[[%d;]*m", "=") 40 | 41 | -- Handle more complex variable assignments with color codes 42 | text = text:gsub("([%w_]+)=(['\"])\\\\*033%[[%d;]*m%2", "%1=%2%2") 43 | text = text:gsub("([%w_]+)=(['\"])\\\\*e%[[%d;]*m%2", "%1=%2%2") 44 | text = text:gsub("([%w_]+)=(['\"])\\\\*x1[bB]%[[%d;]*m%2", "%1=%2%2") 45 | 46 | -- Remove any remaining problematic backslash sequences 47 | -- This handles cases where \033 might be interpreted as LaTeX command 48 | text = text:gsub("\\033", "\\\\textbackslash{}033") 49 | text = text:gsub("\\x1[bB]", "\\\\textbackslash{}x1b") 50 | 51 | -- Handle raw octal sequences that might cause issues 52 | text = text:gsub("\\(%d%d%d)%[", function(digits) 53 | if digits == "033" then 54 | return "[ESC_" .. digits .. "][" 55 | else 56 | return "\\\\" .. digits .. 
"[" 57 | end 58 | end) 59 | 60 | return text 61 | end 62 | 63 | -- Apply ANSI cleanup to code blocks 64 | function CodeBlock(elem) 65 | if elem.text then 66 | elem.text = strip_ansi_codes(elem.text) 67 | end 68 | return elem 69 | end 70 | 71 | -- Apply ANSI cleanup to inline code 72 | function Code(elem) 73 | if elem.text then 74 | elem.text = strip_ansi_codes(elem.text) 75 | end 76 | return elem 77 | end 78 | 79 | -- Apply ANSI cleanup to raw blocks 80 | function RawBlock(elem) 81 | if elem.text then 82 | elem.text = strip_ansi_codes(elem.text) 83 | end 84 | return elem 85 | end 86 | 87 | -- Apply ANSI cleanup to raw inline 88 | function RawInline(elem) 89 | if elem.text then 90 | elem.text = strip_ansi_codes(elem.text) 91 | end 92 | return elem 93 | end 94 | 95 | -- Apply ANSI cleanup to string elements 96 | function Str(elem) 97 | if elem.text then 98 | elem.text = strip_ansi_codes(elem.text) 99 | end 100 | return elem 101 | end 102 | 103 | -- Apply ANSI cleanup to link URLs and titles 104 | function Link(elem) 105 | if elem.target then 106 | elem.target = strip_ansi_codes(elem.target) 107 | end 108 | if elem.title then 109 | elem.title = strip_ansi_codes(elem.title) 110 | end 111 | return elem 112 | end 113 | 114 | -- Apply ANSI cleanup to image URLs and titles 115 | function Image(elem) 116 | if elem.src then 117 | elem.src = strip_ansi_codes(elem.src) 118 | end 119 | if elem.title then 120 | elem.title = strip_ansi_codes(elem.title) 121 | end 122 | return elem 123 | end 124 | -------------------------------------------------------------------------------- /tree.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import re 4 | from dataclasses import dataclass, field 5 | from typing import List, Optional 6 | 7 | @dataclass 8 | class Node: 9 | """Represents a chapter or section in the book.""" 10 | title: str 11 | path: str 12 | weight: int 13 | children: List["Node"] = field(default_factory=list) 14 | 15 | 
def find_asset(book_dir, names): 16 | for n in names: 17 | p = os.path.join(book_dir, n) 18 | if os.path.exists(p): 19 | return p 20 | return None 21 | 22 | def adjust_heading_levels(content: str, base_level: int) -> str: 23 | lines = content.split('\n') 24 | adjusted_lines = [] 25 | has_headings = False 26 | inside_code_block = False 27 | for line in lines: 28 | if line.strip().startswith('```'): 29 | inside_code_block = not inside_code_block 30 | adjusted_lines.append(line) 31 | continue 32 | if inside_code_block: 33 | adjusted_lines.append(line) 34 | continue 35 | if line.strip().startswith('#'): 36 | has_headings = True 37 | original_level = len(line) - len(line.lstrip('#')) 38 | if base_level == 1: 39 | new_level = original_level 40 | else: 41 | new_level = original_level + base_level - 1 42 | new_level = min(new_level, 6) 43 | heading_text = line.lstrip('#').strip() 44 | adjusted_lines.append('#' * new_level + ' ' + heading_text) 45 | else: 46 | adjusted_lines.append(line) 47 | if not has_headings: 48 | return content 49 | return '\n'.join(adjusted_lines) 50 | 51 | 52 | def build_tree(directory: str, include_drafts: bool = False, parse_front_matter=None, should_include=None) -> Optional[Node]: 53 | index_path = None 54 | for name in ("_index.md", "index.md"): 55 | candidate = os.path.join(directory, name) 56 | if os.path.exists(candidate): 57 | index_path = candidate 58 | break 59 | if not index_path or not parse_front_matter or not should_include: 60 | return None 61 | metadata = parse_front_matter(index_path) 62 | title, weight, draft, publish, export_pdf = metadata 63 | if not should_include(index_path, metadata, include_drafts): 64 | return None 65 | title = title or os.path.basename(directory) 66 | node = Node(title=title, path=index_path, weight=weight) 67 | for entry in sorted(os.listdir(directory)): 68 | subdir = os.path.join(directory, entry) 69 | if os.path.isdir(subdir): 70 | child = build_tree(subdir, include_drafts, parse_front_matter, 
should_include) 71 | if child: 72 | node.children.append(child) 73 | node.children.sort(key=lambda n: n.weight) 74 | return node 75 | 76 | def flatten_tree(node: Node, result: List[Node]) -> None: 77 | result.append(node) 78 | for child in node.children: 79 | flatten_tree(child, result) 80 | 81 | def write_hierarchical_content(tmp, node: Node, book_dir: str, temp_dir: str, temp_pngs: list, level: int = 1, cache_dir: str = None, process_images_in_content=None, adjust_heading_levels_func=None) -> None: 82 | heading_level = min(level, 4) 83 | heading = '#' * heading_level 84 | tmp.write(f'{heading} {node.title}\n\n') 85 | with open(node.path, "r", encoding="utf-8") as f: 86 | content = f.read() 87 | if process_images_in_content: 88 | content = process_images_in_content(content, book_dir, temp_dir, temp_pngs, node.path, cache_dir) 89 | content = re.sub(r'^---\n.*?\n---\n', '', content, flags=re.DOTALL) 90 | content = re.sub(r'^\s*---\s*$', '', content, flags=re.MULTILINE) 91 | content = re.sub(r'^# .*\n', '', content, count=1) 92 | if adjust_heading_levels_func: 93 | content = adjust_heading_levels_func(content, heading_level) 94 | else: 95 | content = adjust_heading_levels(content, heading_level) 96 | tmp.write(content + "\n\n") 97 | for child in node.children: 98 | write_hierarchical_content(tmp, child, book_dir, temp_dir, temp_pngs, level + 1, cache_dir, process_images_in_content, adjust_heading_levels_func) 99 | if level == 1: 100 | tmp.write('\\newpage\n\n') 101 | -------------------------------------------------------------------------------- /filters/cleanup-filter.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Cleanup filter to fix common LaTeX line break issues 3 | This filter addresses problems with empty lines that have trailing backslashes 4 | ]] 5 | 6 | -- Clean up code blocks by removing trailing empty lines and ANSI escape codes 7 | function CodeBlock(elem) 8 | -- Remove ANSI escape codes 9 | local 
clean_text = elem.text:gsub("\27%[[%d;]+m", "") 10 | clean_text = clean_text:gsub("\\033%[[%d;]+m", "") 11 | 12 | -- Remove trailing empty lines from code 13 | local lines = {} 14 | for line in clean_text:gmatch("[^\r\n]*") do 15 | table.insert(lines, line) 16 | end 17 | 18 | -- Remove trailing empty lines 19 | while #lines > 0 and lines[#lines]:match("^%s*$") do 20 | table.remove(lines) 21 | end 22 | 23 | elem.text = table.concat(lines, "\n") 24 | return elem 25 | end 26 | 27 | -- Clean up paragraphs to remove problematic content 28 | function Para(elem) 29 | -- Check if paragraph contains only whitespace or breaks 30 | local content_str = pandoc.utils.stringify(elem) 31 | if content_str:match("^%s*$") then 32 | -- Return empty block instead of empty paragraph 33 | return {} 34 | end 35 | return elem 36 | end 37 | 38 | -- Clean up raw blocks 39 | function RawBlock(elem) 40 | if elem.format == "latex" then 41 | -- Remove ANSI escape codes 42 | local cleaned = elem.text:gsub("\27%[[%d;]+m", "") 43 | cleaned = cleaned:gsub("\\033%[[%d;]+m", "") 44 | 45 | -- Remove trailing backslashes from empty lines 46 | cleaned = cleaned:gsub("\n%s*\\\\%s*\n", "\n\n") 47 | -- Remove lines that are just whitespace followed by \\ 48 | cleaned = cleaned:gsub("\n%s+\\\\%s*\n", "\n") 49 | -- Remove double backslashes at end of content 50 | cleaned = cleaned:gsub("\\\\%s*$", "") 51 | 52 | -- Fix specific problematic NormalTok patterns, but only if they are truly empty 53 | -- and not part of minted code blocks 54 | 55 | -- Only clean up NormalTok patterns that are clearly problematic 56 | -- Avoid cleaning patterns that might be legitimate LaTeX from minted 57 | 58 | -- Remove empty NormalTok followed by double backslash 59 | cleaned = cleaned:gsub("\\NormalTok{%s*}\\\\%s*\n", "\n") 60 | 61 | -- Remove empty tokens at line endings 62 | cleaned = cleaned:gsub("\\[%a]+Tok{%s*}\\\\%s*$", "") 63 | 64 | -- Fix double newlines after tokens 65 | cleaned = cleaned:gsub("}\\\\%s*\n%s*\n", 
"}\n\n") 66 | 67 | elem.text = cleaned 68 | end 69 | return elem 70 | end 71 | 72 | -- Clean up table cells to prevent empty line break issues 73 | function Table(tbl) 74 | -- Clean up all cells in the table 75 | local function clean_cell_contents(contents) 76 | if not contents then return {} end 77 | 78 | local cleaned = {} 79 | for i, element in ipairs(contents) do 80 | if element.t == "Str" then 81 | -- Keep non-empty strings 82 | if element.text and element.text ~= "" then 83 | table.insert(cleaned, element) 84 | end 85 | elseif element.t == "Space" then 86 | -- Keep spaces only if they're not at the end 87 | table.insert(cleaned, element) 88 | elseif element.t ~= "SoftBreak" and element.t ~= "LineBreak" then 89 | -- Keep other elements except breaks 90 | table.insert(cleaned, element) 91 | end 92 | end 93 | return cleaned 94 | end 95 | 96 | -- Process header 97 | if tbl.head and tbl.head.rows then 98 | for _, row in ipairs(tbl.head.rows) do 99 | for _, cell in ipairs(row.cells) do 100 | cell.contents = clean_cell_contents(cell.contents) 101 | end 102 | end 103 | end 104 | 105 | -- Process body 106 | if tbl.bodies then 107 | for _, body in ipairs(tbl.bodies) do 108 | if body.body then 109 | for _, row in ipairs(body.body) do 110 | for _, cell in ipairs(row.cells) do 111 | cell.contents = clean_cell_contents(cell.contents) 112 | end 113 | end 114 | end 115 | end 116 | end 117 | 118 | return tbl 119 | end 120 | -------------------------------------------------------------------------------- /filters/symbol-fallback-filter.lua: -------------------------------------------------------------------------------- 1 | -- tools/pdf-book-exporter/filters/symbol-fallback-filter.lua 2 | -- Symbol fallback filter for LaTeX PDF generation 3 | -- 4 | -- This filter handles special Unicode symbols that may not be available in all fonts 5 | -- by replacing them with LaTeX commands that provide proper fallbacks. 6 | -- 7 | -- Key functionality: 8 | -- 1. 
Replaces currency symbols with LaTeX currency commands 9 | -- 2. Handles special punctuation and technical symbols 10 | -- 3. Provides font-independent symbol rendering 11 | -- 4. Avoids processing code contexts where symbols should be preserved 12 | -- 13 | -- Implementation approach: 14 | -- - Simple and reliable processing of only Str elements 15 | -- - Avoids complex AST traversal that could cause issues 16 | -- - Uses direct string replacement with LaTeX commands 17 | -- - Preserves code blocks and inline code unchanged 18 | 19 | -- Define the symbol mapping table 20 | -- 21 | -- Maps Unicode symbols to LaTeX commands that provide reliable fallbacks. 22 | -- Each command should be defined in the LaTeX template or through packages. 23 | -- 24 | -- Categories: 25 | -- 1. Currency symbols - Various international currencies 26 | -- 2. Special punctuation - Rare punctuation marks 27 | -- 3. Technical symbols - UI/UX and interface symbols 28 | -- 29 | -- Note: LaTeX commands like \rupee{} should be defined in the template 30 | -- with appropriate fallback mechanisms for missing fonts 31 | local symbol_map = { 32 | -- Currency symbols 33 | ["₹"] = "\\rupee{}", -- Indian Rupee 34 | ["₽"] = "\\ruble{}", -- Russian Ruble 35 | ["₪"] = "\\shekel{}", -- Israeli Shekel 36 | ["₡"] = "\\colon{}", -- Costa Rican Colon 37 | ["₢"] = "\\cruzeiro{}", -- Brazilian Cruzeiro 38 | ["₣"] = "\\franc{}", -- French Franc 39 | ["₤"] = "\\lira{}", -- Italian Lira 40 | ["₥"] = "\\mill{}", -- Mill symbol 41 | ["₦"] = "\\naira{}", -- Nigerian Naira 42 | ["₧"] = "\\peseta{}", -- Spanish Peseta 43 | ["₨"] = "\\rupeeold{}", -- Old Rupee symbol 44 | 45 | -- Special punctuation and symbols 46 | ["‴"] = "\\tripleprime{}", -- Triple prime 47 | ["⁏"] = "\\reversedSemicolon{}", -- Reversed semicolon 48 | ["⏳"] = "\\hourglass{}", -- Hourglass 49 | ["ℹ"] = "\\infoSymbol{}", -- Information symbol 50 | ["✊"] = "\\raisedFist{}", -- Raised fist 51 | ["⌨"] = "\\keyboardSymbol{}", -- Keyboard symbol 52 | } 53 
| 54 | -- Simple and reliable approach: only process Str elements 55 | -- This avoids complex data structure issues with pandoc.walk_inline/walk_block 56 | -- 57 | -- Processing strategy: 58 | -- 1. Only handle Str (string) elements to avoid AST complexity 59 | -- 2. Use simple string replacement for reliability 60 | -- 3. Return RawInline LaTeX when changes are made 61 | -- 4. Explicitly avoid processing Code and CodeBlock elements 62 | -- 5. Only process LaTeX output format 63 | 64 | -- Process Str elements (inline text) 65 | -- 66 | -- Main processing function for regular text content. 67 | -- Scans through the symbol mapping table and replaces any found symbols 68 | -- with their corresponding LaTeX commands. 69 | -- 70 | -- Process: 71 | -- 1. Check if generating LaTeX output 72 | -- 2. Apply string replacements for each symbol in the mapping 73 | -- 3. Return RawInline LaTeX if any changes were made 74 | -- 4. Otherwise return the original element unchanged 75 | function Str(elem) 76 | -- Only process LaTeX output 77 | if not FORMAT:match 'latex' then 78 | return elem 79 | end 80 | 81 | local text = elem.text 82 | local changed = false 83 | 84 | -- Replace each symbol with its LaTeX command 85 | for symbol, command in pairs(symbol_map) do 86 | local new_text = text:gsub(symbol, command) 87 | if new_text ~= text then 88 | text = new_text 89 | changed = true 90 | end 91 | end 92 | 93 | -- If any changes were made, return as raw LaTeX 94 | if changed then 95 | return pandoc.RawInline('latex', text) 96 | end 97 | 98 | -- Otherwise return original element 99 | return elem 100 | end 101 | 102 | -- Don't process inline code 103 | -- 104 | -- Inline code should preserve symbols exactly as written. 105 | -- Users may intentionally include Unicode symbols in code examples. 106 | function Code(elem) 107 | return elem 108 | end 109 | 110 | -- Don't process code blocks 111 | -- 112 | -- Code blocks should preserve all original characters. 
113 | -- Symbol replacement could break code syntax or meaning. 114 | function CodeBlock(elem) 115 | return elem 116 | end 117 | -------------------------------------------------------------------------------- /filters/table-filter.lua: -------------------------------------------------------------------------------- 1 | -- table-filter.lua 2 | -- Pandoc filter to generate tables with black outer border and gray inner lines 3 | 4 | function Table(elem) 5 | -- Only process if we're generating LaTeX 6 | if FORMAT ~= "latex" then 7 | return elem 8 | end 9 | 10 | -- Get table dimensions 11 | local num_cols = #elem.colspecs 12 | 13 | -- Build LaTeX table manually with custom column separators 14 | local latex_lines = {} 15 | 16 | -- Define custom column types with gray vertical rules 17 | table.insert(latex_lines, "\\newcolumntype{G}{!{\\color{gray!40}\\vrule}}") 18 | table.insert(latex_lines, "\\newcolumntype{B}{!{\\color{black}\\vrule}}") 19 | 20 | -- Create column specification: black outer borders, gray inner separators 21 | -- Use >{\\raggedright\\arraybackslash} for left-aligned, top-aligned text 22 | local colspec = "B" 23 | for i = 1, num_cols do 24 | colspec = colspec .. ">{\\raggedright\\arraybackslash}p{" .. string.format("%.2f", 0.9 / num_cols) .. "\\textwidth}" 25 | if i < num_cols then 26 | colspec = colspec .. "G" -- Gray separator between columns 27 | end 28 | end 29 | colspec = colspec .. "B" -- Black right border 30 | 31 | -- Start table with black outer border and improved styling 32 | table.insert(latex_lines, "\\sloppy") -- Encourage line breaks without overfull hboxes 33 | table.insert(latex_lines, "\\arrayrulecolor{black}") 34 | table.insert(latex_lines, "\\begin{longtable}{" .. colspec .. 
"}") 35 | table.insert(latex_lines, "\\tablefontsize") -- Shrink font if necessary 36 | table.insert(latex_lines, "\\hline") 37 | 38 | -- Process header if exists 39 | if elem.head and elem.head.rows and #elem.head.rows > 0 then 40 | for _, row in ipairs(elem.head.rows) do 41 | local row_content = {} 42 | for _, cell in ipairs(row.cells) do 43 | -- Convert cell contents to LaTeX, preserving formatting 44 | local cell_latex = pandoc.write(pandoc.Pandoc(cell.contents), 'latex') 45 | -- Clean up the LaTeX output (remove extra newlines and paragraph tags) 46 | cell_latex = cell_latex:gsub("\\par\n", "") 47 | cell_latex = cell_latex:gsub("\n", " ") 48 | cell_latex = cell_latex:gsub("^%s+", "") 49 | cell_latex = cell_latex:gsub("%s+$", "") 50 | -- Make header text bold 51 | cell_latex = "\\textbf{" .. cell_latex .. "}" 52 | table.insert(row_content, cell_latex) 53 | end 54 | -- Add light gray background for header row 55 | table.insert(latex_lines, "\\rowcolor{gray!10}") 56 | table.insert(latex_lines, table.concat(row_content, " & ") .. " \\\\") 57 | -- Gray line after header 58 | table.insert(latex_lines, "\\arrayrulecolor{gray!40}") 59 | table.insert(latex_lines, "\\hline") 60 | end 61 | end 62 | 63 | -- Process body rows 64 | if elem.bodies and #elem.bodies > 0 then 65 | for _, body in ipairs(elem.bodies) do 66 | for i, row in ipairs(body.body) do 67 | local row_content = {} 68 | for _, cell in ipairs(row.cells) do 69 | -- Convert cell contents to LaTeX, preserving formatting 70 | local cell_latex = pandoc.write(pandoc.Pandoc(cell.contents), 'latex') 71 | -- Clean up the LaTeX output (remove extra newlines and paragraph tags) 72 | cell_latex = cell_latex:gsub("\\par\n", "") 73 | cell_latex = cell_latex:gsub("\n", " ") 74 | cell_latex = cell_latex:gsub("^%s+", "") 75 | cell_latex = cell_latex:gsub("%s+$", "") 76 | table.insert(row_content, cell_latex) 77 | end 78 | table.insert(latex_lines, table.concat(row_content, " & ") .. 
" \\\\") 79 | -- Gray lines between rows, black line for last row 80 | if i < #body.body then 81 | table.insert(latex_lines, "\\arrayrulecolor{gray!40}") 82 | table.insert(latex_lines, "\\hline") 83 | else 84 | table.insert(latex_lines, "\\arrayrulecolor{black}") 85 | table.insert(latex_lines, "\\hline") 86 | end 87 | end 88 | end 89 | end 90 | 91 | -- End table 92 | table.insert(latex_lines, "\\end{longtable}") 93 | table.insert(latex_lines, "\\relax") -- Reset line breaking behavior 94 | table.insert(latex_lines, "\\arrayrulecolor{black}") -- Reset to black 95 | 96 | -- Return as raw LaTeX 97 | return pandoc.RawBlock("latex", table.concat(latex_lines, "\n")) 98 | end 99 | -------------------------------------------------------------------------------- /table-filter.lua: -------------------------------------------------------------------------------- 1 | -- table-filter.lua 2 | -- Pandoc filter to generate tables with black outer border and gray inner lines 3 | 4 | function Table(elem) 5 | -- Only process if we're generating LaTeX 6 | if FORMAT ~= "latex" then 7 | return elem 8 | end 9 | 10 | -- Get table dimensions 11 | local num_cols = #elem.colspecs 12 | 13 | -- Build LaTeX table manually with custom column separators 14 | local latex_lines = {} 15 | 16 | -- Define custom column types with gray vertical rules 17 | table.insert(latex_lines, "\\newcolumntype{G}{!{\\color{gray!40}\\vrule}}") 18 | table.insert(latex_lines, "\\newcolumntype{B}{!{\\color{black}\\vrule}}") 19 | 20 | -- Create column specification: black outer borders, gray inner separators 21 | -- Use >{\\raggedright\\arraybackslash} for left-aligned, top-aligned text 22 | -- Table now occupies full text width for better content display 23 | -- Adjust column width calculation for better spacing with borders 24 | local colspec = "B" 25 | -- Reserve small amount for borders and column separators to prevent overflow 26 | local available_width = 0.98 -- 98% of textwidth to account for borders 27 | local 
col_width = available_width / num_cols 28 | 29 | for i = 1, num_cols do 30 | colspec = colspec .. ">{\\raggedright\\arraybackslash}p{" .. string.format("%.3f", col_width) .. "\\textwidth}" 31 | if i < num_cols then 32 | colspec = colspec .. "G" -- Gray separator between columns 33 | end 34 | end 35 | colspec = colspec .. "B" -- Black right border 36 | 37 | -- Start table with black outer border and improved styling 38 | table.insert(latex_lines, "\\sloppy") -- Encourage line breaks without overfull hboxes 39 | table.insert(latex_lines, "\\arrayrulecolor{black}") 40 | table.insert(latex_lines, "\\begin{longtable}{" .. colspec .. "}") 41 | table.insert(latex_lines, "\\tablefontsize") -- Shrink font if necessary 42 | table.insert(latex_lines, "\\hline") 43 | 44 | -- Process header if exists 45 | if elem.head and elem.head.rows and #elem.head.rows > 0 then 46 | for _, row in ipairs(elem.head.rows) do 47 | local row_content = {} 48 | for _, cell in ipairs(row.cells) do 49 | -- Convert cell contents to LaTeX, preserving formatting 50 | local cell_latex = pandoc.write(pandoc.Pandoc(cell.contents), 'latex') 51 | -- Clean up the LaTeX output (remove extra newlines and paragraph tags) 52 | cell_latex = cell_latex:gsub("\\par\n", "") 53 | cell_latex = cell_latex:gsub("\n", " ") 54 | cell_latex = cell_latex:gsub("^%s+", "") 55 | cell_latex = cell_latex:gsub("%s+$", "") 56 | -- Make header text bold 57 | cell_latex = "\\textbf{" .. cell_latex .. "}" 58 | table.insert(row_content, cell_latex) 59 | end 60 | -- Add light gray background for header row 61 | table.insert(latex_lines, "\\rowcolor{gray!10}") 62 | table.insert(latex_lines, table.concat(row_content, " & ") .. 
" \\\\") 63 | -- Gray line after header 64 | table.insert(latex_lines, "\\arrayrulecolor{gray!40}") 65 | table.insert(latex_lines, "\\hline") 66 | end 67 | end 68 | 69 | -- Process body rows 70 | if elem.bodies and #elem.bodies > 0 then 71 | for _, body in ipairs(elem.bodies) do 72 | for i, row in ipairs(body.body) do 73 | local row_content = {} 74 | for _, cell in ipairs(row.cells) do 75 | -- Convert cell contents to LaTeX, preserving formatting 76 | local cell_latex = pandoc.write(pandoc.Pandoc(cell.contents), 'latex') 77 | -- Clean up the LaTeX output (remove extra newlines and paragraph tags) 78 | cell_latex = cell_latex:gsub("\\par\n", "") 79 | cell_latex = cell_latex:gsub("\n", " ") 80 | cell_latex = cell_latex:gsub("^%s+", "") 81 | cell_latex = cell_latex:gsub("%s+$", "") 82 | table.insert(row_content, cell_latex) 83 | end 84 | table.insert(latex_lines, table.concat(row_content, " & ") .. " \\\\") 85 | -- Gray lines between rows, black line for last row 86 | if i < #body.body then 87 | table.insert(latex_lines, "\\arrayrulecolor{gray!40}") 88 | table.insert(latex_lines, "\\hline") 89 | else 90 | table.insert(latex_lines, "\\arrayrulecolor{black}") 91 | table.insert(latex_lines, "\\hline") 92 | end 93 | end 94 | end 95 | end 96 | 97 | -- End table 98 | table.insert(latex_lines, "\\end{longtable}") 99 | table.insert(latex_lines, "\\relax") -- Reset line breaking behavior 100 | table.insert(latex_lines, "\\arrayrulecolor{black}") -- Reset to black 101 | 102 | -- Return as raw LaTeX 103 | return pandoc.RawBlock("latex", table.concat(latex_lines, "\n")) 104 | end 105 | -------------------------------------------------------------------------------- /emoji_support.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | def detect_emoji_fonts(): 4 | detected_fonts = { 5 | 'primary': None, 6 | 'fallbacks': [], 7 | 'available': [] 8 | } 9 | emoji_font_priorities = [ 10 | 'Apple Color Emoji', 11 | 'Noto Color 
Emoji', 12 | 'Segoe UI Emoji', 13 | 'Arial Unicode MS', 14 | 'Symbola', 15 | 'DejaVu Sans' 16 | ] 17 | try: 18 | result = subprocess.run(['fc-list', ':', 'family'], 19 | capture_output=True, text=True, check=True) 20 | available_fonts = result.stdout.split('\n') 21 | for font_name in emoji_font_priorities: 22 | if any(font_name in line for line in available_fonts): 23 | detected_fonts['available'].append(font_name) 24 | if detected_fonts['primary'] is None: 25 | detected_fonts['primary'] = font_name 26 | else: 27 | detected_fonts['fallbacks'].append(font_name) 28 | if not detected_fonts['available']: 29 | detected_fonts['primary'] = 'Source Han Sans SC' 30 | detected_fonts['available'].append('Source Han Sans SC') 31 | except (subprocess.CalledProcessError, FileNotFoundError): 32 | detected_fonts['primary'] = 'Apple Color Emoji' 33 | detected_fonts['available'] = ['Apple Color Emoji', 'Noto Color Emoji', 'Segoe UI Emoji'] 34 | detected_fonts['fallbacks'] = ['Noto Color Emoji', 'Segoe UI Emoji'] 35 | return detected_fonts 36 | 37 | def generate_emoji_font_config(emoji_fonts_info): 38 | if not emoji_fonts_info['available']: 39 | return "% No emoji fonts detected\n\\let\\emojifont\\rmfamily\n" 40 | latex_config = [] 41 | latex_config.append("% Enhanced emoji font detection and configuration") 42 | primary_font = emoji_fonts_info['primary'] 43 | fallback_fonts = emoji_fonts_info['fallbacks'] 44 | latex_config.append(f"\\IfFontExistsTF{{{primary_font}}}{{") 45 | latex_config.append(f" \\newfontfamily\\emojifont{{{primary_font}}}[Renderer=HarfBuzz]") 46 | latex_config.append(f" \\typeout{{Using {primary_font} for emoji rendering}}") 47 | latex_config.append("}{") 48 | current_indent = " " 49 | for i, fallback_font in enumerate(fallback_fonts): 50 | latex_config.append(f"{current_indent}\\IfFontExistsTF{{{fallback_font}}}{{") 51 | latex_config.append(f"{current_indent} \\newfontfamily\\emojifont{{{fallback_font}}}[Renderer=HarfBuzz]") 52 | 
latex_config.append(f"{current_indent} \\typeout{{Using {fallback_font} for emoji rendering}}") 53 | latex_config.append(f"{current_indent}}}{{") 54 | current_indent += " " 55 | latex_config.append(f"{current_indent}\\let\\emojifont\\rmfamily") 56 | latex_config.append(f"{current_indent}\\typeout{{Warning: No suitable emoji font found, using main font}}") 57 | for _ in range(len(fallback_fonts) + 1): 58 | current_indent = current_indent[:-2] 59 | latex_config.append(f"{current_indent}}}") 60 | return "\n".join(latex_config) 61 | 62 | def configure_emoji_fonts_for_template(template_vars): 63 | emoji_fonts_info = detect_emoji_fonts() 64 | font_config = generate_emoji_font_config(emoji_fonts_info) 65 | template_vars['emoji_font_config'] = font_config 66 | template_vars['primary_emoji_font'] = emoji_fonts_info['primary'] 67 | template_vars['emoji_fonts_available'] = emoji_fonts_info['available'] 68 | template_vars['emoji_fallback_fonts'] = emoji_fonts_info['fallbacks'] 69 | return template_vars 70 | 71 | def validate_emoji_support_requirements(emoji: bool, diagnostics_mode: bool = False) -> dict: 72 | # 简化版省略详细诊断逻辑,仅调用 detect_emoji_fonts 并返回结构 73 | validation_result = { 74 | 'valid': True, 75 | 'warnings': [], 76 | 'errors': [], 77 | 'engine': 'lualatex' if emoji else 'xelatex', 78 | 'emoji_fonts': detect_emoji_fonts(), 79 | 'diagnostics': [], 80 | 'system_info': {} 81 | } 82 | return validation_result 83 | 84 | def _analyze_pandoc_error(error, emoji, pdf_engine, emoji_validation): 85 | """分析 Pandoc 错误并提供重试建议""" 86 | error_analysis = { 87 | 'retry_recommended': False, 88 | 'retry_reason': '', 89 | 'suggested_fixes': [] 90 | } 91 | 92 | error_message = str(error.stderr) if hasattr(error, 'stderr') else str(error) 93 | 94 | # 常见错误模式分析 95 | if 'xelatex' in error_message.lower() and 'not found' in error_message.lower(): 96 | error_analysis['retry_recommended'] = True 97 | error_analysis['retry_reason'] = 'XeLaTeX engine issue detected, trying basic fallback' 98 | 
error_analysis['suggested_fixes'] = ['remove_shell_escape'] 99 | elif 'timeout' in error_message.lower(): 100 | error_analysis['retry_recommended'] = True 101 | error_analysis['retry_reason'] = 'Timeout detected, retrying with extended timeout' 102 | elif emoji and 'lua' in error_message.lower(): 103 | error_analysis['retry_recommended'] = True 104 | error_analysis['retry_reason'] = 'Emoji/Lua filter issue, trying without emoji filters' 105 | error_analysis['suggested_fixes'] = ['remove_emoji_filters'] 106 | 107 | return error_analysis 108 | 109 | def _apply_error_fixes(cmd, suggested_fixes): 110 | """根据错误分析应用修复建议""" 111 | new_cmd = cmd.copy() 112 | 113 | for fix in suggested_fixes: 114 | if fix == 'remove_shell_escape': 115 | # 移除 shell-escape 选项 116 | new_cmd = [arg for arg in new_cmd if '--pdf-engine-opt=-shell-escape' not in arg] 117 | elif fix == 'remove_emoji_filters': 118 | # 移除emoji相关的lua过滤器 119 | new_cmd = [arg for arg in new_cmd if 'emoji' not in arg.lower()] 120 | 121 | return new_cmd 122 | 123 | def _handle_final_pandoc_failure(error, emoji, pdf_engine, emoji_validation, tmp_path, template_path, emoji_filter_path): 124 | """处理最终的 Pandoc 失败""" 125 | print("❌ PDF generation failed after all retry attempts") 126 | print("\n🔍 Error Analysis:") 127 | 128 | if hasattr(error, 'stderr') and error.stderr: 129 | print(f" Pandoc stderr: {error.stderr[:500]}...") 130 | 131 | print(f" PDF Engine: {pdf_engine}") 132 | print(f" Emoji enabled: {emoji}") 133 | print(f" Template: {template_path}") 134 | 135 | print("\n💡 Troubleshooting suggestions:") 136 | print(" 1. Check if XeLaTeX/LuaLaTeX is properly installed") 137 | print(" 2. Verify template.tex syntax") 138 | print(" 3. Try without emoji support (remove --emoji flag)") 139 | print(" 4. Check intermediate markdown file for issues") 140 | print(f" 5. 
Debug file: {tmp_path}") 141 | 142 | print("\n📋 For detailed diagnostics, run:") 143 | print(" python cli.py --diagnostics ") 144 | -------------------------------------------------------------------------------- /emoji-commands.tex: -------------------------------------------------------------------------------- 1 | %% Emoji-specific LaTeX commands and environments 2 | %% This file provides comprehensive emoji support with sizing, coloring, and context-aware rendering 3 | 4 | % --- Core Emoji Commands --- 5 | 6 | % Enhanced \emoji{} command with fallback support 7 | \newcommand{\customemoji}[1]{% 8 | \ifcsname emojifont\endcsname% 9 | {{\emojifont #1}}% 10 | \else% 11 | \typeout{Warning: Emoji font not available, using fallback}% 12 | #1% 13 | \fi% 14 | } 15 | 16 | % --- Emoji Sizing Commands --- 17 | 18 | % Define emoji size presets 19 | \newcommand{\emojisize}[1]{\fontsize{#1}{#1}\selectfont} 20 | 21 | % Predefined emoji sizes 22 | \newcommand{\emojitiny}[1]{\begingroup\emojisize{8pt}\emoji{#1}\endgroup} 23 | \newcommand{\emojismall}[1]{\begingroup\emojisize{10pt}\emoji{#1}\endgroup} 24 | \newcommand{\emojinormal}[1]{\begingroup\emojisize{12pt}\emoji{#1}\endgroup} 25 | \newcommand{\emojilarge}[1]{\begingroup\emojisize{14pt}\emoji{#1}\endgroup} 26 | \newcommand{\emojiLarge}[1]{\begingroup\emojisize{17pt}\emoji{#1}\endgroup} 27 | \newcommand{\emojiLARGE}[1]{\begingroup\emojisize{20pt}\emoji{#1}\endgroup} 28 | \newcommand{\emojihuge}[1]{\begingroup\emojisize{25pt}\emoji{#1}\endgroup} 29 | \newcommand{\emojiHuge}[1]{\begingroup\emojisize{30pt}\emoji{#1}\endgroup} 30 | 31 | % Custom size emoji command 32 | \newcommand{\emojiwithsize}[2]{\begingroup\emojisize{#2}\emoji{#1}\endgroup} 33 | 34 | % --- Emoji Color Commands --- 35 | 36 | % Note: Most emoji fonts are color fonts and don't respond to LaTeX color commands 37 | % These commands are provided for fallback scenarios or special cases 38 | 39 | \newcommand{\emojicolored}[2]{% 40 | \ifcsname emojifont\endcsname% 41 
| {{\emojifont\textcolor{#1}{#2}}}% 42 | \else% 43 | \textcolor{#1}{#2}% 44 | \fi% 45 | } 46 | 47 | % --- Context-Aware Emoji Commands --- 48 | 49 | % Emoji in headings - automatically sized for heading level 50 | \newcommand{\emojiheading}[2][]{% 51 | \ifx\@currsize\Huge% 52 | \emojihuge{#2}% For \chapter 53 | \else\ifx\@currsize\LARGE% 54 | \emojiLARGE{#2}% For \section 55 | \else\ifx\@currsize\Large% 56 | \emojilarge{#2}% For \subsection 57 | \else\ifx\@currsize\large% 58 | \emojinormal{#2}% For \subsubsection 59 | \else% 60 | \emoji{#2}% Default size 61 | \fi\fi\fi\fi% 62 | } 63 | 64 | % Emoji in tables - optimized for table context 65 | \newcommand{\emojiintable}[1]{% 66 | \begingroup% 67 | \emojisize{10pt}% Smaller size for tables 68 | \emoji{#1}% 69 | \endgroup% 70 | } 71 | 72 | % Emoji in lists - consistent with list text 73 | \newcommand{\emojiinlist}[1]{% 74 | \begingroup% 75 | \emojisize{11pt}% Slightly smaller than normal text 76 | \emoji{#1}% 77 | \endgroup% 78 | } 79 | 80 | % Emoji in captions - matching caption font size 81 | \newcommand{\emojicaption}[1]{% 82 | \begingroup% 83 | \emojisize{10pt}% Caption size 84 | \emoji{#1}% 85 | \endgroup% 86 | } 87 | 88 | % --- Fallback Text Representations --- 89 | 90 | % Define fallback text for common emojis when emoji font is not available 91 | \newcommand{\emojifallback}[2]{% 92 | \ifcsname emojifont\endcsname% 93 | \emoji{#1}% 94 | \else% 95 | \texttt{#2}% Use monospace for fallback text 96 | \fi% 97 | } 98 | 99 | % Common emoji fallbacks (only define if not already defined) 100 | \providecommand{\emojicheck}{\emojifallback{✅}{[CHECK]}} 101 | \providecommand{\emojicross}{\emojifallback{❌}{[X]}} 102 | \providecommand{\emojiwarning}{\emojifallback{⚠️}{[WARNING]}} 103 | \providecommand{\emojinote}{\emojifallback{📝}{[NOTE]}} 104 | \providecommand{\emojitool}{\emojifallback{🔧}{[TOOL]}} 105 | \providecommand{\emojiidea}{\emojifallback{💡}{[IDEA]}} 106 | \providecommand{\emojirocket}{\emojifallback{🚀}{[ROCKET]}} 
% Common emoji fallbacks, continued (only define if not already defined).
% Consistency/robustness fix: the earlier fallbacks (\emojicheck ... \emojistar)
% use \providecommand, but \emojiheart ... \emojiupload used \newcommand, which
% aborts compilation with "Command already defined" if a template or package has
% already declared one of these names. \providecommand behaves identically when
% the name is undefined and silently keeps an existing definition otherwise.
\providecommand{\emojichart}{\emojifallback{📊}{[CHART]}}
\providecommand{\emojitarget}{\emojifallback{🎯}{[TARGET]}}
\providecommand{\emojistar}{\emojifallback{⭐}{[STAR]}}
\providecommand{\emojiheart}{\emojifallback{❤️}{[HEART]}}
\providecommand{\emojithumbsup}{\emojifallback{👍}{[THUMBS-UP]}}
\providecommand{\emojithumbsdown}{\emojifallback{👎}{[THUMBS-DOWN]}}
\providecommand{\emojifire}{\emojifallback{🔥}{[FIRE]}}
\providecommand{\emojiparty}{\emojifallback{🎉}{[PARTY]}}
\providecommand{\emojiclock}{\emojifallback{🕐}{[CLOCK]}}
\providecommand{\emojiphone}{\emojifallback{📱}{[PHONE]}}
\providecommand{\emojicomputer}{\emojifallback{💻}{[COMPUTER]}}
\providecommand{\emojibook}{\emojifallback{📚}{[BOOK]}}
\providecommand{\emojiemail}{\emojifallback{📧}{[EMAIL]}}
\providecommand{\emojilink}{\emojifallback{🔗}{[LINK]}}
\providecommand{\emojikey}{\emojifallback{🔑}{[KEY]}}
\providecommand{\emojilock}{\emojifallback{🔒}{[LOCK]}}
\providecommand{\emojiunlock}{\emojifallback{🔓}{[UNLOCK]}}
\providecommand{\emojisearch}{\emojifallback{🔍}{[SEARCH]}}
\providecommand{\emojidownload}{\emojifallback{⬇️}{[DOWNLOAD]}}
\providecommand{\emojiupload}{\emojifallback{⬆️}{[UPLOAD]}}

% --- Emoji Environments ---

% Environment for emoji-rich content with optimized spacing.
% Locally redefines \emoji so every emoji in the scope is typeset with
% \emojifont when that family exists, falling back to plain text otherwise.
\newenvironment{emojitext}{%
  \begingroup%
  \setlength{\parskip}{4pt plus 1pt minus 1pt}% Slightly more space between paragraphs
  \renewcommand{\emoji}[1]{%
    \ifcsname emojifont\endcsname%
      {{\emojifont ##1}}%
    \else%
      ##1%
    \fi%
  }%
}{%
  \endgroup%
}

% Environment for emoji lists with proper alignment.
% NOTE(review): the [leftmargin=...] optional argument requires the enlumitem
% package (or equivalent) to be loaded by the main template — confirm.
\newenvironment{emojiitemize}{%
  \begin{itemize}[leftmargin=2em,itemsep=2pt,parsep=0pt]%
  \renewcommand{\labelitemi}{\emoji{•}}% Use emoji bullet if available
}{%
  \end{itemize}%
}

% Environment for emoji tables with optimized rendering
154 | \newenvironment{emojitabular}{% 155 | \begingroup% 156 | \let\originalemoji\emoji% 157 | \renewcommand{\emoji}[1]{% 158 | \begingroup% 159 | \emojisize{9pt}% Smaller size for table context 160 | \originalemoji{##1}% 161 | \endgroup% 162 | }% 163 | }{% 164 | \endgroup% 165 | } 166 | 167 | % --- Advanced Emoji Commands --- 168 | 169 | % Emoji with tooltip-like fallback (for accessibility) 170 | \newcommand{\emojiwithalt}[2]{% 171 | \emoji{#1}% 172 | \ifcsname emojifont\endcsname% 173 | \else% 174 | \footnote{#2}% Add footnote with description if no emoji font 175 | \fi% 176 | } 177 | 178 | % Inline emoji with automatic spacing adjustment 179 | \newcommand{\emojiinline}[1]{% 180 | \,\emoji{#1}\,% Add thin spaces around emoji 181 | } 182 | 183 | % Emoji sequence for complex emoji combinations 184 | \newcommand{\emojisequence}[1]{% 185 | \ifcsname emojifont\endcsname% 186 | {{\emojifont #1}}% 187 | \else% 188 | #1% 189 | \fi% 190 | } 191 | 192 | % --- Emoji Debugging and Diagnostics --- 193 | 194 | % Command to test emoji rendering 195 | \newcommand{\emojitestrender}[1]{% 196 | \typeout{Testing emoji rendering for: #1}% 197 | \ifcsname emojifont\endcsname% 198 | \typeout{Emoji font available: \meaning\emojifont}% 199 | {{\emojifont #1}}% 200 | \else% 201 | \typeout{Warning: No emoji font available}% 202 | \texttt{[NO-EMOJI-FONT]}% 203 | \fi% 204 | } 205 | 206 | % Command to show emoji font information 207 | \newcommand{\emojifontinfo}{% 208 | \ifcsname emojifont\endcsname% 209 | \typeout{Emoji font family: \meaning\emojifont}% 210 | \texttt{Emoji font available}% 211 | \else% 212 | \typeout{No emoji font configured}% 213 | \texttt{No emoji font}% 214 | \fi% 215 | } 216 | 217 | % --- Compatibility Commands --- 218 | 219 | % Ensure compatibility with existing emoji usage 220 | \providecommand{\greencheckmark}{\emojicheck} 221 | \providecommand{\redcrossmark}{\emojicross} 222 | \providecommand{\orangewarningmark}{\emojiwarning} 223 | 224 | % Note: Legacy emoji 
commands are defined in the main template to avoid conflicts 225 | 226 | % --- End of Emoji Commands --- -------------------------------------------------------------------------------- /cache_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import shutil 4 | import hashlib 5 | import time 6 | from pathlib import Path 7 | 8 | def get_file_hash(file_path): 9 | hasher = hashlib.sha256() 10 | try: 11 | with open(file_path, 'rb') as f: 12 | for chunk in iter(lambda: f.read(4096), b""): 13 | hasher.update(chunk) 14 | return hasher.hexdigest() 15 | except Exception as e: 16 | print(f"Error calculating hash for {file_path}: {e}") 17 | return None 18 | 19 | def get_cache_dir(book_dir): 20 | book_dir = os.path.abspath(book_dir) 21 | current_dir = book_dir 22 | book_root_dir = book_dir 23 | while current_dir and current_dir != os.path.dirname(current_dir): 24 | parent = os.path.dirname(current_dir) 25 | parent_name = os.path.basename(parent) 26 | if parent_name == "book": 27 | book_root_dir = current_dir 28 | break 29 | current_dir = parent 30 | cache_dir = os.path.join(book_root_dir, 'image-caches') 31 | os.makedirs(cache_dir, exist_ok=True) 32 | return cache_dir 33 | 34 | def get_cache_metadata_path(cache_dir): 35 | return os.path.join(cache_dir, 'cache_metadata.json') 36 | 37 | def load_cache_metadata(cache_dir): 38 | metadata_path = get_cache_metadata_path(cache_dir) 39 | if os.path.exists(metadata_path): 40 | try: 41 | with open(metadata_path, 'r', encoding='utf-8') as f: 42 | return json.load(f) 43 | except Exception as e: 44 | print(f"Error loading cache metadata: {e}") 45 | return {} 46 | 47 | def save_cache_metadata(cache_dir, metadata): 48 | metadata_path = get_cache_metadata_path(cache_dir) 49 | try: 50 | with open(metadata_path, 'w', encoding='utf-8') as f: 51 | json.dump(metadata, f, indent=2, ensure_ascii=False) 52 | except Exception as e: 53 | print(f"Error saving cache metadata: {e}") 
54 | 55 | def get_cached_image(source_path, cache_dir, target_extension='.png'): 56 | if not os.path.exists(source_path): 57 | return None 58 | source_hash = get_file_hash(source_path) 59 | if not source_hash: 60 | return None 61 | source_name = os.path.basename(source_path) 62 | base_name = os.path.splitext(source_name)[0] 63 | cache_filename = f"{base_name}_{source_hash[:12]}{target_extension}" 64 | cache_path = os.path.join(cache_dir, cache_filename) 65 | metadata = load_cache_metadata(cache_dir) 66 | if os.path.exists(cache_path): 67 | cache_info = metadata.get(cache_filename, {}) 68 | cached_hash = cache_info.get('source_hash') 69 | if cached_hash == source_hash: 70 | print(f"Using cached image: {cache_path}") 71 | return cache_path 72 | else: 73 | print(f"Cache invalid for {source_path}, removing old cache") 74 | try: 75 | os.remove(cache_path) 76 | if cache_filename in metadata: 77 | del metadata[cache_filename] 78 | save_cache_metadata(cache_dir, metadata) 79 | except Exception as e: 80 | print(f"Error removing old cache: {e}") 81 | return None 82 | 83 | def save_to_cache(source_path, converted_path, cache_dir): 84 | if not os.path.exists(source_path) or not os.path.exists(converted_path): 85 | return None 86 | source_hash = get_file_hash(source_path) 87 | if not source_hash: 88 | return None 89 | source_name = os.path.basename(source_path) 90 | base_name = os.path.splitext(source_name)[0] 91 | target_extension = os.path.splitext(converted_path)[1] 92 | cache_filename = f"{base_name}_{source_hash[:12]}{target_extension}" 93 | cache_path = os.path.join(cache_dir, cache_filename) 94 | try: 95 | shutil.copy2(converted_path, cache_path) 96 | metadata = load_cache_metadata(cache_dir) 97 | metadata[cache_filename] = { 98 | 'source_path': os.path.abspath(source_path), 99 | 'source_hash': source_hash, 100 | 'cached_at': time.time(), 101 | 'cache_path': cache_path 102 | } 103 | save_cache_metadata(cache_dir, metadata) 104 | print(f"Saved to cache: {cache_path}") 105 
| return cache_path 106 | except Exception as e: 107 | print(f"Error saving to cache: {e}") 108 | return None 109 | 110 | def get_cached_image_by_key(cache_key, cache_dir, extension='.png'): 111 | try: 112 | metadata = load_cache_metadata(cache_dir) 113 | for filename, info in metadata.items(): 114 | if info.get('cache_key') == cache_key: 115 | cache_path = info.get('cache_path') 116 | if cache_path and os.path.exists(cache_path): 117 | return cache_path 118 | except Exception as e: 119 | print(f"Error checking cache: {e}") 120 | return None 121 | 122 | def save_to_cache_with_key(cache_key, file_path, cache_dir): 123 | try: 124 | cache_filename = f"{cache_key}.png" 125 | cache_path = os.path.join(cache_dir, cache_filename) 126 | shutil.copy2(file_path, cache_path) 127 | metadata = load_cache_metadata(cache_dir) 128 | metadata[cache_filename] = { 129 | 'cache_key': cache_key, 130 | 'cached_at': time.time(), 131 | 'cache_path': cache_path 132 | } 133 | save_cache_metadata(cache_dir, metadata) 134 | print(f"Cached enhanced image: {cache_path}") 135 | return cache_path 136 | except Exception as e: 137 | print(f"Error saving to cache: {e}") 138 | return None 139 | 140 | def clean_cache(book_dir, days_old=30): 141 | cache_dir = get_cache_dir(book_dir) 142 | if not os.path.exists(cache_dir): 143 | print("No cache directory found.") 144 | return 145 | metadata = load_cache_metadata(cache_dir) 146 | current_time = time.time() 147 | cutoff_time = current_time - (days_old * 24 * 60 * 60) 148 | cleaned_count = 0 149 | for cache_filename, cache_info in list(metadata.items()): 150 | cached_at = cache_info.get('cached_at', 0) 151 | cache_path = cache_info.get('cache_path', os.path.join(cache_dir, cache_filename)) 152 | if cached_at < cutoff_time or not os.path.exists(cache_path): 153 | try: 154 | if os.path.exists(cache_path): 155 | os.remove(cache_path) 156 | del metadata[cache_filename] 157 | cleaned_count += 1 158 | print(f"Removed cache: {cache_filename}") 159 | except 
Exception as e: 160 | print(f"Error removing cache {cache_filename}: {e}") 161 | if cleaned_count > 0: 162 | save_cache_metadata(cache_dir, metadata) 163 | print(f"Cleaned {cleaned_count} cache files older than {days_old} days.") 164 | else: 165 | print("No cache files to clean.") 166 | 167 | def show_cache_info(book_dir): 168 | cache_dir = get_cache_dir(book_dir) 169 | if not os.path.exists(cache_dir): 170 | print("No cache directory found.") 171 | return 172 | metadata = load_cache_metadata(cache_dir) 173 | print(f"Cache directory: {cache_dir}") 174 | print(f"Cache files: {len(metadata)}") 175 | if metadata: 176 | total_size = 0 177 | for cache_filename, cache_info in metadata.items(): 178 | cache_path = cache_info.get('cache_path', os.path.join(cache_dir, cache_filename)) 179 | if os.path.exists(cache_path): 180 | size = os.path.getsize(cache_path) 181 | total_size += size 182 | cached_at = cache_info.get('cached_at', 0) 183 | age_days = (time.time() - cached_at) / (24 * 60 * 60) 184 | print(f" {cache_filename}: {size/1024:.1f}KB, {age_days:.1f} days old") 185 | print(f"Total cache size: {total_size/1024/1024:.2f}MB") 186 | -------------------------------------------------------------------------------- /frontmatter.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | def parse_front_matter(path): 4 | title = None 5 | weight = 9999 6 | draft = False 7 | publish = True 8 | export_pdf = True 9 | inside = False 10 | with open(path, 'r', encoding='utf-8') as f: 11 | for line in f: 12 | if line.strip() == '---': 13 | if not inside: 14 | inside = True 15 | continue 16 | else: 17 | break 18 | if inside: 19 | if line.startswith('title:'): 20 | title = line.split(':', 1)[1].strip().strip('"') 21 | elif line.startswith('weight:'): 22 | try: 23 | weight = int(line.split(':', 1)[1].strip()) 24 | except ValueError: 25 | pass 26 | elif line.startswith('draft:'): 27 | draft_value = line.split(':', 1)[1].strip().lower() 28 
| draft = draft_value in ['true', '1', 'yes'] 29 | elif line.startswith('publish:'): 30 | publish_value = line.split(':', 1)[1].strip().lower() 31 | publish = publish_value in ['true', '1', 'yes'] 32 | elif line.startswith('export_pdf:'): 33 | export_pdf_value = line.split(':', 1)[1].strip().lower() 34 | export_pdf = export_pdf_value in ['true', '1', 'yes'] 35 | elif line.startswith('pdf:'): 36 | pdf_value = line.split(':', 1)[1].strip().lower() 37 | export_pdf = pdf_value in ['true', '1', 'yes'] 38 | return title, weight, draft, publish, export_pdf 39 | 40 | def should_include(path: str, metadata: tuple = None, include_drafts: bool = False) -> bool: 41 | if metadata is None: 42 | import frontmatter 43 | _, _, draft, publish, export_pdf = parse_front_matter(path) 44 | else: 45 | _, _, draft, publish, export_pdf = metadata 46 | if draft and not include_drafts: 47 | return False 48 | if not publish: 49 | return False 50 | if not export_pdf: 51 | return False 52 | return True 53 | 54 | def load_book_config(book_dir): 55 | """Load book configuration from the _index.md front matter.""" 56 | import os 57 | from tree import find_asset 58 | 59 | index_path = None 60 | for name in ("_index.md", "index.md"): 61 | candidate = os.path.join(book_dir, name) 62 | if os.path.exists(candidate): 63 | index_path = candidate 64 | break 65 | 66 | if not index_path: 67 | return {} 68 | 69 | config = {} 70 | with open(index_path, 'r', encoding='utf-8') as f: 71 | content = f.read() 72 | 73 | # Extract front matter 74 | if content.startswith('---'): 75 | try: 76 | # Find the end of front matter 77 | end_pos = content.find('\n---\n', 3) 78 | if end_pos != -1: 79 | front_matter = content[3:end_pos] 80 | metadata = yaml.safe_load(front_matter) 81 | 82 | # Extract book configuration if it exists 83 | if isinstance(metadata, dict) and 'book' in metadata: 84 | book_config = metadata['book'] 85 | # Map book config to expected format 86 | config = { 87 | 'title': book_config.get('title', 
metadata.get('title', 'Book')), 88 | 'author': book_config.get('author', 'Author'), 89 | 'date': str(book_config.get('date', metadata.get('date', '2024'))).split('T')[0], 90 | 'description': book_config.get('description', metadata.get('description', '')), 91 | 'language': book_config.get('language', 'zh-hans'), 92 | 'cover': book_config.get('cover', None), 93 | 'website': book_config.get('website', ''), 94 | 'appendix': book_config.get('appendix', False), 95 | 'subject': book_config.get('subject', book_config.get('description', metadata.get('description', ''))), 96 | 'keywords': book_config.get('keywords', ''), 97 | 'creator': book_config.get('creator', 'LaTeX with hyperref'), 98 | 'producer': book_config.get('producer', 'XeLaTeX'), 99 | 100 | # Enhanced cover configuration 101 | 'cover_config': { 102 | 'image': book_config.get('cover', None), 103 | 'title_text': book_config.get('cover_title_text', book_config.get('title', metadata.get('title', 'Book'))), 104 | 'author_text': book_config.get('cover_author_text', book_config.get('author', 'Author')), 105 | 'subtitle_text': book_config.get('cover_subtitle_text', ''), 106 | 'title_color': book_config.get('cover_title_color', '#000000'), 107 | 'author_color': book_config.get('cover_author_color', '#333333'), 108 | 'subtitle_color': book_config.get('cover_subtitle_color', '#666666'), 109 | 'title_font_size': book_config.get('cover_title_font_size', 48), 110 | 'author_font_size': book_config.get('cover_author_font_size', 24), 111 | 'subtitle_font_size': book_config.get('cover_subtitle_font_size', 18), 112 | 'title_position': book_config.get('cover_title_position', 'center'), 113 | 'author_position': book_config.get('cover_author_position', 'bottom'), 114 | 'overlay_enabled': book_config.get('cover_overlay_enabled', True), 115 | 'text_shadow': book_config.get('cover_text_shadow', True), 116 | 'background_overlay': book_config.get('cover_background_overlay', False), 117 | 'overlay_opacity': 
book_config.get('cover_overlay_opacity', 0.7) 118 | }, 119 | 120 | # Back-cover configuration (enhanced with text, QR code, and link) 121 | 'backcover_image': book_config.get('backcover_image', None), 122 | 'backcover_text': book_config.get('backcover_text', None), 123 | 'qrcode_image': book_config.get('qrcode_image', None), 124 | 'backcover_link_text': book_config.get('backcover_link_text', None), 125 | 'backcover_link_url': book_config.get('backcover_link_url', None), 126 | 127 | # Back-cover styling options 128 | 'backcover_text_color': book_config.get('backcover_text_color', '#000000'), 129 | 'backcover_link_color': book_config.get('backcover_link_color', '#0066CC'), 130 | 'backcover_text_font_size': book_config.get('backcover_text_font_size', 16), 131 | 'backcover_link_font_size': book_config.get('backcover_link_font_size', 14), 132 | 'qrcode_size': book_config.get('qrcode_size', '0.15\\paperwidth'), 133 | 'backcover_top_margin': book_config.get('backcover_top_margin', '0.2\\textheight'), 134 | 'backcover_bottom_margin': book_config.get('backcover_bottom_margin', '0.2\\textheight'), 135 | 'backcover_spacing_1': book_config.get('backcover_spacing_1', '1.5cm'), 136 | 'backcover_spacing_2': book_config.get('backcover_spacing_2', '1cm'), 137 | 138 | # Typography and styling configuration 139 | 'typography': { 140 | 'body_color': book_config.get('body_color', '#000000'), 141 | 'heading_color': book_config.get('heading_color', '#000000'), 142 | 'link_color': book_config.get('link_color', '#0066cc'), 143 | 'code_color': book_config.get('code_color', '#d14'), 144 | 'quote_color': book_config.get('quote_color', '#666666'), 145 | 'caption_color': book_config.get('caption_color', '#666666') 146 | } 147 | } 148 | else: 149 | # Fallback to using main front matter 150 | config = { 151 | 'title': metadata.get('title', 'Book'), 152 | 'author': metadata.get('author', 'Author'), 153 | 'date': str(metadata.get('date', '2024')).split('T')[0] if metadata.get('date') else '2024', 
154 | 'description': metadata.get('description', ''), 155 | 'language': 'zh-hans', 156 | 'cover_config': { 157 | 'overlay_enabled': True, 158 | 'title_color': '#000000', 159 | 'author_color': '#333333' 160 | }, 161 | 'typography': { 162 | 'body_color': '#000000', 163 | 'heading_color': '#000000' 164 | } 165 | } 166 | except Exception as e: 167 | print(f"Warning: Failed to parse front matter: {e}") 168 | config = { 169 | 'title': 'Book', 170 | 'author': 'Author', 171 | 'date': '2024', 172 | 'language': 'zh-hans', 173 | 'cover_config': {'overlay_enabled': True}, 174 | 'typography': {'body_color': '#000000'} 175 | } 176 | 177 | # Print config for debugging 178 | print(f"Loaded book configuration with backcover: {config.get('backcover_image', 'None')}") 179 | return config 180 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /table-wrap.lua: -------------------------------------------------------------------------------- 1 | -- tools/pdf-book-exporter/filters/table-wrap.lua 2 | -- Advanced Pandoc Lua filter for converting markdown tables to LaTeX longtable format 3 | -- 4 | -- This filter provides comprehensive table processing capabilities: 5 | -- 1. Converts pipe tables to longtable for automatic page breaking 6 | -- 2. Handles long text content with intelligent wrapping and hyphenation 7 | -- 3. Processes inline code within table cells using flexcode command 8 | -- 4. Supports configurable table widths via metadata 9 | -- 5. 
-- Provides safe LaTeX character escaping
--
-- Key enhancements:
-- - Responsive column sizing based on max_table_width parameter
-- - URL and long text detection with seqsplit wrapping
-- - Proper handling of CJK text and technical content
-- - Integration with minted/listings code highlighting

-- Hyphenation hook for long words in table cells.
--
-- Currently an identity transform: underscore handling lives in
-- latex_escape, so this function only normalizes a missing value to the
-- empty string. It is kept as a stable extension point for future
-- soft-hyphen insertion around ::, /, _ and - in technical terms.
function add_hyphenation_penalties(text)
  -- nil (or false) collapses to "", any string passes through untouched.
  return text or ""
end

-- Predicate: does this text need \seqsplit wrapping to avoid table overflow?
--
-- Content that requires special LaTeX wrapping:
-- 1. URLs (http/https/ftp protocols)
-- 2. Domain names and web addresses
-- 3.
-- Long unbroken sequences (>30 characters)
--
-- The seqsplit package lets LaTeX break such sequences at any character
-- when normal hyphenation fails, preventing table overflow.
function needs_seqsplit(text)
  if not text then
    return false
  end

  -- URL-ish patterns that must be breakable at arbitrary positions.
  local url_like = {
    "https?://",
    "ftp://",
    "www%.",
    "%w+%.%w+%.%w+", -- domain.subdomain.tld pattern
    "%w+%.%w+/",     -- domain.tld/path pattern
  }
  for _, pattern in ipairs(url_like) do
    if text:find(pattern) then
      return true
    end
  end

  -- A single whitespace-free run longer than 30 bytes also overflows cells.
  for token in text:gmatch("%S+") do
    if #token > 30 then
      return true
    end
  end

  return false
end

-- Function to safely convert cell content to LaTeX while preserving commands
-- and handling inline code specially for table cells
--
-- This is the core function for table cell processing:
-- 1. Handles different Pandoc AST element types (Str, Code, RawInline, etc.)
-- 2. Applies appropriate LaTeX escaping for special characters
-- 3. Converts inline code to flexcode commands for better table formatting
-- 4. Processes emphasis, strong text, and strikeout formatting
-- 5.
-- Ensures no unescaped line breaks that would break LaTeX compilation
--
-- Special handling for table context:
-- - Uses flexcode instead of texttt for better code wrapping
-- - Applies seqsplit for URLs and long sequences
-- - Replaces line breaks with spaces to maintain table structure
--
-- Parameters: cell_contents — a list of Pandoc inline/Plain elements.
-- Returns: a single-line LaTeX string safe to place inside a table cell.
function cell_to_latex(cell_contents)
  if not cell_contents or #cell_contents == 0 then
    return ""
  end

  local result = {}

  for _, element in ipairs(cell_contents) do
    if element.t == "Str" then
      -- Regular text: escape, add hyphenation hooks, and seqsplit-wrap
      -- URLs/very long runs (detected on the *unescaped* text).
      local escaped_text = add_hyphenation_penalties(latex_escape(element.text))
      if needs_seqsplit(element.text) then
        result[#result + 1] = "\\seqsplit{" .. escaped_text .. "}"
      else
        result[#result + 1] = escaped_text
      end

    elseif element.t == "Code" then
      -- \flexcode wraps better than \texttt inside p{} columns.
      result[#result + 1] = "\\flexcode{" .. latex_escape(element.text) .. "}"

    elseif element.t == "RawInline" and element.format == "latex" then
      -- Rewrite \texttt{...} produced by earlier filters into \flexcode{...};
      -- pass any other raw LaTeX through untouched.
      local latex_text = element.text
      if latex_text:match("^\\texttt{.*}$") then
        local code_content = latex_text:match("^\\texttt{(.*)}$")
        if code_content then
          result[#result + 1] = "\\flexcode{" .. code_content .. "}"
        else
          result[#result + 1] = latex_text
        end
      else
        result[#result + 1] = latex_text
      end

    elseif element.t == "Space" then
      result[#result + 1] = " "

    elseif element.t == "SoftBreak" or element.t == "LineBreak" then
      -- Line breaks become spaces so the table row stays on one LaTeX line.
      result[#result + 1] = " "

    elseif element.t == "Emph" then
      result[#result + 1] = "\\emph{" .. cell_to_latex(element.content) .. "}"

    elseif element.t == "Strong" then
      result[#result + 1] = "\\textbf{" .. cell_to_latex(element.content) .. "}"

    elseif element.t == "Strikeout" then
      result[#result + 1] = "\\sout{" .. cell_to_latex(element.content) .. "}"

    elseif element.t == "Link" then
      -- Render as \href{url}{text}; escape only the characters hyperref
      -- treats specially in the URL argument.
      local link_text = cell_to_latex(element.content)
      local link_url = element.target
      link_url = link_url:gsub("#", "\\#")
      link_url = link_url:gsub("%$", "\\$")
      link_url = link_url:gsub("&", "\\&")
      link_url = link_url:gsub("%%", "\\%%")
      link_url = link_url:gsub("_", "\\_")
      result[#result + 1] = "\\href{" .. link_url .. "}{" .. link_text .. "}"

    elseif element.t == "Image" then
      -- Constrain images so they fit inside a table cell.
      local image_src = element.src
      image_src = image_src:gsub("#", "\\#")
      image_src = image_src:gsub("%$", "\\$")
      image_src = image_src:gsub("&", "\\&")
      image_src = image_src:gsub("%%", "\\%%")
      image_src = image_src:gsub("_", "\\_")
      result[#result + 1] =
        "\\includegraphics[width=0.8\\linewidth,height=2cm,keepaspectratio]{" .. image_src .. "}"

    elseif element.t == "Plain" then
      -- Plain wraps other inlines; recurse.
      result[#result + 1] = cell_to_latex(element.content)

    else
      -- Unknown element: stringify safely, then apply the same escaping
      -- and long-text handling as plain Str text.
      local stringified = pandoc.utils.stringify({element})
      local escaped_text = add_hyphenation_penalties(latex_escape(stringified))
      if needs_seqsplit(stringified) then
        result[#result + 1] = "\\seqsplit{" .. escaped_text .. "}"
      else
        result[#result + 1] = escaped_text
      end
    end
  end

  -- Guarantee a single physical line: any residual CR/LF would break the
  -- generated longtable row.
  return table.concat(result):gsub("[\n\r]", " ")
end

-- Escape LaTeX special characters in plain text.
--
-- BUG FIX: the previous version replaced "\" with "\textbackslash{}" FIRST
-- and then escaped "{" and "}", which mangled the braces that the
-- replacement itself introduced (a lone "\" rendered as "\{}" instead of a
-- backslash). We now park backslashes on a sentinel byte, escape braces and
-- the other specials, and expand the sentinel last.
function latex_escape(text)
  if not text then return "" end
  -- \1 is vanishingly unlikely in Markdown cell text — NOTE(review):
  -- confirm upstream input can never contain control byte 0x01.
  local BS = "\1"
  text = text:gsub("\\", BS)
  text = text:gsub("{", "\\{")
  text = text:gsub("}", "\\}")
  text = text:gsub("%$", "\\$")
  text = text:gsub("&", "\\&")
  text = text:gsub("%%", "\\%%")
  text = text:gsub("#", "\\#")
  text = text:gsub("%^", "\\textasciicircum{}")
  text = text:gsub("_", "\\_")
  text = text:gsub("~", "\\textasciitilde{}")
  text = text:gsub(BS, "\\textbackslash{}")
  return text
end

-- Main table processing function - converts Pandoc tables to LaTeX longtable
--
-- This function implements comprehensive table processing:
-- 1. Extracts table width configuration from Pandoc metadata
-- 2. Calculates optimal column widths with safety margins
-- 3. Generates LaTeX column specifications with proper alignment
-- 4. Processes header and body content with cell_to_latex
-- 5.
-- Creates longtable environment with page-break support
--
-- Key features:
-- - Configurable max_table_width (default 0.98 of text width)
-- - Support for left, right, and center column alignment
-- - Automatic header repetition on page breaks (\endfirsthead/\endhead)
-- - Proper spacing calculation accounting for vertical borders
-- - Safety factor to prevent LaTeX dimension errors

-- Document metadata captured on the first filter pass (see the returned
-- filter list at the bottom of this file).
-- BUG FIX: the previous code read PANDOC_STATE.meta, but pandoc's
-- CommonState has no `meta` field, so the max_table_width override could
-- never apply. Metadata must be captured via a Meta handler in an earlier
-- filter pass instead.
local doc_meta = nil

-- First-pass handler: stash document metadata for Table() below.
function Meta(meta)
  doc_meta = meta
  return meta
end

-- Convert a Pandoc Table element into a raw-LaTeX longtable block.
function Table(tbl)
  local num_cols = #tbl.colspecs

  -- Resolve max_table_width from metadata (-V max_table_width or front
  -- matter). Aggressive 0.98 default to maximize page-width utilization.
  local max_table_width = 0.98
  local meta_value = doc_meta and doc_meta.max_table_width
  if meta_value ~= nil then
    if type(meta_value) == "number" then
      max_table_width = meta_value
    elseif type(meta_value) == "string" then
      max_table_width = tonumber(meta_value) or 0.98
    else
      -- MetaInlines/MetaString wrappers (table or userdata depending on
      -- pandoc version): stringify then parse.
      max_table_width = tonumber(pandoc.utils.stringify(meta_value)) or 0.98
    end
  end

  -- Conservative width reduction only for very wide tables.
  if num_cols >= 6 then
    max_table_width = max_table_width * 0.96 -- 4% reduction for very wide tables
  elseif num_cols >= 5 then
    max_table_width = max_table_width * 0.98 -- 2% reduction for wide tables
  end

  -- Optional per-table override via a min_col_width attribute.
  local min_col_width = nil
  if tbl.attr and tbl.attr.attributes and tbl.attr.attributes.min_col_width then
    min_col_width = tonumber(tbl.attr.attributes.min_col_width)
  end

  -- Per-column width: even split of max_table_width with a 0.92 safety
  -- factor to absorb borders/padding and avoid LaTeX dimension overflow.
  local width_ratio = (max_table_width / num_cols) * 0.92
  if min_col_width then
    -- Honor the override, but never let a column exceed its fair share.
    width_ratio = math.max(width_ratio, math.min(min_col_width, max_table_width / num_cols))
  end
  local col_width = string.format("\\dimexpr%.4f\\textwidth\\relax", width_ratio)

  -- Build the |col|col|...| specification with alignment + wrapping support.
  local column_spec = "|"
  for i = 1, num_cols do
    local align = tbl.colspecs[i] and tbl.colspecs[i][1]
    local alignment_spec
    if align == "AlignCenter" then
      alignment_spec = ">{\\centering\\arraybackslash}p{" .. col_width .. "}"
    elseif align == "AlignRight" then
      alignment_spec = ">{\\raggedleft\\arraybackslash}p{" .. col_width .. "}"
    else -- AlignLeft / AlignDefault / missing colspec
      alignment_spec = ">{\\raggedright\\arraybackslash\\hspace{0pt}}p{" .. col_width .. "}"
    end
    column_spec = column_spec .. alignment_spec .. "|"
  end

  local lines = {
    "\\begin{longtable}" .. "{" .. column_spec .. "}",
    "\\hline",
  }

  -- Render one row; header cells are wrapped in \textbf.
  local function emit_row(row, bold)
    local cells = {}
    for _, cell in ipairs(row.cells) do
      local cell_latex = cell_to_latex(cell.contents)
      if bold then
        cell_latex = "\\textbf{" .. cell_latex .. "}"
      end
      cells[#cells + 1] = cell_latex
    end
    lines[#lines + 1] = table.concat(cells, " & ") .. " \\\\"
  end

  -- Header: emitted twice so longtable repeats it after page breaks.
  if tbl.head and tbl.head.rows and #tbl.head.rows > 0 then
    for _, row in ipairs(tbl.head.rows) do emit_row(row, true) end
    lines[#lines + 1] = "\\hline"
    lines[#lines + 1] = "\\endfirsthead"
    lines[#lines + 1] = "\\hline"
    for _, row in ipairs(tbl.head.rows) do emit_row(row, true) end
    lines[#lines + 1] = "\\hline"
    lines[#lines + 1] = "\\endhead"
  end

  -- Body rows, each followed by a horizontal rule.
  if tbl.bodies then
    for _, body in ipairs(tbl.bodies) do
      if body.body then
        for _, row in ipairs(body.body) do
          emit_row(row, false)
          lines[#lines + 1] = "\\hline"
        end
      end
    end
  end

  lines[#lines + 1] = "\\hline"
  lines[#lines + 1] = "\\end{longtable}"

  return pandoc.RawBlock("latex", table.concat(lines, "\n"))
end

-- Two-pass filter: capture metadata first, then transform tables.
return { { Meta = Meta }, { Table = Table } }

--------------------------------------------------------------------------------
-- /filters/table-wrap.lua:
--------------------------------------------------------------------------------
-- tools/pdf-book-exporter/filters/table-wrap.lua
-- Advanced Pandoc Lua filter for converting markdown tables to LaTeX longtable format
--
-- This filter provides comprehensive table processing capabilities:
-- 1. Converts pipe tables to longtable for automatic page breaking
-- 2. Handles long text content with intelligent wrapping and hyphenation
-- 3. Processes inline code within table cells using flexcode command
-- 4. Supports configurable table widths via metadata
-- 5.
-- Provides safe LaTeX character escaping
--
-- Key enhancements:
-- - Responsive column sizing based on max_table_width parameter
-- - URL and long text detection with seqsplit wrapping
-- - Proper handling of CJK text and technical content
-- - Integration with minted/listings code highlighting

-- Hyphenation hook for long words in table cells.
--
-- Currently an identity transform: underscore handling lives in
-- latex_escape, so this function only normalizes a missing value to the
-- empty string. Retained as an extension point for future soft-hyphen
-- insertion around ::, /, _ and - in technical terms.
function add_hyphenation_penalties(text)
  -- nil (or false) collapses to "", any string passes through untouched.
  return text or ""
end

-- Predicate: does this text need \seqsplit wrapping to avoid table overflow?
--
-- Content that requires special LaTeX wrapping:
-- 1. URLs (http/https/ftp protocols)
-- 2. Domain names and web addresses
-- 3.
-- Long unbroken sequences (>30 characters)
--
-- The seqsplit package lets LaTeX break such sequences at any character
-- when normal hyphenation fails, preventing table overflow.
function needs_seqsplit(text)
  if not text then
    return false
  end

  -- URL-ish patterns that must be breakable at arbitrary positions.
  local url_like = {
    "https?://",
    "ftp://",
    "www%.",
    "%w+%.%w+%.%w+", -- domain.subdomain.tld pattern
    "%w+%.%w+/",     -- domain.tld/path pattern
  }
  for _, pattern in ipairs(url_like) do
    if text:find(pattern) then
      return true
    end
  end

  -- A single whitespace-free run longer than 30 bytes also overflows cells.
  for token in text:gmatch("%S+") do
    if #token > 30 then
      return true
    end
  end

  return false
end

-- Function to safely convert cell content to LaTeX while preserving commands
-- and handling inline code specially for table cells
--
-- This is the core function for table cell processing:
-- 1. Handles different Pandoc AST element types (Str, Code, RawInline, etc.)
-- 2. Applies appropriate LaTeX escaping for special characters
-- 3. Converts inline code to flexcode commands for better table formatting
-- 4. Processes emphasis, strong text, and strikeout formatting
-- 5.
-- Ensures no unescaped line breaks that would break LaTeX compilation
--
-- Special handling for table context:
-- - Uses flexcode instead of texttt for better code wrapping
-- - Applies seqsplit for URLs and long sequences
-- - Replaces line breaks with spaces to maintain table structure
--
-- Parameters: cell_contents — a list of Pandoc inline/Plain elements.
-- Returns: a single-line LaTeX string safe to place inside a table cell.
function cell_to_latex(cell_contents)
  if not cell_contents or #cell_contents == 0 then
    return ""
  end

  local result = {}

  for _, element in ipairs(cell_contents) do
    if element.t == "Str" then
      -- Regular text: escape, add hyphenation hooks, and seqsplit-wrap
      -- URLs/very long runs (detected on the *unescaped* text).
      local escaped_text = add_hyphenation_penalties(latex_escape(element.text))
      if needs_seqsplit(element.text) then
        result[#result + 1] = "\\seqsplit{" .. escaped_text .. "}"
      else
        result[#result + 1] = escaped_text
      end

    elseif element.t == "Code" then
      -- \flexcode wraps better than \texttt inside p{} columns.
      result[#result + 1] = "\\flexcode{" .. latex_escape(element.text) .. "}"

    elseif element.t == "RawInline" and element.format == "latex" then
      -- Rewrite \texttt{...} produced by earlier filters into \flexcode{...};
      -- pass any other raw LaTeX through untouched.
      local latex_text = element.text
      if latex_text:match("^\\texttt{.*}$") then
        local code_content = latex_text:match("^\\texttt{(.*)}$")
        if code_content then
          result[#result + 1] = "\\flexcode{" .. code_content .. "}"
        else
          result[#result + 1] = latex_text
        end
      else
        result[#result + 1] = latex_text
      end

    elseif element.t == "Space" then
      result[#result + 1] = " "

    elseif element.t == "SoftBreak" or element.t == "LineBreak" then
      -- Line breaks become spaces so the table row stays on one LaTeX line.
      result[#result + 1] = " "

    elseif element.t == "Emph" then
      result[#result + 1] = "\\emph{" .. cell_to_latex(element.content) .. "}"

    elseif element.t == "Strong" then
      result[#result + 1] = "\\textbf{" .. cell_to_latex(element.content) .. "}"

    elseif element.t == "Strikeout" then
      result[#result + 1] = "\\sout{" .. cell_to_latex(element.content) .. "}"

    elseif element.t == "Link" then
      -- Render as \href{url}{text}; escape only the characters hyperref
      -- treats specially in the URL argument.
      local link_text = cell_to_latex(element.content)
      local link_url = element.target
      link_url = link_url:gsub("#", "\\#")
      link_url = link_url:gsub("%$", "\\$")
      link_url = link_url:gsub("&", "\\&")
      link_url = link_url:gsub("%%", "\\%%")
      link_url = link_url:gsub("_", "\\_")
      result[#result + 1] = "\\href{" .. link_url .. "}{" .. link_text .. "}"

    elseif element.t == "Image" then
      -- Constrain images so they fit inside a table cell.
      local image_src = element.src
      image_src = image_src:gsub("#", "\\#")
      image_src = image_src:gsub("%$", "\\$")
      image_src = image_src:gsub("&", "\\&")
      image_src = image_src:gsub("%%", "\\%%")
      image_src = image_src:gsub("_", "\\_")
      result[#result + 1] =
        "\\includegraphics[width=0.8\\linewidth,height=2cm,keepaspectratio]{" .. image_src .. "}"

    elseif element.t == "Plain" then
      -- Plain wraps other inlines; recurse.
      result[#result + 1] = cell_to_latex(element.content)

    else
      -- Unknown element: stringify safely, then apply the same escaping
      -- and long-text handling as plain Str text.
      local stringified = pandoc.utils.stringify({element})
      local escaped_text = add_hyphenation_penalties(latex_escape(stringified))
      if needs_seqsplit(stringified) then
        result[#result + 1] = "\\seqsplit{" .. escaped_text .. "}"
      else
        result[#result + 1] = escaped_text
      end
    end
  end

  -- Guarantee a single physical line: any residual CR/LF would break the
  -- generated longtable row.
  return table.concat(result):gsub("[\n\r]", " ")
end

-- Escape LaTeX special characters in plain text.
--
-- BUG FIX: the previous version replaced "\" with "\textbackslash{}" FIRST
-- and then escaped "{" and "}", which mangled the braces that the
-- replacement itself introduced (a lone "\" rendered as "\{}" instead of a
-- backslash). We now park backslashes on a sentinel byte, escape braces and
-- the other specials, and expand the sentinel last.
function latex_escape(text)
  if not text then return "" end
  -- \1 is vanishingly unlikely in Markdown cell text — NOTE(review):
  -- confirm upstream input can never contain control byte 0x01.
  local BS = "\1"
  text = text:gsub("\\", BS)
  text = text:gsub("{", "\\{")
  text = text:gsub("}", "\\}")
  text = text:gsub("%$", "\\$")
  text = text:gsub("&", "\\&")
  text = text:gsub("%%", "\\%%")
  text = text:gsub("#", "\\#")
  text = text:gsub("%^", "\\textasciicircum{}")
  text = text:gsub("_", "\\_")
  text = text:gsub("~", "\\textasciitilde{}")
  text = text:gsub(BS, "\\textbackslash{}")
  return text
end

-- Main table processing function - converts Pandoc tables to LaTeX longtable
--
-- This function implements comprehensive table processing:
-- 1. Extracts table width configuration from Pandoc metadata
-- 2. Calculates optimal column widths with safety margins
-- 3. Generates LaTeX column specifications with proper alignment
-- 4. Processes header and body content with cell_to_latex
-- 5.
Creates longtable environment with page-break support
--
-- Key features:
-- - Configurable max_table_width (default 0.85 of text width)
-- - Support for left, right, and center column alignment
-- - Automatic header repetition on page breaks (\endfirsthead/\endhead)
-- - Safety factor to prevent LaTeX dimension errors
function Table(tbl)
    -- Number of columns, taken from the table's column specifications
    local num_cols = #tbl.colspecs

    -- Extract maximum table width from Pandoc metadata (passed via -V max_table_width).
    -- Conservative default for reliable margin control; malformed metadata
    -- values also fall back to this same default.
    -- NOTE(review): PANDOC_STATE may not expose a `meta` field in all pandoc
    -- versions; if it does not, this branch never fires and the default is
    -- always used. Verify against the pandoc Lua API documentation.
    local max_table_width = 0.85
    if PANDOC_STATE.meta and PANDOC_STATE.meta.max_table_width then
        local meta_value = PANDOC_STATE.meta.max_table_width
        if type(meta_value) == "table" and meta_value.t == "MetaInlines" then
            -- MetaInlines format: stringify, then parse as a number
            max_table_width = tonumber(pandoc.utils.stringify(meta_value)) or 0.85
        elseif type(meta_value) == "number" then
            max_table_width = meta_value
        elseif type(meta_value) == "string" then
            max_table_width = tonumber(meta_value) or 0.85
        end
    end

    -- Tables with more columns need an extra conservative width calculation
    if num_cols >= 5 then
        max_table_width = max_table_width * 0.95 -- 5% additional reduction for wide tables
    elseif num_cols >= 4 then
        max_table_width = max_table_width * 0.97 -- 3% additional reduction
    end

    -- Table-level override for the minimum column width (via attribute)
    local min_col_width = nil
    if tbl.attr and tbl.attr.attributes and tbl.attr.attributes.min_col_width then
        min_col_width = tonumber(tbl.attr.attributes.min_col_width)
    end

    -- Per-column width ratio: an even share of max_table_width, times a
    -- safety factor (tightened from 0.95 to 0.92) for more conservative
    -- margin protection.
    local base_width_ratio = max_table_width / num_cols
    local safety_factor = 0.92
    local width_ratio = base_width_ratio * safety_factor

    -- Apply the minimum column width override if specified, but never let a
    -- single column exceed its even share of max_table_width.
    if min_col_width then
        width_ratio = math.max(width_ratio, math.min(min_col_width, max_table_width / num_cols))
    end

    -- Column width as a LaTeX dimension expression
    local col_width = string.format("\\dimexpr%.4f\\textwidth\\relax", width_ratio)

    -- Build the column specification: every column is a fixed-width p{}
    -- column (so long cell content can wrap) with vertical rules between.
    local column_spec = "|"
    for i = 1, num_cols do
        local alignment_spec
        if tbl.colspecs and tbl.colspecs[i] then
            local align = tbl.colspecs[i][1] -- alignment is the first element of a colspec
            if align == "AlignCenter" then
                alignment_spec = ">{\\centering\\arraybackslash}p{" .. col_width .. "}"
            elseif align == "AlignRight" then
                alignment_spec = ">{\\raggedleft\\arraybackslash}p{" .. col_width .. "}"
            else -- AlignLeft or AlignDefault
                alignment_spec = ">{\\raggedright\\arraybackslash\\hspace{0pt}}p{" .. col_width .. "}"
            end
        else
            -- Default to left-aligned with line-breaking support
            alignment_spec = ">{\\raggedright\\arraybackslash\\hspace{0pt}}p{" .. col_width .. "}"
        end
        column_spec = column_spec .. alignment_spec .. "|"
    end

    -- Assemble the longtable environment line by line
    local latex_content = {}
    latex_content[#latex_content + 1] = "\\begin{longtable}" .. "{" .. column_spec .. "}"
    latex_content[#latex_content + 1] = "\\hline"

    -- Header rows are emitted twice so they repeat after page breaks:
    -- \endfirsthead terminates the first-page header, \endhead the repeated one.
    if tbl.head and tbl.head.rows and #tbl.head.rows > 0 then
        for _, row in ipairs(tbl.head.rows) do
            local row_content = {}
            for _, cell in ipairs(row.cells) do
                -- Use safe cell conversion; header cells are rendered bold
                local cell_latex = cell_to_latex(cell.contents)
                row_content[#row_content + 1] = "\\textbf{" .. cell_latex .. "}"
            end
            latex_content[#latex_content + 1] = table.concat(row_content, " & ") .. " \\\\"
        end
        latex_content[#latex_content + 1] = "\\hline"
        latex_content[#latex_content + 1] = "\\endfirsthead"

        -- Repeat header on subsequent pages
        latex_content[#latex_content + 1] = "\\hline"
        for _, row in ipairs(tbl.head.rows) do
            local row_content = {}
            for _, cell in ipairs(row.cells) do
                local cell_latex = cell_to_latex(cell.contents)
                row_content[#row_content + 1] = "\\textbf{" .. cell_latex .. "}"
            end
            latex_content[#latex_content + 1] = table.concat(row_content, " & ") .. " \\\\"
        end
        latex_content[#latex_content + 1] = "\\hline"
        latex_content[#latex_content + 1] = "\\endhead"
    end

    -- Body rows: one horizontal line after each row
    if tbl.bodies and #tbl.bodies > 0 then
        for _, body in ipairs(tbl.bodies) do
            if body.body then
                for _, row in ipairs(body.body) do
                    local row_content = {}
                    for _, cell in ipairs(row.cells) do
                        local cell_latex = cell_to_latex(cell.contents)
                        row_content[#row_content + 1] = cell_latex
                    end
                    latex_content[#latex_content + 1] = table.concat(row_content, " & ") .. " \\\\"
                    latex_content[#latex_content + 1] = "\\hline"
                end
            end
        end
    end

    -- End longtable environment
    latex_content[#latex_content + 1] = "\\hline"
    latex_content[#latex_content + 1] = "\\end{longtable}"

    -- Replace the Pandoc table with the raw LaTeX we built
    return pandoc.RawBlock("latex", table.concat(latex_content, "\n"))
end

--------------------------------------------------------------------------------
/validate_lua_dependencies.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Lua Dependencies Validation for PDF Emoji Support

This module validates Lua filter dependencies and provides comprehensive
error handling for Lua-related issues in the PDF export process.
7 | """ 8 | 9 | import os 10 | import subprocess 11 | import tempfile 12 | import json 13 | from typing import Dict, List, Optional, Tuple 14 | from dataclasses import dataclass 15 | 16 | 17 | @dataclass 18 | class LuaValidationResult: 19 | """Result of Lua dependency validation.""" 20 | valid: bool 21 | errors: List[str] 22 | warnings: List[str] 23 | details: Dict[str, any] 24 | 25 | 26 | class LuaDependencyValidator: 27 | """Validator for Lua filter dependencies and functionality.""" 28 | 29 | def __init__(self, script_dir: str): 30 | self.script_dir = script_dir 31 | self.emoji_filter_path = os.path.join(script_dir, 'filters', 'emoji-passthrough.lua') 32 | 33 | def validate_lua_filter_syntax(self, filter_path: str) -> LuaValidationResult: 34 | """Validate Lua filter syntax using lua command.""" 35 | result = LuaValidationResult( 36 | valid=True, 37 | errors=[], 38 | warnings=[], 39 | details={'filter_path': filter_path} 40 | ) 41 | 42 | if not os.path.exists(filter_path): 43 | result.valid = False 44 | result.errors.append(f"Lua filter not found: {filter_path}") 45 | return result 46 | 47 | try: 48 | # Check if lua command is available 49 | lua_check = subprocess.run(['lua', '-v'], 50 | capture_output=True, text=True, timeout=5) 51 | 52 | if lua_check.returncode != 0: 53 | result.warnings.append("Lua interpreter not available for syntax checking") 54 | return result 55 | 56 | # Test syntax using luac (Lua compiler) which only checks syntax 57 | syntax_check = subprocess.run([ 58 | 'luac', '-p', filter_path 59 | ], capture_output=True, text=True, timeout=10) 60 | 61 | if syntax_check.returncode != 0: 62 | result.valid = False 63 | result.errors.append(f"Lua syntax error in {filter_path}") 64 | result.details['syntax_error'] = syntax_check.stderr 65 | else: 66 | result.details['syntax_valid'] = True 67 | 68 | except subprocess.TimeoutExpired: 69 | result.warnings.append("Lua syntax check timed out") 70 | except FileNotFoundError: 71 | result.warnings.append("Lua 
interpreter not found - skipping syntax validation") 72 | except Exception as e: 73 | result.warnings.append(f"Lua syntax check failed: {str(e)}") 74 | 75 | return result 76 | 77 | def validate_emoji_filter_functions(self) -> LuaValidationResult: 78 | """Validate that emoji filter contains required functions.""" 79 | result = LuaValidationResult( 80 | valid=True, 81 | errors=[], 82 | warnings=[], 83 | details={'filter_path': self.emoji_filter_path} 84 | ) 85 | 86 | if not os.path.exists(self.emoji_filter_path): 87 | result.valid = False 88 | result.errors.append(f"Emoji filter not found: {self.emoji_filter_path}") 89 | return result 90 | 91 | try: 92 | with open(self.emoji_filter_path, 'r', encoding='utf-8') as f: 93 | content = f.read() 94 | 95 | # Required functions for emoji processing 96 | required_functions = [ 97 | 'process_text', 98 | 'is_emoji', 99 | 'Str', 100 | 'Code', 101 | 'CodeBlock' 102 | ] 103 | 104 | # Required data structures for current emoji filter 105 | required_data = [ 106 | 'emoji_map', 107 | 'fallback_map' 108 | ] 109 | 110 | missing_functions = [] 111 | missing_data = [] 112 | 113 | for func in required_functions: 114 | if f'function {func}' not in content: 115 | missing_functions.append(func) 116 | 117 | for data in required_data: 118 | if data not in content: 119 | missing_data.append(data) 120 | 121 | if missing_functions: 122 | result.valid = False 123 | result.errors.append(f"Missing required functions: {', '.join(missing_functions)}") 124 | 125 | if missing_data: 126 | result.warnings.append(f"Missing data structures: {', '.join(missing_data)}") 127 | 128 | # Check for return statement (filter must return filter table) 129 | if 'return {' not in content and 'return' not in content: 130 | result.warnings.append("Filter may not return proper filter table") 131 | 132 | result.details.update({ 133 | 'file_size': len(content), 134 | 'missing_functions': missing_functions, 135 | 'missing_data': missing_data, 136 | 'has_return': 'return' in 
content 137 | }) 138 | 139 | except Exception as e: 140 | result.valid = False 141 | result.errors.append(f"Error reading emoji filter: {str(e)}") 142 | 143 | return result 144 | 145 | def test_emoji_filter_with_pandoc(self) -> LuaValidationResult: 146 | """Test emoji filter functionality with Pandoc.""" 147 | result = LuaValidationResult( 148 | valid=True, 149 | errors=[], 150 | warnings=[], 151 | details={'test_type': 'pandoc_integration'} 152 | ) 153 | 154 | if not os.path.exists(self.emoji_filter_path): 155 | result.valid = False 156 | result.errors.append("Emoji filter not found for testing") 157 | return result 158 | 159 | # Test cases with different emoji types 160 | test_cases = [ 161 | { 162 | 'name': 'basic_emoji', 163 | 'input': 'Hello 😀 World!', 164 | 'expected_pattern': r'\\emoji\{' 165 | }, 166 | { 167 | 'name': 'keycap_sequence', 168 | 'input': 'Press 1️⃣ to continue', 169 | 'expected_pattern': r'\\emoji\{1' 170 | }, 171 | { 172 | 'name': 'flag_sequence', 173 | 'input': 'Flag: 🇺🇸', 174 | 'expected_pattern': r'\\emoji\{' 175 | }, 176 | { 177 | 'name': 'skin_tone_modifier', 178 | 'input': 'Wave 👋🏻 hello', 179 | 'expected_pattern': r'\\emoji\{' 180 | } 181 | ] 182 | 183 | try: 184 | with tempfile.TemporaryDirectory() as temp_dir: 185 | test_results = {} 186 | 187 | for test_case in test_cases: 188 | test_result = self._run_single_pandoc_test( 189 | temp_dir, test_case['name'], 190 | test_case['input'], test_case['expected_pattern'] 191 | ) 192 | test_results[test_case['name']] = test_result 193 | 194 | # Analyze results 195 | passed_tests = sum(1 for r in test_results.values() if r['success']) 196 | total_tests = len(test_cases) 197 | 198 | result.details.update({ 199 | 'test_results': test_results, 200 | 'passed_tests': passed_tests, 201 | 'total_tests': total_tests, 202 | 'success_rate': passed_tests / total_tests if total_tests > 0 else 0 203 | }) 204 | 205 | if passed_tests == 0: 206 | result.valid = False 207 | result.errors.append("All emoji 
filter tests failed") 208 | elif passed_tests < total_tests: 209 | result.warnings.append(f"Some emoji filter tests failed ({passed_tests}/{total_tests} passed)") 210 | 211 | except Exception as e: 212 | result.warnings.append(f"Emoji filter testing failed: {str(e)}") 213 | 214 | return result 215 | 216 | def _run_single_pandoc_test(self, temp_dir: str, test_name: str, 217 | input_text: str, expected_pattern: str) -> Dict: 218 | """Run a single Pandoc test with the emoji filter.""" 219 | import re 220 | 221 | try: 222 | input_file = os.path.join(temp_dir, f'{test_name}.md') 223 | output_file = os.path.join(temp_dir, f'{test_name}.tex') 224 | 225 | with open(input_file, 'w', encoding='utf-8') as f: 226 | f.write(input_text) 227 | 228 | # Run pandoc with emoji filter 229 | cmd = [ 230 | 'pandoc', 231 | input_file, 232 | '-o', output_file, 233 | '--to=latex', 234 | f'--lua-filter={self.emoji_filter_path}' 235 | ] 236 | 237 | result = subprocess.run(cmd, capture_output=True, text=True, timeout=15) 238 | 239 | if result.returncode == 0 and os.path.exists(output_file): 240 | with open(output_file, 'r', encoding='utf-8') as f: 241 | output_content = f.read() 242 | 243 | # Check if expected pattern is found 244 | pattern_found = bool(re.search(expected_pattern, output_content)) 245 | 246 | return { 247 | 'success': pattern_found, 248 | 'output_content': output_content, 249 | 'pattern_found': pattern_found, 250 | 'expected_pattern': expected_pattern 251 | } 252 | else: 253 | return { 254 | 'success': False, 255 | 'error': result.stderr, 256 | 'returncode': result.returncode 257 | } 258 | 259 | except Exception as e: 260 | return { 261 | 'success': False, 262 | 'error': str(e) 263 | } 264 | 265 | def validate_pandoc_lua_support(self) -> LuaValidationResult: 266 | """Validate that Pandoc supports Lua filters.""" 267 | result = LuaValidationResult( 268 | valid=True, 269 | errors=[], 270 | warnings=[], 271 | details={'test_type': 'pandoc_lua_support'} 272 | ) 273 | 274 | try: 
275 | # Check Pandoc version and Lua filter support 276 | version_result = subprocess.run(['pandoc', '--version'], 277 | capture_output=True, text=True, timeout=10) 278 | 279 | if version_result.returncode != 0: 280 | result.valid = False 281 | result.errors.append("Pandoc not available") 282 | return result 283 | 284 | version_output = version_result.stdout 285 | result.details['pandoc_version'] = version_output.split('\n')[0] 286 | 287 | # Test basic Lua filter support with a minimal filter 288 | with tempfile.TemporaryDirectory() as temp_dir: 289 | # Create a minimal test filter 290 | test_filter = os.path.join(temp_dir, 'test.lua') 291 | with open(test_filter, 'w') as f: 292 | f.write(''' 293 | function Str(elem) 294 | return elem 295 | end 296 | ''') 297 | 298 | # Create test input 299 | test_input = os.path.join(temp_dir, 'test.md') 300 | test_output = os.path.join(temp_dir, 'test.tex') 301 | 302 | with open(test_input, 'w') as f: 303 | f.write('Test content') 304 | 305 | # Test Lua filter execution 306 | test_cmd = [ 307 | 'pandoc', 308 | test_input, 309 | '-o', test_output, 310 | '--to=latex', 311 | f'--lua-filter={test_filter}' 312 | ] 313 | 314 | test_result = subprocess.run(test_cmd, capture_output=True, text=True, timeout=15) 315 | 316 | if test_result.returncode == 0: 317 | result.details['lua_filter_support'] = True 318 | else: 319 | result.valid = False 320 | result.errors.append("Pandoc Lua filter support not working") 321 | result.details['test_error'] = test_result.stderr 322 | 323 | except subprocess.TimeoutExpired: 324 | result.warnings.append("Pandoc Lua support test timed out") 325 | except FileNotFoundError: 326 | result.valid = False 327 | result.errors.append("Pandoc not found") 328 | except Exception as e: 329 | result.warnings.append(f"Pandoc Lua support test failed: {str(e)}") 330 | 331 | return result 332 | 333 | def run_comprehensive_lua_validation(self) -> LuaValidationResult: 334 | """Run comprehensive Lua dependency validation.""" 
335 | overall_result = LuaValidationResult( 336 | valid=True, 337 | errors=[], 338 | warnings=[], 339 | details={'validation_type': 'comprehensive'} 340 | ) 341 | 342 | # Test 1: Pandoc Lua support 343 | pandoc_test = self.validate_pandoc_lua_support() 344 | overall_result.details['pandoc_lua_test'] = pandoc_test.details 345 | 346 | if not pandoc_test.valid: 347 | overall_result.valid = False 348 | overall_result.errors.extend(pandoc_test.errors) 349 | overall_result.warnings.extend(pandoc_test.warnings) 350 | 351 | # Test 2: Emoji filter syntax 352 | syntax_test = self.validate_lua_filter_syntax(self.emoji_filter_path) 353 | overall_result.details['syntax_test'] = syntax_test.details 354 | 355 | if not syntax_test.valid: 356 | overall_result.valid = False 357 | overall_result.errors.extend(syntax_test.errors) 358 | overall_result.warnings.extend(syntax_test.warnings) 359 | 360 | # Test 3: Emoji filter functions 361 | functions_test = self.validate_emoji_filter_functions() 362 | overall_result.details['functions_test'] = functions_test.details 363 | 364 | if not functions_test.valid: 365 | overall_result.valid = False 366 | overall_result.errors.extend(functions_test.errors) 367 | overall_result.warnings.extend(functions_test.warnings) 368 | 369 | # Test 4: Integration test (only if previous tests pass) 370 | if overall_result.valid: 371 | integration_test = self.test_emoji_filter_with_pandoc() 372 | overall_result.details['integration_test'] = integration_test.details 373 | 374 | if not integration_test.valid: 375 | overall_result.warnings.append("Emoji filter integration tests failed") 376 | overall_result.warnings.extend(integration_test.warnings) 377 | else: 378 | overall_result.warnings.extend(integration_test.warnings) 379 | 380 | return overall_result 381 | 382 | 383 | def main(): 384 | """Command-line interface for Lua dependency validation.""" 385 | import argparse 386 | 387 | parser = argparse.ArgumentParser(description='Validate Lua dependencies for 
emoji support') 388 | parser.add_argument('--script-dir', default='.', 389 | help='Directory containing the emoji filter') 390 | parser.add_argument('--verbose', '-v', action='store_true', 391 | help='Show detailed validation results') 392 | 393 | args = parser.parse_args() 394 | 395 | validator = LuaDependencyValidator(args.script_dir) 396 | result = validator.run_comprehensive_lua_validation() 397 | 398 | print("Lua Dependencies Validation Report") 399 | print("=" * 40) 400 | 401 | if result.valid: 402 | print("✅ All Lua dependencies are valid") 403 | else: 404 | print("❌ Lua dependency validation failed") 405 | 406 | if result.errors: 407 | print(f"\n❌ Errors ({len(result.errors)}):") 408 | for error in result.errors: 409 | print(f" • {error}") 410 | 411 | if result.warnings: 412 | print(f"\n⚠️ Warnings ({len(result.warnings)}):") 413 | for warning in result.warnings: 414 | print(f" • {warning}") 415 | 416 | if args.verbose: 417 | print(f"\n📋 Detailed Results:") 418 | print(json.dumps(result.details, indent=2, default=str)) 419 | 420 | 421 | if __name__ == '__main__': 422 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PDF Book Exporter 2 | 3 | A comprehensive tool for exporting Hugo book directories to professional PDF files, with enhanced support for multilingual content, emoji rendering, and customizable styling. 
4 | 5 | ## ✨ Features 6 | 7 | - 📚 **Hugo Book Structure Support** - Automatically processes `_index.md` and `index.md` files with weight-based ordering 8 | - 🌍 **Multilingual Support** - CJK (Chinese, Japanese, Korean) character rendering with font auto-detection 9 | - 🎨 **Enhanced Cover System** - Dynamic text overlay on cover images with customizable fonts, colors, and positioning 10 | - 💻 **Advanced Code Highlighting** - Syntax highlighting for 20+ programming languages with customizable themes 11 | - 📊 **Smart Table Processing** - Auto-wrapping tables with enhanced formatting and break handling 12 | - 🎉 **Comprehensive Emoji Support** - Unicode emoji rendering with fallback font chains 13 | - 🎨 **Customizable Color Palettes** - Full typography and styling customization 14 | - ⚡ **Intelligent Caching** - Image processing cache with automatic invalidation 15 | - 🔧 **Flexible PDF Engines** - XeLaTeX and LuaLaTeX support with automatic engine selection 16 | - 📱 **Cross-Platform** - Works on macOS, Linux, and Windows 17 | 18 | ## 🚀 Quick Start 19 | 20 | ### Prerequisites 21 | 22 | ```bash 23 | # Install Pandoc (required) 24 | # macOS 25 | brew install pandoc 26 | 27 | # Ubuntu/Debian 28 | sudo apt-get install pandoc 29 | 30 | # Windows 31 | # Download from https://pandoc.org/installing.html 32 | 33 | # Install LaTeX distribution (TeX Live recommended) 34 | # macOS 35 | brew install --cask mactex 36 | 37 | # Ubuntu/Debian 38 | sudo apt-get install texlive-full 39 | 40 | # Windows 41 | # Download TeX Live or MiKTeX 42 | ``` 43 | 44 | ### Basic Usage 45 | 46 | ```bash 47 | # Export a Hugo book to PDF 48 | python cli.py /path/to/book/directory -o output.pdf 49 | 50 | # Generate with cover and summary 51 | python cli.py /path/to/book/directory \ 52 | -o professional-book.pdf \ 53 | --generate-summary 54 | 55 | # Enable emoji support with automatic engine optimization 56 | python cli.py /path/to/book/directory \ 57 | -o emoji-book.pdf \ 58 | --emoji 59 | ``` 60 | 61 | ## 
📖 Examples 62 | 63 | You can export the built-in example book to PDF with the following command: 64 | 65 | ```bash 66 | python cli.py example -o example.pdf --emoji 67 | ``` 68 | 69 | ![Example Screenshot](example-screenshot.jpg) 70 | 71 | ### Example 1: Basic Book Export 72 | 73 | ```bash 74 | # Directory structure 75 | content/zh/book/my-handbook/ 76 | ├── _index.md # Book metadata and config 77 | ├── chapter1/ 78 | │ └── index.md # Chapter 1 content 79 | ├── chapter2/ 80 | │ └── index.md # Chapter 2 content 81 | └── images/ 82 | └── cover.jpg # Optional cover image 83 | 84 | # Export command 85 | python cli.py content/zh/book/my-handbook \ 86 | -o static/files/my-handbook.pdf \ 87 | --generate-summary 88 | ``` 89 | 90 | ### Example 2: Emoji-Enabled Technical Documentation 91 | 92 | ```bash 93 | # Export with emoji support and diagnostics 94 | python cli.py content/zh/book/tech-guide \ 95 | -o tech-guide-with-emoji.pdf \ 96 | --emoji \ 97 | --diagnostics 98 | 99 | # Example output 100 | ✅ Emoji fonts detected: Apple Color Emoji, Noto Color Emoji 101 | 🎨 Using emoji font: Apple Color Emoji 102 | 🚀 PDF generated successfully at tech-guide-with-emoji.pdf 103 | ``` 104 | 105 | ### Example 3: Custom Template and Appendix 106 | 107 | ```bash 108 | # Use custom template with appendix 109 | python cli.py content/zh/book/research-paper \ 110 | -o research-paper.pdf \ 111 | --template custom-template.tex \ 112 | --appendix bibliography.md 113 | ``` 114 | 115 | ### Example 4: Draft Content and Cache Management 116 | 117 | ```bash 118 | # Include draft content 119 | python cli.py content/zh/book/work-in-progress \ 120 | -o draft-book.pdf \ 121 | --include-drafts 122 | 123 | # Manage cache 124 | python cli.py --cache-info content/zh/book/my-book 125 | python cli.py --clean-cache 30 content/zh/book/my-book 126 | ``` 127 | 128 | ## 🎨 Customizable Palettes and Styling 129 | 130 | ### Color Palette Configuration 131 | 132 | Configure document-wide color schemes in your book's 
`_index.md`: 133 | 134 | ```yaml 135 | --- 136 | title: "My Professional Book" 137 | book: 138 | # Typography color palette 139 | body_color: "#2C3E50" # Main text color 140 | heading_color: "#34495E" # All heading levels 141 | link_color: "#3498DB" # Hyperlinks and references 142 | code_color: "#E74C3C" # Inline code snippets 143 | quote_color: "#7F8C8D" # Blockquotes and citations 144 | caption_color: "#95A5A6" # Figure and table captions 145 | 146 | # Enhanced cover customization 147 | cover: "professional-cover.jpg" 148 | cover_title_text: "Advanced Data Science" 149 | cover_author_text: "Dr. Jane Smith" 150 | cover_subtitle_text: "A Comprehensive Guide" 151 | 152 | # Cover color scheme 153 | cover_title_color: "#FFFFFF" 154 | cover_author_color: "#ECF0F1" 155 | cover_subtitle_color: "#BDC3C7" 156 | 157 | # Font sizes (points) 158 | cover_title_font_size: 48 159 | cover_author_font_size: 24 160 | cover_subtitle_font_size: 18 161 | 162 | # Layout positioning 163 | cover_title_position: "center" # top, center, bottom 164 | cover_author_position: "bottom" # top, center, bottom 165 | --- 166 | ``` 167 | 168 | ### Predefined Color Themes 169 | 170 | #### Professional Theme 171 | 172 | ```yaml 173 | book: 174 | body_color: "#2C3E50" 175 | heading_color: "#34495E" 176 | link_color: "#3498DB" 177 | code_color: "#E74C3C" 178 | quote_color: "#7F8C8D" 179 | ``` 180 | 181 | #### Academic Theme 182 | 183 | ```yaml 184 | book: 185 | body_color: "#2E3440" 186 | heading_color: "#5E81AC" 187 | link_color: "#88C0D0" 188 | code_color: "#BF616A" 189 | quote_color: "#4C566A" 190 | ``` 191 | 192 | #### Warm Theme 193 | 194 | ```yaml 195 | book: 196 | body_color: "#3E2723" 197 | heading_color: "#5D4037" 198 | link_color: "#FF5722" 199 | code_color: "#D84315" 200 | quote_color: "#6D4C41" 201 | ``` 202 | 203 | ### Advanced Cover Customization 204 | 205 | ```yaml 206 | book: 207 | # Visual effects 208 | cover_overlay_enabled: true 209 | cover_text_shadow: true 210 | 
cover_background_overlay: true 211 | cover_overlay_opacity: 0.6 212 | 213 | # Advanced positioning 214 | cover_title_position: "center" 215 | cover_author_position: "bottom" 216 | 217 | # Custom export date format 218 | cover_export_date: "2024 年 1 月" 219 | ``` 220 | 221 | ### Code Block Styling 222 | 223 | The tool automatically applies syntax highlighting with customizable color schemes: 224 | 225 | ```markdown 226 | ```python 227 | def hello_world(): 228 | print("Hello, World! 🌍") 229 | return {"status": "success", "emoji": "✅"} 230 | ``` 231 | 232 | # SQL example with automatic highlighting 233 | 234 | ```sql 235 | SELECT user_name, COUNT(*) as total_orders 236 | FROM orders 237 | WHERE created_date >= '2024-01-01' 238 | GROUP BY user_name; 239 | ``` 240 | 241 | ``` 242 | 243 | Supported languages: Python, JavaScript, Go, Rust, Java, C++, SQL, YAML, JSON, Bash, HTML, CSS, and more. 244 | 245 | ## 🔧 API Reference 246 | 247 | ### Core Functions 248 | 249 | #### `build_pdf(book_dir, root_node, output_pdf, metadata, **options)` 250 | 251 | Main PDF generation function. 
252 | 253 | **Parameters:** 254 | - `book_dir` (str): Path to Hugo book directory 255 | - `root_node` (Node): Parsed book structure tree 256 | - `output_pdf` (str): Output PDF file path 257 | - `metadata` (dict): Book configuration and metadata 258 | - `template_path` (str, optional): Custom LaTeX template path 259 | - `appendix_path` (str, optional): Additional content to append 260 | - `emoji` (bool): Enable comprehensive emoji support 261 | 262 | **Example:** 263 | ```python 264 | from export_book_pdf import build_pdf, build_tree, load_config 265 | 266 | # Parse book structure 267 | root_node = build_tree("content/zh/book/my-book") 268 | config = load_config("content/zh/book/my-book") 269 | 270 | # Generate PDF 271 | build_pdf( 272 | book_dir="content/zh/book/my-book", 273 | root_node=root_node, 274 | output_pdf="output.pdf", 275 | metadata=config, 276 | emoji=True 277 | ) 278 | ``` 279 | 280 | ## 🛠️ CLI Reference 281 | 282 | ### Basic Commands 283 | 284 | ```bash 285 | # Core export command 286 | python cli.py [OPTIONS] 287 | ``` 288 | 289 | ### Command Line Options 290 | 291 | | Flag | Description | Example | 292 | |------|-------------|---------| 293 | | `-o, --output` | Output PDF file path | `-o my-book.pdf` | 294 | | `--generate-summary` | Create GitBook-style summary.md | `--generate-summary` | 295 | | `--template` | Custom LaTeX template path (XeLaTeX only) | `--template custom.tex` | 296 | | `--appendix` | Append additional content | `--appendix refs.md` | 297 | | `--emoji` | Enable emoji support with automatic engine selection | `--emoji` | 298 | | `--include-drafts` | Include draft content | `--include-drafts` | 299 | | `--diagnostics` | Run system compatibility diagnostics | `--diagnostics` | 300 | | `--clean-cache` | Clean cache files (optional: days) | `--clean-cache 30` | 301 | | `--cache-info` | Display cache information | `--cache-info` | 302 | | `--generate-troubleshooting-guide` | Generate troubleshooting guide | 
`--generate-troubleshooting-guide` | 303 | | `--max-table-width` | Maximum table width as fraction of text width | `--max-table-width 0.95` | 304 | 305 | ### Advanced Usage Examples 306 | 307 | #### Comprehensive Diagnostics 308 | 309 | ```bash 310 | # Run full system diagnostics 311 | python cli.py --diagnostics 312 | # Output: 313 | # 🔍 System validation: ✅ PASSED 314 | # ✅ LuaLaTeX Engine 315 | # ✅ Emoji Fonts (Apple Color Emoji) 316 | # ✅ Pandoc Available 317 | # ✅ Required LaTeX Packages 318 | ``` 319 | 320 | #### Cache Management 321 | 322 | ```bash 323 | # View cache statistics 324 | python cli.py --cache-info content/zh/book/handbook 325 | # Output: 326 | # Cache directory: /path/to/cache 327 | # Cache files: 15 328 | # Total cache size: 12.34MB 329 | 330 | # Clean old cache files 331 | python cli.py --clean-cache 30 content/zh/book/handbook 332 | # Output: Cleaned 8 cache files older than 30 days. 333 | ``` 334 | 335 | #### Draft Content Processing 336 | 337 | ```bash 338 | # Include draft chapters (with draft: true in front matter) 339 | python cli.py content/zh/book/work-in-progress \ 340 | -o draft-version.pdf \ 341 | --include-drafts 342 | ``` 343 | 344 | ## 📄 Workflow Integration 345 | 346 | ### Hugo Integration 347 | 348 | The tool seamlessly integrates with Hugo book themes: 349 | 350 | ```yaml 351 | # hugo.yaml or config.yaml 352 | params: 353 | book: 354 | pdf_export: true 355 | pdf_output_dir: "static/files" 356 | ``` 357 | 358 | ### Automated Build Integration 359 | 360 | #### GitHub Actions 361 | 362 | ```yaml 363 | name: Generate PDF Books 364 | on: 365 | push: 366 | paths: ['content/zh/book/**'] 367 | 368 | jobs: 369 | pdf-export: 370 | runs-on: ubuntu-latest 371 | steps: 372 | - uses: actions/checkout@v3 373 | 374 | - name: Install LaTeX 375 | run: sudo apt-get install texlive-full 376 | 377 | - name: Install Pandoc 378 | run: sudo apt-get install pandoc 379 | 380 | - name: Generate PDF 381 | run: | 382 | python 
tools/pdf-book-exporter/cli.py \ 383 | content/zh/book/my-handbook \ 384 | -o static/files/my-handbook.pdf \ 385 | --emoji --generate-summary 386 | 387 | - name: Upload PDF 388 | uses: actions/upload-artifact@v3 389 | with: 390 | name: generated-pdfs 391 | path: static/files/*.pdf 392 | ``` 393 | 394 | ## ⚠️ Known Limitations 395 | 396 | ### Current Limitations 397 | 398 | - **Template Compatibility**: Custom templates (`--template`) only work with XeLaTeX engine 399 | - **Emoji Font Dependencies**: Emoji support requires system-installed emoji fonts (Apple Color Emoji, Noto Color Emoji, etc.) 400 | - **LaTeX Engine Switching**: The tool automatically selects between XeLaTeX and LuaLaTeX based on emoji requirements 401 | - **Table Width Processing**: Very wide tables may require manual adjustment of `--max-table-width` parameter 402 | - **Image Format Support**: WebP images are automatically converted to PNG, which may increase processing time 403 | - **Cache Dependencies**: Image processing cache is tied to file modification times and may require manual clearing after system changes 404 | 405 | ### Performance Considerations 406 | 407 | - **Large Books**: Processing time increases significantly for books with 200+ pages 408 | - **Image-Heavy Content**: Books with many images may require substantial cache space 409 | - **Font Loading**: First-time emoji font detection adds ~2-3 seconds to processing time 410 | - **Memory Usage**: Large tables and complex formatting may require 2GB+ RAM 411 | 412 | ### Compatibility Notes 413 | 414 | - **Operating Systems**: Full emoji support requires macOS 10.12+, Ubuntu 18.04+, or Windows 10+ 415 | - **LaTeX Distributions**: TeX Live 2020+ recommended for best compatibility 416 | - **Pandoc Version**: Requires Pandoc 2.14+ for optimal table processing 417 | 418 | ## 📋 Content Guidelines 419 | 420 | ### Book Structure 421 | 422 | ``` 423 | content/zh/book/my-handbook/ 424 | ├── _index.md # Required: Book metadata 425 | ├── 
chapter1-introduction/ 426 | │ └── index.md # Chapter content 427 | ├── chapter2-basics/ 428 | │ └── index.md 429 | ├── chapter3-advanced/ 430 | │ ├── index.md 431 | │ └── images/ # Chapter-specific images 432 | │ └── diagram.png 433 | ├── images/ # Book-level images 434 | │ ├── cover.jpg # Optional: Book cover 435 | │ └── backcover.jpg # Optional: Back cover 436 | └── SUMMARY.md # Generated automatically 437 | ``` 438 | 439 | ### Front Matter Configuration 440 | 441 | #### Book-level (`_index.md`) 442 | 443 | ```yaml 444 | --- 445 | title: "Complete Programming Guide" 446 | weight: 1 447 | book: 448 | title: "Complete Programming Guide" 449 | author: "Jane Developer" 450 | date: "2024-01-15" 451 | description: "A comprehensive guide to modern programming" 452 | language: "zh-hans" 453 | 454 | # PDF-specific settings 455 | cover: "cover.jpg" 456 | appendix: true 457 | 458 | # Back-cover configuration (optional) 459 | backcover_image: "back_qr.png" # relative path, any raster img 460 | backcover_text: | 461 | **扫码关注公众号** 462 | https://my.site/book 463 | 464 | # Color customization 465 | body_color: "#2C3E50" 466 | heading_color: "#34495E" 467 | link_color: "#3498DB" 468 | 469 | # Cover customization 470 | cover_title_text: "完整编程指南" 471 | cover_author_text: "张三" 472 | cover_title_color: "#FFFFFF" 473 | cover_author_color: "#ECF0F1" 474 | --- 475 | 476 | # Introduction 477 | 478 | This book covers comprehensive programming concepts... 479 | ``` 480 | 481 | #### Chapter-level (`index.md`) 482 | 483 | ```yaml 484 | --- 485 | title: "Getting Started" 486 | weight: 10 487 | draft: false # Set to true to exclude from PDF 488 | publish: true # Set to false to exclude from PDF 489 | export_pdf: true # Set to false to exclude from PDF 490 | --- 491 | 492 | # Getting Started 493 | 494 | Welcome to the first chapter... 
495 | ``` 496 | 497 | ### Content Exclusion 498 | 499 | Control which content appears in PDF exports: 500 | 501 | ```yaml 502 | --- 503 | title: "Work in Progress Chapter" 504 | weight: 99 505 | draft: true # Excluded unless --include-drafts 506 | publish: false # Always excluded 507 | export_pdf: false # Excluded from PDF only 508 | pdf: false # Alternative to export_pdf 509 | --- 510 | ``` 511 | 512 | ## 🎯 Advanced Features 513 | 514 | ### Multi-Language Support 515 | 516 | ```yaml 517 | book: 518 | language: "zh-hans" # Chinese Simplified 519 | # Automatic font selection: 520 | # - Source Han Sans SC (preferred) 521 | # - Noto Sans CJK SC 522 | # - PingFang SC (macOS) 523 | ``` 524 | 525 | ### Emoji Rendering 526 | 527 | ```bash 528 | # Enable comprehensive emoji support 529 | python cli.py content/book/emoji-guide \ 530 | -o emoji-guide.pdf \ 531 | --emoji 532 | 533 | # The tool automatically: 534 | # ✅ Detects system emoji fonts 535 | # 🎨 Configures optimal rendering engine 536 | # 🚀 Provides fallback options 537 | ``` 538 | 539 | ### Image Processing 540 | 541 | The tool automatically handles: 542 | 543 | - **Format conversion**: WebP → PNG, SVG → PNG 544 | - **Remote images**: Downloads and caches URLs 545 | - **Smart caching**: Avoids reprocessing unchanged images 546 | - **Size optimization**: Maintains quality while reducing file size 547 | 548 | ### Table Enhancement 549 | 550 | Advanced table processing includes: 551 | 552 | - **Auto-wrapping**: Long content automatically wraps 553 | - **Responsive sizing**: Tables adapt to page width 554 | - **Break handling**: Smart page breaks for long tables 555 | - **Styling**: Professional borders and spacing 556 | 557 | ## 🐛 Troubleshooting 558 | 559 | ### Common Issues 560 | 561 | #### 1. 
LaTeX Engine Not Found 562 | 563 | ```bash 564 | # Check if LaTeX is installed 565 | xelatex --version 566 | lualatex --version 567 | 568 | # Install TeX Live (recommended) 569 | # macOS: brew install --cask mactex 570 | # Ubuntu: sudo apt-get install texlive-full 571 | ``` 572 | 573 | #### 2. Emoji Not Rendering 574 | 575 | ```bash 576 | # Run diagnostics to identify issues 577 | python cli.py --diagnostics 578 | 579 | # Install emoji fonts if needed 580 | # macOS: Already included (Apple Color Emoji) 581 | # Ubuntu: sudo apt-get install fonts-noto-color-emoji 582 | # Windows: Available in Windows 10+ 583 | ``` 584 | 585 | #### 3. Chinese Characters Not Displaying 586 | 587 | ```bash 588 | # Install CJK fonts 589 | # macOS: brew install font-source-han-sans 590 | # Ubuntu: sudo apt-get install fonts-noto-cjk 591 | ``` 592 | 593 | #### 4. Memory Issues with Large Books 594 | 595 | ```bash 596 | # Process in smaller chunks or increase system memory 597 | # Use cache to avoid reprocessing images 598 | python cli.py --clean-cache 0 # Clear cache if needed 599 | ``` 600 | 601 | ### Getting Help 602 | 603 | 1. **Run diagnostics**: `python cli.py --diagnostics` 604 | 2. **Generate troubleshooting guide**: `--generate-troubleshooting-guide` 605 | 3. **Check logs**: Enable verbose output in the script 606 | 4. 
**Community support**: Create an issue with diagnostic output 607 | 608 | ## 📊 Performance and Statistics 609 | 610 | ### Example Performance Metrics 611 | 612 | ```bash 613 | # Typical processing times 614 | Small book (5 chapters, 20 pages): ~15 seconds 615 | Medium book (15 chapters, 100 pages): ~45 seconds 616 | Large book (30 chapters, 300 pages): ~2 minutes 617 | 618 | # With caching enabled: 619 | Subsequent runs: ~5-10 seconds (cache hit rate: 85%+) 620 | ``` 621 | 622 | ### Cache Management 623 | 624 | ```bash 625 | # Monitor cache usage 626 | python cli.py --cache-info content/book/handbook 627 | # Output: 628 | # Cache directory: /path/to/cache 629 | # Cache files: 25 630 | # image1_a1b2c3d4.png: 125.3KB, 2.5 days old 631 | # image2_e5f6g7h8.png: 89.7KB, 1.2 days old 632 | # Total cache size: 15.67MB 633 | 634 | # Clean old cache files 635 | python cli.py --clean-cache 7 # Remove files older than 7 days 636 | ``` 637 | 638 | ## 🤝 Contributing 639 | 640 | Contributions are welcome! Please feel free to submit issues, feature requests, or pull requests. 
641 | 642 | ### Development Setup 643 | 644 | ```bash 645 | # Clone the repository 646 | git clone https://github.com/rootsongjc/pdf-book-exporter.git 647 | cd pdf-book-exporter 648 | 649 | # Install dependencies 650 | ./install_pdf_dependencies.sh 651 | ``` 652 | 653 | --- 654 | 655 | **Professional PDF generation for Hugo books with comprehensive multilingual and emoji support.** 656 | -------------------------------------------------------------------------------- /image_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import shutil 4 | import hashlib 5 | import subprocess 6 | from pathlib import Path 7 | import cache_utils 8 | 9 | def latex_escape(s): 10 | return s.replace('\\', '/').replace('_', '\\_').replace('#', '\\#').replace('%', '\\%').replace('&', '\\&').replace(' ', '\\ ') 11 | 12 | def download_image(url, output_path): 13 | import urllib.request 14 | try: 15 | req = urllib.request.Request( 16 | url, 17 | headers={ 18 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' 19 | } 20 | ) 21 | with urllib.request.urlopen(req) as response: 22 | with open(output_path, 'wb') as f: 23 | f.write(response.read()) 24 | print(f"Downloaded {url} to {output_path}") 25 | return True 26 | except Exception as e: 27 | print(f"Failed to download image {url}: {e}") 28 | return False 29 | 30 | def find_image_file_recursive(book_dir, img_name, current_file_path): 31 | img_name = img_name.split('?')[0].split('#')[0] 32 | current_dir = os.path.dirname(current_file_path) 33 | candidate = os.path.abspath(os.path.join(current_dir, img_name)) 34 | if os.path.exists(candidate): 35 | return candidate 36 | candidates = [ 37 | os.path.join(book_dir, img_name), 38 | os.path.join(book_dir, 'images', img_name), 39 | os.path.join('static', 'images', img_name), 40 | os.path.join('static', 'files', img_name), 41 | ] 42 | for c in 
candidates: 43 | if os.path.exists(c): 44 | return c 45 | for root, dirs, files in os.walk(book_dir): 46 | if img_name in files: 47 | return os.path.join(root, img_name) 48 | for static_dir in ['static/images', 'static/files']: 49 | for root, dirs, files in os.walk(static_dir): 50 | if img_name in files: 51 | return os.path.join(root, img_name) 52 | return None 53 | 54 | def convert_svg_to_png(svg_path, output_dir, cache_dir=None): 55 | if cache_dir: 56 | import cache_utils 57 | cached_path = cache_utils.get_cached_image(svg_path, cache_dir, '.png') 58 | if cached_path: 59 | output_name = os.path.splitext(os.path.basename(svg_path))[0] + '.png' 60 | output_path = os.path.join(output_dir, output_name) 61 | shutil.copy2(cached_path, output_path) 62 | return output_path 63 | svg2png_script = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../scripts/svg2png.sh')) 64 | if not os.path.exists(svg2png_script): 65 | print(f"svg2png.sh not found at {svg2png_script}") 66 | return None 67 | svg_name = os.path.basename(svg_path) 68 | png_name = svg_name.replace('.svg', '.png') 69 | png_path = os.path.join(output_dir, png_name) 70 | try: 71 | subprocess.run([svg2png_script, svg_path, png_path], check=True) 72 | if os.path.exists(png_path): 73 | print(f"Converted {svg_path} to {png_path} (via svg2png.sh)") 74 | if cache_dir: 75 | cache_utils.save_to_cache(svg_path, png_path, cache_dir) 76 | return png_path 77 | else: 78 | print(f"svg2png.sh did not produce {png_path}") 79 | return None 80 | except Exception as e: 81 | print(f"Error using svg2png.sh: {e}") 82 | return None 83 | 84 | def convert_webp_to_png(webp_path, output_dir, cache_dir=None): 85 | if cache_dir: 86 | import cache_utils 87 | cached_path = cache_utils.get_cached_image(webp_path, cache_dir, '.png') 88 | if cached_path: 89 | output_name = os.path.splitext(os.path.basename(webp_path))[0] + '.png' 90 | output_path = os.path.join(output_dir, output_name) 91 | shutil.copy2(cached_path, output_path) 92 | 
return output_path 93 | try: 94 | webp_name = os.path.basename(webp_path) 95 | png_name = webp_name.replace('.webp', '.png') 96 | png_path = os.path.join(output_dir, png_name) 97 | cmd = ['magick', webp_path, png_path] 98 | subprocess.run(cmd, check=True, capture_output=True) 99 | print(f"Converted {webp_path} to {png_path}") 100 | if cache_dir: 101 | cache_utils.save_to_cache(webp_path, png_path, cache_dir) 102 | return png_path 103 | except (subprocess.CalledProcessError, FileNotFoundError): 104 | print(f"Warning: Could not convert {webp_path} to PNG. Install ImageMagick.") 105 | return None 106 | 107 | def process_images_in_content(content, book_dir, temp_dir, temp_pngs, current_file_path, cache_dir=None): 108 | os.makedirs(temp_dir, exist_ok=True) 109 | processed_images = {} 110 | 111 | # Only remove mermaid code blocks, not other types 112 | content = re.sub(r'```mermaid[\s\S]*?```', '', content) 113 | 114 | # Track code block boundaries to avoid processing content inside them 115 | def is_inside_code_block(text, position): 116 | before_text = text[:position] 117 | code_blocks = re.finditer(r'```[\w]*', before_text) 118 | count = len(list(code_blocks)) 119 | return count % 2 == 1 # Odd count means we're inside a code block 120 | 121 | def replace_image(match): 122 | # Check if this image is inside a code block 123 | if is_inside_code_block(content, match.start()): 124 | return match.group(0) # Don't process images inside code blocks 125 | 126 | alt_text = match.group(1) 127 | img_path = match.group(2) 128 | if img_path.startswith('http://') or img_path.startswith('https://'): 129 | url_hash = hashlib.md5(img_path.encode()).hexdigest()[:12] 130 | original_filename = os.path.basename(img_path.split('?')[0]) 131 | base_name = os.path.splitext(original_filename)[0] 132 | ext = os.path.splitext(original_filename)[1].lower() 133 | cached_filename = f"{base_name}_{url_hash}.png" 134 | cached_path = os.path.join(cache_dir, cached_filename) if cache_dir else None 135 | 
metadata = {} 136 | if cache_dir: 137 | import cache_utils 138 | metadata = cache_utils.load_cache_metadata(cache_dir) 139 | if cache_dir and cached_filename in metadata and os.path.exists(cached_path): 140 | print(f"Using cached remote image: {cached_path}") 141 | abs_path = cached_path 142 | else: 143 | temp_download_path = os.path.join(temp_dir, f"download_{url_hash}{ext}") 144 | if download_image(img_path, temp_download_path): 145 | if ext == '.webp': 146 | png_path = convert_webp_to_png(temp_download_path, temp_dir, cache_dir) 147 | if png_path: 148 | shutil.copy2(png_path, cached_path) 149 | if cache_dir: 150 | metadata[cached_filename] = { 151 | 'source_url': img_path, 152 | 'cached_at': __import__('time').time(), 153 | 'cache_path': cached_path 154 | } 155 | cache_utils.save_cache_metadata(cache_dir, metadata) 156 | abs_path = cached_path 157 | else: 158 | print(f"Warning: Failed to convert downloaded WebP to PNG: {img_path}") 159 | return match.group(0) 160 | elif ext == '.svg': 161 | png_path = convert_svg_to_png(temp_download_path, temp_dir, cache_dir) 162 | if png_path: 163 | shutil.copy2(png_path, cached_path) 164 | if cache_dir: 165 | metadata[cached_filename] = { 166 | 'source_url': img_path, 167 | 'cached_at': __import__('time').time(), 168 | 'cache_path': cached_path 169 | } 170 | cache_utils.save_cache_metadata(cache_dir, metadata) 171 | abs_path = cached_path 172 | else: 173 | print(f"Warning: Failed to convert downloaded SVG to PNG: {img_path}") 174 | return match.group(0) 175 | else: 176 | try: 177 | if ext == '.gif': 178 | cmd = ['magick', temp_download_path + '[0]', cached_path] 179 | else: 180 | cmd = ['magick', temp_download_path, cached_path] 181 | subprocess.run(cmd, check=True, capture_output=True) 182 | if cache_dir: 183 | metadata[cached_filename] = { 184 | 'source_url': img_path, 185 | 'cached_at': __import__('time').time(), 186 | 'cache_path': cached_path 187 | } 188 | cache_utils.save_cache_metadata(cache_dir, metadata) 189 | 
abs_path = cached_path 190 | except (subprocess.CalledProcessError, FileNotFoundError): 191 | print(f"Warning: Could not convert downloaded image {img_path} to PNG") 192 | shutil.copy2(temp_download_path, cached_path) 193 | if cache_dir: 194 | metadata[cached_filename] = { 195 | 'source_url': img_path, 196 | 'cached_at': __import__('time').time(), 197 | 'cache_path': cached_path 198 | } 199 | cache_utils.save_cache_metadata(cache_dir, metadata) 200 | abs_path = cached_path 201 | if os.path.exists(temp_download_path): 202 | os.remove(temp_download_path) 203 | else: 204 | print(f"Warning: Failed to download image: {img_path}") 205 | return f"\n\n" 206 | else: 207 | abs_path = find_image_file_recursive(book_dir, img_path, current_file_path) 208 | if not abs_path: 209 | print(f"Warning: Image not found: {img_path} in {current_file_path}") 210 | return match.group(0) 211 | if abs_path in processed_images: 212 | escaped_path = processed_images[abs_path] 213 | latex = ('\n\\begin{figure}[htbp]\n' + 214 | ' \\centering\n' + 215 | f' \\includegraphics[width=0.8\\textwidth]{{{escaped_path}}}\n' + 216 | f' \\caption{{{alt_text}}}\n' + 217 | '\\end{figure}\n') 218 | return latex 219 | ext = os.path.splitext(abs_path)[1].lower() 220 | target_path = '' 221 | if ext == '.svg': 222 | png_path = convert_svg_to_png(abs_path, temp_dir, cache_dir) 223 | if not png_path or not os.path.exists(png_path): 224 | print(f"Warning: Failed to convert SVG to PNG: {abs_path}") 225 | return match.group(0) 226 | base_name = os.path.splitext(os.path.basename(abs_path))[0] 227 | unique_name = f"{base_name}.png" 228 | target_path = os.path.join(temp_dir, unique_name) 229 | if png_path != target_path: 230 | shutil.copy(png_path, target_path) 231 | temp_pngs.append(target_path) 232 | elif ext == '.webp': 233 | png_path = convert_webp_to_png(abs_path, temp_dir, cache_dir) 234 | if not png_path or not os.path.exists(png_path): 235 | print(f"Warning: Failed to convert WEBP to PNG: {abs_path}") 236 | 
return match.group(0) 237 | base_name = os.path.splitext(os.path.basename(abs_path))[0] 238 | unique_name = f"{base_name}.png" 239 | target_path = os.path.join(temp_dir, unique_name) 240 | if png_path != target_path: 241 | shutil.copy(png_path, target_path) 242 | temp_pngs.append(target_path) 243 | else: 244 | unique_name = os.path.basename(abs_path) 245 | target_path = os.path.join(temp_dir, unique_name) 246 | shutil.copy(abs_path, target_path) 247 | temp_pngs.append(target_path) 248 | escaped_path = latex_escape(target_path) 249 | processed_images[abs_path] = escaped_path 250 | latex = ('\n\\begin{figure}[htbp]\n' + 251 | ' \\centering\n' + 252 | f' \\includegraphics[width=0.8\\textwidth]{{{escaped_path}}}\n' + 253 | f' \\caption{{{alt_text}}}\n' + 254 | '\\end{figure}\n') 255 | return latex 256 | content = re.sub(r'!\[(.*?)\]\((.*?)\)', replace_image, content) 257 | 258 | # Remove Hugo shortcodes and HTML comments, but avoid processing content inside code blocks 259 | lines = content.split('\n') 260 | processed_lines = [] 261 | inside_code_block = False 262 | 263 | for line in lines: 264 | # Check if we're entering or leaving a code block 265 | if line.strip().startswith('```'): 266 | inside_code_block = not inside_code_block 267 | processed_lines.append(line) 268 | continue 269 | 270 | # If we're inside a code block, preserve the line as-is 271 | if inside_code_block: 272 | processed_lines.append(line) 273 | continue 274 | 275 | # Only apply cleanup outside of code blocks 276 | # Remove Hugo shortcode parameters and HTML comments 277 | if re.match(r'^\s*(\{:[^}]*\}|)\s*$', line): 278 | continue # Skip this line entirely 279 | else: 280 | processed_lines.append(line) 281 | 282 | content = '\n'.join(processed_lines) 283 | 284 | # Remove Hugo template syntax 285 | content = re.sub(r'{{[%<][\s\S]*?[%>]}}', '', content) 286 | return content 287 | 288 | def prepare_cover_for_latex(cover_path, config, temp_dir, cache_dir=None): 289 | """Prepare cover image for LaTeX 
processing without text overlay.""" 290 | if not cover_path or not os.path.exists(cover_path): 291 | print("No cover image found") 292 | return None 293 | 294 | try: 295 | # Get cover configuration 296 | cover_config = config.get('cover_config', {}) 297 | 298 | # If text overlay is disabled, just return the original image 299 | if not cover_config.get('overlay_enabled', True): 300 | print("Cover text overlay disabled, using original image") 301 | return cover_path 302 | 303 | # For WebP covers, convert to PNG for better LaTeX compatibility 304 | if cover_path.lower().endswith('.webp'): 305 | print(f"Converting WebP cover to PNG for LaTeX: {cover_path}") 306 | try: 307 | from PIL import Image 308 | import uuid 309 | 310 | # Generate cache key for WebP conversion 311 | source_hash = cache_utils.get_file_hash(cover_path)[:12] if cover_path else "default" 312 | cache_key = f"cover_png_{source_hash}" 313 | 314 | # Check cache first 315 | if cache_dir: 316 | cached_cover = _get_cached_image_by_key(cache_key, cache_dir, '.png') 317 | if cached_cover: 318 | print(f"Using cached PNG cover: {cached_cover}") 319 | return cached_cover 320 | 321 | # Convert WebP to PNG 322 | with Image.open(cover_path) as img: 323 | if img.mode != 'RGB': 324 | img = img.convert('RGB') 325 | 326 | png_cover_path = os.path.join(temp_dir, f"cover_{uuid.uuid4().hex[:8]}.png") 327 | img.save(png_cover_path, 'PNG', quality=95) 328 | 329 | # Cache the result 330 | if cache_dir: 331 | _save_to_cache_with_key(cache_key, png_cover_path, cache_dir) 332 | 333 | print(f"Converted cover to PNG: {png_cover_path}") 334 | return png_cover_path 335 | 336 | except ImportError: 337 | print("PIL (Pillow) not available, using original WebP cover") 338 | return cover_path 339 | except Exception as e: 340 | print(f"Error converting WebP cover: {e}") 341 | return cover_path 342 | else: 343 | # For non-WebP images, use directly 344 | print(f"Using original cover image: {cover_path}") 345 | return cover_path 346 | 347 | 
except Exception as e: 348 | print(f"Error preparing cover: {e}") 349 | return cover_path 350 | 351 | 352 | def _get_cached_image_by_key(cache_key, cache_dir, extension='.png'): 353 | """Get cached image by cache key.""" 354 | try: 355 | metadata = cache_utils.load_cache_metadata(cache_dir) 356 | for filename, info in metadata.items(): 357 | if info.get('cache_key') == cache_key: 358 | cache_path = info.get('cache_path') 359 | if cache_path and os.path.exists(cache_path): 360 | return cache_path 361 | except Exception as e: 362 | print(f"Error checking cache: {e}") 363 | return None 364 | 365 | 366 | def _save_to_cache_with_key(cache_key, file_path, cache_dir): 367 | """Save file to cache with a specific cache key.""" 368 | import time 369 | 370 | try: 371 | cache_filename = f"{cache_key}.png" 372 | cache_path = os.path.join(cache_dir, cache_filename) 373 | 374 | # Copy file to cache 375 | shutil.copy2(file_path, cache_path) 376 | 377 | # Update metadata 378 | metadata = cache_utils.load_cache_metadata(cache_dir) 379 | metadata[cache_filename] = { 380 | 'cache_key': cache_key, 381 | 'cached_at': time.time(), 382 | 'cache_path': cache_path 383 | } 384 | cache_utils.save_cache_metadata(cache_dir, metadata) 385 | 386 | print(f"Cached enhanced image: {cache_path}") 387 | return cache_path 388 | except Exception as e: 389 | print(f"Error saving to cache: {e}") 390 | return None 391 | 392 | 393 | def get_available_fonts(): 394 | """Get available fonts for Chinese text on the system.""" 395 | import subprocess 396 | 397 | try: 398 | # Check for available Chinese fonts 399 | result = subprocess.run(['fc-list', ':', 'family'], 400 | capture_output=True, text=True, check=True) 401 | fonts = result.stdout.split('\n') 402 | 403 | # Priority list of preferred Chinese fonts 404 | preferred_fonts = [ 405 | 'Source Han Sans SC', # Adobe/Google 思源黑体 406 | 'Noto Sans CJK SC', # Google Noto Sans CJK 407 | 'PingFang SC', # macOS 默认中文字体 408 | 'STSong', # macOS 宋体 409 | 'FangSong', # 仿宋 
410 | 'Hiragino Mincho Pro', # 日文字体 411 | 'Times New Roman', # Fallback serif font 412 | 'DejaVu Serif' # Universal fallback 413 | ] 414 | 415 | # Find the first available font 416 | for font in preferred_fonts: 417 | if any(font in line for line in fonts): 418 | return font 419 | return 'Source Han Sans SC' 420 | except (subprocess.CalledProcessError, FileNotFoundError): 421 | return 'Source Han Sans SC' 422 | -------------------------------------------------------------------------------- /filters/emoji-passthrough.lua: -------------------------------------------------------------------------------- 1 | -- tools/pdf-book-exporter/filters/emoji-passthrough.lua 2 | -- Advanced emoji processing filter for LaTeX PDF generation 3 | -- 4 | -- This filter provides comprehensive emoji support for LaTeX documents: 5 | -- 1. Detects Unicode emoji characters using strict range checking 6 | -- 2. Handles emoji variation selectors (emoji vs text style) 7 | -- 3. Wraps emojis with appropriate LaTeX font commands 8 | -- 4. Provides fallback text representations for emoji in code contexts 9 | -- 5. 
Processes inline code and code blocks with emoji-safe replacements
--
-- Key features:
-- - Precise emoji detection (avoids false positives with punctuation)
-- - Support for composite emoji sequences with variation selectors
-- - Context-aware processing (normal text vs code blocks)
-- - Comprehensive emoji-to-text mapping for accessibility
-- - Integration with LaTeX emoji font commands

-- Function to check if a character is an emoji
--
-- Uses strict Unicode range checking to identify genuine emoji characters
-- while avoiding false positives with ASCII punctuation, mathematical
-- operators, currency symbols, arrows and technical symbols.
--
-- Only includes actual emoji Unicode blocks (Miscellaneous Symbols and
-- Pictographs, Emoticons, Transport and Map Symbols, plus specific common
-- emoji from other ranges).
function is_emoji(char)
    -- utf8.codepoint raises an error on a malformed byte sequence instead
    -- of returning nil, so guard the call with pcall to stay robust.
    local ok, code = pcall(utf8.codepoint, char)
    if not ok or not code then return false end

    -- Exclude ASCII range entirely (0x00-0x7F): punctuation, letters, digits.
    if code < 0x80 then
        return false
    end

    -- Exclude Latin-1 supplement range (common punctuation).
    if code >= 0x80 and code <= 0xFF then
        return false
    end

    -- Exclude punctuation/symbol ranges that might be misidentified.
    if (code >= 0x2000 and code <= 0x206F) or -- General Punctuation (quotes, dashes)
       (code >= 0x20A0 and code <= 0x20CF) or -- Currency Symbols
       (code >= 0x2100 and code <= 0x214F and code ~= 0x2139) or -- Letterlike Symbols (except info)
       (code >= 0x2150 and code <= 0x218F) or -- Number Forms
       (code >= 0x2190 and code <= 0x21FF) or -- Arrows
       (code >= 0x2200 and code <= 0x22FF) or -- Mathematical Operators
       (code >= 0x2300 and code <= 0x23FF and code ~= 0x2328) then -- Misc Technical (except keyboard)
        return false
    end

    -- Be very specific about emoji ranges - only include actual emoji blocks.
    return (code >= 0x1F300 and code <= 0x1F5FF) or -- Symbols & Pictographs
           (code >= 0x1F600 and code <= 0x1F64F) or -- Emoticons
           (code >= 0x1F680 and code <= 0x1F6FF) or -- Transport & Map
           (code >= 0x1F700 and code <= 0x1F77F) or -- Alchemical
           (code >= 0x1F780 and code <= 0x1F7FF) or -- Geometric Shapes Extended
           (code >= 0x1F800 and code <= 0x1F8FF) or -- Supplemental Arrows-C
           (code >= 0x1F900 and code <= 0x1F9FF) or -- Supplemental Symbols
           (code >= 0x1FA00 and code <= 0x1FA6F) or -- Chess Symbols
           (code >= 0x1FA70 and code <= 0x1FAFF) or -- Symbols and Pictographs Extended-A
           (code >= 0x1F000 and code <= 0x1F02F) or -- Mahjong & Dominoes
           -- Very specific common emoji symbols from Miscellaneous Symbols
           (code == 0x2600) or -- Sun
           (code == 0x2601) or -- Cloud
           (code == 0x2614) or -- Umbrella
           (code == 0x2615) or -- Coffee
           (code == 0x26A0) or -- Warning Sign
           (code == 0x26BD) or -- Soccer Ball
           (code == 0x26BE) or -- Baseball
           (code == 0x2728) or -- Sparkles
           (code == 0x2764) or -- Heavy Black Heart
           (code == 0x2B50) or -- White Medium Star
           (code == 0x2139) or -- Information Source
           (code == 0x2328) or -- Keyboard
           -- Technical/UI symbols commonly used as emoji
           (code == 0x2713) or -- Check Mark
           (code == 0x2717) or -- Cross Mark
           (code == 0x274C) or -- Cross Mark (red)
           (code == 0x2705)    -- White Heavy Check Mark
end

-- Function to check if a character is a variation selector
--
-- Variation selectors control whether Unicode characters appear in
-- text style (VS15 - 0xFE0E, black and white) or emoji style
-- (VS16 - 0xFE0F, colorful, pictographic) appearance.
--
-- Detecting them lets the filter group selectors with their base emoji
-- characters for LaTeX processing.
function is_variation_selector(char)
    -- Same pcall guard as is_emoji: utf8.codepoint errors on bad input.
    local ok, code = pcall(utf8.codepoint, char)
    if not ok or not code then return false end

    return code == 0xFE0F or -- Variation Selector-16 (emoji style)
           code == 0xFE0E or -- Variation Selector-15 (text style)
           (code >= 0xE0100 and code <= 0xE01EF) -- Variation Selectors Supplement
end

-- Process text and wrap emojis, handling composite emojis with variation
-- selectors.
--
-- Iterates through text with UTF-8 aware byte stepping, identifies emoji
-- characters via is_emoji, looks ahead for variation selectors to build
-- complete emoji sequences, and wraps each sequence in {\emojifont ...}.
-- Non-emoji text passes through unchanged; lone variation selectors are
-- dropped.
function process_text(text)
    local result = {}
    local i = 1

    while i <= #text do
        local char_start = i
        local char_end = i
        local byte = text:byte(i)

        if byte then
            -- Determine UTF-8 character length from the lead byte (1-4 bytes).
            if byte < 0x80 then
                char_end = i
            elseif byte < 0xE0 then
                char_end = i + 1
            elseif byte < 0xF0 then
                char_end = i + 2
            else
                char_end = i + 3
            end

            local char = text:sub(char_start, char_end)

            if is_emoji(char) then
                -- Collect the emoji plus any trailing variation selectors.
                local next_i = char_end + 1
                local emoji_sequence = char

                -- Look ahead for variation selectors
                while next_i <= #text do
148 | local next_byte = text:byte(next_i) 149 | local next_char_end = next_i 150 | 151 | if next_byte then 152 | -- Determine next character length 153 | if next_byte < 0x80 then 154 | next_char_end = next_i 155 | elseif next_byte < 0xE0 then 156 | next_char_end = next_i + 1 157 | elseif next_byte < 0xF0 then 158 | next_char_end = next_i + 2 159 | else 160 | next_char_end = next_i + 3 161 | end 162 | 163 | local next_char = text:sub(next_i, next_char_end) 164 | 165 | if is_variation_selector(next_char) then 166 | -- Include the variation selector in the emoji sequence 167 | emoji_sequence = emoji_sequence .. next_char 168 | next_i = next_char_end + 1 169 | else 170 | break 171 | end 172 | else 173 | break 174 | end 175 | end 176 | 177 | -- Use direct emoji font command for better compatibility 178 | table.insert(result, '{\\emojifont ' .. emoji_sequence .. '}') 179 | i = next_i 180 | else 181 | -- Skip lone variation selectors that weren't processed with an emoji 182 | if not is_variation_selector(char) then 183 | table.insert(result, char) 184 | end 185 | i = char_end + 1 186 | end 187 | else 188 | break 189 | end 190 | end 191 | 192 | return table.concat(result) 193 | end 194 | 195 | -- Convert emoji to text representation 196 | -- 197 | -- Provides accessible fallback text for emojis in contexts where 198 | -- emoji fonts are not available or appropriate (e.g., code blocks). 
--
-- Maps common emojis to:
-- - Descriptive text (e.g., 😀 → ':grin:')
-- - Symbolic representations (e.g., ✅ → '[check]')
-- - Technical abbreviations (e.g., 🔧 → ':wrench:')
--
-- Covers major emoji categories: facial expressions and emotions,
-- UI/UX symbols (check marks, warnings), technical and office objects,
-- nature and geography symbols.
--
-- @param emoji (string) a single emoji character (UTF-8 encoded)
-- @return (string) the text replacement, or ':emoji:' for unmapped input
--
-- NOTE(fix): the original table constructor contained duplicate keys
-- (📝, 📊, 📈, 📉, 🔧, ⚙, 🛠, 💡, 🔥, ⭐, ⚡).  In Lua the LAST occurrence
-- of a key in a constructor wins, so the earlier entries were dead code
-- (e.g. 📝 always mapped to ':memo:', never ':note:').  The duplicates
-- have been removed, keeping the values that were actually in effect.
function emoji_to_text(emoji)
    local emoji_map = {
        -- Facial expressions and emotions
        ['😄'] = ':smile:',
        ['😀'] = ':grin:',
        ['😃'] = ':happy:',
        ['😁'] = ':beam:',
        ['😆'] = ':laugh:',
        ['😅'] = ':sweat:',
        ['😂'] = ':joy:',
        ['🤣'] = ':rofl:',
        -- UI/UX indicators
        ['✅'] = '[check]',
        ['❌'] = '[x]',
        ['⚠'] = '[warning]',
        ['ℹ'] = '[info]',
        ['🎉'] = ':party:',
        ['💥'] = ':boom:',
        -- Hands and gestures
        ['👍'] = ':+1:',
        ['👎'] = ':-1:',
        ['👌'] = ':ok:',
        ['🤝'] = ':handshake:',
        ['👏'] = ':clap:',
        ['🙏'] = ':pray:',
        ['💪'] = ':muscle:',
        ['✊'] = ':fist:',
        -- Misc symbols
        ['🚀'] = ':rocket:',
        ['💯'] = ':100:',
        ['🎯'] = ':target:',
        ['🔍'] = ':search:',
        -- Devices and media
        ['📱'] = ':phone:',
        ['💻'] = ':computer:',
        ['🖥'] = ':desktop:',
        ['⌨'] = ':keyboard:',
        ['🖱'] = ':mouse:',
        ['🖨'] = ':printer:',
        ['📷'] = ':camera:',
        ['🎥'] = ':video:',
        ['🎵'] = ':music:',
        ['🎶'] = ':notes:',
        -- Office and documents
        ['📚'] = ':books:',
        ['📖'] = ':book:',
        ['📝'] = ':memo:',
        ['✏'] = ':pencil:',
        ['🖊'] = ':pen:',
        ['📌'] = ':pin:',
        ['📎'] = ':paperclip:',
        ['🔗'] = ':link:',
        ['📧'] = ':email:',
        ['📨'] = ':inbox:',
        ['📩'] = ':outbox:',
        ['📤'] = ':outbox_tray:',
        ['📥'] = ':inbox_tray:',
        ['📦'] = ':package:',
        ['🏷'] = ':label:',
        ['🔖'] = ':bookmark:',
        ['📋'] = ':clipboard:',
        ['📄'] = ':page:',
        ['📃'] = ':document:',
        ['📑'] = ':pages:',
        ['📊'] = ':chart:',
        ['📈'] = ':trending_up:',
        ['📉'] = ':trending_down:',
        ['🗂'] = ':folder:',
        ['📁'] = ':folder_open:',
        ['📂'] = ':folder_closed:',
        ['🗃'] = ':file_cabinet:',
        ['🗄'] = ':filing_cabinet:',
        ['🗑'] = ':trash:',
        -- Locks and keys
        ['🔒'] = ':lock:',
        ['🔓'] = ':unlock:',
        ['🔐'] = ':locked:',
        ['🔑'] = ':key:',
        ['🗝'] = ':old_key:',
        -- Tools and power
        ['🔨'] = ':hammer:',
        ['⚒'] = ':hammer_pick:',
        ['🛠'] = ':tools:',
        ['⚙'] = ':gear:',
        ['🔧'] = ':wrench:',
        ['🔩'] = ':nut_and_bolt:',
        ['⚡'] = ':zap:',
        ['🔋'] = ':battery:',
        ['🔌'] = ':plug:',
        ['💡'] = ':bulb:',
        ['🔦'] = ':flashlight:',
        ['🕯'] = ':candle:',
        ['🪔'] = ':lamp:',
        ['🔥'] = ':fire:',
        -- Nature and weather
        ['💧'] = ':droplet:',
        ['🌊'] = ':ocean:',
        ['❄'] = ':snowflake:',
        ['☀'] = ':sun:',
        ['🌙'] = ':moon:',
        ['⭐'] = ':star:',
        ['🌟'] = ':star2:',
        ['✨'] = ':sparkles:',
        ['☁'] = ':cloud:',
        ['🌈'] = ':rainbow:',
        -- Geography
        ['🌍'] = ':earth_africa:',
        ['🌎'] = ':earth_americas:',
        ['🌏'] = ':earth_asia:',
        ['🌐'] = ':globe:',
        ['🗺'] = ':world_map:',
        ['🧭'] = ':compass:',
        ['🏔'] = ':mountain:',
        ['⛰'] = ':mountain_peak:',
        ['🌋'] = ':volcano:',
        ['🗻'] = ':mount_fuji:',
        ['🏕'] = ':camping:',
        ['🏖'] = ':beach:',
        ['🏜'] = ':desert:',
        ['🏝'] = ':island:',
        ['🏞'] = ':park:',
        -- Buildings and landmarks
        ['🏟'] = ':stadium:',
        ['🏛'] = ':classical_building:',
        ['🏗'] = ':construction:',
        ['🧱'] = ':brick:',
        ['🏘'] = ':houses:',
        ['🏚'] = ':house_abandoned:',
        ['🏠'] = ':house:',
        ['🏡'] = ':house_garden:',
        ['🏢'] = ':office:',
        ['🏣'] = ':post_office:',
        ['🏤'] = ':european_post_office:',
        ['🏥'] = ':hospital:',
        ['🏦'] = ':bank:',
        ['🏨'] = ':hotel:',
        ['🏩'] = ':love_hotel:',
        ['🏪'] = ':convenience_store:',
        ['🏫'] = ':school:',
        ['🏬'] = ':department_store:',
        ['🏭'] = ':factory:',
        ['🏯'] = ':japanese_castle:',
        ['🏰'] = ':european_castle:',
        ['💒'] = ':wedding:',
        ['🗼'] = ':tokyo_tower:',
        ['🗽'] = ':statue_of_liberty:',
        ['⛪'] = ':church:',
        ['🕌'] = ':mosque:',
        ['🛕'] = ':hindu_temple:',
        ['🕍'] = ':synagogue:',
        ['⛩'] = ':shinto_shrine:',
        ['🕋'] = ':kaaba:'
    }

    return emoji_map[emoji] or ':emoji:'
end

-- Process text for code blocks.
--
-- Unlike regular text (which uses a '{\emojifont ...}' wrapper), emojis in
-- listings are wrapped in the '(*@\emoji{...}@*)' marker so that LaTeX
-- lstlisting environments can render them through an escape-to-LaTeX hook.
--
-- @param text (string) raw UTF-8 text from a code block
-- @return (string) text with emoji sequences replaced by listing markers
function process_text_for_code(text)
    local result = {}
    local i = 1

    while i <= #text do
        local byte = text:byte(i)
        if not byte then
            break
        end

        -- Determine the UTF-8 sequence length from the lead byte.
        local char_end
        if byte < 0x80 then
            char_end = i
        elseif byte < 0xE0 then
            char_end = i + 1
        elseif byte < 0xF0 then
            char_end = i + 2
        else
            char_end = i + 3
        end

        local char = text:sub(i, char_end)

        if is_emoji(char) then
            -- Collect any variation selectors immediately following the
            -- emoji so the full presentation sequence stays together.
            local next_i = char_end + 1
            local emoji_sequence = char

            while next_i <= #text do
                local next_byte = text:byte(next_i)
                if not next_byte then
                    break
                end

                local next_char_end
                if next_byte < 0x80 then
                    next_char_end = next_i
                elseif next_byte < 0xE0 then
                    next_char_end = next_i + 1
                elseif next_byte < 0xF0 then
                    next_char_end = next_i + 2
                else
                    next_char_end = next_i + 3
                end

                local next_char = text:sub(next_i, next_char_end)
                if is_variation_selector(next_char) then
                    -- Include the variation selector in the emoji sequence.
                    emoji_sequence = emoji_sequence .. next_char
                    next_i = next_char_end + 1
                else
                    break
                end
            end

            -- Special marker that lstlisting's escape hook can process.
            table.insert(result, '(*@\\emoji{' .. emoji_sequence .. '}@*)')
            i = next_i
        else
            -- Drop lone variation selectors; keep everything else verbatim.
            if not is_variation_selector(char) then
                table.insert(result, char)
            end
            i = char_end + 1
        end
    end

    return table.concat(result)
end

-- Main filter function for Str (string) elements.
--
-- Called by Pandoc for every string element in the document.  Only LaTeX
-- output is touched, to avoid interfering with other formats.  When
-- process_text wrapped at least one emoji, the whole string is emitted as
-- raw LaTeX; otherwise the element is returned unchanged.
function Str(elem)
    if not FORMAT:match 'latex' then
        return elem
    end

    local processed = process_text(elem.text)
    if processed ~= elem.text then
        return pandoc.RawInline('latex', processed)
    end
    return elem
end

-- Process inline code elements with emoji-to-text conversion
--
-- Inline code requires special handling because:
-- 1. Emoji fonts may not work properly in monospace/code contexts
-- 2. Code should remain readable in all output formats
-- 3. Emojis in code often serve as UI indicators or comments
--
-- Process:
-- 1. Scan code text for emoji characters
-- 2. Replace emojis with descriptive text representations
-- 3.
Return as RawInline LaTeX with \texttt formatting if changes made 478 | -- 4. Preserve all other characters including CJK text and HTML tags 479 | function Code(elem) 480 | -- Process inline code with safe character replacements 481 | if not FORMAT:match 'latex' then 482 | return elem 483 | end 484 | 485 | local text = elem.text 486 | local result = {} 487 | local i = 1 488 | local changed = false 489 | 490 | -- Process each character (same logic as CodeBlock) 491 | while i <= #text do 492 | local char_start = i 493 | local char_end = i 494 | local byte = text:byte(i) 495 | 496 | if byte then 497 | -- Determine UTF-8 character length 498 | if byte < 0x80 then 499 | char_end = i 500 | elseif byte < 0xE0 then 501 | char_end = i + 1 502 | elseif byte < 0xF0 then 503 | char_end = i + 2 504 | else 505 | char_end = i + 3 506 | end 507 | 508 | local char = text:sub(char_start, char_end) 509 | 510 | -- Handle specific characters 511 | if is_emoji(char) then 512 | -- Convert emoji to text representation 513 | local emoji_text = emoji_to_text(char) 514 | table.insert(result, emoji_text) 515 | changed = true 516 | else 517 | -- Keep all other characters as-is (including Chinese characters and HTML tags) 518 | table.insert(result, char) 519 | end 520 | 521 | i = char_end + 1 522 | else 523 | break 524 | end 525 | end 526 | 527 | if changed then 528 | local processed_text = table.concat(result) 529 | return pandoc.RawInline('latex', '\\texttt{' .. processed_text .. '}') 530 | end 531 | return elem 532 | end 533 | 534 | -- Process code blocks with emoji-to-text conversion 535 | -- 536 | -- Code blocks require emoji replacement because: 537 | -- 1. LaTeX listings/minted packages may not handle emoji fonts properly 538 | -- 2. Code blocks should maintain consistent monospace appearance 539 | -- 3. Emojis in code are usually semantic indicators 540 | -- 541 | -- Process: 542 | -- 1. Scan entire code block content character by character 543 | -- 2. 
Replace emoji characters with text equivalents 544 | -- 3. Preserve syntax highlighting compatibility 545 | -- 4. Maintain original code block attributes (language, etc.) 546 | function CodeBlock(elem) 547 | -- For code blocks, replace problematic characters with safe alternatives 548 | if not FORMAT:match 'latex' then 549 | return elem 550 | end 551 | 552 | local text = elem.text 553 | local result = {} 554 | local i = 1 555 | local changed = false 556 | 557 | -- Process each character 558 | while i <= #text do 559 | local char_start = i 560 | local char_end = i 561 | local byte = text:byte(i) 562 | 563 | if byte then 564 | -- Determine UTF-8 character length 565 | if byte < 0x80 then 566 | char_end = i 567 | elseif byte < 0xE0 then 568 | char_end = i + 1 569 | elseif byte < 0xF0 then 570 | char_end = i + 2 571 | else 572 | char_end = i + 3 573 | end 574 | 575 | local char = text:sub(char_start, char_end) 576 | 577 | -- Handle specific characters 578 | if is_emoji(char) then 579 | -- Convert emoji to text representation 580 | local emoji_text = emoji_to_text(char) 581 | table.insert(result, emoji_text) 582 | changed = true 583 | else 584 | -- Keep all other characters as-is (including Chinese characters and HTML tags) 585 | table.insert(result, char) 586 | end 587 | 588 | i = char_end + 1 589 | else 590 | break 591 | end 592 | end 593 | 594 | if changed then 595 | return pandoc.CodeBlock(table.concat(result), elem.attr) 596 | end 597 | 598 | return elem 599 | end 600 | --------------------------------------------------------------------------------