├── go.mod ├── .gitignore ├── go.sum ├── Makefile ├── CLAUDE.md ├── README.md └── main.go /go.mod: -------------------------------------------------------------------------------- 1 | module mastodon-hugo 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/mattn/go-mastodon v0.0.8 7 | gopkg.in/yaml.v3 v3.0.1 8 | ) 9 | 10 | require ( 11 | github.com/gorilla/websocket v1.5.1 // indirect 12 | github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 // indirect 13 | golang.org/x/net v0.25.0 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | mastodon-hugo 8 | 9 | # Test binary, built with `go test -c` 10 | *.test 11 | 12 | # Output of the go coverage tool, specifically when used with LiteIDE 13 | *.out 14 | 15 | # Dependency directories (remove the comment below to include it) 16 | # vendor/ 17 | 18 | # Go workspace file 19 | go.work 20 | 21 | # IDE files 22 | .vscode/ 23 | .idea/ 24 | *.swp 25 | *.swo 26 | *~ 27 | .claude 28 | 29 | # OS generated files 30 | .DS_Store 31 | .DS_Store? 32 | ._* 33 | .Spotlight-V100 34 | .Trashes 35 | ehthumbs.db 36 | Thumbs.db 37 | 38 | # Output directories (when testing locally) 39 | content/ 40 | attachments/ -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= 2 | github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= 3 | github.com/mattn/go-mastodon v0.0.8 h1:UgKs4SmQ5JeawxMIPP7NQ9xncmOXA+5q6jYk4erR7vk= 4 | github.com/mattn/go-mastodon v0.0.8/go.mod h1:8YkqetHoAVEktRkK15qeiv/aaIMfJ/Gc89etisPZtHU= 5 | github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y= 6 | github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80/go.mod h1:iFyPdL66DjUD96XmzVL3ZntbzcflLnznH0fr99w5VqE= 7 | golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= 8 | golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 10 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build clean deps test 2 | 3 | # Binary name 4 | BINARY_NAME=mastodon-hugo 5 | 6 | # Build the binary 7 | build: deps 8 | go build -o $(BINARY_NAME) . 9 | 10 | # Download dependencies 11 | deps: 12 | go mod tidy 13 | go mod download 14 | 15 | # Clean build artifacts 16 | clean: 17 | go clean 18 | rm -f $(BINARY_NAME) 19 | rm -rf content/ 20 | 21 | # Test the application 22 | test: 23 | go test -v ./... 24 | 25 | # Install the binary to $GOPATH/bin 26 | install: build 27 | go install . 28 | 29 | # Run with default parameters (requires MASTODON_TOKEN env var) 30 | run: build 31 | ./$(BINARY_NAME) 32 | 33 | # Show help 34 | help: 35 | @echo "Available targets:" 36 | @echo " build - Build the binary" 37 | @echo " deps - Download dependencies" 38 | @echo " clean - Clean build artifacts" 39 | @echo " test - Run tests" 40 | @echo " install - Install binary to GOPATH/bin" 41 | @echo " run - Build and run with defaults" 42 | @echo " help - Show this help" 43 | @echo "" 44 | @echo "Usage example:" 45 | @echo " export MASTODON_TOKEN=your_token_here" 46 | @echo " ./$(BINARY_NAME) --user your_username" 47 | @echo " # or with all options:" 48 | @echo " ./$(BINARY_NAME) --instance mastodon.social --user your_username --content-dir ./content --ignore-replies --max-status-id 123456789 --skip-existing --trailing-tag '#BLOG'" 49 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Project Overview 6 | 7 | mastodon-hugo is a Go CLI tool that fetches Mastodon posts from a specific user and generates Hugo-compatible Markdown files with downloaded media attachments. It's designed to work with the author's website (ttvl.co) and is not intended as a universal tool. 8 | 9 | ## Key Commands 10 | 11 | ### Building and Development 12 | ```bash 13 | # Build the binary 14 | make build 15 | 16 | # Download dependencies 17 | make deps 18 | 19 | # Clean build artifacts 20 | make clean 21 | 22 | # Run tests 23 | make test 24 | 25 | # Install to $GOPATH/bin 26 | make install 27 | ``` 28 | 29 | ### Running the Application 30 | ```bash 31 | # Required environment variable 32 | export MASTODON_TOKEN="your_access_token_here" 33 | 34 | # Basic usage (user parameter is required) 35 | ./mastodon-hugo --user your_username 36 | 37 | # Common flags 38 | ./mastodon-hugo --user username --ignore-replies --skip-existing --max-status-id 123456789 39 | ``` 40 | 41 | ## Architecture 42 | 43 | ### Core Components 44 | - **main.go**: Single-file application containing all functionality 45 | - **Config struct**: Holds CLI configuration (instance, user, directories, flags) 46 | - **PostMetadata struct**: YAML front matter structure for Hugo compatibility 47 | - **Key functions**: 48 | - `processStatus()`: Processes individual Mastodon posts 49 | - `downloadMedia()`: Downloads and deduplicates media files 50 | - `stripHTML()`: Cleans HTML content while preserving formatting 51 | - `createMarkdownFile()`: Generates Hugo-compatible markdown with YAML front matter 52 | 53 | ### Data Flow 54 | 1. Authenticate with Mastodon API using access token 55 | 2. Search for target user account 56 | 3. Fetch public posts with pagination 57 | 4. Apply filters (replies, status ID limits, existing files) 58 | 5. For each post: download media, create markdown file with YAML front matter 59 | 6. Output files to configurable directories 60 | 61 | ### Output Structure 62 | - Content directory: `./content/` (configurable) 63 | - Media directory: `./content/attachments/` (configurable) 64 | - Filename format: `YYYY-MM-DD-{status-id}.md` 65 | - YAML front matter includes: date, original_url, media array 66 | 67 | ### Dependencies 68 | - `github.com/mattn/go-mastodon`: Mastodon API client 69 | - `gopkg.in/yaml.v3`: YAML processing for front matter 70 | - Go 1.21+ required -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mastodon Hugo 2 | 3 | A Go CLI tool that fetches Mastodon posts from a specific user and generates Hugo-compatible Markdown files with downloaded media attachments. 4 | 5 | > [!IMPORTANT] 6 | > This tool is primarily designed to work on my website, [ttvl.co](https://ttvl.co), and is not widely supported as a generic tool. Pull requests are welcome, but there is no plan to make it a universal, almighty tool that can do everything. 7 | 8 | ## Features 9 | 10 | - Fetches public posts from any Mastodon instance 11 | - Downloads all attached images and videos 12 | - Generates Hugo-compatible Markdown files with YAML front matter 13 | - Configurable output directories 14 | - Fetch all posts up to a specific status ID (useful for incremental updates) 15 | - Option to ignore replies to other people 16 | - Skip existing posts for fast incremental updates 17 | - Filter posts by trailing tags (case insensitive) 18 | - Simple CLI interface 19 | 20 | ## Setup 21 | 22 | ### 1. Get a Mastodon Access Token 23 | 24 | 1. Go to your Mastodon instance (e.g., https://mastodon.social) 25 | 2. Navigate to **Preferences** → **Development** 26 | 3. Click **New Application** 27 | 4. Fill in the form: 28 | - **Application name**: `mastodon-hugo` (or any name you prefer) 29 | - **Application website**: Leave blank or add your website 30 | - **Scopes**: Make sure `read` is checked (you can uncheck `write` and `follow`) 31 | 5. Click **Submit** 32 | 6. Click on your newly created application 33 | 7. Copy the **Access token** (not the client key/secret) 34 | 35 | ### 2. Set Environment Variable 36 | 37 | ```bash 38 | export MASTODON_TOKEN="your_access_token_here" 39 | ``` 40 | 41 | You can add this to your shell profile (`.bashrc`, `.zshrc`, etc.) to make it permanent. 42 | 43 | ## Building 44 | 45 | ```bash 46 | # Build the binary 47 | make build 48 | 49 | # Or install to $GOPATH/bin 50 | make install 51 | ``` 52 | 53 | ## Usage 54 | 55 | ### Basic Usage 56 | 57 | ```bash 58 | # Specify required user parameter 59 | ./mastodon-hugo --user your_username 60 | ``` 61 | 62 | ### Fetch All Posts 63 | 64 | ```bash 65 | # Fetch all public posts from a user (ignoring replies to others, but keeping your threads) 66 | ./mastodon-hugo --user your_username --ignore-replies 67 | ``` 68 | 69 | ### Fetch Posts Up to a Specific Status 70 | 71 | ```bash 72 | # Fetch all posts up to a specific status ID (useful for incremental updates) 73 | ./mastodon-hugo --user your_username --max-status-id 123456789 74 | ``` 75 | 76 | ### Incremental Updates (Skip Existing Posts) 77 | 78 | ```bash 79 | # Only process new posts, skip ones that already exist as markdown files 80 | ./mastodon-hugo --user your_username --skip-existing 81 | ``` 82 | 83 | ### Filter by Trailing Tags 84 | 85 | ```bash 86 | # Only process posts that end with a specific tag (case insensitive) 87 | ./mastodon-hugo --user your_username --trailing-tag "#BLOG" 88 | 89 | # Or use caret notation 90 | ./mastodon-hugo --user your_username --trailing-tag "^PUBLISH" 91 | 92 | # Tags are stripped from the final markdown output 93 | ``` 94 | 95 | ### Custom Parameters 96 | 97 | ```bash 98 | ./mastodon-hugo \ 99 | --instance mastodon.social \ 100 | --user your_username \ 101 | --content-dir ./content \ 102 | --media-dir ./content/attachments \ 103 | --max-status-id 123456789 \ 104 | --ignore-replies \ 105 | --skip-existing \ 106 | --trailing-tag "#BLOG" 107 | ``` 108 | 109 | ### CLI Options 110 | 111 | - `--instance` - Mastodon instance (default: `mastodon.social`) 112 | - `--user` - Username to fetch posts from (required) 113 | - `--content-dir` - Directory for markdown files (default: `./content`) 114 | - `--media-dir` - Directory for media files (default: `./content/attachments`) 115 | - `--max-status-id` - Fetch all posts up to this status ID (optional) 116 | - `--ignore-replies` - Skip replies to other people, but keep your own threads (optional) 117 | - `--skip-existing` - Skip posts that already exist as markdown files (optional) 118 | - `--trailing-tag` - Only process posts ending with this specific tag, case insensitive (optional) 119 | 120 | ## Output Format 121 | 122 | ### File Structure 123 | 124 | ``` 125 | content/ 126 | ├── 2024-01-15-123456789.md 127 | ├── 2024-01-14-123456788.md 128 | └── attachments/ 129 | ├── image1.jpg 130 | ├── video1.mp4 131 | └── image2.png 132 | ``` 133 | 134 | ### Markdown File Format 135 | 136 | Each post becomes a markdown file with YAML front matter: 137 | 138 | ```yaml 139 | --- 140 | date: 2024-01-15T10:30:00Z 141 | original_url: https://mastodon.social/@username/123456789 142 | media: 143 | - image1.jpg 144 | - video1.mp4 145 | --- 146 | 147 | This is the post content with HTML tags stripped 148 | and line breaks preserved from the original post. 149 | ``` 150 | 151 | ## Integration with Hugo 152 | 153 | ### In Your Hugo Makefile 154 | 155 | ```makefile 156 | # Add this to your Hugo project's Makefile 157 | fetch-mastodon: 158 | cd path/to/mastodon-hugo && ./mastodon-hugo --content-dir ../hugo-site/content/posts 159 | 160 | build: fetch-mastodon 161 | hugo 162 | ``` 163 | 164 | ### Example Hugo Template 165 | 166 | You can access the media files in your Hugo templates: 167 | 168 | ```html 169 | {{ if .Params.media }} 170 |
View original post on Mastodon
184 | {{ end }} 185 | ``` 186 | 187 | ## Development 188 | 189 | ```bash 190 | # Download dependencies 191 | make deps 192 | 193 | # Build 194 | make build 195 | 196 | # Clean 197 | make clean 198 | 199 | # Help 200 | make help 201 | ``` 202 | 203 | ## Requirements 204 | 205 | - Go 1.21 or later 206 | - Internet connection for fetching posts and media 207 | - Valid Mastodon access token 208 | 209 | ## Notes 210 | 211 | - Only fetches **public** posts 212 | - Skips reblogs/boosts (fetches original content only) 213 | - Media files are downloaded as-is (no processing/resizing) 214 | - Existing files are not re-downloaded (basic deduplication) 215 | - HTML tags are stripped from post content while preserving line breaks 216 | - Post content is stored in markdown body, not in YAML front matter 217 | - When using `--max-status-id`, the tool fetches ALL posts chronologically up to that status ID (useful for incremental syncing) 218 | - The `--ignore-replies` flag skips posts that are replies to other users' posts, but keeps your own threads (replies to your own posts) 219 | - The `--skip-existing` flag checks for existing markdown files and skips processing them, making subsequent runs much faster 220 | - The `--trailing-tag` flag filters posts by their ending tag (case insensitive) and removes the tag from the final markdown output -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "html" 8 | "io" 9 | "log" 10 | "net/http" 11 | "net/url" 12 | "os" 13 | "path" 14 | "path/filepath" 15 | "regexp" 16 | "strings" 17 | "time" 18 | 19 | "github.com/mattn/go-mastodon" 20 | "gopkg.in/yaml.v3" 21 | ) 22 | 23 | type Config struct { 24 | Instance string 25 | User string 26 | Token string 27 | ContentDir string 28 | MediaDir string 29 | MaxStatusID string 30 | IgnoreReplies bool 31 | SkipExisting bool 32 | TrailingTag string 33 | } 34 | 35 | type PostMetadata struct { 36 | Date time.Time `yaml:"date"` 37 | OriginalURL string `yaml:"original_url"` 38 | Media []string `yaml:"media,omitempty"` 39 | } 40 | 41 | func main() { 42 | var config Config 43 | 44 | // CLI flags 45 | flag.StringVar(&config.Instance, "instance", "mastodon.social", "Mastodon instance URL") 46 | flag.StringVar(&config.User, "user", "", "Mastodon username") 47 | flag.StringVar(&config.ContentDir, "content-dir", "./content", "Directory for markdown files") 48 | flag.StringVar(&config.MediaDir, "media-dir", "./content/attachments", "Directory for media files") 49 | flag.StringVar(&config.MaxStatusID, "max-status-id", "", "Maximum status ID to fetch") 50 | flag.BoolVar(&config.IgnoreReplies, "ignore-replies", false, "Ignore replies to other people") 51 | flag.BoolVar(&config.SkipExisting, "skip-existing", false, "Skip posts that already exist as markdown files") 52 | flag.StringVar(&config.TrailingTag, "trailing-tag", "", "Specific trailing tag to filter for (e.g., '#BLOG' or '^PUBLISH')") 53 | flag.Parse() 54 | 55 | // Get token from environment 56 | config.Token = os.Getenv("MASTODON_TOKEN") 57 | if config.Token == "" { 58 | log.Fatal("MASTODON_TOKEN environment variable is required") 59 | } 60 | 61 | // Validate required parameters 62 | if config.User == "" { 63 | log.Fatal("--user parameter is required") 64 | } 65 | 66 | // Ensure directories exist 67 | if err := os.MkdirAll(config.ContentDir, 0755); err != nil { 68 | log.Fatalf("Failed to create content directory: %v", err) 69 | } 70 | if err := os.MkdirAll(config.MediaDir, 0755); err != nil { 71 | log.Fatalf("Failed to create media directory: %v", err) 72 | } 73 | 74 | // Create Mastodon client 75 | client := mastodon.NewClient(&mastodon.Config{ 76 | Server: "https://" + config.Instance, 77 | AccessToken: config.Token, 78 | }) 79 | 80 | // Verify credentials and get account info 81 | account, err := client.GetAccountCurrentUser(context.Background()) 82 | if err != nil { 83 | log.Fatalf("Failed to verify credentials: %v", err) 84 | } 85 | 86 | fmt.Printf("Authenticated as: @%s\n", account.Username) 87 | 88 | // Find the target user account 89 | accounts, err := client.AccountsSearch(context.Background(), config.User, 1) 90 | if err != nil { 91 | log.Fatalf("Failed to search for user: %v", err) 92 | } 93 | if len(accounts) == 0 { 94 | log.Fatalf("User @%s not found", config.User) 95 | } 96 | 97 | targetAccount := accounts[0] 98 | fmt.Printf("Found user: @%s (%s)\n", targetAccount.Username, targetAccount.DisplayName) 99 | 100 | // Fetch user's statuses (public only) 101 | var pg mastodon.Pagination 102 | allStatuses := []*mastodon.Status{} 103 | reachedMaxStatusID := false 104 | 105 | fmt.Printf("Fetching statuses") 106 | if config.MaxStatusID != "" { 107 | fmt.Printf(" up to status ID: %s", config.MaxStatusID) 108 | } 109 | if config.IgnoreReplies { 110 | fmt.Printf(" (ignoring replies)") 111 | } 112 | fmt.Println() 113 | 114 | for { 115 | statuses, err := client.GetAccountStatuses(context.Background(), targetAccount.ID, &pg) 116 | if err != nil { 117 | log.Fatalf("Failed to fetch statuses: %v", err) 118 | } 119 | if len(statuses) == 0 { 120 | break 121 | } 122 | 123 | // Process statuses and apply filters 124 | for _, status := range statuses { 125 | // Check if we've reached the maximum status ID 126 | if config.MaxStatusID != "" && string(status.ID) == config.MaxStatusID { 127 | fmt.Printf("Reached maximum status ID: %s\n", config.MaxStatusID) 128 | reachedMaxStatusID = true 129 | break 130 | } 131 | 132 | // Filter for public posts only 133 | if status.Visibility != "public" { 134 | continue 135 | } 136 | 137 | // Skip replies to other people if ignore-replies flag is set 138 | // Keep replies to self (threads) but ignore replies to others 139 | if config.IgnoreReplies && status.InReplyToID != nil && status.InReplyToAccountID != nil { 140 | // Convert InReplyToAccountID to string for comparison 141 | replyToAccountID := "" 142 | if accountID, ok := status.InReplyToAccountID.(string); ok { 143 | replyToAccountID = accountID 144 | } else if accountID, ok := status.InReplyToAccountID.(mastodon.ID); ok { 145 | replyToAccountID = string(accountID) 146 | } 147 | 148 | // Skip if replying to someone else (not self) 149 | if replyToAccountID != string(targetAccount.ID) { 150 | continue 151 | } 152 | } 153 | 154 | // Skip posts without specified trailing tag if trailing-tag is set 155 | if config.TrailingTag != "" && !hasTrailingTag(status.Content, config.TrailingTag) { 156 | continue 157 | } 158 | 159 | allStatuses = append(allStatuses, status) 160 | } 161 | 162 | // Stop if we reached the max status ID 163 | if reachedMaxStatusID { 164 | break 165 | } 166 | 167 | // Check if there are more pages 168 | if pg.MaxID == "" { 169 | break 170 | } 171 | } 172 | 173 | fmt.Printf("Found %d posts to process", len(allStatuses)) 174 | if config.IgnoreReplies { 175 | fmt.Printf(" (replies excluded)") 176 | } 177 | if config.SkipExisting { 178 | fmt.Printf(" (skipping existing)") 179 | } 180 | if config.TrailingTag != "" { 181 | fmt.Printf(" (filtering by: %s)", config.TrailingTag) 182 | } 183 | fmt.Println() 184 | 185 | // Process each status 186 | processedCount := 0 187 | skippedCount := 0 188 | 189 | for i, status := range allStatuses { 190 | fmt.Printf("Processing post %d/%d (ID: %s)...", i+1, len(allStatuses), status.ID) 191 | 192 | // Check if file already exists when skip-existing flag is set 193 | if config.SkipExisting { 194 | // Parse the status creation time to generate filename 195 | createdAt, err := time.Parse(time.RFC3339, status.CreatedAt.Format(time.RFC3339)) 196 | if err != nil { 197 | log.Printf("Failed to parse created time for status %s: %v", status.ID, err) 198 | continue 199 | } 200 | 201 | // Generate the same filename that would be created 202 | filename := fmt.Sprintf("%s-%s.md", createdAt.Format("2006-01-02"), status.ID) 203 | filePath := filepath.Join(config.ContentDir, filename) 204 | 205 | // Check if file already exists 206 | if _, err := os.Stat(filePath); err == nil { 207 | fmt.Printf(" already exists, skipping\n") 208 | skippedCount++ 209 | continue 210 | } 211 | } 212 | 213 | if err := processStatus(status, config); err != nil { 214 | log.Printf("Failed to process status %s: %v", status.ID, err) 215 | continue 216 | } 217 | 218 | fmt.Printf(" done\n") 219 | processedCount++ 220 | } 221 | 222 | fmt.Printf("Completed! Processed: %d, Skipped: %d\n", processedCount, skippedCount) 223 | 224 | fmt.Println("Done!") 225 | } 226 | 227 | func processStatus(status *mastodon.Status, config Config) error { 228 | // Skip reblogs/boosts - we want original content only 229 | if status.Reblog != nil { 230 | return nil 231 | } 232 | 233 | // Parse the status creation time 234 | createdAt, err := time.Parse(time.RFC3339, status.CreatedAt.Format(time.RFC3339)) 235 | if err != nil { 236 | return fmt.Errorf("failed to parse created time: %v", err) 237 | } 238 | 239 | // Download media attachments 240 | var mediaFiles []string 241 | for _, attachment := range status.MediaAttachments { 242 | if attachment.URL == "" { 243 | continue 244 | } 245 | 246 | filename, err := downloadMedia(attachment.URL, config.MediaDir) 247 | if err != nil { 248 | log.Printf("Failed to download media %s: %v", attachment.URL, err) 249 | continue 250 | } 251 | mediaFiles = append(mediaFiles, filename) 252 | } 253 | 254 | // Create post metadata 255 | metadata := PostMetadata{ 256 | Date: createdAt, 257 | OriginalURL: status.URL, 258 | Media: mediaFiles, 259 | } 260 | 261 | // Generate filename: YYYY-MM-DD-post-id.md 262 | filename := fmt.Sprintf("%s-%s.md", 263 | createdAt.Format("2006-01-02"), 264 | status.ID) 265 | 266 | filepath := filepath.Join(config.ContentDir, filename) 267 | 268 | // Create markdown file 269 | content := stripHTML(status.Content) 270 | 271 | // Remove trailing tag from content if it exists 272 | if config.TrailingTag != "" { 273 | content = removeTrailingTag(content, config.TrailingTag) 274 | } 275 | 276 | return createMarkdownFile(filepath, metadata, content) 277 | } 278 | 279 | func downloadMedia(mediaURL, mediaDir string) (string, error) { 280 | // Parse URL to get filename 281 | parsedURL, err := url.Parse(mediaURL) 282 | if err != nil { 283 | return "", err 284 | } 285 | 286 | // Extract filename from URL path 287 | filename := path.Base(parsedURL.Path) 288 | if filename == "." || filename == "/" { 289 | // Generate filename if not available 290 | filename = fmt.Sprintf("media-%d", time.Now().UnixNano()) 291 | } 292 | 293 | // Create full file path 294 | filePath := filepath.Join(mediaDir, filename) 295 | 296 | // Check if file already exists 297 | if _, err := os.Stat(filePath); err == nil { 298 | return filename, nil // File already exists 299 | } 300 | 301 | // Download the file 302 | resp, err := http.Get(mediaURL) 303 | if err != nil { 304 | return "", err 305 | } 306 | defer resp.Body.Close() 307 | 308 | if resp.StatusCode != http.StatusOK { 309 | return "", fmt.Errorf("failed to download media: HTTP %d", resp.StatusCode) 310 | } 311 | 312 | // Create the file 313 | file, err := os.Create(filePath) 314 | if err != nil { 315 | return "", err 316 | } 317 | defer file.Close() 318 | 319 | // Copy data 320 | _, err = io.Copy(file, resp.Body) 321 | if err != nil { 322 | return "", err 323 | } 324 | 325 | return filename, nil 326 | } 327 | 328 | func createMarkdownFile(filepath string, metadata PostMetadata, content string) error { 329 | file, err := os.Create(filepath) 330 | if err != nil { 331 | return err 332 | } 333 | defer file.Close() 334 | 335 | // Write YAML front matter 336 | _, err = file.WriteString("---\n") 337 | if err != nil { 338 | return err 339 | } 340 | 341 | encoder := yaml.NewEncoder(file) 342 | encoder.SetIndent(2) 343 | if err := encoder.Encode(metadata); err != nil { 344 | return err 345 | } 346 | encoder.Close() 347 | 348 | _, err = file.WriteString("---\n\n") 349 | if err != nil { 350 | return err 351 | } 352 | 353 | // Write content as markdown 354 | _, err = file.WriteString(content) 355 | return err 356 | } 357 | 358 | // stripHTML removes HTML tags from content while preserving line breaks and decoding HTML entities 359 | func stripHTML(content string) string { 360 | // Step 1: Convert HTML line breaks to newlines 361 | content = regexp.MustCompile(`]*>`).ReplaceAllString(content, "") 366 | 367 | // Step 3: Handle list items - convert lines starting with "- " after newlines to proper list format 368 | // This preserves the bullet point formatting 369 | 370 | // Step 4: Remove all other HTML tags 371 | re := regexp.MustCompile(`<[^>]*>`) 372 | stripped := re.ReplaceAllString(content, "") 373 | 374 | // Step 5: Decode HTML entities 375 | stripped = html.UnescapeString(stripped) 376 | 377 | // Step 6: Clean up whitespace but preserve meaningful line breaks 378 | stripped = strings.TrimSpace(stripped) 379 | 380 | // Replace multiple spaces/tabs with single space, but keep newlines 381 | stripped = regexp.MustCompile(`[ \t]+`).ReplaceAllString(stripped, " ") 382 | 383 | // Clean up multiple consecutive newlines (more than 3) to max 2 384 | stripped = regexp.MustCompile(`\n{3,}`).ReplaceAllString(stripped, "\n\n") 385 | 386 | // Clean up any trailing spaces at end of lines 387 | stripped = regexp.MustCompile(` +\n`).ReplaceAllString(stripped, "\n") 388 | 389 | return stripped 390 | } 391 | 392 | // hasTrailingTag checks if content ends with the specified custom tag (case insensitive) 393 | func hasTrailingTag(content string, customTag string) bool { 394 | stripped := stripHTML(content) 395 | stripped = strings.TrimSpace(stripped) 396 | 397 | if customTag == "" { 398 | return false 399 | } 400 | 401 | // Escape special regex characters and create case-insensitive pattern 402 | escapedTag := regexp.QuoteMeta(strings.TrimSpace(customTag)) 403 | pattern := regexp.MustCompile(`(?i)` + escapedTag + `\s*$`) 404 | return pattern.MatchString(stripped) 405 | } 406 | 407 | // removeTrailingTag strips the specified trailing tag from content (case insensitive) 408 | func removeTrailingTag(content string, customTag string) string { 409 | stripped := strings.TrimSpace(content) 410 | 411 | if customTag == "" { 412 | return stripped 413 | } 414 | 415 | // Escape special regex characters and create case-insensitive pattern to remove the tag 416 | escapedTag := regexp.QuoteMeta(strings.TrimSpace(customTag)) 417 | pattern := regexp.MustCompile(`(?i)\s*` + escapedTag + `\s*$`) 418 | stripped = pattern.ReplaceAllString(stripped, "") 419 | 420 | return strings.TrimSpace(stripped) 421 | } 422 | --------------------------------------------------------------------------------