├── .github └── workflows │ └── release.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── find_og_tag.go ├── go.mod ├── go.sum ├── prepare_release.sh ├── progress_writer.go ├── rrip.go ├── sanitize_filenames.go ├── template_util.go ├── types.go └── util.go /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | fetch-depth: 0 15 | fetch-tags: true 16 | 17 | - name: Set up Go 18 | uses: actions/setup-go@v4 19 | with: 20 | go-version: '1.24' 21 | 22 | - name: Build 23 | run: bash ./prepare_release.sh 24 | 25 | - name: Create Release 26 | uses: softprops/action-gh-release@v1 27 | with: 28 | name: ${{ github.ref_name }} 29 | tag_name: ${{ github.ref_name }} 30 | draft: false 31 | prerelease: false 32 | files: | 33 | rrip_linux_amd64.tar.xz 34 | rrip_windows64.zip 35 | rrip_macos_intel.tar.xz 36 | rrip_macos_arm64.tar.xz 37 | env: 38 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.exe 2 | rrip 3 | *.jpeg 4 | *.png 5 | *.jpg 6 | *.gif 7 | *.mp4 8 | .vscode/launch.json 9 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_VERSION=latest 2 | FROM golang:1.20 as build 3 | 4 | WORKDIR /go/src/app 5 | COPY go.mod go.sum ./ 6 | RUN go mod download 7 | COPY . 
./ 8 | RUN CGO_ENABLED=0 go build -o /go/bin/rrip 9 | 10 | FROM gcr.io/distroless/static-debian11:${BASE_VERSION} 11 | COPY --from=build /go/bin/rrip / 12 | WORKDIR /app/ 13 | ENTRYPOINT ["/rrip"] 14 | 15 | ## Run example 16 | ## docker run -v $PWD:/app -u $(id -u):$(id -g) rrip --max-size 1000 r/LogicGateMemes 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Mahesh Hegde 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rrip - Bulk-download images from subreddits 2 | 3 | Program to bulk-download image from reddit subreddits. 
4 | 5 | ## Features 6 | 7 | * Set max size of file, max total size, minimum score etc.. 8 | 9 | * Download images from Reddit preview links instead of source, saving some space. 10 | 11 | * If the image / GIF is already downloaded in same folder, skip it. 12 | 13 | * Log final download URLs to a file using a custom format string. 14 | 15 | * Filter by post title or link using regular expression. 16 | 17 | * Use Go template syntax to do custom filtering over post properties, or change file name format. 18 | 19 | * Single static binary written in Golang 20 | 21 | (Note: I have not tested all combinations of features, you might encounter some bugs!) 22 | 23 | ## Install 24 | ### Using `go` command 25 | Assuming you have Go toolchain installed 26 | 27 | ``` 28 | go install github.com/mahesh-hegde/rrip@latest 29 | ``` 30 | 31 | ### From Release section 32 | Download from Release section and unpack the binary executable somewhere in your `PATH`. 33 | 34 | ### Using Docker / Podman 35 | `rrip` is also available as a lightweight (distroless) [Docker image](https://ghcr.io/mahesh-hegde/rrip) from ghcr.io. 36 | 37 | Running from docker image obviously requires a bind mount and setting the user. So you can use an alias like this. 38 | 39 | ```bash 40 | alias rrip='docker run --rm -v $PWD:/app/ -u $(id -u):$(id -g) ghcr.io/mahesh-hegde/rrip:latest' 41 | ``` 42 | 43 | Then invoke it as `rrip`. 44 | 45 | In case of rootless `podman`, specifying user & group is not required due to uid remapping. So alias will be: 46 | 47 | ```bash 48 | alias rrip='podman run --rm -v $PWD:/app/ ghcr.io/mahesh-hegde/rrip:v0.5' 49 | ``` 50 | 51 | ## Options 52 | Invoke `rrip` without arguments for up-to-date usage output. 
53 | 54 | ## TL;DR 55 | 56 | ```sh 57 | ## Download only <200KB files from r/Wallpaper 58 | rrip --max-size=200 r/Wallpaper 59 | 60 | ## Download all-time top from r/WildlifePhotography, without exceeding 20MB storage or 50 files 61 | rrip --max-storage=20 --max-files=50 --sort=top-all r/WildlifePhotography 62 | 63 | ## Search "Neon" on r/AMOLEDBackgrounds and download top 20, sorted by top voted in past one year 64 | rrip --search="Neon" --max-files=20 --sort=top-year r/AMOLEDBackgrounds 65 | 66 | ## Download memes from r/LogicGateMemes, download reddit previews (640p) 67 | ## instead of original image, for space savings. 68 | ## Also log all image links to file called meme.txt along with title 69 | 70 | ## Note that --preview-res cannot be arbitrary 71 | ## Ones that generally work are 1080, 960, 640, 360, 216, 108 72 | ## If no suitable preview is found, image won't be downloaded 73 | 74 | ## Use --prefer-preview instead of --download-preview 75 | ## to download original URL if no preview could be found 76 | 77 | rrip --download-preview --preview-res=640 --data-output-file=meme.txt --data-output-format="{{.final_url}} {{.title}}" r/LogicGateMemes 78 | 79 | ## Log all image links from r/ImaginaryLandscapes 80 | ## without downloading files, using -d (dry run) option. 81 | ## (Reddit shows only the last 600 or so, not really "all") 82 | rrip -d --data-output-file=imaginary_landscapes.txt --data-output-format="{{.score}} {{.final_url}} {{.quoted_title}} {{.author}}" r/ImaginaryLandscapes 83 | ``` 84 | 85 | ### Using template options 86 | Go `text/template` syntax can be used to do versatile filtering. It can also be used to do formatting of logged links. 87 | 88 | ```sh 89 | ## Inspect the JSON of post using --print-post-data 90 | rrip --print-post-data --max-files=1 r/AMOLEDBackgrounds 91 | 92 | ## After inspecting the JSON, you can use the field values in `--template-filter` to filter based on any attribute. 
93 | ## If the template evaluates to "false", "", or "0", the post will be skipped by rrip 94 | 95 | ## Example: only download gilded posts 96 | rrip --template-filter='{{gt .gilded 0.0}}' --max-files=20 --sort=top-year r/AMOLEDBackgrounds 97 | 98 | ## Example: only download posts by a given author, say u/temporary_08 99 | rrip --template-filter='{{eq .author "temporary_08"}}' --max-files=20 r/AMOLEDBackgrounds 100 | 101 | ## Example: skip potentially unsafe content 102 | rrip --template-filter='{{not .over_18}}' --max-files=20 r/AMOLEDBackgrounds 103 | 104 | ## Example: Log links to a file with author, upvote ratio, and quoted title. 105 | ## Use dry run (-d) to skip download 106 | rrip -d --data-output-file=amoled.txt --data-output-format='{{.upvote_ratio}} {{.author}} {{.quoted_title}}' r/AMOLEDBackgrounds 107 | 108 | ## Example: Change file name format using Go templates. 109 | rrip --filename-format='{{.author}} {{.title}} {{.score}}' r/AMOLEDBackgrounds 110 | ``` 111 | 112 | ## Caveats 113 | * Can't handle crossposts when downloading preview image. 114 | * No support for downloading albums. 115 | * Some options don't work together 116 | * Many other caveats I don't remember. 
117 | -------------------------------------------------------------------------------- /find_og_tag.go: -------------------------------------------------------------------------------- 1 | // functions to parse html webpage and get a url 2 | 3 | package main 4 | 5 | import ( 6 | "errors" 7 | "io" 8 | 9 | "golang.org/x/net/html" 10 | ) 11 | 12 | func NextMetaTag(tok *html.Tokenizer) (html.Token, error) { 13 | for { 14 | tt := tok.Next() 15 | switch tt { 16 | case html.ErrorToken: 17 | return html.Token{}, tok.Err() 18 | case html.SelfClosingTagToken, html.StartTagToken: 19 | token := tok.Token() 20 | if token.Data == "meta" { 21 | return token, nil 22 | } 23 | if token.Data == "body" { 24 | return token, io.EOF 25 | } 26 | default: 27 | continue 28 | } 29 | } 30 | } 31 | 32 | func AttrValue(token html.Token, ns, key string) string { 33 | for _, attr := range token.Attr { 34 | if attr.Namespace == ns && attr.Key == key { 35 | return attr.Val 36 | } 37 | } 38 | return "" 39 | } 40 | 41 | // ogType can be "video", "image", or "any" 42 | 43 | func GetOgUrl(source io.Reader) (string, error) { 44 | tokenizer := html.NewTokenizer(source) 45 | reqProp := "og:" + options.OgType 46 | if options.OgType == "any" { 47 | reqProp = "og:video" 48 | } 49 | for { 50 | metaTag, err := NextMetaTag(tokenizer) 51 | if err == io.EOF { 52 | return "", nil 53 | } 54 | // unknown error 55 | if err != nil { 56 | return "", errors.New("Error Parsing HTML" + err.Error()) 57 | } 58 | prop := AttrValue(metaTag, "", "property") 59 | if prop == reqProp || 60 | prop == "og:image" && options.OgType == "any" { 61 | link := AttrValue(metaTag, "", "content") 62 | return link, nil 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mahesh-hegde/rrip 2 | 3 | go 1.18 4 | 5 | require golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 6 | 7 | 
// Thresholds that must BOTH be exceeded before the progress callback is
// invoked again.
const (
	// significantWrite is the minimum number of new bytes (10 KiB).
	significantWrite = 1024 * 10
	// significantTime is the minimum elapsed time in milliseconds.
	significantTime = 500
)

// ProgressWriter wraps an io.Writer and reports the running byte count to
// Callback. To keep terminal updates cheap, Callback fires only when more
// than significantWrite bytes were written since the last report and more
// than significantTime milliseconds have passed since then (the very first
// report is exempt from the time condition).
type ProgressWriter struct {
	Callback  func(int64) // receives the total bytes written so far; may be nil
	Writer    io.Writer   // destination of the actual data
	total     int64       // bytes successfully written so far
	lastCall  int64       // Unix milliseconds of the last report, 0 if none yet
	lastTotal int64       // value of total at the last report
}

// Write forwards p to the wrapped writer, updates the byte count with what
// was actually written, and reports progress when both thresholds are met.
// It returns exactly what the wrapped writer returned.
func (pw *ProgressWriter) Write(p []byte) (n int, err error) {
	n, err = pw.Writer.Write(p)
	pw.total += int64(n)

	if pw.total-pw.lastTotal <= significantWrite {
		return n, err
	}
	// The clock is consulted only once enough data has accumulated.
	now := time.Now().UnixMilli()
	if pw.lastCall != 0 && now-pw.lastCall <= significantTime {
		return n, err
	}

	pw.lastCall = now
	pw.lastTotal = pw.total
	if pw.Callback != nil {
		pw.Callback(pw.total)
	}
	return n, err
}
preview := range choices.Resolutions { 60 | if preview.Width == width { 61 | result := preview 62 | return &result 63 | } 64 | } 65 | return nil 66 | } 67 | 68 | func PrintStat() { 69 | eprintln(horizontalDashedLine) 70 | eprintln("Processed Posts: ", stats.Processed) 71 | eprintln("Already Downloaded: ", stats.Repeated) 72 | eprintln("Failed: ", stats.Failed) 73 | eprintln("Saved: ", stats.Saved) 74 | eprintln("Other: ", 75 | stats.Processed-stats.Failed-stats.Repeated-stats.Saved) 76 | eprintln(horizontalDashedLine) 77 | eprintln("Approx. Storage Used:", size(stats.CopiedBytes)) 78 | eprintln(horizontalDashedLine) 79 | } 80 | 81 | func Finish() { 82 | PrintStat() 83 | // This seems to fix partial printing with -print-post-data 84 | os.Stderr.Close() 85 | completion <- true 86 | } 87 | 88 | // body is the response body which contains json 89 | // handler is run for every post entry unless handler exits early 90 | // returns last posts's id ('name' attribute in json) 91 | // which is useful to fetch next page 92 | func HandlePosts(body io.ReadCloser, handler PostHandler) (last string) { 93 | b, err := io.ReadAll(body) 94 | check(err) 95 | 96 | apiResponseMap := map[string]any{} 97 | err = json.Unmarshal(b, &apiResponseMap) 98 | check(err) 99 | 100 | apiResponse := ApiResponse{} 101 | err = json.Unmarshal(b, &apiResponse) 102 | check(err) 103 | 104 | children := apiResponse.Data.Children 105 | dataMap := apiResponseMap["data"].(map[string]any) 106 | childrenArray := dataMap["children"].([]any) 107 | 108 | for i, post := range children { 109 | stats.Processed += 1 110 | childMap := childrenArray[i].(map[string]any) 111 | handler(post.Data, childMap["data"].(map[string](any))) 112 | log(horizontalDashedLine) 113 | last = post.Data.Name 114 | } 115 | log(horizontalDashedLine) 116 | return last 117 | } 118 | 119 | // Returns whether the image link can be downloaded 120 | // if downloadable, return final URL, else return empty string 121 | // also the extension string 
that matched 122 | func CheckAndResolveImage(linkString string) (finalLink string, extension string) { 123 | exts := []string{".jpeg", ".gif", ".mp4", ".jpg", ".png"} 124 | link, err := url.Parse(linkString) 125 | check(err) 126 | path := link.Path 127 | 128 | // imgur gifv links are generally MP4 129 | if (link.Host == "i.imgur.com" || link.Host == "imgur.com") && 130 | strings.HasSuffix(path, ".gifv") { 131 | trimmed := strings.TrimSuffix(path, ".gifv") 132 | link.Path = trimmed + ".mp4" 133 | link.Host = "i.imgur.com" 134 | return link.String(), ".mp4" 135 | } 136 | 137 | for _, ext := range exts { 138 | if strings.HasSuffix(path, ext) { 139 | return linkString, ext 140 | } 141 | } 142 | 143 | // if ogType is given, read the link and get it's og:video or og:image 144 | if options.OgType != "" { 145 | log("REQUEST PAGE: " + linkString) 146 | response, err := GetUrl(linkString) 147 | if err != nil { 148 | log(err.Error()) 149 | return "", "" 150 | } 151 | defer response.Body.Close() 152 | contentType := response.Header.Get("Content-Type") 153 | if strings.ToLower(contentType) != "text/html; charset=utf-8" { 154 | log("Unsupported ContentType when looking for og: url") 155 | return "", "" 156 | } 157 | ogUrl, _ := GetOgUrl(response.Body) 158 | if ogUrl != "" { 159 | return CheckAndResolveImage(ogUrl) 160 | } 161 | } 162 | return "", "" 163 | } 164 | 165 | // pass acceptMimeType = "" if no restriction 166 | func FetchUrlWithMethod(url, method string, acceptMimeType string) (*http.Response, error) { 167 | req, err := http.NewRequest(method, url, nil) 168 | check(err) 169 | req.Header.Add("User-Agent", options.UserAgent) 170 | 171 | if acceptMimeType != "" { 172 | req.Header.Add("Accept", acceptMimeType) 173 | } 174 | 175 | response, err := client.Do(req) 176 | if err != nil { 177 | return nil, err 178 | } 179 | return response, err 180 | } 181 | 182 | func GetUrl(url string) (*http.Response, error) { 183 | return FetchUrlWithMethod(url, "GET", "") 184 | } 185 | 186 | 
func Traverse(path string, handler PostHandler) { 187 | query := url.Values{} 188 | 189 | unsuffixedPath := strings.TrimSuffix(path, "/") 190 | 191 | if options.Search == "" && unsuffixedPath == "" { 192 | fatal("Please provide a search string or subreddit") 193 | } 194 | 195 | target := "https://www.reddit.com/" + unsuffixedPath 196 | 197 | after := options.After 198 | 199 | // Handle sort options 200 | var sortString, timePeriod string 201 | switch options.Sort { 202 | case "hot", "new", "rising": 203 | sortString = options.Sort 204 | case "top-hour", "top-day", "top-month", "top-year", "top-all": 205 | sortString = "top" 206 | timePeriod = strings.TrimPrefix(options.Sort, "top-") 207 | case "": 208 | _ = "best" // do nothing 209 | default: 210 | fatal("Invalid option passed to sort") 211 | } 212 | 213 | if options.Search == "" { 214 | if sortString != "" { 215 | target += "/" + sortString 216 | } 217 | } else { 218 | target += "/search" 219 | query.Set("sort", sortString) 220 | } 221 | 222 | query.Set("limit", fmt.Sprint(options.EntriesLimit)) 223 | 224 | if timePeriod != "" { 225 | query.Set("t", timePeriod) 226 | } 227 | 228 | if options.Search != "" { 229 | query.Set("q", options.Search) 230 | query.Set("restrict_sr", "true") 231 | } 232 | 233 | target += ".json?" 
+ query.Encode() 234 | 235 | for { 236 | link := target // final link 237 | if after != "" { 238 | link += "&after=" + after 239 | } 240 | log("Request: ", link) 241 | response, err := FetchUrlWithMethod(link, "GET", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") 242 | check(err, "Cannot get JSON response") 243 | 244 | processed := stats.Processed 245 | after = HandlePosts(response.Body, handler) 246 | if stats.Processed == processed { 247 | Finish() 248 | } 249 | response.Body.Close() 250 | } 251 | } 252 | 253 | func skipByRegexMatch(re *regexp.Regexp, s string) bool { 254 | if re != nil { 255 | return re.MatchString(s) 256 | } 257 | // if re = nil, don't skip anything 258 | return false 259 | } 260 | 261 | func chooseByRegexMatch(re *regexp.Regexp, s string) bool { 262 | if re != nil { 263 | return re.MatchString(s) 264 | } 265 | // if re = nil, choose everything 266 | return true 267 | } 268 | 269 | func DownloadPost(post PostData, postDataMap map[string]any) { 270 | title := strings.TrimSpace(strings.ReplaceAll(post.Title, "/", "|")) 271 | title = html.UnescapeString(title) // & etc.. are escaped in json 272 | if len(title) > 194 { 273 | title = title[:192] + ".." 
274 | } 275 | 276 | if !chooseByRegexMatch(options.TitleContains, post.Title) { 277 | log("Title not match regex:", quote(post.Title)) 278 | return 279 | } 280 | 281 | if !chooseByRegexMatch(options.FlairContains, post.LinkFlairText) { 282 | log("Flair not match regex:", quote(post.Title), quote(post.LinkFlairText)) 283 | return 284 | } 285 | 286 | if !chooseByRegexMatch(options.LinkContains, post.Url) { 287 | log("Link not match regex:", quote(post.Title), post.Url) 288 | return 289 | } 290 | 291 | if skipByRegexMatch(options.TitleNotContains, post.Title) { 292 | log("Title skipped by regex: ", quote(post.Title)) 293 | return 294 | } 295 | 296 | if skipByRegexMatch(options.FlairNotContains, post.LinkFlairText) { 297 | log("Flair skipped by regex: ", quote(post.Title), quote(post.LinkFlairText)) 298 | return 299 | } 300 | 301 | if skipByRegexMatch(options.LinkNotContains, post.Url) { 302 | log("Posted link skipped by regex: ", quote(post.Title), post.Url) 303 | return 304 | } 305 | 306 | if post.Score < options.MinScore { 307 | log("Skipped due to less score:", title, 308 | "| Score:", post.Score, "|", post.Url, "\n") 309 | if strings.HasPrefix(options.Sort, "top-") { 310 | eprintln("Skipping posts with less points, since sort=" + options.Sort) 311 | Finish() 312 | } 313 | return 314 | } 315 | 316 | postDataMap["quoted_title"] = quote(post.Title) 317 | postDataMap["final_url"] = "![will be set after processing]" 318 | postDataMap["rrip_filename"] = "![will be set after processing]" 319 | 320 | if options.TemplateFilter != nil { 321 | templated := formatTemplate(options.TemplateFilter, postDataMap) 322 | if falseValues[templated] { 323 | log("template filter evaluated to:", quote(templated)) 324 | return 325 | } 326 | } 327 | 328 | // Print post data only if its not already excluded by a template / regex 329 | // filter. 
330 | 331 | if options.PrintPostData { 332 | fmt.Fprintln(os.Stderr, marshallIndent(postDataMap)) 333 | } 334 | 335 | url := post.Url 336 | 337 | usePreview := func() bool { 338 | log("Original URL: ", post.Url) 339 | log("Choosing preview URL") 340 | if len(post.Preview.Images) == 0 { 341 | log("No preview found: ", quote(post.Title)) 342 | return false 343 | } 344 | preview := pickPreview(post.Preview.Images[0], options.PreviewRes) 345 | if preview == nil { 346 | log("No preview found: ", quote(post.Title)) 347 | return false 348 | } 349 | url = html.UnescapeString(preview.Url) 350 | return true 351 | } 352 | 353 | if options.DownloadPreview { 354 | if !usePreview() { 355 | return 356 | } 357 | } else if options.PreferPreview { 358 | usePreview() 359 | } // else proceed with post.data.url 360 | 361 | imageUrl, extension := CheckAndResolveImage(url) 362 | if imageUrl == "" { 363 | log("Skip non-imagelike entry: ", title, " | ", url) 364 | return 365 | } 366 | 367 | filenameRaw := formatTemplate(options.FilenameFormat, postDataMap) 368 | filename := fmt.Sprintf("%s [%s]%s", filenameRaw, post.Id, extension) 369 | filename = sanitizeFileName(filename, options.AllowSpecialChars) 370 | log("URL: ", url, " | Score:", post.Score) 371 | if imageUrl != url { 372 | log("->", imageUrl) 373 | } 374 | 375 | postDataMap["rrip_filename"] = filename 376 | postDataMap["final_url"] = imageUrl 377 | 378 | if options.DataOutputFile != nil && options.DataOutputFormat != nil { 379 | fmt.Fprintln(options.DataOutputFile, 380 | formatTemplate(options.DataOutputFormat, postDataMap)) 381 | } 382 | 383 | printName := func() { 384 | eprintf("\r%-*.*s", terminalColumns-24, terminalColumns-24, 385 | filename) 386 | } 387 | 388 | printName() 389 | 390 | // check if already downloaded file 391 | _, err := os.Stat(filename) 392 | if err == nil { 393 | eprint(" [Already Saved]\n") 394 | stats.Repeated += 1 395 | return 396 | } 397 | 398 | // If dry run, don't fetch media, or create a file 399 | // 
but you still have to increase number of files for config.MaxFiles to work 400 | if options.DryRun { 401 | eprint(" [Dry Run]\n") 402 | stats.Saved += 1 403 | if stats.Saved == options.MaxFiles { 404 | Finish() 405 | } 406 | return 407 | } 408 | 409 | // CHECK: any edge case? 410 | var output *os.File = nil // don't create until needed 411 | 412 | // Common error handling code 413 | netError := func(what string) { 414 | stats.Failed += 1 415 | eprintf(" [" + what + " Error: " + err.Error() + "]\n") 416 | if output != nil { 417 | // transfer errors when file was already created 418 | log("Try remove file: ", filename) 419 | rmErr := os.Remove(filename) 420 | if rmErr != nil { 421 | log("Error removing file") 422 | } 423 | } 424 | } 425 | // Fetch 426 | response, err := FetchUrlWithMethod(imageUrl, "HEAD", "") 427 | if err != nil { 428 | netError("Request ") 429 | return 430 | } 431 | defer response.Body.Close() 432 | 433 | // check content-type 434 | // It's generally rare, but few sites send html from urls that end with gif etc.. 
435 | contentType := response.Header.Get("Content-Type") 436 | if !strings.HasPrefix(contentType, "image/") && 437 | !strings.HasPrefix(contentType, "video/") { 438 | eprintln(" [Unexpected Content-Type: " + contentType + "]") 439 | return 440 | } 441 | 442 | length := response.ContentLength 443 | // If larger or unknown length, skip 444 | skipDueToSize := (options.MaxSize != -1) && 445 | (options.MaxSize < length || length == -1) 446 | // if file length unknown and there is storage limit, skip 447 | skipDueToSize = skipDueToSize || 448 | (options.MaxStorage != -1 && length == -1) 449 | if skipDueToSize { 450 | eprintf(" [Too Large: %s]\n", size(length)) 451 | return 452 | } 453 | // if file length will go past the storage limit, finish 454 | if options.MaxStorage != -1 && options.MaxStorage < length+stats.CopiedBytes { 455 | eprintf(" [%s | Crosses storage limit]\n\n", size(length)) 456 | Finish() 457 | } 458 | 459 | // Create file 460 | downloadingFilename = filename 461 | defer func() { 462 | downloadingFilename = "" 463 | }() 464 | output, err = os.Create(filename) 465 | if err != nil { 466 | eprintf(" [Can't create file]\n") 467 | stats.Failed += 1 468 | return 469 | } 470 | outputFile = output 471 | defer func() { 472 | outputFile = nil 473 | output.Close() 474 | }() 475 | 476 | maxCharsOnRight := 0 477 | 478 | out := ProgressWriter{Writer: output, Callback: func(i int64) { 479 | printName() 480 | progress := fmt.Sprintf(" [%s/%s]", size(i), size(length)) 481 | _n, _ := eprintf("%-*s", maxCharsOnRight, progress) 482 | maxCharsOnRight = max(_n, maxCharsOnRight) 483 | }} 484 | 485 | // do a GET request 486 | fullResponse, err := GetUrl(imageUrl) 487 | if err != nil { 488 | netError("Request ") 489 | return 490 | } 491 | defer fullResponse.Body.Close() 492 | 493 | n, err := io.Copy(&out, fullResponse.Body) 494 | printName() 495 | // add n to how much diskspace is consumed even if there's an error 496 | // because it would give a more appropriate approximation of 
bandwidth consumption 497 | // But if you're using that option to limit data usage, give 80% of airtime you can use 498 | stats.CopiedBytes += n 499 | 500 | if err != nil { 501 | netError("Transfer ") 502 | return 503 | } 504 | 505 | // Transfer success I hope 506 | // write stats 507 | done := fmt.Sprintf(" [Complete: %s]\n", size(n)) 508 | eprintf("%-*s", maxCharsOnRight, done) 509 | stats.Saved += 1 510 | if stats.Saved == options.MaxFiles { 511 | Finish() 512 | } 513 | } 514 | 515 | func createLinksFile(filename string) io.WriteCloser { 516 | if filename == "" { 517 | return nil 518 | } 519 | if filename == "-" || filename == "stdout" { 520 | return os.Stdout 521 | } 522 | output, err := os.Create(filename) 523 | check(err) 524 | return output 525 | } 526 | 527 | func main() { 528 | help := false 529 | // whether help option is provided 530 | flag.BoolVar(&help, "help", false, "Show this help message") 531 | var dataOutputFileName string 532 | var err error 533 | var titleContains, titleNotContains string 534 | var flairContains, flairNotContains string 535 | var linkContains, linkNotContains string 536 | var dataOutputFormat, templateFilter, filenameFormat string 537 | 538 | // option parsing 539 | flag.BoolVarP(&options.Debug, "verbose", "v", false, "Enable verbose output (devel)") 540 | flag.BoolVarP(&options.DryRun, "dry-run", "d", false, "DryRun i.e just print urls and names (devel)") 541 | flag.BoolVar(&options.UseHTTP1, "http1", false, "Use HTTP/1.1 to make calls to Reddit API") 542 | flag.BoolVar(&options.AllowSpecialChars, "allow-special-chars", false, 543 | "Allow all characters in filenames except / and \\, "+ 544 | "And windows-special filenames like NUL") 545 | flag.BoolVarP(&options.PrintPostData, "print-post-data", "P", false, "Print posts data as JSON. 
Implies dry run") 546 | flag.StringVar(&options.After, "after", "", "Get posts after the given ID") 547 | flag.StringVarP(&options.UserAgent, "useragent", "U", UserAgent, "UserAgent string") 548 | flag.Int64Var(&options.MaxStorage, "max-storage", -1, "Data usage limit in MB, -1 for no limit") 549 | flag.Int64VarP(&options.MaxSize, "max-size", "z", -1, "Max size of media file in KB, -1 for no limit") 550 | flag.StringVar(&options.Folder, "folder", "", "Target folder name") 551 | 552 | flag.StringVarP(&dataOutputFileName, "data-output-file", "O", "", "Log media links to given file") 553 | flag.StringVarP(&dataOutputFormat, "data-output-format", "f", defaultDataOutputFormat, "Template for saving post data") 554 | flag.StringVar(&templateFilter, "template-filter", "", "Posts will be ignored if this template evaluates to \"false\", \"0\" or empty string") 555 | flag.StringVarP(&filenameFormat, "filename-format", "t", defaultFileNameFormat, "Template for naming files. (Post ID is always appended)") 556 | 557 | flag.StringVar(&options.OgType, "og-type", "", "Look Up for a media link in page's og:property"+ 558 | " if link itself is not image/video (experimental). 
supported values: video, image, any") 559 | flag.StringVar(&options.Sort, "sort", "", "Sort: best|hot|new|rising|top-") 560 | flag.IntVar(&options.MaxFiles, "max-files", -1, "Max number of files to download (+ve), -1 for no limit") 561 | flag.IntVar(&options.MinScore, "min-score", 0, "Minimum score of the post to download") 562 | flag.IntVar(&options.EntriesLimit, "entries-limit", 100, "Number of entries to fetch in one API request (devel)") 563 | 564 | flag.StringVar(&titleContains, "title-contains", "", "Download if "+ 565 | "title contains substring matching given regex") 566 | flag.StringVar(&flairContains, "flair-contains", "", "Download if "+ 567 | "flair contains substring matching given regex (works only if flair is plaintext)") 568 | flag.StringVar(&linkContains, "link-contains", "", "Download if "+ 569 | "posted link contains substring matching given regex") 570 | 571 | flag.StringVar(&titleNotContains, "title-not-contains", "", "Download if "+ 572 | "title does not contain substring matching given regex") 573 | flag.StringVar(&flairNotContains, "flair-not-contains", "", "Download if "+ 574 | "flair does not contain substring matching given regex") 575 | flag.StringVar(&linkNotContains, "link-not-contains", "", "Download if "+ 576 | "posted link does not contain substring matching given regex") 577 | 578 | flag.StringVar(&options.Search, "search", "", "Search for given term") 579 | flag.BoolVar(&options.PreferPreview, "prefer-preview", false, 580 | "Prefer reddit preview image when possible") 581 | flag.BoolVar(&options.DownloadPreview, "download-preview", false, 582 | "download reddit preview image instead of posted URL") 583 | flag.IntVar(&options.PreviewRes, "preview-res", -1, 584 | "Width of preview to download, eg: 640, 960, 1080") 585 | 586 | flag.Parse() 587 | args := flag.Args() 588 | if (len(args) != 1 && options.Search == "") || help { 589 | eprintf("Usage: %s \n", os.Args[0]) 590 | flag.PrintDefaults() 591 | os.Exit(1) 592 | } 593 | 594 | if 
options.UseHTTP1 { 595 | client = http.Client{ 596 | Transport: &http.Transport{ 597 | TLSNextProto: map[string]func(authority string, c *tls.Conn) http.RoundTripper{}, 598 | }, 599 | } 600 | } else { 601 | client = http.Client{} 602 | } 603 | 604 | if dataOutputFileName != "" && dataOutputFormat == "" { 605 | fmt.Fprintln(os.Stderr, "Data output format not provided. "+ 606 | "It must be a valid go template.") 607 | os.Exit(1) 608 | } 609 | 610 | options.DataOutputFile = createLinksFile(dataOutputFileName) 611 | if options.DataOutputFile != nil { 612 | defer options.DataOutputFile.Close() 613 | } 614 | 615 | var path string = "" 616 | 617 | if len(args) > 0 { 618 | // TODO: Join multireddits 619 | path = strings.TrimSuffix(args[0], "/") 620 | } 621 | 622 | // validate some arguments 623 | toCheck := map[string]int64{ 624 | "--max": int64(options.MaxFiles), 625 | "--max-storage": options.MaxStorage, 626 | "--max-size": options.MaxSize, 627 | } 628 | for option, value := range toCheck { 629 | if value < 1 && value != -1 { 630 | fatal("Invalid value for option " + option) 631 | } 632 | } 633 | 634 | if options.DryRun { 635 | if options.MaxSize != -1 || options.MaxStorage != -1 { 636 | fatal("Can't combine image-size based options with dry run") 637 | } 638 | } 639 | 640 | if options.PreviewRes > 0 && !options.DownloadPreview && 641 | !options.PreferPreview { 642 | fatal("--download-preview or --prefer-preview should be used with " + 643 | "--preview-res") 644 | } 645 | 646 | if options.PreferPreview && options.DownloadPreview { 647 | fatal("Use only one of --prefer-preview and --download-preview") 648 | } 649 | 650 | og := options.OgType 651 | if og != "" && og != "video" && og != "image" && og != "any" { 652 | fatal("Only supported values for --og-type are image, video and any") 653 | } 654 | 655 | // if PrintPostData is enabled, enable dry run 656 | options.DryRun = options.DryRun || options.PrintPostData 657 | 658 | // enable debug output in case of dry run w/o 
print post data 659 | options.Debug = options.Debug || (options.DryRun && !options.PrintPostData) 660 | 661 | if options.After != "" && !strings.HasPrefix(options.After, "t3_") { 662 | options.After = "t3_" + options.After 663 | } 664 | 665 | // compute actual MaxStorage in bytes 666 | if options.MaxStorage != -1 { 667 | options.MaxStorage *= 1000 * 1000 // MB 668 | } 669 | 670 | if options.MaxSize != -1 { 671 | options.MaxSize *= 1000 // KB 672 | } 673 | 674 | regexVals := []struct { 675 | re **regexp.Regexp 676 | opt string 677 | }{ 678 | {&options.TitleContains, titleContains}, 679 | {&options.TitleNotContains, titleNotContains}, 680 | {&options.FlairContains, flairContains}, 681 | {&options.FlairNotContains, flairNotContains}, 682 | {&options.LinkContains, linkContains}, 683 | {&options.LinkNotContains, linkNotContains}, 684 | } 685 | 686 | for _, rv := range regexVals { 687 | if rv.opt != "" { 688 | *(rv.re) = regexp.MustCompile(rv.opt) 689 | } 690 | } 691 | 692 | templateVals := []struct { 693 | name string 694 | tm **template.Template 695 | opt string 696 | }{ 697 | {"data-output-format", &options.DataOutputFormat, dataOutputFormat}, 698 | {"template-filter", &options.TemplateFilter, templateFilter}, 699 | {"filename-format", &options.FilenameFormat, filenameFormat}, 700 | } 701 | 702 | for _, tv := range templateVals { 703 | if tv.opt != "" { 704 | *(tv.tm) = createTemplate(tv.name, tv.opt) 705 | } 706 | } 707 | 708 | // Create folder 709 | folderPath := "rrip-downloads" 710 | if path != "" { 711 | folderPath = path 712 | } 713 | options.Folder = coalesce(options.Folder, 714 | strings.TrimPrefix(strings.ReplaceAll(folderPath, "/", "."), "r.")) 715 | _, err = os.Stat(options.Folder) 716 | 717 | // Note: not creating folder anew if dry run 718 | if os.IsNotExist(err) && !options.DryRun { 719 | check(os.MkdirAll(options.Folder, 0o755)) 720 | } 721 | 722 | // if dry run, change to folder only if folder already existed 723 | if err == nil || !options.DryRun { 
// windowsSubst maps each character that may not appear in a Windows file
// name to the text written in its place (an empty string drops the
// character).
//
// NOTE(review): in the copy of this file under review the replacements for
// '<', '>' and '"' had been decoded back to the raw characters (for '"' that
// is not even valid Go). They are restored here as the HTML entity names they
// most plausibly came from - confirm against the upstream source.
var windowsSubst = map[rune]string{
	'<':  "&lt;",
	'>':  "&gt;",
	':':  "-",
	'"':  "&quot;",
	'/':  "",
	'\\': "",
	'|':  "",
	'?':  "",
	'*':  "",
}

// minimalSubst is the substitution table used when special characters are
// allowed: only the two path separators are removed.
var minimalSubst = map[rune]string{
	'/':  "",
	'\\': "",
}

// winBan lists the reserved Windows device names, in upper case. A file
// whose name (ignoring extensions) equals one of these cannot be created on
// Windows.
var winBan = map[string]bool{
	"CON": true, "PRN": true, "AUX": true, "NUL": true,
	"COM1": true, "COM2": true, "COM3": true, "COM4": true,
	"COM5": true, "COM6": true, "COM7": true, "COM8": true, "COM9": true,
	"LPT1": true, "LPT2": true, "LPT3": true, "LPT4": true,
	"LPT5": true, "LPT6": true, "LPT7": true, "LPT8": true, "LPT9": true,
}

// sanitizeWindowsFilename makes name safe to create on Windows.
//
// Leading and trailing spaces and dots are trimmed. A name that becomes
// empty is replaced by "__Blank__". A name whose stem (the text before the
// first dot) is a reserved device name gets a "__" prefix; the comparison is
// case-insensitive because Windows treats "con.txt" exactly like "CON.txt".
func sanitizeWindowsFilename(name string) string {
	name = strings.Trim(name, " .")
	if name == "" {
		return "__Blank__"
	}
	stem, _, _ := strings.Cut(name, ".")
	if winBan[strings.ToUpper(stem)] {
		return "__" + name
	}
	return name
}

// sanitizeFileName rewrites filename so it can be used as a single path
// component.
//
// With allowSpecialChars set, only path separators are removed
// (minimalSubst) and the result is returned as is. Otherwise every
// Windows-illegal character is substituted (windowsSubst) and the result is
// passed through sanitizeWindowsFilename. In both modes a non-printable rune
// is replaced by '-'.
func sanitizeFileName(filename string, allowSpecialChars bool) string {
	banned := windowsSubst
	if allowSpecialChars {
		banned = minimalSubst
	}

	var b strings.Builder
	b.Grow(len(filename))
	for _, r := range filename {
		if repl, special := banned[r]; special {
			b.WriteString(repl)
			continue
		}
		if !strconv.IsPrint(r) {
			b.WriteRune('-')
			continue
		}
		b.WriteRune(r)
	}

	if allowSpecialChars {
		return b.String()
	}
	return sanitizeWindowsFilename(b.String())
}
var banned map[rune]string 48 | if allowSpecialChars { 49 | banned = minimalSubst 50 | } else { 51 | banned = windowsSubst 52 | } 53 | for _, r := range filename { 54 | repl, spec := banned[r] 55 | if spec { 56 | b.WriteString(repl) 57 | } else if !strconv.IsPrint(r) { 58 | b.WriteRune('-') 59 | } else { 60 | b.WriteRune(r) 61 | } 62 | } 63 | if allowSpecialChars { 64 | return b.String() 65 | } 66 | return sanitizeWindowsFilename(b.String()) 67 | } 68 | -------------------------------------------------------------------------------- /template_util.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "strings" 6 | "text/template" 7 | ) 8 | 9 | func createTemplate(name string, tm string) *template.Template { 10 | tmpl, err := template.New(name).Parse(tm) 11 | check(err, "cannot parse template:", tm) 12 | return tmpl 13 | } 14 | 15 | func formatTemplate(tm *template.Template, value any) string { 16 | var sb strings.Builder 17 | err := tm.Execute(&sb, value) 18 | check(err, "Cannot apply template to value:", value) 19 | return sb.String() 20 | } 21 | 22 | // Returns value as json with indentation 23 | func marshallIndent(value any) string { 24 | b, err := json.MarshalIndent(value, "", " ") 25 | check(err) 26 | return string(b) 27 | } 28 | -------------------------------------------------------------------------------- /types.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "io" 5 | "regexp" 6 | "text/template" 7 | ) 8 | 9 | type Stats struct { 10 | Processed, Saved, Failed, Repeated int 11 | CopiedBytes int64 12 | } 13 | 14 | type Options struct { 15 | After, Sort, UserAgent, Folder string 16 | EntriesLimit, MaxFiles, MinScore int 17 | Debug, DryRun, AllowSpecialChars bool 18 | MaxStorage, MaxSize int64 19 | OgType string 20 | DataOutputFile io.WriteCloser 21 | DataOutputFormat *template.Template 22 | 
// ImagePreviewEntry is one rendition of a preview image.
type ImagePreviewEntry struct {
	Url    string
	Width  int
	Height int
}

// ImagePreview groups the renditions available for one image: the Source
// entry plus a list of alternative Resolutions.
type ImagePreview struct {
	Source      ImagePreviewEntry
	Resolutions []ImagePreviewEntry
}

// PostData holds the fields of a post that the program works with.
type PostData struct {
	Url   string
	Name  string
	Title string
	Id    string

	Score int

	Subreddit string
	Author    string

	LinkFlairText string
	CreatedUtc    int64

	Preview struct {
		Images []ImagePreview
	}
}

// Post wraps the data of one listing entry.
type Post struct {
	Data PostData
}

// ApiData is the payload of a listing: the After value and the posts.
type ApiData struct {
	After    string
	Children []Post
}

// ApiResponse is the top-level envelope of a listing response.
type ApiResponse struct {
	Data ApiData
}

// PostHandler is the callback invoked for a post. It receives the typed
// post and a map form of it (presumably the raw decoded post - verify
// against the caller).
type PostHandler func(post PostData, postMap map[string]any)

// getTerminalSize returns the terminal width in columns as reported by
// `stty size`. It falls back to 80 when the command fails, its output
// cannot be parsed, or the reported width is zero.
func getTerminalSize() int {
	const fallbackCols = 80

	stty := exec.Command("stty", "size")
	stty.Stdin = os.Stdin
	output, err := stty.Output()
	if err != nil {
		return fallbackCols
	}

	var rows, cols int
	if _, err := fmt.Sscanf(string(output), "%d %d", &rows, &cols); err != nil || cols == 0 {
		return fallbackCols
	}
	return cols
}

// coalesce returns a unless it is empty, in which case it returns b.
func coalesce(a, b string) string {
	if a != "" {
		return a
	}
	return b
}

// quote returns s as a double-quoted Go string literal.
func quote(s string) string {
	quoted := strconv.Quote(s)
	return quoted
}
36 | os.Exit(1) 37 | } 38 | 39 | func eprintln(vals ...interface{}) (int, error) { 40 | return fmt.Fprintln(os.Stderr, vals...) 41 | } 42 | 43 | func eprintf(format string, vals ...interface{}) (int, error) { 44 | return fmt.Fprintf(os.Stderr, format, vals...) 45 | } 46 | 47 | func eprint(vals ...interface{}) (int, error) { 48 | return fmt.Fprint(os.Stderr, vals...) 49 | } 50 | 51 | func max(a, b int) int { 52 | if a > b { 53 | return a 54 | } 55 | return b 56 | } 57 | 58 | func check(e error, extra ...interface{}) { 59 | if e != nil { 60 | fmt.Fprintln(os.Stderr, extra...) 61 | fatal(e.Error()) 62 | } 63 | } 64 | 65 | func log(vals ...interface{}) { 66 | if options.Debug { 67 | fmt.Fprintln(os.Stderr, vals...) 68 | } 69 | } 70 | 71 | func size(bytes int64) string { 72 | sizes := []int64{1000 * 1000 * 1000, 1000 * 1000, 1000} 73 | names := []string{"GB", "MB", "KB"} 74 | 75 | if bytes == -1 { 76 | return "Unknown length" 77 | } 78 | 79 | for i, sz := range sizes { 80 | if bytes > sz { 81 | units := float64(bytes) / float64(sz) 82 | return strconv.FormatFloat(units, 'f', 1, 64) + names[i] 83 | } 84 | } 85 | return strconv.FormatInt(bytes, 10) + "B" 86 | } 87 | --------------------------------------------------------------------------------