├── requirements.txt
├── .github
│   └── FUNDING.yml
├── logindata.py.example
├── .gitignore
├── docker-compose.yml
├── html
│   ├── comment-div.html
│   ├── post.html
│   ├── post-div.html
│   ├── upvoted.html
│   ├── saved.html
│   ├── username.html
│   ├── main.js
│   └── style.css
├── Dockerfile
├── README.md
├── save.py
└── utilities.py
/requirements.txt:
--------------------------------------------------------------------------------
praw
requests
youtube_dl
yt-dlp
tqdm
redvid
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
# These are supported funding model platforms

github: [samirelanduk]
--------------------------------------------------------------------------------
/logindata.py.example:
--------------------------------------------------------------------------------
REDDIT_USERNAME = "username"
REDDIT_PASSWORD = "password"
REDDIT_CLIENT_ID = "id"
REDDIT_SECRET = "secret"
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
__pycache__
.DS_Store
samirelanduk
secrets.py
secrets1.py
secrets2.py
/logindata.py
logindata.py
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: "3.2"
services:
  reddit-save:
    build: .
    image: reddit-save:latest
    environment:
      REDDIT_USERNAME:
      REDDIT_PASSWORD:
      REDDIT_CLIENT_ID:
      REDDIT_SECRET:
    volumes:
      - "./archive:/opt/app/archive"
--------------------------------------------------------------------------------
/html/comment-div.html:
--------------------------------------------------------------------------------
[template markup lost in extraction; no text content survived]
--------------------------------------------------------------------------------
/html/post.html:
--------------------------------------------------------------------------------
[template markup lost in extraction; no text content survived]
--------------------------------------------------------------------------------
/html/post-div.html:
--------------------------------------------------------------------------------
[template markup lost in extraction; no text content survived]
--------------------------------------------------------------------------------
/html/upvoted.html:
--------------------------------------------------------------------------------
[template markup lost in extraction; surviving text: page title "Upvoted Posts" and heading "Upvoted Posts"]
--------------------------------------------------------------------------------
/html/saved.html:
--------------------------------------------------------------------------------
[template markup lost in extraction; surviving text: page title "Saved Posts" and heading "Saved Posts"]
--------------------------------------------------------------------------------
/html/username.html:
--------------------------------------------------------------------------------
[template markup lost in extraction; surviving text: page title "Saved Posts" and heading "u/[username]'s Posts"]
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM jrottenberg/ffmpeg:4.0-alpine

ENV PYTHONUNBUFFERED=1
ENV DOCKER=1

RUN apk add build-base python3-dev

RUN echo "**** install Python ****" && \
    apk add --no-cache python3 && \
    if [ ! -e /usr/bin/python ]; then ln -sf python3 /usr/bin/python ; fi && \
    \
    echo "**** install pip ****" && \
    python3 -m ensurepip && \
    rm -r /usr/lib/python*/ensurepip && \
    pip3 install --no-cache --upgrade pip setuptools wheel && \
    if [ ! -e /usr/bin/pip ]; then ln -s pip3 /usr/bin/pip ; fi

COPY requirements.txt /opt/app/requirements.txt
WORKDIR /opt/app
RUN pip install -r requirements.txt
COPY . .

ENTRYPOINT ["python", "save.py"]
CMD []
--------------------------------------------------------------------------------
/html/main.js:
--------------------------------------------------------------------------------
// Switch between showing the posts section and the comments section
const toggleView = () => {
    const postsSection = document.querySelector(".posts-section");
    const commentsSection = document.querySelector(".comments-section");
    if (commentsSection.style.display === "none") {
        commentsSection.style.display = "block";
        postsSection.style.display = "none";
    } else {
        postsSection.style.display = "block";
        commentsSection.style.display = "none";
    }
}

// Expand or collapse a clicked image/video preview
const toggleMedia = e => {
    const media = e.target;
    const preview = media.parentNode;
    preview.classList.toggle("full");
}

window.addEventListener("load", function() {
    const postsSection = document.querySelector(".posts-section");
    const commentsSection = document.querySelector(".comments-section");
    if (commentsSection) {
        commentsSection.style.display = "none";
        const toggleButton = document.createElement("button");
        toggleButton.innerText = "toggle";
        toggleButton.addEventListener("click", toggleView);
        document.body.insertBefore(toggleButton, postsSection);
    }

    for (let preview of document.querySelectorAll(".preview")) {
        const media = preview.querySelector("img") || preview.querySelector("video");
        if (media) {
            media.addEventListener("click", toggleMedia);
        }
    }
})
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# reddit-save

A Python utility for backing up your upvoted/saved reddit stuff.

Browsing through the stuff you've liked or saved on reddit is really enjoyable and, depending on the reason you saved something, can be a great way to revisit things you once thought important. It is a personalised feed of posts and comments curated by the one person guaranteed to know what you like - past you.

However, over time more and more of the older posts will be deleted or go missing, and the historical record atrophies. Use this tool to back up those posts and comments to your computer, where you can browse them offline and where they are safe forever.

reddit-save will back up saved posts, saved comments, and upvoted posts. It can't do upvoted comments because the reddit API doesn't expose them. Crucially, when it is run again on the same location it will ignore any posts/comments previously archived - once something is saved, it's saved permanently.

## Installation

```bash
$ git clone https://github.com/samirelanduk/reddit-save
$ cd reddit-save
$ pip install -r requirements.txt
```

If you get permission errors, try `sudo` or a virtual environment.
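
For example, to keep the dependencies isolated in a throwaway virtual environment:

```bash
$ python3 -m venv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
```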

You will need [ffmpeg](https://ffmpeg.org/) installed somewhere too.
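
For example (pick the line for your platform):

```bash
$ sudo apt install ffmpeg     # Debian/Ubuntu
$ brew install ffmpeg         # macOS, via Homebrew
```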

Rename the file `logindata.py.example` to `logindata.py`. You will need to add four things to this file: your reddit username and password, and a reddit client ID and secret. The latter two are obtained using [the instructions here](https://github.com/reddit-archive/reddit/wiki/OAuth2-Quick-Start-Example#first-steps). The file should look something like this:

```python
REDDIT_USERNAME = "spez"
REDDIT_PASSWORD = "myredditpassword123"
REDDIT_CLIENT_ID = "sadsU7-zfX"
REDDIT_SECRET = "687DDJSS&999d-hdkjK8h"
```

(If you have 2FA enabled, you will need to append that to the password, separated by a colon.)
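
For example, if your authenticator currently showed 123456 (an illustrative code), the password line would become:

```python
REDDIT_PASSWORD = "myredditpassword123:123456"  # password + ":" + current 2FA code
```

Such codes rotate every thirty seconds or so, so the value has to be fresh each time you run the script.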

## Use

Create a folder that will contain your archive. Then run:

```bash
$ ./save.py saved folder_name
$ ./save.py upvoted folder_name
```

The first command will back up your saved posts/comments to a file called `folder_name/saved.html`. The second will back up your upvoted posts to a file called `folder_name/upvoted.html`.
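
Media files and standalone per-post pages also land in `media/` and `posts/` subfolders of the same location - save.py (below) creates both automatically.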

Each post will have its top-level comments saved, as well as each of their immediate child comments (but no further).

Linked media files (images, videos etc.) will be saved locally where possible, though imgur is currently not well supported in all cases.

If you also want to break the resulting HTML file into multiple files (browsers struggle to display enormous HTML files), you can add the `--page-size 100` argument (replacing 100 with however many posts you want per page).
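
For example, to split the saved archive into pages of 100 posts:

```bash
$ ./save.py saved folder_name --page-size 100
```

Judging by the filename pattern `get_previous` expects in utilities.py, the pages are written as numbered files alongside the main one (e.g. `saved.0.html`, `saved.1.html`, ...), and the complete single-file version is still written as `saved.html`.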

## Use with Docker

Rather than installing dependencies locally, you can use Docker to create a local image and use that instead. First build the image:

```bash
$ docker build -t redditsave .
```

Then run reddit-save within a container created from this image:

```bash
$ docker run \
    -e REDDIT_USERNAME=spez \
    -e REDDIT_PASSWORD="myredditpassword123" \
    -e REDDIT_CLIENT_ID="sadsU7-zfX" \
    -e REDDIT_SECRET="687DDJSS&999d-hdkjK8h" \
    -v /Local/location/to/save/in:/opt/app/archive \
    redditsave saved
```

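Alternatively, the bundled `docker-compose.yml` passes the same four variables through from your shell and mounts `./archive` as the output folder, so - assuming your Docker has the compose plugin - an equivalent run is:

```bash
$ export REDDIT_USERNAME=spez
$ export REDDIT_PASSWORD="myredditpassword123"
$ export REDDIT_CLIENT_ID="sadsU7-zfX"
$ export REDDIT_SECRET="687DDJSS&999d-hdkjK8h"
$ docker compose run --rm reddit-save saved
```
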
## Backing up a specific username

Rather than backing up your own saved/upvoted posts and comments, you can back up the submitted posts and comments of another user:

```bash
$ ./save.py user:samirelanduk folder_name
```
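
As save.py (below) shows, this writes the archive to `folder_name/samirelanduk.html`, again with media and per-post pages under `media/` and `posts/`.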
--------------------------------------------------------------------------------
/save.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import argparse
import os
import re
from tqdm import tqdm
from utilities import *

# Get arguments
def validate_mode(mode):
    if mode not in ["saved", "upvoted"] and not mode.startswith("user:"):
        raise argparse.ArgumentTypeError(f"Invalid mode: {mode}")
    return mode
parser = argparse.ArgumentParser(description="Save reddit posts to file.")
parser.add_argument("mode", type=validate_mode, nargs=1, help="saved, upvoted, or user:<username>.")
if os.getenv("DOCKER", "0") != "1":
    parser.add_argument("location", type=str, nargs=1, help="The path to save to.")
# Optional page size argument
parser.add_argument("--page-size", type=int, nargs=1, default=[0], help="The number of posts to save per page.")
args = parser.parse_args()
mode = args.mode[0]
page_size = args.page_size[0]
location = "./archive/" if os.getenv("DOCKER", "0") == "1" else args.location[0]

# Is the specified location a directory?
if not os.path.isdir(location):
    raise SystemExit(f"{location} is not a directory")

# Make a client object
client = make_client()

# Saved posts or upvoted posts?
if mode == "saved":
    html_file = "saved.html"
    get_posts = get_saved_posts
    get_comments = get_saved_comments
elif mode == "upvoted":
    html_file = "upvoted.html"
    get_posts = get_upvoted_posts
    get_comments = lambda client: []  # the reddit API doesn't expose upvoted comments
elif mode.startswith("user:"):
    username = mode.split(":")[-1]
    html_file = f"{username}.html"
    get_posts = lambda client: get_user_posts(client, username)
    get_comments = lambda client: get_user_comments(client, username)

# Make directories for media and posts
if not os.path.exists(os.path.join(location, "media")):
    os.mkdir(os.path.join(location, "media"))
if not os.path.exists(os.path.join(location, "posts")):
    os.mkdir(os.path.join(location, "posts"))

# Get files to search through
print("Getting previously saved posts and comments...")
existing_ids, existing_posts_html, existing_comments_html = get_previous(location, html_file)
print(len(existing_posts_html), "previous posts.")
print(len(existing_comments_html), "previous comments.")

# Get posts HTML
posts_html = []
posts = [p for p in get_posts(client) if p.id not in existing_ids]
if not posts:
    print("No new posts")
else:
    for post in tqdm(posts):
        post_html = get_post_html(post)
        media = save_media(post, location)
        if media:
            post_html = add_media_preview_to_html(post_html, media)
        posts_html.append(post_html)
        page_html = create_post_page_html(post, post_html)
        with open(os.path.join(location, "posts", f"{post.id}.html"), "w", encoding="utf-8") as f:
            f.write(page_html)
posts_html += existing_posts_html

# Get comments HTML
comments_html = []
comments = [c for c in get_comments(client) if c.id not in existing_ids]
if not comments:
    print("No new comments")
else:
    for comment in tqdm(comments):
        comment_html = get_comment_html(comment)
        comments_html.append(comment_html)
comments_html += existing_comments_html

# Save overall HTML
print("Saving HTML...")
if page_size:
    length = max(len(posts_html), len(comments_html))
    page_count = (length + page_size - 1) // page_size  # ceil(length / page_size)
    for i in range(page_count):
        posts_on_page = posts_html[i*page_size:(i+1)*page_size]
        comments_on_page = comments_html[i*page_size:(i+1)*page_size]
        has_next = i < page_count - 1
        save_html(posts_on_page, comments_on_page, location, html_file, i, has_next, username=html_file.split(".")[0])
save_html(posts_html, comments_html, location, html_file, None, False, username=html_file.split(".")[0])
--------------------------------------------------------------------------------
/html/style.css:
--------------------------------------------------------------------------------
* {
    margin: 0;
    padding: 0;
    border: 0;
    outline: none;
    font-size: 100%;
    font: inherit;
    vertical-align: baseline;
    list-style: none;
    background-color: inherit;
    box-sizing: border-box;
    -webkit-appearance: none;
    -moz-appearance: none;
}

body {
    font-family: 'Open Sans', Verdana;
}

img, video {
    max-width: 200px;
    max-height: 200px;
    cursor: pointer;
}

a {
    color: #0079d3;
    text-decoration: none;
}

p, li {
    font-size: 14px;
    margin-bottom: 8px;
}

ul li {
    list-style: disc;
}

ol li {
    list-style-type: decimal;
}

li {
    margin-left: 24px;
}

em {
    font-style: italic;
}

strong {
    font-weight: bold;
}

code {
    font-family: monospace;
    white-space: pre;
}

blockquote {
    font-style: italic;
    padding-left: 8px;
    position: relative;
}

blockquote:before {
    position: absolute;
    left: 0;
    width: 4px;
    content: "";
    height: 100%;
    background-color: #16a085;
}

a:hover {
    text-decoration: underline;
}

h1 {
    font-weight: bold;
    font-size: 24px;
    padding: 8px 16px;
}

.links {
    padding: 12px 16px 0px;
    font-size: 12px;
    display: flex;
    gap: 8px;
}

.post, .comment {
    border-top: 1px solid #f0f0f0;
    padding: 12px 16px;
}

.post h2 {
    font-weight: bold;
    font-size: 18px;
}

.post h1, .comment h1 {
    padding: 0;
    margin-bottom: 8px;
}

.post .info {
    font-size: 12px;
    font-weight: 300;
    margin-bottom: 16px;
}

.post .info time {
    font-weight: 500;
}

.post .info time, .post .info span {
    padding-right: 4px;
}

.post .links a {
    padding-right: 4px;
    font-weight: 400;
}

.post .links a:after {
    content: "|";
    position: relative;
    right: -4px;
}

.post .links a:last-child:after {
    content: "";
}

.comment .info {
    display: flex;
    align-items: baseline;
    margin-bottom: 8px;
}

.comment .info time {
    padding-right: 12px;
}

.comment .info div {
    font-weight: 300;
    font-size: 12px;
    padding-right: 8px;
    position: relative;
    top: -1px;
}

.comments h2 {
    font-weight: bold;
    font-size: 20px;
    padding: 8px 16px;
}

.op {
    background: #0055df;
    color: white;
    font-weight: 700;
    padding: 0px 4px;
    border-radius: 4px;
}

button {
    border: 1px solid #1abc9c60;
    color: #1abc9c;
    border-radius: 5px;
    padding: 4px 8px;
    font-size: 12px;
    cursor: pointer;
    position: absolute;
    right: 16px;
    font-weight: bold;
    top: 10px;
    background-color: #1abc9c20;
}

button:hover {
    background-color: #1abc9c40;
}

.preview.full {
    width: 100vw;
    height: 100vh;
    position: fixed;
    left: 0;
    top: 0;
    background-color: #00000080;
    display: flex;
    justify-content: center;
    z-index: 100;
    align-items: center;
}

.preview.full img, .preview.full video {
    max-width: 100vw;
    max-height: 100vh;
    width: 100%;
    height: 100%;
    object-fit: contain;
}
--------------------------------------------------------------------------------
/utilities.py:
--------------------------------------------------------------------------------
import time
import os
import praw
import requests
from redvid import Downloader
import yt_dlp
import re
from datetime import datetime

try:
    from logindata import REDDIT_USERNAME, REDDIT_PASSWORD
    from logindata import REDDIT_CLIENT_ID, REDDIT_SECRET
except ImportError:
    REDDIT_USERNAME = os.getenv("REDDIT_USERNAME")
    REDDIT_PASSWORD = os.getenv("REDDIT_PASSWORD")
    REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
    REDDIT_SECRET = os.getenv("REDDIT_SECRET")

IMAGE_EXTENSIONS = ["gif", "gifv", "jpg", "jpeg", "png"]
VIDEO_EXTENSIONS = ["mp4"]
PLATFORMS = ["redgifs.com", "gfycat.com", "imgur.com", "youtube.com"]


def make_client():
    """Creates a PRAW client with the details in logindata.py (or the environment)."""

    print(f"Logging in as {REDDIT_USERNAME}")

    return praw.Reddit(
        username=REDDIT_USERNAME,
        password=REDDIT_PASSWORD,
        client_id=REDDIT_CLIENT_ID,
        client_secret=REDDIT_SECRET,
        user_agent="reddit-save",
    )


def get_previous(location, html_file):
    html_files = [f for f in os.listdir(location) if f.endswith(".html")]
    pattern = html_file.replace(".html", r"\.(\d+)?\.html")
    matches = [re.match(pattern, f) for f in html_files]
    matches = [m[0] for m in matches if m]
    matches.sort(key=lambda x: int(x.split(".")[1]))
    existing_ids = []
    existing_posts_html = []
    existing_comments_html = []
    if html_file in html_files: matches.append(html_file)
    for match in matches:
        with open(os.path.join(location, match), encoding="utf-8") as f:
            current_html = f.read()
        for id in re.findall(r'id="(.+?)"', current_html):
            if id not in existing_ids:
                existing_ids.append(id)
        posts = re.findall(
            r'(<div class="post"[\s\S]+?<\/div>)',
            current_html
        )
        comments = re.findall(
            r'(<div class="comment"[\s\S]+?<\/div>)',