├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── README_PyPI.md ├── images ├── demo.gif ├── imgur_app.png ├── logo.png └── reddit_app.png ├── pyproject.toml ├── requirements.txt ├── setup.cfg └── src └── saveddit ├── __init__.py ├── _version.py ├── configuration.py ├── multireddit_downloader.py ├── multireddit_downloader_config.py ├── saveddit.py ├── search_config.py ├── search_subreddits.py ├── submission_downloader.py ├── subreddit_downloader.py ├── subreddit_downloader_config.py ├── user_downloader.py └── user_downloader_config.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .DS_Store 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # pytype static type analyzer 136 | .pytype/ 137 | 138 | # Cython debug symbols 139 | cython_debug/ 140 | 141 | # Configuration file 142 | **/user_config.yaml -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | Contributions are welcomed. Open a pull-request or an issue. 3 | 4 | ## Code of conduct 5 | This project adheres to the [Open Code of Conduct][code-of-conduct]. By participating, you are expected to honor this code. 6 | 7 | [code-of-conduct]: https://github.com/spotify/code-of-conduct/blob/master/code-of-conduct.md 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Pranav 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include images/* 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | ![](images/logo.png) 3 | 4 | 5 | 6 | 7 | [PyPI version badge] 8 | 9 | 10 | [license badge] 11 | 12 |
13 | 14 | `saveddit` is a bulk media downloader for reddit 15 | 16 | ```console 17 | pip3 install saveddit 18 | ``` 19 | 20 | ## Setting up authorization 21 | 22 | * [Register an application with Reddit](https://ssl.reddit.com/prefs/apps/) 23 | - Write down your client ID and secret from the app 24 | - More about Reddit API access [here](https://ssl.reddit.com/wiki/api) 25 | - Wiki page about Reddit OAuth2 applications [here](https://github.com/reddit-archive/reddit/wiki/OAuth2) 26 | 27 |
28 | ![](images/reddit_app.png) 29 |
30 | 31 | * [Register an application with Imgur](https://api.imgur.com/oauth2/addclient) 32 | - Write down the Imgur client ID from the app 33 | 34 |
35 | ![](images/imgur_app.png) 36 |
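With both apps registered, you can sanity-check the Reddit credentials before wiring them into `saveddit`. Below is a minimal sketch using PRAW (the Reddit client library `saveddit` itself uses); the client ID and secret are placeholders for the values you wrote down:

```python
# Credential sanity check -- a sketch, not part of saveddit itself.
import praw

reddit = praw.Reddit(
    client_id="YOUR_REDDIT_CLIENT_ID",          # placeholder
    client_secret="YOUR_REDDIT_CLIENT_SECRET",  # placeholder
    user_agent="saveddit credential check",
)

# PRAW objects are lazy: reading an attribute forces an authenticated
# API call, so bad credentials raise an exception here instead of
# printing the subreddit title.
print(reddit.subreddit("announcements").title)
```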
37 | 38 | These registrations will authorize you to use the Reddit and Imgur APIs to download publicly available information. 39 | 40 | ## User configuration 41 | 42 | The first time you run `saveddit`, you will see something like this: 43 | 44 | ```console 45 | foo@bar:~$ saveddit 46 | Retrieving configuration from ~/.saveddit/user_config.yaml file 47 | No configuration file found. 48 | Creating one. Would you like to edit it now? 49 | > Choose Y for yes and N for no 50 | ``` 51 | 52 | If you choose 'yes', the program will prompt you to enter these credentials: 53 | - Your imgur client ID 54 | - Your reddit client ID 55 | - Your reddit client secret 56 | - Your reddit username 57 | 58 | If you choose 'no', the program will create a file that you can edit later. To edit it: 59 | 60 | * Open the generated `~/.saveddit/user_config.yaml` 61 | * Update the client IDs and secrets from the previous step 62 | * If you plan on using the `user` API, add your reddit username as well 63 | 64 | ```yaml 65 | imgur_client_id: '' 66 | reddit_client_id: '' 67 | reddit_client_secret: '' 68 | reddit_username: '' 69 | ``` 70 | 71 | ## Download from Subreddit 72 | 73 | ```console 74 | foo@bar:~$ saveddit subreddit -h 75 | Retrieving configuration from /Users/pranav/.saveddit/user_config.yaml file 76 | 77 | usage: saveddit subreddit [-h] [-f categories [categories ...]] [-l post_limit] [--skip-comments] [--skip-meta] [--skip-videos] -o output_path subreddits [subreddits ...] 78 | 79 | positional arguments: 80 | subreddits Names of subreddits to download, e.g., AskReddit 81 | 82 | optional arguments: 83 | -h, --help show this help message and exit 84 | -f categories [categories ...] 85 | Categories of posts to download (default: ['hot', 'new', 'rising', 'controversial', 'top', 'gilded']) 86 | -l post_limit Limit the number of submissions downloaded in each category (default: None, i.e., all submissions) 87 | --skip-comments When true, saveddit will not save comments to a comments.json file 88 | --skip-meta When true, saveddit will not save meta to a submission.json file on submissions 89 | --skip-videos When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links) 90 | --all-comments When true, saveddit will download all the comments in a post instead of just downloading the top ones. 91 | -o output_path Directory where saveddit will save downloaded content 92 | ``` 93 | 94 | ```console 95 | foo@bar:~$ saveddit subreddit pics -f hot -l 5 -o ~/Desktop 96 | ``` 97 | 98 | ```console 99 | foo@bar:~$ tree -L 4 ~/Desktop/www.reddit.com 100 | /Users/pranav/Desktop/www.reddit.com 101 | └── r 102 | └── pics 103 | └── hot 104 | ├── 000_Prince_Philip_Duke_of_Edinburgh_... 105 | ├── 001_Day_10_of_Nobody_Noticing_the_Ap... 106 | ├── 002_First_edited_picture 107 | ├── 003_Reorganized_a_few_months_ago_and... 108 | └── 004_Van_Gogh_inspired_rainy_street_I... 109 | ``` 110 | 111 | You can download from multiple subreddits and use multiple filters: 112 | 113 | ```console 114 | foo@bar:~$ saveddit subreddit funny AskReddit -f hot top new rising -l 5 -o ~/Downloads/Reddit/.
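foo@bar:~$ # (illustrative) the category filters compose with the skip flags;
foo@bar:~$ # e.g., download media only, with no comments.json or submission.json files:
foo@bar:~$ saveddit subreddit funny AskReddit -f top -l 25 --skip-comments --skip-meta -o ~/Downloads/Reddit/.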
115 | ``` 116 | 117 | The downloads from each subreddit go to a separate folder, like so: 118 | 119 | ```console 120 | foo@bar:~$ tree -L 3 ~/Downloads/Reddit/www.reddit.com 121 | /Users/pranav/Downloads/Reddit/www.reddit.com 122 | └── r 123 | ├── AskReddit 124 | │ ├── hot 125 | │ ├── new 126 | │ ├── rising 127 | │ └── top 128 | └── funny 129 | ├── hot 130 | ├── new 131 | ├── rising 132 | └── top 133 | ``` 134 | 135 | ## Download from anonymous Multireddit 136 | 137 | To download from an anonymous multireddit, use the `multireddit` option and pass one or more subreddit names: 138 | 139 | ```console 140 | foo@bar:~$ saveddit multireddit -h 141 | usage: saveddit multireddit [-h] [-f categories [categories ...]] [-l post_limit] [--skip-comments] [--skip-meta] [--skip-videos] -o output_path subreddits [subreddits ...] 142 | 143 | positional arguments: 144 | subreddits Names of subreddits to download, e.g., aww, pics. The downloads will be stored in <output_path>/www.reddit.com/m/aww+pics/. 145 | 146 | optional arguments: 147 | -h, --help show this help message and exit 148 | -f categories [categories ...] 149 | Categories of posts to download (default: ['hot', 'new', 'random_rising', 'rising', 'controversial', 'top', 'gilded']) 150 | -l post_limit Limit the number of submissions downloaded in each category (default: None, i.e., all submissions) 151 | --skip-comments When true, saveddit will not save comments to a comments.json file 152 | --skip-meta When true, saveddit will not save meta to a submission.json file on submissions 153 | --skip-videos When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links) 154 | -o output_path Directory where saveddit will save downloaded content 155 | ``` 156 | 157 | ```console 158 | foo@bar:~$ saveddit multireddit EarthPorn NaturePics -f hot -l 5 -o ~/Desktop 159 | ``` 160 | 161 | Anonymous multireddits are saved in `<output_path>/www.reddit.com/m/<subreddit_names>/<category>/` like so: 162 | 163 | ```console 164 | tree -L 4 ~/Desktop/www.reddit.com 165 | /Users/pranav/Desktop/www.reddit.com 166 | └── m 167 | └── EarthPorn+NaturePics 168 | └── hot 169 | ├── 000_Banning_State_Park_Minnesota_OC_... 170 | ├── 001_Misty_forest_in_the_mountains_of... 171 | ├── 002_One_of_the_highlights_of_my_last... 172 | ├── 003__OC_Japan_Kyoto_Garden_of_the_Go... 173 | └── 004_Sunset_at_Mt_Rainier_National_Pa... 174 | ``` 175 | 176 | ## Download from User's page 177 | 178 | ```console 179 | foo@bar:~$ saveddit user -h 180 | usage: saveddit user [-h] users [users ...] {saved,gilded,submitted,multireddits,upvoted,comments} ...
181 | 182 | positional arguments: 183 | users Names of users to download, e.g., Poem_for_your_sprog 184 | {saved,gilded,submitted,multireddits,upvoted,comments} 185 | 186 | optional arguments: 187 | -h, --help show this help message and exit 188 | ``` 189 | 190 | Here's a usage example for downloading the top 5 comments made by `Poem_for_your_sprog`: 191 | 192 | ```console 193 | foo@bar:~$ saveddit user "Poem_for_your_sprog" comments -s top -l 5 -o ~/Desktop 194 | ``` 195 | 196 | Here's another example for downloading `kemitche`'s multireddits: 197 | 198 | ```console 199 | foo@bar:~$ saveddit user kemitche multireddits -n reddit -f hot -l 5 -o ~/Desktop 200 | ``` 201 | 202 | User-specific content is downloaded to `<output_path>/www.reddit.com/u/<username>/...` like so: 203 | 204 | ```console 205 | foo@bar:~$ tree ~/Desktop/www.reddit.com 206 | /Users/pranav/Desktop/www.reddit.com 207 | └── u 208 | ├── Poem_for_your_sprog 209 | │ ├── comments 210 | │ │ └── top 211 | │ │ ├── 000_Comment_my_name_is_Cow_and_wen_its_ni....json 212 | │ │ ├── 001_Comment_It_stopped_at_six_and_life....json 213 | │ │ ├── 002_Comment__Perhaps_I_could_listen_to_podca....json 214 | │ │ ├── 003_Comment__I_don_t_have_regret_for_the_thi....json 215 | │ │ └── 004_Comment__So_throw_off_the_chains_of_oppr....json 216 | │ └── user.json 217 | └── kemitche 218 | ├── m 219 | │ └── reddit 220 | │ └── hot 221 | │ ├── 000_When_posting_to_my_u_channel_NSF... 222 | │ │ ├── comments.json 223 | │ │ └── submission.json 224 | │ ├── 001_How_to_remove_popular_near_you 225 | │ │ ├── comments.json 226 | │ │ └── submission.json 227 | │ ├── 002__IOS_2021_13_0_Reddit_is_just_su... 228 | │ │ ├── comments.json 229 | │ │ └── submission.json 230 | │ ├── 003_The_Approve_User_button_should_n... 231 | │ │ ├── comments.json 232 | │ │ └── submission.json 233 | │ └── 004_non_moderators_unable_to_view_su... 234 | │ ├── comments.json 235 | │ └── submission.json 236 | └── user.json 237 | ``` 238 | 239 | ## Search and Download 240 | 241 | `saveddit` supports searching subreddits and downloading the search results: 242 | 243 | ```console 244 | foo@bar:~$ saveddit search -h 245 | usage: saveddit search [-h] -q query [-s sort] [-t time_filter] [--include-nsfw] [--skip-comments] [--skip-meta] [--skip-videos] -o output_path subreddits [subreddits ...]
246 | 247 | positional arguments: 248 | subreddits Names of subreddits to search, e.g., all, aww, pics 249 | 250 | optional arguments: 251 | -h, --help show this help message and exit 252 | -q query Search query string 253 | -s sort Sort to apply on search (default: relevance, choices: [relevance, hot, top, new, comments]) 254 | -t time_filter Time filter to apply on search (default: all, choices: [all, day, hour, month, week, year]) 255 | --include-nsfw When true, saveddit will include NSFW results in search 256 | --skip-comments When true, saveddit will not save comments to a comments.json file 257 | --skip-meta When true, saveddit will not save meta to a submission.json file on submissions 258 | --skip-videos When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links) 259 | -o output_path Directory where saveddit will save downloaded content 260 | ``` 261 | 262 | e.g., 263 | 264 | ```console 265 | foo@bar:~$ saveddit search soccer -q "Chelsea" -o ~/Desktop 266 | ``` 267 | 268 | The downloaded search results are stored in `<output_path>/www.reddit.com/q/<query>/<subreddits>/<sort>/...` 269 | 270 | ```console 271 | foo@bar:~$ tree -L 4 ~/Desktop/www.reddit.com/q 272 | /Users/pranav/Desktop/www.reddit.com/q 273 | └── Chelsea 274 | └── soccer 275 | └── relevance 276 | ├── 000__Official_Results_for_UEFA_Champ... 277 | ├── 001_Porto_0_1_Chelsea_Mason_Mount_32... 278 | ├── 002_Crystal_Palace_0_2_Chelsea_Chris... 279 | ├── 003_Post_Match_Thread_Chelsea_2_5_We... 280 | ├── 004_Match_Thread_Porto_vs_Chelsea_UE... 281 | ├── 005_Crystal_Palace_1_4_Chelsea_Chris... 282 | ├── 006_Porto_0_2_Chelsea_Ben_Chilwell_8... 283 | ├── 007_Post_Match_Thread_Porto_0_2_Chel... 284 | ├── 008_UCL_Quaterfinalists_are_Bayern_D... 285 | ├── 009__MD_Mino_Raiola_and_Haaland_s_fa... 286 | ├── 010_Chelsea_2_5_West_Brom_Callum_Rob... 287 | ├── 011_Chelsea_1_2_West_Brom_Matheus_Pe... 288 | ├── 012__Bild_Sport_via_Sport_Witness_Ch... 289 | ├── 013_Match_Thread_Chelsea_vs_West_Bro... 290 | ├── 014_Chelsea_1_3_West_Brom_Callum_Rob... 291 | ├── 015_Match_Thread_Chelsea_vs_Atletico... 292 | ├── 016_Stefan_Savić_Atlético_Madrid_str... 293 | ├── 017_Chelsea_1_0_West_Brom_Christian_... 294 | └── 018_Alvaro_Morata_I_ve_never_had_dep... 295 | ``` 296 | 297 | ## Supported Links 298 | 299 | * Direct links to images or videos, e.g., `.png`, `.jpg`, `.mp4`, `.gif`, etc. 300 | * Reddit galleries `reddit.com/gallery/...` 301 | * Reddit videos `v.redd.it/...` 302 | * Gfycat links `gfycat.com/...` 303 | * Redgif links `redgifs.com/...` 304 | * Imgur images `imgur.com/...` 305 | * Imgur albums `imgur.com/a/...` and `imgur.com/gallery/...` 306 | * Youtube links `youtube.com/...` and `youtu.be/...` 307 | * These [sites](https://ytdl-org.github.io/youtube-dl/supportedsites.html) supported by `youtube-dl` 308 | * Self posts 309 | * For all other cases, `saveddit` will simply fetch the HTML of the URL 310 | 311 | ## Contributing 312 | Contributions are welcome; have a look at the [CONTRIBUTING.md](CONTRIBUTING.md) document for more information. 313 | 314 | ## License 315 | The project is available under the [MIT](https://opensource.org/licenses/MIT) license.
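As an aside: the `saveddit` CLI is a thin wrapper that wires argument values into downloader classes (see `src/saveddit/saveddit.py` in this repository). Below is a minimal sketch of driving the subreddit downloader from Python, mirroring the CLI's own calls; it assumes `~/.saveddit/user_config.yaml` is already populated, since the downloaders read it when loaded:

```python
# Sketch: use saveddit's downloader classes directly from Python.
import os
from saveddit.subreddit_downloader import SubredditDownloader

downloader = SubredditDownloader("pics")
downloader.download(
    os.path.expanduser("~/Desktop"),  # unlike the shell, Python won't expand '~'
    categories=["hot"],               # any subset of the default categories
    post_limit=5,                     # None means "all submissions"
    skip_videos=False,
    skip_meta=False,
    skip_comments=False,
    download_all_comments=False,
)
```

The `multireddit`, `search`, and `user` subcommands map onto `MultiredditDownloader`, `SearchSubreddits`, and `UserDownloader` in the same way.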
316 | -------------------------------------------------------------------------------- /README_PyPI.md: -------------------------------------------------------------------------------- 1 | ![](https://raw.githubusercontent.com/p-ranav/saveddit/master/images/logo.png) 2 | 3 | `saveddit` is a bulk media downloader for reddit 4 | 5 | ```console 6 | pip3 install saveddit 7 | ``` 8 | 9 | ## Setting up authorization 10 | 11 | * [Register an application with Reddit](https://ssl.reddit.com/prefs/apps/) 12 | - Write down your client ID and secret from the app 13 | - More about Reddit API access [here](https://ssl.reddit.com/wiki/api) 14 | - Wiki page about Reddit OAuth2 applications [here](https://github.com/reddit-archive/reddit/wiki/OAuth2) 15 | 16 | ![](https://raw.githubusercontent.com/p-ranav/saveddit/master/images/reddit_app.png) 17 | 18 | * [Register an application with Imgur](https://api.imgur.com/oauth2/addclient) 19 | - Write down the Imgur client ID from the app 20 | 21 | ![](https://raw.githubusercontent.com/p-ranav/saveddit/master/images/imgur_app.png) 22 | 23 | These registrations will authorize you to use the Reddit and Imgur APIs to download publicly available information. 24 | 25 | ## User configuration 26 | 27 | The first time you run `saveddit`, you will see something like this: 28 | 29 | ```console 30 | foo@bar:~$ saveddit 31 | Retrieving configuration from ~/.saveddit/user_config.yaml file 32 | No configuration file found. 33 | Creating one. Would you like to edit it now? 34 | > Choose Y for yes and N for no 35 | ``` 36 | 37 | If you choose 'yes', the program will prompt you to enter these credentials: 38 | - Your imgur client ID 39 | - Your reddit client ID 40 | - Your reddit client secret 41 | - Your reddit username 42 | 43 | If you choose 'no', the program will create a file that you can edit later. To edit it: 44 | 45 | * Open the generated `~/.saveddit/user_config.yaml` 46 | * Update the client IDs and secrets from the previous step 47 | * If you plan on using the `user` API, add your reddit username as well 48 | 49 | ```yaml 50 | imgur_client_id: '' 51 | reddit_client_id: '' 52 | reddit_client_secret: '' 53 | reddit_username: '' 54 | ``` 55 | 56 | ## Download from Subreddit 57 | 58 | ```console 59 | foo@bar:~$ saveddit subreddit -h 60 | Retrieving configuration from /Users/pranav/.saveddit/user_config.yaml file 61 | 62 | usage: saveddit subreddit [-h] [-f categories [categories ...]] [-l post_limit] [--skip-comments] [--skip-meta] [--skip-videos] -o output_path subreddits [subreddits ...] 63 | 64 | positional arguments: 65 | subreddits Names of subreddits to download, e.g., AskReddit 66 | 67 | optional arguments: 68 | -h, --help show this help message and exit 69 | -f categories [categories ...] 70 | Categories of posts to download (default: ['hot', 'new', 'rising', 'controversial', 'top', 'gilded']) 71 | -l post_limit Limit the number of submissions downloaded in each category (default: None, i.e., all submissions) 72 | --skip-comments When true, saveddit will not save comments to a comments.json file 73 | --skip-meta When true, saveddit will not save meta to a submission.json file on submissions 74 | --skip-videos When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links) 75 | --all-comments When true, saveddit will download all the comments in a post instead of just downloading the top ones.
76 | -o output_path Directory where saveddit will save downloaded content 77 | ``` 78 | 79 | ```console 80 | foo@bar:~$ saveddit subreddit pics -f hot -l 5 -o ~/Desktop 81 | ``` 82 | 83 | ```console 84 | foo@bar:~$ tree -L 4 ~/Desktop/www.reddit.com 85 | /Users/pranav/Desktop/www.reddit.com 86 | └── r 87 | └── pics 88 | └── hot 89 | ├── 000_Prince_Philip_Duke_of_Edinburgh_... 90 | ├── 001_Day_10_of_Nobody_Noticing_the_Ap... 91 | ├── 002_First_edited_picture 92 | ├── 003_Reorganized_a_few_months_ago_and... 93 | └── 004_Van_Gogh_inspired_rainy_street_I... 94 | ``` 95 | 96 | You can download from multiple subreddits and use multiple filters: 97 | 98 | ```console 99 | foo@bar:~$ saveddit subreddit funny AskReddit -f hot top new rising -l 5 -o ~/Downloads/Reddit/. 100 | ``` 101 | 102 | The downloads from each subreddit go to a separate folder, like so: 103 | 104 | ```console 105 | foo@bar:~$ tree -L 3 ~/Downloads/Reddit/www.reddit.com 106 | /Users/pranav/Downloads/Reddit/www.reddit.com 107 | └── r 108 | ├── AskReddit 109 | │ ├── hot 110 | │ ├── new 111 | │ ├── rising 112 | │ └── top 113 | └── funny 114 | ├── hot 115 | ├── new 116 | ├── rising 117 | └── top 118 | ``` 119 | 120 | ## Download from anonymous Multireddit 121 | 122 | To download from an anonymous multireddit, use the `multireddit` option and pass one or more subreddit names: 123 | 124 | ```console 125 | foo@bar:~$ saveddit multireddit -h 126 | usage: saveddit multireddit [-h] [-f categories [categories ...]] [-l post_limit] [--skip-comments] [--skip-meta] [--skip-videos] -o output_path subreddits [subreddits ...] 127 | 128 | positional arguments: 129 | subreddits Names of subreddits to download, e.g., aww, pics. The downloads will be stored in <output_path>/www.reddit.com/m/aww+pics/. 130 | 131 | optional arguments: 132 | -h, --help show this help message and exit 133 | -f categories [categories ...] 134 | Categories of posts to download (default: ['hot', 'new', 'random_rising', 'rising', 'controversial', 'top', 'gilded']) 135 | -l post_limit Limit the number of submissions downloaded in each category (default: None, i.e., all submissions) 136 | --skip-comments When true, saveddit will not save comments to a comments.json file 137 | --skip-meta When true, saveddit will not save meta to a submission.json file on submissions 138 | --skip-videos When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links) 139 | -o output_path Directory where saveddit will save downloaded content 140 | ``` 141 | 142 | ```console 143 | foo@bar:~$ saveddit multireddit EarthPorn NaturePics -f hot -l 5 -o ~/Desktop 144 | ``` 145 | 146 | Anonymous multireddits are saved in `<output_path>/www.reddit.com/m/<subreddit_names>/<category>/` like so: 147 | 148 | ```console 149 | tree -L 4 ~/Desktop/www.reddit.com 150 | /Users/pranav/Desktop/www.reddit.com 151 | └── m 152 | └── EarthPorn+NaturePics 153 | └── hot 154 | ├── 000_Banning_State_Park_Minnesota_OC_... 155 | ├── 001_Misty_forest_in_the_mountains_of... 156 | ├── 002_One_of_the_highlights_of_my_last... 157 | ├── 003__OC_Japan_Kyoto_Garden_of_the_Go... 158 | └── 004_Sunset_at_Mt_Rainier_National_Pa... 159 | ``` 160 | 161 | ## Download from User's page 162 | 163 | ```console 164 | foo@bar:~$ saveddit user -h 165 | usage: saveddit user [-h] users [users ...] {saved,gilded,submitted,multireddits,upvoted,comments} ...
166 | 167 | positional arguments: 168 | users Names of users to download, e.g., Poem_for_your_sprog 169 | {saved,gilded,submitted,multireddits,upvoted,comments} 170 | 171 | optional arguments: 172 | -h, --help show this help message and exit 173 | ``` 174 | 175 | Here's a usage example for downloading the top 5 comments made by `Poem_for_your_sprog`: 176 | 177 | ```console 178 | foo@bar:~$ saveddit user "Poem_for_your_sprog" comments -s top -l 5 -o ~/Desktop 179 | ``` 180 | 181 | Here's another example for downloading `kemitche`'s multireddits: 182 | 183 | ```console 184 | foo@bar:~$ saveddit user kemitche multireddits -n reddit -f hot -l 5 -o ~/Desktop 185 | ``` 186 | 187 | User-specific content is downloaded to `<output_path>/www.reddit.com/u/<username>/...` like so: 188 | 189 | ```console 190 | foo@bar:~$ tree ~/Desktop/www.reddit.com 191 | /Users/pranav/Desktop/www.reddit.com 192 | └── u 193 | ├── Poem_for_your_sprog 194 | │ ├── comments 195 | │ │ └── top 196 | │ │ ├── 000_Comment_my_name_is_Cow_and_wen_its_ni....json 197 | │ │ ├── 001_Comment_It_stopped_at_six_and_life....json 198 | │ │ ├── 002_Comment__Perhaps_I_could_listen_to_podca....json 199 | │ │ ├── 003_Comment__I_don_t_have_regret_for_the_thi....json 200 | │ │ └── 004_Comment__So_throw_off_the_chains_of_oppr....json 201 | │ └── user.json 202 | └── kemitche 203 | ├── m 204 | │ └── reddit 205 | │ └── hot 206 | │ ├── 000_When_posting_to_my_u_channel_NSF... 207 | │ │ ├── comments.json 208 | │ │ └── submission.json 209 | │ ├── 001_How_to_remove_popular_near_you 210 | │ │ ├── comments.json 211 | │ │ └── submission.json 212 | │ ├── 002__IOS_2021_13_0_Reddit_is_just_su... 213 | │ │ ├── comments.json 214 | │ │ └── submission.json 215 | │ ├── 003_The_Approve_User_button_should_n... 216 | │ │ ├── comments.json 217 | │ │ └── submission.json 218 | │ └── 004_non_moderators_unable_to_view_su... 219 | │ ├── comments.json 220 | │ └── submission.json 221 | └── user.json 222 | ``` 223 | 224 | ## Search and Download 225 | 226 | `saveddit` supports searching subreddits and downloading the search results: 227 | 228 | ```console 229 | foo@bar:~$ saveddit search -h 230 | usage: saveddit search [-h] -q query [-s sort] [-t time_filter] [--include-nsfw] [--skip-comments] [--skip-meta] [--skip-videos] -o output_path subreddits [subreddits ...]
231 | 232 | positional arguments: 233 | subreddits Names of subreddits to search, e.g., all, aww, pics 234 | 235 | optional arguments: 236 | -h, --help show this help message and exit 237 | -q query Search query string 238 | -s sort Sort to apply on search (default: relevance, choices: [relevance, hot, top, new, comments]) 239 | -t time_filter Time filter to apply on search (default: all, choices: [all, day, hour, month, week, year]) 240 | --include-nsfw When true, saveddit will include NSFW results in search 241 | --skip-comments When true, saveddit will not save comments to a comments.json file 242 | --skip-meta When true, saveddit will not save meta to a submission.json file on submissions 243 | --skip-videos When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links) 244 | -o output_path Directory where saveddit will save downloaded content 245 | ``` 246 | 247 | e.g., 248 | 249 | ```console 250 | foo@bar:~$ saveddit search soccer -q "Chelsea" -o ~/Desktop 251 | ``` 252 | 253 | The downloaded search results are stored in `<output_path>/www.reddit.com/q/<query>/<subreddits>/<sort>/...` 254 | 255 | ```console 256 | foo@bar:~$ tree -L 4 ~/Desktop/www.reddit.com/q 257 | /Users/pranav/Desktop/www.reddit.com/q 258 | └── Chelsea 259 | └── soccer 260 | └── relevance 261 | ├── 000__Official_Results_for_UEFA_Champ... 262 | ├── 001_Porto_0_1_Chelsea_Mason_Mount_32... 263 | ├── 002_Crystal_Palace_0_2_Chelsea_Chris... 264 | ├── 003_Post_Match_Thread_Chelsea_2_5_We... 265 | ├── 004_Match_Thread_Porto_vs_Chelsea_UE... 266 | ├── 005_Crystal_Palace_1_4_Chelsea_Chris... 267 | ├── 006_Porto_0_2_Chelsea_Ben_Chilwell_8... 268 | ├── 007_Post_Match_Thread_Porto_0_2_Chel... 269 | ├── 008_UCL_Quaterfinalists_are_Bayern_D... 270 | ├── 009__MD_Mino_Raiola_and_Haaland_s_fa... 271 | ├── 010_Chelsea_2_5_West_Brom_Callum_Rob... 272 | ├── 011_Chelsea_1_2_West_Brom_Matheus_Pe... 273 | ├── 012__Bild_Sport_via_Sport_Witness_Ch... 274 | ├── 013_Match_Thread_Chelsea_vs_West_Bro... 275 | ├── 014_Chelsea_1_3_West_Brom_Callum_Rob... 276 | ├── 015_Match_Thread_Chelsea_vs_Atletico... 277 | ├── 016_Stefan_Savić_Atlético_Madrid_str... 278 | ├── 017_Chelsea_1_0_West_Brom_Christian_... 279 | └── 018_Alvaro_Morata_I_ve_never_had_dep... 280 | ``` 281 | 282 | ## Supported Links 283 | 284 | * Direct links to images or videos, e.g., `.png`, `.jpg`, `.mp4`, `.gif`, etc. 285 | * Reddit galleries `reddit.com/gallery/...` 286 | * Reddit videos `v.redd.it/...` 287 | * Gfycat links `gfycat.com/...` 288 | * Redgif links `redgifs.com/...` 289 | * Imgur images `imgur.com/...` 290 | * Imgur albums `imgur.com/a/...` and `imgur.com/gallery/...` 291 | * Youtube links `youtube.com/...` and `youtu.be/...` 292 | * These [sites](https://ytdl-org.github.io/youtube-dl/supportedsites.html) supported by `youtube-dl` 293 | * Self posts 294 | * For all other cases, `saveddit` will simply fetch the HTML of the URL 295 | 296 | ## Contributing 297 | Contributions are welcome; have a look at the [CONTRIBUTING.md](CONTRIBUTING.md) document for more information. 298 | 299 | ## License 300 | The project is available under the [MIT](https://opensource.org/licenses/MIT) license.
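One detail worth knowing before reading the sources below: every downloader composes its output directory the same way, by joining `<output_path>/www.reddit.com/` with a kind prefix (`r`, `m`, `u`, or `q`), a name, a category, and a zero-padded, sanitized post title. A condensed sketch of that logic (the helper function is hypothetical; the sanitization rules mirror `submission_downloader.py`):

```python
# Sketch: how saveddit composes a submission's output directory.
import os
import re

def submission_dir(output_path, kind, name, category, index, title):
    # Non-word characters become underscores (as in submission_downloader.py) ...
    title = re.sub(r"\W+", "_", title)
    # ... and long titles are truncated to 32 characters plus "...".
    if len(title) > 32:
        title = title[:32] + "..."
    post_dir = str(index).zfill(3) + "_" + title
    return os.path.join(output_path, "www.reddit.com", kind, name, category, post_dir)

print(submission_dir("/tmp", "r", "pics", "hot", 4, "Van Gogh inspired rainy street"))
# -> /tmp/www.reddit.com/r/pics/hot/004_Van_Gogh_inspired_rainy_street
```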
301 | -------------------------------------------------------------------------------- /images/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-ranav/saveddit/f4aa0749eec1020bb9927c6dd7fd5059a6d989af/images/demo.gif -------------------------------------------------------------------------------- /images/imgur_app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-ranav/saveddit/f4aa0749eec1020bb9927c6dd7fd5059a6d989af/images/imgur_app.png -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-ranav/saveddit/f4aa0749eec1020bb9927c6dd7fd5059a6d989af/images/logo.png -------------------------------------------------------------------------------- /images/reddit_app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-ranav/saveddit/f4aa0749eec1020bb9927c6dd7fd5059a6d989af/images/reddit_app.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | colorama==0.4.4 2 | coloredlogs==15.0 3 | verboselogs==1.7 4 | praw==7.2.0 5 | tqdm==4.60.0 6 | ffmpeg_python==0.2.0 7 | youtube_dl==2021.4.7 8 | requests==2.25.1 9 | beautifulsoup4==4.9.3 10 | PyYAML==5.4.1 11 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | # replace with your username: 3 | name = saveddit 4 | version = 2.2.1 5 | author = Pranav Srinivas Kumar 6 | author_email = pranav.srinivas.kumar@gmail.com 7 | description = Bulk Downloader for Reddit 8 | long_description = file: README_PyPI.md 9 | long_description_content_type = text/markdown 10 | url = https://github.com/p-ranav/saveddit 11 | project_urls = 12 | Bug Tracker = https://github.com/p-ranav/saveddit/issues 13 | classifiers = 14 | Programming Language :: Python :: 3 15 | License :: OSI Approved :: MIT License 16 | Operating System :: OS Independent 17 | 18 | [options] 19 | package_dir = 20 | = src 21 | packages = find: 22 | python_requires = >=3.8 23 | install_requires = 24 | praw 25 | verboselogs 26 | requests 27 | colorama 28 | coloredlogs 29 | youtube_dl 30 | tqdm 31 | ffmpeg_python 32 | beautifulsoup4 33 | PyYAML 34 | 35 | [options.packages.find] 36 | where = src 37 | 38 | [options.entry_points] 39 | console_scripts = 40 | saveddit = saveddit.saveddit:main -------------------------------------------------------------------------------- /src/saveddit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-ranav/saveddit/f4aa0749eec1020bb9927c6dd7fd5059a6d989af/src/saveddit/__init__.py -------------------------------------------------------------------------------- /src/saveddit/_version.py: -------------------------------------------------------------------------------- 1 
| __version__ = "2.2.1" -------------------------------------------------------------------------------- /src/saveddit/configuration.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Union 3 | import yaml 4 | import pathlib 5 | import colorama 6 | import sys 7 | 8 | 9 | class ConfigurationLoader: 10 | PURPLE = colorama.Fore.MAGENTA 11 | WHITE = colorama.Style.RESET_ALL 12 | RED = colorama.Fore.RED 13 | 14 | @staticmethod 15 | def load(path): 16 | """ 17 | Loads Saveddit configuration from a configuration file. 18 | If the file is not found, create one and exit. 19 | 20 | Arguments: 21 | path: path to user_config.yaml file 22 | 23 | Returns: 24 | A Python dictionary with Saveddit configuration info 25 | """ 26 | 27 | def _create_config(_path): 28 | _STD_CONFIG = { 29 | "reddit_client_id": "", 30 | "reddit_client_secret": "", 31 | "reddit_username": "", 32 | "imgur_client_id": "", 33 | } 34 | with open(_path, "x") as _f: 35 | yaml.dump(_STD_CONFIG, _f) 36 | sys.exit(0) 37 | 38 | # Explicitly converting path to POSIX-like path (to avoid '\\' hell) 39 | print( 40 | "{notice}Retrieving configuration from {path} file{white}".format( 41 | path=path, 42 | notice=ConfigurationLoader.PURPLE, 43 | white=ConfigurationLoader.WHITE, 44 | ) 45 | ) 46 | path = pathlib.Path(path).absolute().as_posix() 47 | 48 | # Check if file exists. If not, create one and fill it with std config template 49 | if not os.path.exists(path): 50 | print( 51 | "{red}No configuration file found.\nCreating one. Would you like to edit it now?\n > Choose {purple}Y{red} for yes and {purple}N{red} for no.{white}".format( 52 | red=ConfigurationLoader.RED, 53 | path=path, 54 | white=ConfigurationLoader.WHITE, 55 | purple=ConfigurationLoader.PURPLE, 56 | ) 57 | ) 58 | getchoice = str(input("> ")) 59 | if getchoice == "Y": 60 | reddit_client = str(input("Reddit Client ID: ")) 61 | reddit_client_sec = str(input("Reddit Client Secret: ")) 62 | reddit_user = str(input("Reddit Username: ")) 63 | imgur_client = str(input("Imgur Client ID: ")) 64 | STD_CONFIG = { 65 | "reddit_client_id": "{}".format(reddit_client), 66 | "reddit_client_secret": "{}".format(reddit_client_sec), 67 | "reddit_username": "{}".format(reddit_user), 68 | "imgur_client_id": "{}".format(imgur_client), 69 | } 70 | with open(path, "x") as f: 71 | yaml.dump(STD_CONFIG, f) 72 | sys.exit(0) 73 | elif getchoice == "N": 74 | print( 75 | "{red}Alright.\nPlease edit {path} with valid credentials.\nExiting{white}".format( 76 | red=ConfigurationLoader.RED, 77 | path=path, 78 | white=ConfigurationLoader.WHITE, 79 | ) 80 | ) 81 | _create_config(path) 82 | else: 83 | print("Invalid choice.") 84 | exit() 85 | 86 | with open(path, "r") as _f: 87 | return yaml.safe_load(_f.read()) 88 | -------------------------------------------------------------------------------- /src/saveddit/multireddit_downloader.py: -------------------------------------------------------------------------------- 1 | import coloredlogs 2 | from colorama import Fore, Style 3 | from datetime import datetime, timezone 4 | import logging 5 | import verboselogs 6 | import getpass 7 | import json 8 | import os 9 | import praw 10 | from pprint import pprint 11 | import re 12 | from saveddit.submission_downloader import SubmissionDownloader 13 | from saveddit.subreddit_downloader import SubredditDownloader 14 | from saveddit.multireddit_downloader_config import MultiredditDownloaderConfig 15 | import sys 16 | from tqdm import tqdm 17 | 18 | class
MultiredditDownloader: 19 | config = SubredditDownloader.config 20 | REDDIT_CLIENT_ID = config['reddit_client_id'] 21 | REDDIT_CLIENT_SECRET = config['reddit_client_secret'] 22 | IMGUR_CLIENT_ID = config['imgur_client_id'] 23 | 24 | def __init__(self, multireddit_names): 25 | self.logger = verboselogs.VerboseLogger(__name__) 26 | level_styles = { 27 | 'critical': {'bold': True, 'color': 'red'}, 28 | 'debug': {'color': 'green'}, 29 | 'error': {'color': 'red'}, 30 | 'info': {'color': 'white'}, 31 | 'notice': {'color': 'magenta'}, 32 | 'spam': {'color': 'white', 'faint': True}, 33 | 'success': {'bold': True, 'color': 'green'}, 34 | 'verbose': {'color': 'blue'}, 35 | 'warning': {'color': 'yellow'} 36 | } 37 | coloredlogs.install(level='SPAM', logger=self.logger, 38 | fmt='%(message)s', level_styles=level_styles) 39 | 40 | self.reddit = praw.Reddit( 41 | client_id=MultiredditDownloader.REDDIT_CLIENT_ID, 42 | client_secret=MultiredditDownloader.REDDIT_CLIENT_SECRET, 43 | user_agent="saveddit (by /u/p_ranav)" 44 | ) 45 | 46 | self.multireddit_name = "+".join(multireddit_names) 47 | self.multireddit = self.reddit.subreddit(self.multireddit_name) 48 | 49 | def download(self, output_path, categories=MultiredditDownloaderConfig.DEFAULT_CATEGORIES, post_limit=MultiredditDownloaderConfig.DEFAULT_POST_LIMIT, skip_videos=False, skip_meta=False, skip_comments=False, comment_limit=0): 50 | ''' 51 | categories: List of categories within the multireddit to download (see MultiredditDownloaderConfig.DEFAULT_CATEGORIES) 52 | post_limit: Number of posts to download (default: None, i.e., all posts) 53 | comment_limit: Number of comment levels to download from submission (default: `0`, i.e., only top-level comments) 54 | - to get all comments, set comment_limit to `None` 55 | ''' 56 | 57 | multireddit_dir_name = self.multireddit_name 58 | if len(multireddit_dir_name) > 64: 59 | multireddit_dir_name = multireddit_dir_name[0:63] 60 | multireddit_dir_name += "..." 
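        # NOTE: names longer than 64 characters are cut to 63 and suffixed with
        # "..." so that long combined multireddit names (joined with '+') stay
        # manageable as directory names.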
61 | 62 | root_dir = os.path.join(os.path.join(os.path.join( 63 | output_path, "www.reddit.com"), "m"), multireddit_dir_name) 64 | 65 | 66 | for c in categories: 67 | self.logger.notice("Downloading from /m/" + 68 | self.multireddit_name + "/" + c + "/") 69 | category_dir = os.path.join(root_dir, c) 70 | if not os.path.exists(category_dir): 71 | os.makedirs(category_dir) 72 | category_function = getattr(self.multireddit, c) 73 | 74 | for i, submission in enumerate(category_function(limit=post_limit)): 75 | SubmissionDownloader(submission, i, self.logger, category_dir, 76 | skip_videos, skip_meta, skip_comments, comment_limit, 77 | {'imgur_client_id': MultiredditDownloader.IMGUR_CLIENT_ID}) 78 | -------------------------------------------------------------------------------- /src/saveddit/multireddit_downloader_config.py: -------------------------------------------------------------------------------- 1 | class MultiredditDownloaderConfig: 2 | DEFAULT_CATEGORIES = ["hot", "new", "random_rising", "rising", 3 | "controversial", "top", "gilded"] 4 | DEFAULT_POST_LIMIT = None -------------------------------------------------------------------------------- /src/saveddit/saveddit.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from saveddit.multireddit_downloader_config import MultiredditDownloaderConfig 4 | from saveddit.search_config import SearchConfig 5 | from saveddit.subreddit_downloader_config import SubredditDownloaderConfig 6 | from saveddit.user_downloader_config import UserDownloaderConfig 7 | from saveddit._version import __version__ 8 | 9 | 10 | def asciiart(): 11 | return r''' .___ .___.__ __ 12 | ___________ ___ __ ____ __| _/__| _/|__|/ |_ 13 | / ___/\__ \\ \/ // __ \ / __ |/ __ | | \ __\ 14 | \___ \ / __ \\ /\ ___// /_/ / /_/ | | || | 15 | /____ >(____ /\_/ \___ >____ \____ | |__||__| 16 | \/ \/ \/ \/ \/ 17 | 18 | Downloader for Reddit 19 | version : ''' + __version__ + ''' 20 | URL : https://github.com/p-ranav/saveddit 21 | ''' 22 | 23 | 24 | def check_positive(value): 25 | ivalue = int(value) 26 | if ivalue <= 0: 27 | raise argparse.ArgumentTypeError( 28 | "%s is an invalid positive int value" % value) 29 | return ivalue 30 | 31 | class UniqueAppendAction(argparse.Action): 32 | ''' 33 | Class used to discard duplicates in list arguments, preserving their original order 34 | https://stackoverflow.com/questions/9376670/python-argparse-force-a-list-item-to-be-unique 35 | ''' 36 | def __call__(self, parser, namespace, values, option_string=None): 37 | unique_values = list(dict.fromkeys(values)) # dict.fromkeys dedupes without reordering 38 | setattr(namespace, self.dest, unique_values) 39 | 40 | def main(): 41 | argv = sys.argv[1:] 42 | 43 | parser = argparse.ArgumentParser(prog="saveddit") 44 | parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__) 45 | 46 | subparsers = parser.add_subparsers(dest="subparser_name") 47 | 48 | subreddit_parser = subparsers.add_parser('subreddit') 49 | subreddit_parser.add_argument('subreddits', 50 | metavar='subreddits', 51 | nargs='+', 52 | action=UniqueAppendAction, 53 | help='Names of subreddits to download, e.g., AskReddit') 54 | subreddit_parser.add_argument('-f', 55 | metavar='categories', 56 | default=SubredditDownloaderConfig.DEFAULT_CATEGORIES, 57 | nargs='+', 58 | action=UniqueAppendAction, 59 | help='Categories of posts to download (default: %(default)s)') 60 | subreddit_parser.add_argument('-l', 61 | default=SubredditDownloaderConfig.DEFAULT_POST_LIMIT, 62 | metavar='post_limit', 63 |
type=check_positive, 64 | help='Limit the number of submissions downloaded in each category (default: %(default)s, i.e., all submissions)') 65 | subreddit_parser.add_argument('--skip-comments', 66 | default=False, 67 | action='store_true', 68 | help='When true, saveddit will not save comments to a comments.json file') 69 | subreddit_parser.add_argument('--skip-meta', 70 | default=False, 71 | action='store_true', 72 | help='When true, saveddit will not save meta to a submission.json file on submissions') 73 | subreddit_parser.add_argument('--skip-videos', 74 | default=False, 75 | action='store_true', 76 | help='When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links)') 77 | subreddit_parser.add_argument('--all-comments', 78 | default=False, 79 | action='store_true', 80 | help='When true, saveddit will download all the comments in a post instead of just the top ones.') 81 | subreddit_parser.add_argument('-o', 82 | required=True, 83 | type=str, 84 | metavar='output_path', 85 | help='Directory where saveddit will save downloaded content' 86 | ) 87 | 88 | multireddit_parser = subparsers.add_parser('multireddit') 89 | multireddit_parser.add_argument('subreddits', 90 | metavar='subreddits', 91 | nargs='+', 92 | action=UniqueAppendAction, 93 | help='Names of subreddits to download, e.g., aww, pics. The downloads will be stored in <output_path>/www.reddit.com/m/aww+pics/.') 94 | multireddit_parser.add_argument('-f', 95 | metavar='categories', 96 | default=MultiredditDownloaderConfig.DEFAULT_CATEGORIES, 97 | nargs='+', 98 | action=UniqueAppendAction, 99 | help='Categories of posts to download (default: %(default)s)') 100 | multireddit_parser.add_argument('-l', 101 | default=MultiredditDownloaderConfig.DEFAULT_POST_LIMIT, 102 | metavar='post_limit', 103 | type=check_positive, 104 | help='Limit the number of submissions downloaded in each category (default: %(default)s, i.e., all submissions)') 105 | multireddit_parser.add_argument('--skip-comments', 106 | default=False, 107 | action='store_true', 108 | help='When true, saveddit will not save comments to a comments.json file') 109 | multireddit_parser.add_argument('--skip-meta', 110 | default=False, 111 | action='store_true', 112 | help='When true, saveddit will not save meta to a submission.json file on submissions') 113 | multireddit_parser.add_argument('--skip-videos', 114 | default=False, 115 | action='store_true', 116 | help='When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links)') 117 | multireddit_parser.add_argument('-o', 118 | required=True, 119 | type=str, 120 | metavar='output_path', 121 | help='Directory where saveddit will save downloaded content' 122 | ) 123 | 124 | search_parser = subparsers.add_parser('search') 125 | search_parser.add_argument('subreddits', 126 | metavar='subreddits', 127 | nargs='+', 128 | action=UniqueAppendAction, 129 | help='Names of subreddits to search, e.g., all, aww, pics') 130 | search_parser.add_argument('-q', 131 | metavar='query', 132 | required=True, 133 | help='Search query string') 134 | search_parser.add_argument('-s', 135 | metavar='sort', 136 | default=SearchConfig.DEFAULT_SORT, 137 | choices=SearchConfig.DEFAULT_SORT_CATEGORIES, 138 | help='Sort to apply on search (default: %(default)s, choices: [%(choices)s])') 139 | search_parser.add_argument('-t', 140 | metavar='time_filter', 141 | default=SearchConfig.DEFAULT_TIME_FILTER, 142 | choices=SearchConfig.DEFAULT_TIME_FILTER_CATEGORIES, 143 | help='Time filter to apply on search (default:
%(default)s, choices: [%(choices)s])') 144 | search_parser.add_argument('--include-nsfw', 145 | default=False, 146 | action='store_true', 147 | help='When true, saveddit will include NSFW results in search') 148 | search_parser.add_argument('--skip-comments', 149 | default=False, 150 | action='store_true', 151 | help='When true, saveddit will not save comments to a comments.json file') 152 | search_parser.add_argument('--skip-meta', 153 | default=False, 154 | action='store_true', 155 | help='When true, saveddit will not save meta to a submission.json file on submissions') 156 | search_parser.add_argument('--skip-videos', 157 | default=False, 158 | action='store_true', 159 | help='When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links)') 160 | search_parser.add_argument('-o', 161 | required=True, 162 | type=str, 163 | metavar='output_path', 164 | help='Directory where saveddit will save downloaded content' 165 | ) 166 | 167 | user_parser = subparsers.add_parser('user') 168 | user_parser.add_argument('users', 169 | metavar='users', 170 | nargs='+', 171 | help='Names of users to download, e.g., Poem_for_your_sprog') 172 | 173 | 174 | user_subparsers = user_parser.add_subparsers(dest="user_subparser_name") 175 | user_subparsers.required = True 176 | 177 | # user.saved subparser 178 | saved_parser = user_subparsers.add_parser('saved') 179 | saved_parser.add_argument('--skip-meta', 180 | default=False, 181 | action='store_true', 182 | help='When true, saveddit will not save meta to a submission.json file on submissions') 183 | saved_parser.add_argument('--skip-comments', 184 | default=False, 185 | action='store_true', 186 | help='When true, saveddit will not save comments to a comments.json file') 187 | saved_parser.add_argument('--skip-videos', 188 | default=False, 189 | action='store_true', 190 | help='When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links)') 191 | saved_parser.add_argument('-l', 192 | default=UserDownloaderConfig.DEFAULT_POST_LIMIT, 193 | metavar='post_limit', 194 | type=check_positive, 195 | help='Limit the number of saved submissions downloaded (default: %(default)s, i.e., all submissions)') 196 | saved_parser.add_argument('-o', 197 | required=True, 198 | type=str, 199 | metavar='output_path', 200 | help='Directory where saveddit will save downloaded content' 201 | ) 202 | 203 | # user.gilded subparser 204 | gilded_parser = user_subparsers.add_parser('gilded') 205 | gilded_parser.add_argument('--skip-meta', 206 | default=False, 207 | action='store_true', 208 | help='When true, saveddit will not save meta to a submission.json file on submissions') 209 | gilded_parser.add_argument('--skip-comments', 210 | default=False, 211 | action='store_true', 212 | help='When true, saveddit will not save comments to a comments.json file') 213 | gilded_parser.add_argument('--skip-videos', 214 | default=False, 215 | action='store_true', 216 | help='When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links)') 217 | gilded_parser.add_argument('-l', 218 | default=UserDownloaderConfig.DEFAULT_POST_LIMIT, 219 | metavar='post_limit', 220 | type=check_positive, 221 | help='Limit the number of gilded submissions downloaded (default: %(default)s, i.e., all submissions)') 222 | gilded_parser.add_argument('-o', 223 | required=True, 224 | type=str, 225 | metavar='output_path', 226 | help='Directory where saveddit will save downloaded content' 227 | ) 228 | 229 | # user.submitted subparser
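    # (downloads a user's submitted posts; like the other user subcommands it takes -l, -o, and the --skip-* flags)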
230 | submitted_parser = user_subparsers.add_parser('submitted') 231 | submitted_parser.add_argument('-s', 232 | metavar='sort', 233 | default=UserDownloaderConfig.DEFAULT_SORT, 234 | choices=UserDownloaderConfig.DEFAULT_SORT_OPTIONS, 235 | help='Download submissions sorted by this option (default: %(default)s, choices: [%(choices)s])') 236 | submitted_parser.add_argument('--skip-comments', 237 | default=False, 238 | action='store_true', 239 | help='When true, saveddit will not save comments to a comments.json file for the submissions') 240 | submitted_parser.add_argument('--skip-meta', 241 | default=False, 242 | action='store_true', 243 | help='When true, saveddit will not save meta to a submission.json file on submissions') 244 | submitted_parser.add_argument('--skip-videos', 245 | default=False, 246 | action='store_true', 247 | help='When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links)') 248 | submitted_parser.add_argument('-l', 249 | default=UserDownloaderConfig.DEFAULT_POST_LIMIT, 250 | metavar='post_limit', 251 | type=check_positive, 252 | help='Limit the number of submissions downloaded (default: %(default)s, i.e., all submissions)') 253 | submitted_parser.add_argument('-o', 254 | required=True, 255 | type=str, 256 | metavar='output_path', 257 | help='Directory where saveddit will save downloaded posts' 258 | ) 259 | 260 | # user.multireddits subparser 261 | multireddits_parser = user_subparsers.add_parser('multireddits') 262 | multireddits_parser.add_argument('-n', 263 | metavar='names', 264 | default=None, 265 | nargs='+', 266 | action=UniqueAppendAction, 267 | help='Names of specific multireddits to download (default: %(default)s, i.e., all multireddits for this user)') 268 | multireddits_parser.add_argument('-f', 269 | metavar='categories', 270 | default=UserDownloaderConfig.DEFAULT_CATEGORIES, 271 | nargs='+', 272 | action=UniqueAppendAction, 273 | help='Categories of posts to download (default: %(default)s)') 274 | multireddits_parser.add_argument('--skip-comments', 275 | default=False, 276 | action='store_true', 277 | help='When true, saveddit will not save comments to a comments.json file for the submissions') 278 | multireddits_parser.add_argument('--skip-meta', 279 | default=False, 280 | action='store_true', 281 | help='When true, saveddit will not save meta to a submission.json file on submissions') 282 | multireddits_parser.add_argument('--skip-videos', 283 | default=False, 284 | action='store_true', 285 | help='When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links)') 286 | multireddits_parser.add_argument('-l', 287 | default=UserDownloaderConfig.DEFAULT_POST_LIMIT, 288 | metavar='post_limit', 289 | type=check_positive, 290 | help='Limit the number of submissions downloaded (default: %(default)s, i.e., all submissions)') 291 | multireddits_parser.add_argument('-o', 292 | required=True, 293 | type=str, 294 | metavar='output_path', 295 | help='Directory where saveddit will save downloaded posts' 296 | ) 297 | 298 | # user.upvoted subparser 299 | upvoted_parser = user_subparsers.add_parser('upvoted') 300 | upvoted_parser.add_argument('--skip-comments', 301 | default=False, 302 | action='store_true', 303 | help='When true, saveddit will not save comments to a comments.json file for the upvoted submissions') 304 | upvoted_parser.add_argument('--skip-meta', 305 | default=False, 306 | action='store_true', 307 | help='When true, saveddit will not save meta to a submission.json file on upvoted submissions') 308 |
upvoted_parser.add_argument('--skip-videos', 309 | default=False, 310 | action='store_true', 311 | help='When true, saveddit will not download videos (e.g., gfycat, redgifs, youtube, v.redd.it links)') 312 | upvoted_parser.add_argument('-l', 313 | default=UserDownloaderConfig.DEFAULT_POST_LIMIT, 314 | metavar='post_limit', 315 | type=check_positive, 316 | help='Limit the number of submissions downloaded (default: %(default)s, i.e., all submissions)') 317 | upvoted_parser.add_argument('-o', 318 | required=True, 319 | type=str, 320 | metavar='output_path', 321 | help='Directory where saveddit will save downloaded posts' 322 | ) 323 | 324 | # user.comments subparser 325 | comments_parser = user_subparsers.add_parser('comments') 326 | comments_parser.add_argument('-s', 327 | metavar='sort', 328 | default=UserDownloaderConfig.DEFAULT_SORT, 329 | choices=UserDownloaderConfig.DEFAULT_SORT_OPTIONS, 330 | help='Download comments sorted by this option (default: %(default)s, choices: [%(choices)s])') 331 | comments_parser.add_argument('-l', 332 | default=UserDownloaderConfig.DEFAULT_COMMENT_LIMIT, 333 | metavar='post_limit', 334 | type=check_positive, 335 | help='Limit the number of comments downloaded (default: %(default)s, i.e., all comments)') 336 | comments_parser.add_argument('-o', 337 | required=True, 338 | type=str, 339 | metavar='output_path', 340 | help='Directory where saveddit will save downloaded comments' 341 | ) 342 | 343 | args = parser.parse_args(argv) 344 | print(asciiart()) 345 | 346 | if args.subparser_name == "subreddit": 347 | from saveddit.subreddit_downloader import SubredditDownloader 348 | for subreddit in args.subreddits: 349 | downloader = SubredditDownloader(subreddit) 350 | downloader.download(args.o, 351 | download_all_comments=args.all_comments, categories=args.f, post_limit=args.l, skip_videos=args.skip_videos, skip_meta=args.skip_meta, skip_comments=args.skip_comments) 352 | elif args.subparser_name == "multireddit": 353 | from saveddit.multireddit_downloader import MultiredditDownloader 354 | downloader = MultiredditDownloader(args.subreddits) 355 | downloader.download(args.o, 356 | categories=args.f, post_limit=args.l, skip_videos=args.skip_videos, skip_meta=args.skip_meta, skip_comments=args.skip_comments) 357 | elif args.subparser_name == "search": 358 | from saveddit.search_subreddits import SearchSubreddits 359 | downloader = SearchSubreddits(args.subreddits) 360 | downloader.download(args) 361 | elif args.subparser_name == "user": 362 | from saveddit.user_downloader import UserDownloader 363 | downloader = UserDownloader() 364 | downloader.download_user_meta(args) 365 | if args.user_subparser_name == "comments": 366 | downloader.download_comments(args) 367 | elif args.user_subparser_name == "multireddits": 368 | downloader.download_multireddits(args) 369 | elif args.user_subparser_name == "submitted": 370 | downloader.download_submitted(args) 371 | elif args.user_subparser_name == "saved": 372 | downloader.download_saved(args) 373 | elif args.user_subparser_name == "upvoted": 374 | downloader.download_upvoted(args) 375 | elif args.user_subparser_name == "gilded": 376 | downloader.download_gilded(args) 377 | else: 378 | parser.print_help() 379 | 380 | if __name__ == "__main__": 381 | main() 382 | -------------------------------------------------------------------------------- /src/saveddit/search_config.py: -------------------------------------------------------------------------------- 1 | class SearchConfig: 2 | DEFAULT_SORT = "relevance" 3 | 
DEFAULT_SORT_CATEGORIES = ["relevance", "hot", "top", "new", "comments"] 4 | DEFAULT_SYNTAX = "lucene" 5 | DEFAULT_SYNTAX_CATEGORIES = ["cloudsearch", "lucene", "plain"] 6 | DEFAULT_TIME_FILTER = "all" 7 | DEFAULT_TIME_FILTER_CATEGORIES = ["all", "day", "hour", "month", "week", "year"] -------------------------------------------------------------------------------- /src/saveddit/search_subreddits.py: -------------------------------------------------------------------------------- 1 | import coloredlogs 2 | from colorama import Fore, Style 3 | from datetime import datetime, timezone 4 | import logging 5 | import verboselogs 6 | import getpass 7 | import json 8 | import os 9 | import praw 10 | from pprint import pprint 11 | import re 12 | from saveddit.submission_downloader import SubmissionDownloader 13 | from saveddit.subreddit_downloader import SubredditDownloader 14 | from saveddit.search_config import SearchConfig 15 | import sys 16 | from tqdm import tqdm 17 | 18 | class SearchSubreddits: 19 | config = SubredditDownloader.config 20 | REDDIT_CLIENT_ID = config['reddit_client_id'] 21 | REDDIT_CLIENT_SECRET = config['reddit_client_secret'] 22 | IMGUR_CLIENT_ID = config['imgur_client_id'] 23 | 24 | REDDIT_USERNAME = None 25 | try: 26 | REDDIT_USERNAME = config['reddit_username'] 27 | except Exception as e: 28 | pass 29 | 30 | REDDIT_PASSWORD = None 31 | if REDDIT_USERNAME: 32 | if sys.stdin.isatty(): 33 | print("Username: " + REDDIT_USERNAME) 34 | REDDIT_PASSWORD = getpass.getpass("Password: ") 35 | else: 36 | # echo "foobar" > password 37 | # saveddit user .... < password 38 | REDDIT_PASSWORD = sys.stdin.readline().rstrip() 39 | 40 | def __init__(self, subreddit_names): 41 | self.logger = verboselogs.VerboseLogger(__name__) 42 | level_styles = { 43 | 'critical': {'bold': True, 'color': 'red'}, 44 | 'debug': {'color': 'green'}, 45 | 'error': {'color': 'red'}, 46 | 'info': {'color': 'white'}, 47 | 'notice': {'color': 'magenta'}, 48 | 'spam': {'color': 'white', 'faint': True}, 49 | 'success': {'bold': True, 'color': 'green'}, 50 | 'verbose': {'color': 'blue'}, 51 | 'warning': {'color': 'yellow'} 52 | } 53 | coloredlogs.install(level='SPAM', logger=self.logger, 54 | fmt='%(message)s', level_styles=level_styles) 55 | 56 | if not SearchSubreddits.REDDIT_USERNAME: 57 | self.logger.error("`reddit_username` in user_config.yaml is empty") 58 | self.logger.error("If you plan on using the user API of saveddit, then add your username to user_config.yaml") 59 | print("Exiting now") 60 | exit() 61 | else: 62 | if not len(SearchSubreddits.REDDIT_PASSWORD): 63 | if sys.stdin.isatty(): 64 | print("Username: " + SearchSubreddits.REDDIT_USERNAME) 65 | SearchSubreddits.REDDIT_PASSWORD = getpass.getpass("Password: ") 66 | else: 67 | # echo "foobar" > password 68 | # saveddit user ....
--------------------------------------------------------------------------------
/src/saveddit/search_subreddits.py:
--------------------------------------------------------------------------------
1 | import coloredlogs
2 | from colorama import Fore, Style
3 | from datetime import datetime, timezone
4 | import logging
5 | import verboselogs
6 | import getpass
7 | import json
8 | import os
9 | import praw
10 | from pprint import pprint
11 | import re
12 | from saveddit.submission_downloader import SubmissionDownloader
13 | from saveddit.subreddit_downloader import SubredditDownloader
14 | from saveddit.search_config import SearchConfig
15 | import sys
16 | from tqdm import tqdm
17 | 
18 | class SearchSubreddits:
19 |     config = SubredditDownloader.config
20 |     REDDIT_CLIENT_ID = config['reddit_client_id']
21 |     REDDIT_CLIENT_SECRET = config['reddit_client_secret']
22 |     IMGUR_CLIENT_ID = config['imgur_client_id']
23 | 
24 |     REDDIT_USERNAME = None
25 |     try:
26 |         REDDIT_USERNAME = config['reddit_username']
27 |     except Exception as e:
28 |         pass
29 | 
30 |     REDDIT_PASSWORD = None
31 |     if REDDIT_USERNAME:
32 |         if sys.stdin.isatty():
33 |             print("Username: " + REDDIT_USERNAME)
34 |             REDDIT_PASSWORD = getpass.getpass("Password: ")
35 |         else:
36 |             # echo "foobar" > password
37 |             # saveddit user .... < password
38 |             REDDIT_PASSWORD = sys.stdin.readline().rstrip()
39 | 
40 |     def __init__(self, subreddit_names):
41 |         self.logger = verboselogs.VerboseLogger(__name__)
42 |         level_styles = {
43 |             'critical': {'bold': True, 'color': 'red'},
44 |             'debug': {'color': 'green'},
45 |             'error': {'color': 'red'},
46 |             'info': {'color': 'white'},
47 |             'notice': {'color': 'magenta'},
48 |             'spam': {'color': 'white', 'faint': True},
49 |             'success': {'bold': True, 'color': 'green'},
50 |             'verbose': {'color': 'blue'},
51 |             'warning': {'color': 'yellow'}
52 |         }
53 |         coloredlogs.install(level='SPAM', logger=self.logger,
54 |                             fmt='%(message)s', level_styles=level_styles)
55 | 
56 |         if not SearchSubreddits.REDDIT_USERNAME:
57 |             self.logger.error("`reddit_username` in user_config.yaml is empty")
58 |             self.logger.error("If you plan on using the user API of saveddit, then add your username to user_config.yaml")
59 |             print("Exiting now")
60 |             exit()
61 |         else:
62 |             if not len(SearchSubreddits.REDDIT_PASSWORD):
63 |                 if sys.stdin.isatty():
64 |                     print("Username: " + SearchSubreddits.REDDIT_USERNAME)
65 |                     SearchSubreddits.REDDIT_PASSWORD = getpass.getpass("Password: ")
66 |                 else:
67 |                     # echo "foobar" > password
68 |                     # saveddit user .... < password
69 |                     SearchSubreddits.REDDIT_PASSWORD = sys.stdin.readline().rstrip()
70 | 
71 |         self.reddit = praw.Reddit(
72 |             client_id=SearchSubreddits.REDDIT_CLIENT_ID,
73 |             client_secret=SearchSubreddits.REDDIT_CLIENT_SECRET,
74 |             user_agent="saveddit (by /u/p_ranav)"
75 |         )
76 | 
77 |         self.multireddit_name = "+".join(subreddit_names)
78 |         self.subreddit = self.reddit.subreddit(self.multireddit_name)
79 | 
80 |     def download(self, args):
81 |         output_path = args.o
82 |         query = args.q
83 |         sort = args.s
84 |         syntax = SearchConfig.DEFAULT_SYNTAX
85 |         time_filter = args.t
86 |         include_nsfw = args.include_nsfw
87 |         skip_comments = args.skip_comments
88 |         skip_videos = args.skip_videos
89 |         skip_meta = args.skip_meta
90 |         comment_limit = 0  # top-level comments ONLY
91 | 
92 |         self.logger.verbose("Searching '" + query + "' in " + self.multireddit_name + ", sorted by " + sort)
93 |         if include_nsfw:
94 |             self.logger.spam(" * Including NSFW results")
95 | 
96 |         search_dir = os.path.join(output_path, "www.reddit.com", "q",
97 |                                   query, self.multireddit_name, sort)
98 | 
99 |         if not os.path.exists(search_dir):
100 |             os.makedirs(search_dir)
101 | 
102 |         search_results = None
103 |         if include_nsfw:
104 |             search_params = {"include_over_18": "on"}
105 |             search_results = self.subreddit.search(query, sort, syntax, time_filter, params=search_params)
106 |         else:
107 |             search_results = self.subreddit.search(query, sort, syntax, time_filter)
108 | 
109 |         results_found = False
110 |         for i, submission in enumerate(search_results):
111 |             if not results_found:
112 |                 results_found = True
113 |             SubmissionDownloader(submission, i, self.logger, search_dir,
114 |                                  skip_videos, skip_meta, skip_comments, comment_limit,
115 |                                  {'imgur_client_id': SearchSubreddits.IMGUR_CLIENT_ID})
116 | 
117 |         if not results_found:
118 |             self.logger.spam(" * No results found")
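119 | 
120 | # Minimal illustrative driver (the field names follow the argparse dests read
121 | # by download() above; assumes a populated ~/.saveddit/user_config.yaml):
122 | #
123 | #   from argparse import Namespace
124 | #   searcher = SearchSubreddits(["pics", "aww"])
125 | #   searcher.download(Namespace(o="/tmp/saveddit", q="cats", s="relevance",
126 | #                               t="all", include_nsfw=False, skip_comments=True,
127 | #                               skip_videos=True, skip_meta=False))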
--------------------------------------------------------------------------------
/src/saveddit/submission_downloader.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import coloredlogs
3 | from colorama import Fore
4 | import contextlib
5 | import logging
6 | import verboselogs
7 | from datetime import datetime
8 | import os
9 | from io import StringIO
10 | import json
11 | import mimetypes
12 | import ffmpeg
13 | import praw
14 | from pprint import pprint
15 | import re
16 | import requests
17 | from tqdm import tqdm
18 | import urllib.request
19 | import youtube_dl
20 | 
21 | 
22 | 
23 | class SubmissionDownloader:
24 |     def __init__(self, submission, submission_index, logger, output_dir, skip_videos, skip_meta, skip_comments, comment_limit, config):
25 |         self.IMGUR_CLIENT_ID = config["imgur_client_id"]
26 | 
27 |         self.logger = logger
28 |         i = submission_index
29 |         prefix_str = '#' + str(i).zfill(3) + ' '
30 |         self.indent_1 = ' ' * len(prefix_str) + "* "
31 |         self.indent_2 = ' ' * len(self.indent_1) + "- "
32 | 
33 |         has_url = getattr(submission, "url", None)
34 |         if has_url:
35 |             title = submission.title
36 |             self.logger.verbose(prefix_str + '"' + title + '"')
37 |             title = re.sub(r'\W+', '_', title)
38 | 
39 |             # Truncate title
40 |             if len(title) > 32:
41 |                 title = title[0:32]
42 |                 if os.name == "nt":
43 |                     pass  # on Windows, skip the "..." suffix to keep paths shorter
44 |                 else:
45 |                     title += "..."
46 | 
47 |             # Prepare directory for the submission
48 |             post_dir = str(i).zfill(3) + "_" + title.replace(" ", "_")
49 |             submission_dir = os.path.join(output_dir, post_dir)
50 |             if not os.path.exists(submission_dir):
51 |                 os.makedirs(submission_dir)
52 |             else:
53 |                 print("Directory already exists, skipping this submission.")
54 |                 return
55 | 
56 |             self.logger.spam(
57 |                 self.indent_1 + "Processing `" + submission.url + "`")
58 | 
59 |             success = False
60 | 
61 |             should_create_files_dir = True
62 |             if skip_comments and skip_meta:
63 |                 should_create_files_dir = False
64 | 
65 |             def create_files_dir(submission_dir):
66 |                 if should_create_files_dir:
67 |                     files_dir = os.path.join(submission_dir, "files")
68 |                     if not os.path.exists(files_dir):
69 |                         os.makedirs(files_dir)
70 |                     return files_dir
71 |                 else:
72 |                     return submission_dir
73 | 
74 |             if self.is_direct_link_to_content(submission.url, [".png", ".jpg", ".jpeg", ".gif"]):
75 |                 files_dir = create_files_dir(submission_dir)
76 | 
77 |                 filename = submission.url.split("/")[-1]
78 |                 self.logger.spam(
79 |                     self.indent_1 + "This is a direct link to a " + filename.split(".")[-1] + " file")
80 |                 save_path = os.path.join(files_dir, filename)
81 |                 self.download_direct_link(submission, save_path)
82 |                 success = True
83 |             elif self.is_direct_link_to_content(submission.url, [".mp4"]):
84 |                 filename = submission.url.split("/")[-1]
85 |                 self.logger.spam(
86 |                     self.indent_1 + "This is a direct link to a " + filename.split(".")[-1] + " file")
87 |                 if not skip_videos:
88 |                     files_dir = create_files_dir(submission_dir)
89 |                     save_path = os.path.join(files_dir, filename)
90 |                     self.download_direct_link(submission, save_path)
91 |                     success = True
92 |                 else:
93 |                     self.logger.spam(self.indent_1 + "Skipping download of video content")
94 |                     success = True
95 |             elif self.is_reddit_gallery(submission.url):
96 |                 files_dir = create_files_dir(submission_dir)
97 | 
98 |                 self.logger.spam(
99 |                     self.indent_1 + "This is a reddit gallery")
100 |                 self.download_reddit_gallery(submission, files_dir, skip_videos)
101 |                 success = True
102 |             elif self.is_reddit_video(submission.url):
103 |                 self.logger.spam(
104 |                     self.indent_1 + "This is a reddit video")
105 | 
106 |                 if not skip_videos:
107 |                     files_dir = create_files_dir(submission_dir)
108 |                     self.download_reddit_video(submission, files_dir)
109 |                     success = True
110 |                 else:
111 |                     self.logger.spam(self.indent_1 + "Skipping download of video content")
112 |                     success = True
113 |             elif self.is_gfycat_link(submission.url) or self.is_redgifs_link(submission.url):
114 |                 if self.is_gfycat_link(submission.url):
115 |                     self.logger.spam(
116 |                         self.indent_1 + "This is a gfycat link")
117 |                 else:
118 |                     self.logger.spam(
119 |                         self.indent_1 + "This is a redgif link")
120 | 
121 |                 if not skip_videos:
122 |                     files_dir = create_files_dir(submission_dir)
123 |                     self.download_gfycat_or_redgif(submission, files_dir)
124 |                     success = True
125 |                 else:
126 |                     self.logger.spam(self.indent_1 + "Skipping download of video content")
127 |                     success = True
128 |             elif self.is_imgur_album(submission.url):
129 |                 files_dir = create_files_dir(submission_dir)
130 | 
131 |                 self.logger.spam(
132 |                     self.indent_1 + "This is an imgur album")
133 |                 self.download_imgur_album(submission, files_dir)
134 |                 success = True
135 |             elif self.is_imgur_image(submission.url):
136 |                 files_dir = create_files_dir(submission_dir)
137 | 
138 |                 self.logger.spam(
139 |                     self.indent_1 + "This is an imgur image or video")
140 |                 self.download_imgur_image(submission, files_dir)
141 |                 success = True
142 |             elif self.is_self_post(submission):
143 |                 self.logger.spam(self.indent_1 + "This is a self-post")
144 |                 success = True
145 |             elif (not skip_videos) and (self.is_youtube_link(submission.url) or self.is_supported_by_youtubedl(submission.url)):
146 |                 if self.is_youtube_link(submission.url):
147 |                     self.logger.spam(
148 |                         self.indent_1 + "This is a youtube link")
149 |                 else:
150 |                     self.logger.spam(
151 |                         self.indent_1 + "This link is supported by a youtube-dl extractor")
152 | 
153 |                 if not skip_videos:
154 |                     files_dir = create_files_dir(submission_dir)
155 |                     self.download_youtube_video(submission.url, files_dir)
156 |                     success = True
157 |                 else:
158 |                     self.logger.spam(self.indent_1 + "Skipping download of video content")
159 |                     success = True
160 |             else:
161 |                 success = True
162 | 
163 |             # Download submission meta
164 |             if not skip_meta:
165 |                 self.logger.spam(self.indent_1 + "Saving submission.json")
166 |                 self.download_submission_meta(submission, submission_dir)
167 |             else:
168 |                 self.logger.spam(
169 |                     self.indent_1 + "Skipping submission meta")
170 | 
171 |             # Download comments if requested
172 |             if not skip_comments:
173 |                 if comment_limit is None:
174 |                     self.logger.spam(
175 |                         self.indent_1 + "Saving all comments to comments.json")
176 |                 else:
177 |                     self.logger.spam(
178 |                         self.indent_1 + "Saving top-level comments to comments.json")
179 |                 self.download_comments(
180 |                     submission, submission_dir, comment_limit)
181 |             else:
182 |                 self.logger.spam(
183 |                     self.indent_1 + "Skipping comments")
184 | 
185 |             if success:
186 |                 self.logger.spam(
187 |                     self.indent_1 + "Saved to " + submission_dir + "\n")
188 |             else:
189 |                 self.logger.warning(
190 |                     self.indent_1 + "Failed to download from link " + submission.url + "\n"
191 |                 )
192 | 
193 |     def print_formatted_error(self, e):
194 |         for line in str(e).split("\n"):
195 |             self.logger.error(self.indent_2 + line)
196 | 
197 |     def is_direct_link_to_content(self, url, supported_file_formats):
198 |         url_leaf = url.split("/")[-1]
199 |         return any([i in url_leaf for i in supported_file_formats]) and ".gifv" not in url_leaf
200 | 
201 |     def download_direct_link(self, submission, output_path):
202 |         try:
203 |             urllib.request.urlretrieve(submission.url, output_path)
204 |         except Exception as e:
205 |             self.print_formatted_error(e)
206 | 
207 |     def is_youtube_link(self, url):
208 |         return "youtube.com" in url or "youtu.be" in url
209 | 
210 |     def is_supported_by_youtubedl(self, url):
211 |         try:
212 |             # Since youtube-dl's quiet mode is anything BUT quiet,
213 |             # use contextlib to redirect stderr to a local StringIO variable
214 |             local_stderr = StringIO()
215 |             with contextlib.redirect_stderr(local_stderr):
216 |                 if "flickr.com/photos" in url:
217 |                     return False
218 | 
219 |                 # Try to extract info from URL
220 |                 try:
221 |                     download_options = {
222 |                         'quiet': True,
223 |                         'warnings': True,
224 |                         'ignoreerrors': True,
225 |                     }
226 |                     ydl = youtube_dl.YoutubeDL(download_options)
227 |                     r = ydl.extract_info(url, download=False)
228 |                 except Exception as e:
229 |                     # No media found through youtube-dl
230 |                     self.logger.spam(self.indent_2 + "No media found in '" + url + "' that could be downloaded with youtube-dl")
231 |                     return False
232 | 
233 |                 extractors = youtube_dl.extractor.gen_extractors()
234 |                 for e in extractors:
235 |                     if e.suitable(url) and e.IE_NAME != 'generic':
236 |                         return True
237 |                 self.logger.spam(self.indent_2 + "Only youtube-dl's generic extractor matches this link")
238 |                 self.logger.spam(self.indent_2 + "No media found in '" + url + "' that could be downloaded with 
youtube-dl") 239 | return False 240 | except Exception as e: 241 | return False 242 | 243 | def download_youtube_video(self, url, output_path): 244 | try: 245 | local_stderr = StringIO() 246 | with contextlib.redirect_stderr(local_stderr): 247 | download_options = { 248 | 'format': "299+bestaudio/298+bestaudio/137+bestaudio/136+bestaudio/best", 249 | 'quiet': True, 250 | 'warnings': True, 251 | 'ignoreerrors': True, 252 | 'nooverwrites': True, 253 | 'continuedl': True, 254 | 'outtmpl': output_path + '/%(id)s.%(ext)s' 255 | } 256 | self.logger.spam(self.indent_2 + "Downloading " + 257 | url + " with youtube-dl") 258 | with youtube_dl.YoutubeDL(download_options) as ydl: 259 | ydl.download([url]) 260 | errors = local_stderr.getvalue() 261 | if not len(errors): 262 | self.logger.spam(self.indent_2 + "Finished downloading video from " + 263 | url) 264 | else: 265 | self.logger.error(self.indent_2 + errors.strip()) 266 | except Exception as e: 267 | self.logger.error(self.indent_2 + "Failed to download with youtube-dl") 268 | self.print_formatted_error(e) 269 | 270 | def is_reddit_gallery(self, url): 271 | return "reddit.com/gallery" in url 272 | 273 | def download_reddit_gallery(self, submission, output_path, skip_videos): 274 | gallery_data = getattr(submission, "gallery_data", None) 275 | media_metadata = getattr(submission, "media_metadata", None) 276 | self.logger.spam( 277 | self.indent_2 + "Looking for submission.gallery_data and submission.media_metadata") 278 | 279 | if gallery_data == None and media_metadata == None: 280 | # gallery_data not in submission 281 | # could be a crosspost 282 | crosspost_parent_list = getattr( 283 | submission, "crosspost_parent_list", None) 284 | if crosspost_parent_list != None: 285 | self.logger.spam( 286 | self.indent_2 + "This is a crosspost to a reddit gallery") 287 | first_parent = crosspost_parent_list[0] 288 | gallery_data = first_parent["gallery_data"] 289 | media_metadata = first_parent["media_metadata"] 290 | 291 | if gallery_data != None and media_metadata != None: 292 | image_count = len(gallery_data["items"]) 293 | self.logger.spam(self.indent_2 + "This reddit gallery has " + 294 | str(image_count) + " images") 295 | for j, item in tqdm(enumerate(gallery_data["items"]), total=image_count, bar_format='%s%s{l_bar}{bar:20}{r_bar}%s' % (self.indent_2, Fore.WHITE + Fore.LIGHTBLACK_EX, Fore.RESET)): 296 | try: 297 | media_id = item["media_id"] 298 | item_metadata = media_metadata[media_id] 299 | item_format = item_metadata['m'] 300 | if "image/" in item_format or "video/" in item_format: 301 | if not os.path.exists(output_path): 302 | os.makedirs(output_path) 303 | if "image/" in item_format: 304 | item_format = item_format.split("image/")[-1] 305 | elif "video/" in item_format: 306 | item_format = item_format.split("video/")[-1] 307 | # Skip video content if requested by user 308 | if skip_videos: 309 | continue 310 | item_filename = media_id + "." 
+ item_format
311 |                             item_url = item_metadata["s"]["u"]
312 |                             save_path = os.path.join(output_path, item_filename)
313 |                             try:
314 |                                 urllib.request.urlretrieve(item_url, save_path)
315 |                             except Exception as e:
316 |                                 self.print_formatted_error(e)
317 |                 except Exception as e:
318 |                     self.print_formatted_error(e)
319 | 
320 |     def is_reddit_video(self, url):
321 |         return "v.redd.it" in url
322 | 
323 |     def download_reddit_video(self, submission, output_path):
324 |         media = getattr(submission, "media", None)
325 |         media_id = submission.url.split("v.redd.it/")[-1]
326 | 
327 |         self.logger.spam(self.indent_2 + "Looking for submission.media")
328 | 
329 |         if media is None:
330 |             # link might be a crosspost
331 |             crosspost_parent_list = getattr(
332 |                 submission, "crosspost_parent_list", None)
333 |             if crosspost_parent_list is not None:
334 |                 self.logger.spam(
335 |                     self.indent_2 + "This is a crosspost to a reddit video")
336 |                 first_parent = crosspost_parent_list[0]
337 |                 media = first_parent["media"]
338 | 
339 |         if media is not None:
340 |             self.logger.spam(self.indent_2 + "Downloading video component")
341 |             url = media["reddit_video"]["fallback_url"]
342 |             video_save_path = os.path.join(
343 |                 output_path, media_id + "_video.mp4")
344 |             try:
345 |                 urllib.request.urlretrieve(url, video_save_path)
346 |             except Exception as e:
347 |                 self.print_formatted_error(e)
348 | 
349 |             # Download the audio
350 |             self.logger.spam(self.indent_2 + "Downloading audio component")
351 |             audio_downloaded = False
352 |             audio_save_path = os.path.join(
353 |                 output_path, media_id + "_audio.mp4")
354 |             try:
355 |                 urllib.request.urlretrieve(
356 |                     submission.url + "/DASH_audio.mp4", audio_save_path)
357 |                 audio_downloaded = True
358 |             except Exception as e:
359 |                 pass
360 | 
361 |             if audio_downloaded:
362 |                 # Merge mp4 files
363 |                 self.logger.spam(
364 |                     self.indent_2 + "Merging video & audio components with ffmpeg")
365 |                 output_save_path = os.path.join(output_path, media_id + ".mp4")
366 |                 input_video = ffmpeg.input(video_save_path)
367 |                 input_audio = ffmpeg.input(audio_save_path)
368 |                 ffmpeg.concat(input_video, input_audio, v=1, a=1)\
369 |                     .output(output_save_path)\
370 |                     .global_args('-loglevel', 'error')\
371 |                     .global_args('-y')\
372 |                     .run()
373 |                 self.logger.spam(self.indent_2 + "Done merging with ffmpeg")
374 |             else:
375 |                 self.logger.spam(
376 |                     self.indent_2 + "This video does not have an audio component")
377 | 
378 |         self.logger.spam(
379 |             self.indent_2 + "Successfully saved video")
380 | 
381 |     def is_gfycat_link(self, url):
382 |         return "gfycat.com/" in url
383 | 
384 |     def is_redgifs_link(self, url):
385 |         return "redgifs.com/" in url
386 | 
387 |     def get_gfycat_embedded_video_url(self, url):
388 |         try:
389 |             response = requests.get(url)
390 |             data = response.text
391 |             soup = BeautifulSoup(data, features="html.parser")
392 | 
393 |             # Cycle through all links
394 |             giant_url_found = False
395 |             giant_url = ""
396 |             thumbs_url_found = False
397 |             thumbs_url = ""
398 |             for link in soup.find_all():
399 |                 link_src = link.get('src')
400 |                 src_url = str(link_src)
401 |                 if ".mp4" in src_url:
402 |                     # Looking for giant.gfycat.com
403 |                     if "giant." in src_url:
404 |                         giant_url_found = True
405 |                         giant_url = src_url
406 |                     elif "thumbs." 
in src_url: 407 | thumbs_url_found = True 408 | thumbs_url = src_url 409 | except Exception as e: 410 | self.print_formatted_error(e) 411 | return "" 412 | 413 | if giant_url_found: 414 | return giant_url 415 | elif thumbs_url_found: 416 | return thumbs_url 417 | else: 418 | return "" 419 | 420 | def guess_extension(self, url): 421 | response = requests.get(url) 422 | content_type = response.headers['content-type'] 423 | return mimetypes.guess_extension(content_type) 424 | 425 | def get_redirect_url(self, url): 426 | r = requests.get(url) 427 | return r.url 428 | 429 | def download_gfycat_or_redgif(self, submission, output_dir): 430 | # Check if gfycat redirects to gifdeliverynetwork 431 | redirect_url = self.get_redirect_url(submission.url) 432 | if "gfycat.com" in submission.url and "gifdeliverynetwork.com" in redirect_url: 433 | self.logger.spam( 434 | self.indent_2 + "This is a gfycat link that redirects to gifdeliverynetwork.com") 435 | try: 436 | # Gfycat link that redirects to gifdeliverynetwork 437 | # True source in this case is hiding in redgifs.com 438 | response = requests.get(redirect_url) 439 | html = BeautifulSoup(response.content, features="html.parser") 440 | links = html.find_all() 441 | for i in links: 442 | if "src" in str(i): 443 | attrs = i.attrs 444 | if "src" in attrs: 445 | src = attrs["src"] 446 | if "redgifs.com/" in src: 447 | self.logger.spam( 448 | self.indent_2 + "Found embedded media at " + src) 449 | filename = src.split("/")[-1] 450 | save_path = os.path.join(output_dir, filename) 451 | try: 452 | r = requests.get(src) 453 | with open(save_path, 'wb') as outfile: 454 | outfile.write(r.content) 455 | except Exception as e: 456 | self.print_formatted_error(e) 457 | except Exception as e: 458 | self.print_formatted_error(e) 459 | 460 | self.logger.spam( 461 | self.indent_2 + "Looking for submission.preview.reddit_video_preview.fallback_url") 462 | 463 | preview = None 464 | try: 465 | preview = getattr(submission, "preview") 466 | if preview: 467 | if "reddit_video_preview" in preview: 468 | if "fallback_url" in preview["reddit_video_preview"]: 469 | self.logger.spam(self.indent_2 + "Found submission.preview.reddit_video_preview.fallback_url") 470 | fallback_url = preview["reddit_video_preview"]["fallback_url"] 471 | if "." in fallback_url.split("/")[-1]: 472 | file_format = fallback_url.split(".")[-1] 473 | filename = submission.url.split("/")[-1] + "." 
+ file_format 474 | else: 475 | filename = submission.url.split("/")[-1] + ".mp4" 476 | save_path = os.path.join(output_dir, filename) 477 | try: 478 | urllib.request.urlretrieve(fallback_url, save_path) 479 | return 480 | except Exception as e: 481 | self.print_formatted_error(e) 482 | elif "images" in preview: 483 | if "source" in preview["images"][0]: 484 | self.logger.spam(self.indent_2 + "Found submission.preview.images instead") 485 | source_url = preview["images"][0]["source"]["url"] 486 | try: 487 | extension = self.guess_extension(source_url) 488 | filename = submission.url.split("/")[-1] + extension 489 | save_path = os.path.join(output_dir, filename) 490 | try: 491 | urllib.request.urlretrieve(source_url, save_path) 492 | except Exception as e: 493 | self.print_formatted_error(e) 494 | except Exception as e: 495 | self.print_formatted_error(e) 496 | 497 | except Exception as e: 498 | self.print_formatted_error(e) 499 | 500 | try: 501 | self.logger.spam( 502 | self.indent_2 + "Looking for submission.media_embed") 503 | media_embed = getattr(submission, "media_embed") 504 | if media_embed: 505 | content = media_embed["content"] 506 | self.logger.spam( 507 | self.indent_2 + "Found submission.media_embed") 508 | if "iframe" in content: 509 | if "gfycat.com" in submission.url: 510 | self.logger.spam( 511 | self.indent_2 + "This is an embedded video in gfycat.com") 512 | # This is likely an embedded video in gfycat 513 | video_url = self.get_gfycat_embedded_video_url(submission.url) 514 | if video_url: 515 | filename = video_url.split("/")[-1] 516 | save_path = os.path.join(output_dir, filename) 517 | 518 | self.logger.spam( 519 | self.indent_2 + "Embedded video URL: " + video_url) 520 | try: 521 | r = requests.get(video_url) 522 | with open(save_path, 'wb') as outfile: 523 | outfile.write(r.content) 524 | except Exception as e: 525 | self.print_formatted_error(e) 526 | except Exception as e: 527 | self.print_formatted_error(e) 528 | 529 | def is_imgur_album(self, url): 530 | return "imgur.com/a/" in url or "imgur.com/gallery/" in url 531 | 532 | def get_imgur_album_images_count(self, album_id): 533 | request = "https://api.imgur.com/3/album/" + album_id 534 | res = requests.get(request, headers={ 535 | "Authorization": "Client-ID " + self.IMGUR_CLIENT_ID}) 536 | if res.status_code == 200: 537 | return res.json()["data"]["images_count"] 538 | else: 539 | self.logger.spam(self.indent_2 + "This imgur album is empty") 540 | return 0 541 | 542 | def get_imgur_image_meta(self, image_id): 543 | request = "https://api.imgur.com/3/image/" + image_id 544 | res = requests.get(request, headers={ 545 | "Authorization": "Client-ID " + self.IMGUR_CLIENT_ID}) 546 | return res.json()["data"] 547 | 548 | def download_imgur_album(self, submission, output_dir): 549 | # Imgur album 550 | album_id = "" 551 | if "imgur.com/a/" in submission.url: 552 | album_id = submission.url.split("imgur.com/a/")[-1] 553 | elif "imgur.com/gallery/" in submission.url: 554 | album_id = submission.url.split("imgur.com/gallery/")[-1] 555 | 556 | self.logger.spam(self.indent_2 + "Album ID " + album_id) 557 | 558 | images_count = self.get_imgur_album_images_count(album_id) 559 | if images_count > 0: 560 | request = "https://api.imgur.com/3/album/" + album_id 561 | res = requests.get(request, headers={ 562 | "Authorization": "Client-ID " + self.IMGUR_CLIENT_ID}) 563 | self.logger.spam(self.indent_2 + "This imgur album has " + 564 | str(images_count) + " images") 565 | for i, image in 
tqdm(enumerate(res.json()["data"]["images"]), total=images_count, bar_format='%s%s{l_bar}{bar:20}{r_bar}%s' % (self.indent_2, Fore.WHITE + Fore.LIGHTBLACK_EX, Fore.RESET)):
566 |                 url = image["link"]
567 |                 filename = str(i).zfill(3) + "_" + url.split("/")[-1]
568 |                 save_path = os.path.join(output_dir, filename)
569 |                 try:
570 |                     if not os.path.exists(output_dir):
571 |                         os.makedirs(output_dir)
572 |                     urllib.request.urlretrieve(url, save_path)
573 |                 except Exception as e:
574 |                     self.print_formatted_error(e)
575 | 
576 |     def is_imgur_image(self, url):
577 |         return "imgur.com" in url
578 | 
579 |     def download_imgur_image(self, submission, output_dir):
580 |         # Other imgur content, e.g., .gifv, '.mp4', '.jpg', etc.
581 |         url_leaf = submission.url.split("/")[-1]
582 |         if "." in url_leaf:
583 |             image_id = url_leaf.split(".")[0]
584 |         else:
585 |             image_id = url_leaf
586 | 
587 |         try:
588 |             data = self.get_imgur_image_meta(image_id)
589 |             url = data["link"]
590 |             image_type = data["type"]
591 |             if "video/" in image_type:
592 |                 self.logger.spam(
593 |                     self.indent_2 + "This is an imgur link to a video file")
594 |                 image_type = image_type.split("video/")[-1]
595 |             elif "image/" in image_type:
596 |                 self.logger.spam(
597 |                     self.indent_2 + "This is an imgur link to an image file")
598 |                 image_type = image_type.split("image/")[-1]
599 | 
600 |             filename = image_id + "." + image_type
601 |             save_path = os.path.join(output_dir, filename)
602 | 
603 |             urllib.request.urlretrieve(url, save_path)
604 |         except Exception as e:
605 |             self.print_formatted_error(e)
606 | 
607 |     def download_comments(self, submission, output_dir, comment_limit):
608 |         # Save comments - breadth-first unwrap of the comment forest
609 |         comments_list = []
610 |         with open(os.path.join(output_dir, 'comments.json'), 'w') as file:
611 |             submission.comments.replace_more(limit=comment_limit)
612 |             limited_comments = submission.comments.list()
613 |             if not len(limited_comments):
614 |                 # No comments
615 |                 self.logger.spam(self.indent_2 + "No comments found")
616 |                 return
617 | 
618 |             for comment in tqdm(limited_comments, total=len(limited_comments), bar_format='%s%s{l_bar}{bar:20}{r_bar}%s' % (self.indent_2, Fore.WHITE + Fore.LIGHTBLACK_EX, Fore.RESET)):
619 |                 comment_dict = {}
620 |                 try:
621 |                     if comment.author:
622 |                         comment_dict["author"] = comment.author.name
623 |                     else:
624 |                         comment_dict["author"] = None
625 |                     comment_dict["body"] = comment.body
626 |                     comment_dict["created_utc"] = int(comment.created_utc)
627 |                     comment_dict["distinguished"] = comment.distinguished
628 |                     comment_dict["downs"] = comment.downs
629 |                     comment_dict["edited"] = comment.edited
630 |                     comment_dict["id"] = comment.id
631 |                     comment_dict["is_submitter"] = comment.is_submitter
632 |                     comment_dict["link_id"] = comment.link_id
633 |                     comment_dict["parent_id"] = comment.parent_id
634 |                     comment_dict["permalink"] = comment.permalink
635 |                     comment_dict["score"] = comment.score
636 |                     comment_dict["stickied"] = comment.stickied
637 |                     comment_dict["subreddit_name_prefixed"] = comment.subreddit_name_prefixed
638 |                     comment_dict["subreddit_id"] = comment.subreddit_id
639 |                     comment_dict["total_awards_received"] = comment.total_awards_received
640 |                     comment_dict["ups"] = comment.ups
641 |                 except Exception as e:
642 |                     self.print_formatted_error(e)
643 |                 comments_list.append(comment_dict)
644 |             file.write(json.dumps(comments_list, indent=2))
645 | 
646 |     def is_self_post(self, submission):
647 |         return submission.is_self
648 | 
649 |     def download_submission_meta(self, submission, submission_dir):
650 |         submission_dict = {}
651 |         if submission.author:
652 |             submission_dict["author"] = submission.author.name
653 |         else:
654 |             submission_dict["author"] = None
655 |         submission_dict["created_utc"] = int(submission.created_utc)
656 |         submission_dict["distinguished"] = submission.distinguished
657 |         submission_dict["downs"] = submission.downs
658 |         submission_dict["edited"] = submission.edited
659 |         submission_dict["id"] = submission.id
660 |         submission_dict["link_flair_text"] = submission.link_flair_text
661 |         submission_dict["locked"] = submission.locked
662 |         submission_dict["num_comments"] = submission.num_comments
663 |         submission_dict["num_crossposts"] = submission.num_crossposts
664 |         submission_dict["permalink"] = submission.permalink
665 |         submission_dict["selftext"] = submission.selftext
666 |         submission_dict["selftext_html"] = submission.selftext_html
667 |         submission_dict["send_replies"] = submission.send_replies
668 |         submission_dict["spoiler"] = submission.spoiler
669 |         submission_dict["stickied"] = submission.stickied
670 |         submission_dict["subreddit_name_prefixed"] = submission.subreddit_name_prefixed
671 |         submission_dict["subreddit_id"] = submission.subreddit_id
672 |         submission_dict["subreddit_subscribers"] = submission.subreddit_subscribers
673 |         submission_dict["subreddit_type"] = submission.subreddit_type
674 |         submission_dict["title"] = submission.title
675 |         submission_dict["total_awards_received"] = submission.total_awards_received
676 |         submission_dict["ups"] = submission.ups
677 |         submission_dict["upvote_ratio"] = submission.upvote_ratio
678 |         submission_dict["url"] = submission.url
679 | 
680 |         with open(os.path.join(submission_dir, "submission.json"), 'w') as file:
681 |             file.write(json.dumps(submission_dict, indent=2))
682 | 
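683 | # Illustrative standalone use (normally these objects are constructed by the
684 | # downloader classes; the submission id and credentials below are placeholders):
685 | #
686 | #   import praw, verboselogs
687 | #   reddit = praw.Reddit(client_id="...", client_secret="...",
688 | #                        user_agent="saveddit (by /u/p_ranav)")
689 | #   logger = verboselogs.VerboseLogger(__name__)
690 | #   SubmissionDownloader(reddit.submission(id="abc123"), 0, logger, "/tmp/out",
691 | #                        False, False, False, 0, {"imgur_client_id": "..."})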
--------------------------------------------------------------------------------
/src/saveddit/subreddit_downloader.py:
--------------------------------------------------------------------------------
1 | import coloredlogs
2 | from colorama import Fore
3 | import logging
4 | import verboselogs
5 | import os
6 | import praw
7 | from saveddit.configuration import ConfigurationLoader
8 | from saveddit.submission_downloader import SubmissionDownloader
9 | from saveddit.subreddit_downloader_config import SubredditDownloaderConfig
10 | 
11 | class SubredditDownloader:
12 |     app_config_dir = os.path.expanduser("~/.saveddit")
13 |     if not os.path.exists(app_config_dir):
14 |         os.makedirs(app_config_dir)
15 | 
16 |     config_file_location = os.path.expanduser("~/.saveddit/user_config.yaml")
17 |     config = ConfigurationLoader.load(config_file_location)
18 | 
19 |     REDDIT_CLIENT_ID = config['reddit_client_id']
20 |     REDDIT_CLIENT_SECRET = config['reddit_client_secret']
21 |     IMGUR_CLIENT_ID = config['imgur_client_id']
22 | 
23 |     def __init__(self, subreddit_name):
24 |         self.subreddit_name = subreddit_name
25 |         reddit = praw.Reddit(
26 |             client_id=SubredditDownloader.REDDIT_CLIENT_ID,
27 |             client_secret=SubredditDownloader.REDDIT_CLIENT_SECRET,
28 |             user_agent="saveddit (by /u/p_ranav)",
29 |         )
30 |         self.subreddit = reddit.subreddit(subreddit_name)
31 | 
32 |         self.logger = verboselogs.VerboseLogger(__name__)
33 |         level_styles = {
34 |             'critical': {'bold': True, 'color': 'red'},
35 |             'debug': {'color': 'green'},
36 |             'error': {'color': 'red'},
37 |             'info': {'color': 'white'},
38 |             'notice': {'color': 'magenta'},
39 |             'spam': {'color': 'white', 'faint': True},
40 |             'success': {'bold': True, 'color': 'green'},
41 |             'verbose': {'color': 'blue'},
42 |             'warning': {'color': 'yellow'}
43 |         }
44 |         coloredlogs.install(level='SPAM', logger=self.logger,
45 |                             fmt='%(message)s', level_styles=level_styles)
46 | 
47 |     def download(self, output_path, download_all_comments, categories=SubredditDownloaderConfig.DEFAULT_CATEGORIES, post_limit=SubredditDownloaderConfig.DEFAULT_POST_LIMIT, skip_videos=False, skip_meta=False, skip_comments=False):
48 |         '''
49 |         categories: List of categories within the subreddit to download (see SubredditDownloaderConfig.DEFAULT_CATEGORIES)
50 |         post_limit: Number of posts to download (default: None, i.e., all posts)
51 |         download_all_comments: False saves only top-level comments of each submission;
52 |                                True fetches the entire comment forest
53 |         '''
54 |         root_dir = os.path.join(output_path, "www.reddit.com", "r",
55 |                                 self.subreddit_name)
56 | 
57 |         if not download_all_comments:
58 |             comment_limit = 0
59 |         else:
60 |             comment_limit = None
61 | 
62 |         for c in categories:
63 |             self.logger.notice("Downloading from /r/" +
64 |                                self.subreddit_name + "/" + c + "/")
65 |             category_dir = os.path.join(root_dir, c)
66 |             if not os.path.exists(category_dir):
67 |                 os.makedirs(category_dir)
68 |             category_function = getattr(self.subreddit, c)
69 | 
70 |             for i, submission in enumerate(category_function(limit=post_limit)):
71 |                 SubmissionDownloader(submission, i, self.logger, category_dir,
72 |                                      skip_videos, skip_meta, skip_comments, comment_limit,
73 |                                      {'imgur_client_id': SubredditDownloader.IMGUR_CLIENT_ID})
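74 | 
75 | # Illustrative use (assumes a populated ~/.saveddit/user_config.yaml):
76 | #
77 | #   downloader = SubredditDownloader("pics")
78 | #   downloader.download("/tmp/saveddit", download_all_comments=False,
79 | #                       categories=["top"], post_limit=5, skip_videos=True)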
--------------------------------------------------------------------------------
/src/saveddit/subreddit_downloader_config.py:
--------------------------------------------------------------------------------
1 | class SubredditDownloaderConfig:
2 |     DEFAULT_CATEGORIES = ["hot", "new", "random_rising", "rising",
3 |                           "controversial", "top", "gilded"]
4 |     DEFAULT_POST_LIMIT = None
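5 | 
6 | # Each category name must match a listing method on praw's Subreddit model
7 | # (subreddit.hot(), subreddit.top(), ...), since SubredditDownloader resolves
8 | # categories with getattr(self.subreddit, c).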
--------------------------------------------------------------------------------
/src/saveddit/user_downloader.py:
--------------------------------------------------------------------------------
1 | import coloredlogs
2 | from colorama import Fore, Style
3 | from datetime import datetime, timezone
4 | import logging
5 | import verboselogs
6 | import getpass
7 | import json
8 | import os
9 | import praw
10 | from pprint import pprint
11 | import re
12 | from saveddit.submission_downloader import SubmissionDownloader
13 | from saveddit.subreddit_downloader import SubredditDownloader
14 | import sys
15 | from tqdm import tqdm
16 | 
17 | class UserDownloader:
18 |     config = SubredditDownloader.config
19 | 
20 |     REDDIT_CLIENT_ID = config['reddit_client_id']
21 |     REDDIT_CLIENT_SECRET = config['reddit_client_secret']
22 |     REDDIT_USERNAME = None
23 |     try:
24 |         REDDIT_USERNAME = config['reddit_username']
25 |     except Exception as e:
26 |         pass
27 | 
28 |     REDDIT_PASSWORD = None
29 |     if REDDIT_USERNAME:
30 |         if sys.stdin.isatty():
31 |             print("Username: " + REDDIT_USERNAME)
32 |             REDDIT_PASSWORD = getpass.getpass("Password: ")
33 |         else:
34 |             # echo "foobar" > password
35 |             # saveddit user .... < password
36 |             REDDIT_PASSWORD = sys.stdin.readline().rstrip()
37 | 
38 |     IMGUR_CLIENT_ID = config['imgur_client_id']
39 | 
40 |     def __init__(self):
41 |         self.logger = verboselogs.VerboseLogger(__name__)
42 |         level_styles = {
43 |             'critical': {'bold': True, 'color': 'red'},
44 |             'debug': {'color': 'green'},
45 |             'error': {'color': 'red'},
46 |             'info': {'color': 'white'},
47 |             'notice': {'color': 'magenta'},
48 |             'spam': {'color': 'white', 'faint': True},
49 |             'success': {'bold': True, 'color': 'green'},
50 |             'verbose': {'color': 'blue'},
51 |             'warning': {'color': 'yellow'}
52 |         }
53 |         coloredlogs.install(level='SPAM', logger=self.logger,
54 |                             fmt='%(message)s', level_styles=level_styles)
55 | 
56 |         if not UserDownloader.REDDIT_USERNAME:
57 |             self.logger.error("`reddit_username` in user_config.yaml is empty")
58 |             self.logger.error("If you plan on using the user API of saveddit, then add your username to user_config.yaml")
59 |             print("Exiting now")
60 |             exit()
61 |         else:
62 |             if not len(UserDownloader.REDDIT_PASSWORD):
63 |                 if sys.stdin.isatty():
64 |                     print("Username: " + UserDownloader.REDDIT_USERNAME)
65 |                     UserDownloader.REDDIT_PASSWORD = getpass.getpass("Password: ")
66 |                 else:
67 |                     # echo "foobar" > password
68 |                     # saveddit user .... < password
69 |                     UserDownloader.REDDIT_PASSWORD = sys.stdin.readline().rstrip()
70 | 
71 |         self.reddit = praw.Reddit(
72 |             client_id=UserDownloader.REDDIT_CLIENT_ID,
73 |             client_secret=UserDownloader.REDDIT_CLIENT_SECRET,
74 |             user_agent="saveddit (by /u/p_ranav)",
75 |             username=UserDownloader.REDDIT_USERNAME,
76 |             password=UserDownloader.REDDIT_PASSWORD
77 |         )
78 | 
79 |     def download_user_meta(self, args):
80 |         output_path = args.o
81 | 
82 |         for username in args.users:
83 |             user = self.reddit.redditor(name=username)
84 | 
85 |             root_dir = os.path.join(output_path, "www.reddit.com",
86 |                                     "u", username)
87 | 
88 |             if not os.path.exists(root_dir):
89 |                 os.makedirs(root_dir)
90 | 
91 |             with open(os.path.join(root_dir, 'user.json'), 'w') as file:
92 |                 user_dict = {}
93 |                 user_dict["comment_karma"] = user.comment_karma
94 |                 user_dict["created_utc"] = int(user.created_utc)
95 |                 user_dict["has_verified_email"] = user.has_verified_email
96 |                 user_dict["icon_img"] = user.icon_img
97 |                 user_dict["id"] = user.id
98 |                 user_dict["is_employee"] = user.is_employee
99 |                 user_dict["is_friend"] = user.is_friend
100 |                 user_dict["is_mod"] = user.is_mod
101 |                 user_dict["is_gold"] = user.is_gold
102 |                 try:
103 |                     user_dict["is_suspended"] = user.is_suspended
104 |                 except Exception as e:
105 |                     user_dict["is_suspended"] = None
106 |                 user_dict["link_karma"] = user.link_karma
107 |                 user_dict["name"] = user.name
108 | 
109 |                 file.write(json.dumps(user_dict, indent=2))
110 | 
111 |     def download_comments(self, args):
112 |         output_path = args.o
113 | 
114 |         for username in args.users:
115 |             user = self.reddit.redditor(name=username)
116 | 
117 |             self.logger.notice("Downloading from /u/" + username + "/comments")
118 | 
119 |             root_dir = os.path.join(output_path, "www.reddit.com",
120 |                                     "u", username)
121 | 
122 |             try:
123 |                 sort = args.s
124 |                 limit = args.l
125 | 
126 |                 comments_dir = os.path.join(root_dir, "comments")
127 |                 if not os.path.exists(comments_dir):
128 |                     os.makedirs(comments_dir)
129 | 
130 |                 self.logger.verbose("Downloading comments sorted by " + sort)
131 |                 category_function = getattr(user.comments, sort)
132 | 
133 |                 category_dir = os.path.join(comments_dir, sort)
134 | 
135 |                 if category_function:
136 |                     if not os.path.exists(category_dir):
137 | 
os.makedirs(category_dir) 138 | for i, comment in enumerate(category_function(limit=limit)): 139 | prefix_str = '#' + str(i).zfill(3) + ' ' 140 | self.indent_1 = ' ' * len(prefix_str) + "* " 141 | self.indent_2 = ' ' * len(self.indent_1) + "- " 142 | 143 | comment_body = comment.body 144 | comment_body = comment_body[0:32] 145 | comment_body = re.sub(r'\W+', '_', comment_body) 146 | comment_filename = str(i).zfill(3) + "_Comment_" + \ 147 | comment_body + "..." + ".json" 148 | self.logger.spam(self.indent_1 + comment.id + ' - "' + comment.body[0:64].replace("\n", "").replace("\r", "") + '..."') 149 | 150 | with open(os.path.join(category_dir, comment_filename), 'w') as file: 151 | comment_dict = {} 152 | try: 153 | if comment.author: 154 | comment_dict["author"] = comment.author.name 155 | else: 156 | comment_dict["author"] = None 157 | comment_dict["body"] = comment.body 158 | comment_dict["created_utc"] = int(comment.created_utc) 159 | comment_dict["distinguished"] = comment.distinguished 160 | comment_dict["downs"] = comment.downs 161 | comment_dict["edited"] = comment.edited 162 | comment_dict["id"] = comment.id 163 | comment_dict["is_submitter"] = comment.is_submitter 164 | comment_dict["link_id"] = comment.link_id 165 | comment_dict["parent_id"] = comment.parent_id 166 | comment_dict["permalink"] = comment.permalink 167 | comment_dict["score"] = comment.score 168 | comment_dict["stickied"] = comment.stickied 169 | comment_dict["subreddit_name_prefixed"] = comment.subreddit_name_prefixed 170 | comment_dict["subreddit_id"] = comment.subreddit_id 171 | comment_dict["total_awards_received"] = comment.total_awards_received 172 | comment_dict["ups"] = comment.ups 173 | file.write(json.dumps(comment_dict, indent=2)) 174 | except Exception as e: 175 | self.print_formatted_error(e) 176 | except Exception as e: 177 | self.logger.error("Unable to download comments for user `" + username + "` - " + str(e)) 178 | 179 | def download_multireddits(self, args): 180 | output_path = args.o 181 | 182 | for username in args.users: 183 | user = self.reddit.redditor(name=username) 184 | 185 | root_dir = os.path.join(os.path.join(os.path.join(os.path.join( 186 | output_path, "www.reddit.com"), "u"), username), "m") 187 | 188 | try: 189 | post_limit = args.l 190 | names = args.n 191 | categories = args.f 192 | skip_meta = args.skip_meta 193 | skip_videos = args.skip_videos 194 | skip_comments = args.skip_comments 195 | comment_limit = 0 # top-level comments ONLY 196 | 197 | # If names is None, download all multireddits from user's page 198 | if not names: 199 | names = [m.name.lower() for m in user.multireddits()] 200 | else: 201 | names = [n.lower() for n in names] 202 | 203 | for multireddit in user.multireddits(): 204 | if multireddit.name.lower() in names: 205 | name = multireddit.name 206 | self.logger.notice("Downloading from /u/" + username + "/m/" + name) 207 | multireddit_dir = os.path.join(root_dir, name) 208 | if not os.path.exists(multireddit_dir): 209 | os.makedirs(multireddit_dir) 210 | 211 | for category in categories: 212 | 213 | self.logger.verbose("Downloading submissions sorted by " + category) 214 | category_function = getattr(multireddit, category) 215 | 216 | category_dir = os.path.join(multireddit_dir, category) 217 | 218 | if category_function: 219 | for i, s in enumerate(category_function(limit=post_limit)): 220 | try: 221 | prefix_str = '#' + str(i).zfill(3) + ' ' 222 | self.indent_1 = ' ' * len(prefix_str) + "* " 223 | self.indent_2 = ' ' * len(self.indent_1) + "- " 224 | 
SubmissionDownloader(s, i, self.logger, category_dir, skip_videos, skip_meta, skip_comments, comment_limit, 225 | {'imgur_client_id': UserDownloader.IMGUR_CLIENT_ID}) 226 | except Exception as e: 227 | self.logger.error(self.indent_2 + "Unable to download post #" + str(i) + " for user `" + username + "` from multireddit " + name + " - " + str(e)) 228 | except Exception as e: 229 | self.logger.error(self.indent_1 + "Unable to download multireddit posts for user `" + username + "` - " + str(e)) 230 | 231 | def download_submitted(self, args): 232 | output_path = args.o 233 | 234 | for username in args.users: 235 | user = self.reddit.redditor(name=username) 236 | 237 | self.logger.notice("Downloading from /u/" + username + "/submitted") 238 | 239 | root_dir = os.path.join(os.path.join(os.path.join( 240 | output_path, "www.reddit.com"), "u"), username) 241 | 242 | try: 243 | post_limit = args.l 244 | sort = args.s 245 | skip_meta = args.skip_meta 246 | skip_videos = args.skip_videos 247 | skip_comments = args.skip_comments 248 | comment_limit = 0 # top-level comments ONLY 249 | 250 | submitted_dir = os.path.join(root_dir, "submitted") 251 | if not os.path.exists(submitted_dir): 252 | os.makedirs(submitted_dir) 253 | 254 | self.logger.verbose("Downloading submissions sorted by " + sort) 255 | category_function = getattr(user.submissions, sort) 256 | 257 | category_dir = os.path.join(submitted_dir, sort) 258 | 259 | if category_function: 260 | for i, s in enumerate(category_function(limit=post_limit)): 261 | try: 262 | prefix_str = '#' + str(i).zfill(3) + ' ' 263 | self.indent_1 = ' ' * len(prefix_str) + "* " 264 | self.indent_2 = ' ' * len(self.indent_1) + "- " 265 | SubmissionDownloader(s, i, self.logger, category_dir, skip_videos, skip_meta, skip_comments, comment_limit, 266 | {'imgur_client_id': UserDownloader.IMGUR_CLIENT_ID}) 267 | except Exception as e: 268 | self.logger.error(self.indent_2 + "Unable to download post #" + str(i) + " for user `" + username + "` - " + str(e)) 269 | except Exception as e: 270 | self.logger.error(self.indent_1 + "Unable to download submitted posts for user `" + username + "` - " + str(e)) 271 | 272 | def download_upvoted(self, args): 273 | output_path = args.o 274 | 275 | for username in args.users: 276 | user = self.reddit.redditor(name=username) 277 | 278 | self.logger.notice("Downloading from /u/" + username + "/upvoted") 279 | 280 | root_dir = os.path.join(os.path.join(os.path.join( 281 | output_path, "www.reddit.com"), "u"), username) 282 | 283 | try: 284 | post_limit = args.l 285 | skip_meta = args.skip_meta 286 | skip_videos = args.skip_videos 287 | skip_comments = args.skip_comments 288 | comment_limit = 0 # top-level comments ONLY 289 | 290 | upvoted_dir = os.path.join(root_dir, "upvoted") 291 | if not os.path.exists(upvoted_dir): 292 | os.makedirs(upvoted_dir) 293 | 294 | for i, s in enumerate(user.upvoted(limit=post_limit)): 295 | try: 296 | prefix_str = '#' + str(i).zfill(3) + ' ' 297 | self.indent_1 = ' ' * len(prefix_str) + "* " 298 | self.indent_2 = ' ' * len(self.indent_1) + "- " 299 | SubmissionDownloader(s, i, self.logger, upvoted_dir, skip_videos, skip_meta, skip_comments, comment_limit, 300 | {'imgur_client_id': UserDownloader.IMGUR_CLIENT_ID}) 301 | except Exception as e: 302 | self.logger.error(self.indent_2 + "Unable to download post #" + str(i) + " for user `" + username + "` - " + str(e)) 303 | except Exception as e: 304 | self.logger.error("Unable to download upvoted posts for user `" + username + "` - " + str(e)) 305 | 306 | def 
download_saved(self, args): 307 | output_path = args.o 308 | 309 | for username in args.users: 310 | user = self.reddit.redditor(name=username) 311 | 312 | self.logger.notice("Downloading from /u/" + username + "/saved") 313 | 314 | root_dir = os.path.join(os.path.join(os.path.join( 315 | output_path, "www.reddit.com"), "u"), username) 316 | 317 | try: 318 | post_limit = args.l 319 | skip_meta = args.skip_meta 320 | skip_videos = args.skip_videos 321 | skip_comments = args.skip_comments 322 | comment_limit = 0 # top-level comments ONLY 323 | 324 | saved_dir = os.path.join(root_dir, "saved") 325 | if not os.path.exists(saved_dir): 326 | os.makedirs(saved_dir) 327 | 328 | for i, s in enumerate(user.saved(limit=post_limit)): 329 | try: 330 | prefix_str = '#' + str(i).zfill(3) + ' ' 331 | self.indent_1 = ' ' * len(prefix_str) + "* " 332 | self.indent_2 = ' ' * len(self.indent_1) + "- " 333 | if isinstance(s, praw.models.Comment) and not skip_comments: 334 | self.logger.verbose( 335 | prefix_str + "Comment `" + str(s.id) + "` by " + str(s.author) + " \"" + s.body[0:32].replace("\n", "").replace("\r", "") + "...\"") 336 | 337 | comment_body = s.body 338 | comment_body = comment_body[0:32] 339 | comment_body = re.sub(r'\W+', '_', comment_body) 340 | post_dir = str(i).zfill(3) + "_Comment_" + \ 341 | comment_body + "..." 342 | submission_dir = os.path.join(saved_dir, post_dir) 343 | self.download_saved_comment(s, submission_dir) 344 | elif isinstance(s, praw.models.Comment): 345 | self.logger.verbose( 346 | prefix_str + "Comment `" + str(s.id) + "` by " + str(s.author)) 347 | self.logger.spam(self.indent_2 + "Skipping comment") 348 | elif isinstance(s, praw.models.Submission): 349 | SubmissionDownloader(s, i, self.logger, saved_dir, skip_videos, skip_meta, skip_comments, comment_limit, 350 | {'imgur_client_id': UserDownloader.IMGUR_CLIENT_ID}) 351 | else: 352 | pass 353 | except Exception as e: 354 | self.logger.error(self.indent_2 + "Unable to download #" + str(i) + " for user `" + username + "` - " + str(e)) 355 | except Exception as e: 356 | self.logger.error("Unable to download saved for user `" + username + "` - " + str(e)) 357 | 358 | def download_gilded(self, args): 359 | output_path = args.o 360 | 361 | for username in args.users: 362 | user = self.reddit.redditor(name=username) 363 | 364 | self.logger.notice("Downloading from /u/" + username + "/gilded") 365 | 366 | root_dir = os.path.join(os.path.join(os.path.join( 367 | output_path, "www.reddit.com"), "u"), username) 368 | 369 | try: 370 | post_limit = args.l 371 | skip_meta = args.skip_meta 372 | skip_videos = args.skip_videos 373 | skip_comments = args.skip_comments 374 | comment_limit = 0 # top-level comments ONLY 375 | 376 | saved_dir = os.path.join(root_dir, "gilded") 377 | if not os.path.exists(saved_dir): 378 | os.makedirs(saved_dir) 379 | 380 | for i, s in enumerate(user.gilded(limit=post_limit)): 381 | try: 382 | prefix_str = '#' + str(i).zfill(3) + ' ' 383 | self.indent_1 = ' ' * len(prefix_str) + "* " 384 | self.indent_2 = ' ' * len(self.indent_1) + "- " 385 | if isinstance(s, praw.models.Comment) and not skip_comments: 386 | self.logger.verbose( 387 | prefix_str + "Comment `" + str(s.id) + "` by " + str(s.author) + " \"" + s.body[0:32].replace("\n", "").replace("\r", "") + "...\"") 388 | 389 | comment_body = s.body 390 | comment_body = comment_body[0:32] 391 | comment_body = re.sub(r'\W+', '_', comment_body) 392 | post_dir = str(i).zfill(3) + "_Comment_" + \ 393 | comment_body + "..." 
394 | submission_dir = os.path.join(saved_dir, post_dir) 395 | self.download_saved_comment(s, submission_dir) 396 | elif isinstance(s, praw.models.Comment): 397 | self.logger.verbose( 398 | prefix_str + "Comment `" + str(s.id) + "` by " + str(s.author)) 399 | self.logger.spam(self.indent_2 + "Skipping comment") 400 | elif isinstance(s, praw.models.Submission): 401 | SubmissionDownloader(s, i, self.logger, saved_dir, skip_videos, skip_meta, skip_comments, comment_limit, 402 | {'imgur_client_id': UserDownloader.IMGUR_CLIENT_ID}) 403 | else: 404 | pass 405 | except Exception as e: 406 | self.logger.error(self.indent_2 + "Unable to download #" + str(i) + " for user `" + username + "` - " + str(e)) 407 | except Exception as e: 408 | self.logger.error("Unable to download gilded for user `" + username + "` - " + str(e)) 409 | 410 | def print_formatted_error(self, e): 411 | for line in str(e).split("\n"): 412 | self.logger.error(self.indent_2 + line) 413 | 414 | def download_saved_comment(self, comment, output_dir): 415 | if not os.path.exists(output_dir): 416 | os.makedirs(output_dir) 417 | self.logger.spam( 418 | self.indent_2 + "Saving comment.json to " + output_dir) 419 | with open(os.path.join(output_dir, 'comments.json'), 'w') as file: 420 | comment_dict = {} 421 | try: 422 | if comment.author: 423 | comment_dict["author"] = comment.author.name 424 | else: 425 | comment_dict["author"] = None 426 | comment_dict["body"] = comment.body 427 | comment_dict["created_utc"] = int(comment.created_utc) 428 | comment_dict["distinguished"] = comment.distinguished 429 | comment_dict["downs"] = comment.downs 430 | comment_dict["edited"] = comment.edited 431 | comment_dict["id"] = comment.id 432 | comment_dict["is_submitter"] = comment.is_submitter 433 | comment_dict["link_id"] = comment.link_id 434 | comment_dict["parent_id"] = comment.parent_id 435 | comment_dict["permalink"] = comment.permalink 436 | comment_dict["score"] = comment.score 437 | comment_dict["stickied"] = comment.stickied 438 | comment_dict["subreddit_name_prefixed"] = comment.subreddit_name_prefixed 439 | comment_dict["subreddit_id"] = comment.subreddit_id 440 | comment_dict["total_awards_received"] = comment.total_awards_received 441 | comment_dict["ups"] = comment.ups 442 | file.write(json.dumps(comment_dict, indent=2)) 443 | self.logger.spam( 444 | self.indent_2 + "Successfully saved comment.json") 445 | except Exception as e: 446 | self.print_formatted_error(e) -------------------------------------------------------------------------------- /src/saveddit/user_downloader_config.py: -------------------------------------------------------------------------------- 1 | class UserDownloaderConfig: 2 | DEFAULT_CATEGORIES = ["hot", "new", "random_rising", "rising", 3 | "controversial", "top", "gilded"] 4 | DEFAULT_SORT = "hot" 5 | DEFAULT_SORT_OPTIONS = ["hot", "new", "top", "controversial"] 6 | DEFAULT_POST_LIMIT = None 7 | DEFAULT_COMMENT_LIMIT = None --------------------------------------------------------------------------------