├── LICENSE
├── README.md
└── sync.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 flake.art
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MidJourney-Scraper
 2 | A Python script that downloads all MidJourney renders from any user. It is written in Python and easy to use.
 3 | 
 4 | - Easy-to-identify, English-named files.
 5 | - Downloads all files on all pages for any MidJourney user.
 6 | - Keeps the JSON metadata and the command used to render in a separate .json file.
 7 | - Only downloads new files; can be run more than once to stay in sync.
 8 | 
 9 | This will download all of the MidJourney renders. All you need to do is provide a user ID and the session token from your browser.
10 | 
11 | If you set these values at the top of the sync.py script, they only need to be entered once:
12 | SESSION_TOKEN = "eyJ..."
13 | USER_ID = "4.."
14 | 
15 | To get the session token, open your browser's developer tools, click on the Application tab within the developer toolbar, click on Cookies on the left, and use the value of the
16 | `__Secure-next-auth.session-token` cookie.
17 | 
18 | If you don't provide a userid it will download your own files.
19 | 


--------------------------------------------------------------------------------
/sync.py:
--------------------------------------------------------------------------------
 1 | 
 2 | #!/usr/bin/env python3
 3 | 
 4 | import requests
 5 | import unicodedata
 6 | import re
 7 | import json 
 8 | from os.path import exists
 9 | 
# The user ID whose renders should be downloaded. It is sent as the
# `userId` query parameter of the recent-jobs request in main().
USER_ID=""
# The session token of your logged-in browser session (starts with "eyJ").
# It is sent as the `__Secure-next-auth.session-token` cookie; when left
# empty, main() prompts for it on startup.
SESSION_TOKEN=""
14 | 
15 | 
def main():
    """
    Sync every completed render returned for USER_ID into the current directory.

    Prompts for the session token when SESSION_TOKEN is empty, then requests
    the recent-jobs endpoint page by page. For every render that carries an
    'image_paths' entry, the render's metadata is written to "<slug>.json"
    and each image to "<slug><index>.png", where <slug> is
    slugify(render['full_command']). Paging stops at the first page that
    yields no images at all.
    """
    global SESSION_TOKEN, USER_ID
    if not SESSION_TOKEN:
        SESSION_TOKEN = input("What is your MidJourney Session ID? (hint: it starts with eyJ and you get it from your browser): ")

    cookies = {'__Secure-next-auth.session-token': SESSION_TOKEN}
    page = 1
    totalImages = 0
    while True:
        r = requests.get("https://www.midjourney.com/api/app/recent-jobs/?orderBy=new&jobStatus=completed&userId="+USER_ID+"&dedupe=true&refreshApi=0&page="+str(page), cookies=cookies)
        renders = r.json()
        if not isinstance(renders, list):
            # Anything other than a list of renders (e.g. an error object)
            # cannot be synced; say so instead of ending silently.
            print("Unexpected response from MidJourney (HTTP " + str(r.status_code) + "); stopping.")
            break

        # Counted per PAGE, not per render: the page is only "empty" when
        # none of its renders produced an image. Initialising it here also
        # keeps it defined when the page is an empty list.
        foundImage = 0
        for render in renders:
            if 'image_paths' not in render:
                continue
            renderName = slugify(render['full_command'])
            write_json(render, renderName+".json")
            # download_image("https://mj-gallery.com/"+render['id']+"/grid_0.png",renderName+"_hq.png")
            for dex, image in enumerate(render['image_paths']):
                print("Syncing: " + str(totalImages) + ") -> "+ render['full_command'])
                download_image(image, renderName+str(dex)+".png")
                foundImage += 1
                totalImages += 1

        # no images left.
        if foundImage == 0:
            break
        page += 1
43 | 
def slugify(value, allow_unicode=False):
    """
    Turn an arbitrary value into a short, filesystem-friendly slug.

    Adapted from https://github.com/django/django/blob/master/django/utils/text.py

    Steps, in order: stringify the value, drop anything that looks like a
    URL ("http" followed by non-whitespace), normalise Unicode (folding to
    plain ASCII unless 'allow_unicode' is truthy), lowercase, discard every
    character that is not a word character, whitespace or a hyphen, collapse
    runs of whitespace/hyphens into a single hyphen, trim leading and
    trailing hyphens/underscores, and finally cap the result at 248
    characters.
    """
    # remove urls
    text = re.sub(r'http\S+', '', str(value))

    if not allow_unicode:
        # Decompose accented characters, then drop whatever is not ASCII.
        text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    else:
        text = unicodedata.normalize('NFKC', text)

    lowered = text.lower()
    cleaned = re.sub(r'[^\w\s-]', '', lowered)
    dashed = re.sub(r'[-\s]+', '-', cleaned)
    return dashed.strip('-_')[:248]
62 | 
def write_json(obj, path):
    """
    Write 'obj' as JSON text to 'path' unless a file already exists there.

    Existing files are never overwritten, so repeated runs only add new
    metadata files. Raises whatever json.dumps raises (e.g. TypeError) when
    'obj' is not JSON-serializable; in that case no file is created.
    """
    # only sync new files.
    if exists(path):
        return
    # Serialize BEFORE opening the file: if the object cannot be encoded we
    # must not leave an empty file behind, because a later run would see it
    # and skip this render.
    payload = json.dumps(obj)
    # The context manager closes the handle even if the write fails.
    with open(path, "w") as info:
        info.write(payload)
69 | 
def download_image(url, path):
    """
    Stream the resource at 'url' into 'path' unless 'path' already exists.

    Nothing is written when the server answers with a status other than 200.
    If the transfer is interrupted by an exception (including Ctrl-C), the
    partially written file is removed before the exception is re-raised, so
    a later run downloads it again instead of skipping a truncated image.
    """
    import os  # function-scope: the file only imports os.path.exists at module level

    # only sync new files.
    if exists(path):
        return

    r = requests.get(url, stream=True)
    try:
        if r.status_code != 200:
            return
        try:
            with open(path, 'wb') as f:
                # Same bytes as iterating the response directly, but in
                # 64 KiB pieces rather than the 128-byte default.
                for chunk in r.iter_content(chunk_size=65536):
                    f.write(chunk)
        except BaseException:
            # A truncated file would look "already synced" next time.
            if exists(path):
                os.remove(path)
            raise
    finally:
        # With stream=True the connection is only released once the
        # response is fully consumed or explicitly closed.
        r.close()
78 | 
# Run the sync only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
	main()


--------------------------------------------------------------------------------