├── LICENSE ├── README.md └── sync.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 flake.art 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MidJourney-Scraper 2 | A Python script to download all MidJourney renders from any user, written in Python and easy to use. 3 | 4 | - Easy-to-identify, English-named files 5 | - Downloads all files on all pages for any MidJourney user. 6 | - Keeps JSON metadata and the command used to render in a separate .json file 7 | - Only downloads new files; can be run more than once to stay in sync. 8 | 9 | This will download all of the MidJourney renders. All you need to do is provide a user id and your session id from your browser. 
#!/usr/bin/env python3
"""Download every MidJourney render of a user into the current directory.

For each completed job returned by the MidJourney "recent-jobs" endpoint the
script writes the job's JSON metadata to ``<slug>.json`` and each of its
images to ``<slug><index>.png``, where ``<slug>`` is derived from the job's
full command.  Files that already exist are skipped, so the script can be run
more than once to keep a local copy in sync.

Configuration
-------------
If you set the two constants below then they only need to be added once;
otherwise the session token is asked for on every run::

    SESSION_TOKEN = "eyJ..."
    USER_ID = "4.."

To get the session id, open your browser's developer tools, click on the
Application tab within the developer toolbar, click on Cookies on the left
and use the value of the ``__Secure-next-auth.session-token`` cookie.

If you don't provide a user id it will download your own files.
"""

import json
import os
import re
import unicodedata
from os.path import exists

# The userid you would like to download from:
USER_ID = ""
# The session id of your logged-in browser session, starts with eyJ:
SESSION_TOKEN = ""

RECENT_JOBS_URL = "https://www.midjourney.com/api/app/recent-jobs/"
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30
# Bytes written per chunk while streaming an image to disk.
DOWNLOAD_CHUNK_SIZE = 64 * 1024
# Slugs are cut to this length; presumably this leaves room for the index and
# extension under common 255-character filename limits -- TODO confirm.
MAX_SLUG_LENGTH = 248

_URL_RE = re.compile(r'http\S+')
_NON_SLUG_CHARS_RE = re.compile(r'[^\w\s-]')
_SEPARATOR_RUN_RE = re.compile(r'[-\s]+')


def _http_get(url, **kwargs):
    """Issue a GET request via ``requests`` with a default timeout.

    ``requests`` is imported here rather than at module level so that the
    pure helpers in this file (e.g. ``slugify``) can be imported without the
    third-party package being installed.
    """
    import requests

    kwargs.setdefault("timeout", REQUEST_TIMEOUT)
    return requests.get(url, **kwargs)


def _fetch_page(cookies, user_id, page):
    """Return the list of job dicts on *page*, or ``[]`` for a non-list reply.

    Raises ``requests.HTTPError`` when the server answers with an error
    status (for instance when the session token is rejected).
    """
    response = _http_get(
        RECENT_JOBS_URL,
        # Passing params lets requests URL-encode the user id.
        params={
            "orderBy": "new",
            "jobStatus": "completed",
            "userId": user_id,
            "dedupe": "true",
            "refreshApi": "0",
            "page": str(page),
        },
        cookies=cookies,
    )
    response.raise_for_status()
    payload = response.json()
    return payload if isinstance(payload, list) else []


def main():
    """Sync all renders of ``USER_ID`` page by page.

    Prompts for the session token when ``SESSION_TOKEN`` is empty.  Paging
    stops at the first page that yields no images.

    Returns:
        The number of images processed (including ones already on disk).
    """
    session_token = SESSION_TOKEN or input(
        "What is your MidJourney Session ID? (hint: it starts with eyJ and you get it from your browser): "
    ).strip()

    cookies = {'__Secure-next-auth.session-token': session_token}
    page = 1
    total_images = 0
    while True:
        # Counted per page (not per render) so that an empty page, or a page
        # whose last job merely lacks images, is judged correctly.
        page_images = 0
        for render in _fetch_page(cookies, USER_ID, page):
            if not isinstance(render, dict) or 'image_paths' not in render:
                continue
            render_name = slugify(render['full_command'])
            write_json(render, render_name + ".json")
            for index, image_url in enumerate(render['image_paths']):
                print(f"Syncing: {total_images}) -> {render['full_command']}")
                download_image(image_url, f"{render_name}{index}.png")
                page_images += 1
                total_images += 1
        # no images left.
        if page_images == 0:
            break
        page += 1
    return total_images


def slugify(value, allow_unicode=False):
    """
    Taken from https://github.com/django/django/blob/master/django/utils/text.py
    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
    dashes to single dashes. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace, dashes, and underscores.

    Unlike the Django original, anything starting with ``http`` up to the next
    whitespace is removed first, and the result is truncated to
    ``MAX_SLUG_LENGTH`` characters.
    """
    text = _URL_RE.sub('', str(value))
    if allow_unicode:
        text = unicodedata.normalize('NFKC', text)
    else:
        text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    text = _NON_SLUG_CHARS_RE.sub('', text.lower())
    slug = _SEPARATOR_RUN_RE.sub('-', text).strip('-_')
    return slug[:MAX_SLUG_LENGTH]


def write_json(obj, path):
    """Write *obj* as JSON to *path* unless *path* already exists.

    Raises:
        TypeError: if *obj* is not JSON-serialisable; no file is created.
    """
    # only sync new files.
    if exists(path):
        return
    # Serialise before opening so a failure cannot leave an empty file behind
    # that later runs would mistake for an already-synced one.
    payload = json.dumps(obj)
    with open(path, "w", encoding="utf-8") as info:
        info.write(payload)


def download_image(url, path):
    """Stream *url* to *path* unless *path* already exists.

    A response with a status other than 200 is reported and skipped.  If the
    transfer is interrupted, the partially written file is removed so the
    next run downloads it again, and the error is re-raised.
    """
    # only sync new files.
    if exists(path):
        return
    with _http_get(url, stream=True) as response:
        if response.status_code != 200:
            print(f"Skipping {url}: HTTP {response.status_code}")
            return
        try:
            with open(path, 'wb') as out:
                for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    out.write(chunk)
        except BaseException:
            # Includes KeyboardInterrupt: never leave a truncated image that
            # the exists() check above would treat as complete.
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
            raise


if __name__ == "__main__":
    main()