├── requirements.txt ├── README.md └── gdrive_calculator.py /requirements.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client>=1.7.11,<1.7.20 2 | google-auth-httplib2>=0.0.3,<0.1.0 3 | google-auth-oauthlib>=0.4.1,<0.10.0 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GDrive Folder Size Calculator 2 | 3 | This is a simple yet only known effective method to calculate google drive folder size using Python. 4 | Works for both gdrive file and folder. Uses Drive Api v3. It is scripted to be usable on terminal, the major application of this script is that you can use it on your gDrive projects by simply modifying it to your needs. 5 | 6 | ## Also calculates total number of files and folders inside whole tree 7 | 8 | It returns a dict contains name, size, size in bytes, type, total number of files and total number of folders 9 | of the given gdrive file/folder's link/fileID. 10 | 11 | The keys in the dict are name, size, bytes, type, files and folders. 12 | 13 | ## All you need to do is this: 14 | 1) ```pip3 install -r requirements.txt``` 15 | 2) Create a **`service`** variable and pass it to the GoogleDriveSizeCalculate class. Notes to create it are given inside the bottom of the script. There you can either use a credentials.json or a service account to auth the service. 16 | 3) ```python3 gdrive_calculator.py``` 17 | 4) On the terminal, Input the url/fileid of your file/folder in Google Drive (Files associated with your account OR public shareable links Only) 18 | 19 | ## Output Example: 20 | ``` 21 | Name: Smallville (2001-) Season 1-4 S01-S04 22 | Size: 38.25GB 23 | Type: Folder 24 | Bytes: 41074136619 25 | Files: 24 26 | Folders: 5 27 | 28 | If you want only size in bytes: 29 | print(calculate['bytes']) 30 | ``` 31 | -------------------------------------------------------------------------------- /gdrive_calculator.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import pickle 4 | import json 5 | import sys 6 | import urllib.parse as urlparse 7 | from urllib.parse import parse_qs 8 | 9 | from google.auth.transport.requests import Request 10 | # from google_auth_oauthlib.flow import InstalledAppFlow 11 | from google_auth_oauthlib.flow import Flow 12 | from google.oauth2 import service_account 13 | from googleapiclient.discovery import build 14 | 15 | 16 | def get_readable_file_size(size_in_bytes) -> str: 17 | SIZE_UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'] 18 | if size_in_bytes is None: 19 | return '0B' 20 | index = 0 21 | while size_in_bytes >= 1024: 22 | size_in_bytes /= 1024 23 | index += 1 24 | try: 25 | return f'{round(size_in_bytes, 2)}{SIZE_UNITS[index]}' 26 | except IndexError: 27 | return 'File too large' 28 | 29 | 30 | class GoogleDriveSizeCalculate: 31 | def __init__(self, service=None): 32 | self.__G_DRIVE_DIR_MIME_TYPE = "application/vnd.google-apps.folder" 33 | self.__service = service 34 | if service is None: 35 | print("\nATTENTION !!!") 36 | print("Pass `service` variable to GoogleDriveSizeCalculate class. Please carefully read all the instructions given in bottom of the script.") 37 | print("Note: This Is Just to ensure that the user of this script is going through all the important instructions or not.\n") 38 | return 39 | self.total_bytes = 0 40 | self.total_files = 0 41 | self.total_folders = 0 42 | 43 | @staticmethod 44 | def getIdFromUrl(link: str): 45 | if "folders" in link or "file" in link: 46 | regex = r"https://drive\.google\.com/(drive)?/?u?/?\d?/?(mobile)?/?(file)?(folders)?/?d?/([-\w]+)[?+]?/?(w+)?" 47 | res = re.search(regex,link) 48 | if res is None: 49 | return "GDrive ID not found. Try sending url in different format." 50 | return res.group(5) 51 | parsed = urlparse.urlparse(link) 52 | return parse_qs(parsed.query)['id'][0] 53 | 54 | def gdrive_checker(self, LINKorID): 55 | if self.__service is None: 56 | return 57 | if 'drive.google.com' in LINKorID: 58 | try: 59 | file_id = self.getIdFromUrl(LINKorID) 60 | if 'GDrive ID not found.' in file_id: 61 | print(file_id) 62 | return 63 | except (KeyError, IndexError): 64 | print("GDrive ID could not be found in the provided link.") 65 | return 66 | else: 67 | file_id = LINKorID.strip() 68 | 69 | error = False 70 | print("\nCalculating... Please Wait!") 71 | 72 | try: 73 | drive_file = self.__service.files().get(fileId=file_id, fields="id, name, mimeType, size", supportsTeamDrives=True).execute() 74 | name = drive_file['name'] 75 | if drive_file['mimeType'] == self.__G_DRIVE_DIR_MIME_TYPE: 76 | typee = 'Folder' 77 | self.total_folders += 1 78 | self.gDrive_directory(**drive_file) 79 | else: 80 | try: 81 | typee = drive_file['mimeType'] 82 | except: 83 | typee = 'File' 84 | self.total_files += 1 85 | self.gDrive_file(**drive_file) 86 | 87 | except Exception as e: 88 | print('\n') 89 | if 'HttpError' in str(e): 90 | h_e = str(e) 91 | ori = h_e 92 | try: 93 | h_e = h_e.replace('<', '').replace('>', '') 94 | h_e = h_e.split('when') 95 | f = h_e[0].strip() 96 | s = h_e[1].split('"')[1].split('"')[0].strip() 97 | e = f"{f}\n{s}" 98 | except: 99 | e = ori 100 | print(str(e)) 101 | error = True 102 | finally: 103 | if error: 104 | return 105 | return { 106 | 'name': name, 107 | 'size': get_readable_file_size(self.total_bytes), 108 | 'bytes': self.total_bytes, 109 | 'type': typee, 110 | 'files': self.total_files, 111 | 'folders': self.total_folders 112 | } 113 | 114 | def list_drive_dir(self, file_id: str) -> list: 115 | query = f"'{file_id}' in parents and (name contains '*')" 116 | fields = 'nextPageToken, files(id, mimeType, size)' 117 | page_token = None 118 | page_size = 1000 119 | files = [] 120 | while True: 121 | response = self.__service.files().list(supportsTeamDrives=True, 122 | includeTeamDriveItems=True, 123 | q=query, spaces='drive', 124 | fields=fields, pageToken=page_token, 125 | pageSize=page_size, corpora='allDrives', 126 | orderBy='folder, name').execute() 127 | files.extend(response.get('files', [])) 128 | page_token = response.get('nextPageToken', None) 129 | if page_token is None: 130 | break 131 | return files 132 | 133 | def gDrive_file(self, **kwargs): 134 | try: 135 | size = int(kwargs['size']) 136 | except: 137 | size = 0 138 | self.total_bytes += size 139 | 140 | def gDrive_directory(self, **kwargs) -> None: 141 | files = self.list_drive_dir(kwargs['id']) 142 | if len(files) == 0: 143 | return 144 | for file_ in files: 145 | if file_['mimeType'] == self.__G_DRIVE_DIR_MIME_TYPE: 146 | self.total_folders += 1 147 | self.gDrive_directory(**file_) 148 | else: 149 | self.total_files += 1 150 | self.gDrive_file(**file_) 151 | 152 | 153 | # ─────────────────────────────────────────────────────────────────────────────────────── 154 | # ~ Notes ~ 155 | # CREATE YOUR `service` VARIABLE AND PASS `service` TO THE GoogleDriveSizeCalculate class 156 | # There are 2 methods. Default is Method 1 157 | # ─────────────────────────────────────────────────────────────────────────────────────── 158 | 159 | credentials = None 160 | oauth_scope = ['https://www.googleapis.com/auth/drive'] 161 | 162 | # METHOD: 1 163 | # IF YOU WANT TO USE credentials.json 164 | # Required credentials.json in this script's directory! 165 | # If you don't want to use credentials.json, 166 | # then directly assign client_id and client_secret in CLIENT_CONFIG 167 | # For this, you must comment out `if not credentials_file:` lines 168 | # If you have a service account, use METHOD 2 169 | 170 | if os.path.exists('token.pickle'): 171 | print("Found token.pickle!") 172 | with open('token.pickle', 'rb') as f: 173 | credentials = pickle.load(f) 174 | else: 175 | print("\nToken File Not Generated! Trying to generate `token.pickle`") 176 | if credentials is None or not credentials.valid: 177 | if credentials and credentials.expired and credentials.refresh_token: 178 | credentials.refresh(Request()) 179 | else: 180 | #Comment out below 6 lines if you want to directly assign client_id and client_secret from variables. 181 | credentials_file = os.path.exists("credentials.json") 182 | if not credentials_file: 183 | print("\nFile NOT FOUND! `credentials.json`") 184 | print("You need to download credentials.json from your Google Cloud console.") 185 | print("Do Google for getting that file!\n") 186 | #Change below one according to your application of my script. or else leave. 187 | sys.exit() 188 | with open("credentials.json", "r") as f: 189 | client = json.load(f) 190 | 191 | CLIENT_CONFIG = { 192 | 'web': { 193 | 'client_id':client['installed']["client_id"], 194 | 'auth_uri':'https://accounts.google.com/o/oauth2/auth', 195 | 'token_uri':'https://oauth2.googleapis.com/token', 196 | 'auth_provider_x509_cert_url':'https://www.googleapis.com/oauth2/v1/certs', 197 | 'client_secret':client['installed']["client_secret"] 198 | } 199 | } 200 | CALLBACK_URL = 'http://localhost:1' #users/auth/google_oauth2/callback' 201 | flow = Flow.from_client_config(CLIENT_CONFIG, oauth_scope) 202 | flow.redirect_uri = CALLBACK_URL 203 | authorize_url, _ = flow.authorization_url() 204 | print("\nFound the credentials.json file!") 205 | print("Note! Once you provide access, it will redirect you to localhost link which tells `This site can’t be reached`") 206 | print("Just ignore it and copy the part of the URL after `code=` which is in format like `4/0AWtgzhBdH_9Ee8iMv....` till before `&scope=`\n") 207 | print("Open the below Auth URL, provide access to your google account and Enter the code below:") 208 | print(authorize_url) 209 | code = input("Enter the code: ") 210 | flow.fetch_token(code=code) 211 | credentials = flow.credentials 212 | print("\nToken File Generated!") 213 | with open('token.pickle', 'wb') as token: 214 | pickle.dump(credentials, token) 215 | service = build('drive', 'v3', credentials=credentials, cache_discovery=False) 216 | 217 | # To use Method 2, Comment the Method 1 completely and uncomment the below lines. 218 | 219 | # # METHOD: 2 220 | # # IF YOU WANT TO USE A SERVICE ACCOUNT, Example: some_service_account.json 221 | # # Required some_service_account.json file. Google it to get it. 222 | # credentials = service_account.Credentials.from_service_account_file('some_service_account.json',scopes=oauth_scope) 223 | # service = build('drive', 'v3', credentials=credentials, cache_discovery=False) 224 | 225 | link_or_fileid = input("\nPaste your GoogleDrive file/folder's link/fileId : ") 226 | 227 | # ~ Complete creating the service variable and then pass it here 228 | calculator = GoogleDriveSizeCalculate(service) #GoogleDriveSizeCalculate(service) 229 | calculate = calculator.gdrive_checker(link_or_fileid) 230 | 231 | # Note that, gdrive folder size calculating speed depends on how many files inside a folder. 232 | if not calculate is None: 233 | print('') 234 | for k, v in calculate.items(): 235 | print(f'{k.title()}:', v) 236 | --------------------------------------------------------------------------------