├── pkg └── Cleanup Manager [1.5.0].dmg ├── MANIFEST.in ├── cleanup_management ├── __init__.py ├── cleanup.py └── analysis.py ├── .gitignore ├── setup.py ├── LICENSE ├── README.md └── cleanup_manager.py /pkg/Cleanup Manager [1.5.0].dmg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/univ-of-utah-marriott-library-apple/cleanup_manager/HEAD/pkg/Cleanup Manager [1.5.0].dmg -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.md 2 | include README.md 3 | include MANIFEST.in 4 | recursive-include cleanup-management * 5 | recursive-exclude * *.py[co] 6 | recursive-exclude build * 7 | -------------------------------------------------------------------------------- /cleanup_management/__init__.py: -------------------------------------------------------------------------------- 1 | import analysis 2 | import cleanup 3 | 4 | __version__ = '1.5.0' 5 | __all__ = ['analysis', 'cleanup'] 6 | 7 | if __name__ == "__main__": 8 | print("Cleanup Management, version: {}".format(__version__)) 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | import cleanup_management 3 | 4 | setup( 5 | name='Cleanup Manager', 6 | version=cleanup_management.__version__, 7 | url='https://github.com/univ-of-utah-marriott-library-apple/cleanup_manager', 8 | author='Pierce Darragh, Marriott Library IT Services', 9 | author_email='mlib-its-mac-github@lists.utah.edu', 10 | description=("Cleanup Manager helps you clean up folders on your Mac's hard drive."), 11 | license='MIT', 12 | packages=['cleanup_management'], 13 | package_dir={'cleanup_management': 'cleanup_management'}, 14 | scripts=['cleanup_manager.py'], 15 | classifiers=[ 16 | 'Development Status :: 5 - Stable', 17 | 'Environment :: Console', 18 | 'Environment :: MacOS X', 19 | 'Intended Audience :: Information Technology', 20 | 'License :: OSI Approved :: MIT License', 21 | 'Natural Language :: English', 22 | 'Operating System :: MacOS :: MacOS X', 23 | 'Programming Language :: Python', 24 | 'Programming Language :: Python :: 2.7', 25 | 'Topic :: System :: Systems Administration' 26 | ], 27 | ) 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 University of Utah, Marriott Library, Apple Support 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /cleanup_management/cleanup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | 5 | 6 | def delete_links(links, logger): 7 | """ 8 | Unmake all of the links. 9 | 10 | :param links: A list containing paths to link objects to be deleted. 11 | :param logger: A Management Tools Logger object for handling output. 12 | """ 13 | for link in links: 14 | try: 15 | logger.info(" Unlinking: {}".format(link)) 16 | os.unlink(link) 17 | except IOError as (errno, strerror): 18 | logger.error("I/O Error({}): {}".format(errno, strerror)) 19 | except: 20 | logger.error("{}: {}".format(sys.exc_info()[0].__name__, sys.exc_info()[1])) 21 | 22 | 23 | def delete_files(files, logger): 24 | """ 25 | Remove all of the files. 26 | 27 | :param files: A list containing paths to files to be deleted. 28 | :param logger: A Management Tools Logger object for handling output. 29 | """ 30 | for file in files: 31 | try: 32 | logger.info(" Deleting File: {}".format(file)) 33 | os.remove(file) 34 | except IOError as (errno, strerror): 35 | logger.error("I/O Error({}): {}".format(errno, strerror)) 36 | except: 37 | logger.error("{}: {}".format(sys.exc_info()[0].__name__, sys.exc_info()[1])) 38 | 39 | 40 | def delete_folders(folders, logger): 41 | """ 42 | Recursively delete the folders. 43 | 44 | :param folders: A list containing folders to be deleted. 45 | :param logger: A Management Tools Logger object for handling output. 46 | """ 47 | for folder in folders: 48 | try: 49 | logger.info(" Removing Directory: {}".format(folder)) 50 | shutil.rmtree(folder) 51 | except IOError as (errno, strerror): 52 | logger.error("I/O Error({}): {}".format(errno, strerror)) 53 | except: 54 | logger.error("{}: {}".format(sys.exc_info()[0].__name__, sys.exc_info()[1])) 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Cleanup Manager 2 | =============== 3 | 4 | Cleanup Manager helps you clean up folders on your Mac's hard drive. 5 | 6 | ## Contents 7 | 8 | * [Contact](#contact) - how to reach us 9 | * [System Requirements](#system-requirements) - what you need 10 | * [Download](#download) - get it here! 11 | * [Purpose](#purpose) - why it does what it does 12 | * [Usage](#usage) - how to make Cleanup Manager do the thing 13 | * [Details](#details) - how the things are done 14 | * [Update History](#update-history) - history of the project 15 | 16 | ## Contact 17 | 18 | If you have any comments, questions, or other input, either [file an issue](../../issues) or [send us an email](mailto:mlib-its-mac-github@lists.utah.edu). Thanks! 19 | 20 | ## System Requirements 21 | 22 | * Mac OS X 23 | * Tested on 10.9 and 10.10 24 | * Python 2.7.x (which comes preinstalled on OS X, or you can download a non-Apple version [here](https://www.python.org/download/)) 25 | * [Management Tools](https://github.com/univ-of-utah-marriott-library-apple/management_tools) - version 1.8.1 or greater 26 | 27 | ## Download 28 | 29 | [Download the latest installer for Cleanup Manager here!](../../releases/) 30 | 31 | ## Purpose 32 | 33 | Cleanup Manager was originally designed to help cleanup user home folders on shared, frequently-used machines. We use it in some student labs that have persistent login information where the users' home folders can accumulate and aren't deleted for long periods of time. Cleanup Manager is also used to manage shared drives that have a tendency to fill up quickly. 34 | 35 | ## Usage 36 | 37 | ``` 38 | $ cleanup_manager.py [-hvnV] [-l log] [-k date] [-f format] target 39 | ``` 40 | 41 | ### Options 42 | 43 | | Option | Purpose | 44 | |---------------------------------------|-------------------------------------------------------------------------------| 45 | | `-h`, `--help` | Print help message and quit. | 46 | | `-v`, `--version` | Print version information and quit. | 47 | | `-n`, `--no-log` | Redirect logging to standard output. | 48 | | `-V`, `--verbose` | Increase verbosity of output (can be used twice). | 49 | | `--skip-prompt` | Skips the confirmation prompt. Be careful with this. | 50 | | `-l log`, `--log-dest log` | Change the destination log file of log events. | 51 | | `-k date`, `--keep-after date` | The date to keep items after. Default is seven days prior to invocation. | 52 | | `-d format`, `--date-format format` | Format of the given date. Useful if you have that one particular way of formatting your dates and you don't want to change. | 53 | | `-f size`, `--freeup size` | The amount of space to attempt to free up. | 54 | | `-t trigger`, `--dir-trigger trigger` | A specific file to set a directory's timestamp from within that directory. | 55 | | `--delete-oldest-first` | When deleting by size, older items are deleted first. This is the default. | 56 | | `--delete-largest-first` | When deleting by size, larger items are deleted first. | 57 | | `--overflow` | Allows the script to delete more than just the size specified to hit target. | 58 | 59 | `target` is a path to a directory that you want to clean up. 60 | 61 | ### Examples 62 | 63 | To delete *up to* 15 gigabytes of data within the target directory, while deleting the oldest items first: 64 | 65 | ``` 66 | $ cleanup_manager.py -f 15g /path/to/target 67 | ``` 68 | 69 | To attempt to reach 500 megabytes free on the drive with preference given to larger items: 70 | 71 | ``` 72 | $ cleanup_manager.py -f 500mf --delete-largest-first /path/to/target 73 | ``` 74 | 75 | To clear up 30% of the drive where `target` exists by deleting items inside of `target` (with preference given to older items): 76 | 77 | ``` 78 | $ cleanup_manager.py -f 30 /path/to/target 79 | ``` 80 | 81 | ## Details 82 | 83 | After being given a directory to examine, the Cleanup Manager navigates the entire directory tree. Files in the top level are recorded with their last modification timestamp, and folders are navigated to find the most recent item within them. Anything that, from the top level (`target`), has a most-recent modification timestamp that is older than the `--keep-after` date will be deleted. 84 | 85 | Links are examined to see whether they either: 86 | 87 | 1. exist inside of a folder that will be deleted, or 88 | 2. point to something else that will be deleted. 89 | 90 | Any link that meets either of these criteria will be unmade. 91 | 92 | ## Update History 93 | 94 | This is a short, reverse-chronological summary of the updates to this project. 95 | 96 | | Date | Version | Update Description | 97 | |------------|:---------:|------------------------------------------------------------------------------| 98 | | 2016-04-12 | 1.5.0 | Added `--dir-trigger` option for folder-level trigger files. | 99 | | 2015-07-01 | 1.4.0 | Updated logging to change outputs slightly (for better readability). | 100 | | 2015-05-19 | 1.3.0 | New `--overflow` flag ensures specified disk space will be cleared. | 101 | | 2015-05-18 | 1.2.0 | Added increased verbosity availability via more `-V` flags. | 102 | | 2015-03-31 | 1.1.1 | Amended logic to handle combinations of size- and date-based deletions. | 103 | | 2015-03-27 | 1.1.0 | Proper release with size-based deleting of things. | 104 | | 2015-03-27 | 1.1.0pre4 | Script actually handles size-based options properly. Updated in-line docs. | 105 | | 2015-03-27 | 1.1.0pre3 | Implemented logic for deleting inventory items based on size. | 106 | | 2015-03-26 | 1.1.0pre2 | Completed the handling of user instructions for size-based deletion. | 107 | | 2015-03-24 | 1.1.0pre1 | Working to add size-based deletion (instead of just date-based). | 108 | | 2015-02-27 | 1.0.0 | Actual first release. Fairly stable and usable. | 109 | | 2015-02-26 | 1.0.0a | Moved the module directory to a different name than the script. | 110 | | 2015-02-26 | 1.0.0rc3 | Now asks for confirmation prior to deleting things. | 111 | | 2015-02-26 | 1.0.0rc2 | Apparently changing the value of '__name__' is a bad idea. | 112 | | 2015-02-26 | 1.0.0rc1 | Uncommented lines that cause deletion. First trials in-action. | 113 | | 2015-02-26 | 0.9.3 | Docstrings added to all methods. | 114 | | 2015-02-26 | 0.9.2 | Added in-script documentation and help feature. | 115 | | 2015-02-26 | 0.9.1 | Updated `main` method and init. | 116 | | 2015-02-26 | 0.9.0 | Most of the functionality put in place for analysis and deletion. | 117 | | 2014-10-03 | 0.1 | Project started. | 118 | -------------------------------------------------------------------------------- /cleanup_management/analysis.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def get_date_based_deletable_inventory(keep_after, logger, target=None, folders=None, files=None, links=None, trigger=None): 5 | """ 6 | Finds all of the items within an inventory that can be deleted based on 7 | their last modification date. 8 | 9 | :param keep_after: a unix timestamp; any files or directories with a last 10 | modification time after this will be removed 11 | :param logger: a Management Tools logger object 12 | :param target: the directory to clean out 13 | :param folders: an inventory of the folders (see get_inventory()) 14 | :param files: an inventory of the files (see get_inventory()) 15 | :param links: an inventory of the links (see get_inventory()) 16 | :return: lists of folers, files, and links to be deleted and/or unmade 17 | """ 18 | if folders is None or files is None or links is None: 19 | if not target: 20 | raise ValueError("Must give either a target or the inventory.") 21 | else: 22 | folders, files, links = get_inventory(target, logger) 23 | else: 24 | # Make copies of the inventory lists just in case the user wanted to 25 | # keep the originals. 26 | folders = list(folders) 27 | files = list(files) 28 | links = list(links) 29 | 30 | logger.verbose("Getting date-based deletable inventory:") 31 | 32 | # Check if any of the folders contain the trigger file. 33 | # If they do, and if that trigger file's last modified timestamp is within 34 | # the specified range, set that folder to be removed. 35 | delete_folders = [] 36 | if trigger is not None: 37 | pop_indices = [] 38 | # Iterate over the folders with their index in the list. 39 | for index, folder in enumerate(folders): 40 | triggerpath = os.path.join(folder[0], trigger) 41 | # Test if the trigger file exists in the folder. 42 | try: 43 | if os.path.getmtime(triggerpath) < keep_after: 44 | # If the file exists and its timestamp is old enough, then 45 | # the folder should be deleted. 46 | pop_indices.append(index) 47 | except OSError: 48 | # The file does not exist in the folder. That's fine; we'll just 49 | # continue on and let it be deleted naturally if it ought to be. 50 | continue 51 | # Go through the marked indices and add the appropriate folders to the 52 | # deletion list. 53 | for index in pop_indices: 54 | delete_folders.append(folders[index]) 55 | # Then go through and remove those folders from the original list. 56 | for index in pop_indices: 57 | folders.pop(index) 58 | # Note that these steps must be separated. If the folders were popped at 59 | # the same time they're added to the delete_folders list, the indices 60 | # would change and you would be adding the wrong folders. 61 | 62 | # Find the folders and files that need to be deleted. 63 | # If the item's score is above the threshold value, it will be deleted. 64 | # Folder and file lists are assumed to contain tuples as: 65 | # (path, age, size) 66 | # Effectively, for each folder/file: if that item has a timestamp that is 67 | # less than the 'keep_after' value, it gets added to the list. 68 | delete_folders += [folder[0] for folder in folders if folder[1] < keep_after] 69 | delete_files = [file[0] for file in files if file[1] < keep_after] 70 | 71 | # Now handle links. This is a bit trickier. 72 | # Link array is assumed to contain tuples as: 73 | # (link location, target location, inside) 74 | delete_links = [] 75 | for link in links: 76 | # If the link points inside the 'target' directory and the target of the 77 | # link will be deleted during cleanup, then remove the link. 78 | if link[2] and (link[1] in delete_folders or link[1] in delete_files): 79 | delete_links.append(link[0]) 80 | else: 81 | for folder in delete_folders: 82 | # If the link exists inside of or points into a folder that is 83 | # going to be deleted, then remove the link. 84 | if link[0].startswith(folder) or link[1].startswith(folder): 85 | delete_links.append(link[0]) 86 | break 87 | 88 | # Print out lots of fun information if it's warranted. 89 | for folder in delete_folders: 90 | logger.debug(" Set to remove folder: {}".format(folder)) 91 | for file in delete_files: 92 | logger.debug(" Set to remove file: {}".format(file)) 93 | for link in delete_links: 94 | logger.debug(" Set to remove link: {}".format(link)) 95 | 96 | # Return the deletable inventory. 97 | return delete_folders, delete_files, delete_links 98 | 99 | 100 | def get_size_based_deletable_inventory(target_space, logger, target=None, oldest_first=True, overflow=False, folders=None, files=None, links=None): 101 | """ 102 | Finds all of the items within an inventory that can be deleted based on a 103 | given target amount of space to attempt to free up. 104 | 105 | :param target_space: the amount of space to attempt to clean up 106 | :param logger: a Management Tools logger object 107 | :type target_space: int 108 | :param target: the directory to clean out 109 | :param oldest_first: whether to prefer deleting old itmes first; if set to 110 | False, then largest items will be deleted first 111 | :param folders: an inventory of the folders (see get_inventory()) 112 | :param files: an inventory of the files (see get_inventory()) 113 | :param links: an inventory of the links (see get_inventory()) 114 | :return: list of folders, files, and links to be deleted and/or unmade and 115 | the total amount of stuff deleted (in bytes) 116 | """ 117 | if folders is None or files is None or links is None: 118 | if not target: 119 | raise ValueError("Must give either a target or the inventory.") 120 | else: 121 | folders, files, links = get_inventory(target, logger) 122 | else: 123 | # Make copies of the inventory lists just in case the user wanted to 124 | # keep the originals. 125 | folders = list(folders) 126 | files = list(files) 127 | links = list(links) 128 | 129 | logger.verbose("Getting size-based deletable inventory:") 130 | 131 | # Initialize lists to be returned. 132 | delete_folders = [] 133 | delete_files = [] 134 | delete_links = [] 135 | 136 | # # Set the index key based on oldest/largest preference. 137 | # if oldest_first: 138 | # key = 1 139 | # else: 140 | # key = 2 141 | # Initialize an accumulated_size counter to keep track of how much stuff is 142 | # going to be deleted. 143 | accumulated_size = 0 144 | 145 | # Build up the deletion lists. 146 | while accumulated_size < target_space: 147 | logger.verbose(" target_space = {}".format(target_space)) 148 | logger.verbose(" accumulated_size = {}".format(accumulated_size)) 149 | # If we have folders left but no files, just look at the maximum value 150 | # for the folders. 151 | if folders and not files: 152 | if oldest_first: 153 | folder = min(folders, key=lambda folder: folder[1]) 154 | else: 155 | folder = max(folders, key=lambda folder: folder[2]) 156 | logger.verbose(" folder: {}".format(folder)) 157 | logger.verbose(" age: {}".format(folder[2])) 158 | # If that folder's size won't put us over the 'total_size' alotment, 159 | # add it to the list of folders to be deleted. 160 | if overflow or folder[2] <= target_space - accumulated_size: 161 | delete_folders.append(folder[0]) 162 | accumulated_size += folder[2] 163 | logger.verbose(" deleting") 164 | # In any case, remove the folder from the list of folders. 165 | # This way we can keep iterating over the list and not get stuck on 166 | # one value. 167 | folders.remove(folder) 168 | 169 | # Files but no folders. 170 | elif files and not folders: 171 | if oldest_first: 172 | file = min(files, key=lambda file: file[1]) 173 | else: 174 | file = max(files, key=lambda file: file[2]) 175 | logger.verbose(" file: {}".format(file)) 176 | logger.verbose(" age: {}".format(file[2])) 177 | # If the file's size won't put us over the 'total_size' alotment, 178 | # add it do the list of files to be deleted. 179 | if overflow or file[2] <= target_space - accumulated_size: 180 | delete_files.append(file[0]) 181 | accumulated_size += file[2] 182 | logger.verbose(" deleting") 183 | # Even if the file is too big to be deleted, remove it from the list 184 | # of files so we don't have to see it again. 185 | files.remove(file) 186 | 187 | # Maybe we have both! That's kind of tricky. 188 | elif files and folders: 189 | # Take the maximum value from each of 'folders' and 'files'. 190 | if oldest_first: 191 | folder = min(folders, key=lambda folder: folder[1]) 192 | file = min(files, key=lambda file: file[1]) 193 | else: 194 | folder = max(folders, key=lambda folder: folder[2]) 195 | file = max(files, key=lambda file: file[2]) 196 | logger.verbose(" folder: {}".format(folder)) 197 | logger.verbose(" age: {}".format(folder[2])) 198 | logger.verbose(" v file: {}".format(file)) 199 | logger.verbose(" age: {}".format(file[2])) 200 | # If the folder is older/larger than the file... 201 | if (oldest_first and folder[1] <= file[1]) or (not oldest_first and folder[2] >= file[2]): 202 | logger.verbose(" folder preferred") 203 | # Add the folder to the list of folders to be deleted. 204 | if overflow or folder[2] <= target_space - accumulated_size: 205 | delete_folders.append(folder[0]) 206 | accumulated_size += folder[2] 207 | logger.verbose(" deleting folder") 208 | # Remove the folder from the list of folders. 209 | folders.remove(folder) 210 | # But if the file is older/larger... 211 | else: 212 | logger.verbose(" file preferred") 213 | # Add the file to the list of files to be deleted. 214 | if overflow or file[2] <= target_space - accumulated_size: 215 | delete_files.append(file[0]) 216 | accumulated_size += file[2] 217 | logger.verbose(" deleting file") 218 | # Remove the file from the list of files. 219 | files.remove(file) 220 | 221 | # We don't have any folders or files left, so quit the loop. 222 | # If this gets triggered, it means that there weren't enough items in 223 | # the target directory to fill up the 'total_size' alotment. 224 | else: 225 | break 226 | 227 | # Now handle links. This is a bit trickier. 228 | # Link array is assumed to contain tuples as: 229 | # (link location, target location, inside) 230 | for link in links: 231 | # If the link points inside the 'target' directory and the target of the 232 | # link will be deleted during cleanup, then remove the link. 233 | if link[2] and (link[1] in delete_folders or link[1] in delete_files): 234 | delete_links.append(link[0]) 235 | else: 236 | for folder in delete_folders: 237 | # If the link exists inside of or points into a folder that is 238 | # going to be deleted, then remove the link. 239 | if link[0].startswith(folder) or link[1].startswith(folder): 240 | delete_links.append(link[0]) 241 | break 242 | 243 | # Print out lots of fun information if it's warranted. 244 | for folder in delete_folders: 245 | logger.debug(" Set to remove folder: {}".format(folder)) 246 | for file in delete_files: 247 | logger.debug(" Set to remove file: {}".format(file)) 248 | for link in delete_links: 249 | logger.debug(" Set to remove link: {}".format(link)) 250 | 251 | # Return the deletable inventory and accumulated size. 252 | return delete_folders, delete_files, delete_links, accumulated_size 253 | 254 | 255 | def get_inventory(target, logger): 256 | """ 257 | Given a target directory, finds all subitems within that directory and 258 | stores them in separate lists, ie folders, files, and links. 259 | 260 | Folder and file lists are full of tuples as: 261 | (folder/file path, modification timestamp, size) 262 | where: 263 | folder/file path: the path to the object 264 | modification timestamp: the Unix timestamp of the last modification 265 | size: the size of the object 266 | Folder sizes are just the sum of their content, and folder modification 267 | times are considered to be the most-recent timestamp among all objects 268 | within that folder. 269 | 270 | Link list is full of tuples as: 271 | (link path, target path, internal) 272 | where: 273 | link path: the path to the link object 274 | target path: the target that the link points to 275 | internal: whether the target is in this inventory 276 | 277 | :param target: directory to search for inventory 278 | :param logger: a Management Tools logger object 279 | :return: a tuple containing lists containing tuples describing the contents 280 | as (folders, files, links) 281 | """ 282 | if not os.path.isdir(target): 283 | raise ValueError("The target must be a valid, existing directory.") 284 | 285 | logger.verbose("Getting top-level inventory:") 286 | 287 | ##-------------------------------------------------------------------------- 288 | ## Get top-level directory listings. 289 | ##-------------------------------------------------------------------------- 290 | 291 | # Initialize lists to hold tuples. 292 | folders = [] 293 | files = [] 294 | links = [] 295 | 296 | # Walk through everything in just the top directory. 297 | for path, subdirs, subfiles in os.walk(target): 298 | for folder in subdirs: 299 | folder = os.path.join(path, folder) 300 | if os.path.islink(folder): 301 | links.append(folder) 302 | logger.verbose(" Found link: {}".format(folder)) 303 | else: 304 | folders.append(folder) 305 | logger.verbose(" Found folder: {}".format(folder)) 306 | 307 | for file in subfiles: 308 | file = os.path.join(path, file) 309 | if os.path.islink(file): 310 | links.append(file) 311 | logger.verbose(" Found link: {}".format(file)) 312 | else: 313 | files.append(file) 314 | logger.verbose(" Found file: {}".format(file)) 315 | 316 | # Prevent recursion to reduce time (we don't need everything indexed). 317 | break 318 | 319 | ##-------------------------------------------------------------------------- 320 | ## Get file information. 321 | ##-------------------------------------------------------------------------- 322 | 323 | # Get the age and size of each file. 324 | files = [(file, os.path.getmtime(file), os.path.getsize(file)) for file in files] 325 | 326 | ##-------------------------------------------------------------------------- 327 | ## Get folder information. 328 | ##-------------------------------------------------------------------------- 329 | 330 | # Get the age of each folder. 331 | for i in range(len(folders)): 332 | folder = folders[i] 333 | age = os.path.getmtime(folder) 334 | size = 0 335 | 336 | for path, subdirs, subfiles in os.walk(folder): 337 | for directory in subdirs: 338 | directory = os.path.join(path, directory) 339 | # If the modification time is more recent than that of the top 340 | # directory, overwrite the directory's age with the file's. 341 | directory_age = os.path.getmtime(directory) 342 | if directory_age > age: 343 | age = directory_age 344 | # Is the directory a link? 345 | if os.path.islink(directory): 346 | links.append(directory) 347 | 348 | for file in subfiles: 349 | file = os.path.join(path, file) 350 | file_age = 0 351 | # Is the file a link? 352 | if os.path.islink(file): 353 | links.append(file) 354 | else: 355 | size += os.path.getsize(file) 356 | file_age = os.path.getmtime(file) 357 | # If the modification time is more recent than that of the top 358 | # directory, overwrite the directory's age with the file's. 359 | if file_age > age: 360 | age = file_age 361 | 362 | folders[i] = (folder, age, size) 363 | 364 | ##-------------------------------------------------------------------------- 365 | ## Get link information. 366 | ##-------------------------------------------------------------------------- 367 | 368 | # Determine whether each link connects to a point within the top directory. 369 | links = [(link, os.path.realpath(link), os.path.realpath(link).startswith(target)) for link in links] 370 | 371 | return folders, files, links 372 | -------------------------------------------------------------------------------- /cleanup_manager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cleanup_management 4 | 5 | import argparse 6 | import datetime 7 | import os 8 | import re 9 | import sys 10 | import time 11 | 12 | 13 | try: 14 | from management_tools import loggers 15 | from management_tools import fs_analysis as fsa 16 | # Check for MT version 1.8.1 17 | if not "bytes" in dir(fsa.Filesystem): 18 | raise ImportError 19 | except ImportError as e: 20 | print("You need version 1.8.1 or greater of the 'Management Tools' module to be installed first.") 21 | print("https://github.com/univ-of-utah-marriott-library-apple/management_tools") 22 | raise e 23 | 24 | 25 | def main(target, keep_after, free_space, oldest_first, skip_prompt, overflow, dir_trigger, logger): 26 | # Get an absolute reference to the target path. 27 | target = os.path.abspath(os.path.expanduser(target)) 28 | 29 | # Obtain the initial inventory. 30 | folders, files, links = cleanup_management.analysis.get_inventory(target, logger) 31 | 32 | # Build the appropriate deletion inventory. 33 | if keep_after is not None: 34 | delete_folders, delete_files, delete_links = cleanup_management.analysis.get_date_based_deletable_inventory(keep_after=keep_after, logger=logger, folders=folders, files=files, links=links, trigger=dir_trigger) 35 | elif free_space is not None and oldest_first is not None: 36 | delete_folders, delete_files, delete_links, deleted_space = cleanup_management.analysis.get_size_based_deletable_inventory(target_space=free_space, logger=logger, oldest_first=oldest_first, overflow=overflow, folders=folders, files=files, links=links) 37 | else: 38 | raise RuntimeError("Did not specify either --keep-after or --freeup.") 39 | 40 | # Inform the user about stuff (if they wanted it). 41 | if not skip_prompt: 42 | logger.info("These items will be deleted:") 43 | 44 | if len(delete_links) > 0: 45 | logger.info(" Links:") 46 | for link in delete_links: 47 | logger.info(" {}".format(link)) 48 | 49 | if len(delete_files) > 0: 50 | logger.info(" Files:") 51 | for file in delete_files: 52 | logger.info(" {}".format(file)) 53 | 54 | if len(delete_folders) > 0: 55 | logger.info(" Folders:") 56 | for folder in delete_folders: 57 | logger.info(" {}".format(folder)) 58 | 59 | if not query_yes_no("Proceed with cleanup?"): 60 | sys.exit(7) 61 | 62 | if keep_after: 63 | logger.info("Deleting contents recursively older than {} from {}".format(datetime.datetime.fromtimestamp(keep_after), target)) 64 | else: 65 | logger.info("Deleting {} bytes of data from {}".format(deleted_space, target)) 66 | 67 | # Remove links first. 68 | if len(delete_links) == 0: 69 | logger.info("No links to remove.") 70 | else: 71 | logger.info("Removing bad links...") 72 | cleanup_management.cleanup.delete_links(delete_links, logger) 73 | logger.info("Bad links removed.") 74 | 75 | # Then delete files. 76 | if len(delete_files) == 0: 77 | logger.info("No files to remove.") 78 | else: 79 | logger.info("Removing files...") 80 | cleanup_management.cleanup.delete_files(delete_files, logger) 81 | logger.info("Files removed.") 82 | 83 | # And then delete folders. 84 | if len(delete_folders) == 0: 85 | logger.info("No folders to remove.") 86 | else: 87 | logger.info("Removing folders...") 88 | cleanup_management.cleanup.delete_folders(delete_folders, logger) 89 | logger.info("Folders removed.") 90 | 91 | logger.info("Cleanup complete.") 92 | 93 | def query_yes_no(question): 94 | """ 95 | Asks a user a yes/no question and expects a valid response. 96 | 97 | :param question: The prompt to give to the user. 98 | :return: A boolean; True for 'yes', False for 'no'. 99 | """ 100 | valid = { 101 | 'yes': True, 'ye': True, 'y': True, 102 | 'no': False, 'n': False 103 | } 104 | 105 | # Until they give valid input, loop and keep asking the question. 106 | while True: 107 | # Note the comma at the end. We don't want a newline. 108 | print("{} [y/N] ".format(question)), 109 | choice = raw_input().lower() 110 | if choice == '': 111 | return False 112 | elif choice in valid: 113 | return valid[choice] 114 | else: 115 | print("Please respond with 'yes' or 'no'.") 116 | 117 | 118 | def version(): 119 | """ 120 | :return: The version information for this program. 121 | """ 122 | return ("{name}, version {version}\n".format(name='cleanup_manager', version=cleanup_management.__version__)) 123 | 124 | 125 | def usage(): 126 | """ 127 | Prints usage information. 128 | """ 129 | print(version()) 130 | 131 | print('''\ 132 | usage: {name} [-hvnV] [-l log] [-k date] [-f format] target 133 | 134 | Delete old items from a specific directory, but only at a top-level granularity. 135 | 136 | -h, --help 137 | Print this help message and quit. 138 | -v, --version 139 | Print the version information and quit. 140 | -n, --no-log 141 | Do not output log events to file. 142 | -V, --verbose 143 | Increase verbosity to see more information. Two levels of verbosity are 144 | supported. 145 | --skip-prompt 146 | Skips the confirmation prompt. Warning: this will lead to lots of 147 | deletion. 148 | 149 | -l log, --log-dest log 150 | Redirect log file output to 'log'. 151 | -k date, --keep-after date 152 | The date to compare file modification times to. Anything before this 153 | date will be removed. 154 | default: seven days prior to today, rounded down to midnight 155 | -d format, --date-format format 156 | The date format, given as a Python datetime.datetime.strptime()- 157 | compatible format. 158 | default: '%Y-%m-%d' 159 | -f size, --freeup size 160 | The amount of space to attempt to free up. 161 | -t trigger, --dir-trigger trigger 162 | Sets a specific file to look for in the top-level of directories inside 163 | the specified target directory. If this file exists, its timestamp will 164 | be used in place of the directory's timestamp to determine removal. If 165 | the file does not exist, the timestamp for the directory will be found 166 | through the usual method. 167 | (Only has an effect with date-based deletion.) 168 | --delete-oldest-first 169 | When deleting by size, older items are deleted first to free up the 170 | designated `--freeup` space. 171 | This is the default action when using `--freeup`. 172 | --delete-largest-first 173 | When deleting by size, larger items are deleted first to free up the 174 | designated `--freeup` space. 175 | --overflow 176 | When deleting by size, this flag will ensure that at the very least the 177 | amount designated will be deleted. (The default action is to delete up 178 | to - but not more than - the amount.) This is useful when your top-level 179 | directory only contains items that are greater in size than the target 180 | free space amount. 181 | 182 | target 183 | The top-level directory to delete from within. 184 | 185 | Cleanup Manager is a simple script to help you delete items from folders en 186 | masse. 187 | 188 | Originally conceived to delete user home directories in student labs at a 189 | university, Cleanup Manager takes a look at a directory's contents and checks 190 | them recursively for the most recently-modified timestamp. This timestamp is 191 | compared against the keep-after date, and any item with a timestamp older than 192 | that date is deleted. 193 | 194 | KEEP-AFTER DATE 195 | The date can be either absolute or relative. Absolute dates can be given 196 | with a format to indicate how you want it parsed. 197 | 198 | Relative dates can be given as: 199 | NNNXr 200 | where "N" is an integer number, "X" represents a shorthand form of 201 | the time scale, i.e.: 202 | M - minutes 203 | H - hours 204 | d - days 205 | m - months 206 | y - years 207 | and "r" or "R" indicates that the date should be rounded back to 208 | the previous midnight. 209 | 210 | Note: When deleting directories, Cleanup Manager will search the full 211 | contents of a directory to find the file with the most recent timestamp. 212 | This ensures that folders aren't deleted whose contents were modified after 213 | the 'keep-after' date even if the folder's own modification timestamp is 214 | from before that date. 215 | 216 | Example 217 | To delete everything older than four days ago: 218 | cleanup_manager.py -k 4d /path/to/target 219 | 220 | FREEUP SPACE 221 | You can specify an amount of space to attempt to free up in the target 222 | directory. This size can be specified in one of three ways: 223 | 1. A number of bytes to free up on the drive. 224 | 2. A number of bytes to have free on the drive (this is different). 225 | 3. A percentage representing the amount of free space you want on the 226 | drive. 227 | 228 | These can be inputted as (for example): 229 | 1. 10g - will attempt to remote 10 gigabytes of data 230 | 2. 10gf - 10 gigabytes will be free after cleanup runs 231 | 3. 10 - 10% of the drive will be free after cleanup 232 | 233 | There are five supported byte modifiers to specify explicit sizes: 234 | b: bytes 235 | k: kilobytes - 1024 bytes 236 | m: megabytes - 1024 kilobytes 237 | g: gigabytes - 1024 megabytes 238 | t: terabytes - 1024 gigabytes 239 | 240 | Example 241 | To delete up to 15 gigabytes of data within the target directory with 242 | preference given to older items: 243 | cleanup_manager.py -f 15g /path/to/target 244 | To attempt to have 500 megabytes free on your old hard drive with preference 245 | given to larger items: 246 | cleanup_manager.py -f 500mf --delete-largest-first /path/to/target 247 | To clear up 30% of the drive where 'target' exists by deleting items inside 248 | of 'target' (with preference to older items): 249 | cleanup_manager.py -f 30 /path/to/target 250 | 251 | LINKS 252 | All links existing within the directory structure are checked for whether 253 | they point internally; that is, if a link points to a file or folder that is 254 | going to be deleted, or if it is in a folder that is going to be deleted, 255 | the link is unmade. However, this program does not check the rest of the 256 | system to ensure that external links do not point inside a deleted 257 | directory.\ 258 | '''.format(name='cleanup_manager')) 259 | 260 | 261 | def date_to_unix(date, date_format): 262 | """ 263 | Converts a date to a local Unix timestamp (non-UTC). 264 | 265 | The date can be either absolute or relative. Absolute dates can be given 266 | with a format to indicate how you want it parsed. 267 | 268 | Relative dates can be given as: 269 | NNNXr 270 | where "N" is an integer number, "X" represents a shorthand form of 271 | the time scale, i.e.: 272 | M - minutes 273 | H - hours 274 | d - days 275 | m - months 276 | y - years 277 | and "r" or "R" indicates that the date should be rounded back to 278 | the previous midnight. 279 | 280 | :param date: 281 | :param date_format: 282 | :return: The Unix timestamp of the given date as a float. 283 | """ 284 | try: 285 | # Attempt to pull the time out directly based on the format. 286 | target_date = datetime.datetime.strptime(date, date_format) 287 | except ValueError: 288 | # If that didn't work, let's try to parse the string for a relative 289 | # date according to the given specifications. 290 | relative_match = re.match(r"\A-?(\d+)([a-zA-Z]?)([rR]?)\Z", date) 291 | 292 | if relative_match: 293 | # If no scale is given, "D" is assumed. 294 | if not relative_match.group(2): 295 | scale = 'd' 296 | else: 297 | scale = relative_match.group(2) 298 | 299 | # If rounding is not specified, don't do it. 300 | if not relative_match.group(3): 301 | rounding = False 302 | else: 303 | rounding = True 304 | 305 | # Set the amount of change. 306 | amount = int(relative_match.group(1)) 307 | 308 | if scale == 'M': 309 | # Minutes 310 | seconds = amount * 60 311 | elif scale == 'H': 312 | # Hours 313 | seconds = amount * 60 * 60 314 | elif scale == 'd': 315 | # Days 316 | seconds = amount * 60 * 60 * 24 317 | elif scale == 'm': 318 | # Months 319 | seconds = amount * 60 * 60 * 24 * 30 320 | elif scale == 'y': 321 | # Years 322 | seconds = amount * 60 * 60 * 24 * 365 323 | else: 324 | # Invalid specification. 325 | raise ValueError("{date} is not a valid relative date value".format(date=date)) 326 | 327 | days = seconds / 86399 328 | seconds = seconds % 86399 329 | 330 | time_difference = datetime.timedelta(days=days, seconds=seconds) 331 | 332 | # Calculate the target date. 333 | target_date = datetime.datetime.now() - time_difference 334 | 335 | # If rounding was specified, round to the previous midnight. 336 | if rounding: 337 | target_date = target_date.replace(hour=0, minute=0, second=0, microsecond=0) 338 | else: 339 | # Neither of these is valid. Raise an exception. 340 | raise ValueError("{date} is not a valid date specification".format(date=date)) 341 | 342 | # Buidl up the current time for Unix time conversion. 343 | time_tuple = time.struct_time(( 344 | target_date.year, target_date.month, target_date.day, 345 | target_date.hour, target_date.minute, target_date.second, -1, -1, -1 346 | )) 347 | unix_time = time.mktime(time_tuple) + (target_date.microsecond / 1e6) 348 | 349 | return unix_time 350 | 351 | 352 | def volume_size_target(size, target, logger=None): 353 | """ 354 | Converts a size into a number of bytes to clear up on the filesystem where 355 | 'target' exists. 356 | 357 | The size can be given in one of three ways: 358 | 1. A number of bytes to free up on the drive. 359 | 2. A number of bytes to have free on the drive (this is different). 360 | 3. A percentage representing the amount of free space you want on the 361 | drive. 362 | 363 | These can be inputted as (for example): 364 | 1. 10g - will attempt to remote 10 gigabytes of data 365 | 2. 10gf - 10 gigabytes will be free after cleanup runs 366 | 3. 10 - 10% of the drive will be free after cleanup 367 | 368 | There are five supported byte modifiers to specify explicit sizes: 369 | b: bytes 370 | k: kilobytes - 1024 bytes 371 | m: megabytes - 1024 kilobytes 372 | g: gigabytes - 1024 megabytes 373 | t: terabytes - 1024 gigabytes 374 | 375 | :param size: the amount to clear up on the volume where 'target' exists 376 | :param target: the location in the system where cleanup will occur 377 | :param logger: a Management Tools logger (if you want some things logged) 378 | :return: the number of bytes that should be freed up if possible 379 | :return type: int 380 | """ 381 | # Get the filesystem information for 'target'. 382 | volume = fsa.Filesystem(fsa.get_responsible_fs(target)) 383 | delete_target = None 384 | try: 385 | # Is it just the percentage? 386 | percentage = int(size) 387 | 388 | # Convert to a decimal percentage for math. 389 | percentage = float(percentage) / 100.0 390 | 391 | # Take the ceiling of the product of the percentage with the number of 392 | # blocks available. 393 | from math import ceil 394 | free_target = int(ceil(volume.bytes * percentage)) 395 | delete_target = free_target - volume.bytes_free 396 | except ValueError: 397 | # 'size' is not just a percentage. Parse it for values! 398 | size_match = re.match(r"^(\d+)([bkmgt])([f]?)$", size.lower()) 399 | if not size_match: 400 | raise ValueError("{size} is not a valid size-deletion target".format(size=size)) 401 | 402 | amount, indicator, total_free = size_match.groups() 403 | total_free = True if total_free else False 404 | amount = int(amount) 405 | 406 | # Concordance between letters and their "byte powers"! 407 | size_indicators = { 408 | 'b': 0, # bytes = amount * (1024^0) 409 | 'k': 1, # kilobytes (1024 bytes) = amount * (1024^1) 410 | 'm': 2, # megabytes (1024 kilobytes) = amount * (1024^2) 411 | 'g': 3, # gigabytes (1024 megabytes) = amount * (1024^3) 412 | 't': 4 # terabytes (1024 gigabytes) = amount * (1024^4) 413 | } 414 | 415 | # Convert the 'amount' into a target amount of bytes to delete. 416 | from math import pow 417 | byte_multiplier = int(pow(1024, size_indicators[indicator])) 418 | free_target = amount * byte_multiplier 419 | 420 | if total_free: 421 | delete_target = free_target - volume.bytes_free 422 | else: 423 | delete_target = free_target 424 | 425 | # Check if anything happened. If not... problems. 426 | if delete_target is None: 427 | raise RuntimeError("No target deletion size could be found.") 428 | 429 | # Check that the volume can actually have that much space deleted. 430 | if delete_target < 0: 431 | raise ValueError("Negative target deletion size encountered - is there already enough free space?") 432 | if delete_target > volume.bytes: 433 | if logger: 434 | logger.warn("Too many bytes to delete; will delete as much as possible.") 435 | 436 | # Return the amount of bytes to delete. 437 | return delete_target 438 | 439 | ##------------------------------------------------------------------------------ 440 | ## Program entry point. 441 | ##------------------------------------------------------------------------------ 442 | 443 | if __name__ == '__main__': 444 | # Build the argument parser. 445 | parser = argparse.ArgumentParser(add_help=False) 446 | parser.add_argument('-h', '--help', action='store_true') 447 | parser.add_argument('-v', '--version', action='store_true') 448 | parser.add_argument('-n', '--no-log', action='store_true') 449 | parser.add_argument('-V', '--verbose', action='count') 450 | parser.add_argument('--skip-prompt', action='store_true') 451 | parser.add_argument('-l', '--log-dest') 452 | parser.add_argument('-k', '--keep-after', default=None) 453 | parser.add_argument('-d', '--date-format', default='%Y-%m-%d') 454 | parser.add_argument('-f', '--freeup', default=None) 455 | parser.add_argument('-t', '--dir-trigger', default=None) 456 | parser.add_argument('--delete-oldest-first', action='store_true', default=True) 457 | parser.add_argument('--delete-largest-first', action='store_false', dest='delete_oldest_first') 458 | parser.add_argument('--overflow', action='store_true') 459 | parser.add_argument('target', nargs='?', default=os.getcwd()) 460 | 461 | # Parse the arguments. 462 | args = parser.parse_args() 463 | 464 | if args.keep_after and args.freeup: 465 | parser.error("You may only specify one of --keep-after and --freeup.") 466 | 467 | if not args.keep_after and not args.freeup: 468 | args.keep_after = '-7dr' 469 | 470 | if args.help: 471 | usage() 472 | sys.exit(0) 473 | 474 | if args.version: 475 | print(version()) 476 | sys.exit(0) 477 | 478 | # Set the logging level. There's the regular level, the verbose level, 479 | # and the super-verbose level. 480 | if args.verbose is None: 481 | log_level = 20 482 | elif args.verbose == 1: 483 | log_level = 10 484 | else: 485 | log_level = 5 486 | 487 | # Build the logger. 488 | logger = loggers.get_logger( 489 | name = 'cleanup_manager', 490 | log = not args.no_log, 491 | level = log_level, 492 | path = args.log_dest 493 | ) 494 | 495 | # Set output logging prompts. 496 | for logging_level in [x for x in logger.prompts.keys() if x <= loggers.INFO]: 497 | logger.set_prompt(logging_level, '') 498 | 499 | # Get the necessary information to perform cleanup. Either calculate the 500 | # unix date of the time to delete before, or find the amount of space to 501 | # delete off the given volume. 502 | if args.keep_after: 503 | free_space = None 504 | keep_after = date_to_unix(args.keep_after, args.date_format) 505 | elif args.freeup: 506 | keep_after = None 507 | free_space = volume_size_target(args.freeup, args.target, logger) 508 | 509 | # Run it! 510 | try: 511 | main( 512 | target = args.target, 513 | keep_after = keep_after, 514 | free_space = free_space, 515 | oldest_first = args.delete_oldest_first, 516 | skip_prompt = args.skip_prompt, 517 | overflow = args.overflow, 518 | dir_trigger = args.dir_trigger, 519 | logger = logger, 520 | ) 521 | except: 522 | # Output the exception with the error name and its message. Suppresses the stack trace. 523 | logger.error("{errname}: {error}".format(errname=sys.exc_info()[0].__name__, error=' '.join([str(x) for x in sys.exc_info()[1]]))) 524 | raise 525 | --------------------------------------------------------------------------------