├── .gitignore
├── LICENSE.txt
├── README.md
├── setup.cfg
├── setup.py
├── wcleaner-demo.gif
└── wcleaner
    ├── __init__.py
    ├── core.py
    ├── junkcenter.py
    └── settings.py

/.gitignore:
--------------------------------------------------------------------------------
*.swp
*.pyc
*.log
*.backup
build
*.egg-info
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015, Li Ruiqi (see AUTHORS)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Wcleaner - Disk Space Cleaner

## Features
* Automatically identifies log files. The default log pattern is r'.\*\blogs?\b.\*'.
* Intelligently unifies similar log files by their numeric patterns. Very useful for rotated logs.
* Automatically finds deleted files whose disk space has not been freed.
* Automatically cleans log files when the junk center is enabled.
* The junk center supports a greylist, whitelist, blacklist and redlist.

## Demo
![Wcleaner Demo](./wcleaner-demo.gif)

## Install
````bash
$ pip install wcleaner
````

## Usage
````
usage: wcleaner [-h] [-v] [-n N] [--max-capacity MAX_CAPACITY]
                [--target-capacity TARGET_CAPACITY] [--auto] [--no-interface]
                [FILESYSTEM]

Disk Space Cleaner

positional arguments:
  FILESYSTEM            filesystem to clean

optional arguments:
  -h, --help            show this help message and exit
  -v, --version         show program's version number and exit
  -n N                  print the largest N files
  --max-capacity MAX_CAPACITY
                        max capacity. default: 90
  --target-capacity TARGET_CAPACITY
                        target capacity. default: 50
  --auto                automatically clean junk files in the whitelist, or in
                        the greylist with a matching hostname
  --no-interface        non-interactive mode
````

## Enable Junk Center
````bash
$ echo_wcleaner_conf > /etc/wcleaner.conf
$ sed -i "s/your redis host/$host/g" /etc/wcleaner.conf
$ sed -i "s/6379/$port/g" /etc/wcleaner.conf
````

## Junk Center
Supports grey/white/black/red lists

```python
list: [
    junk1: set([
        hostname1,
        hostname2,
    ]),
    junk2: set([
        hostname1,
        hostname2,
    ]),
]
```

* greylist: [--auto] Junk is cleaned automatically if the hostname matches an entry in the greylist. Every junk pattern wcleaner cleans up is submitted here. #safe or normal
* whitelist: [--auto] Junk is cleaned automatically if it is in the whitelist. #safe
* blacklist: Junk in the blacklist is never cleaned automatically. #normal
* redlist: Junk in the redlist is never cleaned at all. #dangerous
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[metadata]
description-file = README.md
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from wcleaner import __version__

from setuptools import setup

setup(
    name = 'wcleaner',
    version = __version__,
    description = "Disk Space Cleaner",
    author = 'ruiqi',
    author_email = 'smile.ruiqi@gmail.com',
    url = 'https://github.com/ruiqi/wcleaner',
    download_url = 'https://github.com/ruiqi/wcleaner/archive/v%s.tar.gz' %__version__,
    keywords = ['disk', 'cleaner', 'walk', 'scandir'],
    license = 'License :: OSI Approved :: MIT License',

    packages = [
        'wcleaner',
    ],

    entry_points = {
        'console_scripts': [
            'wcleaner = wcleaner.core:wcleaner',
            'echo_wcleaner_conf = wcleaner.core:echo_wcleaner_conf',
        ]
    },
    install_requires = [
        'argparse',
        'scandir',
    ],
)
--------------------------------------------------------------------------------
/wcleaner-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ruiqi/wcleaner/958618c8652c9516234ddde0379991a63d7ef7ea/wcleaner-demo.gif
--------------------------------------------------------------------------------
/wcleaner/__init__.py:
--------------------------------------------------------------------------------
__version__ = '1.1.3'
--------------------------------------------------------------------------------
/wcleaner/core.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

from wcleaner import __version__

import os
import time
import argparse
import scandir
import heapq
import re
import tempfile
import redis
from settings import *
from junkcenter import JunkCenter

conf_paths = [
    os.path.join(os.path.expanduser('~'), '.wcleaner.conf'),
    '/etc/wcleaner.conf',
]

for conf_path in conf_paths:
    try:
        with open(conf_path) as conf_f:
            exec conf_f.read()
        break
    except IOError:
        pass

JUNK_CENTER = JunkCenter(JUNK_CENTER_HOST, JUNK_CENTER_PORT, *JUNK_CENTER_DBS)

MOUNT_POINTS = {}
for line in os.popen('df -Plk').readlines()[1:]:
    if line[0] != '/': continue

    line_cells = line.split()
    #print line_cells

    filesystem = line_cells[0]
    point = line_cells[5]
    capacity = int(line_cells[4][:-1])
    size = int(line_cells[1])

    MOUNT_POINTS[point] = (filesystem, size, capacity)
#print MOUNT_POINTS

def get_filesystem_capacity(filesystem):
    return int(os.popen("df -Plk | grep '%s'" %filesystem).read().split()[-2][:-1])

def is_opened(path):
    return not bool(os.system("fuser '%s' 2>&1 | grep '%s' >/dev/null" %(path, path)))

def walk(path, pattern=None):
    try:
        for f in scandir.scandir(path):
            #print f.path, f.is_dir(), f.is_symlink(), f.is_file()
            if f.is_symlink(): continue

            if f.is_dir() and not f.path in MOUNT_POINTS:
                #print f.path, f.is_dir()
                #yield from walk(f.path) #'yield from' is not available in Python 2
                for path, size, mtime in walk(f.path, pattern):
                    yield (path, size, mtime)
            else:
                if pattern is None or re.match(pattern, f.path):
                    #get file info
                    path = f.path
                    stat = f.stat()
                    size = stat.st_blocks*stat.st_blksize/1024/8
                    mtime = int(stat.st_mtime)

                    yield (path, size, mtime)
    except OSError, e:
        yield (path, None, None)

def get_junk(fileinfos):
    paths = zip(*fileinfos)[0]
    strings = re.findall(r'[^\d]+', paths[0]+'$')
    numbers_l = map(lambda path: re.findall(r'[\d]+', path), paths)

    for i, z_numbers in enumerate(zip(*numbers_l)):
        #print i, z_numbers[0], all([number == z_numbers[0] for number in z_numbers])
        if all([number == z_numbers[0] for number in z_numbers]):
            strings[i] += z_numbers[0] + '*'

    #print strings
    return '*'.join(strings).replace('**', '')[:-1]

def get_human_size(size):
    size = float(size)
    if size < 1024:
        return '%.1fK' %size
    elif size < 1024*1024:
        return '%.1fM' %(size/1024)
    elif size < 1024*1024*1024:
        return '%.1fG' %(size/1024/1024)
    else:
        return '%.1fT' %(size/1024/1024/1024)

def clean_files(groupinfos):
    fileinfos = sorted(groupinfos['fileinfos'], key=lambda (path, size, mtime): mtime)
    clean_before_mtime = fileinfos[int(len(fileinfos)*0.6)][2]/86400*86400 + 86400

    for path, size, mtime in fileinfos:
        if mtime >= clean_before_mtime: break

        groupinfos['total-size'] -= size

        if not is_opened(path):
            os.remove(path)
        else:
            open(path, 'w').close()

def get_group_fileinfos(Point):
    fileinfos = list(walk(Point))

    #warn about files that could not be read
    ignore_count = len(filter(lambda (path, size, mtime): size is None, fileinfos))
    if ignore_count: print '\nWarning: Ignoring %d unreadable file(s) ...' %ignore_count

    fileinfos = filter(lambda (path, size, mtime): not size is None, fileinfos)

    group_fileinfos = {}
    for path, size, mtime in fileinfos:
        key = tuple(re.findall(r'[^\d]+', path+'$'))
        if not key in group_fileinfos:
            group_fileinfos[key] = {
                'total-size': 0,
                'fileinfos': [],
            }

        group_fileinfos[key]['total-size'] += size
        group_fileinfos[key]['fileinfos'].append((path, size, mtime))

    return group_fileinfos

def can_not_reduce_capacity(Point, Filesystem, Size, Capacity, group_fileinfos, nlargest_groupinfos):
    print
    print 'Warning: Cannot reduce capacity below %d%%. These are the largest 10 files:' %MAX_CAPACITY
    for groupinfos in sorted(nlargest_groupinfos, key=lambda groupinfos: groupinfos['total-size'], reverse=True)[:10]:
        print '%s\t%s' %(get_human_size(groupinfos['total-size']), get_junk(groupinfos['fileinfos']))

    FILES_TOTAL_SIZE = sum([groupinfos['total-size'] for groupinfos in group_fileinfos.values()])
    #print 'miss capa:', Capacity-FILES_TOTAL_SIZE*100/Size

    #more than 20% of used capacity is not accounted for by visible files
    if Capacity-FILES_TOTAL_SIZE*100/Size >= 20:
        #lsof | grep deleted files
        deleted_files = []
        current_pid = os.getpid()
        current_tmp_file = None
        for line in os.popen("lsof %s | grep -E '\(deleted\)$'" %Point).readlines():
            try:
                cells = line.split()
                command = cells[0]
                pid = int(cells[1])
                fd = int(cells[3][:-1])
                proc_fd = '/proc/%d/fd/%d' %(pid, fd)
            except ValueError:
                continue

            try:
                stat = os.stat(proc_fd)
                size = stat.st_blocks*stat.st_blksize/1024/8
            except OSError:
                continue

            if pid == current_pid: current_tmp_file = cells[-2]

            deleted_files.append((cells[-2], size, pid, command))

        deleted_files = [deleted_file for deleted_file in deleted_files if deleted_file[0] != current_tmp_file]
        deleted_files.sort(key=lambda deleted_file: deleted_file[1], reverse=True)
        if deleted_files:
            print
            print 'Warning: Some files have been deleted, but their space has not been freed. These are the largest 10 files:'
            print 'SIZE\tPID\tCOMMAND\tFILE'
            for deleted_file in deleted_files[:10]:
                print '%s\t%d\t%s\t%s (deleted)' %(get_human_size(deleted_file[1]), deleted_file[2], deleted_file[3], deleted_file[0])


def print_nlargest(Point, Filesystem, N=10):
    group_fileinfos = get_group_fileinfos(Point)
    nlargest_groupinfos = heapq.nlargest(N, group_fileinfos.values(), key=lambda groupinfos: groupinfos['total-size'])

    print '\nLargest %d file(s) are:' %N
    for i, groupinfos in enumerate(nlargest_groupinfos):
        print '%s\t%s' %(get_human_size(groupinfos['total-size']), get_junk(groupinfos['fileinfos']))
    print
    print

def clean_filesystem(Point, Filesystem, Size, Capacity, Auto=False, No_Interface=False):
    #only filesystems above MAX_CAPACITY% need cleaning
    if Capacity < MAX_CAPACITY:
        print 'No need to clean ...\n\n'
        return

    group_fileinfos = get_group_fileinfos(Point)
    nlargest_groupinfos = heapq.nlargest(20, group_fileinfos.values(), key=lambda groupinfos: groupinfos['total-size'])

    #clean for largest 10
    for i, groupinfos in enumerate(nlargest_groupinfos[:10]):
        #stop when capacity < target capacity
        Capacity = get_filesystem_capacity(Filesystem)
        if Capacity <= TARGET_CAPACITY: break

        #stop: the remaining groups are too small to bring capacity back under MAX_CAPACITY
        if Size*(Capacity-MAX_CAPACITY)/100 > groupinfos['total-size']*(10-i): break

        human_total_size = get_human_size(groupinfos['total-size'])
        junk = get_junk(groupinfos['fileinfos'])

        if not re.match(JUNK_PATTERN, junk): continue

        #dangerous
        if JUNK_CENTER.is_dangerous(junk): continue

        if Auto and JUNK_CENTER.is_safe(junk):
            print
            print "Junk file(s): (%s) %s" %(human_total_size, junk)
            print 'Automatically Clean ... '
            clean_files(groupinfos)

            continue

        if No_Interface: continue

        while True:
            print
            print "Junk file(s): (%s) %s" %(human_total_size, junk)
            p = raw_input('Clean old junk files (opened or recent ones are safe)? [y/n/l/h]:')

            if p in ['y', 'yes', 'Y', 'YES']:
                print 'Clean ...'

                clean_files(groupinfos)

                #submit junk
                JUNK_CENTER.submit(junk)

                break

            elif p in ['l', 'list', 'L', 'LIST']:
                print 'Listing ...'

                temp = tempfile.NamedTemporaryFile()
                temp.writelines(['%s\n' %path for path, size, mtime in groupinfos['fileinfos']])
                temp.flush()
                os.system('less %s' %temp.name)
                temp.close()

            elif p in ['h', 'help', 'H', 'HELP']:
                print 'Help ... Default: n'
                print 'y:\tClean the listed junk files.'
                print 'n:\tDo nothing.'
                print 'l:\tList junk files to be cleaned.'
                print 'h:\tPrint help message.'

            else:
                print 'Cancelling ...'

                break

    time.sleep(1)
    Capacity = get_filesystem_capacity(Filesystem)
    if Capacity < MAX_CAPACITY:
        print
        print 'Now the %s (%s) capacity is %d%% < %d%%' %(Point, Filesystem, Capacity, MAX_CAPACITY)
    else:
        can_not_reduce_capacity(Point, Filesystem, Size, Capacity, group_fileinfos, nlargest_groupinfos)

    print
    print
    print

def wcleaner():
    global MAX_CAPACITY, TARGET_CAPACITY

    parser = argparse.ArgumentParser(description='Disk Space Cleaner')
    parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
    parser.add_argument('FILESYSTEM', type=str, nargs='?', help='filesystem to clean')
    parser.add_argument('-n', type=int, help='print the largest N files')
    parser.add_argument('--max-capacity', type=int, help='max capacity. default: 90')
    parser.add_argument('--target-capacity', type=int, help='target capacity. default: 50')
    parser.add_argument('--auto', action='store_true', help='automatically clean junk files in the whitelist, or in the greylist with a matching hostname')
    parser.add_argument('--no-interface', action='store_true', help='non-interactive mode')

    args = parser.parse_args()

    if args.max_capacity: MAX_CAPACITY = args.max_capacity
    if args.target_capacity: TARGET_CAPACITY = args.target_capacity
    TARGET_CAPACITY = min(TARGET_CAPACITY, int(MAX_CAPACITY*0.8))
    #print MAX_CAPACITY, TARGET_CAPACITY

    for Point, (Filesystem, Size, Capacity) in MOUNT_POINTS.items():
        #specify the filesystem to clean
        if args.FILESYSTEM and not args.FILESYSTEM in [Filesystem, Point]: continue

        print '#'*70
        print '#' + ' '*68 + '#'
        print '#%s#' %'{0: ^68}'.format('WCleaner: %s (%s) ...' %(Point, Filesystem))
        print '#' + ' '*68 + '#'
        print '#'*70

        if args.n:
            print_nlargest(Point, Filesystem, args.n)
        else:
            clean_filesystem(Point, Filesystem, Size, Capacity, Auto=args.auto, No_Interface=args.no_interface)

def echo_wcleaner_conf():
    from pkg_resources import Requirement, resource_filename

    conf = resource_filename(Requirement.parse('wcleaner'), 'wcleaner/settings.py')

    with open(conf) as conf_f:
        print conf_f.read(),
--------------------------------------------------------------------------------
/wcleaner/junkcenter.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

'''
>>> JUNK_CENTER = JunkCenter('rd1.hy01', 6373, *[12, 13, 14, 15])
>>> for rd in [JUNK_CENTER.grey_rd, JUNK_CENTER.white_rd, JUNK_CENTER.black_rd, JUNK_CENTER.red_rd]: rd.flushdb()
True
True
True
True
>>> JUNK_CENTER.submit('/tmp/test.log.2015-03-*')
>>> for key in sorted(JUNK_CENTER.grey_rd.keys('*')): print key
/tmp/test.log.2015-03-*
>>> JUNK_CENTER.submit('/tmp/test.log.2015-*-*')
>>> for key in sorted(JUNK_CENTER.grey_rd.keys('*')): print key
/tmp/test.log.2015-*-*
>>> JUNK_CENTER.is_safe('/tmp/test.log.2015-03-05')
True
>>> for key in sorted(JUNK_CENTER.grey_rd.keys('*')): print key
/tmp/test.log.2015-*-*
>>> JUNK_CENTER.is_safe('/tmp/test.log.*-*-*')
False
>>> JUNK_CENTER.submit('/tmp/test.log.*-*-*')
>>> for key in sorted(JUNK_CENTER.grey_rd.keys('*')): print key
/tmp/test.log.*-*-*
>>> JUNK_CENTER.is_safe('/tmp/test.log.2015-*-05')
True
>>> for key in sorted(JUNK_CENTER.grey_rd.keys('*')): print key
/tmp/test.log.*-*-*
>>> JUNK_CENTER.grey_rd.move('/tmp/test.log.*-*-*', 14)
True
>>> for key in sorted(JUNK_CENTER.grey_rd.keys('*')): print key
>>> JUNK_CENTER.is_safe('/tmp/test.log.2015-*-05')
False
>>> JUNK_CENTER = JunkCenter('your redis host', 6379, *[12, 13, 14, 15])
>>> JUNK_CENTER.is_safe('/tmp/test.log.2015-*-05')
False
>>> JUNK_CENTER.is_dangerous('/tmp/test.log.2015-*-05')
False
>>> JUNK_CENTER.submit('/tmp/test.log.*-*-*')
'''

import re
import socket
import redis

class JunkCenter(object):
    '''
    ===Junk Center===

    grey/white/black/red list

    list: [
        junk1: set([
            hostname1,
            hostname2,
        ]),
        junk2: set([
            hostname1,
            hostname2,
        ]),
    ]

    greylist: '--auto' will clean junk in the greylist when the hostname matches. Every junk pattern wcleaner cleans up is submitted here. #safe or normal
    whitelist: '--auto' will clean junk in the whitelist. #safe
    blacklist: Junk in the blacklist is never cleaned automatically. #normal
    redlist: Junk in the redlist is never cleaned at all. #dangerous
    '''

    def __init__(self, host, port, grey_db, white_db, black_db, red_db):
        self.grey_rd = redis.StrictRedis(host=host, port=port, db=grey_db)
        self.white_rd = redis.StrictRedis(host=host, port=port, db=white_db)
        self.black_rd = redis.StrictRedis(host=host, port=port, db=black_db)
        self.red_rd = redis.StrictRedis(host=host, port=port, db=red_db)

        self.hostname = socket.gethostname()

    def get_similar_junk(self, rd, junk):
        pattern = re.sub('\d+', '*', junk)
        similar_junks = [key for key in rd.keys(pattern) if re.sub('\d+', '*', key) == pattern]

        if similar_junks:
            similar_junks.sort(key=lambda similar_junk: similar_junk.count('*'))

            #only keep the max one
            rd.sunionstore(similar_junks[-1], *similar_junks)
            for similar_junk in similar_junks[:-1]: rd.delete(similar_junk)

            return similar_junks[-1]
        else:
            return None

    def contain(self, rd, junk):
        similar_junk = self.get_similar_junk(rd, junk)
        if not similar_junk: return False

        if similar_junk.count('*') >= junk.count('*'):
            if rd != self.grey_rd: return True

            if self.hostname in rd.smembers(similar_junk): return True

        return False

    def submit(self, junk):
        try:
            for rd in [self.black_rd, self.white_rd, self.grey_rd]:
                if self.contain(rd, junk): return

            similar_junk = self.get_similar_junk(self.grey_rd, junk)
            if similar_junk is None: similar_junk = junk

            if similar_junk.count('*') >= junk.count('*'):
                self.grey_rd.sadd(similar_junk, self.hostname)
            else:
                self.grey_rd.sadd(junk, self.hostname)
                self.grey_rd.sunionstore(junk, junk, similar_junk) #merge the old key's hostnames into the new, more general key
                self.grey_rd.delete(similar_junk)
        except redis.ConnectionError:
            pass

    def is_dangerous(self, junk):
        '''in redlist'''
        try:
            if self.contain(self.red_rd, junk): return True
        except redis.ConnectionError:
            pass

        return False

    def is_safe(self, junk):
        '''
        not in redlist and not in blacklist
        in whitelist, or in greylist with a matching hostname
        '''

        try:
            if self.contain(self.red_rd, junk) or self.contain(self.black_rd, junk): return False

            if self.contain(self.white_rd, junk): return True

            if self.contain(self.grey_rd, junk):
                similar_junk = self.get_similar_junk(self.grey_rd, junk)
                if self.hostname in self.grey_rd.smembers(similar_junk): return True
        except redis.ConnectionError:
            pass

        return False

if __name__ == '__main__':
    import doctest
    doctest.testmod()
--------------------------------------------------------------------------------
/wcleaner/settings.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# a filesystem above this capacity (%) needs cleaning
MAX_CAPACITY = 90

# cleaning stops once capacity (%) drops below this target
TARGET_CAPACITY = 50

# files matching this pattern are treated as junk (log) files
JUNK_PATTERN = r'.*\blogs?\b.*'

# junk center redis
JUNK_CENTER_HOST = 'your redis host'
JUNK_CENTER_PORT = 6379
JUNK_CENTER_DBS = [0, 1, 2, 3] #grey/white/black/red dbs
--------------------------------------------------------------------------------
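
The grey/white/black/red lists live in the four Redis databases configured by JUNK_CENTER_DBS in wcleaner/settings.py, but the repository does not appear to ship any tooling for seeding them: only the greylist is populated automatically via JunkCenter.submit(). Below is a minimal sketch, not part of the package, of how an operator might pre-populate the whitelist and redlist by hand, assuming the default DB layout (0=grey, 1=white, 2=black, 3=red); the host, port, and junk patterns are placeholders.

```python
import redis

# Placeholders: point these at the junk-center Redis from /etc/wcleaner.conf.
HOST, PORT = 'your redis host', 6379

# Default DB layout from settings.py: 0=grey, 1=white, 2=black, 3=red.
white_rd = redis.StrictRedis(host=HOST, port=PORT, db=1)
red_rd = redis.StrictRedis(host=HOST, port=PORT, db=3)

# Each key is a junk pattern and its set members are hostnames; for the
# white/black/red lists JunkCenter.contain() only checks the key itself.
white_rd.sadd('/var/log/myapp/myapp.log.*-*-*', 'any-host')  # always safe to auto-clean
red_rd.sadd('/data/important/*.log', 'any-host')             # never cleaned
```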