├── .gitignore
├── README.md
├── config
│   ├── __init__.py
│   └── setting.default.py
├── cover.png
├── db
│   ├── ossync.db
│   └── ossync.db.bak
├── init.py
├── inotify_thread.py
├── logs
│   └── app.log
├── ossync.py
├── ossync
│   ├── __init__.py
│   ├── lib
│   │   ├── __init__.py
│   │   ├── helper.py
│   │   └── queue_model.py
│   └── sdk
│       ├── __init__.py
│       ├── oss_api.py
│       ├── oss_sample.py
│       ├── oss_util.py
│       ├── oss_xml_handler.py
│       └── pkg_info.py
├── queue_thread.py
├── setup.py
└── sync_thread.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.pyc
3 | doc/
4 | db/
5 | *.db
6 | *.log
7 | config/
8 | downloads/
9 | uploads/
10 | backup/
11 | logs/
12 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ossync (modified version)
2 | 
3 | Ossync is an open-source sync/upload tool for Aliyun OSS, written in Python and originally built on the inotify mechanism. License: [MIT](http://rem.mit-license.org/). This version is a modification of the real-time sync edition: it performs incremental backups only and does not use inotify. The program exits automatically after each backup run; the next run backs up whatever was added or modified in the meantime. It is recommended to set up a timer with crontab, for example to start the backup at 3 a.m.
4 | 
5 | ## Features
6 | 
7 | * **Sync multiple local folders to multiple buckets in one run** - Once the mapping between local folders and buckets is defined, several local folders can be synced to several buckets at the same time.
8 | * **Fast, queue-based, multi-threaded sync** - A message queue and worker threads make syncing fast.
9 | * **Safe and accurate sync** - Upload checksums and retransmission on failure ensure files are synced completely.
10 | 
11 | ## Installation
12 | Extract the program into any directory, change into that directory, and run:
13 | 
14 |     sudo python setup.py
15 | 
16 | If it prints "Installation complete successfully!", the installation succeeded. Otherwise, check that the following requirements are met and install the pyinotify module manually.
17 | 
18 | * Python newer than 2.6 (python2.7 recommended; python3 is not supported yet)
19 | * Check whether the system has a /proc/sys/fs/inotify/ directory, to confirm that the kernel supports inotify, i.e. the Linux kernel version is above 2.6.13.
20 | * Install the pyinotify module, [https://github.com/seb-m/pyinotify](https://github.com/seb-m/pyinotify).
21 | 
22 | 
23 | ## Running
24 | * Copy config/setting.default.py to setting.py and edit the settings in setting.py; see the explanatory comments in the configuration file.
25 | * From the program's root directory, run:
26 | 
27 |     nohup python ossync.py >/dev/null 2>&1 &
28 | 
29 | ## Scheduled runs
30 | 
31 | * To start the backup at 3 a.m., for example, run:
32 | 
33 |     crontab -e
34 | 
35 | then add this line to the crontab: 0 3 * * * python ~/ossyncone/ossync.py, save and exit.
36 | 
37 | **Note: check the log files in the logs directory to see how the system is running.**
38 | 
--------------------------------------------------------------------------------
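Since this version exits after every run, cron is the natural driver. A variant of the README's crontab line that first changes into the install directory and appends the run's output to a log file (the `~/ossyncone` path is taken from the README; adjust it to your install location, and the `logs/cron.log` name is illustrative):

    0 3 * * * cd ~/ossyncone && python ossync.py >> logs/cron.log 2>&1
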
/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/config/__init__.py
--------------------------------------------------------------------------------
/config/setting.default.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os  # needed for the os.path calls in LOGFILE_PATH and DB_PATH below
24 | 
25 | # OSS connection parameters
26 | ####################
27 | #       OSS        #
28 | ####################
29 | 
30 | HOST = "oss.aliyuncs.com"
31 | ACCESS_ID = ""
32 | SECRET_ACCESS_KEY = ""
33 | 
34 | # Mapping between OSS buckets and local directories: one bucket corresponds to
35 | # one or more local directories (local_folders). Multiple buckets can be defined. Example:
36 | # oss_mappers = [{'bucket': 'dzdata', 'local_folders': ['/root/testdata/audios', '/root/testdata/docs']},
37 | #                {'bucket': 'privdata', 'local_folders': ['/root/testdata/images', '/root/testdata/pdfs']}]
38 | ####################
39 | #     OSS MAP      #
40 | ####################
41 | oss_mappers = [{'bucket': 'dzdata', 'local_folders': ['/root/testdata/audios', '/root/testdata/docs']}]
42 | 
43 | # Logging options
44 | ####################
45 | # LOGGING SETTING  #
46 | ####################
47 | LOGFILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../", "logs/app.log")
48 | MAX_LOGFILE_SIZE = 104857600  # default log file size is 100M; when the limit is reached, a suffixed backup file is created automatically
49 | MAX_BACKUP_COUNT = 5  # default number of backup files is 5
50 | 
51 | # Maximum number of retries when uploading a file or deleting an object
52 | ####################
53 | #   MAX_RETRIES    #
54 | ####################
55 | MAX_RETRIES = 10
56 | 
57 | # Number of upload threads
58 | ####################
59 | #     NTHREADS     #
60 | ####################
61 | NTHREADS = 5
62 | 
63 | # Database path
64 | ####################
65 | #     DB PATH      #
66 | ####################
67 | DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../", "db/ossync.db")
--------------------------------------------------------------------------------
/cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/cover.png
--------------------------------------------------------------------------------
/db/ossync.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/db/ossync.db
--------------------------------------------------------------------------------
/db/ossync.db.bak:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/db/ossync.db.bak
--------------------------------------------------------------------------------
/init.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import sys
24 | import os
25 | import os.path
26 | from config.setting import *
27 | from ossync.lib import queue_model
28 | import logging
29 | import logging.handlers
30 | import time
31 | try:
32 |     from ossync.sdk.oss_api import *
33 | except ImportError:
34 |     from ossync.oss_api import *
35 | 
36 | def set_sys_to_utf8():
37 |     reload(sys)
38 |     sys.setdefaultencoding('utf-8')
39 | 
40 | def get_logger():
41 |     format = logging.Formatter("%(levelname)-10s %(asctime)s %(message)s")
42 |     logging.basicConfig(level = logging.INFO)
43 |     logger = logging.getLogger('app')
44 |     handler1 = logging.handlers.RotatingFileHandler(LOGFILE_PATH, maxBytes = MAX_LOGFILE_SIZE, backupCount = MAX_BACKUP_COUNT)
45 |     handler2 = logging.StreamHandler(sys.stdout)
46 |     handler1.setFormatter(format)
47 |     handler2.setFormatter(format)
48 |     logger.addHandler(handler1)
49 |     # logger.addHandler(handler2)
50 |     return logger
51 | 
52 | def check_config(logger):
53 |     if len(HOST) == 0 or len(ACCESS_ID) == 0 or len(SECRET_ACCESS_KEY) == 0:
54 |         msg = "Please set HOST, ACCESS_ID and SECRET_ACCESS_KEY"
55 |         logger.critical(msg)
56 |         exit(0)
57 |     if len(oss_mappers) == 0:
58 |         msg = "Please set OSS mappers"
59 |         logger.critical(msg)
60 |         exit(0)
61 |     oss = OssAPI(HOST, ACCESS_ID, SECRET_ACCESS_KEY)
62 |     for oss_mapper in oss_mappers:
63 |         bucket = oss_mapper['bucket']
64 |         acl = ''
65 |         headers = {}
66 |         try:
67 |             res = oss.create_bucket(bucket, acl, headers)
68 |         except Exception as e:
69 |             logger.critical(str(e))
70 |             exit(0)
71 |         if (res.status / 100) != 2:
72 |             msg = "Bucket: " + bucket + " does not exist or could not be created; please rename your bucket."
73 |             logger.critical(msg)
74 |             exit(0)
75 |         local_folders = oss_mapper['local_folders']
76 |         if len(local_folders) > 0:
77 |             for folder in local_folders:
78 |                 if not os.path.exists(folder) or not os.path.isdir(folder):
79 |                     msg = "Local folder: " + folder + " does not exist or is not a directory. Please check your settings."
80 |                     logger.critical(msg)
81 |                     exit(0)
82 |         else:
83 |             msg = "Please set at least one local folder for each bucket"
84 |             logger.critical(msg)
85 |             exit(0)
86 | 
87 | def queue_unprocessed(queue, logger):
88 |     dbpath = DB_PATH
89 |     qm = queue_model.QueueModel(dbpath)
90 |     try:
91 |         qm.open()
92 |         items = qm.find_all(status = 0)
93 |         if items:
94 |             for item in items:
95 |                 logger.info(item)
96 |                 if int(item['retries']) < MAX_RETRIES:
97 |                     el = item['bucket'] + '::' + item['root'] + '::' + item['relpath'] + '::' + item['action'] + '::' + item['hashcode']
98 |                     queue.put(el, block = True, timeout = 1)
99 |                     msg = 'queue unprocessed element:' + el
100 |                     logger.info(msg)
101 |         qm.close()
102 |     except Exception as e:
103 |         logger.critical(str(e))
104 | 
--------------------------------------------------------------------------------
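queue_unprocessed above rebuilds queue elements from the database as `::`-joined strings; the worker threads (queue_thread.py and sync_thread.py, not included in this excerpt) are presumably the consumers that split them again. A minimal sketch of the round trip, with illustrative values only:

    # Sketch: the queue element format produced by init.queue_unprocessed.
    # Fields: bucket, root folder, path relative to root, action (C/M/D), element hash.
    item = {'bucket': 'dzdata', 'root': '/root/testdata/audios',
            'relpath': 'a.mp3', 'action': 'C', 'hashcode': 'abc123'}
    el = '::'.join([item['bucket'], item['root'], item['relpath'],
                    item['action'], item['hashcode']])
    # assumed consumer side:
    bucket, root, relpath, action, hashcode = el.split('::')

Note that a path containing "::" would break this format, which is one reason the relative path is stored separately from the root.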
/inotify_thread.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, sys, threading, logging
24 | from Queue import *
25 | from ossync.lib import queue_model
26 | from ossync.lib import helper
27 | try:
28 |     import pyinotify
29 | except ImportError as e:
30 |     print str(e)
31 |     exit(0)
32 | 
33 | class EventHandler(pyinotify.ProcessEvent):
34 |     """Event handler"""
35 | 
36 |     def __init__(self, monitered_dir, queue, bucket):
37 |         self.monitered_dir = monitered_dir
38 |         self.queue = queue
39 |         self.bucket = bucket
40 |         self.logger = logging.getLogger('app')
41 |         dbpath = 'db/ossync.db'
42 |         self.qm = queue_model.QueueModel(dbpath)
43 | 
44 |     def process_IN_CREATE(self, event):
45 |         self.process_event(event, 'CREATE')
46 | 
47 |     def process_IN_DELETE(self, event):
48 |         self.process_event(event, 'DELETE')
49 | 
50 |     def process_IN_MODIFY(self, event):
51 |         self.process_event(event, 'MODIFY')
52 | 
53 |     def process_IN_MOVED_FROM(self, event):
54 |         self.logger.info("Moved from file: %s " % os.path.join(event.path, event.name))
55 |         self.process_event(event, 'DELETE')
56 | 
57 |     def process_IN_MOVED_TO(self, event):
58 |         self.logger.info("Moved to file: %s " % os.path.join(event.path, event.name))
59 |         realpath = os.path.join(event.path, event.name)
60 |         if event.dir:
61 |             self.queue_dir(realpath)
62 |         self.process_event(event, 'CREATE')
63 | 
64 |     def process_event(self, event, action):
65 |         if len(action) == 0:
66 |             return False
67 |         realpath = os.path.join(event.path, event.name)
68 |         relpath = os.path.relpath(realpath, self.monitered_dir)
69 |         if action == 'DELETE':
70 |             if event.dir:
71 |                 relpath += '/'
72 |         self.logger.info(action.title() + " file: %s " % realpath)
73 |         el = self.bucket + '::' + self.monitered_dir + '::' + relpath + '::' + action[0]
74 |         self.save_el(self.monitered_dir, relpath, self.bucket, action[0])
75 |         self.queue_el(el)
76 | 
77 |     def queue_dir(self, queue_path):
78 |         files = list(helper.walk_files(queue_path, yield_folders = True))
79 |         if len(files) > 0:
80 |             for path in files:
81 |                 relpath = os.path.relpath(path, self.monitered_dir)
82 |                 self.save_el(self.monitered_dir, relpath, self.bucket, 'C')
83 |                 el = self.bucket + '::' + self.monitered_dir + '::' + relpath + '::' + 'C'
84 |                 self.queue_el(el)
85 | 
86 |     def save_el(self, root, relpath, bucket, action):
87 |         # helper.calc_el_md5 expects a file hash as its fourth argument; this
88 |         # legacy thread has none available, so an empty string is passed here.
89 |         hashcode = helper.calc_el_md5(root, relpath, bucket, '')
90 |         self.qm.open()
91 |         if self.is_el_existed(hashcode):
92 |             self.qm.update_action(hashcode, action)
93 |             self.qm.update_status(hashcode, 0)
94 |         else:
95 |             data = {"root": root, "relpath": relpath, "bucket": bucket, "action": action, "status": 0, "hashcode": hashcode, "retries": 0}
96 |             self.qm.save(data)
97 |         self.qm.close()
98 | 
99 |     def is_el_existed(self, hashcode):
100 |         row = self.qm.get(hashcode)
101 |         if row:
102 |             return True
103 |         return False
104 | 
105 |     def queue_el(self, el):
106 |         '''el: element of the queue, formatted as "bucket::root::path::C|M|D";
107 |         C means CREATE, M means MODIFY, D means DELETE
108 |         '''
109 |         try:
110 |             self.queue.put(el, block = True, timeout = 1)
111 |             msg = 'queue element:' + el
112 |             self.logger.info(msg)
113 |         except Full as e:
114 |             self.logger.error(str(e))
115 | 
116 | class InotifyThread(threading.Thread):
117 |     def __init__(self, bucket, root, queue, *args, **kwargs):
118 |         threading.Thread.__init__(self, *args, **kwargs)
119 |         self.bucket = bucket
120 |         self.queue = queue
121 |         self.root = root
122 |         self.logger = logging.getLogger('app')
123 |         self._terminate = False
124 | 
125 |     def terminate(self):
126 |         self._terminate = True
127 |         self.notifier.stop()
128 | 
129 |     def start_notify(self, monitered_dir):
130 |         wm = pyinotify.WatchManager()
131 |         mask = pyinotify.IN_DELETE | pyinotify.IN_CREATE | pyinotify.IN_MODIFY | pyinotify.IN_MOVED_FROM | pyinotify.IN_MOVED_TO
132 |         self.notifier = pyinotify.Notifier(wm, EventHandler(monitered_dir, self.queue, self.bucket), timeout = 10)
133 |         wm.add_watch(monitered_dir, mask, rec = True, auto_add = True)
134 |         self.logger.info('now starting to monitor %s' % (monitered_dir))
135 |         while True:
136 |             if self._terminate:
137 |                 break
138 |             self.notifier.process_events()
139 |             if self.notifier.check_events():
140 |                 self.notifier.read_events()
141 | 
142 |     def run(self):
143 |         self.start_notify(self.root)
144 |         return
145 | 
146 | if __name__ == '__main__':
147 |     queue = Queue()
148 |     root = '.'
149 |     bucket = 'dzdata'
150 |     logger = logging.getLogger('app')
151 |     logger.setLevel(logging.INFO)
152 |     logger.addHandler(logging.FileHandler('logs/app.log'))
153 |     inotifythd = InotifyThread(bucket, root, queue)
154 |     inotifythd.start()
155 | 
--------------------------------------------------------------------------------
/logs/app.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/logs/app.log
--------------------------------------------------------------------------------
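inotify_thread.py above is carried over from the real-time edition and is not started by this fork (its import in ossync.py below is commented out). For reference, a minimal standalone pyinotify loop in the same manual-pump style as InotifyThread.start_notify might look like this (the watched path is illustrative):

    import pyinotify

    class Handler(pyinotify.ProcessEvent):
        def process_IN_CREATE(self, event):
            print 'created:', event.pathname

    wm = pyinotify.WatchManager()
    mask = pyinotify.IN_CREATE | pyinotify.IN_DELETE | pyinotify.IN_MODIFY
    notifier = pyinotify.Notifier(wm, Handler(), timeout = 10)
    wm.add_watch('/tmp/watched', mask, rec = True, auto_add = True)
    while True:
        notifier.process_events()
        if notifier.check_events():
            notifier.read_events()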
/ossync.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, threading
24 | from Queue import *
25 | from ossync.sdk.oss_api import *
26 | from queue_thread import QueueThread
27 | from sync_thread import SyncThread
28 | # from inotify_thread import InotifyThread
29 | from init import *
30 | 
31 | if __name__ == '__main__':
32 |     try:
33 |         set_sys_to_utf8()
34 |         logger = get_logger()
35 |         check_config(logger)
36 |         queue = Queue()
37 |         # check unprocessed items; if any exist, queue them
38 |         queue_unprocessed(queue, logger)
39 |         oss = OssAPI(HOST, ACCESS_ID, SECRET_ACCESS_KEY)
40 | 
41 |         syncthd = SyncThread(oss, queue)
42 |         syncthd.start()
43 | 
44 |         queuethd = QueueThread(oss_mappers, queue)
45 |         queuethd.start()
46 |     except KeyboardInterrupt:
47 |         # stop all worker threads (each provides a terminate() method), then exit
48 |         for thd in threading.enumerate():
49 |             if thd is threading.current_thread():
50 |                 continue
51 |             thd.terminate()
52 |         logger.error('KeyboardInterrupt: terminating worker threads')
53 |         exit(0)
54 | 
--------------------------------------------------------------------------------
/ossync/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/ossync/__init__.py
--------------------------------------------------------------------------------
/ossync/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/ossync/lib/__init__.py
--------------------------------------------------------------------------------
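helper.py below computes a file's checksum with a single f.read(), which loads the entire file into memory; for the large files a backup tool may meet, a chunked variant (a sketch, not part of the repository) keeps memory use constant:

    import hashlib

    def calc_file_md5_chunked(filepath, chunk_size = 8192):
        """Same result as helper.calc_file_md5, but reads in fixed-size chunks."""
        md5obj = hashlib.md5()
        with open(filepath, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), ''):
                md5obj.update(chunk)
        return md5obj.hexdigest()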
/ossync/lib/helper.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, fnmatch
24 | import hashlib
25 | 
26 | def encode(s):
27 |     if isinstance(s, unicode):
28 |         return s.encode('utf-8')
29 |     else:
30 |         return s
31 | 
32 | def walk_files(root, patterns = '*', single_level = False, yield_folders = False):
33 |     patterns = patterns.split(';')
34 |     for path, subdirs, files in os.walk(root):
35 |         if yield_folders:
36 |             files.extend(subdirs)
37 |         files.sort()
38 |         for name in files:
39 |             for pattern in patterns:
40 |                 if fnmatch.fnmatch(name, pattern):
41 |                     yield os.path.join(path, name)
42 |                     break
43 |         if single_level:
44 |             break
45 | 
46 | def calc_file_md5(filepath):
47 |     """calc a file's MD5 hashcode (reads the whole file into memory)"""
48 |     with open(filepath, 'rb') as f:
49 |         md5obj = hashlib.md5()
50 |         md5obj.update(f.read())
51 |         hashstr = md5obj.hexdigest()
52 |     return hashstr
53 | 
54 | def calc_el_md5(root, relpath, bucket, filehash):
55 |     """calc a queue element's hashcode"""
56 |     m = hashlib.md5()
57 |     m.update(root + relpath + bucket + filehash)
58 |     hashcode = m.hexdigest()
59 |     return hashcode
60 | 
--------------------------------------------------------------------------------
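queue_model.py below reads and writes a `queue` table whose CREATE statement is not part of this dump (the database ships prebuilt as db/ossync.db). From the seven-value insert and the hashcode lookups, the schema is presumably close to the following sketch (column names and order inferred from the code, not authoritative):

    import sqlite3

    # Hypothetical schema inferred from queue_model.py's queries; the actual
    # db/ossync.db layout may differ (column order matters for the 7-value insert).
    conn = sqlite3.connect('db/ossync.db')
    conn.execute('''create table if not exists queue (
        root text, relpath text, bucket text, action text,
        status integer, hashcode text, retries integer)''')
    conn.commit()
    conn.close()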
/ossync/lib/queue_model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import sqlite3
24 | import itertools
25 | 
26 | class QueueModel(object):
27 |     def __init__(self, dbpath):
28 |         self.dbpath = dbpath
29 | 
30 |     def open(self):
31 |         self.conn = sqlite3.connect(self.dbpath)
32 |         self.conn.text_factory = lambda x: unicode(x, 'utf-8', 'ignore')
33 |         self.cursor = self.conn.cursor()
34 | 
35 |     def close(self):
36 |         self.cursor.close()
37 |         self.conn.close()
38 | 
39 |     def save(self, data={"root": '', "relpath": '', "bucket": '', "action": '', "status": 0, "hashcode": '', "retries": 0}):
40 |         if len(data) == 0:
41 |             return False
42 |         self.cursor.execute('insert into queue values(?, ?, ?, ?, ?, ?, ?)', (data['root'], data['relpath'], data['bucket'], data['action'], data['status'], data['hashcode'], data['retries']))
43 |         self.conn.commit()
44 | 
45 |     def get(self, hashcode):
46 |         self.cursor.execute('select * from queue where hashcode=?', (hashcode, ))
47 |         result = self._map_fields(self.cursor)
48 |         if len(result) > 0:
49 |             return result[0]
50 |         return None
51 | 
52 |     def find_all(self, status):
53 |         self.cursor.execute('select * from queue where status=?', (status, ))
54 |         result = self._map_fields(self.cursor)
55 |         if len(result) > 0:
56 |             return result
57 |         return None
58 | 
59 |     def update_status(self, hashcode, status):
60 |         self.cursor.execute('update queue set status=? where hashcode=?', (status, hashcode))
61 |         self.conn.commit()
62 | 
63 |     def update_action(self, hashcode, action):
64 |         self.cursor.execute('update queue set action=? where hashcode=?', (action, hashcode))
65 |         self.conn.commit()
66 | 
67 |     def update_retries(self, hashcode, retries):
68 |         self.cursor.execute('update queue set retries=? where hashcode=?', (retries, hashcode))
69 |         self.conn.commit()
70 | 
71 |     def delete(self, hashcode):
72 |         self.cursor.execute('delete from queue where hashcode=?', (hashcode,))
73 |         self.conn.commit()
74 | 
75 |     def _map_fields(self, cursor):
76 |         """Map result tuples to named fields"""
77 |         fieldnames = [d[0].lower() for d in cursor.description]
78 |         result = []
79 |         while True:
80 |             rows = cursor.fetchmany()
81 |             if not rows:
82 |                 break
83 |             for row in rows:
84 |                 result.append(dict(itertools.izip(fieldnames, row)))
85 |         return result
86 | 
--------------------------------------------------------------------------------
/ossync/sdk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/ossync/sdk/__init__.py
--------------------------------------------------------------------------------
/ossync/sdk/oss_api.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding=utf-8
3 | 
4 | # Copyright (c) 2011, Alibaba Cloud Computing
5 | # All rights reserved.
6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the 9 | # "Software"), to deal in the Software without restriction, including 10 | # without limitation the rights to use, copy, modify, merge, publish, dis- 11 | # tribute, sublicense, and/or sell copies of the Software, and to permit 12 | # persons to whom the Software is furnished to do so, subject to the fol- 13 | # lowing conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included 16 | # in all copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- 20 | # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 | # IN THE SOFTWARE. 25 | 26 | import httplib 27 | import time 28 | import base64 29 | import urllib 30 | import StringIO 31 | import sys 32 | try: 33 | from oss.oss_util import * 34 | except: 35 | from oss_util import * 36 | try: 37 | from oss.oss_xml_handler import * 38 | except: 39 | from oss_xml_handler import * 40 | 41 | class OssAPI: 42 | ''' 43 | A simple OSS API 44 | ''' 45 | DefaultContentType = 'application/octet-stream' 46 | provider = PROVIDER 47 | __version__ = '0.3.2' 48 | Version = __version__ 49 | AGENT = 'oss-python%s (%s)' % (__version__, sys.platform) 50 | 51 | def __init__(self, host, access_id, secret_access_key='', port=80, is_security=False): 52 | self.SendBufferSize = 8192 53 | self.RecvBufferSize = 1024*1024*10 54 | self.host = get_second_level_domain(host) 55 | self.port = port 56 | self.access_id = access_id 57 | self.secret_access_key = secret_access_key 58 | self.show_bar = False 59 | self.is_security = is_security 60 | self.retry_times = 5 61 | self.agent = self.AGENT 62 | self.debug = False 63 | 64 | def set_debug(self, is_debug): 65 | if is_debug: 66 | self.debug = True 67 | 68 | def set_retry_times(self, retry_times=5): 69 | self.retry_times = retry_times 70 | 71 | def set_send_buf_size(self, buf_size): 72 | try: 73 | self.SendBufferSize = (int)(buf_size) 74 | except ValueError: 75 | pass 76 | 77 | def set_recv_buf_size(self, buf_size): 78 | try: 79 | self.RecvBufferSize = (int)(buf_size) 80 | except ValueError: 81 | pass 82 | 83 | def get_connection(self, tmp_host=None): 84 | host = '' 85 | port = 80 86 | timeout = 10 87 | if not tmp_host: 88 | tmp_host = self.host 89 | host_port_list = tmp_host.split(":") 90 | if len(host_port_list) == 1: 91 | host = host_port_list[0].strip() 92 | elif len(host_port_list) == 2: 93 | host = host_port_list[0].strip() 94 | port = int(host_port_list[1].strip()) 95 | if self.is_security or port == 443: 96 | self.is_security = True 97 | if sys.version_info >= (2, 6): 98 | return httplib.HTTPSConnection(host=host, port=port, timeout=timeout) 99 | else: 100 | return httplib.HTTPSConnection(host=host, port=port) 101 | else: 102 | if sys.version_info >= (2, 6): 103 | return httplib.HTTPConnection(host=host, port=port, timeout=timeout) 104 | else: 105 | return httplib.HTTPConnection(host=host, port=port) 106 | 107 | def sign_url_auth_with_expire_time(self, method, url, headers=None, resource="/", timeout=60, params=None): 108 | ''' 109 | 
Create the authorization for OSS based on the input method, url, body and headers 110 | 111 | :type method: string 112 | :param method: one of PUT, GET, DELETE, HEAD 113 | 114 | :type url: string 115 | :param:HTTP address of bucket or object, eg: http://HOST/bucket/object 116 | 117 | :type headers: dict 118 | :param: HTTP header 119 | 120 | :type resource: string 121 | :param:path of bucket or object, eg: /bucket/ or /bucket/object 122 | 123 | :type timeout: int 124 | :param 125 | 126 | Returns: 127 | signature url. 128 | ''' 129 | if not headers: 130 | headers = {} 131 | if not params: 132 | params = {} 133 | send_time = str(int(time.time()) + timeout) 134 | headers['Date'] = send_time 135 | auth_value = get_assign(self.secret_access_key, method, headers, resource, None, self.debug) 136 | params["OSSAccessKeyId"] = self.access_id 137 | params["Expires"] = str(send_time) 138 | params["Signature"] = auth_value 139 | sign_url = append_param(url, params) 140 | return sign_url 141 | 142 | def sign_url(self, method, bucket, object, timeout=60, headers=None, params=None): 143 | ''' 144 | Create the authorization for OSS based on the input method, url, body and headers 145 | 146 | :type method: string 147 | :param method: one of PUT, GET, DELETE, HEAD 148 | 149 | :type bucket: string 150 | :param: 151 | 152 | :type object: string 153 | :param: 154 | 155 | :type timeout: int 156 | :param 157 | 158 | :type headers: dict 159 | :param: HTTP header 160 | 161 | :type params: dict 162 | :param: the parameters that put in the url address as query string 163 | 164 | :type resource: string 165 | :param:path of bucket or object, eg: /bucket/ or /bucket/object 166 | 167 | Returns: 168 | signature url. 169 | ''' 170 | if not headers: 171 | headers = {} 172 | if not params: 173 | params = {} 174 | send_time = str(int(time.time()) + timeout) 175 | headers['Date'] = send_time 176 | if isinstance(object, unicode): 177 | object = object.encode('utf-8') 178 | resource = "/%s/%s%s" % (bucket, object, get_resource(params)) 179 | auth_value = get_assign(self.secret_access_key, method, headers, resource, None, self.debug) 180 | params["OSSAccessKeyId"] = self.access_id 181 | params["Expires"] = str(send_time) 182 | params["Signature"] = auth_value 183 | url = '' 184 | if self.is_security: 185 | if is_ip(self.host): 186 | url = "https://%s/%s/%s" % (self.host, bucket, object) 187 | else: 188 | url = "https://%s.%s/%s" % (bucket, self.host, object) 189 | else: 190 | if is_ip(self.host): 191 | url = "http://%s/%s/%s" % (self.host, bucket, object) 192 | else: 193 | url = "http://%s.%s/%s" % (bucket, self.host, object) 194 | sign_url = append_param(url, params) 195 | return sign_url 196 | 197 | def _create_sign_for_normal_auth(self, method, headers=None, resource="/"): 198 | ''' 199 | NOT public API 200 | Create the authorization for OSS based on header input. 201 | it should be put into "Authorization" parameter of header. 
202 | 203 | :type method: string 204 | :param:one of PUT, GET, DELETE, HEAD 205 | 206 | :type headers: dict 207 | :param: HTTP header 208 | 209 | :type resource: string 210 | :param:path of bucket or object, eg: /bucket/ or /bucket/object 211 | 212 | Returns: 213 | signature string 214 | ''' 215 | auth_value = "%s %s:%s" % (self.provider, self.access_id, get_assign(self.secret_access_key, method, headers, resource, None, self.debug)) 216 | return auth_value 217 | 218 | def bucket_operation(self, method, bucket, headers=None, params=None): 219 | return self.http_request(method, bucket, '', headers, '', params) 220 | 221 | def object_operation(self, method, bucket, object, headers=None, body='', params=None): 222 | return self.http_request(method, bucket, object, headers, body, params) 223 | 224 | def http_request(self, method, bucket, object, headers=None, body='', params=None): 225 | ''' 226 | Send http request of operation 227 | 228 | :type method: string 229 | :param method: one of PUT, GET, DELETE, HEAD, POST 230 | 231 | :type bucket: string 232 | :param 233 | 234 | :type object: string 235 | :param 236 | 237 | :type headers: dict 238 | :param: HTTP header 239 | 240 | :type body: string 241 | :param 242 | 243 | Returns: 244 | HTTP Response 245 | ''' 246 | retry = 5 247 | res = None 248 | while retry > 0: 249 | retry -= 1 250 | tmp_bucket = bucket 251 | tmp_object = object 252 | tmp_headers = {} 253 | if headers and isinstance(headers, dict): 254 | tmp_headers = headers.copy() 255 | tmp_params = {} 256 | if params and isinstance(params, dict): 257 | tmp_params = params.copy() 258 | 259 | res = self.http_request_with_redirect(method, tmp_bucket, tmp_object, tmp_headers, body, tmp_params) 260 | if res.status == 301 or res.status == 302: 261 | self.host = helper_get_host_from_resp(res, bucket) 262 | else: 263 | return res 264 | return res 265 | 266 | def http_request_with_redirect(self, method, bucket, object, headers=None, body='', params=None): 267 | ''' 268 | Send http request of operation 269 | 270 | :type method: string 271 | :param method: one of PUT, GET, DELETE, HEAD, POST 272 | 273 | :type bucket: string 274 | :param 275 | 276 | :type object: string 277 | :param 278 | 279 | :type headers: dict 280 | :param: HTTP header 281 | 282 | :type body: string 283 | :param 284 | 285 | Returns: 286 | HTTP Response 287 | ''' 288 | if not params: 289 | params = {} 290 | if not headers: 291 | headers = {} 292 | if isinstance(object, unicode): 293 | object = object.encode('utf-8') 294 | if not bucket: 295 | resource = "/" 296 | headers['Host'] = self.host 297 | else: 298 | headers['Host'] = "%s.%s" % (bucket, self.host) 299 | resource = "/%s/" % bucket 300 | resource = "%s%s%s" % (resource.encode('utf-8'), object, get_resource(params)) 301 | object = urllib.quote(object) 302 | url = "/%s" % object 303 | if is_ip(self.host): 304 | url = "/%s/%s" % (bucket, object) 305 | if not bucket: 306 | url = "/%s" % object 307 | headers['Host'] = self.host 308 | url = append_param(url, params) 309 | date = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()) 310 | headers['Date'] = date 311 | headers['Authorization'] = self._create_sign_for_normal_auth(method, headers, resource) 312 | headers['User-Agent'] = self.agent 313 | if check_bucket_valid(bucket) and not is_ip(self.host): 314 | conn = self.get_connection(headers['Host']) 315 | else: 316 | conn = self.get_connection() 317 | conn.request(method, url, body, headers) 318 | return conn.getresponse() 319 | 320 | def get_service(self, headers=None): 321 
| ''' 322 | List all buckets of user 323 | ''' 324 | return self.list_all_my_buckets(headers) 325 | 326 | def list_all_my_buckets(self, headers=None): 327 | ''' 328 | List all buckets of user 329 | type headers: dict 330 | :param 331 | 332 | Returns: 333 | HTTP Response 334 | ''' 335 | method = 'GET' 336 | bucket = '' 337 | object = '' 338 | body = '' 339 | params = {} 340 | return self.http_request(method, bucket, object, headers, body, params) 341 | 342 | def get_bucket_acl(self, bucket): 343 | ''' 344 | Get Access Control Level of bucket 345 | 346 | :type bucket: string 347 | :param 348 | 349 | Returns: 350 | HTTP Response 351 | ''' 352 | method = 'GET' 353 | object = '' 354 | headers = {} 355 | body = '' 356 | params = {} 357 | params['acl'] = '' 358 | return self.http_request(method, bucket, object, headers, body, params) 359 | 360 | def get_bucket_location(self, bucket): 361 | ''' 362 | Get Location of bucket 363 | ''' 364 | method = 'GET' 365 | object = '' 366 | headers = {} 367 | body = '' 368 | params = {} 369 | params['location'] = '' 370 | return self.http_request(method, bucket, object, headers, body, params) 371 | 372 | def get_bucket(self, bucket, prefix='', marker='', delimiter='', maxkeys='', headers=None): 373 | ''' 374 | List object that in bucket 375 | ''' 376 | return self.list_bucket(bucket, prefix, marker, delimiter, maxkeys, headers) 377 | 378 | def list_bucket(self, bucket, prefix='', marker='', delimiter='', maxkeys='', headers=None): 379 | ''' 380 | List object that in bucket 381 | 382 | :type bucket: string 383 | :param 384 | 385 | :type prefix: string 386 | :param 387 | 388 | :type marker: string 389 | :param 390 | 391 | :type delimiter: string 392 | :param 393 | 394 | :type maxkeys: string 395 | :param 396 | 397 | :type headers: dict 398 | :param: HTTP header 399 | 400 | Returns: 401 | HTTP Response 402 | ''' 403 | method = 'GET' 404 | object = '' 405 | body = '' 406 | params = {} 407 | params['prefix'] = prefix 408 | params['marker'] = marker 409 | params['delimiter'] = delimiter 410 | params['max-keys'] = maxkeys 411 | return self.http_request(method, bucket, object, headers, body, params) 412 | 413 | def create_bucket(self, bucket, acl='', headers=None): 414 | ''' 415 | Create bucket 416 | ''' 417 | return self.put_bucket(bucket, acl, headers) 418 | 419 | def put_bucket(self, bucket, acl='', headers=None): 420 | ''' 421 | Create bucket 422 | 423 | :type bucket: string 424 | :param 425 | 426 | :type acl: string 427 | :param: one of private public-read public-read-write 428 | 429 | :type headers: dict 430 | :param: HTTP header 431 | 432 | Returns: 433 | HTTP Response 434 | ''' 435 | if not headers: 436 | headers = {} 437 | if acl != '': 438 | if "AWS" == self.provider: 439 | headers['x-amz-acl'] = acl 440 | else: 441 | headers['x-oss-acl'] = acl 442 | method = 'PUT' 443 | object = '' 444 | body = '' 445 | params = {} 446 | return self.http_request(method, bucket, object, headers, body, params) 447 | 448 | def put_bucket_with_location(self, bucket, acl='', location='', headers=None): 449 | ''' 450 | Create bucket 451 | 452 | :type bucket: string 453 | :param 454 | 455 | :type acl: string 456 | :param: one of private public-read public-read-write 457 | 458 | :type location: string 459 | :param: 460 | 461 | :type headers: dict 462 | :param: HTTP header 463 | 464 | Returns: 465 | HTTP Response 466 | ''' 467 | if not headers: 468 | headers = {} 469 | if acl != '': 470 | if "AWS" == self.provider: 471 | headers['x-amz-acl'] = acl 472 | else: 473 | 
headers['x-oss-acl'] = acl
474 |         params = {}
475 |         body = ''
476 |         if location != '':
477 |             body = r'<CreateBucketConfiguration>'
478 |             body += r'<LocationConstraint>'
479 |             body += location
480 |             body += r'</LocationConstraint>'
481 |             body += r'</CreateBucketConfiguration>'
482 |         method = 'PUT'
483 |         object = ''
484 |         return self.http_request(method, bucket, object, headers, body, params)
485 | 
486 |     def delete_bucket(self, bucket, headers=None):
487 |         '''
488 |         Delete bucket
489 | 
490 |         :type bucket: string
491 |         :param
492 | 
493 |         Returns:
494 |             HTTP Response
495 |         '''
496 |         method = 'DELETE'
497 |         object = ''
498 |         body = ''
499 |         params = {}
500 |         return self.http_request(method, bucket, object, headers, body, params)
501 | 
502 |     def put_object_with_data(self, bucket, object, input_content, content_type=DefaultContentType, headers=None, params=None):
503 |         '''
504 |         Put object into bucket, the content of object is from input_content
505 |         '''
506 |         return self.put_object_from_string(bucket, object, input_content, content_type, headers, params)
507 | 
508 |     def put_object_from_string(self, bucket, object, input_content, content_type=DefaultContentType, headers=None, params=None):
509 |         '''
510 |         Put object into bucket, the content of object is from input_content
511 | 
512 |         :type bucket: string
513 |         :param
514 | 
515 |         :type object: string
516 |         :param
517 | 
518 |         :type input_content: string
519 |         :param
520 | 
521 |         :type content_type: string
522 |         :param: the object content type that supported by HTTP
523 | 
524 |         :type headers: dict
525 |         :param: HTTP header
526 | 
527 |         Returns:
528 |             HTTP Response
529 |         '''
530 |         if not headers:
531 |             headers = {}
532 |         headers['Content-Type'] = content_type
533 |         headers['Content-Length'] = str(len(input_content))
534 |         fp = StringIO.StringIO(input_content)
535 |         res = self.put_object_from_fp(bucket, object, fp, content_type, headers, params)
536 |         fp.close()
537 |         return res
538 | 
539 |     def _open_conn_to_put_object(self, bucket, object, filesize, content_type=DefaultContentType, headers=None, params=None):
540 |         '''
541 |         NOT public API
542 |         Open a connection to put object
543 | 
544 |         :type bucket: string
545 |         :param
546 | 
547 |         :type filesize: int
548 |         :param
549 | 
550 |         :type object: string
551 |         :param
552 | 
553 |         :type input_content: string
554 |         :param
555 | 
556 |         :type content_type: string
557 |         :param: the object content type that supported by HTTP
558 | 
559 |         :type headers: dict
560 |         :param: HTTP header
561 | 
562 |         Returns:
563 |             Initialized HTTPConnection
564 |         '''
565 |         if not params:
566 |             params = {}
567 |         if not headers:
568 |             headers = {}
569 |         method = 'PUT'
570 |         if isinstance(object, unicode):
571 |             object = object.encode('utf-8')
572 |         resource = "/%s/" % bucket
573 |         if not bucket:
574 |             resource = "/"
575 |         resource = "%s%s%s" % (resource.encode('utf-8'), object, get_resource(params))
576 | 
577 |         object = urllib.quote(object)
578 |         url = "/%s" % object
579 |         if bucket:
580 |             headers['Host'] = "%s.%s" % (bucket, self.host)
581 |         else:
582 |             headers['Host'] = self.host
583 |         if is_ip(self.host):
584 |             url = "/%s/%s" % (bucket, object)
585 |             headers['Host'] = self.host
586 |         url = append_param(url, params)
587 |         date = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())
588 | 
589 |         if check_bucket_valid(bucket) and not is_ip(self.host):
590 |             conn = self.get_connection(headers['Host'])
591 |         else:
592 |             conn = self.get_connection()
593 |         conn.putrequest(method, url)
594 |         if isinstance(content_type, unicode):
595 |             content_type = content_type.encode('utf-8')
596 |         headers["Content-Type"] = content_type
597 | 
headers["Content-Length"] = filesize 598 | headers["Date"] = date 599 | headers["Expect"] = "100-Continue" 600 | headers['User-Agent'] = self.agent 601 | for k in headers.keys(): 602 | conn.putheader(str(k), str(headers[k])) 603 | if '' != self.secret_access_key and '' != self.access_id: 604 | auth = self._create_sign_for_normal_auth(method, headers, resource) 605 | conn.putheader("Authorization", auth) 606 | conn.endheaders() 607 | return conn 608 | 609 | def put_object_from_file(self, bucket, object, filename, content_type='', headers=None, params=None): 610 | ''' 611 | put object into bucket, the content of object is read from file 612 | 613 | :type bucket: string 614 | :param 615 | 616 | :type object: string 617 | :param 618 | 619 | :type fllename: string 620 | :param: the name of the read file 621 | 622 | :type content_type: string 623 | :param: the object content type that supported by HTTP 624 | 625 | :type headers: dict 626 | :param: HTTP header 627 | 628 | Returns: 629 | HTTP Response 630 | ''' 631 | fp = open(filename, 'rb') 632 | if not content_type: 633 | content_type = get_content_type_by_filename(filename) 634 | res = self.put_object_from_fp(bucket, object, fp, content_type, headers, params) 635 | fp.close() 636 | return res 637 | 638 | def view_bar(self, num=1, sum=100): 639 | rate = float(num) / float(sum) 640 | rate_num = int(rate * 100) 641 | print '\r%d%% ' % (rate_num), 642 | sys.stdout.flush() 643 | 644 | def put_object_from_fp(self, bucket, object, fp, content_type=DefaultContentType, headers=None, params=None): 645 | ''' 646 | Put object into bucket, the content of object is read from file pointer 647 | 648 | :type bucket: string 649 | :param 650 | 651 | :type object: string 652 | :param 653 | 654 | :type fp: file 655 | :param: the pointer of the read file 656 | 657 | :type content_type: string 658 | :param: the object content type that supported by HTTP 659 | 660 | :type headers: dict 661 | :param: HTTP header 662 | 663 | Returns: 664 | HTTP Response 665 | ''' 666 | tmp_object = object 667 | tmp_headers = {} 668 | tmp_params = {} 669 | if headers and isinstance(headers, dict): 670 | tmp_headers = headers.copy() 671 | if params and isinstance(params, dict): 672 | tmp_params = params.copy() 673 | 674 | fp.seek(os.SEEK_SET, os.SEEK_END) 675 | filesize = fp.tell() 676 | fp.seek(os.SEEK_SET) 677 | conn = self._open_conn_to_put_object(bucket, object, filesize, content_type, headers, params) 678 | totallen = 0 679 | l = fp.read(self.SendBufferSize) 680 | retry_times = 0 681 | while len(l) > 0: 682 | if retry_times > 100: 683 | print "retry too many times" 684 | raise 685 | try: 686 | conn.send(l) 687 | retry_times = 0 688 | except: 689 | retry_times += 1 690 | continue 691 | totallen += len(l) 692 | if self.show_bar: 693 | self.view_bar(totallen, filesize) 694 | l = fp.read(self.SendBufferSize) 695 | res = conn.getresponse() 696 | if res.status == 301 or res.status == 302: 697 | self.host = helper_get_host_from_resp(res, bucket) 698 | return self.put_object_from_fp(bucket, tmp_object, fp, content_type, tmp_headers, tmp_params) 699 | return res 700 | 701 | def get_object(self, bucket, object, headers=None, params=None): 702 | ''' 703 | Get object 704 | 705 | :type bucket: string 706 | :param 707 | 708 | :type object: string 709 | :param 710 | 711 | :type headers: dict 712 | :param: HTTP header 713 | 714 | Returns: 715 | HTTP Response 716 | ''' 717 | method = 'GET' 718 | body = '' 719 | return self.http_request(method, bucket, object, headers, body, params) 720 | 721 | def 
get_object_to_file(self, bucket, object, filename, headers=None):
722 |         '''
723 |         Get object and write the content of object into a file
724 | 
725 |         :type bucket: string
726 |         :param
727 | 
728 |         :type object: string
729 |         :param
730 | 
731 |         :type filename: string
732 |         :param
733 | 
734 |         :type headers: dict
735 |         :param: HTTP header
736 | 
737 |         Returns:
738 |             HTTP Response
739 |         '''
740 |         res = self.get_object(bucket, object, headers)
741 |         totalread = 0
742 |         if res.status / 100 == 2:
743 |             header = {}
744 |             header = convert_header2map(res.getheaders())
745 |             filesize = safe_get_element("content-length", header)
746 |             f = file(filename, 'wb')
747 |             data = ''
748 |             while True:
749 |                 data = res.read(self.RecvBufferSize)
750 |                 if data:
751 |                     f.write(data)
752 |                     totalread += len(data)
753 |                     if self.show_bar:
754 |                         self.view_bar(totalread, filesize)
755 |                 else:
756 |                     break
757 |             f.close()
758 |         # TODO: get object with flow
759 |         return res
760 | 
761 |     def delete_object(self, bucket, object, headers=None):
762 |         '''
763 |         Delete object
764 | 
765 |         :type bucket: string
766 |         :param
767 | 
768 |         :type object: string
769 |         :param
770 | 
771 |         :type headers: dict
772 |         :param: HTTP header
773 | 
774 |         Returns:
775 |             HTTP Response
776 |         '''
777 |         method = 'DELETE'
778 |         body = ''
779 |         params = {}
780 |         return self.http_request(method, bucket, object, headers, body, params)
781 | 
782 |     def head_object(self, bucket, object, headers=None):
783 |         '''
784 |         Head object, to get the meta message of object without the content
785 | 
786 |         :type bucket: string
787 |         :param
788 | 
789 |         :type object: string
790 |         :param
791 | 
792 |         :type headers: dict
793 |         :param: HTTP header
794 | 
795 |         Returns:
796 |             HTTP Response
797 |         '''
798 |         method = 'HEAD'
799 |         body = ''
800 |         params = {}
801 |         return self.http_request(method, bucket, object, headers, body, params)
802 | 
803 |     def post_object_group(self, bucket, object, object_group_msg_xml, headers=None, params=None):
804 |         '''
805 |         Post object group, merge all objects in object_group_msg_xml into one object
806 |         :type bucket: string
807 |         :param
808 | 
809 |         :type object: string
810 |         :param
811 | 
812 |         :type object_group_msg_xml: string
813 |         :param: xml format string, like
814 |             <CreateFileGroup>
815 |                 <Part>
816 |                     <PartNumber>N</PartNumber>
817 |                     <PartName>objectN</PartName>
818 |                     <ETag>"47BCE5C74F589F4867DBD57E9CA9F808"</ETag>
819 |                 </Part>
820 |             </CreateFileGroup>
821 |         :type headers: dict
822 |         :param: HTTP header
823 | 
824 |         :type params: dict
825 |         :param: parameters
826 | 
827 |         Returns:
828 |             HTTP Response
829 |         '''
830 |         method = 'POST'
831 |         if not headers:
832 |             headers = {}
833 |         if not params:
834 |             params = {}
835 |         if not headers.has_key('Content-Type'):
836 |             content_type = get_content_type_by_filename(object)
837 |             headers['Content-Type'] = content_type
838 |         body = object_group_msg_xml
839 |         params['group'] = ''
840 |         headers['Content-Length'] = str(len(body))
841 |         return self.http_request(method, bucket, object, headers, body, params)
842 | 
843 |     def get_object_group_index(self, bucket, object, headers=None):
844 |         '''
845 |         Get object group_index
846 | 
847 |         :type bucket: string
848 |         :param
849 | 
850 |         :type object: string
851 |         :param
852 | 
853 |         :type headers: dict
854 |         :param: HTTP header
855 | 
856 |         Returns:
857 |             HTTP Response
858 |         '''
859 |         if not headers:
860 |             headers = {}
861 |         headers["x-oss-file-group"] = ''
862 |         method = 'GET'
863 |         body = ''
864 |         params = {}
865 |         return self.http_request(method, bucket, object, headers, body, params)
866 | 
867 |     def upload_part_from_file_given_pos(self, bucket, object, filename, offset, partsize,
upload_id, part_number, headers=None, params=None): 868 | if not params: 869 | params = {} 870 | params['partNumber'] = part_number 871 | params['uploadId'] = upload_id 872 | content_type = '' 873 | return self.put_object_from_file_given_pos(bucket, object, filename, offset, partsize, content_type, headers, params) 874 | 875 | def put_object_from_file_given_pos(self, bucket, object, filename, offset, partsize, content_type='', headers=None, params=None): 876 | ''' 877 | Put object into bucket, the content of object is read from given posision of filename 878 | :type bucket: string 879 | :param 880 | 881 | :type object: string 882 | :param 883 | 884 | :type fllename: string 885 | :param: the name of the read file 886 | 887 | :type offset: int 888 | :param: the given position of file 889 | 890 | :type partsize: int 891 | :param: the size of read content 892 | 893 | :type content_type: string 894 | :param: the object content type that supported by HTTP 895 | 896 | :type headers: dict 897 | :param: HTTP header 898 | 899 | Returns: 900 | HTTP Response 901 | ''' 902 | tmp_object = object 903 | tmp_headers = {} 904 | tmp_params = {} 905 | if headers and isinstance(headers, dict): 906 | tmp_headers = headers.copy() 907 | if params and isinstance(params, dict): 908 | tmp_params = params.copy() 909 | 910 | fp = open(filename, 'rb') 911 | if offset > os.path.getsize(filename): 912 | fp.seek(os.SEEK_SET, os.SEEK_END) 913 | else: 914 | fp.seek(offset) 915 | if not content_type: 916 | content_type = get_content_type_by_filename(filename) 917 | conn = self._open_conn_to_put_object(bucket, object, partsize, content_type, headers, params) 918 | left_len = partsize 919 | while True: 920 | if left_len <= 0: 921 | break 922 | elif left_len < self.SendBufferSize: 923 | buffer_content = fp.read(left_len) 924 | else: 925 | buffer_content = fp.read(self.SendBufferSize) 926 | 927 | if buffer_content: 928 | conn.send(buffer_content) 929 | 930 | left_len = left_len - len(buffer_content) 931 | 932 | fp.close() 933 | res = conn.getresponse() 934 | if res.status == 301 or res.status == 302: 935 | self.host = helper_get_host_from_resp(res, bucket) 936 | return self.put_object_from_file_given_pos(bucket, tmp_object, filename, offset, partsize 937 | , content_type, tmp_headers, tmp_params) 938 | return res 939 | 940 | def upload_large_file(self, bucket, object, filename, thread_num=10, max_part_num=1000, headers=None): 941 | ''' 942 | Upload large file, the content is read from filename. The large file is splitted into many parts. It will put the many parts into bucket and then merge all the parts into one object. 
943 | 944 | :type bucket: string 945 | :param 946 | 947 | :type object: string 948 | :param 949 | 950 | :type fllename: string 951 | :param: the name of the read file 952 | 953 | :type thread_num: int 954 | :param 955 | 956 | :type max_part_num: int 957 | :param 958 | 959 | :type headers: dict 960 | :param 961 | 962 | Returns: 963 | HTTP Response 964 | 965 | ''' 966 | #split the large file into 1000 parts or many parts 967 | #get part_msg_list 968 | if not headers: 969 | headers = {} 970 | if isinstance(filename, unicode): 971 | filename = filename.encode('utf-8') 972 | part_msg_list = split_large_file(filename, object, max_part_num) 973 | #make sure all the parts are put into same bucket 974 | if len(part_msg_list) < thread_num and len(part_msg_list) != 0: 975 | thread_num = len(part_msg_list) 976 | step = len(part_msg_list) / thread_num 977 | retry_times = self.retry_times 978 | while(retry_times >= 0): 979 | try: 980 | threadpool = [] 981 | for i in xrange(0, thread_num): 982 | if i == thread_num - 1: 983 | end = len(part_msg_list) 984 | else: 985 | end = i * step + step 986 | begin = i * step 987 | oss = OssAPI(self.host, self.access_id, self.secret_access_key) 988 | current = PutObjectGroupWorker(oss, bucket, filename, part_msg_list[begin:end], self.retry_times) 989 | threadpool.append(current) 990 | current.start() 991 | for item in threadpool: 992 | item.join() 993 | break 994 | except: 995 | retry_times = retry_times -1 996 | if -1 >= retry_times: 997 | print "after retry %s, failed, upload large file failed!" % retry_times 998 | return 999 | #get xml string that contains msg of object group 1000 | object_group_msg_xml = create_object_group_msg_xml(part_msg_list) 1001 | content_type = get_content_type_by_filename(filename) 1002 | if isinstance(content_type, unicode): 1003 | content_type = content_type.encode('utf-8') 1004 | if not headers.has_key('Content-Type'): 1005 | headers['Content-Type'] = content_type 1006 | return self.post_object_group(bucket, object, object_group_msg_xml, headers) 1007 | 1008 | def copy_object(self, source_bucket, source_object, target_bucket, target_object, headers=None): 1009 | ''' 1010 | Copy object 1011 | 1012 | :type source_bucket: string 1013 | :param 1014 | 1015 | :type source_object: string 1016 | :param 1017 | 1018 | :type target_bucket: string 1019 | :param 1020 | 1021 | :type target_object: string 1022 | :param 1023 | 1024 | :type headers: dict 1025 | :param: HTTP header 1026 | 1027 | Returns: 1028 | HTTP Response 1029 | ''' 1030 | if not headers: 1031 | headers = {} 1032 | if isinstance(source_object, unicode): 1033 | source_object = source_object.encode('utf-8') 1034 | source_object = urllib.quote(source_object) 1035 | headers['x-oss-copy-source'] = "/%s/%s" % (source_bucket, source_object) 1036 | method = 'PUT' 1037 | body = '' 1038 | params = {} 1039 | return self.http_request(method, target_bucket, target_object, headers, body, params) 1040 | 1041 | def init_multi_upload(self, bucket, object, headers=None, params=None): 1042 | ''' 1043 | Init multi upload 1044 | 1045 | :type bucket: string 1046 | :param 1047 | 1048 | :type object: string 1049 | :param 1050 | 1051 | :type headers: dict 1052 | :param: HTTP header 1053 | 1054 | :type params: dict 1055 | :param: HTTP header 1056 | 1057 | Returns: 1058 | HTTP Response 1059 | ''' 1060 | if not params: 1061 | params = {} 1062 | method = 'POST' 1063 | body = '' 1064 | params['uploads'] = '' 1065 | return self.http_request(method, bucket, object, headers, body, params) 1066 | 1067 | def 
get_all_parts(self, bucket, object, upload_id, max_parts=None, part_number_marker=None): 1068 | ''' 1069 | List all upload parts of given upload_id 1070 | :type bucket: string 1071 | :param 1072 | 1073 | :type object: string 1074 | :param 1075 | 1076 | :type upload_id: string 1077 | :param 1078 | 1079 | :type max_parts: int 1080 | :param 1081 | 1082 | :type part_number_marker: string 1083 | :param 1084 | 1085 | Returns: 1086 | HTTP Response 1087 | ''' 1088 | method = 'GET' 1089 | headers = {} 1090 | body = '' 1091 | params = {} 1092 | params['uploadId'] = upload_id 1093 | if max_parts: 1094 | params['max-parts'] = max_parts 1095 | if part_number_marker: 1096 | params['part-number-marker'] = part_number_marker 1097 | return self.http_request(method, bucket, object, headers, body, params) 1098 | 1099 | def get_all_multipart_uploads(self, bucket, delimiter=None, max_uploads=None, key_marker=None, prefix=None, upload_id_marker=None, headers=None): 1100 | ''' 1101 | List all upload_ids and their parts 1102 | :type bucket: string 1103 | :param 1104 | 1105 | :type delimiter: string 1106 | :param 1107 | 1108 | :type max_uploads: string 1109 | :param 1110 | 1111 | :type key_marker: string 1112 | :param 1113 | 1114 | :type prefix: string 1115 | :param 1116 | 1117 | :type upload_id_marker: string 1118 | :param 1119 | 1120 | :type headers: dict 1121 | :param: HTTP header 1122 | 1123 | Returns: 1124 | HTTP Response 1125 | ''' 1126 | method = 'GET' 1127 | object = '' 1128 | body = '' 1129 | params = {} 1130 | params['uploads'] = '' 1131 | if delimiter: 1132 | params['delimiter'] = delimiter 1133 | if max_uploads: 1134 | params['max-uploads'] = max_uploads 1135 | if key_marker: 1136 | params['key-marker'] = key_marker 1137 | if prefix: 1138 | params['prefix'] = prefix 1139 | if upload_id_marker: 1140 | params['upload-id-marker'] = upload_id_marker 1141 | return self.http_request(method, bucket, object, headers, body, params) 1142 | 1143 | def upload_part(self, bucket, object, filename, upload_id, part_number, headers=None, params=None): 1144 | ''' 1145 | Upload the content of filename as one part of given upload_id 1146 | 1147 | :type bucket: string 1148 | :param 1149 | 1150 | :type object: string 1151 | :param 1152 | 1153 | :type filename: string 1154 | :param 1155 | 1156 | :type upload_id: string 1157 | :param 1158 | 1159 | :type part_number: int 1160 | :param 1161 | 1162 | :type headers: dict 1163 | :param: HTTP header 1164 | 1165 | :type params: dict 1166 | :param: HTTP header 1167 | 1168 | Returns: 1169 | HTTP Response 1170 | ''' 1171 | if not params: 1172 | params = {} 1173 | params['partNumber'] = part_number 1174 | params['uploadId'] = upload_id 1175 | content_type = '' 1176 | return self.put_object_from_file(bucket, object, filename, content_type, headers, params) 1177 | 1178 | def upload_part_from_string(self, bucket, object, data, upload_id, part_number, headers=None, params=None): 1179 | ''' 1180 | Upload the content of string as one part of given upload_id 1181 | 1182 | :type bucket: string 1183 | :param 1184 | 1185 | :type object: string 1186 | :param 1187 | 1188 | :type data: string 1189 | :param 1190 | 1191 | :type upload_id: string 1192 | :param 1193 | 1194 | :type part_number: int 1195 | :param 1196 | 1197 | :type headers: dict 1198 | :param: HTTP header 1199 | 1200 | :type params: dict 1201 | :param: HTTP header 1202 | 1203 | Returns: 1204 | HTTP Response 1205 | ''' 1206 | if not params: 1207 | params = {} 1208 | params['partNumber'] = part_number 1209 | params['uploadId'] = upload_id 
1210 | content_type = '' 1211 | fp = StringIO.StringIO(data) 1212 | return self.put_object_from_fp(bucket, object, fp, content_type, headers, params) 1213 | 1214 | def complete_upload(self, bucket, object, upload_id, part_msg_xml, headers=None, params=None): 1215 | ''' 1216 | Finish multiupload and merge all the parts in part_msg_xml as a object. 1217 | 1218 | :type bucket: string 1219 | :param 1220 | 1221 | :type object: string 1222 | :param 1223 | 1224 | :type upload_id: string 1225 | :param 1226 | 1227 | :type part_msg_xml: string 1228 | :param 1229 | 1230 | :type headers: dict 1231 | :param: HTTP header 1232 | 1233 | :type params: dict 1234 | :param: HTTP header 1235 | 1236 | Returns: 1237 | HTTP Response 1238 | ''' 1239 | if not headers: 1240 | headers = {} 1241 | if not params: 1242 | params = {} 1243 | method = 'POST' 1244 | body = part_msg_xml 1245 | headers['Content-Length'] = str(len(body)) 1246 | params['uploadId'] = upload_id 1247 | if not headers.has_key('Content-Type'): 1248 | content_type = get_content_type_by_filename(object) 1249 | headers['Content-Type'] = content_type 1250 | return self.http_request(method, bucket, object, headers, body, params) 1251 | 1252 | def cancel_upload(self, bucket, object, upload_id, headers=None, params=None): 1253 | ''' 1254 | Cancel multiupload and delete all parts of given upload_id 1255 | :type bucket: string 1256 | :param 1257 | 1258 | :type object: string 1259 | :param 1260 | 1261 | :type upload_id: string 1262 | :param 1263 | 1264 | :type headers: dict 1265 | :param: HTTP header 1266 | 1267 | :type params: dict 1268 | :param: HTTP header 1269 | 1270 | Returns: 1271 | HTTP Response 1272 | ''' 1273 | if not params: 1274 | params = {} 1275 | method = 'DELETE' 1276 | if isinstance(upload_id, unicode): 1277 | upload_id = upload_id.encode('utf-8') 1278 | params['uploadId'] = upload_id 1279 | body = '' 1280 | return self.http_request(method, bucket, object, headers, body, params) 1281 | 1282 | def multi_upload_file(self, bucket, object, filename, upload_id='', thread_num=10, max_part_num=10000, headers=None, params=None): 1283 | ''' 1284 | Upload large file, the content is read from filename. The large file is splitted into many parts. It will put the many parts into bucket and then merge all the parts into one object. 
1285 | 1286 | :type bucket: string 1287 | :param 1288 | 1289 | :type object: string 1290 | :param 1291 | 1292 | :type fllename: string 1293 | :param: the name of the read file 1294 | 1295 | :type upload_id: string 1296 | :param 1297 | 1298 | :type thread_num: int 1299 | :param 1300 | 1301 | :type max_part_num: int 1302 | :param 1303 | 1304 | :type headers: dict 1305 | :param 1306 | 1307 | :type params: dict 1308 | :param 1309 | 1310 | Returns: 1311 | HTTP Response 1312 | ''' 1313 | #get init upload_id 1314 | if not upload_id: 1315 | res = self.init_multi_upload(bucket, object, headers, params) 1316 | body = res.read() 1317 | if res.status == 200: 1318 | h = GetInitUploadIdXml(body) 1319 | upload_id = h.upload_id 1320 | else: 1321 | err = ErrorXml(body) 1322 | raise Exception("%s, %s" %(res.status, err.msg)) 1323 | if not upload_id: 1324 | raise Exception("-1, Cannot get upload id.") 1325 | #split the large file into 1000 parts or many parts 1326 | #get part_msg_list 1327 | if isinstance(filename, unicode): 1328 | filename = filename.encode('utf-8') 1329 | part_msg_list = split_large_file(filename, object, max_part_num) 1330 | logger = getlogger(self.debug) 1331 | logger.info("bucket:%s, object:%s, upload_id is: %s, split_number:%d" % (bucket, object, upload_id, len(part_msg_list))) 1332 | 1333 | #make sure all the parts are put into same bucket 1334 | if len(part_msg_list) < thread_num and len(part_msg_list) != 0: 1335 | thread_num = len(part_msg_list) 1336 | step = len(part_msg_list) / thread_num 1337 | 1338 | #list part to get a map 1339 | upload_retry_times = self.retry_times 1340 | while(upload_retry_times >= 0): 1341 | uploaded_part_map = {} 1342 | oss = OssAPI(self.host, self.access_id, self.secret_access_key) 1343 | uploaded_part_map = get_part_map(oss, bucket, object, upload_id) 1344 | retry_times = self.retry_times 1345 | while(retry_times >= 0): 1346 | threadpool = [] 1347 | try: 1348 | for i in xrange(0, thread_num): 1349 | if i == thread_num - 1: 1350 | end = len(part_msg_list) 1351 | else: 1352 | end = i * step + step 1353 | begin = i * step 1354 | oss = OssAPI(self.host, self.access_id, self.secret_access_key) 1355 | current = UploadPartWorker(oss, bucket, object, upload_id, filename, part_msg_list[begin:end], uploaded_part_map, self.retry_times) 1356 | threadpool.append(current) 1357 | current.start() 1358 | for item in threadpool: 1359 | item.join() 1360 | break 1361 | except: 1362 | retry_times -= 1 1363 | if -1 >= retry_times: 1364 | raise Exception("-2, after retry %s, failed, multi upload part failed! upload_id:%s" % (self.retry_times, upload_id)) 1365 | #get xml string that contains msg of part 1366 | part_msg_xml = create_part_xml(part_msg_list) 1367 | #complete upload 1368 | res = self.complete_upload(bucket, object, upload_id, part_msg_xml, headers, params) 1369 | if res.status == 200: 1370 | break 1371 | upload_retry_times -= 1 1372 | if upload_retry_times < 0: 1373 | raise Exception("-3, after retry %s, failed, multi upload file failed! 
upload_id:%s" % (self.retry_times, upload_id)) 1374 | return res 1375 | 1376 | def delete_objects(self, bucket, object_list=None, headers=None, params=None): 1377 | ''' 1378 | Batch delete objects 1379 | :type bucket: string 1380 | :param: 1381 | 1382 | :type object_list: list 1383 | :param: 1384 | 1385 | :type headers: dict 1386 | :param: HTTP header 1387 | 1388 | :type params: dict 1389 | :param: the parameters that put in the url address as query string 1390 | 1391 | Returns: 1392 | HTTP Response 1393 | ''' 1394 | if not object_list: 1395 | object_list = [] 1396 | object_list_xml = create_delete_object_msg_xml(object_list) 1397 | return self.batch_delete_object(bucket, object_list_xml, headers, params) 1398 | 1399 | def batch_delete_object(self, bucket, object_list_xml, headers=None, params=None): 1400 | ''' 1401 | Delete the objects in object_list_xml 1402 | :type bucket: string 1403 | :param: 1404 | 1405 | :type object_list_xml: string 1406 | :param: 1407 | 1408 | :type headers: dict 1409 | :param: HTTP header 1410 | 1411 | :type params: dict 1412 | :param: the parameters that put in the url address as query string 1413 | 1414 | Returns: 1415 | HTTP Response 1416 | ''' 1417 | if not headers: 1418 | headers = {} 1419 | if not params: 1420 | params = {} 1421 | method = 'POST' 1422 | object = '' 1423 | body = object_list_xml 1424 | headers['Content-Length'] = str(len(body)) 1425 | params['delete'] = '' 1426 | base64md5 = base64.encodestring(md5.new(body).digest()) 1427 | if base64md5[-1] == '\n': 1428 | base64md5 = base64md5[0:-1] 1429 | headers['Content-MD5'] = base64md5 1430 | return self.http_request(method, bucket, object, headers, body, params) 1431 | 1432 | def list_objects(self, bucket, prefix=''): 1433 | ''' 1434 | :type bucket: string 1435 | :param: 1436 | 1437 | :type prefix: string 1438 | :param: 1439 | 1440 | Returns: 1441 | a list that contains the objects in bucket with prefix 1442 | ''' 1443 | get_instance = GetAllObjects() 1444 | marker_input = '' 1445 | object_list = [] 1446 | oss = OssAPI(self.host, self.access_id, self.secret_access_key) 1447 | (object_list, marker_output) = get_instance.get_object_in_bucket(oss, bucket, marker_input, prefix) 1448 | return object_list 1449 | 1450 | def batch_delete_objects(self, bucket, object_list=None): 1451 | ''' 1452 | :type bucket: string 1453 | :param: 1454 | 1455 | :type object_list: object name list 1456 | :param: 1457 | 1458 | Returns: 1459 | True or False 1460 | ''' 1461 | if not object_list: 1462 | object_list = [] 1463 | object_list_xml = create_delete_object_msg_xml(object_list) 1464 | try: 1465 | res = self.batch_delete_object(bucket, object_list_xml) 1466 | if res.status / 100 == 2: 1467 | return True 1468 | except: 1469 | pass 1470 | return False 1471 | 1472 | def get_object_info(self, bucket, object, headers=None, params=None): 1473 | ''' 1474 | Get object information 1475 | :type bucket: string 1476 | :param: 1477 | 1478 | :type object: string 1479 | :param: 1480 | 1481 | :type headers: dict 1482 | :param: HTTP header 1483 | 1484 | :type params: dict 1485 | :param: the parameters that put in the url address as query string 1486 | 1487 | Returns: 1488 | HTTP Response 1489 | ''' 1490 | if not headers: 1491 | headers = {} 1492 | if not params: 1493 | params = {} 1494 | method = 'GET' 1495 | body = '' 1496 | params['objectInfo'] = '' 1497 | return self.http_request(method, bucket, object, headers, body, params) 1498 | -------------------------------------------------------------------------------- 
/ossync/sdk/oss_sample.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding=utf8
3 | import time
4 | try:
5 |     from oss.oss_api import *
6 | except:
7 |     from oss_api import *
8 | try:
9 |     from oss.oss_xml_handler import *
10 | except:
11 |     from oss_xml_handler import *
12 | HOST = "oss.aliyuncs.com"
13 | ACCESS_ID = ""
14 | SECRET_ACCESS_KEY = ""
15 | #ACCESS_ID and SECRET_ACCESS_KEY are empty by default; fill in the correct ID and KEY you applied for.
16 | 
17 | if __name__ == "__main__":
18 |     #initialization
19 |     if len(ACCESS_ID) == 0 or len(SECRET_ACCESS_KEY) == 0:
20 |         print "Please make sure ACCESS_ID and SECRET_ACCESS_KEY are set correctly in ", __file__, ", they are empty!"
21 |         exit(0)
22 |     oss = OssAPI(HOST, ACCESS_ID, SECRET_ACCESS_KEY)
23 |     sep = "=============================="
24 | 
25 |     #sign a specific URL; by default the signed URL expires after 60 seconds
26 |     method = "GET"
27 |     bucket = "test" + time.strftime("%Y-%b-%d%H-%M-%S").lower()
28 |     object = "test_object"
29 |     url = "http://" + HOST + "/oss/" + bucket + "/" + object
30 |     headers = {}
31 |     resource = "/" + bucket + "/" + object
32 | 
33 |     timeout = 60
34 |     url_with_auth = oss.sign_url_auth_with_expire_time(method, url, headers, resource, timeout)
35 |     print "after signature url is: ", url_with_auth
36 |     print sep
37 |     #create your own bucket
38 |     acl = 'private'
39 |     headers = {}
40 |     res = oss.put_bucket(bucket, acl, headers)
41 |     if (res.status / 100) == 2:
42 |         print "put bucket ", bucket, "OK"
43 |     else:
44 |         print "put bucket ", bucket, "ERROR"
45 |     print sep
46 | 
47 |     #list the buckets you created
48 |     res = oss.get_service()
49 |     if (res.status / 100) == 2:
50 |         body = res.read()
51 |         h = GetServiceXml(body)
52 |         print "bucket list size is: ", len(h.list())
53 |         print "bucket list is: "
54 |         for i in h.list():
55 |             print i
56 |     else:
57 |         print res.status
58 |     print sep
59 | 
60 |     #upload the given string to the bucket; the file in the bucket is named object
61 |     object = "object_test"
62 |     input_content = "hello, OSS"
63 |     content_type = "text/HTML"
64 |     headers = {}
65 |     res = oss.put_object_from_string(bucket, object, input_content, content_type, headers)
66 |     if (res.status / 100) == 2:
67 |         print "put_object_from_string OK"
68 |     else:
69 |         print "put_object_from_string ERROR"
70 |     print sep
71 | 
72 |     #upload the named local file to the bucket; the file in the bucket is named object
73 |     object = "object_test"
74 |     filename = __file__
75 |     content_type = "text/HTML"
76 |     headers = {}
77 |     res = oss.put_object_from_file(bucket, object, filename, content_type, headers)
78 |     if (res.status / 100) == 2:
79 |         print "put_object_from_file OK"
80 |     else:
81 |         print "put_object_from_file ERROR"
82 |     print sep
83 | 
84 |     #upload the same file again, this time from an open file object
85 |     object = "object_test"
86 |     filename = __file__
87 |     content_type = "text/HTML"
88 |     headers = {}
89 | 
90 |     fp = open(filename, 'rb')
91 |     res = oss.put_object_from_fp(bucket, object, fp, content_type, headers)
92 |     fp.close()
93 |     if (res.status / 100) == 2:
94 |         print "put_object_from_fp OK"
95 |     else:
96 |         print "put_object_from_fp ERROR"
97 |     print sep
98 | 
99 |     #download an object from the bucket; the content is in the response body
100 |     object = "object_test"
101 |     headers = {}
102 | 
103 |     res = oss.get_object(bucket, object, headers)
104 |     if (res.status / 100) == 2:
105 |         print "get_object OK"
106 |     else:
107 |         print "get_object ERROR"
108 |     print sep
109 | 
110 |     #download an object from the bucket and write its content to a local file
111 |     object = "object_test"
112 |     headers = {}
113 |     filename = "get_object_test_file"
114 | 
115 |     res = oss.get_object_to_file(bucket, object, filename, headers)
116 |     if (res.status / 100) == 2:
117 |         print "get_object_to_file OK"
118 |     else:
119 |         print "get_object_to_file ERROR"
120 |     print sep
121 | 
122 |     #inspect the object's meta information, e.g. length, type, etc.
123 |     object = "object_test"
124 |     headers = {}
125 |     res = oss.head_object(bucket, object, headers)
126 |     if (res.status / 100) == 2:
127 |         print "head_object OK"
128 |         header_map = convert_header2map(res.getheaders())
129 |         content_len = safe_get_element("content-length", header_map)
130 |         etag = safe_get_element("etag", header_map).upper()
131 |         print "content length is:", content_len
132 |         print "ETag is: ", etag
133 | 
134 |     else:
135 |         print "head_object ERROR"
136 |     print sep
137 | 
138 |     #check the bucket's access permissions
139 |     res = oss.get_bucket_acl(bucket)
140 |     if (res.status / 100) == 2:
141 |         body = res.read()
142 |         h = GetBucketAclXml(body)
143 |         print "bucket acl is:", h.grant
144 |     else:
145 |         print "get bucket acl ERROR"
146 |     print sep
147 | 
148 |     #list the objects in the bucket
149 |     prefix = ""
150 |     marker = ""
151 |     delimiter = "/"
152 |     maxkeys = "100"
153 |     headers = {}
154 |     res = oss.get_bucket(bucket, prefix, marker, delimiter, maxkeys, headers)
155 |     if (res.status / 100) == 2:
156 |         body = res.read()
157 |         h = GetBucketXml(body)
158 |         (file_list, common_list) = h.list()
159 |         print "object list is:"
160 |         for i in file_list:
161 |             print i
162 |         print "common list is:"
163 |         for i in common_list:
164 |             print i
165 |     print sep
166 | 
167 |     #upload a large file as an object group; see the official API docs for the object group concept
168 |     res = oss.upload_large_file(bucket, object, __file__)
169 |     if (res.status / 100) == 2:
170 |         print "upload_large_file OK"
171 |     else:
172 |         print "upload_large_file ERROR"
173 | 
174 |     print sep
175 | 
176 |     #get the objects contained in the object group
177 |     res = oss.get_object_group_index(bucket, object)
178 |     if (res.status / 100) == 2:
179 |         print "get_object_group_index OK"
180 |         body = res.read()
181 |         h = GetObjectGroupIndexXml(body)
182 |         for i in h.list():
183 |             print "object group part msg:", i
184 |     else:
185 |         print "get_object_group_index ERROR"
186 | 
187 |     res = oss.get_object_group_index(bucket, object)
188 |     if res.status == 200:
189 |         body = res.read()
190 |         h = GetObjectGroupIndexXml(body)
191 |         object_group_index = h.list()
192 |         for i in object_group_index:
193 |             if len(i) == 4 and len(i[1]) > 0:
194 |                 part_name = i[1].strip()
195 |                 res = oss.delete_object(bucket, part_name)
196 |                 if res.status != 204:
197 |                     print "delete part ", part_name, " in bucket:", bucket, " failed!"
198 |                 else:
199 |                     print "delete part ", part_name, " in bucket:", bucket, " ok"
200 |     print sep
201 |     #multipart upload operations
202 |     #get an upload id
203 |     upload_id = ""
204 |     res = oss.init_multi_upload(bucket, object, headers)
205 |     if res.status == 200:
206 |         body = res.read()
207 |         h = GetInitUploadIdXml(body)
208 |         upload_id = h.upload_id
209 | 
210 |     if len(upload_id) == 0:
211 |         print "init upload failed!"
212 |     else:
213 |         print "init upload OK!"
214 |         print "upload id is: %s" % upload_id
215 | 
216 |     #upload a part
217 |     data = "this is test content string."
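    #(all parts of one upload are sent under the same upload_id; part numbers start at 1)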
218 |     part_number = "1"
219 |     res = oss.upload_part_from_string(bucket, object, data, upload_id, part_number)
220 |     if (res.status / 100) == 2:
221 |         print "upload part OK"
222 |     else:
223 |         print "upload part ERROR"
224 | 
225 |     #complete the upload
226 |     part_msg_xml = get_part_xml(oss, bucket, object, upload_id)
227 |     res = oss.complete_upload(bucket, object, upload_id, part_msg_xml)
228 |     if (res.status / 100) == 2:
229 |         print "complete upload OK"
230 |     else:
231 |         print "complete upload ERROR"
232 | 
233 |     res = oss.get_object(bucket, object)
234 |     if (res.status / 100) == 2 and res.read() == data:
235 |         print "verify upload OK"
236 |     else:
237 |         print "verify upload ERROR"
238 | 
239 |     print sep
240 | 
241 | 
242 |     #delete an object from the bucket
243 |     object = "object_test"
244 |     headers = {}
245 |     res = oss.delete_object(bucket, object, headers)
246 |     if (res.status / 100) == 2:
247 |         print "delete_object OK"
248 |     else:
249 |         print "delete_object ERROR"
250 |     print sep
251 | 
252 |     #delete the bucket
253 |     res = oss.delete_bucket(bucket)
254 |     if (res.status / 100) == 2:
255 |         print "delete bucket ", bucket, "OK"
256 |     else:
257 |         print "delete bucket ", bucket, "ERROR"
258 | 
259 |     print sep
260 | 
261 | 
262 | 
--------------------------------------------------------------------------------
/ossync/sdk/oss_util.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding=utf-8
3 | import urllib
4 | import base64
5 | import hmac
6 | import time
7 | from hashlib import sha1 as sha
8 | import os
9 | import sys
10 | import md5
11 | import StringIO
12 | from threading import Thread
13 | import threading
14 | import ConfigParser
15 | import logging
16 | from logging.handlers import RotatingFileHandler
17 | from xml.sax.saxutils import escape
18 | try:
19 |     from oss.oss_xml_handler import *
20 | except:
21 |     from oss_xml_handler import *
22 | 
23 | #LOG_LEVEL can be one of DEBUG INFO WARNING ERROR CRITICAL
24 | DEBUG = False
25 | LOG_LEVEL = "DEBUG"
26 | PROVIDER = "OSS"
27 | SELF_DEFINE_HEADER_PREFIX = "x-oss-"
28 | if "AWS" == PROVIDER:
29 |     SELF_DEFINE_HEADER_PREFIX = "x-amz-"
30 | 
31 | def getlogger(debug=DEBUG, log_level=LOG_LEVEL, log_name="log.txt"):
32 |     if not debug:
33 |         logger = logging.getLogger('oss')
34 |         logger.addHandler(EmptyHandler())
35 |         return logger
36 |     logfile = os.path.join(os.getcwd(), log_name)
37 |     max_log_size = 100*1024*1024 #Bytes
38 |     backup_count = 5
39 |     format = \
40 |     "%(asctime)s %(levelname)-8s[%(filename)s:%(lineno)d(%(funcName)s)] %(message)s"
41 |     hdlr = RotatingFileHandler(logfile,
42 |                                mode='a',
43 |                                maxBytes=max_log_size,
44 |                                backupCount=backup_count)
45 |     formatter = logging.Formatter(format)
46 |     hdlr.setFormatter(formatter)
47 |     logger = logging.getLogger("oss")
48 |     logger.addHandler(hdlr)
49 |     if "DEBUG" == log_level.upper():
50 |         logger.setLevel(logging.DEBUG)
51 |     elif "INFO" == log_level.upper():
52 |         logger.setLevel(logging.INFO)
53 |     elif "WARNING" == log_level.upper():
54 |         logger.setLevel(logging.WARNING)
55 |     elif "ERROR" == log_level.upper():
56 |         logger.setLevel(logging.ERROR)
57 |     elif "CRITICAL" == log_level.upper():
58 |         logger.setLevel(logging.CRITICAL)
59 |     else:
60 |         logger.setLevel(logging.ERROR)
61 |     return logger
62 | 
63 | class EmptyHandler(logging.Handler):
64 |     pass
65 | 
66 | def helper_get_host_from_resp(res, bucket):
67 |     host = helper_get_host_from_headers(res.getheaders(), bucket)
68 |     if not host:
69 |         xml = res.read()
70 |         host = RedirectXml(xml).Endpoint().strip()
71 |         host = 
helper_get_host_from_endpoint(host, bucket) 72 | return host 73 | 74 | def helper_get_host_from_headers(headers, bucket): 75 | mp = convert_header2map(headers) 76 | location = safe_get_element('location', mp).strip() 77 | #https://bucket.oss.aliyuncs.com or http://oss.aliyuncs.com/bucket 78 | location = location.replace("https://", "").replace("http://", "") 79 | if location.startswith("%s." % bucket): 80 | location = location[len(bucket)+1:] 81 | index = location.find('/') 82 | if index == -1: 83 | return location 84 | return location[:index] 85 | 86 | def helper_get_host_from_endpoint(host, bucket): 87 | index = host.find('/') 88 | if index != -1: 89 | host = host[:index] 90 | index = host.find('\\') 91 | if index != -1: 92 | host = host[:index] 93 | index = host.find(bucket) 94 | if index == 0: 95 | host = host[len(bucket)+1:] 96 | return host 97 | 98 | def check_bucket_valid(bucket): 99 | alphabeta = "abcdefghijklmnopqrstuvwxyz0123456789-" 100 | if len(bucket) < 3 or len(bucket) > 63: 101 | return False 102 | if bucket[-1] == "-" or bucket[-1] == "_": 103 | return False 104 | if not ((bucket[0] >= 'a' and bucket[0] <= 'z') or (bucket[0] >= '0' and bucket[0] <= '9')): 105 | return False 106 | for i in bucket: 107 | if not i in alphabeta: 108 | return False 109 | return True 110 | 111 | ########## function for Authorization ########## 112 | def _format_header(headers=None): 113 | ''' 114 | format the headers that self define 115 | convert the self define headers to lower. 116 | ''' 117 | if not headers: 118 | headers = {} 119 | tmp_headers = {} 120 | for k in headers.keys(): 121 | if isinstance(headers[k], unicode): 122 | headers[k] = headers[k].encode('utf-8') 123 | 124 | if k.lower().startswith(SELF_DEFINE_HEADER_PREFIX): 125 | k_lower = k.lower() 126 | tmp_headers[k_lower] = headers[k] 127 | else: 128 | tmp_headers[k] = headers[k] 129 | return tmp_headers 130 | 131 | def get_assign(secret_access_key, method, headers=None, resource="/", result=None, debug=DEBUG): 132 | ''' 133 | Create the authorization for OSS based on header input. 134 | You should put it into "Authorization" parameter of header. 
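    The string to sign is assembled from the request exactly as in the body below:
        VERB + "\n" + Content-MD5 + "\n" + Content-Type + "\n" + Date + "\n"
        + CanonicalizedOSSHeaders + CanonicalizedResource
    and the returned signature is base64(hmac-sha1(secret_access_key, string_to_sign)).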
135 | ''' 136 | if not headers: 137 | headers = {} 138 | if not result: 139 | result = [] 140 | content_md5 = "" 141 | content_type = "" 142 | date = "" 143 | canonicalized_oss_headers = "" 144 | logger = getlogger(debug) 145 | logger.debug("secret_access_key: %s" % secret_access_key) 146 | content_md5 = safe_get_element('Content-MD5', headers) 147 | content_type = safe_get_element('Content-Type', headers) 148 | date = safe_get_element('Date', headers) 149 | canonicalized_resource = resource 150 | tmp_headers = _format_header(headers) 151 | if len(tmp_headers) > 0: 152 | x_header_list = tmp_headers.keys() 153 | x_header_list.sort() 154 | for k in x_header_list: 155 | if k.startswith(SELF_DEFINE_HEADER_PREFIX): 156 | canonicalized_oss_headers += "%s:%s\n" % (k, tmp_headers[k]) 157 | string_to_sign = method + "\n" + content_md5.strip() + "\n" + content_type + "\n" + date + "\n" + canonicalized_oss_headers + canonicalized_resource 158 | result.append(string_to_sign) 159 | logger.debug("method:%s\n content_md5:%s\n content_type:%s\n data:%s\n canonicalized_oss_headers:%s\n canonicalized_resource:%s\n" % (method, content_md5, content_type, date, canonicalized_oss_headers, canonicalized_resource)) 160 | logger.debug("string_to_sign:%s\n \nlength of string_to_sign:%d\n" % (string_to_sign, len(string_to_sign))) 161 | h = hmac.new(secret_access_key, string_to_sign, sha) 162 | sign_result = base64.encodestring(h.digest()).strip() 163 | logger.debug("sign result:%s" % sign_result) 164 | return sign_result 165 | 166 | def get_resource(params=None): 167 | if not params: 168 | return "" 169 | tmp_headers = {} 170 | for k, v in params.items(): 171 | tmp_k = k.lower().strip() 172 | tmp_headers[tmp_k] = v 173 | override_response_list = ['response-content-type', 'response-content-language', \ 174 | 'response-cache-control', 'logging', 'response-content-encoding', \ 175 | 'acl', 'uploadId', 'uploads', 'partNumber', 'group', \ 176 | 'delete', 'website', 'location', 'objectInfo', \ 177 | 'response-expires', 'response-content-disposition'] 178 | override_response_list.sort() 179 | resource = "" 180 | separator = "?" 181 | for i in override_response_list: 182 | if tmp_headers.has_key(i.lower()): 183 | resource += separator 184 | resource += i 185 | tmp_key = str(tmp_headers[i.lower()]) 186 | if len(tmp_key) != 0: 187 | resource += "=" 188 | resource += tmp_key 189 | separator = '&' 190 | return resource 191 | 192 | def append_param(url, params): 193 | ''' 194 | convert the parameters to query string of URI. 195 | ''' 196 | l = [] 197 | for k, v in params.items(): 198 | k = k.replace('_', '-') 199 | if k == 'maxkeys': 200 | k = 'max-keys' 201 | if isinstance(v, unicode): 202 | v = v.encode('utf-8') 203 | if v is not None and v != '': 204 | l.append('%s=%s' % (urllib.quote(k), urllib.quote(str(v)))) 205 | elif k == 'acl': 206 | l.append('%s' % (urllib.quote(k))) 207 | elif v is None or v == '': 208 | l.append('%s' % (urllib.quote(k))) 209 | if len(l): 210 | url = url + '?' + '&'.join(l) 211 | return url 212 | 213 | ############### Construct XML ############### 214 | def create_object_group_msg_xml(part_msg_list=None): 215 | ''' 216 | get information from part_msg_list and covert it to xml. 217 | part_msg_list has special format. 
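    Each element of part_msg_list is expected to look like the tuples produced
    by split_large_file() below: (part_number, part_name, md5sum, size, offset);
    only the first three fields are used here.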
218 |     '''
219 |     if not part_msg_list:
220 |         part_msg_list = []
221 |     xml_string = r'<CreateFileGroup>'
222 |     for part in part_msg_list:
223 |         if len(part) >= 3:
224 |             if isinstance(part[1], unicode):
225 |                 file_path = part[1].encode('utf-8')
226 |             else:
227 |                 file_path = part[1]
228 |             file_path = escape(file_path)
229 |             xml_string += r'<Part>'
230 |             xml_string += r'<PartNumber>' + str(part[0]) + r'</PartNumber>'
231 |             xml_string += r'<PartName>' + str(file_path) + r'</PartName>'
232 |             xml_string += r'<ETag>"' + str(part[2]).upper() + r'"</ETag>'
233 |             xml_string += r'</Part>'
234 |         else:
235 |             print "the ", part, " in part_msg_list is not as expected!"
236 |             return ""
237 |     xml_string += r'</CreateFileGroup>'
238 | 
239 |     return xml_string
240 | 
241 | def create_part_xml(part_msg_list=None):
242 |     '''
243 |     get information from part_msg_list and convert it to xml.
244 |     part_msg_list has special format.
245 |     '''
246 |     if not part_msg_list:
247 |         part_msg_list = []
248 |     xml_string = r'<CompleteMultipartUpload>'
249 |     for part in part_msg_list:
250 |         if len(part) >= 3:
251 |             xml_string += r'<Part>'
252 |             xml_string += r'<PartNumber>' + str(part[0]) + r'</PartNumber>'
253 |             xml_string += r'<ETag>"' + str(part[2]).upper() + r'"</ETag>'
254 |             xml_string += r'</Part>'
255 |         else:
256 |             print "the ", part, " in part_msg_list is not as expected!"
257 |             return ""
258 |     xml_string += r'</CompleteMultipartUpload>'
259 | 
260 |     return xml_string
261 | 
262 | def create_delete_object_msg_xml(object_list=None, is_quiet=False, is_defult=False):
263 |     '''
264 |     convert object name list to xml.
265 |     '''
266 |     if not object_list:
267 |         object_list = []
268 |     xml_string = r'<Delete>'
269 |     if not is_defult:
270 |         if is_quiet:
271 |             xml_string += r'<Quiet>true</Quiet>'
272 |         else:
273 |             xml_string += r'<Quiet>false</Quiet>'
274 |     for object in object_list:
275 |         key = object.strip()
276 |         if isinstance(object, unicode):
277 |             key = object.encode('utf-8')
278 |         key = escape(key)
279 |         xml_string += r'<Object><Key>%s</Key></Object>' % key
280 |     xml_string += r'</Delete>'
281 |     return xml_string
282 | 
283 | ############### operate OSS ###############
284 | def clear_all_object_of_bucket(oss_instance, bucket):
285 |     '''
286 |     clean all objects in bucket, after that, it will delete this bucket.
287 |     '''
288 |     return clear_all_objects_in_bucket(oss_instance, bucket)
289 | 
290 | def clear_all_objects_in_bucket(oss_instance, bucket, delete_marker="", delete_upload_id_marker="", debug=False):
291 |     '''
292 |     it will clean all objects in bucket, after that, it will delete this bucket.
293 | 294 | example: 295 | from oss_api import * 296 | host = "" 297 | id = "" 298 | key = "" 299 | oss_instance = OssAPI(host, id, key) 300 | bucket = "leopublicreadprivatewrite" 301 | if clear_all_objects_in_bucket(oss_instance, bucket): 302 | pass 303 | else: 304 | print "clean Fail" 305 | ''' 306 | prefix = "" 307 | delimiter = "" 308 | maxkeys = 1000 309 | delete_all_objects(oss_instance, bucket, prefix, delimiter, delete_marker, maxkeys, debug) 310 | delete_all_parts(oss_instance, bucket, delete_marker, delete_upload_id_marker, debug) 311 | res = oss_instance.delete_bucket(bucket) 312 | if (res.status / 100 != 2 and res.status != 404): 313 | print "clear_all_objects_in_bucket: delete bucket:%s fail, ret:%s, request id:%s" % (bucket, res.status, res.getheader("x-oss-request-id")) 314 | return False 315 | return True 316 | 317 | def delete_all_objects(oss_instance, bucket, prefix="", delimiter="", delete_marker="", maxkeys=1000, debug=False): 318 | marker = delete_marker 319 | delete_obj_num = 0 320 | while 1: 321 | object_list = [] 322 | res = oss_instance.get_bucket(bucket, prefix, marker, delimiter, maxkeys) 323 | if res.status != 200: 324 | break 325 | body = res.read() 326 | (tmp_object_list, marker) = get_object_list_marker_from_xml(body) 327 | for item in tmp_object_list: 328 | object_list.append(item[0]) 329 | 330 | if object_list: 331 | object_list_xml = create_delete_object_msg_xml(object_list) 332 | res = oss_instance.batch_delete_object(bucket, object_list_xml) 333 | if res.status/100 != 2: 334 | if marker: 335 | print "delete_all_objects: batch delete objects in bucket:%s fail, ret:%s, request id:%s, first object:%s, marker:%s" % (bucket, res.status, res.getheader("x-oss-request-id"), object_list[0], marker) 336 | else: 337 | print "delete_all_objects: batch delete objects in bucket:%s fail, ret:%s, request id:%s, first object:%s" % (bucket, res.status, res.getheader("x-oss-request-id"), object_list[0]) 338 | else: 339 | if debug: 340 | delete_obj_num += len(object_list) 341 | if marker: 342 | print "delete_all_objects: Now %s objects deleted, marker:%s" % (delete_obj_num, marker) 343 | else: 344 | print "delete_all_objects: Now %s objects deleted" % (delete_obj_num) 345 | if len(marker) == 0: 346 | break 347 | 348 | def delete_all_parts(oss_instance, bucket, delete_object_marker="", delete_upload_id_marker="", debug=False): 349 | delete_mulitipart_num = 0 350 | marker = delete_object_marker 351 | id_marker = delete_upload_id_marker 352 | while 1: 353 | res = oss_instance.get_all_multipart_uploads(bucket, key_marker=marker, upload_id_marker=id_marker) 354 | if res.status != 200: 355 | break 356 | body = res.read() 357 | hh = GetMultipartUploadsXml(body) 358 | (fl, pl) = hh.list() 359 | for i in fl: 360 | object = i[0] 361 | if isinstance(i[0], unicode): 362 | object = i[0].encode('utf-8') 363 | res = oss_instance.cancel_upload(bucket, object, i[1]) 364 | if (res.status / 100 != 2 and res.status != 404): 365 | print "delete_all_parts: cancel upload object:%s, upload_id:%s FAIL, ret:%s, request-id:%s" % (object, i[1], res.status, res.getheader("x-oss-request-id")) 366 | else: 367 | delete_mulitipart_num += 1 368 | if debug: 369 | print "delete_all_parts: cancel upload object:%s, upload_id:%s OK\nNow %s parts deleted." 
% (object, i[1], delete_mulitipart_num) 370 | if hh.is_truncated: 371 | marker = hh.next_key_marker 372 | id_marker = hh.next_upload_id_marker 373 | else: 374 | break 375 | if not marker: 376 | break 377 | 378 | def clean_all_bucket(oss_instance): 379 | ''' 380 | it will clean all bucket, including the all objects in bucket. 381 | ''' 382 | res = oss_instance.get_service() 383 | if (res.status / 100) == 2: 384 | h = GetServiceXml(res.read()) 385 | for b in h.bucket_list: 386 | if not clear_all_objects_in_bucket(oss_instance, b.name): 387 | print "clean bucket ", b.name, " failed! in clean_all_bucket" 388 | return False 389 | return True 390 | else: 391 | print "failed! get service in clean_all_bucket return ", res.status 392 | print res.read() 393 | print res.getheaders() 394 | return False 395 | 396 | def pgfs_clear_all_objects_in_bucket(oss_instance, bucket): 397 | ''' 398 | it will clean all objects in bucket, after that, it will delete this bucket. 399 | 400 | example: 401 | from oss_api import * 402 | host = "" 403 | id = "" 404 | key = "" 405 | oss_instance = OssAPI(host, id, key) 406 | bucket = "leopublicreadprivatewrite" 407 | if clear_all_objects_in_bucket(oss_instance, bucket): 408 | pass 409 | else: 410 | print "clean Fail" 411 | ''' 412 | b = GetAllObjects() 413 | b.get_all_object_in_bucket(oss_instance, bucket) 414 | for i in b.object_list: 415 | res = oss_instance.delete_object(bucket, i) 416 | if (res.status / 100 != 2): 417 | print "clear_all_objects_in_bucket: delete object fail, ret is:", res.status, "bucket is:", bucket, "object is: ", i 418 | return False 419 | else: 420 | pass 421 | 422 | res = oss_instance.delete_bucket(bucket) 423 | if (res.status / 100 != 2 and res.status != 404): 424 | print "clear_all_objects_in_bucket: delete bucket fail, ret is: %s, request id is:%s" % (res.status, res.getheader("x-oss-request-id")) 425 | return False 426 | return True 427 | 428 | def pgfs_clean_all_bucket(oss_instance): 429 | ''' 430 | it will clean all bucket, including the all objects in bucket. 431 | ''' 432 | res = oss_instance.get_service() 433 | if (res.status / 100) == 2: 434 | h = GetServiceXml(res.read()) 435 | for b in h.bucket_list: 436 | print b 437 | ''' 438 | if not pgfs_clear_all_objects_in_bucket(oss_instance, b.name): 439 | print "clean bucket ", b.name, " failed! in clean_all_bucket" 440 | return False 441 | ''' 442 | return True 443 | else: 444 | print "failed! get service in clean_all_bucket return ", res.status 445 | print res.read() 446 | print res.getheaders() 447 | return False 448 | 449 | def delete_all_parts_of_object_group(oss, bucket, object_group_name): 450 | res = oss.get_object_group_index(bucket, object_group_name) 451 | if res.status == 200: 452 | body = res.read() 453 | h = GetObjectGroupIndexXml(body) 454 | object_group_index = h.list() 455 | for i in object_group_index: 456 | if len(i) == 4 and len(i[1]) > 0: 457 | part_name = i[1].strip() 458 | res = oss.delete_object(bucket, part_name) 459 | if res.status != 204: 460 | print "delete part ", part_name, " in bucket:", bucket, " failed!" 
461 | return False 462 | else: 463 | return False 464 | return True 465 | 466 | class GetAllObjects: 467 | def __init__(self): 468 | self.object_list = [] 469 | 470 | def get_object_in_bucket(self, oss, bucket="", marker="", prefix=""): 471 | object_list = [] 472 | maxkeys = 1000 473 | try: 474 | res = oss.get_bucket(bucket, prefix, marker, maxkeys=maxkeys) 475 | body = res.read() 476 | hh = GetBucketXml(body) 477 | (fl, pl) = hh.list() 478 | if len(fl) != 0: 479 | for i in fl: 480 | if isinstance(i[0], unicode): 481 | object = i[0].encode('utf-8') 482 | object_list.append(object) 483 | 484 | marker = hh.nextmarker 485 | except: 486 | pass 487 | return (object_list, marker) 488 | 489 | def get_all_object_in_bucket(self, oss, bucket="", marker="", prefix=""): 490 | marker2 = "" 491 | while True: 492 | (object_list, marker) = self.get_object_in_bucket(oss, bucket, marker2, prefix) 493 | marker2 = marker 494 | if len(object_list) != 0: 495 | self.object_list.extend(object_list) 496 | 497 | if not marker: 498 | break 499 | def get_all_buckets(oss): 500 | bucket_list = [] 501 | res = oss.get_service() 502 | if res.status == 200: 503 | h = GetServiceXml(res.read()) 504 | for b in h.bucket_list: 505 | bucket_list.append(str(b.name).strip()) 506 | return bucket_list 507 | 508 | def get_object_list_marker_from_xml(body): 509 | #return ([(object_name, object_length, last_modify_time)...], marker) 510 | object_meta_list = [] 511 | next_marker = "" 512 | hh = GetBucketXml(body) 513 | (fl, pl) = hh.list() 514 | if len(fl) != 0: 515 | for i in fl: 516 | if isinstance(i[0], unicode): 517 | object = i[0].encode('utf-8') 518 | else: 519 | object = i[0] 520 | last_modify_time = i[1] 521 | length = i[3] 522 | etag = i[2] 523 | object_meta_list.append((object, length, last_modify_time, etag)) 524 | if hh.is_truncated: 525 | next_marker = hh.nextmarker 526 | return (object_meta_list, next_marker) 527 | 528 | def get_upload_id(oss, bucket, object, headers=None): 529 | ''' 530 | get the upload id of object. 531 | Returns: 532 | string 533 | ''' 534 | if not headers: 535 | headers = {} 536 | upload_id = "" 537 | res = oss.init_multi_upload(bucket, object, headers) 538 | if res.status == 200: 539 | body = res.read() 540 | h = GetInitUploadIdXml(body) 541 | upload_id = h.upload_id 542 | else: 543 | print res.status 544 | print res.getheaders() 545 | print res.read() 546 | return upload_id 547 | 548 | def get_all_upload_id_list(oss, bucket): 549 | ''' 550 | get all upload id of bucket 551 | Returns: 552 | list 553 | ''' 554 | all_upload_id_list = [] 555 | marker = "" 556 | id_marker = "" 557 | while True: 558 | res = oss.get_all_multipart_uploads(bucket, key_marker=marker, upload_id_marker=id_marker) 559 | if res.status != 200: 560 | return all_upload_id_list 561 | 562 | body = res.read() 563 | hh = GetMultipartUploadsXml(body) 564 | (fl, pl) = hh.list() 565 | for i in fl: 566 | all_upload_id_list.append(i) 567 | if hh.is_truncated: 568 | marker = hh.next_key_marker 569 | id_marker = hh.next_upload_id_marker 570 | else: 571 | break 572 | if not marker and not id_marker: 573 | break 574 | return all_upload_id_list 575 | 576 | def get_upload_id_list(oss, bucket, object): 577 | ''' 578 | get all upload id list of one object. 
579 |     Returns:
580 |         list
581 |     '''
582 |     upload_id_list = []
583 |     marker = ""
584 |     id_marker = ""
585 |     while True:
586 |         res = oss.get_all_multipart_uploads(bucket, prefix=object, key_marker=marker, upload_id_marker=id_marker)
587 |         if res.status != 200:
588 |             break
589 |         body = res.read()
590 |         hh = GetMultipartUploadsXml(body)
591 |         (fl, pl) = hh.list()
592 |         for i in fl:
593 |             upload_id_list.append(i[1])
594 |         if hh.is_truncated:
595 |             marker = hh.next_key_marker
596 |             id_marker = hh.next_upload_id_marker
597 |         else:
598 |             break
599 |         if not marker:
600 |             break
601 | 
602 |     return upload_id_list
603 | 
604 | def get_part_list(oss, bucket, object, upload_id, max_part=""):
605 |     '''
606 |     get uploaded part list of object.
607 |     Returns:
608 |         list
609 |     '''
610 |     part_list = []
611 |     marker = ""
612 |     while True:
613 |         res = oss.get_all_parts(bucket, object, upload_id, part_number_marker = marker, max_parts=max_part)
614 |         if res.status != 200:
615 |             break
616 |         body = res.read()
617 |         h = GetPartsXml(body)
618 |         part_list.extend(h.list())
619 |         if h.is_truncated:
620 |             marker = h.next_part_number_marker
621 |         else:
622 |             break
623 |         if not marker:
624 |             break
625 |     return part_list
626 | 
627 | def get_part_xml(oss, bucket, object, upload_id):
628 |     '''
629 |     get uploaded part list of object.
630 |     Returns:
631 |         string
632 |     '''
633 |     part_list = []
634 |     part_list = get_part_list(oss, bucket, object, upload_id)
635 |     xml_string = r'<CompleteMultipartUpload>'
636 |     for part in part_list:
637 |         xml_string += r'<Part>'
638 |         xml_string += r'<PartNumber>' + str(part[0]) + r'</PartNumber>'
639 |         xml_string += r'<ETag>' + part[1] + r'</ETag>'
640 |         xml_string += r'</Part>'
641 |     xml_string += r'</CompleteMultipartUpload>'
642 |     return xml_string
643 | 
644 | def get_part_map(oss, bucket, object, upload_id):
645 |     part_list = []
646 |     part_list = get_part_list(oss, bucket, object, upload_id)
647 |     part_map = {}
648 |     for part in part_list:
649 |         part_map[str(part[0])] = part[1]
650 |     return part_map
651 | 
652 | ########## multi-thread ##########
653 | class DeleteObjectWorker(Thread):
654 |     def __init__(self, oss, bucket, part_msg_list, retry_times=5):
655 |         Thread.__init__(self)
656 |         self.oss = oss
657 |         self.bucket = bucket
658 |         self.part_msg_list = part_msg_list
659 |         self.retry_times = retry_times
660 | 
661 |     def run(self):
662 |         bucket = self.bucket
663 |         object_list = self.part_msg_list
664 |         step = 1000
665 |         begin = 0
666 |         end = 0
667 |         total_length = len(object_list)
668 |         remain_length = total_length
669 |         while True:
670 |             if remain_length > step:
671 |                 end = begin + step
672 |             elif remain_length > 0:
673 |                 end = begin + remain_length
674 |             else:
675 |                 break
676 |             is_fail = True
677 |             retry_times = self.retry_times
678 |             while True:
679 |                 try:
680 |                     if retry_times <= 0:
681 |                         break
682 |                     res = self.oss.delete_objects(bucket, object_list[begin:end])
683 |                     if res.status / 100 == 2:
684 |                         is_fail = False
685 |                         break
686 |                 except:
687 |                     retry_times = retry_times - 1
688 |                     time.sleep(1)
689 |             if is_fail:
690 |                 print "delete object_list[%s:%s] failed!, first is %s" % (begin, end, object_list[begin])
691 |             begin = end
692 |             remain_length = remain_length - step
693 | 
694 | class PutObjectGroupWorker(Thread):
695 |     def __init__(self, oss, bucket, file_path, part_msg_list, retry_times=5):
696 |         Thread.__init__(self)
697 |         self.oss = oss
698 |         self.bucket = bucket
699 |         self.part_msg_list = part_msg_list
700 |         self.file_path = file_path
701 |         self.retry_times = retry_times
702 | 
703 |     def run(self):
704 |         for part in self.part_msg_list:
705 |             if len(part) == 5:
706 |                 bucket = self.bucket
707 |                 file_name = part[1]
708 |                 if isinstance(file_name, unicode):
709 |                     file_name = file_name.encode('utf-8')
710 |                 object_name = file_name
711 |                 retry_times = self.retry_times
712 |                 is_skip = False
713 |                 while True:
714 |                     try:
715 |                         if retry_times <= 0:
716 |                             break
717 |                         res = self.oss.head_object(bucket, object_name)
718 |                         if res.status == 200:
719 |                             header_map = convert_header2map(res.getheaders())
720 |                             etag = safe_get_element("etag", header_map)
721 |                             md5 = part[2]
722 |                             if etag.replace('"', "").upper() == md5.upper():
723 |                                 is_skip = True
724 |                             break
725 |                     except:
726 |                         retry_times = retry_times - 1
727 |                         time.sleep(1)
728 | 
729 |                 if is_skip:
730 |                     continue
731 | 
732 |                 partsize = part[3]
733 |                 offset = part[4]
734 |                 retry_times = self.retry_times
735 |                 while True:
736 |                     try:
737 |                         if retry_times <= 0:
738 |                             break
739 |                         res = self.oss.put_object_from_file_given_pos(bucket, object_name, self.file_path, offset, partsize)
740 |                         if res.status != 200:
741 |                             print "upload ", file_name, "failed!", " ret is:", res.status
742 |                             print "headers", res.getheaders()
743 |                             retry_times = retry_times - 1
744 |                             time.sleep(1)
745 |                         else:
746 |                             break
747 |                     except:
748 |                         retry_times = retry_times - 1
749 |                         time.sleep(1)
750 | 
751 |             else:
752 |                 print "ERROR! part", part, " is not as expected!"
753 | 
754 | class UploadPartWorker(Thread):
755 |     def __init__(self, oss, bucket, object, upload_id, file_path, part_msg_list, uploaded_part_map, retry_times=5, debug=DEBUG):
756 |         Thread.__init__(self)
757 |         self.oss = oss
758 |         self.bucket = bucket
759 |         self.object = object
760 |         self.part_msg_list = part_msg_list
761 |         self.file_path = file_path
762 |         self.upload_id = upload_id
763 |         self.uploaded_part_map = uploaded_part_map
764 |         self.retry_times = retry_times
765 |         self.logger = getlogger(debug)
766 | 
767 |     def run(self):
768 |         for part in self.part_msg_list:
769 |             part_number = str(part[0])
770 |             if len(part) == 5:
771 |                 bucket = self.bucket
772 |                 object = self.object
773 |                 if self.uploaded_part_map.has_key(part_number):
774 |                     md5 = part[2]
775 |                     if self.uploaded_part_map[part_number].replace('"', "").upper() == md5.upper():
776 |                         continue
777 | 
778 |                 partsize = part[3]
779 |                 offset = part[4]
780 |                 retry_times = self.retry_times
781 |                 while True:
782 |                     try:
783 |                         if retry_times <= 0:
784 |                             break
785 |                         res = self.oss.upload_part_from_file_given_pos(bucket, object, self.file_path, offset, partsize, self.upload_id, part_number)
786 |                         if res.status != 200:
787 |                             self.logger.warn("Upload %s/%s from %s, failed! ret is:%s." %(bucket, object, self.file_path, res.status))
788 |                             self.logger.warn("headers:%s" % res.getheaders())
789 |                             retry_times = retry_times - 1
790 |                             time.sleep(1)
791 |                         else:
792 |                             self.logger.info("Upload %s/%s from %s, OK! ret is:%s." % (bucket, object, self.file_path, res.status))
793 |                             break
794 |                     except:
795 |                         retry_times = retry_times - 1
796 |                         time.sleep(1)
797 |             else:
798 |                 self.logger.error("ERROR! part %s is not as expected!"
% part) 799 | 800 | class MultiGetWorker(Thread): 801 | def __init__(self, oss, bucket, object, file, start, end, retry_times=5): 802 | Thread.__init__(self) 803 | self.oss = oss 804 | self.bucket = bucket 805 | self.object = object 806 | self.startpos = start 807 | self.endpos = end 808 | self.file = file 809 | self.length = self.endpos - self.startpos + 1 810 | self.need_read = 0 811 | self.get_buffer_size = 10*1024*1024 812 | self.retry_times = retry_times 813 | 814 | def run(self): 815 | if self.startpos >= self.endpos: 816 | return 817 | 818 | retry_times = 0 819 | while True: 820 | headers = {} 821 | self.file.seek(self.startpos) 822 | headers['Range'] = 'bytes=%d-%d' % (self.startpos, self.endpos) 823 | try: 824 | res = self.oss.object_operation("GET", self.bucket, self.object, headers) 825 | if res.status == 206: 826 | while self.need_read < self.length: 827 | left_len = self.length - self.need_read 828 | if left_len > self.get_buffer_size: 829 | content = res.read(self.get_buffer_size) 830 | else: 831 | content = res.read(left_len) 832 | if content: 833 | self.need_read += len(content) 834 | self.file.write(content) 835 | else: 836 | break 837 | break 838 | except: 839 | pass 840 | retry_times += 1 841 | if retry_times > self.retry_times: 842 | print "ERROR, reach max retry times:%s when multi get /%s/%s" % (self.retry_times, self.bucket, self.object) 843 | break 844 | 845 | self.file.flush() 846 | self.file.close() 847 | 848 | ############### misc ############### 849 | 850 | def split_large_file(file_path, object_prefix="", max_part_num=1000, part_size=10*1024*1024, buffer_size=10*1024): 851 | parts_list = [] 852 | 853 | if os.path.isfile(file_path): 854 | file_size = os.path.getsize(file_path) 855 | 856 | if file_size > part_size * max_part_num: 857 | part_size = (file_size + max_part_num - file_size % max_part_num) / max_part_num 858 | 859 | part_order = 1 860 | fp = open(file_path, 'rb') 861 | fp.seek(os.SEEK_SET) 862 | 863 | part_num = (file_size + part_size - 1) / part_size 864 | 865 | for i in xrange(0, part_num): 866 | left_len = part_size 867 | real_part_size = 0 868 | m = md5.new() 869 | offset = part_size * i 870 | while True: 871 | read_size = 0 872 | if left_len <= 0: 873 | break 874 | elif left_len < buffer_size: 875 | read_size = left_len 876 | else: 877 | read_size = buffer_size 878 | 879 | buffer_content = fp.read(read_size) 880 | m.update(buffer_content) 881 | real_part_size += len(buffer_content) 882 | 883 | left_len = left_len - read_size 884 | 885 | md5sum = m.hexdigest() 886 | 887 | temp_file_name = os.path.basename(file_path) + "_" + str(part_order) 888 | if isinstance(object_prefix, unicode): 889 | object_prefix = object_prefix.encode('utf-8') 890 | if not object_prefix: 891 | file_name = sum_string(temp_file_name) + "_" + temp_file_name 892 | else: 893 | file_name = object_prefix + "/" + sum_string(temp_file_name) + "_" + temp_file_name 894 | part_msg = (part_order, file_name, md5sum, real_part_size, offset) 895 | parts_list.append(part_msg) 896 | part_order += 1 897 | 898 | fp.close() 899 | else: 900 | print "ERROR! No file: ", file_path, ", please check." 
901 | 902 | return parts_list 903 | 904 | def sumfile(fobj): 905 | '''Returns an md5 hash for an object with read() method.''' 906 | m = md5.new() 907 | while True: 908 | d = fobj.read(8096) 909 | if not d: 910 | break 911 | m.update(d) 912 | return m.hexdigest() 913 | 914 | def md5sum(fname): 915 | '''Returns an md5 hash for file fname, or stdin if fname is "-".''' 916 | if fname == '-': 917 | ret = sumfile(sys.stdin) 918 | else: 919 | try: 920 | f = file(fname, 'rb') 921 | except: 922 | return 'Failed to open file' 923 | ret = sumfile(f) 924 | f.close() 925 | return ret 926 | 927 | def md5sum2(filename, offset=0, partsize=0): 928 | m = md5.new() 929 | fp = open(filename, 'rb') 930 | if offset > os.path.getsize(filename): 931 | fp.seek(os.SEEK_SET, os.SEEK_END) 932 | else: 933 | fp.seek(offset) 934 | 935 | left_len = partsize 936 | BufferSize = 8 * 1024 937 | while True: 938 | if left_len <= 0: 939 | break 940 | elif left_len < BufferSize: 941 | buffer_content = fp.read(left_len) 942 | else: 943 | buffer_content = fp.read(BufferSize) 944 | m.update(buffer_content) 945 | left_len = left_len - len(buffer_content) 946 | md5sum = m.hexdigest() 947 | return md5sum 948 | 949 | def sum_string(content): 950 | f = StringIO.StringIO(content) 951 | md5sum = sumfile(f) 952 | f.close() 953 | return md5sum 954 | 955 | def convert_header2map(header_list): 956 | header_map = {} 957 | for (a, b) in header_list: 958 | header_map[a] = b 959 | return header_map 960 | 961 | def safe_get_element(name, container): 962 | for k, v in container.items(): 963 | if k.strip().lower() == name.strip().lower(): 964 | return v 965 | return "" 966 | 967 | def get_content_type_by_filename(file_name): 968 | suffix = "" 969 | name = os.path.basename(file_name) 970 | suffix = name.split('.')[-1] 971 | mime_type = "" 972 | try: 973 | import mimetypes 974 | mimetypes.init() 975 | mime_type = mimetypes.types_map["." 
+ suffix] 976 | except Exception: 977 | mime_type = 'application/octet-stream' 978 | return mime_type 979 | 980 | def smart_code(input_stream): 981 | if isinstance(input_stream, str): 982 | try: 983 | tmp = unicode(input_stream, 'utf-8') 984 | except UnicodeDecodeError: 985 | try: 986 | tmp = unicode(input_stream, 'gbk') 987 | except UnicodeDecodeError: 988 | try: 989 | tmp = unicode(input_stream, 'big5') 990 | except UnicodeDecodeError: 991 | try: 992 | tmp = unicode(input_stream, 'ascii') 993 | except: 994 | tmp = input_stream 995 | else: 996 | tmp = input_stream 997 | return tmp 998 | 999 | def is_ip(s): 1000 | try: 1001 | tmp_list = s.split(':') 1002 | s = tmp_list[0] 1003 | if s == 'localhost': 1004 | return True 1005 | tmp_list = s.split('.') 1006 | if len(tmp_list) != 4: 1007 | return False 1008 | else: 1009 | for i in tmp_list: 1010 | if int(i) < 0 or int(i) > 255: 1011 | return False 1012 | except: 1013 | return False 1014 | return True 1015 | 1016 | def get_second_level_domain(host): 1017 | if is_ip(host): 1018 | return host 1019 | else: 1020 | tmp_list = host.split('.') 1021 | if len(tmp_list) >= 4: 1022 | return ".".join(tmp_list[-3:]) 1023 | return host 1024 | 1025 | if __name__ == '__main__': 1026 | pass 1027 | -------------------------------------------------------------------------------- /ossync/sdk/oss_xml_handler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding=utf-8 3 | from xml.dom import minidom 4 | 5 | def get_tag_text(element, tag): 6 | nodes = element.getElementsByTagName(tag) 7 | if len(nodes) == 0: 8 | return "" 9 | else: 10 | node = nodes[0] 11 | rc = "" 12 | for node in node.childNodes: 13 | if node.nodeType in ( node.TEXT_NODE, node.CDATA_SECTION_NODE): 14 | rc = rc + node.data 15 | if rc == "true": 16 | return True 17 | elif rc == "false": 18 | return False 19 | return rc 20 | 21 | class ErrorXml: 22 | def __init__(self, xml_string): 23 | self.xml = minidom.parseString(xml_string) 24 | self.code = get_tag_text(self.xml, 'Code') 25 | self.msg = get_tag_text(self.xml, 'Message') 26 | self.resource = get_tag_text(self.xml, 'Resource') 27 | self.request_id = get_tag_text(self.xml, 'RequestId') 28 | self.host_id = get_tag_text(self.xml, 'HostId') 29 | 30 | def show(self): 31 | print "Code: %s\nMessage: %s\nResource: %s\nRequestId: %s \nHostId: %s" % (self.code, self.msg, self.resource, self.request_id, self.host_id) 32 | 33 | class Owner: 34 | def __init__(self, xml_element): 35 | self.element = xml_element 36 | self.id = get_tag_text(self.element, "ID") 37 | self.display_name = get_tag_text(self.element, "DisplayName") 38 | 39 | def show(self): 40 | print "ID: %s\nDisplayName: %s" % (self.id, self.display_name) 41 | 42 | class Bucket: 43 | def __init__(self, xml_element): 44 | self.element = xml_element 45 | self.location = get_tag_text(self.element, "Location") 46 | self.name = get_tag_text(self.element, "Name") 47 | self.creation_date = get_tag_text(self.element, "CreationDate") 48 | 49 | def show(self): 50 | print "Name: %s\nCreationDate: %s\nLocation: %s" % (self.name, self.creation_date, self.location) 51 | 52 | class GetServiceXml: 53 | def __init__(self, xml_string): 54 | self.xml = minidom.parseString(xml_string) 55 | self.owner = Owner(self.xml.getElementsByTagName('Owner')[0]) 56 | self.buckets = self.xml.getElementsByTagName('Bucket') 57 | self.bucket_list = [] 58 | for b in self.buckets: 59 | self.bucket_list.append(Bucket(b)) 60 | 61 | def show(self): 62 | print "Owner:" 63 | 
self.owner.show() 64 | print "\nBucket list:" 65 | for b in self.bucket_list: 66 | b.show() 67 | print "" 68 | 69 | def list(self): 70 | bl = [] 71 | for b in self.bucket_list: 72 | bl.append((b.name, b.creation_date, b.location)) 73 | return bl 74 | 75 | class Content: 76 | def __init__(self, xml_element): 77 | self.element = xml_element 78 | self.key = get_tag_text(self.element, "Key") 79 | self.last_modified = get_tag_text(self.element, "LastModified") 80 | self.etag = get_tag_text(self.element, "ETag") 81 | self.size = get_tag_text(self.element, "Size") 82 | self.owner = Owner(self.element.getElementsByTagName('Owner')[0]) 83 | self.storage_class = get_tag_text(self.element, "StorageClass") 84 | 85 | def show(self): 86 | print "Key: %s\nLastModified: %s\nETag: %s\nSize: %s\nStorageClass: %s" % (self.key, self.last_modified, self.etag, self.size, self.storage_class) 87 | self.owner.show() 88 | 89 | class Part: 90 | def __init__(self, xml_element): 91 | self.element = xml_element 92 | self.part_num = get_tag_text(self.element, "PartNumber") 93 | self.object_name = get_tag_text(self.element, "PartName") 94 | self.object_size = get_tag_text(self.element, "PartSize") 95 | self.etag = get_tag_text(self.element, "ETag") 96 | 97 | def show(self): 98 | print "PartNumber: %s\nPartName: %s\nPartSize: %s\nETag: %s\n" % (self.part_num, self.object_name, self.object_size, self.etag) 99 | 100 | class PostObjectGroupXml: 101 | def __init__(self, xml_string): 102 | self.xml = minidom.parseString(xml_string) 103 | self.bucket = get_tag_text(self.xml, 'Bucket') 104 | self.key = get_tag_text(self.xml, 'Key') 105 | self.size = get_tag_text(self.xml, 'Size') 106 | self.etag = get_tag_text(self.xml, "ETag") 107 | 108 | def show(self): 109 | print "Post Object Group, Bucket: %s\nKey: %s\nSize: %s\nETag: %s" % (self.bucket, self.key, self.size, self.etag) 110 | 111 | class GetObjectGroupIndexXml: 112 | def __init__(self, xml_string): 113 | self.xml = minidom.parseString(xml_string) 114 | self.bucket = get_tag_text(self.xml, 'Bucket') 115 | self.key = get_tag_text(self.xml, 'Key') 116 | self.etag = get_tag_text(self.xml, 'Etag') 117 | self.file_length = get_tag_text(self.xml, 'FileLength') 118 | self.index_list = [] 119 | index_lists = self.xml.getElementsByTagName('Part') 120 | for i in index_lists: 121 | self.index_list.append(Part(i)) 122 | 123 | def list(self): 124 | index_list = [] 125 | for i in self.index_list: 126 | index_list.append((i.part_num, i.object_name, i.object_size, i.etag)) 127 | return index_list 128 | 129 | def show(self): 130 | print "Bucket: %s\nObject: %s\nEtag: %s\nObjectSize: %s" % (self.bucket, self.key, self.etag, self.file_length) 131 | print "\nPart list:" 132 | for p in self.index_list: 133 | p.show() 134 | 135 | class GetBucketXml: 136 | def __init__(self, xml_string): 137 | self.xml = minidom.parseString(xml_string) 138 | self.name = get_tag_text(self.xml, 'Name') 139 | self.prefix = get_tag_text(self.xml, 'Prefix') 140 | self.marker = get_tag_text(self.xml, 'Marker') 141 | self.nextmarker = get_tag_text(self.xml, 'NextMarker') 142 | self.maxkeys = get_tag_text(self.xml, 'MaxKeys') 143 | self.delimiter = get_tag_text(self.xml, 'Delimiter') 144 | self.is_truncated = get_tag_text(self.xml, 'IsTruncated') 145 | 146 | self.prefix_list = [] 147 | prefixes = self.xml.getElementsByTagName('CommonPrefixes') 148 | for p in prefixes: 149 | tag_txt = get_tag_text(p, "Prefix") 150 | self.prefix_list.append(tag_txt) 151 | 152 | self.content_list = [] 153 | contents = 
self.xml.getElementsByTagName('Contents') 154 | for c in contents: 155 | self.content_list.append(Content(c)) 156 | 157 | def show(self): 158 | print "Name: %s\nPrefix: %s\nMarker: %s\nNextMarker: %s\nMaxKeys: %s\nDelimiter: %s\nIsTruncated: %s" % (self.name, self.prefix, self.marker, self.nextmarker, self.maxkeys, self.delimiter, self.is_truncated) 159 | print "\nPrefix list:" 160 | for p in self.prefix_list: 161 | print p 162 | print "\nContent list:" 163 | for c in self.content_list: 164 | c.show() 165 | print "" 166 | 167 | def list(self): 168 | cl = [] 169 | pl = [] 170 | for c in self.content_list: 171 | cl.append((c.key, c.last_modified, c.etag, c.size, c.owner.id, c.owner.display_name, c.storage_class)) 172 | for p in self.prefix_list: 173 | pl.append(p) 174 | 175 | return (cl, pl) 176 | 177 | class GetBucketAclXml: 178 | def __init__(self, xml_string): 179 | self.xml = minidom.parseString(xml_string) 180 | if len(self.xml.getElementsByTagName('Owner')) != 0: 181 | self.owner = Owner(self.xml.getElementsByTagName('Owner')[0]) 182 | else: 183 | self.owner = "" 184 | self.grant = get_tag_text(self.xml, 'Grant') 185 | 186 | def show(self): 187 | print "Owner Name: %s\nOwner ID: %s\nGrant: %s" % (self.owner.id, self.owner.display_name, self.grant) 188 | 189 | class GetBucketLocationXml: 190 | def __init__(self, xml_string): 191 | self.xml = minidom.parseString(xml_string) 192 | self.location = get_tag_text(self.xml, 'LocationConstraint') 193 | 194 | def show(self): 195 | print "LocationConstraint: %s" % (self.location) 196 | 197 | class GetInitUploadIdXml: 198 | def __init__(self, xml_string): 199 | self.xml = minidom.parseString(xml_string) 200 | self.bucket = get_tag_text(self.xml, 'Bucket') 201 | self.object = get_tag_text(self.xml, 'Key') 202 | self.key = get_tag_text(self.xml, 'Key') 203 | self.upload_id = get_tag_text(self.xml, 'UploadId') 204 | self.marker = get_tag_text(self.xml, 'Marker') 205 | 206 | def show(self): 207 | print " " 208 | 209 | class Upload: 210 | def __init__(self, xml_element): 211 | self.element = xml_element 212 | self.key = get_tag_text(self.element, "Key") 213 | self.upload_id = get_tag_text(self.element, "UploadId") 214 | 215 | class GetMultipartUploadsXml: 216 | def __init__(self, xml_string): 217 | self.xml = minidom.parseString(xml_string) 218 | self.bucket = get_tag_text(self.xml, 'Bucket') 219 | self.key_marker = get_tag_text(self.xml, 'KeyMarker') 220 | self.upload_id_marker = get_tag_text(self.xml, 'UploadIdMarker') 221 | self.next_key_marker = get_tag_text(self.xml, 'NextKeyMarker') 222 | self.next_upload_id_marker = get_tag_text(self.xml, 'NextUploadIdMarker') 223 | self.delimiter = get_tag_text(self.xml, 'Delimiter') 224 | self.prefix = get_tag_text(self.xml, 'Prefix') 225 | self.max_uploads = get_tag_text(self.xml, 'MaxUploads') 226 | self.is_truncated = get_tag_text(self.xml, 'IsTruncated') 227 | 228 | self.prefix_list = [] 229 | prefixes = self.xml.getElementsByTagName('CommonPrefixes') 230 | for p in prefixes: 231 | tag_txt = get_tag_text(p, "Prefix") 232 | self.prefix_list.append(tag_txt) 233 | 234 | self.content_list = [] 235 | contents = self.xml.getElementsByTagName('Upload') 236 | for c in contents: 237 | self.content_list.append(Upload(c)) 238 | 239 | def list(self): 240 | cl = [] 241 | pl = [] 242 | for c in self.content_list: 243 | cl.append((c.key, c.upload_id)) 244 | for p in self.prefix_list: 245 | pl.append(p) 246 | 247 | return (cl, pl) 248 | 249 | class MultiPart: 250 | def __init__(self, xml_element): 251 | self.element = 
177 | class GetBucketAclXml:
178 |     def __init__(self, xml_string):
179 |         self.xml = minidom.parseString(xml_string)
180 |         if len(self.xml.getElementsByTagName('Owner')) != 0:
181 |             self.owner = Owner(self.xml.getElementsByTagName('Owner')[0])
182 |         else:
183 |             self.owner = ""
184 |         self.grant = get_tag_text(self.xml, 'Grant')
185 | 
186 |     def show(self):
187 |         print "Owner Name: %s\nOwner ID: %s\nGrant: %s" % (self.owner.id, self.owner.display_name, self.grant)
188 | 
189 | class GetBucketLocationXml:
190 |     def __init__(self, xml_string):
191 |         self.xml = minidom.parseString(xml_string)
192 |         self.location = get_tag_text(self.xml, 'LocationConstraint')
193 | 
194 |     def show(self):
195 |         print "LocationConstraint: %s" % (self.location)
196 | 
197 | class GetInitUploadIdXml:
198 |     def __init__(self, xml_string):
199 |         self.xml = minidom.parseString(xml_string)
200 |         self.bucket = get_tag_text(self.xml, 'Bucket')
201 |         self.object = get_tag_text(self.xml, 'Key')
202 |         self.key = get_tag_text(self.xml, 'Key')
203 |         self.upload_id = get_tag_text(self.xml, 'UploadId')
204 |         self.marker = get_tag_text(self.xml, 'Marker')
205 | 
206 |     def show(self):
207 |         print " "
208 | 
209 | class Upload:
210 |     def __init__(self, xml_element):
211 |         self.element = xml_element
212 |         self.key = get_tag_text(self.element, "Key")
213 |         self.upload_id = get_tag_text(self.element, "UploadId")
214 | 
215 | class GetMultipartUploadsXml:
216 |     def __init__(self, xml_string):
217 |         self.xml = minidom.parseString(xml_string)
218 |         self.bucket = get_tag_text(self.xml, 'Bucket')
219 |         self.key_marker = get_tag_text(self.xml, 'KeyMarker')
220 |         self.upload_id_marker = get_tag_text(self.xml, 'UploadIdMarker')
221 |         self.next_key_marker = get_tag_text(self.xml, 'NextKeyMarker')
222 |         self.next_upload_id_marker = get_tag_text(self.xml, 'NextUploadIdMarker')
223 |         self.delimiter = get_tag_text(self.xml, 'Delimiter')
224 |         self.prefix = get_tag_text(self.xml, 'Prefix')
225 |         self.max_uploads = get_tag_text(self.xml, 'MaxUploads')
226 |         self.is_truncated = get_tag_text(self.xml, 'IsTruncated')
227 | 
228 |         self.prefix_list = []
229 |         prefixes = self.xml.getElementsByTagName('CommonPrefixes')
230 |         for p in prefixes:
231 |             tag_txt = get_tag_text(p, "Prefix")
232 |             self.prefix_list.append(tag_txt)
233 | 
234 |         self.content_list = []
235 |         contents = self.xml.getElementsByTagName('Upload')
236 |         for c in contents:
237 |             self.content_list.append(Upload(c))
238 | 
239 |     def list(self):
240 |         cl = []
241 |         pl = []
242 |         for c in self.content_list:
243 |             cl.append((c.key, c.upload_id))
244 |         for p in self.prefix_list:
245 |             pl.append(p)
246 | 
247 |         return (cl, pl)
248 | 
249 | class MultiPart:
250 |     def __init__(self, xml_element):
251 |         self.element = xml_element
252 |         self.part_number = get_tag_text(self.element, 'PartNumber')
253 |         self.last_modified = get_tag_text(self.element, 'LastModified')
254 |         self.etag = get_tag_text(self.element, 'ETag')
255 |         self.size = get_tag_text(self.element, 'Size')
256 | 
257 | class GetPartsXml:
258 |     def __init__(self, xml_string):
259 |         self.xml = minidom.parseString(xml_string)
260 |         self.bucket = get_tag_text(self.xml, 'Bucket')
261 |         self.key = get_tag_text(self.xml, 'Key')
262 |         self.upload_id = get_tag_text(self.xml, 'UploadId')
263 |         self.storage_class = get_tag_text(self.xml, 'StorageClass')
264 |         self.next_part_number_marker = get_tag_text(self.xml, 'NextPartNumberMarker')
265 |         self.max_parts = get_tag_text(self.xml, 'MaxParts')
266 |         self.is_truncated = get_tag_text(self.xml, 'IsTruncated')
267 |         self.part_number_marker = get_tag_text(self.xml, 'PartNumberMarker')
268 | 
269 |         self.content_list = []
270 |         contents = self.xml.getElementsByTagName('Part')
271 |         for c in contents:
272 |             self.content_list.append(MultiPart(c))
273 | 
274 |     def list(self):
275 |         cl = []
276 |         for c in self.content_list:
277 |             cl.append((c.part_number, c.etag, c.size, c.last_modified))
278 |         return cl
279 | 
280 | class CompleteUploadXml:
281 |     def __init__(self, xml_string):
282 |         self.xml = minidom.parseString(xml_string)
283 |         self.location = get_tag_text(self.xml, 'Location')
284 |         self.bucket = get_tag_text(self.xml, 'Bucket')
285 |         self.key = get_tag_text(self.xml, 'Key')
286 |         self.etag = get_tag_text(self.xml, "ETag")
287 | 
288 | class DeletedObjectsXml:
289 |     def __init__(self, xml_string):
290 |         self.xml = minidom.parseString(xml_string)
291 |         contents = self.xml.getElementsByTagName('Deleted')
292 |         self.content_list = []
293 |         for c in contents:
294 |             self.content_list.append(get_tag_text(c, 'Key'))
295 |     def list(self):
296 |         cl = []
297 |         for c in self.content_list:
298 |             cl.append(c)
299 |         return cl
300 | 
301 | class CnameInfoPart:
302 |     def __init__(self, xml_element):
303 |         self.element = xml_element
304 |         self.cname = get_tag_text(self.element, 'Cname')
305 |         self.bucket = get_tag_text(self.element, 'Bucket')
306 |         self.status = get_tag_text(self.element, 'Status')
307 |         self.lastmodifytime = get_tag_text(self.element, 'LastModifyTime')
308 | 
309 | class CnameToBucketXml:
310 |     def __init__(self, xml_string):
311 |         self.xml = minidom.parseString(xml_string)
312 |         self.content_list = []
313 |         contents = self.xml.getElementsByTagName('CnameInfo')
314 |         for c in contents:
315 |             self.content_list.append(CnameInfoPart(c))
316 | 
317 |     def list(self):
318 |         cl = []
319 |         for c in self.content_list:
320 |             cl.append((c.cname, c.bucket, c.status, c.lastmodifytime))
321 |         return cl
322 | 
323 | class RedirectXml:
324 |     def __init__(self, xml_string):
325 |         self.xml = minidom.parseString(xml_string)
326 |         self.endpoint = get_tag_text(self.xml, 'Endpoint')
327 |     def Endpoint(self):
328 |         return self.endpoint
329 | 
330 | if __name__ == "__main__":
331 |     pass
332 | 
--------------------------------------------------------------------------------
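Before moving on, a minimal sketch of how these handlers are fed. The ListBucketResult body below is hand-written for illustration (not captured from a live OSS response), and Owner is assumed to read the ID/DisplayName children that back the owner.id/owner.display_name fields used in GetBucketXml.list():

    # Illustration only: parse a hand-written ListBucketResult with GetBucketXml.
    from ossync.sdk.oss_xml_handler import GetBucketXml

    body = """<?xml version="1.0" encoding="UTF-8"?>
    <ListBucketResult>
      <Name>dzdata</Name>
      <Prefix>audios/</Prefix>
      <Marker>audios/</Marker>
      <NextMarker>audios/a.mp3</NextMarker>
      <MaxKeys>100</MaxKeys>
      <Delimiter>/</Delimiter>
      <IsTruncated>false</IsTruncated>
      <Contents>
        <Key>audios/a.mp3</Key>
        <LastModified>2012-01-01T00:00:00.000Z</LastModified>
        <ETag>"0123456789ABCDEF0123456789ABCDEF"</ETag>
        <Size>1024</Size>
        <Owner><ID>12345</ID><DisplayName>demo</DisplayName></Owner>
        <StorageClass>Standard</StorageClass>
      </Contents>
    </ListBucketResult>"""

    (files, prefixes) = GetBucketXml(body).list()
    for f in files:
        print f[0]    # the object key, e.g. "audios/a.mp3"

This is exactly the shape that walk_bucket() in sync_thread.py relies on when it collects the keys it is about to delete.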
/ossync/sdk/pkg_info.py:
--------------------------------------------------------------------------------
1 | package = "oss"
2 | version = "0.1.3"
3 | url = "http://oss.aliyun.com"
4 | license = "GPL version 2"
5 | short_description = "Command line tool for managing Aliyun Open Storage Service."
6 | long_description = """
7 | osscmd lets you create/delete/list buckets and upload/download/copy/delete files from/to
8 | Aliyun OSS (Open Storage Service).
9 | """
10 | 
11 | 
--------------------------------------------------------------------------------
/queue_thread.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, threading, logging
24 | import os.path
25 | from Queue import *
26 | import hashlib
27 | from ossync.lib import helper
28 | from ossync.lib import queue_model
29 | from config.setting import *
30 | import time
31 | 
32 | class QueueThread(threading.Thread):
33 | 
34 |     """ This thread turns (bucket, root, path) triples into elements of the
35 |     upload queue. Element format: "bucket::root::relpath::action::hashcode",
36 |     where action marks the file as created (C), modified (M) or deleted (D),
37 |     and hashcode is the element's MD5 identity. """
38 |     def __init__(self, oss_mappers, queue, *args, **kwargs):
39 |         threading.Thread.__init__(self, *args, **kwargs)
40 |         self.oss_mappers = oss_mappers
41 |         self.queue = queue
42 |         self._terminate = False
43 |         self.logger = logging.getLogger('app')
44 |         dbpath = DB_PATH
45 |         self.qm = queue_model.QueueModel(dbpath)
46 | 
47 |     def terminate(self):
48 |         self._terminate = True
49 | 
50 |     def queue_folders(self, bucket, folders):
51 |         """Walk every folder and enqueue each file found as a queue element."""
52 |         files = {}
53 |         elements = []
54 |         for d in folders:
55 |             files[d] = list(helper.walk_files(os.path.normpath(d), yield_folders = True))
56 |         if len(files) > 0:
57 |             for k in files:
58 |                 if len(files[k]) > 0:
59 |                     for path in files[k]:
60 |                         self.queue_el(bucket, k, path)
61 | 
62 |     def queue_el(self, bucket, root, path):
63 |         """Build a queue element from the bucket, root and path, then enqueue it."""
64 |         relpath = os.path.relpath(path, root)  # path relative to root
65 |         filehash = ""
66 |         if os.path.isfile(path):
67 |             filehash = helper.calc_file_md5(path)
68 |         hashcode = helper.calc_el_md5(root, relpath, bucket, filehash)
69 |         el = bucket + '::' + root + '::' + relpath + '::C' + '::' + hashcode
70 |         if not self.is_el_queued(hashcode):
71 |             data = {"root": root, "relpath": relpath, "bucket": bucket, "action": 'C', "status": 0, "hashcode": hashcode, "retries": 0}
72 | 
73 |             try:
74 |                 self.qm.save(data)
75 |                 self.queue.put(el, block = True, timeout = 1)
76 |                 msg = 'queue element:' + el
77 |                 #print msg
78 |                 self.logger.info(msg)
79 |             except Full as e:
80 |                 self.logger.error(e.message)
81 | 
82 | 
83 |     def is_el_queued(self, hashcode):
84 |         try:
85 |             row = self.qm.get(hashcode)
86 |             if row:
87 |                 return True
88 |             return False
89 |         except Exception as e:
90 |             self.logger.error(e.message)
91 |             return False
92 | 
93 |     def run(self):
94 |         if self.oss_mappers is None or len(self.oss_mappers) == 0:
95 |             self.queue.put(None)
96 |             return
97 |         self.qm.open()
98 |         for oss_mapper in self.oss_mappers:
99 |             bucket = oss_mapper['bucket']
100 |             local_folders = oss_mapper['local_folders']
101 |             if len(bucket) > 0 and len(local_folders) > 0:
102 |                 self.queue_folders(bucket, local_folders)
103 |         self.qm.close()
104 |         time.sleep(1)
105 |         self.queue.put(None)
106 |         #self.queue.join()
107 |         return
108 | 
109 | 
110 | 
111 | 
112 | 
--------------------------------------------------------------------------------
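The element format is the whole contract between the two threads. A tiny round-trip sketch (the values are invented; the real hashcode is the MD5 hex digest returned by helper.calc_el_md5), which also shows why neither root nor relpath may contain "::":

    # Compose an element the way QueueThread.queue_el() does ...
    bucket, root, relpath = 'dzdata', '/root/testdata/audios', 'a.mp3'
    hashcode = 'c4ca4238a0b923820dcc509a6f75849b'    # placeholder MD5, for illustration
    el = bucket + '::' + root + '::' + relpath + '::C' + '::' + hashcode

    # ... and recover the five fields the way SyncThread.run() does:
    (bucket, root, relpath, action, hashcode) = el.split('::')
    print action    # prints: C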
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os
24 | import sys
25 | 
26 | # check if python version >= 2.6 and < 3.0
27 | if sys.version_info < (2, 6):
28 |     sys.stderr.write("Sorry, OSSync requires at least Python 2.6\n")
29 |     sys.exit(1)
30 | if sys.version_info >= (3, 0):
31 |     sys.stderr.write("Sorry, Python 3.0+ is unsupported at present.\n")
32 |     sys.exit(1)
33 | 
34 | # check if linux kernel supports inotify
35 | #if not os.path.exists("/proc/sys/fs/inotify"):
36 | #    sys.stderr.write("Sorry, your linux kernel doesn't support inotify.\n")
37 | #    sys.exit(1)
38 | 
39 | #print "Start to install necessary modules ..."
40 | # check if pip has been installed
41 | #excode = os.system("pip --version")
42 | #if excode > 0:
43 |     # try to install pip
44 | #    os.system("sudo curl http://python-distribute.org/distribute_setup.py | python")
45 | #    os.system("curl https://raw.github.com/pypa/pip/master/contrib/get-pip.py | python")
46 |     # clean temp files
47 | #    os.system("rm -f distribute*.tar.gz")
48 | 
49 | # try to install pyinotify
50 | #os.system("sudo pip install pyinotify")
51 | 
52 | # check if pyinotify has been installed
53 | #try:
54 | #    import pyinotify
55 | print "Installation complete successfully!"
56 | #except ImportError as e:
57 | #    sys.stderr.write("Sorry, installing the pyinotify module failed! Please try to install it manually.\n")
58 | #    sys.exit(1)
59 | 
60 | 
61 | 
62 | 
--------------------------------------------------------------------------------
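Since the inotify and pyinotify checks are commented out in this incremental-backup fork, the only live gate in setup.py is the interpreter version. An equivalent stand-alone check for a quick manual test (illustrative):

    import sys
    # True on a supported interpreter (>= 2.6, < 3.0), False otherwise.
    print (2, 6) <= sys.version_info < (3, 0)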
/sync_thread.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, sys, threading
24 | import logging
25 | import hashlib
26 | from Queue import *
27 | from ossync.lib import queue_model
28 | from ossync.lib import helper
29 | import time
30 | try:
31 |     from ossync.sdk.oss_api import *
32 | except:
33 |     from ossync.oss_api import *
34 | try:
35 |     from ossync.sdk.oss_xml_handler import *
36 | except:
37 |     from ossync.oss_xml_handler import *
38 | from config.setting import *
39 | 
40 | LARGE_FILE_SIZE = 10000000 # files larger than this (about 10 MB) are uploaded in multiple parts
41 | 
42 | class SyncThread(threading.Thread):
43 |     def __init__(self, oss, queue, *args, **kwargs):
44 |         threading.Thread.__init__(self, *args, **kwargs)
45 |         self.queue = queue
46 |         self.oss = oss
47 |         self._terminate = False
48 |         self.logger = logging.getLogger('app')
49 |         dbpath = DB_PATH
50 |         self.qm = queue_model.QueueModel(dbpath)
51 | 
52 | 
53 |     def terminate(self):
54 |         self._terminate = True
55 | 
56 |     def upload(self, bucket, oss_obj_name, filename):
57 |         if not os.path.lexists(filename):
58 |             return None
59 |         success = False
60 |         if os.path.isdir(filename):
61 |             oss_obj_name += '/'
62 |             res = self.oss.put_object_with_data(bucket = bucket, object = oss_obj_name, input_content = '')
63 |             if (res.status / 100) == 2:
64 |                 success = True
65 |         else:
66 |             file_size = os.path.getsize(filename)
67 |             if file_size > LARGE_FILE_SIZE:
68 |                 is_large_file = True
69 |                 res = self.oss.upload_large_file(bucket = bucket, object = oss_obj_name, filename = filename)
70 |             else:
71 |                 is_large_file = False
72 |                 res = self.oss.put_object_from_file(bucket = bucket, object = oss_obj_name, filename = filename)
73 |             filehash = helper.calc_file_md5(filename)
74 |             header_map = convert_header2map(res.getheaders())
75 |             etag = safe_get_element("etag", header_map).upper().replace('"', '')
76 |             if (res.status / 100) == 2:
77 |                 if is_large_file == False:  # a single put returns the object's MD5 as its ETag, so verify it
78 |                     if filehash.upper() == etag:
79 |                         success = True
80 |                     else:
81 |                         success = False
82 |                 else:  # multipart ETags are not plain MD5s, so trust the 2xx status
83 |                     success = True
84 |         return success
85 | 
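# --- illustrative aside, not part of sync_thread.py --------------------------
# With LARGE_FILE_SIZE = 10000000, a 4 MB file takes the put_object_from_file
# branch and is verified by comparing its local MD5 with the returned ETag,
# while a 12 MB file goes through upload_large_file, whose multipart ETag is
# not a plain MD5, so only the 2xx status is checked:
#
#   4 * 1024 * 1024 > 10000000     # False -> single put + MD5/ETag comparison
#   12 * 1024 * 1024 > 10000000    # True  -> multipart upload, status only
# ------------------------------------------------------------------------------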
86 |     def exists_oss_object(self, bucket, oss_obj_name):
87 |         headers = {}
88 |         res = self.oss.head_object(bucket, oss_obj_name, headers)
89 |         if (res.status / 100) == 2:
90 |             return True
91 |         else:
92 |             return False
93 | 
94 |     def walk_bucket(self, bucket, prefix, marker, delimiter, maxkeys, headers, result = []):  # callers always pass result explicitly; the mutable default is not relied on
95 |         res = self.oss.get_bucket(bucket, prefix, marker, delimiter, maxkeys, headers)
96 |         if (res.status / 100) == 2:
97 |             body = res.read()
98 |             h = GetBucketXml(body)
99 |             (file_list, common_list) = h.list()
100 |             if len(file_list) > 0:
101 |                 for item in file_list:
102 |                     result.append(item[0])
103 |             if len(common_list) > 0:
104 |                 for path in common_list:
105 |                     result.append(path)
106 |                     self.walk_bucket(bucket, path, marker, delimiter, maxkeys, headers, result)
107 | 
108 |     def delete_oss_object(self, bucket, oss_obj_name):
109 |         headers = {}
110 |         res = self.oss.delete_object(bucket, oss_obj_name, headers)
111 |         if (res.status / 100) == 2:
112 |             return True
113 |         else:
114 |             return False
115 | 
116 |     def delete_oss_objects(self, bucket, oss_obj_name):
117 |         headers = {}
118 |         result = []
119 |         marker = ''
120 |         delimiter = '/'
121 |         maxkeys = 100
122 |         self.walk_bucket(bucket, oss_obj_name, marker, delimiter, maxkeys, headers, result)
123 |         if len(result) > 0:
124 |             for item in result:
125 |                 self.oss.delete_object(bucket, item, headers)
126 |         else:
127 |             self.oss.delete_object(bucket, oss_obj_name, headers)
128 |         return True
129 | 
130 |     def queue_el(self, el):
131 |         '''el: queue element, formatted as "bucket::root::relpath::action::hashcode"'''
132 |         try:
133 |             self.queue.put(el, block = True, timeout = 1)
134 |             msg = 'requeue element:' + el
135 |             self.logger.info(msg)
136 |         except Full as e:
137 |             self.logger.error(e.message)
138 |             print e
139 | 
140 |     def is_el_processed(self, hashcode):
141 |         row = self.qm.get(hashcode)
142 |         if row and str(row['status']) == '1':
143 |             self.qm.update_status(hashcode, 1)
144 |             return True
145 |         return False
146 | 
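# --- illustrative aside, not part of sync_thread.py --------------------------
# is_el_processed() is what makes each run incremental: QueueThread derives the
# hashcode from (root, relpath, bucket, file MD5), so an unchanged file
# reproduces a hashcode already stored with status 1 and is skipped here, while
# any modification changes the file MD5, yields a new hashcode, and the file is
# queued and uploaded again on the next run.
# ------------------------------------------------------------------------------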
147 |     def run(self):
148 |         self.logger.info('Now starting sync thread ...')
149 |         self.qm.open()
150 |         while True:
151 |             if self._terminate:
152 |                 break
153 |             item = self.queue.get()
154 |             if item is None:
155 |                 self.logger.info("Sync thread got None and quit!")
156 |                 break
157 |             (bucket, root, relpath, action, hashcode) = item.split('::')
158 |             if len(bucket) > 0 and len(root) > 0 and len(relpath) > 0 and len(action) > 0:
159 | 
160 |                 if not self.is_el_processed(hashcode):
161 |                     oss_obj_name = os.path.join(os.path.basename(root), relpath)
162 |                     if len(oss_obj_name) > 0:
163 |                         success = False
164 |                         msg = ""
165 |                         if action == 'M' or action == 'C':
166 |                             try:
167 |                                 success = self.upload(bucket, oss_obj_name, os.path.join(root, relpath))
168 |                                 msg = 'put object ' + oss_obj_name + ' to bucket ' + bucket
169 |                             except Exception as e1:
170 |                                 self.logger.critical(e1.message)
171 |                                 pass
172 | 
173 |                         if action == 'D':
174 |                             try:
175 |                                 success = self.delete_oss_objects(bucket, oss_obj_name)
176 |                                 msg = 'delete object ' + oss_obj_name + ' of bucket ' + bucket
177 |                             except Exception as e2:
178 |                                 self.logger.critical(e2.message)
179 |                                 pass
180 |                         if success:
181 |                             msg += ' success'
182 |                             self.logger.info(msg)
183 |                             try:
184 |                                 self.qm.update_status(hashcode, 1)
185 |                             except Exception as e3:
186 |                                 self.logger.critical(e3.message)
187 |                         else:
188 |                             if success == False:  # upload/delete failed; None means the local path no longer exists
189 |                                 msg += ' failure'
190 |                                 self.logger.error(msg)
191 |                                 # requeue the failed element
192 |                                 row = self.qm.get(hashcode)
193 |                                 if row:
194 |                                     retries = int(row['retries'])
195 |                                     if retries < MAX_RETRIES:
196 |                                         self.queue_el(item)
197 |                                         try:
198 |                                             self.qm.update_retries(hashcode, retries + 1)
199 |                                         except Exception as e4:
200 |                                             self.logger.critical(e4.message)
201 |                                     else:
202 |                                         self.logger.critical(msg + ' exceeded max retries')
203 |                             else:
204 |                                 self.logger.critical(msg + ' failure, resource may not exist.')
205 |                                 pass
206 |         self.qm.close()
207 |         self.queue.task_done()
208 |         return
209 | 
--------------------------------------------------------------------------------
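For completeness: the two threads above are wired together by ossync.py, which is not reproduced in this listing. A minimal driver along the same lines might look as follows; the OssAPI name and constructor arguments are assumed from the bundled SDK's conventions, and a single SyncThread is used because QueueThread enqueues exactly one None sentinel (running NTHREADS consumers would require one sentinel per consumer):

    # A minimal, illustrative driver; the real entry point is ossync.py.
    from Queue import Queue

    from config.setting import *
    from ossync.sdk.oss_api import OssAPI
    from queue_thread import QueueThread
    from sync_thread import SyncThread

    queue = Queue()
    producer = QueueThread(oss_mappers, queue)
    consumer = SyncThread(OssAPI(HOST, ACCESS_ID, SECRET_ACCESS_KEY), queue)

    producer.start()
    consumer.start()
    producer.join()
    consumer.join()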