├── .gitignore
├── README.md
├── config
│   ├── __init__.py
│   └── setting.default.py
├── cover.png
├── db
│   ├── ossync.db
│   └── ossync.db.bak
├── init.py
├── inotify_thread.py
├── logs
│   └── app.log
├── ossync.py
├── ossync
│   ├── __init__.py
│   ├── lib
│   │   ├── __init__.py
│   │   ├── helper.py
│   │   └── queue_model.py
│   └── sdk
│       ├── __init__.py
│       ├── oss_api.py
│       ├── oss_sample.py
│       ├── oss_util.py
│       ├── oss_xml_handler.py
│       └── pkg_info.py
├── queue_thread.py
├── setup.py
└── sync_thread.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.pyc
3 | doc/
4 | db/
5 | *.db
6 | *.log
7 | config/
8 | downloads/
9 | uploads/
10 | backup/
11 | logs/
12 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ossync (modified version)
2 | 
3 | Ossync is an open-source sync/upload tool for Aliyun OSS, written in Python and originally built on the inotify mechanism. License: [MIT](http://rem.mit-license.org/). This version is a modification of the real-time sync edition: it performs incremental backups only and does not use inotify. The program exits automatically after each backup run; the next run backs up whatever was added or modified in the meantime. It is recommended to set up a timer with crontab, for example to start the backup at 3 a.m.
4 | 
5 | ## Features
6 | 
7 | * **Sync multiple local folders to multiple buckets in one run** - Once the mapping between local folders and buckets is defined, several local folders can be synced to several buckets at the same time.
8 | * **Fast, queue-based, multi-threaded sync** - A message queue and worker threads make syncing fast.
9 | * **Safe and accurate sync** - Upload checksums and retransmission on failure ensure files are synced completely.
10 | 
11 | ## Installation
12 | Extract the program into any directory, change into that directory, and run:
13 | 
14 |     sudo python setup.py
15 | 
16 | If it prints "Installation complete successfully!", the installation succeeded. Otherwise, check that the following requirements are met and install the pyinotify module manually.
17 | 
18 | * Python newer than 2.6 (python2.7 recommended; python3 is not supported yet)
19 | * Check whether the system has a /proc/sys/fs/inotify/ directory, to confirm that the kernel supports inotify, i.e. the Linux kernel version is above 2.6.13.
20 | * Install the pyinotify module, [https://github.com/seb-m/pyinotify](https://github.com/seb-m/pyinotify).
21 | 
22 | 
23 | ## Running
24 | * Copy config/setting.default.py to setting.py and edit the settings in setting.py; see the explanatory comments in the configuration file.
25 | * From the program's root directory, run:
26 | 
27 |     nohup python ossync.py >/dev/null 2>&1 &
28 | 
29 | ## Scheduled runs
30 | 
31 | * To start the backup at 3 a.m., for example, run:
32 | 
33 |     crontab -e
34 | 
35 | then add this line to the crontab: 0 3 * * * python ~/ossyncone/ossync.py, save and exit.
36 | 
37 | **Note: check the log files in the logs directory to see how the system is running.**
38 | 
--------------------------------------------------------------------------------
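Since this version exits after every run, cron is the natural driver. A variant of the README's crontab line that first changes into the install directory and appends the run's output to a log file (the `~/ossyncone` path is taken from the README; adjust it to your install location, and the `logs/cron.log` name is illustrative):

    0 3 * * * cd ~/ossyncone && python ossync.py >> logs/cron.log 2>&1
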
/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/config/__init__.py
--------------------------------------------------------------------------------
/config/setting.default.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os  # needed for the os.path calls in LOGFILE_PATH and DB_PATH below
24 | 
25 | # OSS connection parameters
26 | ####################
27 | #       OSS        #
28 | ####################
29 | 
30 | HOST = "oss.aliyuncs.com"
31 | ACCESS_ID = ""
32 | SECRET_ACCESS_KEY = ""
33 | 
34 | # Mapping between OSS buckets and local directories: one bucket corresponds to
35 | # one or more local directories (local_folders). Multiple buckets can be defined. Example:
36 | # oss_mappers = [{'bucket': 'dzdata', 'local_folders': ['/root/testdata/audios', '/root/testdata/docs']},
37 | #                {'bucket': 'privdata', 'local_folders': ['/root/testdata/images', '/root/testdata/pdfs']}]
38 | ####################
39 | #     OSS MAP      #
40 | ####################
41 | oss_mappers = [{'bucket': 'dzdata', 'local_folders': ['/root/testdata/audios', '/root/testdata/docs']}]
42 | 
43 | # Logging options
44 | ####################
45 | # LOGGING SETTING  #
46 | ####################
47 | LOGFILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../", "logs/app.log")
48 | MAX_LOGFILE_SIZE = 104857600  # default log file size is 100M; when the limit is reached, a suffixed backup file is created automatically
49 | MAX_BACKUP_COUNT = 5  # default number of backup files is 5
50 | 
51 | # Maximum number of retries when uploading a file or deleting an object
52 | ####################
53 | #   MAX_RETRIES    #
54 | ####################
55 | MAX_RETRIES = 10
56 | 
57 | # Number of upload threads
58 | ####################
59 | #     NTHREADS     #
60 | ####################
61 | NTHREADS = 5
62 | 
63 | # Database path
64 | ####################
65 | #     DB PATH      #
66 | ####################
67 | DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../", "db/ossync.db")
--------------------------------------------------------------------------------
/cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/cover.png
--------------------------------------------------------------------------------
/db/ossync.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/db/ossync.db
--------------------------------------------------------------------------------
/db/ossync.db.bak:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/db/ossync.db.bak
--------------------------------------------------------------------------------
/init.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import sys
24 | import os
25 | import os.path
26 | from config.setting import *
27 | from ossync.lib import queue_model
28 | import logging
29 | import logging.handlers
30 | import time
31 | try:
32 |     from ossync.sdk.oss_api import *
33 | except ImportError:
34 |     from ossync.oss_api import *
35 | 
36 | def set_sys_to_utf8():
37 |     reload(sys)
38 |     sys.setdefaultencoding('utf-8')
39 | 
40 | def get_logger():
41 |     format = logging.Formatter("%(levelname)-10s %(asctime)s %(message)s")
42 |     logging.basicConfig(level = logging.INFO)
43 |     logger = logging.getLogger('app')
44 |     handler1 = logging.handlers.RotatingFileHandler(LOGFILE_PATH, maxBytes = MAX_LOGFILE_SIZE, backupCount = MAX_BACKUP_COUNT)
45 |     handler2 = logging.StreamHandler(sys.stdout)
46 |     handler1.setFormatter(format)
47 |     handler2.setFormatter(format)
48 |     logger.addHandler(handler1)
49 |     # logger.addHandler(handler2)
50 |     return logger
51 | 
52 | def check_config(logger):
53 |     if len(HOST) == 0 or len(ACCESS_ID) == 0 or len(SECRET_ACCESS_KEY) == 0:
54 |         msg = "Please set HOST, ACCESS_ID and SECRET_ACCESS_KEY"
55 |         logger.critical(msg)
56 |         exit(0)
57 |     if len(oss_mappers) == 0:
58 |         msg = "Please set OSS mappers"
59 |         logger.critical(msg)
60 |         exit(0)
61 |     oss = OssAPI(HOST, ACCESS_ID, SECRET_ACCESS_KEY)
62 |     for oss_mapper in oss_mappers:
63 |         bucket = oss_mapper['bucket']
64 |         acl = ''
65 |         headers = {}
66 |         try:
67 |             res = oss.create_bucket(bucket, acl, headers)
68 |         except Exception as e:
69 |             logger.critical(str(e))
70 |             exit(0)
71 |         if (res.status / 100) != 2:
72 |             msg = "Bucket: " + bucket + " does not exist or could not be created; please rename your bucket."
73 |             logger.critical(msg)
74 |             exit(0)
75 |         local_folders = oss_mapper['local_folders']
76 |         if len(local_folders) > 0:
77 |             for folder in local_folders:
78 |                 if not os.path.exists(folder) or not os.path.isdir(folder):
79 |                     msg = "Local folder: " + folder + " does not exist or is not a directory. Please check your settings."
80 |                     logger.critical(msg)
81 |                     exit(0)
82 |         else:
83 |             msg = "Please set at least one local folder for each bucket"
84 |             logger.critical(msg)
85 |             exit(0)
86 | 
87 | def queue_unprocessed(queue, logger):
88 |     dbpath = DB_PATH
89 |     qm = queue_model.QueueModel(dbpath)
90 |     try:
91 |         qm.open()
92 |         items = qm.find_all(status = 0)
93 |         if items:
94 |             for item in items:
95 |                 logger.info(item)
96 |                 if int(item['retries']) < MAX_RETRIES:
97 |                     el = item['bucket'] + '::' + item['root'] + '::' + item['relpath'] + '::' + item['action'] + '::' + item['hashcode']
98 |                     queue.put(el, block = True, timeout = 1)
99 |                     msg = 'queue unprocessed element:' + el
100 |                     logger.info(msg)
101 |         qm.close()
102 |     except Exception as e:
103 |         logger.critical(str(e))
104 | 
--------------------------------------------------------------------------------
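queue_unprocessed above rebuilds queue elements from the database as `::`-joined strings; the worker threads (queue_thread.py and sync_thread.py, not included in this excerpt) are presumably the consumers that split them again. A minimal sketch of the round trip, with illustrative values only:

    # Sketch: the queue element format produced by init.queue_unprocessed.
    # Fields: bucket, root folder, path relative to root, action (C/M/D), element hash.
    item = {'bucket': 'dzdata', 'root': '/root/testdata/audios',
            'relpath': 'a.mp3', 'action': 'C', 'hashcode': 'abc123'}
    el = '::'.join([item['bucket'], item['root'], item['relpath'],
                    item['action'], item['hashcode']])
    # assumed consumer side:
    bucket, root, relpath, action, hashcode = el.split('::')

Note that a path containing "::" would break this format, which is one reason the relative path is stored separately from the root.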
/inotify_thread.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, sys, threading, logging
24 | from Queue import *
25 | from ossync.lib import queue_model
26 | from ossync.lib import helper
27 | try:
28 |     import pyinotify
29 | except ImportError as e:
30 |     print str(e)
31 |     exit(0)
32 | 
33 | class EventHandler(pyinotify.ProcessEvent):
34 |     """Event handler"""
35 | 
36 |     def __init__(self, monitered_dir, queue, bucket):
37 |         self.monitered_dir = monitered_dir
38 |         self.queue = queue
39 |         self.bucket = bucket
40 |         self.logger = logging.getLogger('app')
41 |         dbpath = 'db/ossync.db'
42 |         self.qm = queue_model.QueueModel(dbpath)
43 | 
44 |     def process_IN_CREATE(self, event):
45 |         self.process_event(event, 'CREATE')
46 | 
47 |     def process_IN_DELETE(self, event):
48 |         self.process_event(event, 'DELETE')
49 | 
50 |     def process_IN_MODIFY(self, event):
51 |         self.process_event(event, 'MODIFY')
52 | 
53 |     def process_IN_MOVED_FROM(self, event):
54 |         self.logger.info("Moved from file: %s " % os.path.join(event.path, event.name))
55 |         self.process_event(event, 'DELETE')
56 | 
57 |     def process_IN_MOVED_TO(self, event):
58 |         self.logger.info("Moved to file: %s " % os.path.join(event.path, event.name))
59 |         realpath = os.path.join(event.path, event.name)
60 |         if event.dir:
61 |             self.queue_dir(realpath)
62 |         self.process_event(event, 'CREATE')
63 | 
64 |     def process_event(self, event, action):
65 |         if len(action) == 0:
66 |             return False
67 |         realpath = os.path.join(event.path, event.name)
68 |         relpath = os.path.relpath(realpath, self.monitered_dir)
69 |         if action == 'DELETE':
70 |             if event.dir:
71 |                 relpath += '/'
72 |         self.logger.info(action.title() + " file: %s " % realpath)
73 |         el = self.bucket + '::' + self.monitered_dir + '::' + relpath + '::' + action[0]
74 |         self.save_el(self.monitered_dir, relpath, self.bucket, action[0])
75 |         self.queue_el(el)
76 | 
77 |     def queue_dir(self, queue_path):
78 |         files = list(helper.walk_files(queue_path, yield_folders = True))
79 |         if len(files) > 0:
80 |             for path in files:
81 |                 relpath = os.path.relpath(path, self.monitered_dir)
82 |                 self.save_el(self.monitered_dir, relpath, self.bucket, 'C')
83 |                 el = self.bucket + '::' + self.monitered_dir + '::' + relpath + '::' + 'C'
84 |                 self.queue_el(el)
85 | 
86 |     def save_el(self, root, relpath, bucket, action):
87 |         # helper.calc_el_md5 expects a file hash as its fourth argument; this
88 |         # legacy thread has none available, so an empty string is passed here.
89 |         hashcode = helper.calc_el_md5(root, relpath, bucket, '')
90 |         self.qm.open()
91 |         if self.is_el_existed(hashcode):
92 |             self.qm.update_action(hashcode, action)
93 |             self.qm.update_status(hashcode, 0)
94 |         else:
95 |             data = {"root": root, "relpath": relpath, "bucket": bucket, "action": action, "status": 0, "hashcode": hashcode, "retries": 0}
96 |             self.qm.save(data)
97 |         self.qm.close()
98 | 
99 |     def is_el_existed(self, hashcode):
100 |         row = self.qm.get(hashcode)
101 |         if row:
102 |             return True
103 |         return False
104 | 
105 |     def queue_el(self, el):
106 |         '''el: element of the queue, formatted as "bucket::root::path::C|M|D";
107 |         C means CREATE, M means MODIFY, D means DELETE
108 |         '''
109 |         try:
110 |             self.queue.put(el, block = True, timeout = 1)
111 |             msg = 'queue element:' + el
112 |             self.logger.info(msg)
113 |         except Full as e:
114 |             self.logger.error(str(e))
115 | 
116 | class InotifyThread(threading.Thread):
117 |     def __init__(self, bucket, root, queue, *args, **kwargs):
118 |         threading.Thread.__init__(self, *args, **kwargs)
119 |         self.bucket = bucket
120 |         self.queue = queue
121 |         self.root = root
122 |         self.logger = logging.getLogger('app')
123 |         self._terminate = False
124 | 
125 |     def terminate(self):
126 |         self._terminate = True
127 |         self.notifier.stop()
128 | 
129 |     def start_notify(self, monitered_dir):
130 |         wm = pyinotify.WatchManager()
131 |         mask = pyinotify.IN_DELETE | pyinotify.IN_CREATE | pyinotify.IN_MODIFY | pyinotify.IN_MOVED_FROM | pyinotify.IN_MOVED_TO
132 |         self.notifier = pyinotify.Notifier(wm, EventHandler(monitered_dir, self.queue, self.bucket), timeout = 10)
133 |         wm.add_watch(monitered_dir, mask, rec = True, auto_add = True)
134 |         self.logger.info('now starting to monitor %s' % (monitered_dir))
135 |         while True:
136 |             if self._terminate:
137 |                 break
138 |             self.notifier.process_events()
139 |             if self.notifier.check_events():
140 |                 self.notifier.read_events()
141 | 
142 |     def run(self):
143 |         self.start_notify(self.root)
144 |         return
145 | 
146 | if __name__ == '__main__':
147 |     queue = Queue()
148 |     root = '.'
149 |     bucket = 'dzdata'
150 |     logger = logging.getLogger('app')
151 |     logger.setLevel(logging.INFO)
152 |     logger.addHandler(logging.FileHandler('logs/app.log'))
153 |     inotifythd = InotifyThread(bucket, root, queue)
154 |     inotifythd.start()
155 | 
--------------------------------------------------------------------------------
/logs/app.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/logs/app.log
--------------------------------------------------------------------------------
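inotify_thread.py above is carried over from the real-time edition and is not started by this fork (its import in ossync.py below is commented out). For reference, a minimal standalone pyinotify loop in the same manual-pump style as InotifyThread.start_notify might look like this (the watched path is illustrative):

    import pyinotify

    class Handler(pyinotify.ProcessEvent):
        def process_IN_CREATE(self, event):
            print 'created:', event.pathname

    wm = pyinotify.WatchManager()
    mask = pyinotify.IN_CREATE | pyinotify.IN_DELETE | pyinotify.IN_MODIFY
    notifier = pyinotify.Notifier(wm, Handler(), timeout = 10)
    wm.add_watch('/tmp/watched', mask, rec = True, auto_add = True)
    while True:
        notifier.process_events()
        if notifier.check_events():
            notifier.read_events()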
/ossync.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, threading
24 | from Queue import *
25 | from ossync.sdk.oss_api import *
26 | from queue_thread import QueueThread
27 | from sync_thread import SyncThread
28 | # from inotify_thread import InotifyThread
29 | from init import *
30 | 
31 | if __name__ == '__main__':
32 |     try:
33 |         set_sys_to_utf8()
34 |         logger = get_logger()
35 |         check_config(logger)
36 |         queue = Queue()
37 |         # check unprocessed items; if any exist, queue them
38 |         queue_unprocessed(queue, logger)
39 |         oss = OssAPI(HOST, ACCESS_ID, SECRET_ACCESS_KEY)
40 | 
41 |         syncthd = SyncThread(oss, queue)
42 |         syncthd.start()
43 | 
44 |         queuethd = QueueThread(oss_mappers, queue)
45 |         queuethd.start()
46 |     except KeyboardInterrupt:
47 |         # stop all worker threads (each provides a terminate() method), then exit
48 |         for thd in threading.enumerate():
49 |             if thd is threading.current_thread():
50 |                 continue
51 |             thd.terminate()
52 |         logger.error('KeyboardInterrupt: terminating worker threads')
53 |         exit(0)
54 | 
--------------------------------------------------------------------------------
/ossync/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/ossync/__init__.py
--------------------------------------------------------------------------------
/ossync/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/ossync/lib/__init__.py
--------------------------------------------------------------------------------
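helper.py below computes a file's checksum with a single f.read(), which loads the entire file into memory; for the large files a backup tool may meet, a chunked variant (a sketch, not part of the repository) keeps memory use constant:

    import hashlib

    def calc_file_md5_chunked(filepath, chunk_size = 8192):
        """Same result as helper.calc_file_md5, but reads in fixed-size chunks."""
        md5obj = hashlib.md5()
        with open(filepath, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), ''):
                md5obj.update(chunk)
        return md5obj.hexdigest()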
/ossync/lib/helper.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, fnmatch
24 | import hashlib
25 | 
26 | def encode(s):
27 |     if isinstance(s, unicode):
28 |         return s.encode('utf-8')
29 |     else:
30 |         return s
31 | 
32 | def walk_files(root, patterns = '*', single_level = False, yield_folders = False):
33 |     patterns = patterns.split(';')
34 |     for path, subdirs, files in os.walk(root):
35 |         if yield_folders:
36 |             files.extend(subdirs)
37 |         files.sort()
38 |         for name in files:
39 |             for pattern in patterns:
40 |                 if fnmatch.fnmatch(name, pattern):
41 |                     yield os.path.join(path, name)
42 |                     break
43 |         if single_level:
44 |             break
45 | 
46 | def calc_file_md5(filepath):
47 |     """calc a file's MD5 hashcode (reads the whole file into memory)"""
48 |     with open(filepath, 'rb') as f:
49 |         md5obj = hashlib.md5()
50 |         md5obj.update(f.read())
51 |         hashstr = md5obj.hexdigest()
52 |     return hashstr
53 | 
54 | def calc_el_md5(root, relpath, bucket, filehash):
55 |     """calc a queue element's hashcode"""
56 |     m = hashlib.md5()
57 |     m.update(root + relpath + bucket + filehash)
58 |     hashcode = m.hexdigest()
59 |     return hashcode
60 | 
--------------------------------------------------------------------------------
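queue_model.py below reads and writes a `queue` table whose CREATE statement is not part of this dump (the database ships prebuilt as db/ossync.db). From the seven-value insert and the hashcode lookups, the schema is presumably close to the following sketch (column names and order inferred from the code, not authoritative):

    import sqlite3

    # Hypothetical schema inferred from queue_model.py's queries; the actual
    # db/ossync.db layout may differ (column order matters for the 7-value insert).
    conn = sqlite3.connect('db/ossync.db')
    conn.execute('''create table if not exists queue (
        root text, relpath text, bucket text, action text,
        status integer, hashcode text, retries integer)''')
    conn.commit()
    conn.close()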
/ossync/lib/queue_model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import sqlite3
24 | import itertools
25 | 
26 | class QueueModel(object):
27 |     def __init__(self, dbpath):
28 |         self.dbpath = dbpath
29 | 
30 |     def open(self):
31 |         self.conn = sqlite3.connect(self.dbpath)
32 |         self.conn.text_factory = lambda x: unicode(x, 'utf-8', 'ignore')
33 |         self.cursor = self.conn.cursor()
34 | 
35 |     def close(self):
36 |         self.cursor.close()
37 |         self.conn.close()
38 | 
39 |     def save(self, data={"root": '', "relpath": '', "bucket": '', "action": '', "status": 0, "hashcode": '', "retries": 0}):
40 |         if len(data) == 0:
41 |             return False
42 |         self.cursor.execute('insert into queue values(?, ?, ?, ?, ?, ?, ?)', (data['root'], data['relpath'], data['bucket'], data['action'], data['status'], data['hashcode'], data['retries']))
43 |         self.conn.commit()
44 | 
45 |     def get(self, hashcode):
46 |         self.cursor.execute('select * from queue where hashcode=?', (hashcode, ))
47 |         result = self._map_fields(self.cursor)
48 |         if len(result) > 0:
49 |             return result[0]
50 |         return None
51 | 
52 |     def find_all(self, status):
53 |         self.cursor.execute('select * from queue where status=?', (status, ))
54 |         result = self._map_fields(self.cursor)
55 |         if len(result) > 0:
56 |             return result
57 |         return None
58 | 
59 |     def update_status(self, hashcode, status):
60 |         self.cursor.execute('update queue set status=? where hashcode=?', (status, hashcode))
61 |         self.conn.commit()
62 | 
63 |     def update_action(self, hashcode, action):
64 |         self.cursor.execute('update queue set action=? where hashcode=?', (action, hashcode))
65 |         self.conn.commit()
66 | 
67 |     def update_retries(self, hashcode, retries):
68 |         self.cursor.execute('update queue set retries=? where hashcode=?', (retries, hashcode))
69 |         self.conn.commit()
70 | 
71 |     def delete(self, hashcode):
72 |         self.cursor.execute('delete from queue where hashcode=?', (hashcode,))
73 |         self.conn.commit()
74 | 
75 |     def _map_fields(self, cursor):
76 |         """Map result tuples to named fields"""
77 |         fieldnames = [d[0].lower() for d in cursor.description]
78 |         result = []
79 |         while True:
80 |             rows = cursor.fetchmany()
81 |             if not rows:
82 |                 break
83 |             for row in rows:
84 |                 result.append(dict(itertools.izip(fieldnames, row)))
85 |         return result
86 | 
--------------------------------------------------------------------------------
/ossync/sdk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanbaba/Ossyncone/a015d13df203e3dcfba9f66caf4bfa6cf3dbbc07/ossync/sdk/__init__.py
--------------------------------------------------------------------------------
/ossync/sdk/oss_api.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding=utf-8
3 | 
4 | # Copyright (c) 2011, Alibaba Cloud Computing
5 | # All rights reserved.
6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the 9 | # "Software"), to deal in the Software without restriction, including 10 | # without limitation the rights to use, copy, modify, merge, publish, dis- 11 | # tribute, sublicense, and/or sell copies of the Software, and to permit 12 | # persons to whom the Software is furnished to do so, subject to the fol- 13 | # lowing conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included 16 | # in all copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- 20 | # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 | # IN THE SOFTWARE. 25 | 26 | import httplib 27 | import time 28 | import base64 29 | import urllib 30 | import StringIO 31 | import sys 32 | try: 33 | from oss.oss_util import * 34 | except: 35 | from oss_util import * 36 | try: 37 | from oss.oss_xml_handler import * 38 | except: 39 | from oss_xml_handler import * 40 | 41 | class OssAPI: 42 | ''' 43 | A simple OSS API 44 | ''' 45 | DefaultContentType = 'application/octet-stream' 46 | provider = PROVIDER 47 | __version__ = '0.3.2' 48 | Version = __version__ 49 | AGENT = 'oss-python%s (%s)' % (__version__, sys.platform) 50 | 51 | def __init__(self, host, access_id, secret_access_key='', port=80, is_security=False): 52 | self.SendBufferSize = 8192 53 | self.RecvBufferSize = 1024*1024*10 54 | self.host = get_second_level_domain(host) 55 | self.port = port 56 | self.access_id = access_id 57 | self.secret_access_key = secret_access_key 58 | self.show_bar = False 59 | self.is_security = is_security 60 | self.retry_times = 5 61 | self.agent = self.AGENT 62 | self.debug = False 63 | 64 | def set_debug(self, is_debug): 65 | if is_debug: 66 | self.debug = True 67 | 68 | def set_retry_times(self, retry_times=5): 69 | self.retry_times = retry_times 70 | 71 | def set_send_buf_size(self, buf_size): 72 | try: 73 | self.SendBufferSize = (int)(buf_size) 74 | except ValueError: 75 | pass 76 | 77 | def set_recv_buf_size(self, buf_size): 78 | try: 79 | self.RecvBufferSize = (int)(buf_size) 80 | except ValueError: 81 | pass 82 | 83 | def get_connection(self, tmp_host=None): 84 | host = '' 85 | port = 80 86 | timeout = 10 87 | if not tmp_host: 88 | tmp_host = self.host 89 | host_port_list = tmp_host.split(":") 90 | if len(host_port_list) == 1: 91 | host = host_port_list[0].strip() 92 | elif len(host_port_list) == 2: 93 | host = host_port_list[0].strip() 94 | port = int(host_port_list[1].strip()) 95 | if self.is_security or port == 443: 96 | self.is_security = True 97 | if sys.version_info >= (2, 6): 98 | return httplib.HTTPSConnection(host=host, port=port, timeout=timeout) 99 | else: 100 | return httplib.HTTPSConnection(host=host, port=port) 101 | else: 102 | if sys.version_info >= (2, 6): 103 | return httplib.HTTPConnection(host=host, port=port, timeout=timeout) 104 | else: 105 | return httplib.HTTPConnection(host=host, port=port) 106 | 107 | def sign_url_auth_with_expire_time(self, method, url, headers=None, resource="/", timeout=60, params=None): 108 | ''' 109 | 
Create the authorization for OSS based on the input method, url, body and headers 110 | 111 | :type method: string 112 | :param method: one of PUT, GET, DELETE, HEAD 113 | 114 | :type url: string 115 | :param:HTTP address of bucket or object, eg: http://HOST/bucket/object 116 | 117 | :type headers: dict 118 | :param: HTTP header 119 | 120 | :type resource: string 121 | :param:path of bucket or object, eg: /bucket/ or /bucket/object 122 | 123 | :type timeout: int 124 | :param 125 | 126 | Returns: 127 | signature url. 128 | ''' 129 | if not headers: 130 | headers = {} 131 | if not params: 132 | params = {} 133 | send_time = str(int(time.time()) + timeout) 134 | headers['Date'] = send_time 135 | auth_value = get_assign(self.secret_access_key, method, headers, resource, None, self.debug) 136 | params["OSSAccessKeyId"] = self.access_id 137 | params["Expires"] = str(send_time) 138 | params["Signature"] = auth_value 139 | sign_url = append_param(url, params) 140 | return sign_url 141 | 142 | def sign_url(self, method, bucket, object, timeout=60, headers=None, params=None): 143 | ''' 144 | Create the authorization for OSS based on the input method, url, body and headers 145 | 146 | :type method: string 147 | :param method: one of PUT, GET, DELETE, HEAD 148 | 149 | :type bucket: string 150 | :param: 151 | 152 | :type object: string 153 | :param: 154 | 155 | :type timeout: int 156 | :param 157 | 158 | :type headers: dict 159 | :param: HTTP header 160 | 161 | :type params: dict 162 | :param: the parameters that put in the url address as query string 163 | 164 | :type resource: string 165 | :param:path of bucket or object, eg: /bucket/ or /bucket/object 166 | 167 | Returns: 168 | signature url. 169 | ''' 170 | if not headers: 171 | headers = {} 172 | if not params: 173 | params = {} 174 | send_time = str(int(time.time()) + timeout) 175 | headers['Date'] = send_time 176 | if isinstance(object, unicode): 177 | object = object.encode('utf-8') 178 | resource = "/%s/%s%s" % (bucket, object, get_resource(params)) 179 | auth_value = get_assign(self.secret_access_key, method, headers, resource, None, self.debug) 180 | params["OSSAccessKeyId"] = self.access_id 181 | params["Expires"] = str(send_time) 182 | params["Signature"] = auth_value 183 | url = '' 184 | if self.is_security: 185 | if is_ip(self.host): 186 | url = "https://%s/%s/%s" % (self.host, bucket, object) 187 | else: 188 | url = "https://%s.%s/%s" % (bucket, self.host, object) 189 | else: 190 | if is_ip(self.host): 191 | url = "http://%s/%s/%s" % (self.host, bucket, object) 192 | else: 193 | url = "http://%s.%s/%s" % (bucket, self.host, object) 194 | sign_url = append_param(url, params) 195 | return sign_url 196 | 197 | def _create_sign_for_normal_auth(self, method, headers=None, resource="/"): 198 | ''' 199 | NOT public API 200 | Create the authorization for OSS based on header input. 201 | it should be put into "Authorization" parameter of header. 
202 | 203 | :type method: string 204 | :param:one of PUT, GET, DELETE, HEAD 205 | 206 | :type headers: dict 207 | :param: HTTP header 208 | 209 | :type resource: string 210 | :param:path of bucket or object, eg: /bucket/ or /bucket/object 211 | 212 | Returns: 213 | signature string 214 | ''' 215 | auth_value = "%s %s:%s" % (self.provider, self.access_id, get_assign(self.secret_access_key, method, headers, resource, None, self.debug)) 216 | return auth_value 217 | 218 | def bucket_operation(self, method, bucket, headers=None, params=None): 219 | return self.http_request(method, bucket, '', headers, '', params) 220 | 221 | def object_operation(self, method, bucket, object, headers=None, body='', params=None): 222 | return self.http_request(method, bucket, object, headers, body, params) 223 | 224 | def http_request(self, method, bucket, object, headers=None, body='', params=None): 225 | ''' 226 | Send http request of operation 227 | 228 | :type method: string 229 | :param method: one of PUT, GET, DELETE, HEAD, POST 230 | 231 | :type bucket: string 232 | :param 233 | 234 | :type object: string 235 | :param 236 | 237 | :type headers: dict 238 | :param: HTTP header 239 | 240 | :type body: string 241 | :param 242 | 243 | Returns: 244 | HTTP Response 245 | ''' 246 | retry = 5 247 | res = None 248 | while retry > 0: 249 | retry -= 1 250 | tmp_bucket = bucket 251 | tmp_object = object 252 | tmp_headers = {} 253 | if headers and isinstance(headers, dict): 254 | tmp_headers = headers.copy() 255 | tmp_params = {} 256 | if params and isinstance(params, dict): 257 | tmp_params = params.copy() 258 | 259 | res = self.http_request_with_redirect(method, tmp_bucket, tmp_object, tmp_headers, body, tmp_params) 260 | if res.status == 301 or res.status == 302: 261 | self.host = helper_get_host_from_resp(res, bucket) 262 | else: 263 | return res 264 | return res 265 | 266 | def http_request_with_redirect(self, method, bucket, object, headers=None, body='', params=None): 267 | ''' 268 | Send http request of operation 269 | 270 | :type method: string 271 | :param method: one of PUT, GET, DELETE, HEAD, POST 272 | 273 | :type bucket: string 274 | :param 275 | 276 | :type object: string 277 | :param 278 | 279 | :type headers: dict 280 | :param: HTTP header 281 | 282 | :type body: string 283 | :param 284 | 285 | Returns: 286 | HTTP Response 287 | ''' 288 | if not params: 289 | params = {} 290 | if not headers: 291 | headers = {} 292 | if isinstance(object, unicode): 293 | object = object.encode('utf-8') 294 | if not bucket: 295 | resource = "/" 296 | headers['Host'] = self.host 297 | else: 298 | headers['Host'] = "%s.%s" % (bucket, self.host) 299 | resource = "/%s/" % bucket 300 | resource = "%s%s%s" % (resource.encode('utf-8'), object, get_resource(params)) 301 | object = urllib.quote(object) 302 | url = "/%s" % object 303 | if is_ip(self.host): 304 | url = "/%s/%s" % (bucket, object) 305 | if not bucket: 306 | url = "/%s" % object 307 | headers['Host'] = self.host 308 | url = append_param(url, params) 309 | date = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()) 310 | headers['Date'] = date 311 | headers['Authorization'] = self._create_sign_for_normal_auth(method, headers, resource) 312 | headers['User-Agent'] = self.agent 313 | if check_bucket_valid(bucket) and not is_ip(self.host): 314 | conn = self.get_connection(headers['Host']) 315 | else: 316 | conn = self.get_connection() 317 | conn.request(method, url, body, headers) 318 | return conn.getresponse() 319 | 320 | def get_service(self, headers=None): 321 
| ''' 322 | List all buckets of user 323 | ''' 324 | return self.list_all_my_buckets(headers) 325 | 326 | def list_all_my_buckets(self, headers=None): 327 | ''' 328 | List all buckets of user 329 | type headers: dict 330 | :param 331 | 332 | Returns: 333 | HTTP Response 334 | ''' 335 | method = 'GET' 336 | bucket = '' 337 | object = '' 338 | body = '' 339 | params = {} 340 | return self.http_request(method, bucket, object, headers, body, params) 341 | 342 | def get_bucket_acl(self, bucket): 343 | ''' 344 | Get Access Control Level of bucket 345 | 346 | :type bucket: string 347 | :param 348 | 349 | Returns: 350 | HTTP Response 351 | ''' 352 | method = 'GET' 353 | object = '' 354 | headers = {} 355 | body = '' 356 | params = {} 357 | params['acl'] = '' 358 | return self.http_request(method, bucket, object, headers, body, params) 359 | 360 | def get_bucket_location(self, bucket): 361 | ''' 362 | Get Location of bucket 363 | ''' 364 | method = 'GET' 365 | object = '' 366 | headers = {} 367 | body = '' 368 | params = {} 369 | params['location'] = '' 370 | return self.http_request(method, bucket, object, headers, body, params) 371 | 372 | def get_bucket(self, bucket, prefix='', marker='', delimiter='', maxkeys='', headers=None): 373 | ''' 374 | List object that in bucket 375 | ''' 376 | return self.list_bucket(bucket, prefix, marker, delimiter, maxkeys, headers) 377 | 378 | def list_bucket(self, bucket, prefix='', marker='', delimiter='', maxkeys='', headers=None): 379 | ''' 380 | List object that in bucket 381 | 382 | :type bucket: string 383 | :param 384 | 385 | :type prefix: string 386 | :param 387 | 388 | :type marker: string 389 | :param 390 | 391 | :type delimiter: string 392 | :param 393 | 394 | :type maxkeys: string 395 | :param 396 | 397 | :type headers: dict 398 | :param: HTTP header 399 | 400 | Returns: 401 | HTTP Response 402 | ''' 403 | method = 'GET' 404 | object = '' 405 | body = '' 406 | params = {} 407 | params['prefix'] = prefix 408 | params['marker'] = marker 409 | params['delimiter'] = delimiter 410 | params['max-keys'] = maxkeys 411 | return self.http_request(method, bucket, object, headers, body, params) 412 | 413 | def create_bucket(self, bucket, acl='', headers=None): 414 | ''' 415 | Create bucket 416 | ''' 417 | return self.put_bucket(bucket, acl, headers) 418 | 419 | def put_bucket(self, bucket, acl='', headers=None): 420 | ''' 421 | Create bucket 422 | 423 | :type bucket: string 424 | :param 425 | 426 | :type acl: string 427 | :param: one of private public-read public-read-write 428 | 429 | :type headers: dict 430 | :param: HTTP header 431 | 432 | Returns: 433 | HTTP Response 434 | ''' 435 | if not headers: 436 | headers = {} 437 | if acl != '': 438 | if "AWS" == self.provider: 439 | headers['x-amz-acl'] = acl 440 | else: 441 | headers['x-oss-acl'] = acl 442 | method = 'PUT' 443 | object = '' 444 | body = '' 445 | params = {} 446 | return self.http_request(method, bucket, object, headers, body, params) 447 | 448 | def put_bucket_with_location(self, bucket, acl='', location='', headers=None): 449 | ''' 450 | Create bucket 451 | 452 | :type bucket: string 453 | :param 454 | 455 | :type acl: string 456 | :param: one of private public-read public-read-write 457 | 458 | :type location: string 459 | :param: 460 | 461 | :type headers: dict 462 | :param: HTTP header 463 | 464 | Returns: 465 | HTTP Response 466 | ''' 467 | if not headers: 468 | headers = {} 469 | if acl != '': 470 | if "AWS" == self.provider: 471 | headers['x-amz-acl'] = acl 472 | else: 473 | 
headers['x-oss-acl'] = acl
474 |         params = {}
475 |         body = ''
476 |         if location != '':
477 |             body = r'<CreateBucketConfiguration>'
478 |             body += r'<LocationConstraint>'
479 |             body += location
480 |             body += r'</LocationConstraint>'
481 |             body += r'</CreateBucketConfiguration>'
482 |         method = 'PUT'
483 |         object = ''
484 |         return self.http_request(method, bucket, object, headers, body, params)
485 | 
486 |     def delete_bucket(self, bucket, headers=None):
487 |         '''
488 |         Delete bucket
489 | 
490 |         :type bucket: string
491 |         :param
492 | 
493 |         Returns:
494 |             HTTP Response
495 |         '''
496 |         method = 'DELETE'
497 |         object = ''
498 |         body = ''
499 |         params = {}
500 |         return self.http_request(method, bucket, object, headers, body, params)
501 | 
502 |     def put_object_with_data(self, bucket, object, input_content, content_type=DefaultContentType, headers=None, params=None):
503 |         '''
504 |         Put object into bucket, the content of object is from input_content
505 |         '''
506 |         return self.put_object_from_string(bucket, object, input_content, content_type, headers, params)
507 | 
508 |     def put_object_from_string(self, bucket, object, input_content, content_type=DefaultContentType, headers=None, params=None):
509 |         '''
510 |         Put object into bucket, the content of object is from input_content
511 | 
512 |         :type bucket: string
513 |         :param
514 | 
515 |         :type object: string
516 |         :param
517 | 
518 |         :type input_content: string
519 |         :param
520 | 
521 |         :type content_type: string
522 |         :param: the object content type that supported by HTTP
523 | 
524 |         :type headers: dict
525 |         :param: HTTP header
526 | 
527 |         Returns:
528 |             HTTP Response
529 |         '''
530 |         if not headers:
531 |             headers = {}
532 |         headers['Content-Type'] = content_type
533 |         headers['Content-Length'] = str(len(input_content))
534 |         fp = StringIO.StringIO(input_content)
535 |         res = self.put_object_from_fp(bucket, object, fp, content_type, headers, params)
536 |         fp.close()
537 |         return res
538 | 
539 |     def _open_conn_to_put_object(self, bucket, object, filesize, content_type=DefaultContentType, headers=None, params=None):
540 |         '''
541 |         NOT public API
542 |         Open a connection to put object
543 | 
544 |         :type bucket: string
545 |         :param
546 | 
547 |         :type filesize: int
548 |         :param
549 | 
550 |         :type object: string
551 |         :param
552 | 
553 |         :type input_content: string
554 |         :param
555 | 
556 |         :type content_type: string
557 |         :param: the object content type that supported by HTTP
558 | 
559 |         :type headers: dict
560 |         :param: HTTP header
561 | 
562 |         Returns:
563 |             Initialized HTTPConnection
564 |         '''
565 |         if not params:
566 |             params = {}
567 |         if not headers:
568 |             headers = {}
569 |         method = 'PUT'
570 |         if isinstance(object, unicode):
571 |             object = object.encode('utf-8')
572 |         resource = "/%s/" % bucket
573 |         if not bucket:
574 |             resource = "/"
575 |         resource = "%s%s%s" % (resource.encode('utf-8'), object, get_resource(params))
576 | 
577 |         object = urllib.quote(object)
578 |         url = "/%s" % object
579 |         if bucket:
580 |             headers['Host'] = "%s.%s" % (bucket, self.host)
581 |         else:
582 |             headers['Host'] = self.host
583 |         if is_ip(self.host):
584 |             url = "/%s/%s" % (bucket, object)
585 |             headers['Host'] = self.host
586 |         url = append_param(url, params)
587 |         date = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())
588 | 
589 |         if check_bucket_valid(bucket) and not is_ip(self.host):
590 |             conn = self.get_connection(headers['Host'])
591 |         else:
592 |             conn = self.get_connection()
593 |         conn.putrequest(method, url)
594 |         if isinstance(content_type, unicode):
595 |             content_type = content_type.encode('utf-8')
596 |         headers["Content-Type"] = content_type
597 | 
headers["Content-Length"] = filesize 598 | headers["Date"] = date 599 | headers["Expect"] = "100-Continue" 600 | headers['User-Agent'] = self.agent 601 | for k in headers.keys(): 602 | conn.putheader(str(k), str(headers[k])) 603 | if '' != self.secret_access_key and '' != self.access_id: 604 | auth = self._create_sign_for_normal_auth(method, headers, resource) 605 | conn.putheader("Authorization", auth) 606 | conn.endheaders() 607 | return conn 608 | 609 | def put_object_from_file(self, bucket, object, filename, content_type='', headers=None, params=None): 610 | ''' 611 | put object into bucket, the content of object is read from file 612 | 613 | :type bucket: string 614 | :param 615 | 616 | :type object: string 617 | :param 618 | 619 | :type fllename: string 620 | :param: the name of the read file 621 | 622 | :type content_type: string 623 | :param: the object content type that supported by HTTP 624 | 625 | :type headers: dict 626 | :param: HTTP header 627 | 628 | Returns: 629 | HTTP Response 630 | ''' 631 | fp = open(filename, 'rb') 632 | if not content_type: 633 | content_type = get_content_type_by_filename(filename) 634 | res = self.put_object_from_fp(bucket, object, fp, content_type, headers, params) 635 | fp.close() 636 | return res 637 | 638 | def view_bar(self, num=1, sum=100): 639 | rate = float(num) / float(sum) 640 | rate_num = int(rate * 100) 641 | print '\r%d%% ' % (rate_num), 642 | sys.stdout.flush() 643 | 644 | def put_object_from_fp(self, bucket, object, fp, content_type=DefaultContentType, headers=None, params=None): 645 | ''' 646 | Put object into bucket, the content of object is read from file pointer 647 | 648 | :type bucket: string 649 | :param 650 | 651 | :type object: string 652 | :param 653 | 654 | :type fp: file 655 | :param: the pointer of the read file 656 | 657 | :type content_type: string 658 | :param: the object content type that supported by HTTP 659 | 660 | :type headers: dict 661 | :param: HTTP header 662 | 663 | Returns: 664 | HTTP Response 665 | ''' 666 | tmp_object = object 667 | tmp_headers = {} 668 | tmp_params = {} 669 | if headers and isinstance(headers, dict): 670 | tmp_headers = headers.copy() 671 | if params and isinstance(params, dict): 672 | tmp_params = params.copy() 673 | 674 | fp.seek(os.SEEK_SET, os.SEEK_END) 675 | filesize = fp.tell() 676 | fp.seek(os.SEEK_SET) 677 | conn = self._open_conn_to_put_object(bucket, object, filesize, content_type, headers, params) 678 | totallen = 0 679 | l = fp.read(self.SendBufferSize) 680 | retry_times = 0 681 | while len(l) > 0: 682 | if retry_times > 100: 683 | print "retry too many times" 684 | raise 685 | try: 686 | conn.send(l) 687 | retry_times = 0 688 | except: 689 | retry_times += 1 690 | continue 691 | totallen += len(l) 692 | if self.show_bar: 693 | self.view_bar(totallen, filesize) 694 | l = fp.read(self.SendBufferSize) 695 | res = conn.getresponse() 696 | if res.status == 301 or res.status == 302: 697 | self.host = helper_get_host_from_resp(res, bucket) 698 | return self.put_object_from_fp(bucket, tmp_object, fp, content_type, tmp_headers, tmp_params) 699 | return res 700 | 701 | def get_object(self, bucket, object, headers=None, params=None): 702 | ''' 703 | Get object 704 | 705 | :type bucket: string 706 | :param 707 | 708 | :type object: string 709 | :param 710 | 711 | :type headers: dict 712 | :param: HTTP header 713 | 714 | Returns: 715 | HTTP Response 716 | ''' 717 | method = 'GET' 718 | body = '' 719 | return self.http_request(method, bucket, object, headers, body, params) 720 | 721 | def 
get_object_to_file(self, bucket, object, filename, headers=None):
722 |         '''
723 |         Get object and write the content of object into a file
724 | 
725 |         :type bucket: string
726 |         :param
727 | 
728 |         :type object: string
729 |         :param
730 | 
731 |         :type filename: string
732 |         :param
733 | 
734 |         :type headers: dict
735 |         :param: HTTP header
736 | 
737 |         Returns:
738 |             HTTP Response
739 |         '''
740 |         res = self.get_object(bucket, object, headers)
741 |         totalread = 0
742 |         if res.status / 100 == 2:
743 |             header = {}
744 |             header = convert_header2map(res.getheaders())
745 |             filesize = safe_get_element("content-length", header)
746 |             f = file(filename, 'wb')
747 |             data = ''
748 |             while True:
749 |                 data = res.read(self.RecvBufferSize)
750 |                 if data:
751 |                     f.write(data)
752 |                     totalread += len(data)
753 |                     if self.show_bar:
754 |                         self.view_bar(totalread, filesize)
755 |                 else:
756 |                     break
757 |             f.close()
758 |         # TODO: get object with flow
759 |         return res
760 | 
761 |     def delete_object(self, bucket, object, headers=None):
762 |         '''
763 |         Delete object
764 | 
765 |         :type bucket: string
766 |         :param
767 | 
768 |         :type object: string
769 |         :param
770 | 
771 |         :type headers: dict
772 |         :param: HTTP header
773 | 
774 |         Returns:
775 |             HTTP Response
776 |         '''
777 |         method = 'DELETE'
778 |         body = ''
779 |         params = {}
780 |         return self.http_request(method, bucket, object, headers, body, params)
781 | 
782 |     def head_object(self, bucket, object, headers=None):
783 |         '''
784 |         Head object, to get the meta message of object without the content
785 | 
786 |         :type bucket: string
787 |         :param
788 | 
789 |         :type object: string
790 |         :param
791 | 
792 |         :type headers: dict
793 |         :param: HTTP header
794 | 
795 |         Returns:
796 |             HTTP Response
797 |         '''
798 |         method = 'HEAD'
799 |         body = ''
800 |         params = {}
801 |         return self.http_request(method, bucket, object, headers, body, params)
802 | 
803 |     def post_object_group(self, bucket, object, object_group_msg_xml, headers=None, params=None):
804 |         '''
805 |         Post object group, merge all objects in object_group_msg_xml into one object
806 |         :type bucket: string
807 |         :param
808 | 
809 |         :type object: string
810 |         :param
811 | 
812 |         :type object_group_msg_xml: string
813 |         :param: xml format string, like
814 |             <CreateFileGroup>
815 |                 <Part>
816 |                     <PartNumber>N</PartNumber>
817 |                     <PartName>objectN</PartName>
818 |                     <ETag>"47BCE5C74F589F4867DBD57E9CA9F808"</ETag>
819 |                 </Part>
820 |             </CreateFileGroup>
821 |         :type headers: dict
822 |         :param: HTTP header
823 | 
824 |         :type params: dict
825 |         :param: parameters
826 | 
827 |         Returns:
828 |             HTTP Response
829 |         '''
830 |         method = 'POST'
831 |         if not headers:
832 |             headers = {}
833 |         if not params:
834 |             params = {}
835 |         if not headers.has_key('Content-Type'):
836 |             content_type = get_content_type_by_filename(object)
837 |             headers['Content-Type'] = content_type
838 |         body = object_group_msg_xml
839 |         params['group'] = ''
840 |         headers['Content-Length'] = str(len(body))
841 |         return self.http_request(method, bucket, object, headers, body, params)
842 | 
843 |     def get_object_group_index(self, bucket, object, headers=None):
844 |         '''
845 |         Get object group_index
846 | 
847 |         :type bucket: string
848 |         :param
849 | 
850 |         :type object: string
851 |         :param
852 | 
853 |         :type headers: dict
854 |         :param: HTTP header
855 | 
856 |         Returns:
857 |             HTTP Response
858 |         '''
859 |         if not headers:
860 |             headers = {}
861 |         headers["x-oss-file-group"] = ''
862 |         method = 'GET'
863 |         body = ''
864 |         params = {}
865 |         return self.http_request(method, bucket, object, headers, body, params)
866 | 
867 |     def upload_part_from_file_given_pos(self, bucket, object, filename, offset, partsize,
upload_id, part_number, headers=None, params=None): 868 | if not params: 869 | params = {} 870 | params['partNumber'] = part_number 871 | params['uploadId'] = upload_id 872 | content_type = '' 873 | return self.put_object_from_file_given_pos(bucket, object, filename, offset, partsize, content_type, headers, params) 874 | 875 | def put_object_from_file_given_pos(self, bucket, object, filename, offset, partsize, content_type='', headers=None, params=None): 876 | ''' 877 | Put object into bucket, the content of object is read from given posision of filename 878 | :type bucket: string 879 | :param 880 | 881 | :type object: string 882 | :param 883 | 884 | :type fllename: string 885 | :param: the name of the read file 886 | 887 | :type offset: int 888 | :param: the given position of file 889 | 890 | :type partsize: int 891 | :param: the size of read content 892 | 893 | :type content_type: string 894 | :param: the object content type that supported by HTTP 895 | 896 | :type headers: dict 897 | :param: HTTP header 898 | 899 | Returns: 900 | HTTP Response 901 | ''' 902 | tmp_object = object 903 | tmp_headers = {} 904 | tmp_params = {} 905 | if headers and isinstance(headers, dict): 906 | tmp_headers = headers.copy() 907 | if params and isinstance(params, dict): 908 | tmp_params = params.copy() 909 | 910 | fp = open(filename, 'rb') 911 | if offset > os.path.getsize(filename): 912 | fp.seek(os.SEEK_SET, os.SEEK_END) 913 | else: 914 | fp.seek(offset) 915 | if not content_type: 916 | content_type = get_content_type_by_filename(filename) 917 | conn = self._open_conn_to_put_object(bucket, object, partsize, content_type, headers, params) 918 | left_len = partsize 919 | while True: 920 | if left_len <= 0: 921 | break 922 | elif left_len < self.SendBufferSize: 923 | buffer_content = fp.read(left_len) 924 | else: 925 | buffer_content = fp.read(self.SendBufferSize) 926 | 927 | if buffer_content: 928 | conn.send(buffer_content) 929 | 930 | left_len = left_len - len(buffer_content) 931 | 932 | fp.close() 933 | res = conn.getresponse() 934 | if res.status == 301 or res.status == 302: 935 | self.host = helper_get_host_from_resp(res, bucket) 936 | return self.put_object_from_file_given_pos(bucket, tmp_object, filename, offset, partsize 937 | , content_type, tmp_headers, tmp_params) 938 | return res 939 | 940 | def upload_large_file(self, bucket, object, filename, thread_num=10, max_part_num=1000, headers=None): 941 | ''' 942 | Upload large file, the content is read from filename. The large file is splitted into many parts. It will put the many parts into bucket and then merge all the parts into one object. 
943 | 944 | :type bucket: string 945 | :param 946 | 947 | :type object: string 948 | :param 949 | 950 | :type fllename: string 951 | :param: the name of the read file 952 | 953 | :type thread_num: int 954 | :param 955 | 956 | :type max_part_num: int 957 | :param 958 | 959 | :type headers: dict 960 | :param 961 | 962 | Returns: 963 | HTTP Response 964 | 965 | ''' 966 | #split the large file into 1000 parts or many parts 967 | #get part_msg_list 968 | if not headers: 969 | headers = {} 970 | if isinstance(filename, unicode): 971 | filename = filename.encode('utf-8') 972 | part_msg_list = split_large_file(filename, object, max_part_num) 973 | #make sure all the parts are put into same bucket 974 | if len(part_msg_list) < thread_num and len(part_msg_list) != 0: 975 | thread_num = len(part_msg_list) 976 | step = len(part_msg_list) / thread_num 977 | retry_times = self.retry_times 978 | while(retry_times >= 0): 979 | try: 980 | threadpool = [] 981 | for i in xrange(0, thread_num): 982 | if i == thread_num - 1: 983 | end = len(part_msg_list) 984 | else: 985 | end = i * step + step 986 | begin = i * step 987 | oss = OssAPI(self.host, self.access_id, self.secret_access_key) 988 | current = PutObjectGroupWorker(oss, bucket, filename, part_msg_list[begin:end], self.retry_times) 989 | threadpool.append(current) 990 | current.start() 991 | for item in threadpool: 992 | item.join() 993 | break 994 | except: 995 | retry_times = retry_times -1 996 | if -1 >= retry_times: 997 | print "after retry %s, failed, upload large file failed!" % retry_times 998 | return 999 | #get xml string that contains msg of object group 1000 | object_group_msg_xml = create_object_group_msg_xml(part_msg_list) 1001 | content_type = get_content_type_by_filename(filename) 1002 | if isinstance(content_type, unicode): 1003 | content_type = content_type.encode('utf-8') 1004 | if not headers.has_key('Content-Type'): 1005 | headers['Content-Type'] = content_type 1006 | return self.post_object_group(bucket, object, object_group_msg_xml, headers) 1007 | 1008 | def copy_object(self, source_bucket, source_object, target_bucket, target_object, headers=None): 1009 | ''' 1010 | Copy object 1011 | 1012 | :type source_bucket: string 1013 | :param 1014 | 1015 | :type source_object: string 1016 | :param 1017 | 1018 | :type target_bucket: string 1019 | :param 1020 | 1021 | :type target_object: string 1022 | :param 1023 | 1024 | :type headers: dict 1025 | :param: HTTP header 1026 | 1027 | Returns: 1028 | HTTP Response 1029 | ''' 1030 | if not headers: 1031 | headers = {} 1032 | if isinstance(source_object, unicode): 1033 | source_object = source_object.encode('utf-8') 1034 | source_object = urllib.quote(source_object) 1035 | headers['x-oss-copy-source'] = "/%s/%s" % (source_bucket, source_object) 1036 | method = 'PUT' 1037 | body = '' 1038 | params = {} 1039 | return self.http_request(method, target_bucket, target_object, headers, body, params) 1040 | 1041 | def init_multi_upload(self, bucket, object, headers=None, params=None): 1042 | ''' 1043 | Init multi upload 1044 | 1045 | :type bucket: string 1046 | :param 1047 | 1048 | :type object: string 1049 | :param 1050 | 1051 | :type headers: dict 1052 | :param: HTTP header 1053 | 1054 | :type params: dict 1055 | :param: HTTP header 1056 | 1057 | Returns: 1058 | HTTP Response 1059 | ''' 1060 | if not params: 1061 | params = {} 1062 | method = 'POST' 1063 | body = '' 1064 | params['uploads'] = '' 1065 | return self.http_request(method, bucket, object, headers, body, params) 1066 | 1067 | def 
get_all_parts(self, bucket, object, upload_id, max_parts=None, part_number_marker=None): 1068 | ''' 1069 | List all upload parts of given upload_id 1070 | :type bucket: string 1071 | :param 1072 | 1073 | :type object: string 1074 | :param 1075 | 1076 | :type upload_id: string 1077 | :param 1078 | 1079 | :type max_parts: int 1080 | :param 1081 | 1082 | :type part_number_marker: string 1083 | :param 1084 | 1085 | Returns: 1086 | HTTP Response 1087 | ''' 1088 | method = 'GET' 1089 | headers = {} 1090 | body = '' 1091 | params = {} 1092 | params['uploadId'] = upload_id 1093 | if max_parts: 1094 | params['max-parts'] = max_parts 1095 | if part_number_marker: 1096 | params['part-number-marker'] = part_number_marker 1097 | return self.http_request(method, bucket, object, headers, body, params) 1098 | 1099 | def get_all_multipart_uploads(self, bucket, delimiter=None, max_uploads=None, key_marker=None, prefix=None, upload_id_marker=None, headers=None): 1100 | ''' 1101 | List all upload_ids and their parts 1102 | :type bucket: string 1103 | :param 1104 | 1105 | :type delimiter: string 1106 | :param 1107 | 1108 | :type max_uploads: string 1109 | :param 1110 | 1111 | :type key_marker: string 1112 | :param 1113 | 1114 | :type prefix: string 1115 | :param 1116 | 1117 | :type upload_id_marker: string 1118 | :param 1119 | 1120 | :type headers: dict 1121 | :param: HTTP header 1122 | 1123 | Returns: 1124 | HTTP Response 1125 | ''' 1126 | method = 'GET' 1127 | object = '' 1128 | body = '' 1129 | params = {} 1130 | params['uploads'] = '' 1131 | if delimiter: 1132 | params['delimiter'] = delimiter 1133 | if max_uploads: 1134 | params['max-uploads'] = max_uploads 1135 | if key_marker: 1136 | params['key-marker'] = key_marker 1137 | if prefix: 1138 | params['prefix'] = prefix 1139 | if upload_id_marker: 1140 | params['upload-id-marker'] = upload_id_marker 1141 | return self.http_request(method, bucket, object, headers, body, params) 1142 | 1143 | def upload_part(self, bucket, object, filename, upload_id, part_number, headers=None, params=None): 1144 | ''' 1145 | Upload the content of filename as one part of given upload_id 1146 | 1147 | :type bucket: string 1148 | :param 1149 | 1150 | :type object: string 1151 | :param 1152 | 1153 | :type filename: string 1154 | :param 1155 | 1156 | :type upload_id: string 1157 | :param 1158 | 1159 | :type part_number: int 1160 | :param 1161 | 1162 | :type headers: dict 1163 | :param: HTTP header 1164 | 1165 | :type params: dict 1166 | :param: HTTP header 1167 | 1168 | Returns: 1169 | HTTP Response 1170 | ''' 1171 | if not params: 1172 | params = {} 1173 | params['partNumber'] = part_number 1174 | params['uploadId'] = upload_id 1175 | content_type = '' 1176 | return self.put_object_from_file(bucket, object, filename, content_type, headers, params) 1177 | 1178 | def upload_part_from_string(self, bucket, object, data, upload_id, part_number, headers=None, params=None): 1179 | ''' 1180 | Upload the content of string as one part of given upload_id 1181 | 1182 | :type bucket: string 1183 | :param 1184 | 1185 | :type object: string 1186 | :param 1187 | 1188 | :type data: string 1189 | :param 1190 | 1191 | :type upload_id: string 1192 | :param 1193 | 1194 | :type part_number: int 1195 | :param 1196 | 1197 | :type headers: dict 1198 | :param: HTTP header 1199 | 1200 | :type params: dict 1201 | :param: HTTP header 1202 | 1203 | Returns: 1204 | HTTP Response 1205 | ''' 1206 | if not params: 1207 | params = {} 1208 | params['partNumber'] = part_number 1209 | params['uploadId'] = upload_id 
1210 | content_type = '' 1211 | fp = StringIO.StringIO(data) 1212 | return self.put_object_from_fp(bucket, object, fp, content_type, headers, params) 1213 | 1214 | def complete_upload(self, bucket, object, upload_id, part_msg_xml, headers=None, params=None): 1215 | ''' 1216 | Finish multiupload and merge all the parts in part_msg_xml as a object. 1217 | 1218 | :type bucket: string 1219 | :param 1220 | 1221 | :type object: string 1222 | :param 1223 | 1224 | :type upload_id: string 1225 | :param 1226 | 1227 | :type part_msg_xml: string 1228 | :param 1229 | 1230 | :type headers: dict 1231 | :param: HTTP header 1232 | 1233 | :type params: dict 1234 | :param: HTTP header 1235 | 1236 | Returns: 1237 | HTTP Response 1238 | ''' 1239 | if not headers: 1240 | headers = {} 1241 | if not params: 1242 | params = {} 1243 | method = 'POST' 1244 | body = part_msg_xml 1245 | headers['Content-Length'] = str(len(body)) 1246 | params['uploadId'] = upload_id 1247 | if not headers.has_key('Content-Type'): 1248 | content_type = get_content_type_by_filename(object) 1249 | headers['Content-Type'] = content_type 1250 | return self.http_request(method, bucket, object, headers, body, params) 1251 | 1252 | def cancel_upload(self, bucket, object, upload_id, headers=None, params=None): 1253 | ''' 1254 | Cancel multiupload and delete all parts of given upload_id 1255 | :type bucket: string 1256 | :param 1257 | 1258 | :type object: string 1259 | :param 1260 | 1261 | :type upload_id: string 1262 | :param 1263 | 1264 | :type headers: dict 1265 | :param: HTTP header 1266 | 1267 | :type params: dict 1268 | :param: HTTP header 1269 | 1270 | Returns: 1271 | HTTP Response 1272 | ''' 1273 | if not params: 1274 | params = {} 1275 | method = 'DELETE' 1276 | if isinstance(upload_id, unicode): 1277 | upload_id = upload_id.encode('utf-8') 1278 | params['uploadId'] = upload_id 1279 | body = '' 1280 | return self.http_request(method, bucket, object, headers, body, params) 1281 | 1282 | def multi_upload_file(self, bucket, object, filename, upload_id='', thread_num=10, max_part_num=10000, headers=None, params=None): 1283 | ''' 1284 | Upload large file, the content is read from filename. The large file is splitted into many parts. It will put the many parts into bucket and then merge all the parts into one object. 
1285 | 1286 | :type bucket: string 1287 | :param 1288 | 1289 | :type object: string 1290 | :param 1291 | 1292 | :type fllename: string 1293 | :param: the name of the read file 1294 | 1295 | :type upload_id: string 1296 | :param 1297 | 1298 | :type thread_num: int 1299 | :param 1300 | 1301 | :type max_part_num: int 1302 | :param 1303 | 1304 | :type headers: dict 1305 | :param 1306 | 1307 | :type params: dict 1308 | :param 1309 | 1310 | Returns: 1311 | HTTP Response 1312 | ''' 1313 | #get init upload_id 1314 | if not upload_id: 1315 | res = self.init_multi_upload(bucket, object, headers, params) 1316 | body = res.read() 1317 | if res.status == 200: 1318 | h = GetInitUploadIdXml(body) 1319 | upload_id = h.upload_id 1320 | else: 1321 | err = ErrorXml(body) 1322 | raise Exception("%s, %s" %(res.status, err.msg)) 1323 | if not upload_id: 1324 | raise Exception("-1, Cannot get upload id.") 1325 | #split the large file into 1000 parts or many parts 1326 | #get part_msg_list 1327 | if isinstance(filename, unicode): 1328 | filename = filename.encode('utf-8') 1329 | part_msg_list = split_large_file(filename, object, max_part_num) 1330 | logger = getlogger(self.debug) 1331 | logger.info("bucket:%s, object:%s, upload_id is: %s, split_number:%d" % (bucket, object, upload_id, len(part_msg_list))) 1332 | 1333 | #make sure all the parts are put into same bucket 1334 | if len(part_msg_list) < thread_num and len(part_msg_list) != 0: 1335 | thread_num = len(part_msg_list) 1336 | step = len(part_msg_list) / thread_num 1337 | 1338 | #list part to get a map 1339 | upload_retry_times = self.retry_times 1340 | while(upload_retry_times >= 0): 1341 | uploaded_part_map = {} 1342 | oss = OssAPI(self.host, self.access_id, self.secret_access_key) 1343 | uploaded_part_map = get_part_map(oss, bucket, object, upload_id) 1344 | retry_times = self.retry_times 1345 | while(retry_times >= 0): 1346 | threadpool = [] 1347 | try: 1348 | for i in xrange(0, thread_num): 1349 | if i == thread_num - 1: 1350 | end = len(part_msg_list) 1351 | else: 1352 | end = i * step + step 1353 | begin = i * step 1354 | oss = OssAPI(self.host, self.access_id, self.secret_access_key) 1355 | current = UploadPartWorker(oss, bucket, object, upload_id, filename, part_msg_list[begin:end], uploaded_part_map, self.retry_times) 1356 | threadpool.append(current) 1357 | current.start() 1358 | for item in threadpool: 1359 | item.join() 1360 | break 1361 | except: 1362 | retry_times -= 1 1363 | if -1 >= retry_times: 1364 | raise Exception("-2, after retry %s, failed, multi upload part failed! upload_id:%s" % (self.retry_times, upload_id)) 1365 | #get xml string that contains msg of part 1366 | part_msg_xml = create_part_xml(part_msg_list) 1367 | #complete upload 1368 | res = self.complete_upload(bucket, object, upload_id, part_msg_xml, headers, params) 1369 | if res.status == 200: 1370 | break 1371 | upload_retry_times -= 1 1372 | if upload_retry_times < 0: 1373 | raise Exception("-3, after retry %s, failed, multi upload file failed! 
upload_id:%s" % (self.retry_times, upload_id)) 1374 | return res 1375 | 1376 | def delete_objects(self, bucket, object_list=None, headers=None, params=None): 1377 | ''' 1378 | Batch delete objects 1379 | :type bucket: string 1380 | :param: 1381 | 1382 | :type object_list: list 1383 | :param: 1384 | 1385 | :type headers: dict 1386 | :param: HTTP header 1387 | 1388 | :type params: dict 1389 | :param: the parameters that put in the url address as query string 1390 | 1391 | Returns: 1392 | HTTP Response 1393 | ''' 1394 | if not object_list: 1395 | object_list = [] 1396 | object_list_xml = create_delete_object_msg_xml(object_list) 1397 | return self.batch_delete_object(bucket, object_list_xml, headers, params) 1398 | 1399 | def batch_delete_object(self, bucket, object_list_xml, headers=None, params=None): 1400 | ''' 1401 | Delete the objects in object_list_xml 1402 | :type bucket: string 1403 | :param: 1404 | 1405 | :type object_list_xml: string 1406 | :param: 1407 | 1408 | :type headers: dict 1409 | :param: HTTP header 1410 | 1411 | :type params: dict 1412 | :param: the parameters that put in the url address as query string 1413 | 1414 | Returns: 1415 | HTTP Response 1416 | ''' 1417 | if not headers: 1418 | headers = {} 1419 | if not params: 1420 | params = {} 1421 | method = 'POST' 1422 | object = '' 1423 | body = object_list_xml 1424 | headers['Content-Length'] = str(len(body)) 1425 | params['delete'] = '' 1426 | base64md5 = base64.encodestring(md5.new(body).digest()) 1427 | if base64md5[-1] == '\n': 1428 | base64md5 = base64md5[0:-1] 1429 | headers['Content-MD5'] = base64md5 1430 | return self.http_request(method, bucket, object, headers, body, params) 1431 | 1432 | def list_objects(self, bucket, prefix=''): 1433 | ''' 1434 | :type bucket: string 1435 | :param: 1436 | 1437 | :type prefix: string 1438 | :param: 1439 | 1440 | Returns: 1441 | a list that contains the objects in bucket with prefix 1442 | ''' 1443 | get_instance = GetAllObjects() 1444 | marker_input = '' 1445 | object_list = [] 1446 | oss = OssAPI(self.host, self.access_id, self.secret_access_key) 1447 | (object_list, marker_output) = get_instance.get_object_in_bucket(oss, bucket, marker_input, prefix) 1448 | return object_list 1449 | 1450 | def batch_delete_objects(self, bucket, object_list=None): 1451 | ''' 1452 | :type bucket: string 1453 | :param: 1454 | 1455 | :type object_list: object name list 1456 | :param: 1457 | 1458 | Returns: 1459 | True or False 1460 | ''' 1461 | if not object_list: 1462 | object_list = [] 1463 | object_list_xml = create_delete_object_msg_xml(object_list) 1464 | try: 1465 | res = self.batch_delete_object(bucket, object_list_xml) 1466 | if res.status / 100 == 2: 1467 | return True 1468 | except: 1469 | pass 1470 | return False 1471 | 1472 | def get_object_info(self, bucket, object, headers=None, params=None): 1473 | ''' 1474 | Get object information 1475 | :type bucket: string 1476 | :param: 1477 | 1478 | :type object: string 1479 | :param: 1480 | 1481 | :type headers: dict 1482 | :param: HTTP header 1483 | 1484 | :type params: dict 1485 | :param: the parameters that put in the url address as query string 1486 | 1487 | Returns: 1488 | HTTP Response 1489 | ''' 1490 | if not headers: 1491 | headers = {} 1492 | if not params: 1493 | params = {} 1494 | method = 'GET' 1495 | body = '' 1496 | params['objectInfo'] = '' 1497 | return self.http_request(method, bucket, object, headers, body, params) 1498 | -------------------------------------------------------------------------------- 
/ossync/sdk/oss_sample.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding=utf8
3 | import time
4 | try:
5 |     from oss.oss_api import *
6 | except:
7 |     from oss_api import *
8 | try:
9 |     from oss.oss_xml_handler import *
10 | except:
11 |     from oss_xml_handler import *
12 | HOST = "oss.aliyuncs.com"
13 | ACCESS_ID = ""
14 | SECRET_ACCESS_KEY = ""
15 | #ACCESS_ID and SECRET_ACCESS_KEY are empty by default; fill in the correct ID and KEY you applied for.
16 | 
17 | if __name__ == "__main__":
18 |     #initialization
19 |     if len(ACCESS_ID) == 0 or len(SECRET_ACCESS_KEY) == 0:
20 |         print "Please make sure ACCESS_ID and SECRET_ACCESS_KEY are set correctly in ", __file__, ", they are empty!"
21 |         exit(0)
22 |     oss = OssAPI(HOST, ACCESS_ID, SECRET_ACCESS_KEY)
23 |     sep = "=============================="
24 | 
25 |     #sign a specific URL; by default the signed URL expires after 60 seconds
26 |     method = "GET"
27 |     bucket = "test" + time.strftime("%Y-%b-%d%H-%M-%S").lower()
28 |     object = "test_object"
29 |     url = "http://" + HOST + "/oss/" + bucket + "/" + object
30 |     headers = {}
31 |     resource = "/" + bucket + "/" + object
32 | 
33 |     timeout = 60
34 |     url_with_auth = oss.sign_url_auth_with_expire_time(method, url, headers, resource, timeout)
35 |     print "after signature url is: ", url_with_auth
36 |     print sep
37 |     #create your own bucket
38 |     acl = 'private'
39 |     headers = {}
40 |     res = oss.put_bucket(bucket, acl, headers)
41 |     if (res.status / 100) == 2:
42 |         print "put bucket ", bucket, "OK"
43 |     else:
44 |         print "put bucket ", bucket, "ERROR"
45 |     print sep
46 | 
47 |     #list the buckets you created
48 |     res = oss.get_service()
49 |     if (res.status / 100) == 2:
50 |         body = res.read()
51 |         h = GetServiceXml(body)
52 |         print "bucket list size is: ", len(h.list())
53 |         print "bucket list is: "
54 |         for i in h.list():
55 |             print i
56 |     else:
57 |         print res.status
58 |     print sep
59 | 
60 |     #upload the given string to the bucket; the file in the bucket is named object
61 |     object = "object_test"
62 |     input_content = "hello, OSS"
63 |     content_type = "text/HTML"
64 |     headers = {}
65 |     res = oss.put_object_from_string(bucket, object, input_content, content_type, headers)
66 |     if (res.status / 100) == 2:
67 |         print "put_object_from_string OK"
68 |     else:
69 |         print "put_object_from_string ERROR"
70 |     print sep
71 | 
72 |     #upload the named local file to the bucket; the file in the bucket is named object
73 |     object = "object_test"
74 |     filename = __file__
75 |     content_type = "text/HTML"
76 |     headers = {}
77 |     res = oss.put_object_from_file(bucket, object, filename, content_type, headers)
78 |     if (res.status / 100) == 2:
79 |         print "put_object_from_file OK"
80 |     else:
81 |         print "put_object_from_file ERROR"
82 |     print sep
83 | 
84 |     #upload the same file again, this time from an open file object
85 |     object = "object_test"
86 |     filename = __file__
87 |     content_type = "text/HTML"
88 |     headers = {}
89 | 
90 |     fp = open(filename, 'rb')
91 |     res = oss.put_object_from_fp(bucket, object, fp, content_type, headers)
92 |     fp.close()
93 |     if (res.status / 100) == 2:
94 |         print "put_object_from_fp OK"
95 |     else:
96 |         print "put_object_from_fp ERROR"
97 |     print sep
98 | 
99 |     #download an object from the bucket; the content is in the response body
100 |     object = "object_test"
101 |     headers = {}
102 | 
103 |     res = oss.get_object(bucket, object, headers)
104 |     if (res.status / 100) == 2:
105 |         print "get_object OK"
106 |     else:
107 |         print "get_object ERROR"
108 |     print sep
109 | 
110 |     #download an object from the bucket and write its content to a local file
111 |     object = "object_test"
112 |     headers = {}
113 |     filename = "get_object_test_file"
114 | 
115 |     res = oss.get_object_to_file(bucket, object, filename, headers)
116 |     if (res.status / 100) == 2:
117 |         print "get_object_to_file OK"
118 |     else:
119 |         print "get_object_to_file ERROR"
120 |     print sep
121 | 
122 |     #inspect the object's meta information, e.g. length, type, etc.
123 |     object = "object_test"
124 |     headers = {}
125 |     res = oss.head_object(bucket, object, headers)
126 |     if (res.status / 100) == 2:
127 |         print "head_object OK"
128 |         header_map = convert_header2map(res.getheaders())
129 |         content_len = safe_get_element("content-length", header_map)
130 |         etag = safe_get_element("etag", header_map).upper()
131 |         print "content length is:", content_len
132 |         print "ETag is: ", etag
133 | 
134 |     else:
135 |         print "head_object ERROR"
136 |     print sep
137 | 
138 |     #check the bucket's access permissions
139 |     res = oss.get_bucket_acl(bucket)
140 |     if (res.status / 100) == 2:
141 |         body = res.read()
142 |         h = GetBucketAclXml(body)
143 |         print "bucket acl is:", h.grant
144 |     else:
145 |         print "get bucket acl ERROR"
146 |     print sep
147 | 
148 |     #list the objects in the bucket
149 |     prefix = ""
150 |     marker = ""
151 |     delimiter = "/"
152 |     maxkeys = "100"
153 |     headers = {}
154 |     res = oss.get_bucket(bucket, prefix, marker, delimiter, maxkeys, headers)
155 |     if (res.status / 100) == 2:
156 |         body = res.read()
157 |         h = GetBucketXml(body)
158 |         (file_list, common_list) = h.list()
159 |         print "object list is:"
160 |         for i in file_list:
161 |             print i
162 |         print "common list is:"
163 |         for i in common_list:
164 |             print i
165 |     print sep
166 | 
167 |     #upload a large file as an object group; see the official API docs for the object group concept
168 |     res = oss.upload_large_file(bucket, object, __file__)
169 |     if (res.status / 100) == 2:
170 |         print "upload_large_file OK"
171 |     else:
172 |         print "upload_large_file ERROR"
173 | 
174 |     print sep
175 | 
176 |     #get the objects contained in the object group
177 |     res = oss.get_object_group_index(bucket, object)
178 |     if (res.status / 100) == 2:
179 |         print "get_object_group_index OK"
180 |         body = res.read()
181 |         h = GetObjectGroupIndexXml(body)
182 |         for i in h.list():
183 |             print "object group part msg:", i
184 |     else:
185 |         print "get_object_group_index ERROR"
186 | 
187 |     res = oss.get_object_group_index(bucket, object)
188 |     if res.status == 200:
189 |         body = res.read()
190 |         h = GetObjectGroupIndexXml(body)
191 |         object_group_index = h.list()
192 |         for i in object_group_index:
193 |             if len(i) == 4 and len(i[1]) > 0:
194 |                 part_name = i[1].strip()
195 |                 res = oss.delete_object(bucket, part_name)
196 |                 if res.status != 204:
197 |                     print "delete part ", part_name, " in bucket:", bucket, " failed!"
198 |                 else:
199 |                     print "delete part ", part_name, " in bucket:", bucket, " ok"
200 |     print sep
201 |     #multipart upload operations
202 |     #get an upload id
203 |     upload_id = ""
204 |     res = oss.init_multi_upload(bucket, object, headers)
205 |     if res.status == 200:
206 |         body = res.read()
207 |         h = GetInitUploadIdXml(body)
208 |         upload_id = h.upload_id
209 | 
210 |     if len(upload_id) == 0:
211 |         print "init upload failed!"
212 |     else:
213 |         print "init upload OK!"
214 |         print "upload id is: %s" % upload_id
215 | 
216 |     #upload a part
217 |     data = "this is test content string."
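    #(all parts of one upload are sent under the same upload_id; part numbers start at 1)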
218 |     part_number = "1"
219 |     res = oss.upload_part_from_string(bucket, object, data, upload_id, part_number)
220 |     if (res.status / 100) == 2:
221 |         print "upload part OK"
222 |     else:
223 |         print "upload part ERROR"
224 | 
225 |     #complete the upload
226 |     part_msg_xml = get_part_xml(oss, bucket, object, upload_id)
227 |     res = oss.complete_upload(bucket, object, upload_id, part_msg_xml)
228 |     if (res.status / 100) == 2:
229 |         print "complete upload OK"
230 |     else:
231 |         print "complete upload ERROR"
232 | 
233 |     res = oss.get_object(bucket, object)
234 |     if (res.status / 100) == 2 and res.read() == data:
235 |         print "verify upload OK"
236 |     else:
237 |         print "verify upload ERROR"
238 | 
239 |     print sep
240 | 
241 | 
242 |     #delete an object from the bucket
243 |     object = "object_test"
244 |     headers = {}
245 |     res = oss.delete_object(bucket, object, headers)
246 |     if (res.status / 100) == 2:
247 |         print "delete_object OK"
248 |     else:
249 |         print "delete_object ERROR"
250 |     print sep
251 | 
252 |     #delete the bucket
253 |     res = oss.delete_bucket(bucket)
254 |     if (res.status / 100) == 2:
255 |         print "delete bucket ", bucket, "OK"
256 |     else:
257 |         print "delete bucket ", bucket, "ERROR"
258 | 
259 |     print sep
260 | 
261 | 
262 | 
--------------------------------------------------------------------------------
/ossync/sdk/oss_util.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding=utf-8
3 | import urllib
4 | import base64
5 | import hmac
6 | import time
7 | from hashlib import sha1 as sha
8 | import os
9 | import sys
10 | import md5
11 | import StringIO
12 | from threading import Thread
13 | import threading
14 | import ConfigParser
15 | import logging
16 | from logging.handlers import RotatingFileHandler
17 | from xml.sax.saxutils import escape
18 | try:
19 |     from oss.oss_xml_handler import *
20 | except:
21 |     from oss_xml_handler import *
22 | 
23 | #LOG_LEVEL can be one of DEBUG INFO WARNING ERROR CRITICAL
24 | DEBUG = False
25 | LOG_LEVEL = "DEBUG"
26 | PROVIDER = "OSS"
27 | SELF_DEFINE_HEADER_PREFIX = "x-oss-"
28 | if "AWS" == PROVIDER:
29 |     SELF_DEFINE_HEADER_PREFIX = "x-amz-"
30 | 
31 | def getlogger(debug=DEBUG, log_level=LOG_LEVEL, log_name="log.txt"):
32 |     if not debug:
33 |         logger = logging.getLogger('oss')
34 |         logger.addHandler(EmptyHandler())
35 |         return logger
36 |     logfile = os.path.join(os.getcwd(), log_name)
37 |     max_log_size = 100*1024*1024 #Bytes
38 |     backup_count = 5
39 |     format = \
40 |     "%(asctime)s %(levelname)-8s[%(filename)s:%(lineno)d(%(funcName)s)] %(message)s"
41 |     hdlr = RotatingFileHandler(logfile,
42 |                                mode='a',
43 |                                maxBytes=max_log_size,
44 |                                backupCount=backup_count)
45 |     formatter = logging.Formatter(format)
46 |     hdlr.setFormatter(formatter)
47 |     logger = logging.getLogger("oss")
48 |     logger.addHandler(hdlr)
49 |     if "DEBUG" == log_level.upper():
50 |         logger.setLevel(logging.DEBUG)
51 |     elif "INFO" == log_level.upper():
52 |         logger.setLevel(logging.INFO)
53 |     elif "WARNING" == log_level.upper():
54 |         logger.setLevel(logging.WARNING)
55 |     elif "ERROR" == log_level.upper():
56 |         logger.setLevel(logging.ERROR)
57 |     elif "CRITICAL" == log_level.upper():
58 |         logger.setLevel(logging.CRITICAL)
59 |     else:
60 |         logger.setLevel(logging.ERROR)
61 |     return logger
62 | 
63 | class EmptyHandler(logging.Handler):
64 |     pass
65 | 
66 | def helper_get_host_from_resp(res, bucket):
67 |     host = helper_get_host_from_headers(res.getheaders(), bucket)
68 |     if not host:
69 |         xml = res.read()
70 |         host = RedirectXml(xml).Endpoint().strip()
71 |         host = 
helper_get_host_from_endpoint(host, bucket) 72 | return host 73 | 74 | def helper_get_host_from_headers(headers, bucket): 75 | mp = convert_header2map(headers) 76 | location = safe_get_element('location', mp).strip() 77 | #https://bucket.oss.aliyuncs.com or http://oss.aliyuncs.com/bucket 78 | location = location.replace("https://", "").replace("http://", "") 79 | if location.startswith("%s." % bucket): 80 | location = location[len(bucket)+1:] 81 | index = location.find('/') 82 | if index == -1: 83 | return location 84 | return location[:index] 85 | 86 | def helper_get_host_from_endpoint(host, bucket): 87 | index = host.find('/') 88 | if index != -1: 89 | host = host[:index] 90 | index = host.find('\\') 91 | if index != -1: 92 | host = host[:index] 93 | index = host.find(bucket) 94 | if index == 0: 95 | host = host[len(bucket)+1:] 96 | return host 97 | 98 | def check_bucket_valid(bucket): 99 | alphabeta = "abcdefghijklmnopqrstuvwxyz0123456789-" 100 | if len(bucket) < 3 or len(bucket) > 63: 101 | return False 102 | if bucket[-1] == "-" or bucket[-1] == "_": 103 | return False 104 | if not ((bucket[0] >= 'a' and bucket[0] <= 'z') or (bucket[0] >= '0' and bucket[0] <= '9')): 105 | return False 106 | for i in bucket: 107 | if not i in alphabeta: 108 | return False 109 | return True 110 | 111 | ########## function for Authorization ########## 112 | def _format_header(headers=None): 113 | ''' 114 | format the headers that self define 115 | convert the self define headers to lower. 116 | ''' 117 | if not headers: 118 | headers = {} 119 | tmp_headers = {} 120 | for k in headers.keys(): 121 | if isinstance(headers[k], unicode): 122 | headers[k] = headers[k].encode('utf-8') 123 | 124 | if k.lower().startswith(SELF_DEFINE_HEADER_PREFIX): 125 | k_lower = k.lower() 126 | tmp_headers[k_lower] = headers[k] 127 | else: 128 | tmp_headers[k] = headers[k] 129 | return tmp_headers 130 | 131 | def get_assign(secret_access_key, method, headers=None, resource="/", result=None, debug=DEBUG): 132 | ''' 133 | Create the authorization for OSS based on header input. 134 | You should put it into "Authorization" parameter of header. 
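    The string to sign is assembled from the request exactly as in the body below:
        VERB + "\n" + Content-MD5 + "\n" + Content-Type + "\n" + Date + "\n"
        + CanonicalizedOSSHeaders + CanonicalizedResource
    and the returned signature is base64(hmac-sha1(secret_access_key, string_to_sign)).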
135 | ''' 136 | if not headers: 137 | headers = {} 138 | if not result: 139 | result = [] 140 | content_md5 = "" 141 | content_type = "" 142 | date = "" 143 | canonicalized_oss_headers = "" 144 | logger = getlogger(debug) 145 | logger.debug("secret_access_key: %s" % secret_access_key) 146 | content_md5 = safe_get_element('Content-MD5', headers) 147 | content_type = safe_get_element('Content-Type', headers) 148 | date = safe_get_element('Date', headers) 149 | canonicalized_resource = resource 150 | tmp_headers = _format_header(headers) 151 | if len(tmp_headers) > 0: 152 | x_header_list = tmp_headers.keys() 153 | x_header_list.sort() 154 | for k in x_header_list: 155 | if k.startswith(SELF_DEFINE_HEADER_PREFIX): 156 | canonicalized_oss_headers += "%s:%s\n" % (k, tmp_headers[k]) 157 | string_to_sign = method + "\n" + content_md5.strip() + "\n" + content_type + "\n" + date + "\n" + canonicalized_oss_headers + canonicalized_resource 158 | result.append(string_to_sign) 159 | logger.debug("method:%s\n content_md5:%s\n content_type:%s\n data:%s\n canonicalized_oss_headers:%s\n canonicalized_resource:%s\n" % (method, content_md5, content_type, date, canonicalized_oss_headers, canonicalized_resource)) 160 | logger.debug("string_to_sign:%s\n \nlength of string_to_sign:%d\n" % (string_to_sign, len(string_to_sign))) 161 | h = hmac.new(secret_access_key, string_to_sign, sha) 162 | sign_result = base64.encodestring(h.digest()).strip() 163 | logger.debug("sign result:%s" % sign_result) 164 | return sign_result 165 | 166 | def get_resource(params=None): 167 | if not params: 168 | return "" 169 | tmp_headers = {} 170 | for k, v in params.items(): 171 | tmp_k = k.lower().strip() 172 | tmp_headers[tmp_k] = v 173 | override_response_list = ['response-content-type', 'response-content-language', \ 174 | 'response-cache-control', 'logging', 'response-content-encoding', \ 175 | 'acl', 'uploadId', 'uploads', 'partNumber', 'group', \ 176 | 'delete', 'website', 'location', 'objectInfo', \ 177 | 'response-expires', 'response-content-disposition'] 178 | override_response_list.sort() 179 | resource = "" 180 | separator = "?" 181 | for i in override_response_list: 182 | if tmp_headers.has_key(i.lower()): 183 | resource += separator 184 | resource += i 185 | tmp_key = str(tmp_headers[i.lower()]) 186 | if len(tmp_key) != 0: 187 | resource += "=" 188 | resource += tmp_key 189 | separator = '&' 190 | return resource 191 | 192 | def append_param(url, params): 193 | ''' 194 | convert the parameters to query string of URI. 195 | ''' 196 | l = [] 197 | for k, v in params.items(): 198 | k = k.replace('_', '-') 199 | if k == 'maxkeys': 200 | k = 'max-keys' 201 | if isinstance(v, unicode): 202 | v = v.encode('utf-8') 203 | if v is not None and v != '': 204 | l.append('%s=%s' % (urllib.quote(k), urllib.quote(str(v)))) 205 | elif k == 'acl': 206 | l.append('%s' % (urllib.quote(k))) 207 | elif v is None or v == '': 208 | l.append('%s' % (urllib.quote(k))) 209 | if len(l): 210 | url = url + '?' + '&'.join(l) 211 | return url 212 | 213 | ############### Construct XML ############### 214 | def create_object_group_msg_xml(part_msg_list=None): 215 | ''' 216 | get information from part_msg_list and covert it to xml. 217 | part_msg_list has special format. 
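    Each element of part_msg_list is expected to look like the tuples produced
    by split_large_file() below: (part_number, part_name, md5sum, size, offset);
    only the first three fields are used here.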
218 |     '''
219 |     if not part_msg_list:
220 |         part_msg_list = []
221 |     xml_string = r'<CreateFileGroup>'
222 |     for part in part_msg_list:
223 |         if len(part) >= 3:
224 |             if isinstance(part[1], unicode):
225 |                 file_path = part[1].encode('utf-8')
226 |             else:
227 |                 file_path = part[1]
228 |             file_path = escape(file_path)
229 |             xml_string += r'<Part>'
230 |             xml_string += r'<PartNumber>' + str(part[0]) + r'</PartNumber>'
231 |             xml_string += r'<PartName>' + str(file_path) + r'</PartName>'
232 |             xml_string += r'<ETag>"' + str(part[2]).upper() + r'"</ETag>'
233 |             xml_string += r'</Part>'
234 |         else:
235 |             print "the ", part, " in part_msg_list is not as expected!"
236 |             return ""
237 |     xml_string += r'</CreateFileGroup>'
238 | 
239 |     return xml_string
240 | 
241 | def create_part_xml(part_msg_list=None):
242 |     '''
243 |     get information from part_msg_list and convert it to xml.
244 |     part_msg_list has special format.
245 |     '''
246 |     if not part_msg_list:
247 |         part_msg_list = []
248 |     xml_string = r'<CompleteMultipartUpload>'
249 |     for part in part_msg_list:
250 |         if len(part) >= 3:
251 |             xml_string += r'<Part>'
252 |             xml_string += r'<PartNumber>' + str(part[0]) + r'</PartNumber>'
253 |             xml_string += r'<ETag>"' + str(part[2]).upper() + r'"</ETag>'
254 |             xml_string += r'</Part>'
255 |         else:
256 |             print "the ", part, " in part_msg_list is not as expected!"
257 |             return ""
258 |     xml_string += r'</CompleteMultipartUpload>'
259 | 
260 |     return xml_string
261 | 
262 | def create_delete_object_msg_xml(object_list=None, is_quiet=False, is_defult=False):
263 |     '''
264 |     convert object name list to xml.
265 |     '''
266 |     if not object_list:
267 |         object_list = []
268 |     xml_string = r'<Delete>'
269 |     if not is_defult:
270 |         if is_quiet:
271 |             xml_string += r'<Quiet>true</Quiet>'
272 |         else:
273 |             xml_string += r'<Quiet>false</Quiet>'
274 |     for object in object_list:
275 |         key = object.strip()
276 |         if isinstance(object, unicode):
277 |             key = object.encode('utf-8')
278 |         key = escape(key)
279 |         xml_string += r'<Object><Key>%s</Key></Object>' % key
280 |     xml_string += r'</Delete>'
281 |     return xml_string
282 | 
283 | ############### operate OSS ###############
284 | def clear_all_object_of_bucket(oss_instance, bucket):
285 |     '''
286 |     clean all objects in bucket, after that, it will delete this bucket.
287 |     '''
288 |     return clear_all_objects_in_bucket(oss_instance, bucket)
289 | 
290 | def clear_all_objects_in_bucket(oss_instance, bucket, delete_marker="", delete_upload_id_marker="", debug=False):
291 |     '''
292 |     it will clean all objects in bucket, after that, it will delete this bucket.
293 | 294 | example: 295 | from oss_api import * 296 | host = "" 297 | id = "" 298 | key = "" 299 | oss_instance = OssAPI(host, id, key) 300 | bucket = "leopublicreadprivatewrite" 301 | if clear_all_objects_in_bucket(oss_instance, bucket): 302 | pass 303 | else: 304 | print "clean Fail" 305 | ''' 306 | prefix = "" 307 | delimiter = "" 308 | maxkeys = 1000 309 | delete_all_objects(oss_instance, bucket, prefix, delimiter, delete_marker, maxkeys, debug) 310 | delete_all_parts(oss_instance, bucket, delete_marker, delete_upload_id_marker, debug) 311 | res = oss_instance.delete_bucket(bucket) 312 | if (res.status / 100 != 2 and res.status != 404): 313 | print "clear_all_objects_in_bucket: delete bucket:%s fail, ret:%s, request id:%s" % (bucket, res.status, res.getheader("x-oss-request-id")) 314 | return False 315 | return True 316 | 317 | def delete_all_objects(oss_instance, bucket, prefix="", delimiter="", delete_marker="", maxkeys=1000, debug=False): 318 | marker = delete_marker 319 | delete_obj_num = 0 320 | while 1: 321 | object_list = [] 322 | res = oss_instance.get_bucket(bucket, prefix, marker, delimiter, maxkeys) 323 | if res.status != 200: 324 | break 325 | body = res.read() 326 | (tmp_object_list, marker) = get_object_list_marker_from_xml(body) 327 | for item in tmp_object_list: 328 | object_list.append(item[0]) 329 | 330 | if object_list: 331 | object_list_xml = create_delete_object_msg_xml(object_list) 332 | res = oss_instance.batch_delete_object(bucket, object_list_xml) 333 | if res.status/100 != 2: 334 | if marker: 335 | print "delete_all_objects: batch delete objects in bucket:%s fail, ret:%s, request id:%s, first object:%s, marker:%s" % (bucket, res.status, res.getheader("x-oss-request-id"), object_list[0], marker) 336 | else: 337 | print "delete_all_objects: batch delete objects in bucket:%s fail, ret:%s, request id:%s, first object:%s" % (bucket, res.status, res.getheader("x-oss-request-id"), object_list[0]) 338 | else: 339 | if debug: 340 | delete_obj_num += len(object_list) 341 | if marker: 342 | print "delete_all_objects: Now %s objects deleted, marker:%s" % (delete_obj_num, marker) 343 | else: 344 | print "delete_all_objects: Now %s objects deleted" % (delete_obj_num) 345 | if len(marker) == 0: 346 | break 347 | 348 | def delete_all_parts(oss_instance, bucket, delete_object_marker="", delete_upload_id_marker="", debug=False): 349 | delete_mulitipart_num = 0 350 | marker = delete_object_marker 351 | id_marker = delete_upload_id_marker 352 | while 1: 353 | res = oss_instance.get_all_multipart_uploads(bucket, key_marker=marker, upload_id_marker=id_marker) 354 | if res.status != 200: 355 | break 356 | body = res.read() 357 | hh = GetMultipartUploadsXml(body) 358 | (fl, pl) = hh.list() 359 | for i in fl: 360 | object = i[0] 361 | if isinstance(i[0], unicode): 362 | object = i[0].encode('utf-8') 363 | res = oss_instance.cancel_upload(bucket, object, i[1]) 364 | if (res.status / 100 != 2 and res.status != 404): 365 | print "delete_all_parts: cancel upload object:%s, upload_id:%s FAIL, ret:%s, request-id:%s" % (object, i[1], res.status, res.getheader("x-oss-request-id")) 366 | else: 367 | delete_mulitipart_num += 1 368 | if debug: 369 | print "delete_all_parts: cancel upload object:%s, upload_id:%s OK\nNow %s parts deleted." 
% (object, i[1], delete_mulitipart_num) 370 | if hh.is_truncated: 371 | marker = hh.next_key_marker 372 | id_marker = hh.next_upload_id_marker 373 | else: 374 | break 375 | if not marker: 376 | break 377 | 378 | def clean_all_bucket(oss_instance): 379 | ''' 380 | it will clean all bucket, including the all objects in bucket. 381 | ''' 382 | res = oss_instance.get_service() 383 | if (res.status / 100) == 2: 384 | h = GetServiceXml(res.read()) 385 | for b in h.bucket_list: 386 | if not clear_all_objects_in_bucket(oss_instance, b.name): 387 | print "clean bucket ", b.name, " failed! in clean_all_bucket" 388 | return False 389 | return True 390 | else: 391 | print "failed! get service in clean_all_bucket return ", res.status 392 | print res.read() 393 | print res.getheaders() 394 | return False 395 | 396 | def pgfs_clear_all_objects_in_bucket(oss_instance, bucket): 397 | ''' 398 | it will clean all objects in bucket, after that, it will delete this bucket. 399 | 400 | example: 401 | from oss_api import * 402 | host = "" 403 | id = "" 404 | key = "" 405 | oss_instance = OssAPI(host, id, key) 406 | bucket = "leopublicreadprivatewrite" 407 | if clear_all_objects_in_bucket(oss_instance, bucket): 408 | pass 409 | else: 410 | print "clean Fail" 411 | ''' 412 | b = GetAllObjects() 413 | b.get_all_object_in_bucket(oss_instance, bucket) 414 | for i in b.object_list: 415 | res = oss_instance.delete_object(bucket, i) 416 | if (res.status / 100 != 2): 417 | print "clear_all_objects_in_bucket: delete object fail, ret is:", res.status, "bucket is:", bucket, "object is: ", i 418 | return False 419 | else: 420 | pass 421 | 422 | res = oss_instance.delete_bucket(bucket) 423 | if (res.status / 100 != 2 and res.status != 404): 424 | print "clear_all_objects_in_bucket: delete bucket fail, ret is: %s, request id is:%s" % (res.status, res.getheader("x-oss-request-id")) 425 | return False 426 | return True 427 | 428 | def pgfs_clean_all_bucket(oss_instance): 429 | ''' 430 | it will clean all bucket, including the all objects in bucket. 431 | ''' 432 | res = oss_instance.get_service() 433 | if (res.status / 100) == 2: 434 | h = GetServiceXml(res.read()) 435 | for b in h.bucket_list: 436 | print b 437 | ''' 438 | if not pgfs_clear_all_objects_in_bucket(oss_instance, b.name): 439 | print "clean bucket ", b.name, " failed! in clean_all_bucket" 440 | return False 441 | ''' 442 | return True 443 | else: 444 | print "failed! get service in clean_all_bucket return ", res.status 445 | print res.read() 446 | print res.getheaders() 447 | return False 448 | 449 | def delete_all_parts_of_object_group(oss, bucket, object_group_name): 450 | res = oss.get_object_group_index(bucket, object_group_name) 451 | if res.status == 200: 452 | body = res.read() 453 | h = GetObjectGroupIndexXml(body) 454 | object_group_index = h.list() 455 | for i in object_group_index: 456 | if len(i) == 4 and len(i[1]) > 0: 457 | part_name = i[1].strip() 458 | res = oss.delete_object(bucket, part_name) 459 | if res.status != 204: 460 | print "delete part ", part_name, " in bucket:", bucket, " failed!" 
461 | return False 462 | else: 463 | return False 464 | return True 465 | 466 | class GetAllObjects: 467 | def __init__(self): 468 | self.object_list = [] 469 | 470 | def get_object_in_bucket(self, oss, bucket="", marker="", prefix=""): 471 | object_list = [] 472 | maxkeys = 1000 473 | try: 474 | res = oss.get_bucket(bucket, prefix, marker, maxkeys=maxkeys) 475 | body = res.read() 476 | hh = GetBucketXml(body) 477 | (fl, pl) = hh.list() 478 | if len(fl) != 0: 479 | for i in fl: 480 | if isinstance(i[0], unicode): 481 | object = i[0].encode('utf-8') 482 | object_list.append(object) 483 | 484 | marker = hh.nextmarker 485 | except: 486 | pass 487 | return (object_list, marker) 488 | 489 | def get_all_object_in_bucket(self, oss, bucket="", marker="", prefix=""): 490 | marker2 = "" 491 | while True: 492 | (object_list, marker) = self.get_object_in_bucket(oss, bucket, marker2, prefix) 493 | marker2 = marker 494 | if len(object_list) != 0: 495 | self.object_list.extend(object_list) 496 | 497 | if not marker: 498 | break 499 | def get_all_buckets(oss): 500 | bucket_list = [] 501 | res = oss.get_service() 502 | if res.status == 200: 503 | h = GetServiceXml(res.read()) 504 | for b in h.bucket_list: 505 | bucket_list.append(str(b.name).strip()) 506 | return bucket_list 507 | 508 | def get_object_list_marker_from_xml(body): 509 | #return ([(object_name, object_length, last_modify_time)...], marker) 510 | object_meta_list = [] 511 | next_marker = "" 512 | hh = GetBucketXml(body) 513 | (fl, pl) = hh.list() 514 | if len(fl) != 0: 515 | for i in fl: 516 | if isinstance(i[0], unicode): 517 | object = i[0].encode('utf-8') 518 | else: 519 | object = i[0] 520 | last_modify_time = i[1] 521 | length = i[3] 522 | etag = i[2] 523 | object_meta_list.append((object, length, last_modify_time, etag)) 524 | if hh.is_truncated: 525 | next_marker = hh.nextmarker 526 | return (object_meta_list, next_marker) 527 | 528 | def get_upload_id(oss, bucket, object, headers=None): 529 | ''' 530 | get the upload id of object. 531 | Returns: 532 | string 533 | ''' 534 | if not headers: 535 | headers = {} 536 | upload_id = "" 537 | res = oss.init_multi_upload(bucket, object, headers) 538 | if res.status == 200: 539 | body = res.read() 540 | h = GetInitUploadIdXml(body) 541 | upload_id = h.upload_id 542 | else: 543 | print res.status 544 | print res.getheaders() 545 | print res.read() 546 | return upload_id 547 | 548 | def get_all_upload_id_list(oss, bucket): 549 | ''' 550 | get all upload id of bucket 551 | Returns: 552 | list 553 | ''' 554 | all_upload_id_list = [] 555 | marker = "" 556 | id_marker = "" 557 | while True: 558 | res = oss.get_all_multipart_uploads(bucket, key_marker=marker, upload_id_marker=id_marker) 559 | if res.status != 200: 560 | return all_upload_id_list 561 | 562 | body = res.read() 563 | hh = GetMultipartUploadsXml(body) 564 | (fl, pl) = hh.list() 565 | for i in fl: 566 | all_upload_id_list.append(i) 567 | if hh.is_truncated: 568 | marker = hh.next_key_marker 569 | id_marker = hh.next_upload_id_marker 570 | else: 571 | break 572 | if not marker and not id_marker: 573 | break 574 | return all_upload_id_list 575 | 576 | def get_upload_id_list(oss, bucket, object): 577 | ''' 578 | get all upload id list of one object. 
579 |     Returns:
580 |         list
581 |     '''
582 |     upload_id_list = []
583 |     marker = ""
584 |     id_marker = ""
585 |     while True:
586 |         res = oss.get_all_multipart_uploads(bucket, prefix=object, key_marker=marker, upload_id_marker=id_marker)
587 |         if res.status != 200:
588 |             break
589 |         body = res.read()
590 |         hh = GetMultipartUploadsXml(body)
591 |         (fl, pl) = hh.list()
592 |         for i in fl:
593 |             upload_id_list.append(i[1])
594 |         if hh.is_truncated:
595 |             marker = hh.next_key_marker
596 |             id_marker = hh.next_upload_id_marker
597 |         else:
598 |             break
599 |         if not marker:
600 |             break
601 | 
602 |     return upload_id_list
603 | 
604 | def get_part_list(oss, bucket, object, upload_id, max_part=""):
605 |     '''
606 |     get uploaded part list of object.
607 |     Returns:
608 |         list
609 |     '''
610 |     part_list = []
611 |     marker = ""
612 |     while True:
613 |         res = oss.get_all_parts(bucket, object, upload_id, part_number_marker = marker, max_parts=max_part)
614 |         if res.status != 200:
615 |             break
616 |         body = res.read()
617 |         h = GetPartsXml(body)
618 |         part_list.extend(h.list())
619 |         if h.is_truncated:
620 |             marker = h.next_part_number_marker
621 |         else:
622 |             break
623 |         if not marker:
624 |             break
625 |     return part_list
626 | 
627 | def get_part_xml(oss, bucket, object, upload_id):
628 |     '''
629 |     get uploaded part list of object.
630 |     Returns:
631 |         string
632 |     '''
633 |     part_list = []
634 |     part_list = get_part_list(oss, bucket, object, upload_id)
635 |     xml_string = r'<CompleteMultipartUpload>'
636 |     for part in part_list:
637 |         xml_string += r'<Part>'
638 |         xml_string += r'<PartNumber>' + str(part[0]) + r'</PartNumber>'
639 |         xml_string += r'<ETag>' + part[1] + r'</ETag>'
640 |         xml_string += r'</Part>'
641 |     xml_string += r'</CompleteMultipartUpload>'
642 |     return xml_string
643 | 
644 | def get_part_map(oss, bucket, object, upload_id):
645 |     part_list = []
646 |     part_list = get_part_list(oss, bucket, object, upload_id)
647 |     part_map = {}
648 |     for part in part_list:
649 |         part_map[str(part[0])] = part[1]
650 |     return part_map
651 | 
652 | ########## multi-thread ##########
653 | class DeleteObjectWorker(Thread):
654 |     def __init__(self, oss, bucket, part_msg_list, retry_times=5):
655 |         Thread.__init__(self)
656 |         self.oss = oss
657 |         self.bucket = bucket
658 |         self.part_msg_list = part_msg_list
659 |         self.retry_times = retry_times
660 | 
661 |     def run(self):
662 |         bucket = self.bucket
663 |         object_list = self.part_msg_list
664 |         step = 1000
665 |         begin = 0
666 |         end = 0
667 |         total_length = len(object_list)
668 |         remain_length = total_length
669 |         while True:
670 |             if remain_length > step:
671 |                 end = begin + step
672 |             elif remain_length > 0:
673 |                 end = begin + remain_length
674 |             else:
675 |                 break
676 |             is_fail = True
677 |             retry_times = self.retry_times
678 |             while True:
679 |                 try:
680 |                     if retry_times <= 0:
681 |                         break
682 |                     res = self.oss.delete_objects(bucket, object_list[begin:end])
683 |                     if res.status / 100 == 2:
684 |                         is_fail = False
685 |                         break
686 |                 except:
687 |                     retry_times = retry_times - 1
688 |                     time.sleep(1)
689 |             if is_fail:
690 |                 print "delete object_list[%s:%s] failed!, first is %s" % (begin, end, object_list[begin])
691 |             begin = end
692 |             remain_length = remain_length - step
693 | 
694 | class PutObjectGroupWorker(Thread):
695 |     def __init__(self, oss, bucket, file_path, part_msg_list, retry_times=5):
696 |         Thread.__init__(self)
697 |         self.oss = oss
698 |         self.bucket = bucket
699 |         self.part_msg_list = part_msg_list
700 |         self.file_path = file_path
701 |         self.retry_times = retry_times
702 | 
703 |     def run(self):
704 |         for part in self.part_msg_list:
705 |             if len(part) == 5:
706 |                 bucket = self.bucket
707 |                 file_name = part[1]
708 |                 if isinstance(file_name, unicode):
709 |                     file_name = file_name.encode('utf-8')
710 |                 object_name = file_name
711 |                 retry_times = self.retry_times
712 |                 is_skip = False
713 |                 while True:
714 |                     try:
715 |                         if retry_times <= 0:
716 |                             break
717 |                         res = self.oss.head_object(bucket, object_name)
718 |                         if res.status == 200:
719 |                             header_map = convert_header2map(res.getheaders())
720 |                             etag = safe_get_element("etag", header_map)
721 |                             md5 = part[2]
722 |                             if etag.replace('"', "").upper() == md5.upper():
723 |                                 is_skip = True
724 |                             break
725 |                     except:
726 |                         retry_times = retry_times - 1
727 |                         time.sleep(1)
728 | 
729 |                 if is_skip:
730 |                     continue
731 | 
732 |                 partsize = part[3]
733 |                 offset = part[4]
734 |                 retry_times = self.retry_times
735 |                 while True:
736 |                     try:
737 |                         if retry_times <= 0:
738 |                             break
739 |                         res = self.oss.put_object_from_file_given_pos(bucket, object_name, self.file_path, offset, partsize)
740 |                         if res.status != 200:
741 |                             print "upload ", file_name, "failed!", " ret is:", res.status
742 |                             print "headers", res.getheaders()
743 |                             retry_times = retry_times - 1
744 |                             time.sleep(1)
745 |                         else:
746 |                             break
747 |                     except:
748 |                         retry_times = retry_times - 1
749 |                         time.sleep(1)
750 | 
751 |             else:
752 |                 print "ERROR! part", part, " is not as expected!"
753 | 
754 | class UploadPartWorker(Thread):
755 |     def __init__(self, oss, bucket, object, upload_id, file_path, part_msg_list, uploaded_part_map, retry_times=5, debug=DEBUG):
756 |         Thread.__init__(self)
757 |         self.oss = oss
758 |         self.bucket = bucket
759 |         self.object = object
760 |         self.part_msg_list = part_msg_list
761 |         self.file_path = file_path
762 |         self.upload_id = upload_id
763 |         self.uploaded_part_map = uploaded_part_map
764 |         self.retry_times = retry_times
765 |         self.logger = getlogger(debug)
766 | 
767 |     def run(self):
768 |         for part in self.part_msg_list:
769 |             part_number = str(part[0])
770 |             if len(part) == 5:
771 |                 bucket = self.bucket
772 |                 object = self.object
773 |                 if self.uploaded_part_map.has_key(part_number):
774 |                     md5 = part[2]
775 |                     if self.uploaded_part_map[part_number].replace('"', "").upper() == md5.upper():
776 |                         continue
777 | 
778 |                 partsize = part[3]
779 |                 offset = part[4]
780 |                 retry_times = self.retry_times
781 |                 while True:
782 |                     try:
783 |                         if retry_times <= 0:
784 |                             break
785 |                         res = self.oss.upload_part_from_file_given_pos(bucket, object, self.file_path, offset, partsize, self.upload_id, part_number)
786 |                         if res.status != 200:
787 |                             self.logger.warn("Upload %s/%s from %s, failed! ret is:%s." %(bucket, object, self.file_path, res.status))
788 |                             self.logger.warn("headers:%s" % res.getheaders())
789 |                             retry_times = retry_times - 1
790 |                             time.sleep(1)
791 |                         else:
792 |                             self.logger.info("Upload %s/%s from %s, OK! ret is:%s." % (bucket, object, self.file_path, res.status))
793 |                             break
794 |                     except:
795 |                         retry_times = retry_times - 1
796 |                         time.sleep(1)
797 |             else:
798 |                 self.logger.error("ERROR! part %s is not as expected!"
% part) 799 | 800 | class MultiGetWorker(Thread): 801 | def __init__(self, oss, bucket, object, file, start, end, retry_times=5): 802 | Thread.__init__(self) 803 | self.oss = oss 804 | self.bucket = bucket 805 | self.object = object 806 | self.startpos = start 807 | self.endpos = end 808 | self.file = file 809 | self.length = self.endpos - self.startpos + 1 810 | self.need_read = 0 811 | self.get_buffer_size = 10*1024*1024 812 | self.retry_times = retry_times 813 | 814 | def run(self): 815 | if self.startpos >= self.endpos: 816 | return 817 | 818 | retry_times = 0 819 | while True: 820 | headers = {} 821 | self.file.seek(self.startpos) 822 | headers['Range'] = 'bytes=%d-%d' % (self.startpos, self.endpos) 823 | try: 824 | res = self.oss.object_operation("GET", self.bucket, self.object, headers) 825 | if res.status == 206: 826 | while self.need_read < self.length: 827 | left_len = self.length - self.need_read 828 | if left_len > self.get_buffer_size: 829 | content = res.read(self.get_buffer_size) 830 | else: 831 | content = res.read(left_len) 832 | if content: 833 | self.need_read += len(content) 834 | self.file.write(content) 835 | else: 836 | break 837 | break 838 | except: 839 | pass 840 | retry_times += 1 841 | if retry_times > self.retry_times: 842 | print "ERROR, reach max retry times:%s when multi get /%s/%s" % (self.retry_times, self.bucket, self.object) 843 | break 844 | 845 | self.file.flush() 846 | self.file.close() 847 | 848 | ############### misc ############### 849 | 850 | def split_large_file(file_path, object_prefix="", max_part_num=1000, part_size=10*1024*1024, buffer_size=10*1024): 851 | parts_list = [] 852 | 853 | if os.path.isfile(file_path): 854 | file_size = os.path.getsize(file_path) 855 | 856 | if file_size > part_size * max_part_num: 857 | part_size = (file_size + max_part_num - file_size % max_part_num) / max_part_num 858 | 859 | part_order = 1 860 | fp = open(file_path, 'rb') 861 | fp.seek(os.SEEK_SET) 862 | 863 | part_num = (file_size + part_size - 1) / part_size 864 | 865 | for i in xrange(0, part_num): 866 | left_len = part_size 867 | real_part_size = 0 868 | m = md5.new() 869 | offset = part_size * i 870 | while True: 871 | read_size = 0 872 | if left_len <= 0: 873 | break 874 | elif left_len < buffer_size: 875 | read_size = left_len 876 | else: 877 | read_size = buffer_size 878 | 879 | buffer_content = fp.read(read_size) 880 | m.update(buffer_content) 881 | real_part_size += len(buffer_content) 882 | 883 | left_len = left_len - read_size 884 | 885 | md5sum = m.hexdigest() 886 | 887 | temp_file_name = os.path.basename(file_path) + "_" + str(part_order) 888 | if isinstance(object_prefix, unicode): 889 | object_prefix = object_prefix.encode('utf-8') 890 | if not object_prefix: 891 | file_name = sum_string(temp_file_name) + "_" + temp_file_name 892 | else: 893 | file_name = object_prefix + "/" + sum_string(temp_file_name) + "_" + temp_file_name 894 | part_msg = (part_order, file_name, md5sum, real_part_size, offset) 895 | parts_list.append(part_msg) 896 | part_order += 1 897 | 898 | fp.close() 899 | else: 900 | print "ERROR! No file: ", file_path, ", please check." 
901 | 902 | return parts_list 903 | 904 | def sumfile(fobj): 905 | '''Returns an md5 hash for an object with read() method.''' 906 | m = md5.new() 907 | while True: 908 | d = fobj.read(8096) 909 | if not d: 910 | break 911 | m.update(d) 912 | return m.hexdigest() 913 | 914 | def md5sum(fname): 915 | '''Returns an md5 hash for file fname, or stdin if fname is "-".''' 916 | if fname == '-': 917 | ret = sumfile(sys.stdin) 918 | else: 919 | try: 920 | f = file(fname, 'rb') 921 | except: 922 | return 'Failed to open file' 923 | ret = sumfile(f) 924 | f.close() 925 | return ret 926 | 927 | def md5sum2(filename, offset=0, partsize=0): 928 | m = md5.new() 929 | fp = open(filename, 'rb') 930 | if offset > os.path.getsize(filename): 931 | fp.seek(os.SEEK_SET, os.SEEK_END) 932 | else: 933 | fp.seek(offset) 934 | 935 | left_len = partsize 936 | BufferSize = 8 * 1024 937 | while True: 938 | if left_len <= 0: 939 | break 940 | elif left_len < BufferSize: 941 | buffer_content = fp.read(left_len) 942 | else: 943 | buffer_content = fp.read(BufferSize) 944 | m.update(buffer_content) 945 | left_len = left_len - len(buffer_content) 946 | md5sum = m.hexdigest() 947 | return md5sum 948 | 949 | def sum_string(content): 950 | f = StringIO.StringIO(content) 951 | md5sum = sumfile(f) 952 | f.close() 953 | return md5sum 954 | 955 | def convert_header2map(header_list): 956 | header_map = {} 957 | for (a, b) in header_list: 958 | header_map[a] = b 959 | return header_map 960 | 961 | def safe_get_element(name, container): 962 | for k, v in container.items(): 963 | if k.strip().lower() == name.strip().lower(): 964 | return v 965 | return "" 966 | 967 | def get_content_type_by_filename(file_name): 968 | suffix = "" 969 | name = os.path.basename(file_name) 970 | suffix = name.split('.')[-1] 971 | mime_type = "" 972 | try: 973 | import mimetypes 974 | mimetypes.init() 975 | mime_type = mimetypes.types_map["." 
+ suffix] 976 | except Exception: 977 | mime_type = 'application/octet-stream' 978 | return mime_type 979 | 980 | def smart_code(input_stream): 981 | if isinstance(input_stream, str): 982 | try: 983 | tmp = unicode(input_stream, 'utf-8') 984 | except UnicodeDecodeError: 985 | try: 986 | tmp = unicode(input_stream, 'gbk') 987 | except UnicodeDecodeError: 988 | try: 989 | tmp = unicode(input_stream, 'big5') 990 | except UnicodeDecodeError: 991 | try: 992 | tmp = unicode(input_stream, 'ascii') 993 | except: 994 | tmp = input_stream 995 | else: 996 | tmp = input_stream 997 | return tmp 998 | 999 | def is_ip(s): 1000 | try: 1001 | tmp_list = s.split(':') 1002 | s = tmp_list[0] 1003 | if s == 'localhost': 1004 | return True 1005 | tmp_list = s.split('.') 1006 | if len(tmp_list) != 4: 1007 | return False 1008 | else: 1009 | for i in tmp_list: 1010 | if int(i) < 0 or int(i) > 255: 1011 | return False 1012 | except: 1013 | return False 1014 | return True 1015 | 1016 | def get_second_level_domain(host): 1017 | if is_ip(host): 1018 | return host 1019 | else: 1020 | tmp_list = host.split('.') 1021 | if len(tmp_list) >= 4: 1022 | return ".".join(tmp_list[-3:]) 1023 | return host 1024 | 1025 | if __name__ == '__main__': 1026 | pass 1027 | -------------------------------------------------------------------------------- /ossync/sdk/oss_xml_handler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding=utf-8 3 | from xml.dom import minidom 4 | 5 | def get_tag_text(element, tag): 6 | nodes = element.getElementsByTagName(tag) 7 | if len(nodes) == 0: 8 | return "" 9 | else: 10 | node = nodes[0] 11 | rc = "" 12 | for node in node.childNodes: 13 | if node.nodeType in ( node.TEXT_NODE, node.CDATA_SECTION_NODE): 14 | rc = rc + node.data 15 | if rc == "true": 16 | return True 17 | elif rc == "false": 18 | return False 19 | return rc 20 | 21 | class ErrorXml: 22 | def __init__(self, xml_string): 23 | self.xml = minidom.parseString(xml_string) 24 | self.code = get_tag_text(self.xml, 'Code') 25 | self.msg = get_tag_text(self.xml, 'Message') 26 | self.resource = get_tag_text(self.xml, 'Resource') 27 | self.request_id = get_tag_text(self.xml, 'RequestId') 28 | self.host_id = get_tag_text(self.xml, 'HostId') 29 | 30 | def show(self): 31 | print "Code: %s\nMessage: %s\nResource: %s\nRequestId: %s \nHostId: %s" % (self.code, self.msg, self.resource, self.request_id, self.host_id) 32 | 33 | class Owner: 34 | def __init__(self, xml_element): 35 | self.element = xml_element 36 | self.id = get_tag_text(self.element, "ID") 37 | self.display_name = get_tag_text(self.element, "DisplayName") 38 | 39 | def show(self): 40 | print "ID: %s\nDisplayName: %s" % (self.id, self.display_name) 41 | 42 | class Bucket: 43 | def __init__(self, xml_element): 44 | self.element = xml_element 45 | self.location = get_tag_text(self.element, "Location") 46 | self.name = get_tag_text(self.element, "Name") 47 | self.creation_date = get_tag_text(self.element, "CreationDate") 48 | 49 | def show(self): 50 | print "Name: %s\nCreationDate: %s\nLocation: %s" % (self.name, self.creation_date, self.location) 51 | 52 | class GetServiceXml: 53 | def __init__(self, xml_string): 54 | self.xml = minidom.parseString(xml_string) 55 | self.owner = Owner(self.xml.getElementsByTagName('Owner')[0]) 56 | self.buckets = self.xml.getElementsByTagName('Bucket') 57 | self.bucket_list = [] 58 | for b in self.buckets: 59 | self.bucket_list.append(Bucket(b)) 60 | 61 | def show(self): 62 | print "Owner:" 63 | 
self.owner.show() 64 | print "\nBucket list:" 65 | for b in self.bucket_list: 66 | b.show() 67 | print "" 68 | 69 | def list(self): 70 | bl = [] 71 | for b in self.bucket_list: 72 | bl.append((b.name, b.creation_date, b.location)) 73 | return bl 74 | 75 | class Content: 76 | def __init__(self, xml_element): 77 | self.element = xml_element 78 | self.key = get_tag_text(self.element, "Key") 79 | self.last_modified = get_tag_text(self.element, "LastModified") 80 | self.etag = get_tag_text(self.element, "ETag") 81 | self.size = get_tag_text(self.element, "Size") 82 | self.owner = Owner(self.element.getElementsByTagName('Owner')[0]) 83 | self.storage_class = get_tag_text(self.element, "StorageClass") 84 | 85 | def show(self): 86 | print "Key: %s\nLastModified: %s\nETag: %s\nSize: %s\nStorageClass: %s" % (self.key, self.last_modified, self.etag, self.size, self.storage_class) 87 | self.owner.show() 88 | 89 | class Part: 90 | def __init__(self, xml_element): 91 | self.element = xml_element 92 | self.part_num = get_tag_text(self.element, "PartNumber") 93 | self.object_name = get_tag_text(self.element, "PartName") 94 | self.object_size = get_tag_text(self.element, "PartSize") 95 | self.etag = get_tag_text(self.element, "ETag") 96 | 97 | def show(self): 98 | print "PartNumber: %s\nPartName: %s\nPartSize: %s\nETag: %s\n" % (self.part_num, self.object_name, self.object_size, self.etag) 99 | 100 | class PostObjectGroupXml: 101 | def __init__(self, xml_string): 102 | self.xml = minidom.parseString(xml_string) 103 | self.bucket = get_tag_text(self.xml, 'Bucket') 104 | self.key = get_tag_text(self.xml, 'Key') 105 | self.size = get_tag_text(self.xml, 'Size') 106 | self.etag = get_tag_text(self.xml, "ETag") 107 | 108 | def show(self): 109 | print "Post Object Group, Bucket: %s\nKey: %s\nSize: %s\nETag: %s" % (self.bucket, self.key, self.size, self.etag) 110 | 111 | class GetObjectGroupIndexXml: 112 | def __init__(self, xml_string): 113 | self.xml = minidom.parseString(xml_string) 114 | self.bucket = get_tag_text(self.xml, 'Bucket') 115 | self.key = get_tag_text(self.xml, 'Key') 116 | self.etag = get_tag_text(self.xml, 'Etag') 117 | self.file_length = get_tag_text(self.xml, 'FileLength') 118 | self.index_list = [] 119 | index_lists = self.xml.getElementsByTagName('Part') 120 | for i in index_lists: 121 | self.index_list.append(Part(i)) 122 | 123 | def list(self): 124 | index_list = [] 125 | for i in self.index_list: 126 | index_list.append((i.part_num, i.object_name, i.object_size, i.etag)) 127 | return index_list 128 | 129 | def show(self): 130 | print "Bucket: %s\nObject: %s\nEtag: %s\nObjectSize: %s" % (self.bucket, self.key, self.etag, self.file_length) 131 | print "\nPart list:" 132 | for p in self.index_list: 133 | p.show() 134 | 135 | class GetBucketXml: 136 | def __init__(self, xml_string): 137 | self.xml = minidom.parseString(xml_string) 138 | self.name = get_tag_text(self.xml, 'Name') 139 | self.prefix = get_tag_text(self.xml, 'Prefix') 140 | self.marker = get_tag_text(self.xml, 'Marker') 141 | self.nextmarker = get_tag_text(self.xml, 'NextMarker') 142 | self.maxkeys = get_tag_text(self.xml, 'MaxKeys') 143 | self.delimiter = get_tag_text(self.xml, 'Delimiter') 144 | self.is_truncated = get_tag_text(self.xml, 'IsTruncated') 145 | 146 | self.prefix_list = [] 147 | prefixes = self.xml.getElementsByTagName('CommonPrefixes') 148 | for p in prefixes: 149 | tag_txt = get_tag_text(p, "Prefix") 150 | self.prefix_list.append(tag_txt) 151 | 152 | self.content_list = [] 153 | contents = 
self.xml.getElementsByTagName('Contents') 154 | for c in contents: 155 | self.content_list.append(Content(c)) 156 | 157 | def show(self): 158 | print "Name: %s\nPrefix: %s\nMarker: %s\nNextMarker: %s\nMaxKeys: %s\nDelimiter: %s\nIsTruncated: %s" % (self.name, self.prefix, self.marker, self.nextmarker, self.maxkeys, self.delimiter, self.is_truncated) 159 | print "\nPrefix list:" 160 | for p in self.prefix_list: 161 | print p 162 | print "\nContent list:" 163 | for c in self.content_list: 164 | c.show() 165 | print "" 166 | 167 | def list(self): 168 | cl = [] 169 | pl = [] 170 | for c in self.content_list: 171 | cl.append((c.key, c.last_modified, c.etag, c.size, c.owner.id, c.owner.display_name, c.storage_class)) 172 | for p in self.prefix_list: 173 | pl.append(p) 174 | 175 | return (cl, pl) 176 | 177 | class GetBucketAclXml: 178 | def __init__(self, xml_string): 179 | self.xml = minidom.parseString(xml_string) 180 | if len(self.xml.getElementsByTagName('Owner')) != 0: 181 | self.owner = Owner(self.xml.getElementsByTagName('Owner')[0]) 182 | else: 183 | self.owner = "" 184 | self.grant = get_tag_text(self.xml, 'Grant') 185 | 186 | def show(self): 187 | print "Owner Name: %s\nOwner ID: %s\nGrant: %s" % (self.owner.id, self.owner.display_name, self.grant) 188 | 189 | class GetBucketLocationXml: 190 | def __init__(self, xml_string): 191 | self.xml = minidom.parseString(xml_string) 192 | self.location = get_tag_text(self.xml, 'LocationConstraint') 193 | 194 | def show(self): 195 | print "LocationConstraint: %s" % (self.location) 196 | 197 | class GetInitUploadIdXml: 198 | def __init__(self, xml_string): 199 | self.xml = minidom.parseString(xml_string) 200 | self.bucket = get_tag_text(self.xml, 'Bucket') 201 | self.object = get_tag_text(self.xml, 'Key') 202 | self.key = get_tag_text(self.xml, 'Key') 203 | self.upload_id = get_tag_text(self.xml, 'UploadId') 204 | self.marker = get_tag_text(self.xml, 'Marker') 205 | 206 | def show(self): 207 | print " " 208 | 209 | class Upload: 210 | def __init__(self, xml_element): 211 | self.element = xml_element 212 | self.key = get_tag_text(self.element, "Key") 213 | self.upload_id = get_tag_text(self.element, "UploadId") 214 | 215 | class GetMultipartUploadsXml: 216 | def __init__(self, xml_string): 217 | self.xml = minidom.parseString(xml_string) 218 | self.bucket = get_tag_text(self.xml, 'Bucket') 219 | self.key_marker = get_tag_text(self.xml, 'KeyMarker') 220 | self.upload_id_marker = get_tag_text(self.xml, 'UploadIdMarker') 221 | self.next_key_marker = get_tag_text(self.xml, 'NextKeyMarker') 222 | self.next_upload_id_marker = get_tag_text(self.xml, 'NextUploadIdMarker') 223 | self.delimiter = get_tag_text(self.xml, 'Delimiter') 224 | self.prefix = get_tag_text(self.xml, 'Prefix') 225 | self.max_uploads = get_tag_text(self.xml, 'MaxUploads') 226 | self.is_truncated = get_tag_text(self.xml, 'IsTruncated') 227 | 228 | self.prefix_list = [] 229 | prefixes = self.xml.getElementsByTagName('CommonPrefixes') 230 | for p in prefixes: 231 | tag_txt = get_tag_text(p, "Prefix") 232 | self.prefix_list.append(tag_txt) 233 | 234 | self.content_list = [] 235 | contents = self.xml.getElementsByTagName('Upload') 236 | for c in contents: 237 | self.content_list.append(Upload(c)) 238 | 239 | def list(self): 240 | cl = [] 241 | pl = [] 242 | for c in self.content_list: 243 | cl.append((c.key, c.upload_id)) 244 | for p in self.prefix_list: 245 | pl.append(p) 246 | 247 | return (cl, pl) 248 | 249 | class MultiPart: 250 | def __init__(self, xml_element): 251 | self.element = 
177 | class GetBucketAclXml:
178 |     def __init__(self, xml_string):
179 |         self.xml = minidom.parseString(xml_string)
180 |         if len(self.xml.getElementsByTagName('Owner')) != 0:
181 |             self.owner = Owner(self.xml.getElementsByTagName('Owner')[0])
182 |         else:
183 |             self.owner = ""
184 |         self.grant = get_tag_text(self.xml, 'Grant')
185 | 
186 |     def show(self):
187 |         print "Owner Name: %s\nOwner ID: %s\nGrant: %s" % (self.owner.id, self.owner.display_name, self.grant)
188 | 
189 | class GetBucketLocationXml:
190 |     def __init__(self, xml_string):
191 |         self.xml = minidom.parseString(xml_string)
192 |         self.location = get_tag_text(self.xml, 'LocationConstraint')
193 | 
194 |     def show(self):
195 |         print "LocationConstraint: %s" % (self.location)
196 | 
197 | class GetInitUploadIdXml:
198 |     def __init__(self, xml_string):
199 |         self.xml = minidom.parseString(xml_string)
200 |         self.bucket = get_tag_text(self.xml, 'Bucket')
201 |         self.object = get_tag_text(self.xml, 'Key')
202 |         self.key = get_tag_text(self.xml, 'Key')
203 |         self.upload_id = get_tag_text(self.xml, 'UploadId')
204 |         self.marker = get_tag_text(self.xml, 'Marker')
205 | 
206 |     def show(self):
207 |         print " "
208 | 
209 | class Upload:
210 |     def __init__(self, xml_element):
211 |         self.element = xml_element
212 |         self.key = get_tag_text(self.element, "Key")
213 |         self.upload_id = get_tag_text(self.element, "UploadId")
214 | 
215 | class GetMultipartUploadsXml:
216 |     def __init__(self, xml_string):
217 |         self.xml = minidom.parseString(xml_string)
218 |         self.bucket = get_tag_text(self.xml, 'Bucket')
219 |         self.key_marker = get_tag_text(self.xml, 'KeyMarker')
220 |         self.upload_id_marker = get_tag_text(self.xml, 'UploadIdMarker')
221 |         self.next_key_marker = get_tag_text(self.xml, 'NextKeyMarker')
222 |         self.next_upload_id_marker = get_tag_text(self.xml, 'NextUploadIdMarker')
223 |         self.delimiter = get_tag_text(self.xml, 'Delimiter')
224 |         self.prefix = get_tag_text(self.xml, 'Prefix')
225 |         self.max_uploads = get_tag_text(self.xml, 'MaxUploads')
226 |         self.is_truncated = get_tag_text(self.xml, 'IsTruncated')
227 | 
228 |         self.prefix_list = []
229 |         prefixes = self.xml.getElementsByTagName('CommonPrefixes')
230 |         for p in prefixes:
231 |             tag_txt = get_tag_text(p, "Prefix")
232 |             self.prefix_list.append(tag_txt)
233 | 
234 |         self.content_list = []
235 |         contents = self.xml.getElementsByTagName('Upload')
236 |         for c in contents:
237 |             self.content_list.append(Upload(c))
238 | 
239 |     def list(self):
240 |         cl = []
241 |         pl = []
242 |         for c in self.content_list:
243 |             cl.append((c.key, c.upload_id))
244 |         for p in self.prefix_list:
245 |             pl.append(p)
246 | 
247 |         return (cl, pl)
248 | 
249 | class MultiPart:
250 |     def __init__(self, xml_element):
251 |         self.element = xml_element
252 |         self.part_number = get_tag_text(self.element, 'PartNumber')
253 |         self.last_modified = get_tag_text(self.element, 'LastModified')
254 |         self.etag = get_tag_text(self.element, 'ETag')
255 |         self.size = get_tag_text(self.element, 'Size')
256 | 
257 | class GetPartsXml:
258 |     def __init__(self, xml_string):
259 |         self.xml = minidom.parseString(xml_string)
260 |         self.bucket = get_tag_text(self.xml, 'Bucket')
261 |         self.key = get_tag_text(self.xml, 'Key')
262 |         self.upload_id = get_tag_text(self.xml, 'UploadId')
263 |         self.storage_class = get_tag_text(self.xml, 'StorageClass')
264 |         self.next_part_number_marker = get_tag_text(self.xml, 'NextPartNumberMarker')
265 |         self.max_parts = get_tag_text(self.xml, 'MaxParts')
266 |         self.is_truncated = get_tag_text(self.xml, 'IsTruncated')
267 |         self.part_number_marker = get_tag_text(self.xml, 'PartNumberMarker')
268 | 
269 |         self.content_list = []
270 |         contents = self.xml.getElementsByTagName('Part')
271 |         for c in contents:
272 |             self.content_list.append(MultiPart(c))
273 | 
274 |     def list(self):
275 |         cl = []
276 |         for c in self.content_list:
277 |             cl.append((c.part_number, c.etag, c.size, c.last_modified))
278 |         return cl
279 | 
280 | class CompleteUploadXml:
281 |     def __init__(self, xml_string):
282 |         self.xml = minidom.parseString(xml_string)
283 |         self.location = get_tag_text(self.xml, 'Location')
284 |         self.bucket = get_tag_text(self.xml, 'Bucket')
285 |         self.key = get_tag_text(self.xml, 'Key')
286 |         self.etag = get_tag_text(self.xml, "ETag")
287 | 
288 | class DeletedObjectsXml:
289 |     def __init__(self, xml_string):
290 |         self.xml = minidom.parseString(xml_string)
291 |         contents = self.xml.getElementsByTagName('Deleted')
292 |         self.content_list = []
293 |         for c in contents:
294 |             self.content_list.append(get_tag_text(c, 'Key'))
295 |     def list(self):
296 |         cl = []
297 |         for c in self.content_list:
298 |             cl.append(c)
299 |         return cl
300 | 
301 | class CnameInfoPart:
302 |     def __init__(self, xml_element):
303 |         self.element = xml_element
304 |         self.cname = get_tag_text(self.element, 'Cname')
305 |         self.bucket = get_tag_text(self.element, 'Bucket')
306 |         self.status = get_tag_text(self.element, 'Status')
307 |         self.lastmodifytime = get_tag_text(self.element, 'LastModifyTime')
308 | 
309 | class CnameToBucketXml:
310 |     def __init__(self, xml_string):
311 |         self.xml = minidom.parseString(xml_string)
312 |         self.content_list = []
313 |         contents = self.xml.getElementsByTagName('CnameInfo')
314 |         for c in contents:
315 |             self.content_list.append(CnameInfoPart(c))
316 | 
317 |     def list(self):
318 |         cl = []
319 |         for c in self.content_list:
320 |             cl.append((c.cname, c.bucket, c.status, c.lastmodifytime))
321 |         return cl
322 | 
323 | class RedirectXml:
324 |     def __init__(self, xml_string):
325 |         self.xml = minidom.parseString(xml_string)
326 |         self.endpoint = get_tag_text(self.xml, 'Endpoint')
327 |     def Endpoint(self):
328 |         return self.endpoint
329 | 
330 | if __name__ == "__main__":
331 |     pass
332 | 
--------------------------------------------------------------------------------
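Before moving on, a minimal sketch of how these handlers are fed. The ListBucketResult body below is hand-written for illustration (not captured from a live OSS response), and Owner is assumed to read the ID/DisplayName children that back the owner.id/owner.display_name fields used in GetBucketXml.list():

    # Illustration only: parse a hand-written ListBucketResult with GetBucketXml.
    from ossync.sdk.oss_xml_handler import GetBucketXml

    body = """<?xml version="1.0" encoding="UTF-8"?>
    <ListBucketResult>
      <Name>dzdata</Name>
      <Prefix>audios/</Prefix>
      <Marker>audios/</Marker>
      <NextMarker>audios/a.mp3</NextMarker>
      <MaxKeys>100</MaxKeys>
      <Delimiter>/</Delimiter>
      <IsTruncated>false</IsTruncated>
      <Contents>
        <Key>audios/a.mp3</Key>
        <LastModified>2012-01-01T00:00:00.000Z</LastModified>
        <ETag>"0123456789ABCDEF0123456789ABCDEF"</ETag>
        <Size>1024</Size>
        <Owner><ID>12345</ID><DisplayName>demo</DisplayName></Owner>
        <StorageClass>Standard</StorageClass>
      </Contents>
    </ListBucketResult>"""

    (files, prefixes) = GetBucketXml(body).list()
    for f in files:
        print f[0]    # the object key, e.g. "audios/a.mp3"

This is exactly the shape that walk_bucket() in sync_thread.py relies on when it collects the keys it is about to delete.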
/ossync/sdk/pkg_info.py:
--------------------------------------------------------------------------------
1 | package = "oss"
2 | version = "0.1.3"
3 | url = "http://oss.aliyun.com"
4 | license = "GPL version 2"
5 | short_description = "Command line tool for managing Aliyun Open Storage Service."
6 | long_description = """
7 | osscmd lets you create/delete/list buckets and upload/download/copy/delete files from/to
8 | Aliyun OSS (Open Storage Service).
9 | """
10 | 
11 | 
--------------------------------------------------------------------------------
/queue_thread.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, threading, logging
24 | import os.path
25 | from Queue import *
26 | import hashlib
27 | from ossync.lib import helper
28 | from ossync.lib import queue_model
29 | from config.setting import *
30 | import time
31 | 
32 | class QueueThread(threading.Thread):
33 | 
34 |     """ This thread turns (bucket, root, path) triples into elements of the
35 |     upload queue. Element format: "bucket::root::relpath::action::hashcode",
36 |     where action marks the file as created (C), modified (M) or deleted (D),
37 |     and hashcode is the element's MD5 identity. """
38 |     def __init__(self, oss_mappers, queue, *args, **kwargs):
39 |         threading.Thread.__init__(self, *args, **kwargs)
40 |         self.oss_mappers = oss_mappers
41 |         self.queue = queue
42 |         self._terminate = False
43 |         self.logger = logging.getLogger('app')
44 |         dbpath = DB_PATH
45 |         self.qm = queue_model.QueueModel(dbpath)
46 | 
47 |     def terminate(self):
48 |         self._terminate = True
49 | 
50 |     def queue_folders(self, bucket, folders):
51 |         """Walk every folder and enqueue each file found as a queue element."""
52 |         files = {}
53 |         elements = []
54 |         for d in folders:
55 |             files[d] = list(helper.walk_files(os.path.normpath(d), yield_folders = True))
56 |         if len(files) > 0:
57 |             for k in files:
58 |                 if len(files[k]) > 0:
59 |                     for path in files[k]:
60 |                         self.queue_el(bucket, k, path)
61 | 
62 |     def queue_el(self, bucket, root, path):
63 |         """Build a queue element from the bucket, root and path, then enqueue it."""
64 |         relpath = os.path.relpath(path, root)  # path relative to root
65 |         filehash = ""
66 |         if os.path.isfile(path):
67 |             filehash = helper.calc_file_md5(path)
68 |         hashcode = helper.calc_el_md5(root, relpath, bucket, filehash)
69 |         el = bucket + '::' + root + '::' + relpath + '::C' + '::' + hashcode
70 |         if not self.is_el_queued(hashcode):
71 |             data = {"root": root, "relpath": relpath, "bucket": bucket, "action": 'C', "status": 0, "hashcode": hashcode, "retries": 0}
72 | 
73 |             try:
74 |                 self.qm.save(data)
75 |                 self.queue.put(el, block = True, timeout = 1)
76 |                 msg = 'queue element:' + el
77 |                 #print msg
78 |                 self.logger.info(msg)
79 |             except Full as e:
80 |                 self.logger.error(e.message)
81 | 
82 | 
83 |     def is_el_queued(self, hashcode):
84 |         try:
85 |             row = self.qm.get(hashcode)
86 |             if row:
87 |                 return True
88 |             return False
89 |         except Exception as e:
90 |             self.logger.error(e.message)
91 |             return False
92 | 
93 |     def run(self):
94 |         if self.oss_mappers is None or len(self.oss_mappers) == 0:
95 |             self.queue.put(None)
96 |             return
97 |         self.qm.open()
98 |         for oss_mapper in self.oss_mappers:
99 |             bucket = oss_mapper['bucket']
100 |             local_folders = oss_mapper['local_folders']
101 |             if len(bucket) > 0 and len(local_folders) > 0:
102 |                 self.queue_folders(bucket, local_folders)
103 |         self.qm.close()
104 |         time.sleep(1)
105 |         self.queue.put(None)
106 |         #self.queue.join()
107 |         return
108 | 
109 | 
110 | 
111 | 
112 | 
--------------------------------------------------------------------------------
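The element format is the whole contract between the two threads. A tiny round-trip sketch (the values are invented; the real hashcode is the MD5 hex digest returned by helper.calc_el_md5), which also shows why neither root nor relpath may contain "::":

    # Compose an element the way QueueThread.queue_el() does ...
    bucket, root, relpath = 'dzdata', '/root/testdata/audios', 'a.mp3'
    hashcode = 'c4ca4238a0b923820dcc509a6f75849b'    # placeholder MD5, for illustration
    el = bucket + '::' + root + '::' + relpath + '::C' + '::' + hashcode

    # ... and recover the five fields the way SyncThread.run() does:
    (bucket, root, relpath, action, hashcode) = el.split('::')
    print action    # prints: C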
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os
24 | import sys
25 | 
26 | # check if python version >= 2.6 and < 3.0
27 | if sys.version_info < (2, 6):
28 |     sys.stderr.write("Sorry, OSSync requires at least Python 2.6\n")
29 |     sys.exit(1)
30 | if sys.version_info >= (3, 0):
31 |     sys.stderr.write("Sorry, Python 3.0+ is unsupported at present.\n")
32 |     sys.exit(1)
33 | 
34 | # check if linux kernel supports inotify
35 | #if not os.path.exists("/proc/sys/fs/inotify"):
36 | #    sys.stderr.write("Sorry, your linux kernel doesn't support inotify.\n")
37 | #    sys.exit(1)
38 | 
39 | #print "Start to install necessary modules ..."
40 | # check if pip has been installed
41 | #excode = os.system("pip --version")
42 | #if excode > 0:
43 |     # try to install pip
44 | #    os.system("sudo curl http://python-distribute.org/distribute_setup.py | python")
45 | #    os.system("curl https://raw.github.com/pypa/pip/master/contrib/get-pip.py | python")
46 |     # clean temp files
47 | #    os.system("rm -f distribute*.tar.gz")
48 | 
49 | # try to install pyinotify
50 | #os.system("sudo pip install pyinotify")
51 | 
52 | # check if pyinotify has been installed
53 | #try:
54 | #    import pyinotify
55 | print "Installation complete successfully!"
56 | #except ImportError as e:
57 | #    sys.stderr.write("Sorry, installing the pyinotify module failed! Please try to install it manually.\n")
58 | #    sys.exit(1)
59 | 
60 | 
61 | 
62 | 
--------------------------------------------------------------------------------
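Since the inotify and pyinotify checks are commented out in this incremental-backup fork, the only live gate in setup.py is the interpreter version. An equivalent stand-alone check for a quick manual test (illustrative):

    import sys
    # True on a supported interpreter (>= 2.6, < 3.0), False otherwise.
    print (2, 6) <= sys.version_info < (3, 0)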
/sync_thread.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Copyright (c) 2012 Wu Tangsheng(lanbaba)
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | 
23 | import os, sys, threading
24 | import logging
25 | import hashlib
26 | from Queue import *
27 | from ossync.lib import queue_model
28 | from ossync.lib import helper
29 | import time
30 | try:
31 |     from ossync.sdk.oss_api import *
32 | except:
33 |     from ossync.oss_api import *
34 | try:
35 |     from ossync.sdk.oss_xml_handler import *
36 | except:
37 |     from ossync.oss_xml_handler import *
38 | from config.setting import *
39 | 
40 | LARGE_FILE_SIZE = 10000000 # files larger than this (about 10 MB) are uploaded in multiple parts
41 | 
42 | class SyncThread(threading.Thread):
43 |     def __init__(self, oss, queue, *args, **kwargs):
44 |         threading.Thread.__init__(self, *args, **kwargs)
45 |         self.queue = queue
46 |         self.oss = oss
47 |         self._terminate = False
48 |         self.logger = logging.getLogger('app')
49 |         dbpath = DB_PATH
50 |         self.qm = queue_model.QueueModel(dbpath)
51 | 
52 | 
53 |     def terminate(self):
54 |         self._terminate = True
55 | 
56 |     def upload(self, bucket, oss_obj_name, filename):
57 |         if not os.path.lexists(filename):
58 |             return None
59 |         success = False
60 |         if os.path.isdir(filename):
61 |             oss_obj_name += '/'
62 |             res = self.oss.put_object_with_data(bucket = bucket, object = oss_obj_name, input_content = '')
63 |             if (res.status / 100) == 2:
64 |                 success = True
65 |         else:
66 |             file_size = os.path.getsize(filename)
67 |             if file_size > LARGE_FILE_SIZE:
68 |                 is_large_file = True
69 |                 res = self.oss.upload_large_file(bucket = bucket, object = oss_obj_name, filename = filename)
70 |             else:
71 |                 is_large_file = False
72 |                 res = self.oss.put_object_from_file(bucket = bucket, object = oss_obj_name, filename = filename)
73 |             filehash = helper.calc_file_md5(filename)
74 |             header_map = convert_header2map(res.getheaders())
75 |             etag = safe_get_element("etag", header_map).upper().replace('"', '')
76 |             if (res.status / 100) == 2:
77 |                 if is_large_file == False:  # a single put returns the object's MD5 as its ETag, so verify it
78 |                     if filehash.upper() == etag:
79 |                         success = True
80 |                     else:
81 |                         success = False
82 |                 else:  # multipart ETags are not plain MD5s, so trust the 2xx status
83 |                     success = True
84 |         return success
85 | 
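# --- illustrative aside, not part of sync_thread.py --------------------------
# With LARGE_FILE_SIZE = 10000000, a 4 MB file takes the put_object_from_file
# branch and is verified by comparing its local MD5 with the returned ETag,
# while a 12 MB file goes through upload_large_file, whose multipart ETag is
# not a plain MD5, so only the 2xx status is checked:
#
#   4 * 1024 * 1024 > 10000000     # False -> single put + MD5/ETag comparison
#   12 * 1024 * 1024 > 10000000    # True  -> multipart upload, status only
# ------------------------------------------------------------------------------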
86 |     def exists_oss_object(self, bucket, oss_obj_name):
87 |         headers = {}
88 |         res = self.oss.head_object(bucket, oss_obj_name, headers)
89 |         if (res.status / 100) == 2:
90 |             return True
91 |         else:
92 |             return False
93 | 
94 |     def walk_bucket(self, bucket, prefix, marker, delimiter, maxkeys, headers, result = []):  # callers always pass result explicitly; the mutable default is not relied on
95 |         res = self.oss.get_bucket(bucket, prefix, marker, delimiter, maxkeys, headers)
96 |         if (res.status / 100) == 2:
97 |             body = res.read()
98 |             h = GetBucketXml(body)
99 |             (file_list, common_list) = h.list()
100 |             if len(file_list) > 0:
101 |                 for item in file_list:
102 |                     result.append(item[0])
103 |             if len(common_list) > 0:
104 |                 for path in common_list:
105 |                     result.append(path)
106 |                     self.walk_bucket(bucket, path, marker, delimiter, maxkeys, headers, result)
107 | 
108 |     def delete_oss_object(self, bucket, oss_obj_name):
109 |         headers = {}
110 |         res = self.oss.delete_object(bucket, oss_obj_name, headers)
111 |         if (res.status / 100) == 2:
112 |             return True
113 |         else:
114 |             return False
115 | 
116 |     def delete_oss_objects(self, bucket, oss_obj_name):
117 |         headers = {}
118 |         result = []
119 |         marker = ''
120 |         delimiter = '/'
121 |         maxkeys = 100
122 |         self.walk_bucket(bucket, oss_obj_name, marker, delimiter, maxkeys, headers, result)
123 |         if len(result) > 0:
124 |             for item in result:
125 |                 self.oss.delete_object(bucket, item, headers)
126 |         else:
127 |             self.oss.delete_object(bucket, oss_obj_name, headers)
128 |         return True
129 | 
130 |     def queue_el(self, el):
131 |         '''el: queue element, formatted as "bucket::root::relpath::action::hashcode"'''
132 |         try:
133 |             self.queue.put(el, block = True, timeout = 1)
134 |             msg = 'requeue element:' + el
135 |             self.logger.info(msg)
136 |         except Full as e:
137 |             self.logger.error(e.message)
138 |             print e
139 | 
140 |     def is_el_processed(self, hashcode):
141 |         row = self.qm.get(hashcode)
142 |         if row and str(row['status']) == '1':
143 |             self.qm.update_status(hashcode, 1)
144 |             return True
145 |         return False
146 | 
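# --- illustrative aside, not part of sync_thread.py --------------------------
# is_el_processed() is what makes each run incremental: QueueThread derives the
# hashcode from (root, relpath, bucket, file MD5), so an unchanged file
# reproduces a hashcode already stored with status 1 and is skipped here, while
# any modification changes the file MD5, yields a new hashcode, and the file is
# queued and uploaded again on the next run.
# ------------------------------------------------------------------------------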
147 |     def run(self):
148 |         self.logger.info('Now starting sync thread ...')
149 |         self.qm.open()
150 |         while True:
151 |             if self._terminate:
152 |                 break
153 |             item = self.queue.get()
154 |             if item is None:
155 |                 self.logger.info("Sync thread got None and quit!")
156 |                 break
157 |             (bucket, root, relpath, action, hashcode) = item.split('::')
158 |             if len(bucket) > 0 and len(root) > 0 and len(relpath) > 0 and len(action) > 0:
159 | 
160 |                 if not self.is_el_processed(hashcode):
161 |                     oss_obj_name = os.path.join(os.path.basename(root), relpath)
162 |                     if len(oss_obj_name) > 0:
163 |                         success = False
164 |                         msg = ""
165 |                         if action == 'M' or action == 'C':
166 |                             try:
167 |                                 success = self.upload(bucket, oss_obj_name, os.path.join(root, relpath))
168 |                                 msg = 'put object ' + oss_obj_name + ' to bucket ' + bucket
169 |                             except Exception as e1:
170 |                                 self.logger.critical(e1.message)
171 |                                 pass
172 | 
173 |                         if action == 'D':
174 |                             try:
175 |                                 success = self.delete_oss_objects(bucket, oss_obj_name)
176 |                                 msg = 'delete object ' + oss_obj_name + ' of bucket ' + bucket
177 |                             except Exception as e2:
178 |                                 self.logger.critical(e2.message)
179 |                                 pass
180 |                         if success:
181 |                             msg += ' success'
182 |                             self.logger.info(msg)
183 |                             try:
184 |                                 self.qm.update_status(hashcode, 1)
185 |                             except Exception as e3:
186 |                                 self.logger.critical(e3.message)
187 |                         else:
188 |                             if success == False:  # upload/delete failed; None means the local path no longer exists
189 |                                 msg += ' failure'
190 |                                 self.logger.error(msg)
191 |                                 # requeue the failed element
192 |                                 row = self.qm.get(hashcode)
193 |                                 if row:
194 |                                     retries = int(row['retries'])
195 |                                     if retries < MAX_RETRIES:
196 |                                         self.queue_el(item)
197 |                                         try:
198 |                                             self.qm.update_retries(hashcode, retries + 1)
199 |                                         except Exception as e4:
200 |                                             self.logger.critical(e4.message)
201 |                                     else:
202 |                                         self.logger.critical(msg + ' exceeded max retries')
203 |                             else:
204 |                                 self.logger.critical(msg + ' failure, resource may not exist.')
205 |                                 pass
206 |         self.qm.close()
207 |         self.queue.task_done()
208 |         return
209 | 
--------------------------------------------------------------------------------
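For completeness: the two threads above are wired together by ossync.py, which is not reproduced in this listing. A minimal driver along the same lines might look as follows; the OssAPI name and constructor arguments are assumed from the bundled SDK's conventions, and a single SyncThread is used because QueueThread enqueues exactly one None sentinel (running NTHREADS consumers would require one sentinel per consumer):

    # A minimal, illustrative driver; the real entry point is ossync.py.
    from Queue import Queue

    from config.setting import *
    from ossync.sdk.oss_api import OssAPI
    from queue_thread import QueueThread
    from sync_thread import SyncThread

    queue = Queue()
    producer = QueueThread(oss_mappers, queue)
    consumer = SyncThread(OssAPI(HOST, ACCESS_ID, SECRET_ACCESS_KEY), queue)

    producer.start()
    consumer.start()
    producer.join()
    consumer.join()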