├── logs └── swall.log ├── .python-version ├── var ├── backup │ └── .gitignore └── cache │ └── .gitignore ├── version.txt ├── swall ├── __init__.py ├── bfclient.py ├── excpt.py ├── kthread.py ├── logger.py ├── keeper.py ├── crypt.py ├── client.py ├── mq.py ├── job.py ├── agent.py ├── parser.py └── utils.py ├── requirememts.txt ├── conf ├── redis.conf ├── fs.conf └── swall.conf ├── .gitignore ├── bin └── swall ├── module ├── _sys_common.py ├── network.py ├── cmd.py └── ps.py ├── plugins └── fservice │ ├── ssh.py │ ├── ftp.py │ └── rsync.py └── README.md /logs/swall.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | swall 2 | -------------------------------------------------------------------------------- /var/backup/.gitignore: -------------------------------------------------------------------------------- 1 | !.gitignore -------------------------------------------------------------------------------- /var/cache/.gitignore: -------------------------------------------------------------------------------- 1 | !.gitignore -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | B_beta4.0.1-20151011225700 -------------------------------------------------------------------------------- /swall/__init__.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | __author__ = 'lufeng4828@163.com' 3 | 4 | 5 | -------------------------------------------------------------------------------- /requirememts.txt: -------------------------------------------------------------------------------- 1 | msgpack_python >= 0.4.2 2 | paramiko >= 1.14 3 | psutil >= 2.1.1 4 | anyjson >= 0.3.3 5 | 6 | 7 | 
-------------------------------------------------------------------------------- /conf/redis.conf: -------------------------------------------------------------------------------- 1 | [main] 2 | #redis配置 3 | host = 192.168.1.7 4 | port = 6379 5 | db = 0 6 | password = 7 | expire = 600 8 | -------------------------------------------------------------------------------- /conf/fs.conf: -------------------------------------------------------------------------------- 1 | [main] 2 | fs_type = rsync 3 | fs_host = 192.168.4.181 4 | fs_port = 61768 5 | fs_user = swall 6 | fs_pass = vGjeVUncnbPV8CcZ 7 | fs_tmp_dir = /data/swall_fs 8 | fs_failtry = 3 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *.7z 4 | *.dmg 5 | *.gz 6 | *.iso 7 | *.jar 8 | *.rar 9 | *.tar 10 | *.zip 11 | *.log 12 | .idea/ 13 | .DS_Store 14 | .DS_Store? 15 | .Spotlight-V100 16 | .Trashes 17 | Icon? 
class BFClient(ABCMeta("_BFClientBase", (object,), {})):
    """
    Abstract base class for file-service clients.

    Every file client plugin must inherit from this class and implement
    ``upload``, ``download`` and ``exists``.
    """
    # The base class above is built by calling ABCMeta directly so that the
    # metaclass is applied on both Python 2 and Python 3.  A class-level
    # ``__metaclass__`` attribute is silently ignored by Python 3, which
    # would leave the abstract methods unenforced.

    @abstractmethod
    def upload(self, upload_path):
        """
        Upload a local file to the file service.
        @param upload_path string: path of the local file
        @return: the implementations in this repository return the file id
                 on success and a falsy value on failure
        """

    @abstractmethod
    def download(self, fid, to_path):
        """
        Download a file from the file service.
        @param fid string: id of the remote file
        @param to_path string: local destination path
        @return int: 1 on success, 0 on failure
        """

    @abstractmethod
    def exists(self, fid):
        """
        Check whether a file is present on the file service.
        @param fid string: id of the remote file
        @return int: 1 if it exists, else 0
        """
class SwallException(Exception):
    """
    Base exception class; all swall-specific exceptions should subclass this
    """


class SwallAgentError(SwallException):
    """
    Agent-related error.
    NOTE(review): the previous text ("Problem reading the master root key")
    looks copied from another project; confirm the intended meaning against
    the code that raises this exception.
    """


class SwallCommandExecutionError(SwallException):
    """
    Raised when running a function fails
    """


class SwallTimeoutError(SwallException):
    """
    Timeout error
    """


class SwallAuthenticationError(SwallException):
    """
    Message authentication (HMAC-SHA256) failed
    """
class KThread(threading.Thread):
    """
    A thread that another thread can ask to stop by calling kill().

    The thread installs a trace function (sys.settrace) for itself; once
    ``killed`` is set, the next traced Python line raises SystemExit inside
    the thread.  Because the check happens on 'line' trace events, code that
    is blocked inside a C call is only interrupted once it returns to Python.

    Attributes:
        killed:    True once kill() has been called.
        exception: the Exception raised by run(), or None.
    """

    def __init__(self, *args, **kwargs):
        threading.Thread.__init__(self, *args, **kwargs)
        self.killed = False
        self.exception = None

    def start(self):
        """Start the thread with the tracing wrapper installed around run()."""
        # Swap run() for the wrapper on the instance so that subclasses that
        # override run() are still traced.
        self.__run_backup = self.run
        self.run = self.__run
        threading.Thread.start(self)

    def __run(self):
        """Run the original run() under the kill-aware trace function."""
        sys.settrace(self.globaltrace)
        try:
            self.__run_backup()
        except SystemExit:
            # A SystemExit caused by kill() is the normal way for this thread
            # to end; anything else is re-raised untouched.
            if not self.killed:
                raise
        except Exception as e:
            # Kept for the owner of the thread to inspect after join().
            self.exception = e
        finally:
            # Always put the original run() back, even when killed.
            self.run = self.__run_backup

    def globaltrace(self, frame, why, arg):
        """Global trace hook: trace every newly entered frame line by line."""
        if why == 'call':
            return self.localtrace
        return None

    def localtrace(self, frame, why, arg):
        """Local trace hook: abort on the next executed line once killed."""
        if self.killed and why == 'line':
            raise SystemExit()
        return self.localtrace

    def kill(self):
        """Request termination; takes effect at the next traced Python line."""
        self.killed = True
class Keeper(object):
    """
    Basic node-bookkeeping helpers built on top of the message queue.
    """

    def __init__(self, config):
        self.main_conf = Conf(config["swall"])
        self.mq = MQ(config)

    def is_valid(self, node_name):
        """
        Check whether a node is healthy (delegates to the message queue).
        """
        return self.mq.is_valid(node_name)

    def get_valid_nodes(self):
        """
        Get all usable nodes.
        @return list:["node1","node2","node3","nodeN"]
        """
        return list(self.mq.get_nodes())

    @staticmethod
    def _compile_globs(globs):
        """
        Translate a ';'-separated list of patterns into one compiled regex.

        '*' becomes '.*' and '?' becomes '.'; every other character is passed
        to the regex engine unchanged.  Each alternative is anchored with
        '^' and '$', so an empty string only matches an empty node name.
        """
        pattern = globs.replace('*', '.*').replace('?', '.')
        if ';' in pattern:
            pattern = '|'.join("(^%s$)" % part for part in pattern.split(';'))
        else:
            pattern = "^%s$" % pattern
        return re.compile(pattern)

    def get_nodes_by_regex(self, nregx, nexclude):
        """
        get nodes by regex
        @param nregx string: pattern(s) selecting the nodes to include
        @param nexclude string: pattern(s) selecting the nodes to exclude
        @return list:the node list (in the order returned by get_valid_nodes)
        """
        include = self._compile_globs(nregx)
        exclude = self._compile_globs(nexclude)
        return [
            name for name in self.get_valid_nodes()
            if include.match(name) and not exclude.match(name)
        ]
class FTPClient(BFClient):
    """
    File-service client that stores files on an FTP server.

    Files are stored under ``fs_conf.fs_tmp_dir`` and named by their id,
    which is the checksum of the uploaded file.
    """

    def __init__(self, fs_conf):
        self.fs_conf = fs_conf
        # ftplib.FTP(host) connects immediately using the default port, so
        # assigning ``.port`` afterwards has no effect.  Create the object
        # unconnected and connect explicitly to the configured port.
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.fs_conf.fs_host, int(self.fs_conf.fs_port))
        self.ftp.login(self.fs_conf.fs_user, self.fs_conf.fs_pass)
        self.ftp.cwd(self.fs_conf.fs_tmp_dir)

    def upload(self, upload_path):
        """
        Upload a file.
        @param upload_path string: local file path
        @return string: file id if success (or already present) else ""
        """
        if not upload_path:
            return ""
        fid = checksum(upload_path)
        if self.exists(fid):
            # Same content is already stored; nothing to transfer.
            return fid
        ftp_path = os.path.join(self.fs_conf.fs_tmp_dir, fid)
        try:
            with open(upload_path, 'rb') as src:
                self.ftp.storbinary('STOR %s' % ftp_path, src, BLOCK_SIZE)
        except Exception:
            log.error(traceback.format_exc())
            return ""
        return fid

    def exists(self, fid):
        """
        Check whether a file exists on the server.
        @param fid string: id of the file to look for
        @return int:1 for exists else 0
        """
        # NLST may answer with bare names or with names prefixed by the
        # directory (server dependent, as far as we know), so compare
        # the last path component only.
        for entry in self.ftp.nlst(self.fs_conf.fs_tmp_dir):
            if os.path.basename(entry) == fid:
                return 1
        return 0

    def download(self, fid, to_path):
        """
        Download a file.
        @param fid string: id of the remote file to fetch
        @param to_path string: local path to write the file to
        @return int:1 if success else 0
        """
        if fid == "" or to_path == "":
            return 0
        ftp_file = os.path.join(self.fs_conf.fs_tmp_dir, fid)
        try:
            with open(to_path, 'wb') as dst:
                self.ftp.retrbinary('RETR %s' % ftp_file, dst.write, BLOCK_SIZE)
        except Exception:
            log.error(traceback.format_exc())
            return 0
        return 1
def upload(self, upload_path): 21 | """ 22 | 上传文件 23 | @param upload_path string:本地文件路径 24 | @return string:remote file path if success else "" 25 | """ 26 | if not upload_path: 27 | return "" 28 | fid = checksum(upload_path) 29 | max_retry = self.fs_conf.fs_failtry 30 | index = 1 31 | while index <= int(max_retry): 32 | ret = call( 33 | "RSYNC_PASSWORD=%s rsync -a --port=%s --partial %s %s@%s::swall_fs/%s" % 34 | ( 35 | self.fs_conf.fs_pass, 36 | self.fs_conf.fs_port, 37 | upload_path, 38 | self.fs_conf.fs_user, 39 | self.fs_conf.fs_host, 40 | fid 41 | ), 42 | shell=True 43 | ) 44 | if ret != 0: 45 | time.sleep(5) 46 | else: 47 | break 48 | index += 1 49 | if index <= int(max_retry): 50 | return fid 51 | else: 52 | return "" 53 | 54 | def exists(self, fid): 55 | """ 56 | 查看文件是否存在 57 | @param path string:需要查看的文件id 58 | @return int:1 for exists else 0 59 | """ 60 | return 0 61 | 62 | def download(self, fid, to_path): 63 | """ 64 | 下载文件 65 | @param filename string:需要下载的文件路径,远程文件 66 | @param to_path string:存放本地的文件路径 67 | @return int:1 if success else 0 68 | """ 69 | if fid == "" or to_path == "": 70 | return 0 71 | max_retry = self.fs_conf.fs_failtry 72 | index = 1 73 | while index <= int(max_retry): 74 | ret = call( 75 | "RSYNC_PASSWORD=%s rsync -a --port=%s --partial %s@%s::swall_fs/%s %s" % 76 | ( 77 | self.fs_conf.fs_pass, 78 | self.fs_conf.fs_port, 79 | self.fs_conf.fs_user, 80 | self.fs_conf.fs_host, 81 | fid, 82 | to_path 83 | ), 84 | shell=True 85 | ) 86 | if ret != 0: 87 | time.sleep(5) 88 | else: 89 | break 90 | index += 1 91 | if index <= int(max_retry): 92 | return 1 93 | else: 94 | return 0 95 | 96 | if __name__ == "__main__": 97 | client = load_fclient("~/Documents/works/git/swall/plugins/fservice", ftype="rsync") 98 | print client 99 | scli = client("~/Documents/works/git/swall/conf/swall.conf") 100 | print scli 101 | print scli.upload("/etc/services") 102 | print scli.download("f9f1d3bc559b817e74c13efc3fd1172fbe170d37","/tmp/a.txt") 
class Crypt(object):
    """
    Symmetric encryption helper
    Encryption algorithm: AES-CBC
    Signing algorithm: HMAC-SHA256

    A key string is the base64 encoding of ``key_size // 8`` AES key bytes
    followed by ``SIG_SIZE`` HMAC key bytes.
    """

    # Prefix put in front of the serialized payload before encryption;
    # loads() returns {} when it is missing.
    PICKLE_PAD = b'pickle::'
    AES_BLOCK_SIZE = 16
    SIG_SIZE = hashlib.sha256().digest_size

    def __init__(self, key_string, key_size=192):
        self.keys = self.extract_keys(key_string, key_size)
        self.key_size = key_size
        self.serial = msgpack

    @classmethod
    def gen_key(cls, key_size=192):
        """
        Generate a key of the given size for symmetric encryption
        @param key_size:AES key length in bits
        return string:base64 encoded key (no newlines)
        """
        import base64
        raw = os.urandom(key_size // 8 + cls.SIG_SIZE)
        encoded = base64.b64encode(raw)
        # b64encode returns bytes on Python 3; always hand back a native str.
        if not isinstance(encoded, str):
            encoded = encoded.decode('ascii')
        return encoded

    @classmethod
    def extract_keys(cls, key_string, key_size):
        """
        Split a key string into its AES part and its HMAC part
        @param key_string:the base64 key string
        @param key_size:AES key length in bits
        @return tuple(aes_key, hmac_key), or "" when the decoded length is wrong
        """
        import base64
        key = base64.b64decode(key_string)
        if len(key) != key_size // 8 + cls.SIG_SIZE:
            return ""
        return key[:-cls.SIG_SIZE], key[-cls.SIG_SIZE:]

    def _split_keys(self):
        """Return (aes_key, hmac_key) or raise ValueError for an unusable key."""
        if not self.keys:
            raise ValueError(
                "invalid key: expected %d bytes after base64 decoding"
                % (self.key_size // 8 + self.SIG_SIZE)
            )
        return self.keys

    @staticmethod
    def _digests_equal(left, right):
        """Compare two digests without leaking the mismatch position via timing."""
        compare = getattr(hmac, 'compare_digest', None)
        if compare is not None:
            return compare(left, right)
        # Fallback for interpreters that predate hmac.compare_digest.
        if len(left) != len(right):
            return False
        diff = 0
        for a, b in zip(bytearray(left), bytearray(right)):
            diff |= a ^ b
        return diff == 0

    def encrypt(self, data):
        """
        encrypt data with AES-CBC and sign it with HMAC-SHA256
        @param data string:plain bytes
        @return string:iv + ciphertext + signature
        """
        aes_key, hmac_key = self._split_keys()
        # Pad to a whole number of blocks; every pad byte holds the pad
        # length, and a full extra block is added when already aligned.
        pad = self.AES_BLOCK_SIZE - len(data) % self.AES_BLOCK_SIZE
        data = data + bytes(bytearray([pad]) * pad)
        iv_bytes = os.urandom(self.AES_BLOCK_SIZE)
        cypher = AES.new(aes_key, AES.MODE_CBC, iv_bytes)
        data = iv_bytes + cypher.encrypt(data)
        sig = hmac.new(hmac_key, data, hashlib.sha256).digest()
        return data + sig

    def decrypt(self, data):
        """
        verify HMAC-SHA256 signature and decrypt data with AES-CBC
        @param data string:iv + ciphertext + signature
        @return string:plain bytes
        @raise SwallAuthenticationError: when the signature does not match
        """
        aes_key, hmac_key = self._split_keys()
        sig = data[-self.SIG_SIZE:]
        data = data[:-self.SIG_SIZE]
        expected = hmac.new(hmac_key, data, hashlib.sha256).digest()
        if not self._digests_equal(expected, sig):
            raise SwallAuthenticationError('message authentication failed')
        iv_bytes = data[:self.AES_BLOCK_SIZE]
        data = data[self.AES_BLOCK_SIZE:]
        cypher = AES.new(aes_key, AES.MODE_CBC, iv_bytes)
        data = cypher.decrypt(data)
        # The last byte is the pad length (see encrypt); bytearray makes the
        # lookup an int on both Python 2 and Python 3.
        pad = bytearray(data[-1:])[0]
        return data[:-pad]

    def dumps(self, obj):
        """
        Serialize and encrypt a python object
        @param obj python_obj:
        @return string
        """
        return self.encrypt(self.PICKLE_PAD + self.serial.dumps(obj))

    def loads(self, data):
        """
        Decrypt and un-serialize a python object
        @param data string:
        @return python_obj: {} when the decrypted payload lacks the prefix
        """
        data = self.decrypt(data)
        if not data.startswith(self.PICKLE_PAD):
            return {}
        return self.serial.loads(data[len(self.PICKLE_PAD):])
in ('swall', 'fs', 'redis'): 25 | abs_path = app_abs_path(os.path.join(conf_dir, "%s.conf" % f)) 26 | self.config[f] = agent_config(abs_path) 27 | 28 | self.job = Job(self.config, env="aes") 29 | self.globs = globs if globs else "" 30 | self.exclude_globs = exclude_globs if exclude_globs else "" 31 | self.wait_all = wait_all 32 | self.timeout = timeout 33 | self.nthread = nthread 34 | 35 | def submit_job(self, func=None, *args, **kwargs): 36 | wait_timeout = self.timeout if self.wait_all else 0 37 | rets = self.job.submit_job( 38 | func, 39 | self.globs, 40 | self.exclude_globs, 41 | args=list(args), 42 | kwargs=kwargs, 43 | wait_timeout=wait_timeout, 44 | nthread=int(self.nthread) if self.nthread is not None else -1 45 | ) 46 | return rets 47 | 48 | def job_info(self, jid, *args, **kwargs): 49 | """ 50 | 直接通过redis查看任务状态 51 | """ 52 | job_rets = {} 53 | match_nodes = self.get_host() 54 | for node in match_nodes: 55 | job_ret = self.job.get_job_info(node, jid) 56 | if job_rets: 57 | job_rets.update({node: job_ret}) 58 | 59 | log.info("end to get job_info for job [%s]" % self.job.get_jid()) 60 | return job_rets 61 | 62 | def get_host(self): 63 | """ 64 | 获取节点列表 65 | @return dict: 66 | """ 67 | match_nodes = self.job.keeper.get_nodes_by_regex(self.globs, self.exclude_globs) 68 | return match_nodes 69 | 70 | def get_return(self, timeout=60): 71 | """ 72 | 获取结果数据 73 | @param _timeout int:获取数据的超时 74 | @return dict: 75 | """ 76 | 77 | @iTimeout(timeout) 78 | def _return(nodes, job_rets): 79 | while 1: 80 | job_ret = self.job.get_job([(n, self.job.jid) for n in nodes]) 81 | 82 | for node, ret_ in job_ret.iteritems(): 83 | if ret_: 84 | i_ret = ret_["payload"].get("return") 85 | if i_ret is not None: 86 | if job_rets: 87 | job_rets.update({node: i_ret}) 88 | else: 89 | job_rets[node] = i_ret 90 | is_wait = False 91 | for ret_ in job_ret.itervalues(): 92 | if not ret_: 93 | is_wait = True 94 | else: 95 | i_ret = ret_["payload"].get("return") 96 | if i_ret is None: 97 | 
class MQ(object):
    """
    Redis-backed message transport: node heartbeats, per-node job request
    lists and a hash of job results.
    """

    # Timestamp layout written by tos() and parsed by get_nodes()/is_valid().
    PING_TIME_FORMAT = '%y-%m-%d %H:%M:%S'
    # A node whose last heartbeat is older than this many seconds is offline.
    ONLINE_WINDOW = 60

    def __init__(self, config):
        self.redis_conf = Conf(config["redis"])
        self.main_conf = Conf(config["swall"])
        # NOTE: from here on the instance attribute ``redis`` (a connection
        # object) shadows the factory method of the same name.
        self.redis = self.redis()
        self.node_ping = "SWALL:PING"
        self.node_job_req = "SWALL:JOBQ"
        self.node_job_res = "SWALL:JOBR"

    def redis(self):
        """
        Build the redis connection object
        :return: Redis
        """
        return Redis(
            host=self.redis_conf.host,
            port=int(self.redis_conf.port),
            db=int(self.redis_conf.db),
            password=self.redis_conf.password,
            socket_connect_timeout=5
        )

    def tos(self, node):
        """
        Publish a heartbeat for ``node`` as "<node>@<node_ip>@<timestamp>".
        :return: True on success, False if the write failed
        """
        try:
            str_time = time.strftime(self.PING_TIME_FORMAT, time.localtime())
            self.redis.hset(self.node_ping, node, "%s@%s@%s" % (node, self.main_conf.node_ip, str_time))
            return True
        except Exception:
            log.error(traceback.format_exc())
            return False

    def _parse_ping(self, raw):
        """
        Decode a heartbeat value written by tos().
        :return: (ip, time_string, age_in_seconds) or None when malformed
        """
        # Split from the right so only the last two '@' act as separators.
        parts = raw.rsplit('@', 2)
        if len(parts) != 3:
            return None
        try:
            stamp = datetime.strptime(parts[2], self.PING_TIME_FORMAT)
        except ValueError:
            return None
        # total_seconds() includes whole days; ``.seconds`` alone would make
        # a node last seen 24h ago look freshly alive.
        age = int((datetime.now() - stamp).total_seconds())
        return parts[1], parts[2], age

    def mget_job(self, job_info):
        """
        Fetch job results for several (node, jid) pairs
        :param job_info list: [(node, jid), ...]
        :return: dict node -> decoded result (or the empty raw value)
        """
        if not job_info:
            # Nothing to ask for; avoid sending an HMGET without fields.
            return {}
        fields = ["%s:%s" % (node, jid) for node, jid in job_info]
        jobs = self.redis.hmget(self.node_job_res, fields)
        result = {}
        if jobs:
            for (node, _), raw in zip(job_info, jobs):
                result[node] = msgpack.loads(raw) if raw else raw
        return result

    def get_job(self, node):
        """
        Pop the next pending job request for ``node``
        :return: decoded job, or the empty value when the list is empty
        """
        item = self.redis.lpop("%s:%s" % (self.node_job_req, node))
        if item:
            item = msgpack.loads(item)
        return item

    def set_res(self, node, jid, result):
        """
        Store the result of an executed job
        :param node:
        :param jid:
        :param result:
        :return: True
        """
        key = "%s:%s" % (node, jid)
        self.redis.hset(self.node_job_res, key, msgpack.dumps(result))
        return True

    def del_res(self, node, jid):
        """
        Delete the result of a job
        :param node:
        :param jid:
        :return: True
        """
        key = "%s:%s" % (node, jid)
        self.redis.hdel(self.node_job_res, key)
        return True

    def get_res(self, node, jid):
        """
        Fetch the result of a job
        :param node:
        :param jid:
        :return: decoded result, or the empty value when there is none
        """
        key = "%s:%s" % (node, jid)
        item = self.redis.hget(self.node_job_res, key)
        if item:
            item = msgpack.loads(item)
        return item

    def mset_job(self, job_data):
        """
        Queue job requests for several nodes in one pipeline
        :param job_data=[(node_name, data)]
        :return: True
        """
        pl = self.redis.pipeline()
        for job in job_data:
            pl.rpush('%s:%s' % (self.node_job_req, job[0]), msgpack.dumps(job[1]))
        pl.execute()
        return True

    def get_nodes(self, type_="online"):
        """
        Get nodes, by default the usable ones; type_=online|offline|all
        :return: for online/offline a dict
                 node -> {"ip", "update_time", "delta_seconds"};
                 for any other type_ a list of all node names
        """
        nodes = self.redis.hgetall(self.node_ping)
        log.info("get nodes [%s]" % len(nodes))
        nodes_t = {}
        for node in nodes:
            parsed = self._parse_ping(nodes[node])
            if parsed is None:
                log.warning("skip malformed ping data for node [%s]" % node)
                continue
            ip, update_time, age = parsed
            nodes_t[node] = {"ip": ip, "update_time": update_time, "delta_seconds": age}
        if type_ == "online":
            final_nodes = dict(
                (key, info) for key, info in nodes_t.items()
                if info["delta_seconds"] <= self.ONLINE_WINDOW
            )
        elif type_ == "offline":
            final_nodes = dict(
                (key, info) for key, info in nodes_t.items()
                if info["delta_seconds"] > self.ONLINE_WINDOW
            )
        else:
            final_nodes = list(nodes_t)
        log.info("get final_nodes [%s]" % len(final_nodes))
        return final_nodes

    def is_valid(self, node_name):
        """
        Check whether a node is usable, i.e. its last heartbeat is recent
        :return: bool
        """
        data = self.redis.hget(self.node_ping, node_name)
        if not data:
            return False
        parsed = self._parse_ping(data)
        return parsed is not None and parsed[2] <= self.ONLINE_WINDOW
@node
def kill_pid(pid, signal=15, *args, **kwarg):
    """
    def kill_pid(pid, signal=15, *args, **kwarg) -> Kill a process by PID.
    @param pid int:PID of process to kill.
    @param signal int:Signal to send to the process. See manpage entry for kill for possible values. Default: 15 (SIGTERM).
    @return bool:True or False
    """
    try:
        # Coerce both values: pid was already converted here, and pkill()
        # converts its signal the same way, so a signal given as a string
        # must not reach send_signal() unconverted.
        psutil.Process(int(pid)).send_signal(int(signal))
        return True
    except psutil.NoSuchProcess:
        return False
@node
def cpu_times(per_cpu=False, *args, **kwarg):
    """
    def cpu_times(per_cpu=False) -> Return the percent of time the CPU spends in each state, e.g. user, system, idle, nice, iowait, irq, softirq.
    @param per_cpu bool:if True return an array of percents for each CPU, otherwise aggregate all percents into one number
    @return dict:
    """
    if not per_cpu:
        # Aggregate view: one named tuple converted to a plain dict.
        return dict(psutil.cpu_times(per_cpu)._asdict())
    # Per-CPU view: one plain dict per entry psutil reports.
    per_core = []
    for core in psutil.cpu_times(True):
        per_core.append(dict(core._asdict()))
    return per_core
@node
def physical_memory_usage(*args, **kwarg):
    """
    def physical_memory_usage(*args, **kwarg) -> Return a dict that describes free and available physical memory.
    @return dict:
    """
    # psutil.phymem_usage() was removed in psutil 3.0 in favour of
    # virtual_memory(), whose result still carries total/used/free/percent.
    # Use the legacy call while it exists so older installs keep their output.
    legacy = getattr(psutil, "phymem_usage", None)
    usage = legacy() if legacy is not None else psutil.virtual_memory()
    return dict(usage._asdict())
@node
def disk_partition_usage(all=False, *args, **kwarg):
    """
    def disk_partition_usage(all=False, *args, **kwarg) -> Return a list of disk partitions plus the mount point, filesystem and usage statistics.
    @param all bool:if set to False, only return local, physical partitions (hard disk, USB, CD/DVD partitions). If True, return all filesystems.
    @return list(dict):
    """
    partitions = disk_partitions(all)
    for entry in partitions:
        # Merge the usage figures of the mount point into the partition dict.
        usage = disk_usage(entry['mountpoint'])
        entry.update(usage)
    return partitions
def _gen_jid(self):
    """
    Generate a jid when the caller did not supply one.

    A truthy ``self.jid`` is left untouched; otherwise it is set to the
    current local time rendered as year..microsecond digits.
    """
    if self.jid:
        return
    now = datetime.datetime.now()
    self.jid = "{0:%Y%m%d%H%M%S%f}".format(now)
def submit_job(self, cmd, nregex, nexclude=None, args=None, kwargs=None, wait_timeout=0, nthread=-1):
    """
    Submit a job to every node matching ``nregex``.

    @param cmd string: command to execute
    @param nregex string: regular expression selecting the target nodes
    @param nexclude string: regular expression of nodes to remove from the nregex matches
    @param args list: positional arguments for cmd (copied, never mutated)
    @param kwargs dict: keyword arguments for cmd (copied, never mutated)
    @param wait_timeout int: seconds to wait for results; 0 returns right after sending
    @param nthread int: per-node concurrency of the job; -1 leaves it unset
    @return dict:{
        "retcode": return code
        "extra_data": additional data (the jid, or the per-node results when waiting),
        "msg": human readable message,
    }
    """
    # Work on private copies. The old `args=[]` / `kwargs={}` defaults were
    # shared between calls and rewritten in place below (args[0],
    # kwargs["path_pair"], kwargs["copy_pair"]), leaking state from one
    # submission into the next and into the caller's own objects.
    args = list(args) if args else []
    kwargs = dict(kwargs) if kwargs else {}

    self._gen_jid()
    match_nodes = self.keeper.get_nodes_by_regex(nregex, nexclude) or []
    if not match_nodes:
        log.warn("0 node match for %s [%s]" % (self.jid, cmd))
        return {
            "retcode": 1,
            "extra_data": {},
            "msg": "send_job complete,0 node match"
        }

    if cmd == "sys.copy" and "help" not in args:
        # Upload the local file first; the nodes then fetch it by file id.
        FsClient = load_fclient(app_abs_path(self.main_conf.fs_plugin), ftype=self.fs_conf.fs_type)
        fscli = FsClient(self.fs_conf)
        by_keyword = "local_path" in kwargs and "remote_path" in kwargs
        local_path = kwargs["local_path"] if by_keyword else args[0]
        fid = fscli.upload(local_path)
        path_pair = "%s,%s" % (fid, os.path.basename(local_path))
        if by_keyword:
            kwargs["path_pair"] = path_pair
        else:
            args[0] = path_pair

    if cmd == "sys.rsync_module" and (not args or args[0] != "help"):
        # Upload every module file and hand the (file id, name) pairs to the nodes.
        FsClient = load_fclient(app_abs_path(self.main_conf.fs_plugin), ftype=self.fs_conf.fs_type)
        fscli = FsClient(self.fs_conf)
        module_dir = app_abs_path(self.main_conf.module)
        copy_pair = []
        for mod in os.listdir(module_dir):
            if mod.endswith(".py"):
                fid = fscli.upload(os.path.join(module_dir, mod))
                copy_pair.append((fid, mod))
        kwargs["copy_pair"] = copy_pair

    data = {
        "env": self.env,
        "payload":
        {
            "jid": self.jid,
            "cmd": cmd,
            "args": args,
            "kwargs": kwargs,
            "status": "READY",
            "timeout": self.timeout,
            "retry_times": self.retry_times
        }
    }
    if nthread != -1:
        data["payload"]["nthread"] = nthread

    # Each node gets its own deep copy because _send_job may encrypt the
    # payload in place.
    node_data = [(deepcopy(data), node_name) for node_name in match_nodes]
    ret = self._send_job(node_data)
    send_ret = {n: ret for n in match_nodes}

    if wait_timeout:
        rets = {}

        @iTimeout(wait_timeout)
        def _return(nodes, job_rets):
            while 1:
                job_ret = self.get_job([(n, self.jid) for n in nodes])
                is_wait = False
                for node, ret_ in job_ret.items():
                    i_ret = ret_["payload"].get("return") if ret_ else None
                    if i_ret is None:
                        # No result, or no return value yet: keep polling.
                        is_wait = True
                    else:
                        # Always mutate the caller's dict. The old code
                        # rebound the local name while the dict was still
                        # empty, so the results never reached `rets`.
                        job_rets[node] = i_ret
                if not is_wait:
                    break

        try:
            _return(match_nodes, rets)
        except Timeout as e:
            log.error(e)

        return {
            "retcode": 1,
            "extra_data": rets,
            "msg": "get result complete!"
        }

    if all(send_ret.values()):
        return {
            "retcode": 1,
            "extra_data": {"jid": self.jid},
            "msg": "send_job complete,all success"
        }
    return {
        "retcode": 0,
        "extra_data": {},
        "msg": "send_job complete,fail",
    }
def get_job_info(self, node_name, jid):
    """
    Return the stored payload (status, return value, ...) of a job.

    @param node_name string: node name
    @param jid string: job id
    @return dict: the job payload, or {} when no result is stored
    """
    data = self.mq.get_res(node_name, jid)
    if not data:
        return {}
    if data["env"] == "aes":
        # Encrypted jobs keep the payload as ciphertext.
        crypt = Crypt(self.main_conf.token)
        return crypt.loads(data.get("payload"))
    # Any other env stores the payload as is. It used to be dropped, so
    # jobs in the default "clear" env always reported {}.
    return data.get("payload") or {}
def _get_func(self, module=None, *args, **kwargs):
    """
    def _get_func(self, module=None, *args, **kwargs) -> List the registered function names, optionally only those of one module
    @param module string:module name
    @return list:
    """
    names = list(self.node_funcs)
    if not module:
        return names
    # Keep every name that contains "<module>." anywhere in it.
    marker = "%s." % module
    selected = []
    for name in names:
        if marker in name:
            selected.append(name)
    return selected
def _version(self, *args, **kargs):
    """
    def get_version(*args,**kargs) -> Return the swall version string
    @return string:first line of version.txt, or "" when it cannot be read
    """
    version = ""
    try:
        # version.txt sits one directory above the one containing this file.
        program_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        ver_file = os.path.join(program_path, "version.txt")
        # `with` closes the handle; the old code left it open on every call.
        with open(ver_file, "r") as f:
            version = f.readline()
    except Exception:
        # Best effort: a missing or unreadable file yields an empty version.
        log.error(traceback.format_exc())
    return version.strip()
def _get(self, *args, **kwargs):
    """
    def get(*args, **kwargs) -> Fetch a file from the remote node
    @param args list:positional form, e.g. sys.get /tmp/src.tar.gz /etc/src.tar.gz
    @param kwargs dict:keyword form, e.g. sys.get remote_path=/tmp/src.tar.gz local_path=/etc/src.tar.gz
    @return string:whatever the file-service client's upload() returns
    """
    # The keyword form is used only when both paths are given by name.
    by_keyword = "local_path" in kwargs and "remote_path" in kwargs
    remote_path = kwargs["remote_path"] if by_keyword else args[0]
    plugin_dir = app_abs_path(self.main_conf.fs_plugin)
    FsClient = load_fclient(plugin_dir, ftype=self.fs_conf.fs_type)
    return FsClient(self.fs_conf).upload(remote_path)
def update(self, data):
    """
    Execute one job and store its result.

    Observer callback: decrypts the payload when env is "aes", publishes a
    RUNNING snapshot, expands {name} placeholders in the arguments, runs the
    command and finally stores the job with status FINISH and its return value.

    @param data dict: job envelope with "env" and "payload"
    @return bool: always True
    """
    # Bound up front so the handlers and the finally clause can use them even
    # when the job fails before these fields were read from the payload.
    cmd = None
    jid = None
    ret = ''
    try:
        if data["env"] == "aes":
            data["payload"] = self.crypt.loads(data.get("payload"))
        cmd = data["payload"]["cmd"]
        args = list(data["payload"]["args"])
        kwargs = data["payload"]["kwargs"]
        jid = data["payload"]["jid"]
        # Mark the job as RUNNING.
        data_t = deepcopy(data)
        data_t["payload"]["status"] = "RUNNING"
        if data_t["env"] == "aes":
            data_t["payload"] = self.crypt.dumps(data_t.get("payload"))
        self.mq.set_res(self.node, jid, data_t)

        os.chdir(prog_dir())
        # Expand placeholders such as {ip} or {node} with the values
        # produced by the loaded env functions.
        kwargs.update({"node_name": self.node, "node_ip": self.node_ip})
        env_regx = re.compile(r'{([a-zA-Z0-9]+)}')

        def expand(text):
            def repl(found):
                name = found.group(1)
                if name in self.sys_envs:
                    return self.sys_envs[name](**kwargs)
                # Unknown placeholders stay untouched. The old
                # sub(..., count=1) replaced the first placeholder in the
                # string even when a later one was the known name.
                return found.group(0)
            return env_regx.sub(repl, text)

        # A comprehension instead of the manual index loop: that loop hit
        # `continue` without advancing the index and so spun forever on the
        # first non-str argument.
        args = [expand(arg) if isinstance(arg, str) else arg for arg in args]
        data["payload"]["args"] = args
        for key in list(kwargs):
            if isinstance(kwargs[key], str):
                kwargs[key] = expand(kwargs[key])
        data["payload"]["kwargs"] = kwargs
        # A lone "help" argument asks for the function's documentation.
        if len(args) == 1 and args[0] == "help":
            ret = self.node_funcs[cmd].__doc__
        else:
            ret = self.node_funcs[cmd](*args, **kwargs)
    except KeyError as exc:
        ret = "cmd %s not found: %s" % (cmd, str(exc))
    except SwallCommandExecutionError as exc:
        ret = "cmd %s running error: %s" % (cmd, str(exc))
    except TypeError as exc:
        ret = "cmd %s argument error: %s" % (cmd, str(exc))
    except:
        # Deliberately broad: whatever a command raises becomes its result.
        ret = traceback.format_exc()
        log.error(ret)
    finally:
        os.chdir(prog_dir())
        if jid is None:
            # The payload could not be read, so there is no key to store under.
            log.error("job dropped, payload unreadable: %s" % ret)
        else:
            data["payload"]["return"] = ret
            data["payload"]["status"] = "FINISH"
            if data["env"] == "aes":
                data["payload"] = self.crypt.dumps(data.get("payload"))
            self.mq.set_res(self.node, jid, data)
    return True
class BaseOptionParser(optparse.OptionParser, object):
    """
    optparse parser that takes usage/description/version from class attributes,
    remembers the last parse result and runs the collected mixin setup functions.
    """
    usage = '%prog [OPTIONS] COMMAND [arg...]'
    description = None
    version = None

    def __init__(self, *args, **kwargs):
        # Class-level values are only defaults; explicit keyword arguments win.
        for key in ('version', 'description'):
            value = getattr(self, key)
            if value:
                kwargs.setdefault(key, value)
        kwargs.setdefault('usage', self.usage)
        optparse.OptionParser.__init__(self, *args, **kwargs)

    def parse_args(self, args=None, values=None):
        """Parse the arguments and keep the result on self.options / self.args."""
        parsed_options, parsed_args = optparse.OptionParser.parse_args(self, args, values)
        self.options = parsed_options
        self.args = parsed_args
        return parsed_options, parsed_args

    def _populate_option_list(self, option_list, add_help=True):
        """Add the standard options, then let every mixin register its own."""
        optparse.OptionParser._populate_option_list(
            self, option_list, add_help=add_help
        )
        for setup in self._mixin_setup_funcs:
            setup(self)

    def print_help(self, file=None):
        """
        overwrite the print_help
        """
        out = sys.stdout if file is None else file
        parts = []
        if self.usage:
            parts.append(self.get_usage() + "\n")
        if self.description:
            # The description is emitted verbatim, without optparse re-wrapping.
            parts.append(self.description)
        parts.append(self.format_option_help(self.formatter))

        encoding = self._get_encoding(out)
        text = "".join(parts).encode(encoding, "replace")
        out.write("%s\n" % text)
def get_config_file_path(self, configfile):
    """
    Return the path of ``configfile`` inside the parsed config directory.

    @param configfile string: file name, e.g. "swall.conf"
    @return string: self.options.config_dir joined with configfile
    """
    config_dir = self.options.config_dir
    return os.path.join(config_dir, configfile)
class CtlMin(object):
    """Mixin that registers the command line options of the ``ctl`` command."""

    def _mixin_setup(self):
        """Add the "Options for swall ctl" group to the parser ``self``."""
        group = optparse.OptionGroup(
            self, "Options for swall ctl"
        )
        self.add_option_group(group)
        group.add_option('-e', '--exclude',
                         default='',
                         dest='exclude',
                         help='Specify the exclude hosts by regix'
                         )
        # type='int' makes a value given on the command line an int like the
        # default. Without it "-t 10" produced the string "10" while the
        # default was the number 30.
        group.add_option('-t', '--timeout',
                         default=30,
                         type='int',
                         dest='timeout',
                         help='Specify the timeout,the unit is second'
                         )
        group.add_option('-r', '--is_raw',
                         action="store_true",
                         default=False,
                         dest='is_raw',
                         help='Specify the raw output'
                         )
        # Same reasoning as for --timeout: keep the option numeric.
        group.add_option('-n', '--nthread',
                         default=-1,
                         type='int',
                         dest='nthread',
                         help='Specify running nthread'
                         )
        group.add_option('-F', '--no_format',
                         action="store_true",
                         default=False,
                         dest='no_format',
                         help='Do not format the output'
                         )
class MainParser(object): 217 | def __init__(self, *args, **kwargs): 218 | self.usage = "Usage: %s [OPTIONS] COMMAND [arg...]" % sys.argv[0] 219 | self.description = """ 220 | A approach to infrastructure management. 221 | 222 | Commands: 223 | server Manage a agent server:start,stop,restart 224 | ctl Send functions to swall server 225 | manage Tools to manage the swall cluster 226 | 227 | """ 228 | 229 | def print_help(self, file=None): 230 | """ 231 | overwrite the print_help 232 | """ 233 | if file is None: 234 | file = sys.stdout 235 | result = [] 236 | result.append(self.usage) 237 | result.append(self.description) 238 | file.write("%s\n" % "".join(result)) 239 | 240 | 241 | class InitParser(ConfParser, ConfMin): 242 | __metaclass__ = OptionParserMeta 243 | 244 | def __init__(self, *args, **kwargs): 245 | super(InitParser, self).__init__(*args, **kwargs) 246 | self.usage = '%prog init [OPTIONS]' 247 | self.description = """ 248 | Init zookeeper db for swall at first. 249 | 250 | """ 251 | 252 | def _mixin_setup(self): 253 | group = optparse.OptionGroup( 254 | self, "Options for init zookeeper" 255 | ) 256 | self.add_option_group(group) 257 | group.add_option( 258 | '-f', "--force", dest='force', 259 | default=False, 260 | action='store_true', 261 | help='Force init zookeeper db' 262 | ) 263 | 264 | 265 | class ServerParser(ConfParser, DaemonMin, ConfMin): 266 | __metaclass__ = OptionParserMeta 267 | 268 | def __init__(self, *args, **kwargs): 269 | super(ServerParser, self).__init__(*args, **kwargs) 270 | self.usage = '%prog server [OPTIONS] COMMAND' 271 | self.description = """ 272 | Run swall server. 
273 | 274 | Commands: 275 | start start swall server 276 | stop stop swall server 277 | restart restart swall server 278 | status show the status of the swall server 279 | 280 | """ 281 | 282 | 283 | class ManageParser(ConfParser, ConfMin): 284 | __metaclass__ = OptionParserMeta 285 | 286 | def __init__(self, *args, **kwargs): 287 | super(ManageParser, self).__init__(*args, **kwargs) 288 | self.usage = '%prog manage [OPTIONS] COMMAND' 289 | self.description = """ 290 | Manage tool for swall server. 291 | 292 | Commands: 293 | init Init zookeeper db for swall server 294 | info Show same information for swall 295 | 296 | """ 297 | 298 | 299 | class CtlParser(ConfParser, CtlMin, ConfMin): 300 | __metaclass__ = OptionParserMeta 301 | 302 | def __init__(self, *args, **kwargs): 303 | super(CtlParser, self).__init__(*args, **kwargs) 304 | self.usage = '%prog ctl [target] [arguments]' 305 | self.description = """ 306 | Send command to swall server. 307 | 308 | """ 309 | 310 | 311 | class Ctl(CtlParser): 312 | """ 313 | 发送命令 314 | """ 315 | 316 | def main(self): 317 | self.parse_args() 318 | args, kwargs = parse_args_and_kwargs(self.args[1:]) 319 | 320 | if len(args) < 2: 321 | self.print_help() 322 | sys.exit(1) 323 | #解析参数,获取位置参数和关键字参数 324 | 325 | cli = Client( 326 | globs=args[0], 327 | exclude_globs=self.options.exclude, 328 | nthread=int(self.options.nthread), 329 | conf_dir=self.options.config_dir 330 | ) 331 | rets = {} 332 | if args[0] == "sys.job_info": 333 | if len(args[1:]) == 0 and len(kwargs) == 0: 334 | sys.stderr.write(c("jid needed for sys.job_info\n", 'r')) 335 | sys.stderr.flush() 336 | else: 337 | rets = cli.job_info(*args[2:], **kwargs) 338 | else: 339 | cli.submit_job(args[1], *args[2:], **kwargs) 340 | rets = cli.get_return(self.options.timeout) 341 | 342 | if rets: 343 | rets = sort_ret(rets) 344 | else: 345 | print c('#' * 50, 'y') 346 | print color(rets.get("msg"), 'r') 347 | print c('#' * 50, 'y') 348 | sys.exit(1) 349 | 350 | nfail = 0 351 | for 
class SwallAgent(ServerParser):
    """
    Process management for the swall agent: start, stop, restart, status.
    """

    def main(self):
        """
        Parse the command line and run the requested action.

        The action name is read from ``self.args[1]``.  A missing or unknown
        action prints the help text and exits with status 1.
        """
        self.parse_args()
        if self.args[1:]:
            action = self.args[1]
        else:
            self.print_help()
            sys.exit(1)
        cmds = {
            "start": self.start,
            "stop": self.stop,
            "restart": self.restart,
            "status": self.status
        }
        func = cmds.get(action)
        if func:
            func()
        else:
            self.print_help()
            sys.exit(1)

    def status(self):
        """
        Report on stdout whether the agent's pidfile can be read.

        NOTE(review): only the pidfile is consulted; a stale pidfile left by
        a crashed agent is still reported as "running".
        """
        try:
            # Context manager so the handle is closed even if read() fails.
            with open(self.config["swall"]["pidfile"], 'r') as pid_fp:
                pid = pid_fp.read()
            message = c("swall is running[%s]...\n" % pid, 'g')
        except IOError:
            message = c("swall is not running!\n", 'r')
        sys.stdout.write(message)
        sys.stdout.flush()

    def stop(self):
        """
        Stop the agent identified by the configured pidfile.
        """
        kill_daemon(self.config["swall"]["pidfile"])

    def start(self):
        """
        Start the agent: daemonize if requested, write the pidfile and enter
        the agent loop.  Ctrl-C stops the agent and logs a warning.
        """
        self.daemonize_if_required()
        try:
            sagent = Agent(self.config)
            self.set_pidfile()
            sagent.loop()
        except KeyboardInterrupt:
            sys.stdout.write("Stopping the Swall Agent\n")
            self.stop()
            # The logging call needs a message; calling it bare raises
            # TypeError and hides the shutdown.
            logging.getLogger().warning("Stopping the Swall Agent")

    def restart(self):
        """
        Stop the running agent, then start a new one.
        """
        self.stop()
        self.start()
"\033[0;32;1m%s\033[0m" % s, 36 | 'y': "\033[0;33;1m%s\033[0m" % s, 37 | 'b': "\033[0;34;1m%s\033[0m" % s 38 | } 39 | return color.get(t) or s 40 | 41 | 42 | def color(str_ret, t=1): 43 | """ 44 | 颜色渲染 45 | """ 46 | str_ret = str(str_ret) 47 | if any([str_ret == '{}', str_ret == '[]', str_ret == '', str_ret == '0', t != 1]): 48 | return c(str_ret, 'r') 49 | else: 50 | return c(str_ret, 'g') 51 | 52 | 53 | def prog_dir(): 54 | """ 55 | 获取程序的根路径 56 | """ 57 | return os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 58 | 59 | 60 | def app_abs_path(rel_path=None): 61 | """ 62 | 返回路径的绝对路径,相对程序目录 63 | :param rel_path:相对路径 64 | :return:绝对路径 65 | """ 66 | abs_path = '' 67 | if rel_path: 68 | abs_path = os.path.join(prog_dir(), rel_path) 69 | return abs_path 70 | 71 | 72 | def listener(state): 73 | """ 74 | listening for connection events 75 | @param state 76 | """ 77 | log.info("zookeeper connection events [%s]" % state) 78 | 79 | 80 | def node(func): 81 | """ 82 | 修饰器,用来说明修饰的函数是node节点函数 83 | :param func: 修饰的函数 84 | """ 85 | func.node = True 86 | @functools.wraps(func) 87 | def wrapped(*args, **kwargs): 88 | ret = func(*args, **kwargs) 89 | return ret 90 | return wrapped 91 | 92 | 93 | def env(func): 94 | """ 95 | 修饰器,用来说明修饰的函数是node节点函数 96 | :param func: 修饰的函数 97 | """ 98 | func.env = True 99 | 100 | @functools.wraps(func) 101 | def wrapped(*args, **kwargs): 102 | ret = func(*args, **kwargs) 103 | return ret 104 | 105 | return wrapped 106 | 107 | 108 | def gen_node(func): 109 | """ 110 | 修饰器,用来检查生成节点列表的函数返回格式是否正确 111 | @param func: 修饰的函数 112 | @return: 113 | """ 114 | func.node = True 115 | 116 | @functools.wraps(func) 117 | def wrapped(*args, **kwargs): 118 | nodes = func(*args, **kwargs) 119 | rets = {} 120 | for k, v in nodes.iteritems(): 121 | if not all(map(lambda x: x in v, ["agent", "project", "role"])): 122 | log.error("node [%s] format is error,node info master include :'agent', 'project', 'role'") 123 | continue 124 | else: 125 | rets.update({k: v}) 126 
# A keyword argument is a WHOLE token of the form key=value where both sides
# consist of letters, digits, "_" or "-".  Compiled once, not once per token.
_KWARG_RE = re.compile(r"([a-z0-9A-Z_-]+)=([a-z0-9A-Z_-]+)\Z")


def parse_args_and_kwargs(args):
    """
    Split command-line tokens into positional and keyword arguments.

    A token becomes a keyword argument only when the entire token matches
    ``key=value``.  Any other token is kept verbatim as a positional
    argument, including one that merely contains such a pair (for example
    the shell command ``FOO=1 ./run.sh``), so nothing is silently dropped.

    @param args list: tokens, e.g. ['arg1', 'arg2', 'key1=val1', 'key2=val2']
    @return (list, dict):
        e.g. (['arg1', 'arg2'], {'key1': 'val1', 'key2': 'val2'})
    """
    r_args = []
    r_kwargs = {}
    for arg in args:
        matched = _KWARG_RE.match(arg)
        if matched:
            r_kwargs[matched.group(1)] = matched.group(2)
        else:
            r_args.append(arg)
    return r_args, r_kwargs
fn_.endswith('.py') and not fn_.startswith("_sys_"): 208 | extpos = fn_.rfind('.') 209 | if extpos > 0: 210 | _name = fn_[:extpos] 211 | else: 212 | _name = fn_ 213 | names[_name] = os.path.join(mod_dir, fn_) 214 | for name in names: 215 | try: 216 | #模块的加载mod_dirs一定是一个list类型数据,否则执行失败 217 | fn_, path, desc = imp.find_module(name, mod_dirs) 218 | mod = imp.load_module(name, fn_, path, desc) 219 | except: 220 | log.error(traceback.format_exc()) 221 | continue 222 | modules.append(mod) 223 | for mod in modules: 224 | for attr in dir(mod): 225 | if attr.startswith('_'): 226 | continue 227 | #将加载的模块存放到字典里面 228 | if callable(getattr(mod, attr)): 229 | func = getattr(mod, attr) 230 | if not getattr(func, "node", None): 231 | continue 232 | try: 233 | funcs['{0}.{1}'.format(mod.__name__, attr)] = func 234 | except AttributeError: 235 | continue 236 | return funcs 237 | 238 | 239 | def load_env(mod_dirs): 240 | """ 241 | 加载系统变量 242 | """ 243 | names = {} 244 | modules = [] 245 | funcs = {} 246 | mod_dirs = mod_dirs.split(',') 247 | for mod_dir in mod_dirs: 248 | if not os.path.isdir(mod_dir): 249 | continue 250 | for fn_ in os.listdir(mod_dir): 251 | if fn_.startswith("_sys_") and fn_.endswith(".py"): 252 | extpos = fn_.rfind('.') 253 | if extpos > 0: 254 | _name = fn_[:extpos] 255 | else: 256 | _name = fn_ 257 | names[_name] = os.path.join(mod_dir, fn_) 258 | for name in names: 259 | try: 260 | #模块的加载mod_dirs一定是一个list类型数据,否则执行失败 261 | fn_, path, desc = imp.find_module(name, mod_dirs) 262 | mod = imp.load_module(name, fn_, path, desc) 263 | except: 264 | log.error(traceback.format_exc()) 265 | continue 266 | modules.append(mod) 267 | for mod in modules: 268 | for attr in dir(mod): 269 | if attr.startswith('_'): 270 | continue 271 | #将加载的模块存放到字典里面 272 | if callable(getattr(mod, attr)): 273 | func = getattr(mod, attr) 274 | if not getattr(func, "env", None): 275 | continue 276 | try: 277 | funcs['{0}'.format(attr)] = func 278 | except AttributeError: 279 | continue 280 | 
def backup_local(fn, backup_dir, ext):
    """
    Copy a file into the backup directory as ``<basename>.<ext>``.

    @param fn string: file to back up
    @param backup_dir string: directory that receives the copy
    @param ext string: suffix appended to the file name
    @return bool: True when the file does not exist (nothing to back up) or
        the copy succeeded, False when the copy failed
    """
    if not os.path.exists(fn):
        return True
    backup_dest = os.path.join(backup_dir, '%s.%s' % (os.path.basename(fn), ext))
    try:
        shutil.copy2(fn, backup_dest)
    except (shutil.Error, IOError, OSError):
        # copy2 reports a missing or unwritable destination as IOError or
        # OSError, not shutil.Error; all of them mean "backup failed".
        return False
    return True
def checksum(thing):
    """
    Return the SHA-1 hex digest of a file or of a string.

    If ``thing`` names an existing path, the file content is hashed in
    64 KiB chunks; otherwise ``thing`` itself is hashed.

    @param thing string: file name, or the data to hash
    @return string: hexadecimal SHA-1 digest
    """
    chunk_size = 2 ** 16
    thissum = hashlib.new('sha1')
    if os.path.exists(thing):
        # Binary mode: the digest must cover the raw bytes, with no newline
        # translation or text decoding.
        with open(thing, 'rb') as fo:
            for chunk in iter(lambda: fo.read(chunk_size), b''):
                thissum.update(chunk)
    else:
        if not isinstance(thing, bytes):
            # hashlib only accepts bytes; encode text explicitly.
            thing = thing.encode('utf-8')
        thissum.update(thing)
    return thissum.hexdigest()
def load_config(conf_path):
    """
    Read the ``[main]`` section of an INI-style configuration file.

    @param conf_path string: path of the configuration file
    @return dict: {option: value} taken from ``[main]``; empty when the file
        does not exist or has no ``[main]`` section
    """
    opts = {}
    if not os.path.exists(conf_path):
        return opts
    confparser = ConfigParser()
    confparser.read(conf_path)
    # Guard against a file without [main]: items() would raise
    # NoSectionError instead of yielding an empty configuration.
    if confparser.has_section("main"):
        opts.update(confparser.items("main"))
    return opts
class Timeout(Exception):
    """Raised by the ``timeout`` decorator when the wrapped call overruns."""
% (func.__name__, seconds)) 557 | elif thd.exception is not None: 558 | raise thd.exception 559 | except KeyboardInterrupt: 560 | thd.kill() 561 | return result[0] if result else '' 562 | 563 | _.__name__ = func.__name__ 564 | _.__doc__ = func.__doc__ 565 | return _ 566 | 567 | return timeout_decorator 568 | 569 | 570 | def retry(times, cmp_val=1): 571 | """ 572 | 如果修饰的函数返回结果不等于cmp_val,则重新执行函数,一共重试times 573 | """ 574 | times = int(times) 575 | 576 | def fail_retry_decorator(func): 577 | def _new_func(oldfunc, oldfunc_args, oldfunc_kwargs): 578 | return oldfunc(*oldfunc_args, **oldfunc_kwargs) 579 | 580 | def _(*args, **kwargs): 581 | tries = 1 582 | result = None 583 | new_kwargs = { 584 | 'oldfunc': func, 585 | 'oldfunc_args': args, 586 | 'oldfunc_kwargs': kwargs 587 | } 588 | while tries <= times: 589 | result = _new_func(**new_kwargs) 590 | if result == cmp_val: 591 | break 592 | else: 593 | print "%s fail,retry %s" % (func.__name__, tries) 594 | time.sleep(3) 595 | tries += 1 596 | return result 597 | 598 | _.__name__ = func.__name__ 599 | _.__doc__ = func.__doc__ 600 | return _ 601 | 602 | return fail_retry_decorator 603 | 604 | 605 | def which(exe=None): 606 | """ 607 | Python clone of POSIX's /usr/bin/which 608 | """ 609 | if exe: 610 | (path, name) = os.path.split(exe) 611 | if os.access(exe, os.X_OK): 612 | return exe 613 | 614 | # default path based on busybox's default 615 | default_path = "/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin" 616 | for path in os.environ.get('PATH', default_path).split(os.pathsep): 617 | full_path = os.path.join(path, exe) 618 | if os.access(full_path, os.X_OK): 619 | return full_path 620 | return None 621 | 622 | 623 | def _run(cmd, 624 | cwd=None, 625 | stdout=subprocess.PIPE, 626 | stderr=subprocess.PIPE, 627 | quiet=False, 628 | runas=None, 629 | with_env=True, 630 | shell="/bin/bash", 631 | env={}, 632 | rstrip=True, 633 | retcode=False): 634 | # Set the default working directory to the home directory 635 | # of the user 
salt-minion is running as. Default: /root 636 | if not cwd: 637 | cwd = os.path.expanduser('~{0}'.format('' if not runas else runas)) 638 | 639 | # make sure we can access the cwd 640 | # when run from sudo or another environment where the euid is 641 | # changed ~ will expand to the home of the original uid and 642 | # the euid might not have access to it. See issue #1844 643 | if not os.access(cwd, os.R_OK): 644 | cwd = '/' 645 | 646 | ret = {} 647 | 648 | if runas: 649 | # Save the original command before munging it 650 | orig_cmd = cmd 651 | try: 652 | pwd.getpwnam(runas) 653 | except KeyError: 654 | msg = 'User \'{0}\' is not available'.format(runas) 655 | log.error(msg) 656 | return 657 | 658 | cmd_prefix = 'su -s {0}'.format(shell) 659 | 660 | # Load the 'nix environment 661 | if with_env: 662 | cmd_prefix += ' -' 663 | cmd = 'cd {0} && {1}'.format(cwd, cmd) 664 | 665 | cmd_prefix += ' {0} -c'.format(runas) 666 | cmd = '{0} {1}'.format(cmd_prefix, pipes.quote(cmd)) 667 | 668 | if not quiet: 669 | # Put the most common case first 670 | if not runas: 671 | log.info('Executing command {0} in directory {1}'.format(cmd, cwd)) 672 | else: 673 | log.info('Executing command {0} as user {1} in directory {2}'.format( 674 | orig_cmd, runas, cwd)) 675 | 676 | run_env = os.environ 677 | run_env.update(env) 678 | kwargs = {'cwd': cwd, 679 | 'shell': True, 680 | 'env': run_env, 681 | 'stdout': stdout, 682 | 'stderr': stderr} 683 | if not os.environ.get('os', '').startswith('Windows'): 684 | kwargs['executable'] = shell 685 | # This is where the magic happens 686 | proc = subprocess.Popen(cmd, **kwargs) 687 | 688 | # If all we want is the return code then don't block on gathering input, 689 | # this is used to bypass ampersand issues with background processes in 690 | # scripts 691 | if retcode: 692 | while True: 693 | retcode = proc.poll() 694 | if retcode is None: 695 | continue 696 | else: 697 | out = '' 698 | err = '' 699 | break 700 | else: 701 | out, err = 
proc.communicate() 702 | 703 | if rstrip: 704 | if out: 705 | out = out.rstrip() 706 | # None lacks a rstrip() method 707 | if err: 708 | err = err.rstrip() 709 | 710 | ret['stdout'] = out 711 | ret['stderr'] = err 712 | ret['pid'] = proc.pid 713 | ret['retcode'] = proc.returncode 714 | return ret 715 | 716 | 717 | def run(cmd, cwd=None, runas=None, shell="/bin/bash", env={}): 718 | """ 719 | Execute the passed command and return the output as a string 720 | @param cmd string:执行的命令 721 | @param cmd string:工作目录,执行命令时候需要进入的目录 722 | @param runas string:以runas的身份执行命令 723 | @param shell string:解析脚本的shell,默认是/bin/bash 724 | @paran env dict:执行命令的环境 725 | @return dict: 726 | ret{ 727 | 'stdout': 标准输出 728 | 'stderr': 错误输出 729 | 'pid': 执行脚本的pid 730 | 'retcode': 脚本返回状态 731 | } 732 | """ 733 | ret = _run(cmd, runas=runas, shell=shell, 734 | cwd=cwd, stderr=subprocess.STDOUT, env=env) 735 | return ret 736 | 737 | 738 | def sort_ret(rets): 739 | """ 740 | 特定格式的list进行排序 741 | """ 742 | t = [] 743 | list_result = [] 744 | for node in rets: 745 | try: 746 | id2 = '_'.join(node.split('_')[:-1]) 747 | id3 = int(node.split('_')[-1]) 748 | except: 749 | id2 = '_'.join(node.split('_')[:-1]) 750 | id3 = node.split('_')[-1] 751 | t.append({"id2": id2, "id3": id3, "info": (node, rets[node])}) 752 | t.sort(key=operator.itemgetter("id2", "id3")) 753 | for i in t: 754 | list_result.append(i["info"]) 755 | return list_result 756 | 757 | 758 | def format_obj(obj): 759 | """ 760 | 格式化字典、列表等的输出 761 | @param obj dict: 762 | @param string|dict: 763 | """ 764 | is_true = map(lambda x: isinstance(obj, x), [list, tuple, set]) 765 | if any(is_true) and len(obj) >= 5 or isinstance(obj, dict): 766 | outs = json.dumps(obj, sort_keys=True, indent=4, separators=(',', ': ')) 767 | outs = "\n%s\n" % outs 768 | else: 769 | outs = obj 770 | return outs 771 | 772 | 773 | def agent_config(path): 774 | """ 775 | 读取配置文件,返回配置信息 776 | @param path string:配置文件 777 | @return dict: 778 | """ 779 | opts = { 780 | "swall": 
781 | { 782 | "node_name": "server", 783 | "node_ip": "localhost", 784 | "cache": "var/cache", 785 | "backup": "var/backup", 786 | "fs_plugin": "plugins/fservice", 787 | "pidfile": "/tmp/.swall.pid", 788 | "log_file": "var/logs/swall.log", 789 | "log_level": "INFO", 790 | "token": "yhIC7oenuJDpBxqyP3GSHn7mgQThRHtOnNNwqpJnyPVhR1n9Y9Q+/T3PJfjYCZdiGRrX03CM+VI=" 791 | }, 792 | "fs": 793 | { 794 | "fs_type": "rsync", 795 | "fs_host": "localhost", 796 | "fs_port": 873, 797 | "fs_user": "swall", 798 | "fs_pass": "vGjeVUxxxrrrx8CcZ", 799 | "fs_tmp_dir": "/data/swall_fs" 800 | }, 801 | 'redis': { 802 | "host": "0.0.0.0", 803 | "port": 6379, 804 | "db": 0, 805 | "password": "foo", 806 | "expire": 600 807 | } 808 | } 809 | 810 | opt = load_config(path) 811 | ret_opts = opts[os.path.basename(path).rstrip(".conf")] 812 | ret_opts.update(opt) 813 | return ret_opts 814 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 一、Swall概述 2 | ============ 3 | 4 | swall升级以后用比较简单的redis替换zookeeper,swall是一个可以用于管理特别是架构比较灵活的服务,比如游戏。用swall, 5 | 你不用登陆到具体的服务器去操作,你指需要在一台机器上面就可以完成服务管理,比如获取服务器监控信息、执行shell命令等等,你还可以方便的实现自动化配置,一条命令实现所有应用的部署不再是难题。 6 | 7 | 特点: 8 | 9 | 1.使用redis做任务信息存储,性能可靠 10 | 3.简单灵活,五脏六腑俱全(文件拷贝、命令执行、模块管理) 11 | 4.提供比较好的自省功能,可以让你比较方便调用各种模块 12 | 5.容易扩展 13 | 14 | 15 | 16 | 二、Swall安装部署 17 | ================== 18 | 19 | 准备两台机器,相关信息如下: 20 | 21 | =========================================== 22 | 名称 配置 IP地址 23 | ------------------------------------------- 24 | redis1 centos6.2 192.168.0.181 25 | swall1 centos6.2 192.168.0.180 26 | 27 | 28 | (一)安装redis 29 | ---------------------- 30 | 安装redis请自行google或者百度,这里我就不写了,安装好redis以后配置/conf/redis.conf 31 | 32 | [main] 33 | #redis配置 34 | host = 192.168.1.7 35 | port = 6379 36 | db = 0 37 | password = 38 | expire = 600 39 | 40 | 41 | 42 | (二)安装rsync服务 43 | ------------------------- 44 | 45 | 
swall内部提供了一个组件的方式来实现文件传输,内部已经实现了三个组件:ssh、ftp、rsync,这几个组件按照swall约定实现了访问对应文件 46 | 服务器的api,对应的配置是fs.conf,组件具体功能是为swall提供一个存放和下载文件的临时场所,下面以rsync为例: 47 | 48 | rsync配置在192.168.0.181,和redis配置在一起,实际用的时候最好要部署到不同的机器上 49 | 50 | 1.添加rsync用户和目录 51 | 52 | [root@redis1 ~]# useradd swall 53 | [root@redis1 ~]# mkdir /data/swall_fs 54 | [root@redis1 ~]# chown -R swall:swall /data/swall_fs 55 | [root@redis1 ~]# vim /etc/rsyncd.conf 56 | 57 | 58 | 2.设置rsync配置 59 | 60 | secrets file = /etc/rsyncd.secrets 61 | list = no 62 | port = 61768 63 | read only = yes 64 | uid = swall 65 | gid = swall 66 | max connections = 3000 67 | log file = /var/log/rsyncd_swall.log 68 | pid file = /var/run/rsyncd_swall.pid 69 | lock file = /var/run/rsync_swall.lock 70 | 71 | [swall_fs] 72 | path = /data/swall_fs 73 | auth users = swall 74 | read only = no 75 | 76 | 3.设置rsync密码 77 | 78 | [root@redis1 ~]# echo 'swall:vGjeVUncnbPV8CcZ' > /etc/rsyncd.secrets 79 | [root@redis1 ~]# chmod 600 /etc/rsyncd.secrets 80 | 81 | 82 | 4.防火墙要允许访问61768端口 83 | 84 | [root@redis1 bin]# iptables -A INPUT -p tcp --dport 61768 -j ACCEPT 85 | [root@redis1 bin]# iptables -L -n | grep 61768 86 | ACCEPT tcp -- 0.0.0.0/0 0.0.0.0/0 tcp dpt:61768 87 | [root@redis1 bin]# iptables-save > /etc/sysconfig/iptables 88 | 89 | 5.运行rsync服务 90 | 91 | [root@redis1 bin]# rsync --port=61768 --config=/etc/rsyncd.conf --daemon 92 | 93 | 94 | 6.测试rsync是否正常服务,登录其他机器,这里以192.168.0.180为例 95 | 96 | [root@swall1 ~]# RSYNC_PASSWORD=vGjeVUncnbPV8CcZ rsync -a --port=61768 --partial /etc/services swall@192.168.0.181::swall_fs/service 97 | [root@swall1 ~]# echo $? 
98 | 0 99 | [root@swall1 ~]# RSYNC_PASSWORD=vGjeVUncnbPV8CcZ rsync -a --port=61768 --partial swall@192.168.0.181::swall_fs/service /tmp/service 100 | [root@swall1 ~]# ll /tmp/service 101 | -rw-r--r-- 1 root root 640999 Jan 12 2010 /tmp/service 102 | 103 | 如上,说明rsync配置成功。 104 | 105 | 106 | (三)安装Swall 107 | ------------------- 108 | 109 | Swall这里安装到192.168.0.180服务器上 110 | 111 | 1.下载最新版本swall 112 | 113 | [root@swall1 ~]# mkdir /data 114 | [root@swall1 ~]# cd /data 115 | [root@swall1 data]# git clone https://github.com/lufeng4828/swall.git 116 | 117 | 2.安装swall的依赖包,建议用pip安装 118 | 119 | [root@swall1 ~]# cd swall 120 | [root@swall1 swall]# pip install -r requirememts.txt 121 | 注意:如果还没有装pip,centos可以通过yum install python-pip,ubuntu可以通过 apt-get install python-pip安装 122 | 123 | 3.修改swall配置 124 | 125 | [root@swall1 swall]# cd conf 126 | [root@swall1 conf]# vim swall.conf 127 | [main] 128 | #以此用户运行swall 129 | user = swall 130 | #定义角色,多个角色用逗号分开 131 | node_name = swall01 #这里我们只定义节点名称 132 | #agent地址,根据具体情况 133 | node_ip = 192.168.0.180 #这里写上当前服务器ip 192.168.0.180 134 | #缓存路径 135 | cache = var/cache 136 | #模块路径 137 | module = module/ 138 | #文件备份路径 139 | backup = var/backup 140 | #plugins路径 141 | fs_plugin = plugins/fservice 142 | #pid文件 143 | pidfile = /tmp/.swall.pid 144 | #日志定义 145 | log_file = logs/swall.log 146 | log_level = INFO 147 | #认证key,数据传输用 148 | token = yhIC7oenuJDpBxqyP3GSHn7mgQThRHtOnNNwqpJnyPVhR1n9Y9Q+/T3PJfjYCZdiGRrX03CM+VI= 149 | 150 | 说明: 151 | (1)node_name是定义agent名称 152 | (2)路径如果不是绝对路径,以程序根路径为基础,例如程序路径是/data/swall,则fs_plugin为/data/swall/plugins/fservice 153 | (3)node_ip是当前agent的ip地址 154 | (4)如果日志文件不存在,程序日志是记录不了,需要手动生成 155 | 156 | [root@swall1 conf]# vim fs.conf 157 | [main] 158 | fs_type = rsync #选择rsync 159 | fs_host = 192.168.0.181 #这里写我们部署好rsync的服务器地址192.168.0.181 160 | fs_port = 61768 161 | fs_user = swall 162 | fs_pass = vGjeVUncnbPV8CcZ 163 | fs_tmp_dir = /data/swall_fs 164 | fs_failtry = 3 165 | 166 | 说明: 167 | 
(1)fs_type是指/data/swall/plugins/fservice下面的文件名(不带路径),目前只支持ssh、ftp、rsync,可以自己实现 168 | (2)fs_tmp_dir只有ssh组件使用到,ftp和rsync用不到 169 | (3)fs_failtry只有rsync用到,当rsync传输失败了可以重试多少次 170 | (4)传输组件是用来给swall上传和下载文件来实现文件传输功能的 171 | 172 | 173 | 4.在启动swall之前,下面给出一个完整配置示例 174 | 175 | ###swall.conf配置 176 | [main] 177 | user = swall 178 | node_name = swall01 179 | node_ip = 192.168.0.180 180 | cache = var/cache 181 | module = module/ 182 | backup = var/backup 183 | fs_plugin = plugins/fservice 184 | pidfile = /tmp/.swall.pid 185 | log_file = /data/logs/swall.log 186 | log_level = INFO 187 | token = yhIC7oenuJDpBxqyP3GSHn7mgQThRHtOnNNwqpJnyPVhR1n9Y9Q+/T3PJfjYCZdiGRrX03CM+VI= 188 | 189 | ###redis.conf配置 190 | [main] 191 | #redis配置 192 | host = 192.168.1.7 193 | port = 6379 194 | db = 0 195 | password = 196 | expire = 600 197 | 198 | 199 | 5.新增PATH和PYTHONPATH路径(PYTHONPATH一定要设置,否则程序运行会提示swall模块找不到的) 200 | 201 | [root@swall1 ~]# export PATH=/data/swall/bin:$PATH 202 | [root@swall1 ~]# export PYTHONPATH=/data/swall:$PYTHONPATH 203 | [root@swall1 ~]# #备注:最好把这两个环境变量写入配置文件 204 | 205 | 6.新建swall用户和设置文件权限 206 | 207 | [root@swall1 ~]# useradd swall 208 | [root@swall1 ~]# chown -R swall:swall /data/swall 209 | 210 | 211 | 7.启动swall节点程序 212 | 213 | [root@swall1 ~]# cd /data/swall/bin 214 | [root@swall1 bin]# ./swall server start 215 | 216 | 8.测试命令 217 | 218 | [root@swall1 bin]# swall ctl "*" sys.ping 219 | ################################################## 220 | swall_sa_server_192.168.0.180 : 1 221 | ################################################## 222 | 一共执行了[1]个,失败了[0] 223 | 224 | 225 | 三、Swall命令入门 226 | ==================== 227 | 228 | 1.swall的管理工具是bin/swall, 使用方法如下 229 | 230 | Usage: swall ctl [target] [arguments] 231 | 232 | Send command to swall server. 
233 | 234 | Options: 235 | -h, --help show this help message and exit 236 | 237 | Options for swall ctl: 238 | -e EXCLUDE, --exclude=EXCLUDE 239 | Specify the exclude hosts by regix 240 | 241 | -t TIMEOUT, --timeout=TIMEOUT 242 | Specify the timeout,the unit is second 243 | 244 | -r, --is_raw Specify the raw output 245 | -n NTHREAD, --nthread=NTHREAD 246 | Specify running nthread 247 | 248 | Options for conf_dir: 249 | -c CONFIG_DIR, --config_dir=CONFIG_DIR 250 | Pass in an alternative configuration dir. Default: /data/swall/conf 251 | 252 | 2.参数解释 253 | 254 | target:通配符或者正则,通配符只支持*号,用来匹配具体的节点,主要去匹配swall.conf的node_name 255 | module.function:要执行的函数,例如sys.ping,有内置函数和自定义函数 256 | arguments:传递到module.function中的参数,支持位置参数和关键字参数 257 | 258 | 3.选项解释 259 | 260 | --exclude: 需要从target筛选的列表中排除,支持通配符和正则 261 | --timeout: 设置超时 262 | --is_raw: 打印结果需要显示颜色 263 | --config_dir: 指定swall配置文件目录,否则使用默认的配置/data/swall/conf 264 | 265 | 4.下面演示一些功能函数的使用,假设已经配置了N个节点了 266 | 267 | (1)查看swall通讯是否正常: 268 | 269 | [root@swall1 ~]# swall ctl "*" sys.ping --timeout=10 270 | ################################################## 271 | swall_sa_server_192.168.0.190 : 1 272 | swall_sa_server_192.168.0.191 : 1 273 | swall_sa_server_192.168.0.195 : 1 274 | swall_sa_server_192.168.0.198 : 1 275 | swall_sa_server_192.168.0.203 : 1 276 | swall_sa_server_192.168.0.180 : 1 277 | ################################################## 278 | 一共执行了[6]个,失败了[0] 279 | 280 | 281 | (2)拷贝文件到远程: 282 | 283 | [root@swall1 ~]# swall ctl "*" sys.copy /etc/hosts /tmp/xx_hosts --timeout=10 284 | ################################################## 285 | swall_sa_server_192.168.0.190 : 1 286 | swall_sa_server_192.168.0.191 : 1 287 | swall_sa_server_192.168.0.195 : 1 288 | swall_sa_server_192.168.0.198 : 1 289 | swall_sa_server_192.168.0.203 : 1 290 | swall_sa_server_192.168.0.180 : 1 291 | ################################################## 292 | 一共执行了[6]个,失败了[0] 293 | [root@swall1 ~]# swall ctl "*" sys.copy /etc/hosts /tmp/xx_hosts 
ret_type=full --timeout=10 294 | ################################################## 295 | swall_sa_server_192.168.0.190 : /tmp/xx_hosts 296 | swall_sa_server_192.168.0.191 : /tmp/xx_hosts 297 | swall_sa_server_192.168.0.195 : /tmp/xx_hosts 298 | swall_sa_server_192.168.0.198 : /tmp/xx_hosts 299 | swall_sa_server_192.168.0.203 : /tmp/xx_hosts 300 | swall_sa_server_192.168.0.180 : /tmp/xx_hosts 301 | ################################################## 302 | 一共执行了[6]个,失败了[0] 303 | [root@swall1 ~]# 304 | 305 | (3)从远程拷贝文件到当前: 306 | 307 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.get /etc/services /tmp/xxx_service 308 | ################################################## 309 | swall_sa_server_192.168.0.190 : /tmp/xxx_service 310 | ################################################## 311 | 一共执行了[1]个,失败了[0] 312 | [root@swall1 ~]# 313 | 314 | 315 | (4)执行shell命令: 316 | 317 | [root@swall1 swall]# swall ctl "swall_sa_server_192.168.0.190" cmd.call 'echo ok | awk "{print \$0}"' 318 | ################################################## 319 | swall_sa_server_192.168.0.190 : 320 | { 321 | "pid": 1149, 322 | "retcode": 0, 323 | "stderr": null, 324 | "stdout": "ok" 325 | } 326 | 327 | ################################################## 328 | 一共执行了[1]个,失败了[0] 329 | [root@swall1 swall]# 330 | 331 | [root@agent2 swall]# swall ctl "swall_sa_server_192.168.0.190" cmd.call 'echo ok | awk "{print \$0}"' ret_type=stdout 332 | ################################################## 333 | swall_sa_server_192.168.0.190 : ok 334 | ################################################## 335 | 一共执行了[1]个,失败了[0] 336 | [root@swall1 swall]# 337 | 338 | 339 | 五、Swall命令进阶 340 | ========================= 341 | 342 | 1.如果你安装好了swall,可以从sys.funcs和help来一步一步了解swall,swall内置有很多基本功能,如查看agent存活,拷贝文件,同步模块,查看模块,查看swall参数宏变量等, 343 | 同时在module部分也实现了很多功能模块:网络模块,linux信息查看、远程命令执行等,当然你可以自己实现,添加自己的模块很简单,后面再告诉怎么添加。 344 | 345 | (1)查看内置功能 346 | 347 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.funcs 
sys 348 | ################################################## 349 | swall_sa_server_192.168.0.190 : 350 | [ 351 | "sys.rsync_module", 352 | "sys.get_env", 353 | "sys.job_info", 354 | "sys.funcs", 355 | "sys.exprs", 356 | "sys.copy", 357 | "sys.ping", 358 | "sys.get", 359 | "sys.reload_env", 360 | "sys.roles", 361 | "sys.reload_node", 362 | "sys.reload_module", 363 | "sys.version" 364 | ] 365 | 366 | ################################################## 367 | 一共执行了[1]个,失败了[0] 368 | [root@swall1 ~]# 369 | 370 | (2)查看功能函数帮助,在调用函数后面直接加上help就可以了 371 | 372 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.copy help 373 | ################################################## 374 | swall_sa_server_192.168.0.190 : 375 | def copy(*args, **kwargs) -> 拷贝文件到远程 可以增加一个ret_type=full,支持返回文件名 376 | @param args list:支持位置参数,例如 sys.copy /etc/src.tar.gz /tmp/src.tar.gz ret_type=full 377 | @param kwargs dict:支持关键字参数,例如sys.copy local_path=/etc/src.tar.gz remote_path=/tmp/src.tar.gz 378 | @return int:1 if success else 0 379 | ################################################## 380 | 一共执行了[1]个,失败了[0] 381 | 382 | (3)同步模块到节点 383 | 384 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.rsync_module 385 | ################################################## 386 | swall_sa_server_192.168.0.190 : 1 387 | ################################################## 388 | 一共执行了[1]个 389 | 390 | 支持同步个别模块,多个需要用逗号分隔 391 | 392 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.rsync_module server_tools.py 393 | ################################################## 394 | swall_sa_server_192.168.0.190 : 1 395 | ################################################## 396 | 一共执行了[1]个,失败了[0] 397 | [root@swall1 ~]# 398 | 399 | 2.swall提供一些内置变量,使用在参数中,在真正执行的时候会被替换,查看当前系统支持的“参数宏变量” 400 | 401 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.get_env 402 | ################################################## 403 | swall_sa_server_192.168.0.190 : 404 | [ 405 | "NODE", 406 | "IP", 407 | 
"TIME", 408 | "DATE" 409 | ] 410 | 411 | ################################################## 412 | 一共执行了[1]个,失败了[0] 413 | [root@swall1 bin]# 414 | 415 | 使用的时候需要加大括号,如{IP}参数宏变量自定义,查看参数宏变量的具体值如下: 416 | 417 | [root@swall1 bin]# swall ctl "*" sys.exprs "ip:{IP},node:{NODE}" 418 | ################################################## 419 | swall_sa_server_192.168.0.190 : ip:192.168.0.190,node:swall_sa_server_192.168.0.190 420 | swall_sa_server_192.168.0.191 : ip:192.168.0.191,node:swall_sa_server_192.168.0.191 421 | swall_sa_server_192.168.0.195 : ip:192.168.0.195,node:swall_sa_server_192.168.0.195 422 | swall_sa_server_192.168.0.198 : ip:192.168.0.198,node:swall_sa_server_192.168.0.198 423 | swall_sa_server_192.168.0.203 : ip:192.168.0.203,node:swall_sa_server_192.168.0.203 424 | swall_sa_server_192.168.0.180 : ip:192.168.0.180,node:swall_sa_server_192.168.0.180 425 | ################################################## 426 | 一共执行了[6]个,失败了[0] 427 | [root@swall1 bin]# 428 | [root@swall1 bin]# swall ctl "*" sys.copy /etc/services /data/{NODE}/ ret_type=full 429 | ################################################## 430 | swall_sa_600 : /data/swall_sa_600/services 431 | swall_sa_601 : /data/swall_sa_601/services 432 | swall_sa_700 : /data/swall_sa_700/services 433 | ################################################## 434 | 一共执行了[3]个,失败了[0] 435 | [root@swall1 bin]# 436 | 437 | 438 | 六、Swall模块编写 439 | =================== 440 | 441 | swall模块存放在module下面的特定目录中,module下面的目录就是swall里面的角色,说白了,角色就是一个含有特定模块文件的组, 442 | 你写的模块属于哪个角色就放到哪个目录下去,例如你写了一个server_tools.py,属于server角色,就放到当前你所在节点的 443 | /data/swall/module/server目录下(角色可以随意创建,只要在/data/swall/module/创建一个目录存放模块即可)一个agent 444 | 可以配置多个角色,就是swall.conf中的node_role,配置好角色还要为角色配置节点(节点的概念在swall中代表node),下面开始编写模块。 445 | 446 | 1.swall模块最小单元是函数,目前不支持直接调用方法,函数需要加上node修饰器,同时最好要给函数设置doc帮助信息 447 | 448 | [root@swall1 server]# pwd 449 | /data/swall/module/server 450 | [root@swall1 server]# vim mem_info.py 451 | import psutil 452 | import logging 453 | from 
swall.utils import node 454 | 455 | log = logging.getLogger() 456 | 457 | @node 458 | def physical_memory_usage(*args, **kwarg): 459 | """ 460 | def physical_memory_usage(*args, **kwarg) -> Return a dict that describes free and available physical memory. 461 | @return dict: 462 | """ 463 | return dict(psutil.phymem_usage()._asdict()) 464 | 465 | 2.编写好了以后需要同步出去,同步命令会自动加载模块 466 | 467 | [root@swall1 swall]# swall ctl "*" sys.rsync_module mem_info.py 468 | ################################################## 469 | swall_sa_server_192.168.0.190 : 1 470 | swall_sa_server_192.168.0.191 : 1 471 | ################################################## 472 | 一共执行了[2]个,失败了[0] 473 | 474 | 3.查看写好的模块 475 | 476 | [root@swall1 swall]# swall ctl "*" sys.funcs mem_info 477 | ################################################## 478 | swall_sa_server_192.168.0.190 : ['mem_info.physical_memory_usage'] 479 | swall_sa_server_192.168.0.191 : ['mem_info.physical_memory_usage'] 480 | ################################################## 481 | 一共执行了[2]个,失败了[0] 482 | [root@swall1 swall]# 483 | 484 | [root@swall1 swall]# swall ctl "*" mem_info.physical_memory_usage help 485 | ################################################## 486 | swall_sa_server_192.168.0.190 : 487 | def physical_memory_usage(*args, **kwarg) -> Return a dict that describes free and available physical memory. 488 | @return dict: 489 | 490 | swall_sa_server_192.168.0.191 : 491 | def physical_memory_usage(*args, **kwarg) -> Return a dict that describes free and available physical memory. 
492 | @return dict: 493 | 494 | ################################################## 495 | 一共执行了[2]个,失败了[0] 496 | 497 | 4.调用执行我们的模块 498 | 499 | [root@swall1 swall]# swall ctl "*" mem_info.physical_memory_usage 500 | ################################################## 501 | swall_sa_server_192.168.0.190 : 502 | { 503 | "active": 417042432, 504 | "available": 57892864, 505 | "buffers": 5967872, 506 | "cached": 45473792, 507 | "free": 6451200, 508 | "inactive": 24846336, 509 | "percent": 88.700000000000003, 510 | "total": 514326528, 511 | "used": 507875328 512 | } 513 | 514 | swall_sa_server_192.168.0.191 : 515 | { 516 | "active": 417067008, 517 | "available": 57929728, 518 | "buffers": 5967872, 519 | "cached": 45518848, 520 | "free": 6443008, 521 | "inactive": 24940544, 522 | "percent": 88.700000000000003, 523 | "total": 514326528, 524 | "used": 507883520 525 | } 526 | 527 | ################################################## 528 | 一共执行了[2]个,失败了[0] 529 | [root@swall1 swall]# 530 | 531 | 532 | 七、Swall参数宏变量 533 | =================== 534 | 535 | swall支持在调用函数的时候,在参数(位置参数、关键字参数)里面加上宏变量,这些变量会在agent执行命令的时候扩展为具体的值,目前swall已经 536 | 支持如下几个参数宏变量: 537 | 538 | NODE: node的名称 539 | IP: node的ip地址 540 | TIME node的当前时间 541 | DATE node的当前日期 542 | 543 | 544 | 1.查看参数宏变量列表 545 | 546 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.get_env 547 | ################################################## 548 | swall_sa_server_192.168.0.190 : 549 | [ 550 | "NODE", 551 | "IP", 552 | "TIME", 553 | "DATE" 554 | ] 555 | 556 | ################################################## 557 | 一共执行了[1]个,失败了[0] 558 | 559 | 2.查看具体参数宏变量的值 560 | 561 | [root@swall1 bin]# swall ctl "*" sys.exprs "{NODE}" 562 | ################################################## 563 | swall_sa_server_192.168.0.190 : node:swall_sa_server_192.168.0.190 564 | swall_sa_server_192.168.0.191 : node:swall_sa_server_192.168.0.191 565 | swall_sa_server_192.168.0.195 : node:swall_sa_server_192.168.0.195 566 | swall_sa_server_192.168.0.198 
: node:swall_sa_server_192.168.0.198 567 | swall_sa_server_192.168.0.203 : node:swall_sa_server_192.168.0.203 568 | swall_sa_server_192.168.0.180 : node:swall_sa_server_192.168.0.180 569 | ################################################## 570 | 一共执行了[6]个,失败了[0] 571 | 572 | 这里sys.exprs可以帮你打印节点的参数宏变量值,经常用来做参数宏变量查看的 573 | 574 | 3.在执行sys.copy的时候将当前/etc/hosts文件拷贝到server角色的所有节点的/tmp下面,同时加上拷贝的时间 575 | 576 | [root@swall1 swall]# swall ctl "*190*;*191*" sys.copy /etc/hosts /tmp/hosts.{DATE}_{TIME} ret_type=full 577 | ################################################## 578 | swall_sa_server_192.168.0.190 : /tmp/hosts.2014-07-03_07:36:17 579 | swall_sa_server_192.168.0.191 : /tmp/hosts.2014-07-03_07:36:17 580 | ################################################## 581 | 一共执行了[2]个,失败了[0] 582 | [root@swall1 swall]# 583 | 584 | 4.新增参数宏变量 585 | 586 | (1)编辑module/common/_sys_common.py文件,直接在里面参考其他参数宏变量添加,然后通过下面的命令同步 587 | [root@swall1 swall]# swall ctl "*" sys.rsync_module _sys_common.py 588 | ################################################## 589 | swall_sa_server_192.168.0.190 : 1 590 | swall_sa_server_192.168.0.191 : 1 591 | ################################################## 592 | 一共执行了[2]个,失败了[0] 593 | 594 | (2)由于同步模块不会自动加载参数宏变量,需要手动加载 595 | [root@swall1 swall]# swall ctl "*" sys.reload_env 596 | ################################################## 597 | swall_sa_server_192.168.0.190 : 1 598 | swall_sa_server_192.168.0.191 : 1 599 | ################################################## 600 | 一共执行了[2]个,失败了[0] 601 | [root@swall1 swall]# 602 | 603 | 604 | 八、一些问题 605 | =================== 606 | 607 | 1.怎么匹配节点? 
608 | > 答:swall支持通过通配符(*)和正则表达式匹配节点,如: 609 | > > 610 | (1)通配符,只支持星号,功能和linux shell环境下面的功能是一样的,如果有多个通配符,支持通过分号分隔 611 | > > 612 | [root@swall1 swall]# swall ctl "swall_sa_server*" sys.ping 613 | ################################################## 614 | swall_sa_server_192.168.0.190 : 1 615 | swall_sa_server_192.168.0.191 : 1 616 | ################################################## 617 | 一共执行了[2]个,失败了[0] 618 | [root@swall1 swall]# 619 | [root@swall1 swall]# swall ctl "*190;*191" sys.ping 620 | ################################################## 621 | swall_sa_server_192.168.0.190 : 1 622 | swall_sa_server_192.168.0.191 : 1 623 | ################################################## 624 | 一共执行了[2]个,失败了[0] 625 | > > 626 | (2)正则表达式 627 | [root@swall1 swall]# swall ctl "swall_sa_server_192.168.0.\d+" sys.ping 628 | ################################################## 629 | swall_sa_server_192.168.0.190 : 1 630 | swall_sa_server_192.168.0.191 : 1 631 | ################################################## 632 | 一共执行了[2]个,失败了[0] 633 | > > 634 | (3)写完整的节点名称,如果有多个,支持分号分隔 635 | [root@swall1 swall]# swall ctl "swall_sa_server_192.168.0.190" sys.ping 636 | ################################################## 637 | swall_sa_server_192.168.0.190 : 1 638 | ################################################## 639 | 一共执行了[1]个,失败了[0] 640 | 641 | 2.调用模块的时候如果不知道怎么使用模块,不知道传什么参数,怎么办? 642 | > 答:每个函数后面加上 help参数都会打印这个函数使用说明 643 | > > 644 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.copy help 645 | ################################################## 646 | swall_sa_server_192.168.0.190 : 647 | def copy(*args, **kwargs) -> 拷贝文件到远程 可以增加一个ret_type=full,支持返回文件名 648 | @param args list:支持位置参数,例如 sys.copy /etc/src.tar.gz /tmp/src.tar.gz ret_type=full 649 | @param kwargs dict:支持关键字参数,例如sys.copy local_path=/etc/src.tar.gz remote_path=/tmp/src.tar.gz 650 | @return int:1 if success else 0 651 | ################################################## 652 | 一共执行了[1]个,失败了[0] 653 | 654 | 3.需要查看某个模块的函数列表,怎么办? 
655 | > 答:提供了一个sys.funcs函数可以解决这个问题,需要输入想要查看的模块名称(不带后缀) 656 | > > 657 | [root@swall1 swall]# swall ctl "swall_sa_server_192.168.0.190" sys.funcs network 658 | ################################################## 659 | swall_sa_server_192.168.0.190 : ['network.get_ip', 'network.get_ping'] 660 | [root@swall1 ~]# 661 | 662 | 4.写好了模块以后要怎么同步到节点呢? 663 | 664 | > 答:通过调用sys.rsync_module可以同步模块到节点 665 | > > 如果写好了模块并且存放到当前节点的/module/{role},这里的{role}对应你要同步的角色,/module/common是所有角色公用的模块,现在为server同步模块如下: 666 | 667 | > > 668 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.rsync_module 669 | ################################################## 670 | swall_sa_server_192.168.0.190 : 1 671 | ################################################## 672 | 一共执行了[1]个,失败了[0] 673 | 674 | > > 支持同步个别模块,多个需要用逗号分隔: 675 | > > 676 | [root@swall1 ~]# swall ctl "swall_sa_server_192.168.0.190" sys.rsync_module server_tools.py 677 | ################################################## 678 | swall_sa_server_192.168.0.190 : 1 679 | ################################################## 680 | 一共执行了[1]个,失败了[0] 681 | [root@swall1 ~]# 682 | 683 | 684 | 5.如何编写模块? 685 | 686 | > 答:模块编写如下所示: 687 | > > 688 | > > 689 | #coding:utf-8 690 | from swall.utils import node 691 | > > 692 | @node 693 | def ping(*args, **kwargs): 694 | return 1 695 | 696 | > > 说明: 697 | 所有模块需要加上node修饰器才可以让swall调用,函数一定要加上kwargs这个关键字扩展参数,swall内部会传一些信息过来,这些 698 | 信息有:project、agent、role、node_name、node_ip 699 | 在函数里面可以通过kwargs["project"]等获取这些信息 700 | 701 | > > 写好模块以后保存,例如ping.py,存放到module下对应的角色目录中,通过命令同步到agent,归属于这个角色节点就可以调用该 702 | > > 函数 703 | 704 | 6.什么场景下使用参数宏变量呢? 
705 | 706 | > 答:例如其他节点获取配置的时候,一般情况下,如果你不加参数宏变量,获取到当前节点的文件是同一个路径,你根本区分不出来,如下: 707 | > > 708 | [root@swall1 bin]# swall ctl "*" sys.get /etc/hosts /tmp/ 709 | ################################################## 710 | swall_sa_server_192.168.0.190 : /etc/hosts 711 | swall_sa_server_192.168.0.191 : /etc/hosts 712 | swall_sa_server_192.168.0.195 : /etc/hosts 713 | swall_sa_server_192.168.0.198 : /etc/hosts 714 | swall_sa_server_192.168.0.203 : /etc/hosts 715 | swall_sa_server_192.168.0.205 : /etc/hosts 716 | ################################################## 717 | 一共执行了[6]个,失败了[0] 718 | [root@swall1 bin]# 719 | 720 | > > 这里就有一个问题了,所有获取的文件路径都是/etc/hosts,区分不出是哪个节点的文件,如果使用参数宏变量,就不一样了: 721 | > > 722 | [root@swall1 bin]# swall ctl "*" sys.get /etc/hosts /tmp/hosts.{NODE} 723 | ################################################## 724 | swall_sa_server_192.168.0.190 : /tmp/hosts.swall_sa_server_192.168.0.190 725 | swall_sa_server_192.168.0.191 : /tmp/hosts.swall_sa_server_192.168.0.191 726 | swall_sa_server_192.168.0.195 : /tmp/hosts.swall_sa_server_192.168.0.195 727 | swall_sa_server_192.168.0.198 : /tmp/hosts.swall_sa_server_192.168.0.198 728 | swall_sa_server_192.168.0.203 : /tmp/hosts.swall_sa_server_192.168.0.203 729 | swall_sa_server_192.168.0.205 : /tmp/hosts.swall_sa_server_192.168.0.205 730 | ################################################## 731 | 一共执行了[6]个,失败了[0] 732 | [root@swall1 bin]# 733 | 734 | 735 | > > 还有一种场景,在游戏运维中,针对一机多服,假设游戏有/data/swall_sa_600,/data/swall_sa_601,/data/swall_sa_700三个程序, 736 | 对应三个game的节点,节点名称就是目录名。如果我要拷贝文件到/data/swall_sa_600,/data/swall_sa_601,/data/swall_sa_700各个目录下,用swall的参数宏变量替换就很容易解决: 737 | > > 738 | [root@swall1 bin]# swall ctl "*" sys.copy /etc/services /data/{NODE}/ ret_type=full 739 | ################################################## 740 | swall_sa_600 : /data/swall_sa_600/services 741 | swall_sa_601 : /data/swall_sa_601/services 742 | swall_sa_700 : /data/swall_sa_700/services 743 | ################################################## 
744 | 一共执行了[3]个,失败了[0] 745 | [root@swall1 bin]# 746 | 747 | 7.怎么找不到sys.py文件? 748 | > 答:swall模块分为两大类,一类是内置的,以sys开头,这些模块在agent.py里面实现了,其他模块都可以在module目录下找到 749 | 750 | 751 | 752 | 753 | --------------------------------------------------------------------------------