├── .gitignore ├── LICENSE ├── README.md ├── lsf ├── __init__.py ├── ehosts.py ├── ejobs.py ├── esub.py ├── grouphosts.py ├── groupjobs.py ├── printhosts.py ├── printjobs.py ├── readhosts.py ├── readjobs.py ├── shortcuts.py ├── submitjob.py ├── sumhosts.py ├── sumjobs.py ├── useraliases.py └── utility.py ├── scripts ├── ehosts ├── ejobs └── esub └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | 4 | dist 5 | build 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Elmar Peise 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is furnished to do 10 | so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python-lsf 2 | ========== 3 | 4 | Improved Interface for the IBM Platform LSF batch job scheduler 5 | 6 | 7 | Requirements 8 | ------------ 9 | 10 | * python version 2.7.x 11 | * LSF version 9.1.3.0 12 | * `bjobs`, `bhosts`, `bsub`, and `lshosts` available 13 | 14 | 15 | Installation 16 | ------------ 17 | 18 | python setup.py install 19 | 20 | If you are missing the right to write to the system wide python directories, 21 | use 22 | 23 | python setup.py install --user 24 | 25 | to install the package in `~/.local`. You might then have to 26 | 27 | export PATH=$PATH:~/.local/bin 28 | 29 | in order to make the scripts available on the command line. 30 | 31 | 32 | Usage 33 | ----- 34 | 35 | `ejobs`, `ehosts`, and `esub` have essentially the same interfaces as LSF's 36 | `bjobs`, `bhosts`, and `bsub`. 37 | Check 38 | 39 | ejobs -h 40 | ehosts -h 41 | esub -h 42 | man bjobs 43 | man bhosts 44 | man bsub 45 | 46 | 47 | User Alias Resolution 48 | --------------------- 49 | 50 | To resolve commonly encountered and possibly cryptic user names, `ejobs` and 51 | `ehosts` provide a mechanism to replace such user names by user defined aliases. 52 | These aliases are read from `~/.useraliases` (if existing), which needs to be in 53 | the following format: each user is on its own line; the first word on the line 54 | is the user name, all following words are the user alias. E.g.: 55 | 56 | ep123456 Elmar Peise 57 | -------------------------------------------------------------------------------- /lsf/__init__.py: -------------------------------------------------------------------------------- 1 | """LSF module.""" 2 | 3 | __all__ = ["ejobs", "ehosts", "esub", "submitjob"] 4 | __author__ = "Elmar Peise" 5 | 6 | import ejobs 7 | import ehosts 8 | import esub 9 | from submitjob import submitjob 10 | -------------------------------------------------------------------------------- /lsf/ehosts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Wrapper script with bhosts functionality.""" 3 | 4 | import sys 5 | import re 6 | import argparse 7 | 8 | from shortcuts import ehostsshortcuts 9 | 10 | from readhosts import readhosts 11 | from printhosts import printhosts 12 | from grouphosts import grouphosts 13 | from sumhosts import sumhosts 14 | 15 | from readjobs import readjobs 16 | 17 | 18 | def ehosts(args, bhostsargs): 19 | """Wrapper script with bhosts functionality.""" 20 | # construct -R argument 21 | select = [] 22 | for shortcutname, shortcutselect in ehostsshortcuts.items(): 23 | if getattr(args, shortcutname): 24 | select.append(shortcutselect) 25 | if select: 26 | select = " || ".join(select) 27 | if args.model: 28 | if select: 29 | select = "(%s) && model==%s" % (select, args.model) 30 | else: 31 | select = "model==" + args.model 32 | if select: 33 | if "-R" not in bhostsargs: 34 | bhostsargs += ["-R", "select[%s]" % select] 35 | else: 36 | i = bhostsargs.index("-R") + 1 37 | req = bhostsargs[i] 38 | if "select" in req: 39 | bhostsargs[i] = req.replace("select[", 40 | "select[(%s) & " % select, 1) 41 | else: 42 | bhostsargs[i] = "(%s) & (%s)" % (req, select) 43 | 44 | # read 45 | hosts = readhosts(bhostsargs, fast=args.fast) 46 | 47 | if not hosts: 48 | return 49 | 50 | # read jobs 51 | if args.fast: 52 | jobs = [] 53 | else: 54 | hostnames = [h["host_name"] for h in hosts] 55 | jobs = readjobs(["-u", "all", "-r", "-m", " ".join(hostnames)]) 56 | 57 | # sort 58 | if not args.nosort: 59 | hosts.sort(key=lambda h: h["host_name"]) 60 | 61 | # no grouping 62 | if not args.groupby or args.groupby not in hosts[0]: 63 | if args.sum: 64 | printhosts([sumhosts(hosts)], wide=args.wide, header=not 65 | args.noheader) 66 | else: 67 | printhosts(hosts, jobs, wide=args.wide, header=not args.noheader) 68 | return 69 | 70 | # grouping 71 | hostgroups = grouphosts(hosts, args.groupby) 72 | if args.sum: 73 | hosts = [] 74 | for title in sorted(hostgroups.keys()): 75 | hostgroup = hostgroups[title] 76 | sumhost = sumhosts(hostgroup) 77 | sumhost["title"] = title 78 | hosts.append(sumhost) 79 | printhosts(hosts, jobs, wide=args.wide, header=not args.noheader) 80 | else: 81 | 82 | for title in sorted(hostgroups.keys()): 83 | hosts = hostgroups[title] 84 | printhosts(hosts, jobs, wide=args.wide, header=not args.noheader, 85 | title=title) 86 | 87 | 88 | def main(): 89 | """Main program entry point.""" 90 | # argument parser and options 91 | parser = argparse.ArgumentParser( 92 | description="More comprehensive version of bhosts." 93 | ) 94 | parser.add_argument( 95 | "-w", "--wide", 96 | help="show more detailed info", 97 | action="store_true" 98 | ) 99 | parser.add_argument( 100 | "-sum", 101 | help="summarize across hosts", 102 | action="store_true" 103 | ) 104 | parser.add_argument( 105 | "--groupby", 106 | help="group jobs by KEY", 107 | metavar="KEY" 108 | ) 109 | parser.add_argument( 110 | "--fast", 111 | help="read less info frim LSF", 112 | action="store_true" 113 | ) 114 | parser.add_argument( 115 | "--noheader", 116 | help="don't show the header", 117 | action="store_true" 118 | ) 119 | parser.add_argument( 120 | "--nosort", 121 | help="don't sort lexigraphically", 122 | action="store_true" 123 | ) 124 | 125 | # shortcuts 126 | shortcuts = parser.add_argument_group("shortcuts") 127 | shortcuts.add_argument( 128 | "--model", 129 | help="for \"-R model==MODEL\"" 130 | ) 131 | for shortcutname, shortcutselect in ehostsshortcuts.items(): 132 | shortcuts.add_argument( 133 | "-" + shortcutname, 134 | help="for \"-R %s\"" % shortcutselect, 135 | action="store_true" 136 | ) 137 | 138 | # 139 | parser.add_argument_group( 140 | "further arguments", 141 | description="are passed to bhosts" 142 | ) 143 | 144 | # parse arguments 145 | args, bhostsargs = parser.parse_known_args() 146 | 147 | # run ehosts 148 | try: 149 | ehosts(args, bhostsargs) 150 | except (KeyboardInterrupt, IOError): 151 | pass 152 | 153 | 154 | if __name__ == "__main__": 155 | main() 156 | -------------------------------------------------------------------------------- /lsf/ejobs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Wrapper script with bjobs functionality.""" 3 | 4 | from __future__ import print_function 5 | 6 | import sys 7 | import re 8 | import argparse 9 | 10 | from utility import color 11 | from useraliases import lookupalias 12 | from shortcuts import ejobsshortcuts 13 | 14 | from readjobs import readjobs 15 | from printjobs import printjobs 16 | from groupjobs import groupjobs 17 | from sumjobs import sumjobs 18 | 19 | from readhosts import readhosts 20 | from printhosts import printhosts 21 | 22 | # highlighting color for pending reasing 23 | pendingcolors = { 24 | "Running an exclusive job": "y", 25 | "Job's requirement for exclusive execution not satisfied": "y", 26 | "An exclusive job has reserved the host": "y", 27 | "Job slot limit reached": "y", 28 | "Not enough processors to meet the job's spanning requirement": "y", 29 | "Not enough slots or resources for whole duration of the job": "r", 30 | "Not enough hosts to meet the job's spanning requirement": "r", 31 | "Job('s)? requirements for reserving resource \(.*\) not satisfied": "r", 32 | } 33 | 34 | # order of status identifiers 35 | statorder = { 36 | "RUN": 4, 37 | "PROV": 4, 38 | "PSUSP": 3, 39 | "USUSP": 3, 40 | "SSUSP": 3, 41 | "PEND": 2, 42 | "WAIT": 2, 43 | "UNKWN": 1, 44 | "DONE": 0, 45 | "ZOMBI": 0, 46 | "EXIT": 0, 47 | } 48 | 49 | 50 | def ejobs(args, bjobsargs): 51 | """Wrapper script with bjobs functionality.""" 52 | # handle arguments 53 | if args.pending: 54 | bjobsargs = ["-p"] + bjobsargs 55 | args.groupby = "pend_reason" 56 | for shortcutname, shortcutargs in ejobsshortcuts.items(): 57 | if getattr(args, shortcutname): 58 | bjobsargs = shortcutargs + bjobsargs 59 | for l in list("rsda"): 60 | if args.__dict__[l]: 61 | bjobsargs = ["-" + l] + bjobsargs 62 | if args.u: 63 | unames = map(lookupalias, args.u.split()) 64 | bjobsargs = ["-u", " ".join(unames)] + bjobsargs 65 | if args.jid: 66 | args.output = ["id"] 67 | args.fast = True 68 | args.noheader = True 69 | if args.output: 70 | args.output = sum([fields.split() for fields in args.output], []) 71 | if len(args.output) == 1: 72 | args.noheader = True 73 | 74 | # read 75 | jobs = readjobs(bjobsargs, fast=args.fast) 76 | 77 | if not jobs: 78 | return 79 | 80 | # sort 81 | jobs.sort(key=lambda j: j["submit_time"]) 82 | jobs.sort(key=lambda j: j["priority"], reverse=True) # can be None 83 | jobs.sort(key=lambda j: -j["run_time"]) 84 | jobs.sort(key=lambda j: -statorder[j["stat"]]) 85 | if args.sort: 86 | try: 87 | jobs.sort(key=lambda j: j[args.sort]) 88 | except: 89 | print("Unknown sorting key \"%s\"!" % args.sort, file=sys.stderr) 90 | 91 | # no grouping 92 | if not args.groupby or args.groupby not in jobs[0]: 93 | if args.sum: 94 | jobs = [sumjobs(jobs)] 95 | printjobs(jobs, wide=args.wide, long=args.long, output=args.output, 96 | header=not args.noheader) 97 | return 98 | 99 | # grouping 100 | jobgroups = groupjobs(jobs, args.groupby) 101 | if not args.pending: 102 | if args.sum: 103 | jobs = [] 104 | for title in sorted(jobgroups.keys()): 105 | sumjob = sumjobs(jobgroups[title]) 106 | if args.groupby not in ("name", "jobname", "user"): 107 | sumjob["title"] = title 108 | jobs.append(sumjob) 109 | printjobs(jobs, wide=args.wide, long=args.long, output=args.output, 110 | header=not args.noheader) 111 | else: 112 | for title in sorted(jobgroups.keys()): 113 | printjobs(jobgroups[title], wide=args.wide, long=args.long, 114 | output=args.output, header=not args.noheader, 115 | title=title) 116 | return 117 | 118 | # pending 119 | for title in sorted(jobgroups.keys()): 120 | jobs = jobgroups[title] 121 | reasons = jobs[0]["pend_reason"] 122 | resreq = jobs[0]["resreq"] 123 | hostreq = jobs[0]["host_req"] 124 | if not reasons or len(reasons) != 1: 125 | title = None 126 | else: 127 | # use singular reason as title 128 | reason = reasons[0] 129 | title = reason[0] 130 | if not isinstance(reason[1], bool): 131 | title += ": %d" % reason[1] 132 | if args.sum: 133 | jobs = [sumjobs(jobs)] 134 | printjobs(jobs, wide=args.wide, long=args.long, output=args.output, 135 | header=not args.noheader, title=title) 136 | if reasons and len(reasons) > 1: 137 | # show pending reasons 138 | for reason, count in reasons: 139 | for pattern in pendingcolors: 140 | if re.match(pattern, reason): 141 | reason = color(reason, pendingcolors[pattern]) 142 | break 143 | if count is True: 144 | print(" " + reason) 145 | else: 146 | print(" %4d %s" % (count, reason)) 147 | # show potential hosts 148 | if resreq and not args.fast: 149 | resreq = re.sub(" && \(hostok\)", "", resreq) 150 | resreq = re.sub(" && \(mem>\d+\)", "", resreq) 151 | hosts = readhosts(["-R", resreq] + hostreq) 152 | hostnames = [h["host_name"] for h in hosts] 153 | jobs = readjobs(["-u", "all", "-r", "-m", " ".join(hostnames)]) 154 | hosts.sort(key=lambda h: h["host_name"]) 155 | printhosts(hosts, jobs, wide=args.wide, 156 | header=not args.noheader) 157 | if len(jobgroups) > 1: 158 | print() 159 | 160 | 161 | def main(): 162 | """Main program entry point.""" 163 | # argument parser and options 164 | parser = argparse.ArgumentParser( 165 | description="More comprehensive version of bjobs." 166 | ) 167 | exg = parser.add_mutually_exclusive_group() 168 | exg.add_argument( 169 | "-w", "--wide", 170 | help="show more detailed (wide) info", 171 | action="store_true" 172 | ) 173 | exg.add_argument( 174 | "-l", "--long", 175 | help="long job description", 176 | action="store_true" 177 | ) 178 | exg.add_argument( 179 | "-o", "--output", 180 | help="show value of FIELD", 181 | action="append", 182 | metavar="FIELD" 183 | ) 184 | parser.add_argument( 185 | "--sum", 186 | help="summarize across jobs", 187 | action="store_true" 188 | ) 189 | exg = parser.add_mutually_exclusive_group() 190 | exg.add_argument( 191 | "-p", "--pending", 192 | help="show pending jobs with reasons and potential hosts", 193 | action="store_true" 194 | ) 195 | exg.add_argument( 196 | "--groupby", 197 | help="group jobs by KEY", 198 | metavar="KEY" 199 | ) 200 | parser.add_argument( 201 | "--sort", 202 | help="sort jobs by KEY", 203 | metavar="KEY", 204 | nargs="?", 205 | const="id" 206 | ) 207 | parser.add_argument( 208 | "--fast", 209 | help="read less info from LSF", 210 | action="store_true" 211 | ) 212 | parser.add_argument( 213 | "--noheader", 214 | help="don't show the header", 215 | action="store_true" 216 | ) 217 | 218 | # shortcuts 219 | shortcuts = parser.add_argument_group("shortcuts") 220 | shortcuts.add_argument( 221 | "--jid", 222 | help="for \"-o id --noheader\"", 223 | action="store_true" 224 | ) 225 | for shortcutname, shortcutargs in ejobsshortcuts.items(): 226 | shortcuts.add_argument( 227 | "-" + shortcutname, 228 | help="for \"%s\"" % " ".join(shortcutargs), 229 | action="store_true" 230 | ) 231 | 232 | # hide or discard some arguments 233 | parser.add_argument( 234 | "-X", # discard 235 | help=argparse.SUPPRESS, 236 | action="store_true" 237 | ) 238 | parser.add_argument( 239 | "-u", # for username lookup 240 | help=argparse.SUPPRESS, 241 | ) 242 | # pass the following on to allow combining (e.g. with -p or -l) 243 | for l in list("rsda"): 244 | parser.add_argument( 245 | "-" + l, 246 | help=argparse.SUPPRESS, 247 | action="store_true" 248 | ) 249 | 250 | # bjobs arguments hint 251 | parser.add_argument_group( 252 | "further arguments", 253 | description="are passed to bjobs" 254 | ) 255 | 256 | # parse arguments 257 | args, bjobsargs = parser.parse_known_args() 258 | 259 | # run ejobs 260 | try: 261 | ejobs(args, bjobsargs) 262 | except (KeyboardInterrupt, IOError): 263 | pass 264 | 265 | 266 | if __name__ == "__main__": 267 | main() 268 | -------------------------------------------------------------------------------- /lsf/esub.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Wrapper script with bsub functionality.""" 3 | 4 | from __future__ import print_function 5 | 6 | import sys 7 | import os 8 | import shlex 9 | import argparse 10 | 11 | from submitjob import submitjob 12 | from utility import color 13 | 14 | 15 | def esub(args, bsubargs, jobscript): 16 | """Wrapper script with bsub functionality.""" 17 | data = {"command": ""} 18 | scriptargs = [] 19 | for line in jobscript.splitlines(True): 20 | if line.startswith("#!"): 21 | data["command"] += line 22 | elif line.startswith("#BSUB "): 23 | scriptargs += shlex.split(line[6:].split("#")[0]) 24 | else: 25 | data["command"] += line.split("#")[0] 26 | bsubargs = scriptargs + bsubargs 27 | last = False 28 | cmd = False 29 | for arg in bsubargs: 30 | if cmd: 31 | data["command"] += " " + arg 32 | continue 33 | if arg[0] == "-": 34 | if last: 35 | data[last] = True 36 | last = arg 37 | else: 38 | if last: 39 | data[last] = arg 40 | last = False 41 | else: 42 | cmd = True 43 | data["command"] = arg 44 | if last: 45 | data[last] = True 46 | try: 47 | jobid = submitjob(data) 48 | print(jobid) 49 | except Exception as e: 50 | print(color(e.strerror, "r")) 51 | sys.exit(-1) 52 | 53 | 54 | def main(): 55 | """Main program entry point.""" 56 | parser = argparse.ArgumentParser( 57 | description="Wrapper for bsub." 58 | ) 59 | parser.add_argument_group("further arguments", 60 | description="are passed to bsub") 61 | 62 | args, bsubargs = parser.parse_known_args() 63 | 64 | jobscript = sys.stdin.read() 65 | try: 66 | esub(args, bsubargs, jobscript) 67 | except KeyboardInterrupt: 68 | pass 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /lsf/grouphosts.py: -------------------------------------------------------------------------------- 1 | """Sort the jobs in groups by attributes.""" 2 | 3 | from collections import defaultdict 4 | 5 | 6 | def grouphosts(jobs, key): 7 | """Sort the jobs in groups by attributes.""" 8 | result = defaultdict(list) 9 | for job in jobs: 10 | if isinstance(job[key], dict): 11 | for val in job[key]: 12 | result[val].append(job) 13 | else: 14 | result[job[key]].append(job) 15 | return dict(result) 16 | -------------------------------------------------------------------------------- /lsf/groupjobs.py: -------------------------------------------------------------------------------- 1 | """Sort the jobs in groups by attributes.""" 2 | 3 | from collections import defaultdict 4 | 5 | 6 | def groupjobs(jobs, key): 7 | """Sort the jobs in groups by attributes.""" 8 | result = defaultdict(list) 9 | for job in jobs: 10 | if key == "pend_reason": 11 | if len(job[key]) == 1: 12 | group = repr(job[key]) 13 | else: 14 | group = job["resreq"] 15 | group += repr(sorted(job[key])) 16 | group += repr(sorted(job["host_req"])) 17 | result[group].append(job) 18 | elif isinstance(job[key], dict): 19 | for val in job[key]: 20 | result[val].append(job) 21 | else: 22 | result[job[key]].append(job) 23 | return dict(result) 24 | -------------------------------------------------------------------------------- /lsf/printhosts.py: -------------------------------------------------------------------------------- 1 | """Print a list of hosts.""" 2 | 3 | from __future__ import print_function, division 4 | 5 | import os 6 | import sys 7 | import re 8 | from time import time 9 | from subprocess import check_output 10 | from collections import defaultdict 11 | 12 | from utility import color, fractioncolor, format_duration, format_mem 13 | from groupjobs import groupjobs 14 | from sumjobs import sumjobs 15 | from useraliases import getuseralias 16 | 17 | 18 | def printhosts(hosts, jobs=[], wide=False, header=True, file=sys.stdout): 19 | """Print a list of hosts.""" 20 | if len(hosts) == 0: 21 | return 22 | sumhosts = not isinstance(hosts[0]["status"], str) 23 | jobsbyhost = groupjobs(jobs, "exec_host") 24 | # begin output 25 | screencols = int(check_output(["tput", "cols"])) 26 | whoami = os.getenv("USER") 27 | namelen = max(map(len, (host["host_name"] for host in hosts))) 28 | lens = { 29 | "host_name": min(20, max(6, namelen + 1)), 30 | "status": 8, 31 | "title": 15, 32 | "cpus": 10 33 | } 34 | if wide: 35 | lens["title"] = 20 36 | lens["host_name"] = max(6, namelen + 1) 37 | lens["model"] = 14 38 | if sumhosts: 39 | lens["status"] = 12 40 | lens["cpus"] = 14 41 | if header: 42 | h = "" 43 | if sumhosts and "title" in hosts[0]: 44 | h += "group".ljust(lens["title"]) 45 | h += "".join(n.ljust(lens[n]) for n in ("host_name", "status", "cpus")) 46 | h += "mem (free/total)" 47 | if wide: 48 | h += " " + "model".ljust(lens["model"]) 49 | h = h.upper() 50 | print(h, file=file) 51 | for host in hosts: 52 | l = "" 53 | if sumhosts and "title" in host: 54 | # title 55 | title = host["title"] 56 | if not wide: 57 | if len(title) >= lens["title"]: 58 | title = title[:lens["title"] - 2] + "*" 59 | l += color(title.ljust(lens["title"]), "b") 60 | # host_name 61 | l += host["host_name"].ljust(lens["host_name"]) 62 | # status 63 | if sumhosts: 64 | l += color("%3d " % host["status"]["ok"], "g") 65 | closed = sum(n for stat, n in host["status"].iteritems() if 66 | stat.startswith("closed_")) 67 | l += color("%3d " % closed, "r") 68 | other = len(host["host_names"]) - host["status"]["ok"] - closed 69 | if other: 70 | l += color("%3d " % other, "y") 71 | else: 72 | l += " " 73 | else: 74 | if host["status"] == "ok": 75 | l += color("ok".ljust(lens["status"]), "g") 76 | elif "closed_" in host["status"]: 77 | l += color(host["status"][7:].ljust(lens["status"]), "r") 78 | else: 79 | l += color(host["status"].ljust(lens["status"]), "y") 80 | # cpus 81 | total = host["max"] 82 | used = host["njobs"] 83 | free = total - used 84 | c = fractioncolor(free, total) 85 | if sumhosts: 86 | l += color("%4d" % free, c) + "/%4d" % total 87 | else: 88 | l += color("%2d" % free, c) + "/%2d" % total 89 | # mem 90 | if "mem" in host["load"]: 91 | free, used = host["load"]["mem"] 92 | total = free 93 | if used: # used can be None 94 | total += used 95 | if "maxmem" in host and host["maxmem"]: 96 | total = host["maxmem"] 97 | c = fractioncolor(free, total) 98 | l += " " + format_mem(free, c) + "/" + format_mem(total) 99 | if wide: 100 | if sumhosts: 101 | if len(host["model"]) == 1: 102 | l += host["model"][0].ljust(lens["model"]) 103 | else: 104 | nmodel = len(host["model"]) 105 | l += color((" %d" % nmodel).ljust(lens["model"]), "b") 106 | else: 107 | hoststr = host["model"] 108 | # Xeon Phi(s) 109 | phis = 0 110 | if "mic0" in host["load"]: 111 | phis += int(bool(host["load"]["mic0"][0])) 112 | phis += int(bool(host["load"]["mic0"][1])) 113 | if "mic1" in host["load"]: 114 | phis += int(bool(host["load"]["mic1"][0])) 115 | phis += int(bool(host["load"]["mic1"][1])) 116 | if phis > 0: 117 | hoststr += "+%dPhi" % phis 118 | # GPU 119 | if "gpu" in host["resources"]: 120 | hoststr += "+GPU" 121 | l += " " + hoststr.ljust(14) 122 | l += " " 123 | if host["rsv"] > 0: 124 | l += " %3d*" % host["rsv"] + color("reserved", "y") 125 | if sumhosts: 126 | hostnames = host["host_names"] 127 | else: 128 | hostnames = [host["host_name"]] 129 | jobs = [] 130 | for hostname in hostnames: 131 | if hostname in jobsbyhost: 132 | for job in jobsbyhost[hostname]: 133 | if job not in jobs: 134 | jobs.append(job) 135 | if sumhosts: 136 | jobgroups = groupjobs(jobs, "user") 137 | jobs = [] 138 | for user in sorted(jobgroups.keys()): 139 | jobs.append(sumjobs(jobgroups[user])) 140 | if jobs: 141 | for job in jobs: 142 | exclusive = job["exclusive"] 143 | if sumhosts: 144 | exclusive = len(exclusive) == 1 and True in exclusive 145 | times = color("x", "r") if exclusive else "*" 146 | nslots = sum(job["exec_host"][hn] for hn in hostnames 147 | if hn in job["exec_host"]) 148 | c = "r" if nslots >= 100 else "y" if nslots >= 20 else 0 149 | l += color(" %3d" % nslots, c) 150 | user = job["user"] 151 | if sumhosts: 152 | user = user.keys()[0] 153 | c = "g" if user == whoami else 0 154 | l += times + color(getuseralias(user).ljust(8), c) 155 | if wide and not sumhosts: 156 | if job["mem"]: 157 | l += format_mem(job["mem"]) 158 | else: 159 | l += " " 160 | if job["%complete"] and job["runlimit"]: 161 | ptime = job["%complete"] 162 | c = fractioncolor(1 - ptime / 100) 163 | l += color("%3d" % ptime, c) + "% " 164 | l += format_duration(job["runlimit"]) 165 | if host["comment"]: 166 | if sumhosts: 167 | for key, val in host["comment"].iteritems(): 168 | if key: 169 | l += " %3dx" % val + color(key, "b") 170 | else: 171 | l += " " + color(host["comment"], "b") 172 | print(l, file=file) 173 | file.flush() 174 | -------------------------------------------------------------------------------- /lsf/printjobs.py: -------------------------------------------------------------------------------- 1 | """Print a list of jobs.""" 2 | 3 | from __future__ import print_function, division 4 | 5 | import os 6 | import sys 7 | import re 8 | from time import time 9 | from subprocess import check_output 10 | from collections import defaultdict 11 | 12 | from utility import color, fractioncolor, findstringpattern 13 | from utility import format_duration, format_mem, format_time 14 | from useraliases import getuseralias 15 | 16 | 17 | def printjoblong(job, sumjob=False, file=sys.stdout): 18 | """Print a job in long format.""" 19 | keys = ("jobid", "stat", "user", "user_group", "queue", "job_name", 20 | "job_description", "interactive", "X11", "proj_name", 21 | "application", "service_class", "job_group", "job_priority", 22 | "dependency", "notify_begin", "notify_end", "command", 23 | "pre_exec_command", "post_exec_command", 24 | "resize_notification_command", "pids", "exit_code", "exit_reason", 25 | "exclusive", "from_host", "first_host", "exec_host", "nexec_host", 26 | "alloc_slot", "nalloc_slot", "host_file", "host_req", 27 | "submit_time", "start_time", "estimated_start_time", 28 | "specified_start_time", "specified_terminate_time", "runlimit", 29 | "time_left", "finish_time", "%complete", "warning_action", 30 | "action_warning_time", "pend_time", "pend_reason", "cpu_used", 31 | "run_time", "idle_factor", "exception_status", "slots", "mem", 32 | "max_mem", "avg_mem", "memlimit", "swap", "swaplimit", 33 | "min_req_proc", "max_req_proc", "resreq", "effective_resreq", 34 | "network_req", "filelimit", "corelimit", "stacklimit", 35 | "processlimit", "input_file", "output_file", "error_file", 36 | "output_dir", "sub_cwd", "exec_home", "exec_cwd", 37 | "forward_cluster", "forward_time") 38 | for key in keys: 39 | if not job[key]: 40 | continue 41 | if sumjob and isinstance(job[key], dict): 42 | if len(job[key]) == 1 and job[key].keys()[0] is None: 43 | continue 44 | print(key.ljust(20), file=file, end="") 45 | if key in ("swap", "mem", "avg_mem", "max_mem", "memlimit", 46 | "swaplimit", "corelimit", "stacklimit"): 47 | print(format_mem(job[key]), file=file) 48 | elif key in ("submit_time", "start_time", "finish_time"): 49 | print(format_time(job[key]), file=file) 50 | elif key in ("cpu_used", "time_left", "runlimit", "run_time", 51 | "pend_time"): 52 | print(format_duration(job[key]), file=file) 53 | elif key == "pend_reason": 54 | items = job[key] 55 | key2, val = items[0] 56 | print("%4d * %s" % (val, key2), file=file) 57 | for key2, val in items[1:]: 58 | print(20 * " " + "%4d * %s" % (val, key2), file=file) 59 | elif key in ("command", "pre_exec_command", "post_exec_command", 60 | "resize_notification_command"): 61 | script = job[key] 62 | for _ in xrange(3): 63 | script = script.replace("; ", ";;") 64 | script = script.replace(";;;; ", "; ") 65 | script = script.replace(";", "\n") 66 | script = re.sub("for \(\((.*?)\n\n(.*?)\n\n(.*?)\)\)", 67 | "for ((\\1; \\2; \\3))", script) 68 | script = script.splitlines() 69 | print(script[0], file=file) 70 | for line in script[1:]: 71 | print(20 * " " + line, file=file) 72 | elif key == "pids": 73 | print(" ".join(map(str, job[key])), file=file) 74 | else: 75 | if isinstance(job[key], dict): 76 | if len(job[key]) == 1: 77 | print(job[key].keys()[0], file=file) 78 | else: 79 | items = sorted(job[key].items()) 80 | print("%4d * %s" % items[0][::-1], file=file) 81 | for key2, val in items[1:]: 82 | print(20 * " " + "%4d * %s" % (val, key2), file=file) 83 | elif isinstance(job[key], list): 84 | print(" ".join(job[key]), file=file) 85 | else: 86 | print(job[key], file=file) 87 | 88 | 89 | def printjobs(jobs, wide=False, long=False, output=None, title=None, 90 | header=True, file=sys.stdout): 91 | """Print a list of jobs.""" 92 | if len(jobs) == 0: 93 | return 94 | sumjob = not isinstance(jobs[0]["jobid"], str) 95 | if long: 96 | for job in jobs: 97 | printjoblong(job, sumjob=sumjob, file=file) 98 | return 99 | if output: 100 | if header: 101 | print(*output, sep="\t", file=file) 102 | for job in jobs: 103 | print(*[job[field] for field in output], sep="\t", file=file) 104 | return 105 | # begin output 106 | whoami = os.getenv("USER") 107 | namelen = max(map(len, (job["job_name"] for job in jobs))) 108 | if sumjob: 109 | titlelen = 0 110 | if "title" in jobs[0]: 111 | titlelen = max(map(len, (job["title"] for job in jobs))) 112 | lens = { 113 | "title": 10, 114 | "jobid": 10, 115 | "name": min(20, max(6, namelen + 1)), 116 | "stat": 6, 117 | "user": 10, 118 | "time": 12, 119 | "model": 14 120 | } 121 | if sumjob: 122 | lens["stat"] = 12 123 | else: 124 | if any(job["jobid"][-1] == "]" for job in jobs): 125 | lens["jobid"] = 14 126 | if wide: 127 | if sumjob: 128 | lens["title"] = max(6, titlelen + 1) 129 | lens["name"] = max(6, namelen + 1) 130 | lens["queue"] = 8 131 | lens["project"] = 8 132 | lens["prio."] = 6 133 | # header 134 | if header: 135 | h = "" 136 | if sumjob and "title" in jobs[0]: 137 | h += "group".ljust(lens["title"]) 138 | if not sumjob: 139 | h += "jobid".ljust(lens["jobid"]) 140 | h += "".join(n.ljust(lens[n]) for n in ("name", "stat", "user")) 141 | if wide: 142 | h += "".join(n.ljust(lens[n]) for n in ("queue", "project")) 143 | if not sumjob: 144 | h += "prio.".ljust(lens["prio."]) 145 | if sumjob: 146 | h += "runtime".rjust(lens["time"]) 147 | else: 148 | h += "wait/runtime".rjust(lens["time"]) 149 | h += " resources" 150 | h = h.upper() 151 | if title: 152 | h += " " + color(title, "b") 153 | print(h, file=file) 154 | for job in jobs: 155 | l = "" 156 | if sumjob and "title" in job: 157 | # title 158 | title = job["title"] 159 | if not wide: 160 | if len(title) >= lens["title"]: 161 | title = title[:lens["title"] - 2] + "*" 162 | l += color(title.ljust(lens["title"]), "b") 163 | if not sumjob: 164 | # jobid 165 | l += (job["jobid"] + " ").ljust(lens["jobid"]) 166 | # job name 167 | jobname = job["job_name"] if job["job_name"] else "" 168 | if not wide: 169 | if len(jobname) >= lens["name"]: 170 | jobname = "*" + jobname[-lens["name"] + 2:] 171 | l += jobname.ljust(lens["name"]) 172 | # status 173 | if sumjob and isinstance(job["stat"], defaultdict): 174 | l += color("%3d " % job["stat"]["PEND"], "r") 175 | l += color("%3d " % job["stat"]["RUN"], "g") 176 | done = job["stat"]["EXIT"] + job["stat"]["DONE"] 177 | if done: 178 | l += color("%3d " % done, "y") 179 | else: 180 | l += " " 181 | else: 182 | stat = job["stat"] 183 | if stat == "PEND": 184 | c = "r" 185 | if len(job["pend_reason"]) == 1: 186 | pr = job["pend_reason"][0] 187 | if "New job is waiting for scheduling" in pr[0]: 188 | stat = "NEW" 189 | c = "b" 190 | if "Waiting for rescheduling after parameters" in pr[0]: 191 | stat = "MOD" 192 | c = "b" 193 | if "Job dependency condition not satisfied" in pr[0]: 194 | stat = "DEP" 195 | c = "b" 196 | elif stat == "RUN": 197 | c = "g" 198 | if job["interactive"]: 199 | stat = "INT" 200 | if job["X11"]: 201 | stat = "X11" 202 | else: 203 | c = "y" 204 | l += color(stat.ljust(lens["stat"]), c) 205 | # user 206 | if sumjob and isinstance(job["user"], defaultdict): 207 | l += color(str(len(job["user"])).ljust(lens["user"]), "b") 208 | else: 209 | c = "g" if job["user"] == whoami else 0 210 | username = getuseralias(job["user"]) 211 | l += color((username + " ").ljust(lens["user"]), c) 212 | if wide: 213 | # queue 214 | if sumjob and isinstance(job["queue"], defaultdict): 215 | l += color(str(len(job["queue"])).ljust(lens["queue"]), "b") 216 | else: 217 | l += job["queue"].ljust(lens["queue"]) 218 | # project 219 | if sumjob and isinstance(job["project"], defaultdict): 220 | l += color(str(len(job["project"])).ljust(lens["project"]), 221 | "b") 222 | else: 223 | l += job["project"].ljust(lens["project"]) 224 | if not sumjob: 225 | # priority 226 | l += str(job["priority"]).rjust(lens["prio."] - 1) + " " 227 | # wait/runtime 228 | t = job["run_time"] 229 | if not sumjob and job["stat"] == "PEND": 230 | t = time() - job["submit_time"] 231 | s = format_duration(t) 232 | l += s.rjust(lens["time"]) 233 | # resources 234 | # %t 235 | if job["%complete"]: 236 | ptime = job["%complete"] 237 | c = fractioncolor(1 - ptime / 100) 238 | if wide: 239 | s = "%6.2f" % round(ptime, 2) 240 | else: 241 | s = "%3d" % int(round(ptime)) 242 | l += " " + color(s, c) + "%t" 243 | elif not sumjob and job["stat"] == "RUN": 244 | l += " " 245 | if wide: 246 | l += " " 247 | # %m 248 | if job["memlimit"] and job["mem"] and job["slots"]: 249 | memlimit = job["memlimit"] * job["slots"] 250 | pmem = 100 * job["mem"] / memlimit 251 | c = fractioncolor(1 - pmem / 100) 252 | if wide: 253 | s = "%6.2f" % round(pmem, 2) 254 | else: 255 | s = "%3d" % int(round(pmem)) 256 | l += " " + color(s, c) + "%m" 257 | elif not sumjob and job["stat"] == "RUN": 258 | l += " " 259 | if wide: 260 | l += " " 261 | # time 262 | if job["runlimit"]: 263 | l += " " + format_duration(job["runlimit"]) 264 | # memory 265 | memlimit = None 266 | if job["memlimit"]: 267 | memlimit = job["memlimit"] 268 | if job["min_req_proc"]: 269 | memlimit *= job["min_req_proc"] 270 | if memlimit is not None: 271 | l += format_mem(memlimit).rjust(10) 272 | else: 273 | l += "".rjust(10) 274 | # Hosts 275 | if job["exec_host"]: 276 | if wide or len(job["exec_host"]) == 1: 277 | d = job["exec_host"] 278 | else: 279 | d = defaultdict(int) 280 | for key, val in job["exec_host"].iteritems(): 281 | d[re.match("(.*?)\d+", key).groups()[0] + "*"] += val 282 | for key in sorted(d.keys()): 283 | val = d[key] 284 | c = "r" if val >= 100 else "y" if val >= 20 else 0 285 | exclusive = job["exclusive"] 286 | if sumjob and exclusive not in (True, False): 287 | exclusive = False 288 | times = color("x", "r") if exclusive else "*" 289 | l += color(" %3d" % val, c) + times + "%s" % key 290 | else: 291 | if not sumjob: 292 | if job["min_req_proc"]: 293 | times = color("x", "r") if job["exclusive"] else "*" 294 | l += " %3d" % job["min_req_proc"] + times 295 | elif job["exclusive"]: 296 | l += " 1" + color("x", "r") 297 | else: 298 | l += " 1*" 299 | if job["host_req"]: 300 | hosts = job["host_req"] 301 | if len(hosts) == 1: 302 | hosts = hosts[0] 303 | else: 304 | if wide: 305 | hosts = "(%s)" % ", ".join(hosts) 306 | else: 307 | hosts = findstringpattern(hosts) 308 | l += hosts.ljust(lens["model"]) 309 | elif job["resreq"]: 310 | match = re.search("model==(\w+)", job["resreq"]) 311 | model = "" 312 | if match: 313 | model += match.groups()[0] 314 | if re.search("phi", job["resreq"]): 315 | if match: 316 | model += "+" 317 | model += "Phi" 318 | l += model.ljust(lens["model"]) 319 | if job["alloc_slot"]: 320 | l += color(" rsvd:", "y") 321 | if wide or len(job["alloc_slot"]) == 1: 322 | d = job["alloc_slot"] 323 | else: 324 | d = defaultdict(int) 325 | for key, val in job["alloc_slot"].iteritems(): 326 | d[re.match("(.*?)\d+", key).groups()[0] + "*"] += val 327 | for key, val in d.iteritems(): 328 | c = "r" if val >= 100 else "y" if val >= 20 else 0 329 | l += color(" %3d" % val, c) + "*%s" % key 330 | if wide and job["pend_reason"] and len(job["pend_reason"]) == 1: 331 | reason = job["pend_reason"][0][0] 332 | if reason != title: 333 | l += color(" %s" % reason, "b") 334 | if job["dependency"]: 335 | l += color(":", "b") 336 | if job["dependency"]: 337 | l += color(" %s" % job["dependency"], "b") 338 | print(l, file=file) 339 | file.flush() 340 | -------------------------------------------------------------------------------- /lsf/readhosts.py: -------------------------------------------------------------------------------- 1 | """Read hosts from LSF.""" 2 | 3 | import re 4 | from subprocess import Popen, PIPE, check_output 5 | 6 | 7 | def parseval(val): 8 | """Parse a value that could be int, float, % or contain a memory unit.""" 9 | if val == "-": 10 | return None 11 | if re.match("\d+$", val): 12 | return int(val) 13 | if re.match("\d+(.\d+)?([eE][+-]\d+)?$", val): 14 | return float(val) 15 | if re.match("\d+(.\d+)?%$", val): 16 | return 100 * float(val[:-1]) 17 | if re.match("\d+(.\d+)?[KMGT]$", val): 18 | e = {"K": 1, "M": 2, "G": 3, "T": 4}[val[-1]] 19 | return int(float(val[:-1]) * 1024 ** e) 20 | return val 21 | 22 | 23 | def readhosts(args, fast=False): 24 | """Read hosts from LSF.""" 25 | # read bhosts for dynamic information 26 | p = Popen(["bhosts", "-l"] + args, stdout=PIPE, stderr=PIPE) 27 | out, err = p.communicate() 28 | if err: 29 | return [] 30 | lines = out.splitlines() 31 | lines.reverse() 32 | hostorder = [] 33 | hosts = {} 34 | host = None 35 | stage = None 36 | while lines: 37 | tokens = lines.pop().split() 38 | if not tokens: # ignore empty lines 39 | continue 40 | if tokens[0] == "HOST": 41 | if host: 42 | hostorder.append(host["host_name"]) 43 | hosts[host["host_name"]] = host 44 | host = { 45 | "host_name": tokens[1], 46 | "load": {}, 47 | "threshold": {}, 48 | "comment": None, 49 | "affinity": None, 50 | } 51 | stage = None 52 | elif tokens[0] == "STATUS": 53 | keys = [token.lower() for token in tokens] 54 | try: 55 | vals = lines.pop().split() 56 | for key, val in zip(keys, vals): 57 | host[key] = parseval(val) 58 | except: 59 | pass 60 | elif tokens[0] == "CURRENT": 61 | stage = "load" 62 | elif tokens[0] == "LOAD": 63 | stage = "threshold" 64 | elif tokens[0] == "ADMIN": 65 | host["comment"] = " ".join(tokens[3:])[1:-1] 66 | elif tokens[0] == "CONFIGURED": 67 | host["affinity"] = " ".join(tokens[4:]) 68 | elif stage in ("load", "threshold"): 69 | keys = tokens 70 | try: 71 | total = map(parseval, lines.pop().split()[1:]) 72 | used = map(parseval, lines.pop().split()[1:]) 73 | new = {k: v for k, v in zip(keys, zip(total, used))} 74 | host[stage].update(new) 75 | except: 76 | pass 77 | hostorder.append(host["host_name"]) 78 | hosts[host["host_name"]] = host 79 | if fast: 80 | return [hosts[hn] for hn in hostorder] 81 | # read lshosts for static information 82 | out = check_output(["lshosts", "-w"] + hostorder) 83 | lines = out.splitlines() 84 | keys = lines[0].lower().split() 85 | for line in lines[1:]: 86 | vals = line.split() 87 | host = hosts[vals[0]] 88 | for key, val in zip(keys[1:], vals[1:]): 89 | host[key] = parseval(val) 90 | if key in ("server"): 91 | host[key] = val == "Yes" 92 | resources = vals[len(keys) - 1:] 93 | resources[0] = resources[0][1:] # get rid of () 94 | resources[-1] = resources[-1][:-1] 95 | host[keys[-1]] = resources 96 | return [hosts[hn] for hn in hostorder] 97 | -------------------------------------------------------------------------------- /lsf/readjobs.py: -------------------------------------------------------------------------------- 1 | """Read jobs from bjobs.""" 2 | 3 | from __future__ import division 4 | 5 | import re 6 | from time import strptime, strftime, mktime, time 7 | from subprocess import Popen, check_output, PIPE, CalledProcessError 8 | 9 | 10 | def parsemem(value, unit): 11 | """Parse a memory size value and unit to int.""" 12 | e = {"B": 0, "K": 1, "M": 2, "G": 3, "T": 4}[unit] 13 | return int(float(value) * 1024 ** e) 14 | 15 | 16 | def readjobs(args, fast=False): 17 | """Read jobs from bjobs.""" 18 | keys = ("jobid", "stat", "user", "user_group", "queue", "job_name", 19 | "job_description", "proj_name", "application", "service_class", 20 | "job_group", "job_priority", "dependency", "command", 21 | "pre_exec_command", "post_exec_command", 22 | "resize_notification_command", "pids", "exit_code", "exit_reason", 23 | "from_host", "first_host", "exec_host", "nexec_host", "alloc_slot", 24 | "nalloc_slot", "host_file", "submit_time", "start_time", 25 | "estimated_start_time", "specified_start_time", 26 | "specified_terminate_time", "time_left", "finish_time", 27 | "%complete", "warning_action", "action_warning_time", "pend_time", 28 | "cpu_used", "run_time", "idle_factor", "exception_status", "slots", 29 | "mem", "max_mem", "avg_mem", "memlimit", "swap", "swaplimit", 30 | "min_req_proc", "max_req_proc", "effective_resreq", "network_req", 31 | "filelimit", "corelimit", "stacklimit", "processlimit", 32 | "input_file", "output_file", "error_file", "output_dir", "sub_cwd", 33 | "exec_home", "exec_cwd", "forward_cluster", "forward_time") 34 | aliases = ( 35 | ("id", "jobid"), 36 | ("ugroup", "user_group"), 37 | ("name", "job_name"), 38 | ("description", "job_description"), 39 | ("proj", "proj_name"), 40 | ("project", "proj_name"), 41 | ("app", "application"), 42 | ("sla", "service_class"), 43 | ("group", "job_group"), 44 | ("priority", "job_priority"), 45 | ("cmd", "command"), 46 | ("pre_cmd", "pre_exec_command"), 47 | ("post_cmd", "post_exec_command"), 48 | ("resize_cmd", "resize_notification_command"), 49 | ("estart_time", "estimated_start_time"), 50 | ("sstart_time", "specified_start_time"), 51 | ("sterminate_time", "specified_terminate_time"), 52 | ("warn_act", "warning_action"), 53 | ("warn_time", "action_warning_time"), 54 | ("except_stat", "exception_status"), 55 | ("eresreq", "effective_resreq"), 56 | ("fwd_cluster", "forward_cluster"), 57 | ("fwd_time", "forward_time") 58 | ) 59 | delimiter = "\7" 60 | # get detailed job information 61 | cmd = ["bjobs", "-X", "-o", 62 | " ".join(keys) + " delimiter='" + delimiter + "'"] + args 63 | p = Popen(cmd, stdout=PIPE, stderr=PIPE) 64 | out, err = p.communicate() 65 | # ignore certain errors 66 | err = [line for line in err.splitlines() if line] 67 | # (fix for bjobs display_flexibleOutput bug) 68 | err = [line for line in err if "display_flexibleOutput: Failed to get the " 69 | "value of job_name" not in line] 70 | if err: 71 | return [] 72 | out = out.splitlines()[1:] # get rid of header 73 | joborder = [] 74 | jobs = {} 75 | for line in out: 76 | job = dict(zip(keys, line.split(delimiter))) 77 | for key, val in job.iteritems(): 78 | if val == "-": 79 | job[key] = None 80 | elif key in ("exit_code", "nexec_host", "slots", "job_priority", 81 | "min_req_proc", "max_req_proc"): 82 | job[key] = int(val) 83 | elif key in ("cpu_used", "run_time", "idle_factor"): 84 | job[key] = float(val.split()[0]) 85 | elif key in ("submit_time", "start_time", "finish_time"): 86 | if val[-1] in "ELXA": 87 | val = val[:-2] 88 | job[key] = mktime(strptime(val, 89 | "%b %d %H:%M:%S %Y")) 90 | elif key == "time_left": 91 | if val[-1] in "ELXA": 92 | val = val[:-2] 93 | try: 94 | v = val.split(":") 95 | job[key] = 60 * (60 * int(v[0]) + int(v[1])) 96 | except: 97 | job[key] = mktime(strptime(year + " " + val, 98 | "%Y %b %d %H:%M")) 99 | elif key == "%complete": 100 | job[key] = float(val.split("%")[0]) 101 | elif key in ("exec_host", "alloc_slot"): 102 | val = val.split(":") 103 | hosts = {} 104 | for v in val: 105 | if "*" in v: 106 | v = v.split("*") 107 | hosts[v[1]] = int(v[0]) 108 | else: 109 | hosts[v] = 1 110 | job[key] = hosts 111 | elif key in ("swap", "mem", "avg_mem", "max_mem", "memlimit", 112 | "swaplimit", "corelimit", "stacklimit"): 113 | val = val.split() 114 | job[key] = parsemem(val[0], val[1][0]) 115 | elif key == "pids": 116 | if val: 117 | job[key] = map(int, val.split(",")) 118 | else: 119 | job[key] = [] 120 | 121 | # set jet unknown keys 122 | for key in ("pend_reason", "runlimit", "mail", "exclusive", "resreq", 123 | "combined_resreq", "notify_begin", "notify_end"): 124 | job[key] = None 125 | # info from resreq 126 | if job["effective_resreq"]: 127 | job["exclusive"] = "exclusive=1" in job["effective_resreq"] 128 | if "runlimit" in job["effective_resreq"]: 129 | match = re.match("runlimit=\d+", job["effective_resreq"]) 130 | job["runlimit"] = int(match.groups()[0]) 131 | elif job["run_time"] and job["%complete"]: 132 | t = job["run_time"] / job["%complete"] * 100 133 | # rounding 134 | if t > 10 * 60 * 60: 135 | job["runlimit"] = round(t / (60 * 60)) * 60 * 60 136 | else: 137 | job["runlimit"] = round(t / 60) * 60 138 | # extract array id 139 | if job["job_name"]: 140 | match = re.match(".*(\[\d+\])$", job["job_name"]) 141 | if match: 142 | job["jobid"] += match.groups()[0] 143 | joborder.append(job["jobid"]) 144 | jobs[job["jobid"]] = job 145 | if not joborder: 146 | return [] 147 | # set some keys 148 | for job in jobs.values(): 149 | job.update({ 150 | "interactive": None, 151 | "pend_reason": [], 152 | "host_req": [] 153 | }) 154 | if fast: 155 | for job in jobs.values(): 156 | job.update({alias: job[key] for alias, key in aliases}) 157 | return [jobs[jid] for jid in joborder] 158 | # get more accurate timestamps from -W output 159 | try: 160 | out = check_output(["bjobs", "-noheader", "-W"] + joborder) 161 | except CalledProcessError as e: 162 | out = e.output 163 | for line in out.splitlines(): 164 | line = line.split() 165 | if len(line) != 15: 166 | continue 167 | jobid = line[0] 168 | match = re.match(".*(\[\d+\])$", line[-9]) 169 | if match: 170 | jobid += match.groups()[0] 171 | job = jobs[jobid] 172 | for n, key in ( 173 | (-8, "submit_time"), 174 | (-2, "start_time"), 175 | (-1, "finish_time") 176 | ): 177 | if line[n] != "-": 178 | try: 179 | year = strftime("%Y") # guess year 180 | t = mktime(strptime(year + " " + line[n], 181 | "%Y %m/%d-%H:%M:%S")) 182 | if t > time(): 183 | # adjust guess for year 184 | year = str(int(year) - 1) 185 | t = mktime(strptime(year + " " + line[n], 186 | "%Y %m/%d-%H:%M:%S")) 187 | job[key] = t 188 | except: 189 | pass 190 | # get pending reasons (if any) 191 | pids = [jid for jid in joborder if jobs[jid]["stat"] == "PEND"] 192 | if pids: 193 | try: 194 | out = check_output(["bjobs", "-p"] + pids) 195 | except CalledProcessError as e: 196 | out = e.output 197 | job = None 198 | for line in out.split("\n")[1:-1]: 199 | if line[0] == " " or line[:4] == "JOBS": 200 | # pending reason 201 | if ":" in line: 202 | match = re.match(" ?(.*): (\d+) hosts?;", line).groups() 203 | job["pend_reason"].append((match[0], int(match[1]))) 204 | else: 205 | match = re.match(" ?(.*);", line).groups() 206 | job["pend_reason"].append((match[0], True)) 207 | else: 208 | if job: 209 | job["pend_reason"].sort(key=lambda p: -p[1]) 210 | # next job 211 | line = line.split() 212 | jobid = line[0] 213 | match = re.match(".*(\[\d+\])$", " ".join(line[5:-3])) 214 | if match: 215 | jobid += match.groups()[0] 216 | job = jobs[jobid] 217 | job["pend_reason"] = [] 218 | # get -UF (long) output (may be restricted) 219 | try: 220 | out = check_output(["bjobs", "-UF"] + joborder) 221 | except CalledProcessError as e: 222 | out = e.output 223 | out = out.split(78 * "-" + "\n") 224 | for jobout in out: 225 | lines = [line.strip() for line in jobout.splitlines()] 226 | jobid = re.match("Job <(\d+(?:\[\d+\])?)>", lines[1]).groups()[0] 227 | job = jobs[jobid] 228 | # name (fix for bjobs display_flexibleOutput bug) 229 | match = re.search("Name <(.*?)>", lines[1]) 230 | if match: 231 | job["job_name"] = match.groups()[0] 232 | # mail 233 | match = re.search("Mail <(.*?)>", lines[1]) 234 | if match: 235 | job["mail"] = match.groups()[0] 236 | # flags 237 | job["exclusive"] = "Exclusive Execution" in lines[2] 238 | job["notify_begin"] = "Notify when job begins" in lines[2] 239 | job["notify_end"] = bool(re.search("Notify when job (?:begins/)?ends", 240 | lines[2])) 241 | job["interactive"] = "Interactive pseudo-terminal shell" in lines[1] 242 | job["X11"] = "ssh X11 forwarding mode" in lines[1] 243 | # resource request 244 | match = re.search("Requested Resources <(.*?)>[,;]", lines[2]) 245 | if match: 246 | job["resreq"] = match.groups()[0] 247 | if lines[-2].startswith("Combined: "): 248 | job["combined_resreq"] = lines[-2].split(": ", 1)[1] 249 | # requested hosts 250 | match = re.search("Specified Hosts <(.*?)>(?:;|, [^<])", lines[2]) 251 | if match: 252 | job["host_req"] = match.groups()[0].split(">, <") 253 | # runlimit 254 | idx = lines.index("RUNLIMIT") 255 | job["runlimit"] = int(float(lines[idx + 1].split()[0]) * 60) 256 | # memlimits 257 | # aliases 258 | for job in jobs.values(): 259 | job.update({alias: job[key] for alias, key in aliases}) 260 | return [jobs[jid] for jid in joborder] 261 | -------------------------------------------------------------------------------- /lsf/shortcuts.py: -------------------------------------------------------------------------------- 1 | """Argument shortcuts for ejobs/ehosts/esub.""" 2 | 3 | ejobsshortcuts = { 4 | "aices": ["-G", "p_aices", "-P", "aices"], 5 | "aices2": ["-G", "p_aices", "-P", "aices2"], 6 | "aices24": ["-G", "p_aices", "-P", "aices-24"], 7 | } 8 | 9 | ehostsshortcuts = { 10 | "aices": "aices", 11 | "aices2": "aices2", 12 | "aices24": "aices24", 13 | } 14 | -------------------------------------------------------------------------------- /lsf/submitjob.py: -------------------------------------------------------------------------------- 1 | """Submit a job to LSF.""" 2 | 3 | from __future__ import print_function, division 4 | 5 | import sys 6 | import re 7 | from subprocess import Popen, PIPE 8 | 9 | 10 | def submitjob(data, shell=False): 11 | """Submit a job to LSF.""" 12 | if "command" not in data: 13 | print("no command given", file=sys.stderr) 14 | return False 15 | aliases = ( 16 | ("id", "jobid"), 17 | ("name", "job_name"), 18 | ("description", "job_description"), 19 | ("proj", "proj_name"), 20 | ("project", "proj_name"), 21 | ("app", "application"), 22 | ("sla", "service_class"), 23 | ("group", "job_group"), 24 | ("priority", "job_priority"), 25 | ("cmd", "command"), 26 | ("pre_cmd", "pre_exec_command"), 27 | ("post_cmd", "post_exec_command"), 28 | ("resize_cmd", "resize_notification_command"), 29 | ("estart_time", "estimated_start_time"), 30 | ("sstart_time", "specified_start_time"), 31 | ("sterminate_time", "specified_terminate_time"), 32 | ("warn_act", "warning_action"), 33 | ("warn_time", "action_warning_time"), 34 | ("except_stat", "exception_status"), 35 | ("eresreq", "effective_resreq"), 36 | ("fwd_cluster", "forward_cluster"), 37 | ("fwd_time", "forward_time") 38 | ) 39 | strargs = { 40 | "job_name": "-J", 41 | "job_description": "-Jd", 42 | "input_file": "-i", 43 | "output_file": "-o", 44 | "error_file": "-e", 45 | "project": "-P", 46 | "dependency": "-w" 47 | } 48 | intargs = { 49 | "slots": "-n" 50 | } 51 | memargs = { 52 | "memlimit": "-M", 53 | "corelimit": "-C", 54 | "stacklimit": "-S" 55 | } 56 | timeargs = { 57 | "runlimit": "-W" 58 | } 59 | args = [] 60 | for key, val in data.iteritems(): 61 | if key[0] == "-": 62 | if val is True: 63 | args += [key] 64 | if not isinstance(val, bool): 65 | args += [key, val] 66 | continue 67 | if key in aliases: 68 | key = aliases[key] 69 | if key in strargs: 70 | args += [key, val] 71 | if key in intargs: 72 | args += [key, str(val)] 73 | if key in memargs: 74 | args += [key, str(val // 1024)] 75 | if key in timeargs: 76 | args += [key, str(val // 60)] 77 | # output file from jobname 78 | if "-o" not in args and "-J" in args: 79 | args += ["-o", args[args.index["-J"] + 1] + ".%J.out"] 80 | cmd = ["bsub"] + args 81 | p = Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=PIPE) 82 | if shell: 83 | command = '#!/bin/bash -l\n' 84 | else: 85 | command = '' 86 | command += data["command"] 87 | out, err = p.communicate(command) 88 | match = re.search("Job <(.*?)> is submitted", out) 89 | if match: 90 | return match.groups()[0] 91 | else: 92 | match = re.search("Error: (.*)\n", err) 93 | if match: 94 | err = match.groups()[0] 95 | raise EnvironmentError(1, err) 96 | -------------------------------------------------------------------------------- /lsf/sumhosts.py: -------------------------------------------------------------------------------- 1 | """Summarize a list of hosts.""" 2 | 3 | from collections import defaultdict 4 | 5 | from utility import findstringpattern 6 | 7 | 8 | def sumhosts(hosts): 9 | """Summarize a list of hosts.""" 10 | sumhost = {} 11 | for key in hosts[0]: 12 | if key in ("host_name"): 13 | # find string pattern 14 | sumhost[key] = findstringpattern([host[key] for host in hosts 15 | if host[key]]) 16 | elif key in ("max", "njobs", "run", "ssusp", "ususp", "rsv", "ncpus", 17 | "maxmem", "maxswp"): 18 | # sum 19 | sumhost[key] = sum(host[key] for host in hosts if host[key]) 20 | elif key in ("status", "server", "type", "comment"): 21 | sumhost[key] = defaultdict(int) 22 | for host in hosts: 23 | sumhost[key][host[key]] += 1 24 | elif key in ("load", "threshold"): 25 | # sum up free/used pairs 26 | sumhost[key] = dict() 27 | for key2 in hosts[0][key]: 28 | free, used = zip(*[host[key][key2] for host in hosts]) 29 | if all(x is None for x in free): 30 | free = None 31 | else: 32 | free = sum(x for x in free if x) 33 | if all(x is None for x in used): 34 | used = None 35 | else: 36 | used = sum(x for x in used if x) 37 | sumhost[key][key2] = [free, used] 38 | else: 39 | # colect 40 | sumhost[key] = [] 41 | for host in hosts: 42 | if host[key] and host[key] not in sumhost[key]: 43 | sumhost[key].append(host[key]) 44 | sumhost["host_names"] = [host["host_name"] for host in hosts] 45 | return sumhost 46 | -------------------------------------------------------------------------------- /lsf/sumjobs.py: -------------------------------------------------------------------------------- 1 | """Summarize a list of jobs.""" 2 | 3 | from __future__ import division 4 | 5 | from utility import findstringpattern 6 | 7 | from collections import defaultdict 8 | 9 | 10 | def sumjobs(jobs): 11 | """Summarize a list of jobs.""" 12 | sumjob = {} 13 | for key in jobs[0]: 14 | if key in ("job_name", "job_description", "input_file", "output_file", 15 | "error_file", "output_dir", "sub_cwd", "exec_home", 16 | "exec_cwd", "exit_reson", "application", "dependency", 17 | "command", "pre_exec_command", "post_exec_command", 18 | "resize_notification_command", "effective_resreq"): 19 | # find string pattern 20 | sumjob[key] = findstringpattern([job[key] for job in jobs 21 | if job[key]]) 22 | elif key in ("runlimit", "swaplimit", "stacklimit", "memlimit", 23 | "filelimit", "processlimit", "corelimit", "run_time", 24 | "swap", "slots", "min_req_proc", "max_req_proc", "mem", 25 | "max_mem", "avg_mem", "nexec_host", "cpu_used", 26 | "time_left"): 27 | # sum 28 | sumjob[key] = sum(job[key] for job in jobs if job[key]) 29 | elif key in ("%complete", "job_priority", "idle_factor"): 30 | # compute average 31 | pcomp = [job[key] for job in jobs if job[key]] 32 | if pcomp: 33 | sumjob[key] = sum(pcomp) / len(pcomp) 34 | else: 35 | sumjob[key] = None 36 | elif key in ("exec_host", "rsvd_host"): 37 | # collect host counts 38 | sumjob[key] = defaultdict(int) 39 | for job in jobs: 40 | if job[key]: 41 | for host, count in job[key].iteritems(): 42 | sumjob[key][host] += count 43 | elif key == "pids": 44 | # collect 45 | sumjob[key] = sum((job[key] for job in jobs if job[key]), []) 46 | elif key == "jobid": 47 | # collect 48 | sumjob[key] = [] 49 | for job in jobs: 50 | if job[key] and job[key] not in sumjob[key]: 51 | sumjob[key].append(job[key]) 52 | elif key == "pend_reason": 53 | # sum 54 | sumjob[key] = defaultdict(int) 55 | for job in jobs: 56 | if not job[key]: 57 | continue 58 | for key2, val in job[key]: 59 | sumjob[key][key2] += val 60 | sumjob[key] = sumjob[key].items() 61 | else: 62 | # collect and count 63 | sumjob[key] = defaultdict(int) 64 | for job in jobs: 65 | if key == "host_req": 66 | for host in job[key]: 67 | sumjob[key][host] += 1 68 | elif key == "alloc_slot": 69 | if job[key]: 70 | for host in job[key]: 71 | sumjob[key][host] += job[key][host] 72 | else: 73 | sumjob[key][job[key]] += 1 74 | if key not in ("stat", "alloc_slot"): 75 | if len(sumjob[key]) == 1: 76 | sumjob[key] = sumjob[key].keys()[0] 77 | return sumjob 78 | -------------------------------------------------------------------------------- /lsf/useraliases.py: -------------------------------------------------------------------------------- 1 | """Username to real name aliasing utilities.""" 2 | 3 | import os 4 | 5 | useraliases = None 6 | 7 | 8 | def loadaliases(): 9 | """Load all aliases from ~/.useraliases.""" 10 | global useraliases 11 | if useraliases is None: 12 | filename = os.environ["HOME"] + "/.useraliases" 13 | if os.path.isfile(filename): 14 | with open(filename) as fin: 15 | useraliases = dict(line.strip().split(None, 1) for line in fin) 16 | else: 17 | useraliases = {} 18 | return useraliases 19 | 20 | 21 | def getuseralias(user): 22 | """Look up the alias for a user.""" 23 | aliases = loadaliases() 24 | if user in aliases: 25 | return aliases[user] 26 | else: 27 | return user 28 | 29 | 30 | def lookupalias(alias): 31 | """Look up the user for an alias.""" 32 | aliases = loadaliases() 33 | try: 34 | return next(k for k, v in aliases.iteritems() if v == alias) 35 | except: 36 | return alias 37 | -------------------------------------------------------------------------------- /lsf/utility.py: -------------------------------------------------------------------------------- 1 | """Utility function.""" 2 | 3 | from __future__ import division 4 | 5 | import sys 6 | from time import strftime, localtime 7 | 8 | 9 | def color(string, c): 10 | """Surround a string by shell coloring commands.""" 11 | if not sys.stdout.isatty(): 12 | return string 13 | names = {"r": 31, "g": 32, "y": 33, "b": 34} 14 | if c in names: 15 | c = names[c] 16 | return "\033[{}m{}\033[0m".format(c, string) 17 | 18 | 19 | def fractioncolor(nom, denom=1): 20 | """Color for utilization fractions.""" 21 | if denom == 0: 22 | return "r" 23 | fraction = nom / denom 24 | if fraction < .1: 25 | return "r" 26 | if fraction < .25: 27 | return "y" 28 | if fraction < .9: 29 | return 0 30 | return "g" 31 | 32 | 33 | def format_duration(t): 34 | """Format a duration.""" 35 | t = int(t) 36 | if t == 0: 37 | return " 0" 38 | # seconds 39 | s = "{:0>2}".format(t % 60) 40 | t //= 60 41 | # minutes 42 | if t >= 60: 43 | s = "{:0>2}:".format(t % 60) + s 44 | else: 45 | s = "{:>2}:".format(t % 60) + s 46 | t //= 60 47 | if t == 0: 48 | return " " + s 49 | s = s.rjust(5, "0") 50 | # hours 51 | s = "{:>2}:".format(t % 24) + s 52 | t //= 24 53 | if t == 0: 54 | return " " + s 55 | # days 56 | c = "r" if t >= 7 else "y" 57 | s = color("{:>2}d ".format(t), c) + s 58 | return s 59 | 60 | 61 | def format_time(t): 62 | """Format a time stamp.""" 63 | return strftime("%x %X", localtime(t)) 64 | 65 | 66 | def format_mem(s, c=0): 67 | """Format a memory size value.""" 68 | i = 0 69 | while abs(s) >= 1024: 70 | s /= 1024 71 | i += 1 72 | e = ["B ", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"][i] 73 | return color("%6.1f" % s, c) + e 74 | 75 | 76 | def findstringpattern(strings): 77 | """Find a common patter in a list of string.""" 78 | if not len(strings): 79 | return "" 80 | if all(strings[0] == s for s in strings[1:]): 81 | return strings[0] 82 | prefix = "" 83 | while strings[0] and all(strings[0][0] == s[0] for s in strings[1:] if s): 84 | prefix += strings[0][0] 85 | strings = [s[1:] for s in strings] 86 | suffix = "" 87 | while strings[0] and all(strings[0][-1] == s[-1] 88 | for s in strings[1:] if s): 89 | suffix = strings[0][-1] + suffix 90 | strings = [s[:-1] for s in strings] 91 | return prefix + "*" + suffix 92 | -------------------------------------------------------------------------------- /scripts/ehosts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from lsf import ehosts 3 | 4 | if __name__ == "__main__": 5 | ehosts.main() 6 | -------------------------------------------------------------------------------- /scripts/ejobs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from lsf import ejobs 3 | 4 | if __name__ == "__main__": 5 | ejobs.main() 6 | -------------------------------------------------------------------------------- /scripts/esub: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from lsf import esub 3 | 4 | if __name__ == "__main__": 5 | esub.main() 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from distutils.core import setup 3 | 4 | setup(name="lsf", 5 | version="1.2", 6 | description="LSF job scheduler utilities", 7 | author="Elmar Peise", 8 | author_email="peise@aices.rwth-aachen.de", 9 | url="http://github.com/elmar-peise/python-lsf", 10 | packages=["lsf"], 11 | scripts=["scripts/ejobs", "scripts/ehosts", "scripts/esub"] 12 | ) 13 | --------------------------------------------------------------------------------