├── BUGS ├── COPYING ├── Makefile ├── README ├── contrib └── torque │ ├── README.md │ └── lustrejobtop.py ├── dict.c ├── dict.h ├── hooks.c ├── hooks.h ├── job-map.c ├── list.h ├── lltop-ev.c ├── lltop.h ├── main.c ├── qhost_job_map ├── rbtree.c ├── rbtree.h ├── serv-cts.c ├── serv.c ├── string1.h └── tacc_lltop /BUGS: -------------------------------------------------------------------------------- 1 | ssh may breakup lines coming from lltop-serv confusing lltop. 2 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
340 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CPPFLAGS = $(CDEBUG) 3 | CFLAGS = -Wall 4 | lltop_objects = main.o hooks.o rbtree.o 5 | lltop_serv_objects = serv.o rbtree.o 6 | 7 | all: lltop lltop-serv 8 | 9 | lltop: $(lltop_objects) 10 | $(CC) $(CFLAGS) $^ -o $@ 11 | 12 | lltop-serv: $(lltop_serv_objects) 13 | $(CC) $(CFLAGS) $^ -o $@ -lrt 14 | 15 | clean: 16 | rm -f lltop $(lltop_objects) lltop-serv $(lltop_serv_objects) 17 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | *Lltop* 2 | 3 | Lltop[0] is a command line utility which gathers I/O statistics from 4 | Lustre[1] filesystem servers, along with job assignment data from 5 | cluster batch schedulers, to give a job-by-job accounting of 6 | filesystem load. Under typical usage, lltop is invoked with the name 7 | of a filesystem, runs for a configurable interval (10 seconds say), 8 | and outputs a table summarizing I/O and RPC loads indexed by job 9 | identifier; for example: 10 | 11 | $ lltop work 12 | JOB WR_MB RD_MB REQS OWNER WORKDIR 13 | 12101 15925 67630 133694 jfourier /work/jfourier/fftw_run 14 | 10322 2254 1027 2504 claude /work/claude/viscous-flow-08 15 | 13007 756 21024 10007 ludwig /work/ludwig/boltzeq.mvapich2 16 | ... 17 | 18 | Normally, lltop is run in response to observations of excessive load 19 | on file servers or degraded filesystem performance, and is used to 20 | assist system administrators in identifying jobs (and users) with 21 | problematic I/O patterns. A potential secondary use is to determine 22 | the I/O profiles of applications running at scale. lltop is designed 23 | to run as a point and shoot diagnostic utility, and is not a 24 | replacement for continuous monitoring tools such as LMT[2] or 25 | Collectl[3]. 
26 | 27 | *Overview* 28 | 29 | Lltop has two executable components, lltop itself, and lltop-serv. 30 | lltop is usually run directly and given the name of a filesystem to 31 | query. From the filesystem name, it derives a list of servers (MDSs 32 | and OSSs), and for each it forks and execs ssh to run a copy of 33 | lltop-serv on the server. 34 | 35 | On the server, lltop-serv scrapes the per-client stats files 36 | 37 | /proc/fs/lustre/{mds,obdfilter}/TARGET/exports/CLIENT/stats 38 | 39 | to determine each client's load in terms of bytes written, bytes read, 40 | and requests processed. It actually makes two passes through the 41 | stats files[4], sleeping for a configurable interval between, and 42 | returns the differences. The output of lltop-serv consists of lines 43 | [6] of the form 44 | 45 | ADDR@NET WR_BYTES RD_BYTES REQS 46 | 47 | where 48 | 49 | ADDR@NET is the client address according to Lustre, 50 | for example 192.0.32.10@tcp, 51 | WR_BYTES and RD_BYTES are the number of bytes written and read, 52 | REQS is the number of requests other than pings[5]. 53 | 54 | Lltop reads this output and translates client addresses to hostnames, 55 | and hostnames to jobids[7, 8], to account for each client's load against 56 | its current job. If lltop cannot find a job assignment for a given 57 | client then it considers the client to be the sole member of a job whose 58 | jobid is the client's hostname. Similarly, if lltop cannot find a 59 | hostname for a given client IP address, it uses the address as the 60 | client's name and current jobid. This allows us to handle load 61 | generated by login or admin nodes in the same band. 62 | 63 | *Configuring lltop* 64 | 65 | To get lltop to work on your site you probably need to override some 66 | of the default configuration. Most of this can be accomplished 67 | through command line options, but the source is organized so that the 68 | same effects (and more) can be achieved by modifying the functions in 69 | hooks.c. Here are the main things you may need to do, along with some 70 | suggestions. 
71 | 72 | 1. Tell lltop on which servers it should run lltop-serv. You have 73 | three options: 74 | 75 | a. Modify the function get_serv_list() in hooks.c, so that lltop may 76 | be invoked with the filesystem name as an argument. 77 | 78 | b. Use the -l (--server-list) option to specify a list of servers 79 | directly: 80 | 81 | lltop -l mds1.example.com oss{01..27}.example.com 82 | 83 | c. Provided that FILESYSTEM is mounted on the current host, use some 84 | crazy pipeline, like: 85 | 86 | sed 's/@.*$//' /proc/fs/lustre/{mdc,osc}/FILESYSTEM-*/*_conn_uuid | sort | uniq | xargs lltop -l 87 | 88 | 2. Tell lltop how to translate Lustre client addresses (usually dotted 89 | quads with the @NET suffix stripped off) to hostnames. How well 90 | does reverse DNS work at your site? If the answer is "Uhhh, not real 91 | well.", or if you have some weird LNET with a weird address format 92 | like qswlnd, whatever that is, then keep reading, otherwise skip to 3. 93 | The default address to host lookup uses getnameinfo(), which should 94 | work fine given a correct site config. If not, here are three 95 | possibilities: 96 | 97 | a. Using getnameinfo_get_host() as a template, add the function 98 | my_site_get_host() to hooks.c and tell lltop to use it. 99 | 100 | b. Use the -g (--get-host) option to specify an external command 101 | which should take the address as its only argument and print a 102 | hostname. If it succeeds, your external command should return 0, 103 | otherwise lltop will treat the dotted quad as if it is the client's 104 | hostname. 105 | 106 | c. Fix /etc/hosts, /etc/nsswitch.conf, /etc/resolv.conf,..., so 107 | that getnameinfo() works on the host where you run lltop. 108 | 109 | 3. Tell lltop how to look up the current job for a host. Lltop was 110 | originally written for TACC Ranger which uses SGE for batch 111 | scheduling. 
Under that setup the JOBID of the current job on HOST is 112 | determined from the existence of a file 113 | 114 | /share/sge6.2/execd_spool/HOST/active_jobs/JOBID.* 115 | 116 | This is the default method in lltop. Otherwise: 117 | 118 | a. If you run SGE but you need to override the execd_spool path then 119 | do so by modifying hooks.c or passing --execd-spool=PATH. 120 | 121 | b. Using execd_spool_get_job() as a template, add the function 122 | my_site_get_job() to hooks.c and tell lltop to use it. 123 | 124 | c. Use the -j (--get-job) option to specify an external command to 125 | do job lookup. It should function like the external host lookup 126 | command described above. 127 | 128 | d. Use the -m (--job-map) option to specify an external command 129 | which produces a "job map." This is useful if you use something 130 | like qhost for job lookup, since using 'qhost -j -h HOST' to get 131 | the current job of a single HOST takes about the same time as calling 132 | 'qhost -j' to get the current job of all nodes at once. See the 133 | attached script qhost_job_map. 134 | 135 | *Installing lltop* 136 | 137 | Run make, put lltop somewhere in your path on an admin node, put 138 | lltop-serv somewhere in your path on the Lustre servers. Also see the 139 | included script tacc_lltop which we use to add job owner and workdir 140 | to the output of lltop. 141 | 142 | *Getting Help* 143 | 144 | $ lltop --help 145 | Usage: lltop [OPTION]... FILESYSTEM 146 | or: lltop [OPTION]... -l SERVER... 147 | Report load by job for Lustre FILESYSTEM or SERVER(s). 148 | 149 | Mandatory arguments to long options are mandatory for short options too. 
150 | -f, --fqdn use fully qualified domain names for clients 151 | -g, --get-host=COMMAND use COMMAND for reverse DNS lookups 152 | -h, --help display this help and exit 153 | -i, --interval=NUMBER report load over NUMBER seconds 154 | -j, --get-job=COMMAND use COMMAND for job lookup 155 | -l, --server-list report load on servers given as arguments 156 | -m, --job-map=COMMAND use COMMAND to get job map 157 | -n, --limit=NUMBER limit output to NUMBER jobs 158 | --no-header do not display header 159 | --lltop-serv=PATH use lltop-serv at PATH on servers 160 | --remote-shell=PATH use remote shell at PATH to execute lltop-serv 161 | --execd-spool=PATH use execd_spool directory PATH for job lookup 162 | 163 | lltop GitHub repository: 164 | 165 | Otherwise, please send me any comments, questions, improvements. I am 166 | especially interested in receiving/including any code/scripts to do 167 | job lookup for batch schedulers other than SGE. Please, put lltop in 168 | the subject line. 169 | 170 | John L. Hammond 171 | TACC, The University of Texas at Austin 172 | 173 | 174 | -- 175 | 176 | 0. lltop is a recursive anagram of lltop. 177 | 178 | 1. According to the headers, Lustre is a trademark of Sun 179 | Microsystems. 180 | 181 | 2. Lustre Monitoring Tool: http://code.google.com/p/lmt/ 182 | 183 | 3. Collectl: http://collectl.sourceforge.net/ 184 | 185 | 4. Note that lltop-serv does not clear the stats files. In fact 186 | clearing stats files while lltop-serv is running may cause it to 187 | misreport or under report usage. Client evictions can also affect the 188 | accuracy of the data returned, but lltop-serv does use some simple 189 | heuristics to mitigate their effects. However it should be remembered 190 | that lltop is not an exact tool and should be used with judgement. 191 | 192 | 5. Lltop-serv does not count pings because doing so tends to distort 193 | the statistics for large jobs. 194 | 195 | 6. 
As an optimization, if a client fails to generate any load during 196 | the interval, then lltop-serv omits that client from its output. 197 | 198 | 7. Lltop keeps a cache of address to jobid mappings so that the 199 | hostname and jobid lookups are done at most once per client. 200 | 201 | 8. If your site runs multiple concurrent jobs on single hosts then it 202 | may be hard to adapt lltop. I welcome suggestions on how to handle 203 | this case. 204 | -------------------------------------------------------------------------------- /contrib/torque/README.md: -------------------------------------------------------------------------------- 1 | Lustrejobtop 2 | -------------- 3 | 4 | Take the output from lltop and map it to torque jobs. If the lustre 5 | client names are different from the torque node names, one must do the 6 | appropriate mapping in the function lustre_to_torque(). 7 | 8 | The output looks something like this 9 | 10 | ``` 11 | # python ./lustrejobtop.py 12 | top writers 13 | host write MB read MB reqs jobid(user) 14 | cib44-4 1663 200 3740 2273289(userA) 15 | cib4-13 1567 390 3968 2272148(userA) 16 | cib52-5 1456 0 2913 2282404(userB) 17 | cib4-3 1455 226 3674 2269783(userA) 18 | cib1-11 889 0 1783 2279077(userC) 19 | cib2-7 761 333 2274 2279082(userC) 20 | cib12-5 686 258 1892 2265178(userA) 21 | cib19-10 502 1850 4707 2236324(userD) 2252649(userD) 22 | cib55-1 350 0 718 2277005(userE) 23 | cib11-3 329 7 2730 2219852(userE) 24 | top readers 25 | host write MB read MB reqs jobid(user) 26 | cib19-3 0 1955 4159 2277674(userB) 27 | cib60-4 0 1871 4084 2275433(userB) 28 | cib19-10 502 1850 4707 2236324(userD) 2252649(userD) 29 | cib43-5 286 1735 5103 2278431(userE) 30 | cib33-4 259 1732 4340 2278431(userE) 31 | cib19-8 0 1318 2955 2277320(userB) 32 | cib1-14 0 1268 3927 2269780(userA) 33 | cib14-13 0 1174 2398 2265053(userB) 34 | cib7-9 142 1012 2456 2283945(userE) 35 | cib4-14 0 967 3011 2269788(userA) 36 | top iops 37 | host write MB read MB reqs 
jobid(user) 38 | cib43-5 286 1735 5103 2278431(userE) 39 | cib19-10 502 1850 4707 2236324(userD) 2252649(userD) 40 | cib33-4 259 1732 4340 2278431(userE) 41 | cib19-3 0 1955 4159 2277674(userB) 42 | cib60-4 0 1871 4084 2275433(userB) 43 | cib4-13 1567 390 3968 2272148(userA) 44 | cib1-14 0 1268 3927 2269780(userA) 45 | cib44-4 1663 200 3740 2273289(userA) 46 | cib4-3 1455 226 3674 2269783(userA) 47 | cib17-15 0 758 3558 2281925(userE) 48 | Total: writes 11648 MB, reads 22491 MB, iops 81232 49 | ``` 50 | -------------------------------------------------------------------------------- /contrib/torque/lustrejobtop.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import operator 4 | from lxml import objectify 5 | 6 | LLTOPCMD = "/home/royd/projects/git/lltop/lltop --lltop-serv=/root/lltop-serv --job-map=/bin/true" 7 | OSSLIST = "10.1.3.1,10.1.3.11,10.1.3.12,10.1.3.13,10.1.3.14".split(",") 8 | TOPLINES = 10 9 | DELAY = 10 10 | 11 | def pbsnodes(): 12 | pbsnodesxml = os.popen('pbsnodes -x', 'r') 13 | pbsnodesdata = objectify.parse(pbsnodesxml).getroot() 14 | nodes = dict() 15 | for n in pbsnodesdata.iterchildren(tag='Node'): 16 | nodename = str(n['name']) 17 | nodes[nodename] = n 18 | return nodes 19 | 20 | def qstat(): 21 | qstatxml = os.popen('qstat -t -f -x', 'r') 22 | qstatdata = objectify.parse(qstatxml).getroot() 23 | jobs = dict() 24 | for j in qstatdata.iterchildren(tag='Job'): 25 | jobid = str(j['Job_Id']) 26 | jobs[jobid] = j 27 | return jobs 28 | 29 | def lltop(hostlist): 30 | lltop = os.popen(" ".join([LLTOPCMD, "-i", str(DELAY), "-l"] + hostlist)).readlines() 31 | #lltop = file("/tmp/lltop.txt", "r").readlines() 32 | lltopdata = list() 33 | for line in lltop[1:]: 34 | items = line.strip().split() 35 | lltopdata.append([items[0]] + map(int, items[1:])) 36 | return lltopdata 37 | 38 | def nodejobs(status): 39 | for s in status.split(","): 40 | if s.startswith("jobs="): 41 | joblist = 
s.split("=", 1)[1] 42 | return joblist.split(" ") 43 | 44 | def lustre_to_torque(clientname): 45 | # map from ib network hostname to torque nodelist hostname 46 | # our cluster has cX-Y as torque nodename with cibX-Y 47 | # as lustre client hostname (we run lustre over infiniband) 48 | return clientname.replace("ib", "") 49 | 50 | def printstats(lltopdata, nodeinfo, joblist): 51 | print "host write MB read MB reqs jobid(user)" 52 | for llt in lltopdata[:TOPLINES]: 53 | print "%10s %10s %10s %10s" % tuple(llt), 54 | hostname = lustre_to_torque(llt[0]) 55 | if hostname in nodeinfo: 56 | jobs = nodejobs(str(nodeinfo[hostname].status)) 57 | for j in jobs: 58 | jobstats = joblist[j] 59 | print " %s(%s)" % (j.split(".")[0], 60 | str(jobstats.Job_Owner).split("@")[0]), 61 | print 62 | 63 | def printsummary(lltopdata): 64 | total_writemb = sum(map(operator.itemgetter(1), lltopdata)) 65 | total_readmb = sum(map(operator.itemgetter(2), lltopdata)) 66 | total_reqs = sum(map(operator.itemgetter(3), lltopdata)) 67 | print "Total: writes %s MB/s, reads %s MB/s, iop/s %s" % (total_writemb/DELAY, total_readmb/DELAY, total_reqs/DELAY) 68 | 69 | def main(): 70 | lltopdata = lltop(OSSLIST) 71 | nodeinfo = pbsnodes() 72 | joblist = qstat() 73 | 74 | writecol = lambda x: x[1] 75 | readcol = lambda x: x[2] 76 | iopcol = lambda x: x[3] 77 | print "top writers" 78 | lltopdata.sort(key=writecol, reverse=True) 79 | printstats(lltopdata, nodeinfo, joblist) 80 | print "top readers" 81 | lltopdata.sort(key=readcol, reverse=True) 82 | printstats(lltopdata, nodeinfo, joblist) 83 | print "top iops" 84 | lltopdata.sort(key=iopcol, reverse=True) 85 | printstats(lltopdata, nodeinfo, joblist) 86 | 87 | printsummary(lltopdata) 88 | 89 | if __name__ == "__main__": 90 | main() 91 | 92 | -------------------------------------------------------------------------------- /dict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 
#include 5 | #include "dict.h" 6 | 7 | #define TRACE(args...) ((void) 0) 8 | 9 | #define DICT_HASH_DUMMY (((hash_t) 1) << (8 * sizeof(hash_t) - 1)) 10 | #define DICT_TABLE_LEN_MIN 8 11 | #define DICT_TABLE_LEN_MAX (((size_t) 1) << (8 * sizeof(size_t) - 1)) 12 | #define PERTURB_SHIFT 5 13 | 14 | /* Stolen from Python's stringobject.c. GPL. */ 15 | // static long 16 | // string_hash(PyStringObject *a) 17 | // { 18 | // register Py_ssize_t len; 19 | // register unsigned char *p; 20 | // register long x; 21 | // 22 | // if (a->ob_shash != -1) 23 | // return a->ob_shash; 24 | // len = Py_SIZE(a); 25 | // p = (unsigned char *) a->ob_sval; 26 | // x = *p << 7; 27 | // while (--len >= 0) 28 | // x = (1000003*x) ^ *p++; 29 | // x ^= Py_SIZE(a); 30 | // if (x == -1) 31 | // x = -2; 32 | // a->ob_shash = x; 33 | // return x; 34 | // } 35 | 36 | /* dict_strhash() will never return DICT_HASH_DUMMY. */ 37 | hash_t dict_strhash(const char *s) 38 | { 39 | const unsigned char *p = (const unsigned char *) s; 40 | hash_t x = *p << 7; 41 | 42 | for (; *p != 0; p++) 43 | x = (1000003 * x) ^ *p; 44 | 45 | x ^= p - (const unsigned char *) s; 46 | 47 | return x & ~DICT_HASH_DUMMY; 48 | } 49 | 50 | int dict_init(struct dict *dict, size_t count) 51 | { 52 | size_t table_len = DICT_TABLE_LEN_MIN; 53 | 54 | /* Need count < 2/3 of table_size. 
*/ 55 | while (3 * count >= 2 * table_len && table_len < DICT_TABLE_LEN_MAX) 56 | table_len *= 2; 57 | 58 | memset(dict, 0, sizeof(struct dict)); 59 | dict->d_table = calloc(table_len, sizeof(struct dict_entry)); 60 | if (dict->d_table == NULL) 61 | return -1; 62 | 63 | dict->d_table_len = table_len; 64 | return 0; 65 | } 66 | 67 | void dict_destroy(struct dict *dict, void (*key_dtor)(void*)) 68 | { 69 | if (key_dtor != NULL) { 70 | size_t i; 71 | for (i = 0; i < dict->d_table_len; i++) 72 | if (dict->d_table[i].d_key != NULL) 73 | (*key_dtor)(dict->d_table[i].d_key); 74 | } 75 | free(dict->d_table); 76 | memset(dict, 0, sizeof(struct dict)); 77 | } 78 | 79 | /* new_table_len must be a power of two. */ 80 | static int dict_resize(struct dict *dict, size_t new_table_len) 81 | { 82 | TRACE("table_len %zu, load %zu, count %zu, new_table_len %zu\n", 83 | dict->d_table_len, dict->d_load, dict->d_count, new_table_len); 84 | 85 | struct dict_entry *table, *old_table; 86 | size_t mask, old_table_len; 87 | 88 | table = calloc(new_table_len, sizeof(struct dict_entry)); 89 | if (table == NULL) 90 | return -1; 91 | 92 | old_table = dict->d_table; 93 | old_table_len = dict->d_table_len; 94 | 95 | dict->d_table = table; 96 | dict->d_table_len = new_table_len; 97 | dict->d_load = dict->d_count; 98 | mask = dict->d_table_len - 1; 99 | 100 | size_t i, j; 101 | for (j = 0; j < old_table_len; j++) { 102 | hash_t hash = old_table[j].d_hash; 103 | char *key = old_table[j].d_key; 104 | 105 | /* Do we need to check hash here? 
*/ 106 | if (key == NULL || (hash & DICT_HASH_DUMMY)) 107 | continue; 108 | 109 | size_t perturb = hash; 110 | i = hash & mask; 111 | 112 | while (table[i & mask].d_key != NULL) { 113 | i = (i << 2) + i + perturb + 1; 114 | perturb >>= PERTURB_SHIFT; 115 | } 116 | 117 | table[i & mask].d_hash = hash; 118 | table[i & mask].d_key = key; 119 | } 120 | 121 | free(old_table); 122 | 123 | return 0; 124 | } 125 | 126 | void dict_shrink(struct dict *dict, size_t hint) 127 | { 128 | /* TODO */ 129 | 130 | if (dict->d_count == 0 && dict->d_load > dict->d_table_len / 3) { 131 | memset(dict->d_table, 0, dict->d_table_len * sizeof(struct dict_entry)); 132 | dict->d_load = 0; 133 | } 134 | } 135 | 136 | struct dict_entry *dict_entry_ref(struct dict *dict, hash_t hash, const char *key) 137 | { 138 | size_t mask, i, perturb; 139 | struct dict_entry *table, *dummy, *ent; 140 | 141 | mask = dict->d_table_len - 1; 142 | table = dict->d_table; 143 | dummy = NULL; 144 | 145 | i = hash & mask; 146 | ent = &table[i]; 147 | 148 | /* TODO Check for ent->d_hash == hash first. */ 149 | if (ent->d_hash & DICT_HASH_DUMMY) 150 | dummy = ent; 151 | else if (ent->d_key == NULL) 152 | return ent; 153 | else if (ent->d_hash == hash && strcmp(ent->d_key, key) == 0) 154 | return ent; 155 | 156 | perturb = hash; 157 | while (1) { 158 | i = (i << 2) + i + perturb + 1; 159 | ent = &table[i & mask]; 160 | 161 | if (ent->d_hash & DICT_HASH_DUMMY) { 162 | if (dummy == NULL) 163 | dummy = ent; 164 | } else if (ent->d_key == NULL) { 165 | return (dummy != NULL) ? dummy : ent; 166 | } else if (ent->d_hash == hash && strcmp(ent->d_key, key) == 0) { 167 | return ent; 168 | } 169 | 170 | perturb >>= PERTURB_SHIFT; 171 | } 172 | } 173 | 174 | int dict_entry_set(struct dict *dict, struct dict_entry *ent, hash_t hash, char *key) 175 | { 176 | /* If we're overwriting an existing entry then we don't need to 177 | resize. 
*/ 178 | if (ent->d_key != NULL) 179 | goto out_exist; 180 | 181 | /* Overwriting a dummy entry doesn't affect the load, so we don't 182 | need to resize. */ 183 | if (ent->d_hash & DICT_HASH_DUMMY) 184 | goto out_dummy; 185 | 186 | size_t new_load = dict->d_load + 1; 187 | if (3 * new_load >= 2 * dict->d_table_len) { 188 | size_t new_count = dict->d_count + 1; 189 | size_t new_table_len = dict->d_table_len; 190 | while (3 * new_count >= 2 * new_table_len && new_table_len < DICT_TABLE_LEN_MAX) 191 | new_table_len *= 2; 192 | 193 | if (new_count >= new_table_len) { 194 | TRACE("new_count %zu >= new_table_len %zu\n", new_count, new_table_len); 195 | errno = ENOMEM; 196 | return -1; 197 | } 198 | 199 | if (dict_resize(dict, new_table_len) < 0) 200 | return -1; 201 | 202 | /* Revalidate ent after resize. */ 203 | ent = dict_entry_ref(dict, hash, key); 204 | } 205 | 206 | dict->d_load++; 207 | out_dummy: 208 | dict->d_count++; 209 | out_exist: 210 | ent->d_hash = hash; 211 | ent->d_key = key; 212 | 213 | return 0; 214 | } 215 | 216 | char *dict_entry_remv(struct dict *dict, struct dict_entry *ent, int may_resize) 217 | { 218 | char *key = ent->d_key; 219 | if (key != NULL) { 220 | ent->d_hash = DICT_HASH_DUMMY; 221 | ent->d_key = NULL; 222 | dict->d_count--; 223 | if (may_resize) 224 | dict_shrink(dict, dict->d_count); 225 | } 226 | 227 | return key; 228 | } 229 | 230 | char *dict_remv(struct dict *dict, const char *key) 231 | { 232 | hash_t hash = dict_strhash(key); 233 | struct dict_entry *ent = dict_entry_ref(dict, hash, key); 234 | 235 | return dict_entry_remv(dict, ent, 1); 236 | } 237 | 238 | char *dict_ref(struct dict *dict, const char *key) 239 | { 240 | hash_t hash = dict_strhash(key); 241 | struct dict_entry *ent = dict_entry_ref(dict, hash, key); 242 | 243 | if (ent->d_hash & DICT_HASH_DUMMY) /* I don't think we need this. 
*/ 244 | return NULL; 245 | 246 | return ent->d_key; 247 | } 248 | 249 | int dict_set(struct dict *dict, char *key) 250 | { 251 | hash_t hash = dict_strhash(key); 252 | struct dict_entry *ent = dict_entry_ref(dict, hash, key); 253 | 254 | if (ent->d_key != NULL) { 255 | TRACE("overwriting old key `%s', hash %zu, with new key `%s' hash %zu\n", 256 | ent->d_key, ent->d_hash, key, hash); 257 | ent->d_key = key; 258 | return 0; 259 | } 260 | 261 | if (dict_entry_set(dict, ent, hash, key) < 0) 262 | return -1; 263 | 264 | return 0; 265 | } 266 | 267 | struct dict_entry *dict_for_each_ref(struct dict *dict, size_t *i) 268 | { 269 | while (*i < dict->d_table_len) { 270 | struct dict_entry *ent = dict->d_table + (*i)++; 271 | if (ent->d_key != NULL) 272 | return ent; 273 | } 274 | 275 | return NULL; 276 | } 277 | 278 | char *dict_for_each(struct dict *dict, size_t *i) 279 | { 280 | struct dict_entry *ent = dict_for_each_ref(dict, i); 281 | if (ent != NULL) 282 | return ent->d_key; 283 | return NULL; 284 | } 285 | -------------------------------------------------------------------------------- /dict.h: -------------------------------------------------------------------------------- 1 | #ifndef _DICT_H_ 2 | #define _DICT_H_ 3 | #include 4 | 5 | typedef unsigned long hash_t; 6 | hash_t dict_strhash(const char *s); 7 | 8 | struct dict_entry { 9 | hash_t d_hash; 10 | char *d_key; 11 | }; 12 | 13 | struct dict { 14 | struct dict_entry *d_table; 15 | size_t d_table_len; 16 | size_t d_load; 17 | size_t d_count; 18 | }; 19 | 20 | #define DEFINE_DICT(d) \ 21 | struct dict d = { .d_table = NULL, } 22 | 23 | /* The count argument is only a hint. */ 24 | int dict_init(struct dict *dict, size_t hint); 25 | 26 | /* dict_destroy() is valid for dicts defined by DEFINE_DICT() or 27 | initialized by dict_init(). Entry keys are released only through key_dtor, and only when it is non-NULL.
*/ 28 | void dict_destroy(struct dict *dict, void (*key_dtor)(void*)); 29 | 30 | struct dict_entry *dict_entry_ref(struct dict *dict, hash_t hash, const char *key); 31 | int dict_entry_set(struct dict *dict, struct dict_entry *ent, hash_t hash, char *key); 32 | char *dict_entry_remv(struct dict *dict, struct dict_entry *ent, int may_resize); 33 | static inline void dict_allow_resize(struct dict *dict, size_t hint) 34 | { 35 | /* TODO, maybe. */ 36 | } 37 | 38 | char *dict_ref(struct dict *dict, const char *key); 39 | int dict_set(struct dict *dict, char *key); 40 | char *dict_remv(struct dict *dict, const char *key); 41 | 42 | /* Returns only non-NULL keys. */ 43 | char *dict_for_each(struct dict *dict, size_t *i); 44 | struct dict_entry *dict_for_each_ref(struct dict *dict, size_t *i); 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /hooks.c: -------------------------------------------------------------------------------- 1 | /* lltop hooks.c 2 | * Copyright 2010 by John L. Hammond 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License as 6 | * published by the Free Software Foundation; either version 2 of the 7 | * License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, but 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 17 | * 02110-1301 USA. 
18 | */ 19 | #define _GNU_SOURCE 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "lltop.h" 32 | #include "hooks.h" 33 | 34 | int lltop_intvl = DEFAULT_LLTOP_INTVL; 35 | const char *lltop_ssh_path = "/usr/bin/ssh"; 36 | const char *lltop_serv_path = "lltop-serv"; 37 | int (*lltop_get_host)(const char *addr, char *host, size_t host_size); 38 | int (*lltop_get_job)(const char *host, char *job, size_t job_size); 39 | int (*lltop_job_map)(void); 40 | 41 | static int serv_list_from_args = 0; 42 | static int get_serv_list(const char *fs_name, char ***serv_list, int *serv_count); 43 | 44 | static const char *get_host_path = NULL; 45 | static int external_get_host(const char *addr, char *host, size_t host_size); 46 | 47 | static int getnameinfo_use_fqdn = 0; 48 | static int getnameinfo_get_host(const char *addr, char *host, size_t host_size); 49 | 50 | static const char *get_job_path = NULL; 51 | static int external_get_job(const char *host, char *job, size_t job_size); 52 | 53 | static const char *execd_spool_path = "/share/sge6.2/execd_spool"; 54 | static int execd_spool_get_job(const char *host, char *job, size_t job_size); 55 | 56 | static const char *job_map_cmd = NULL; 57 | static int external_job_map(void); 58 | 59 | static int print_header = 1; 60 | static int print_limit = INT_MAX; 61 | 62 | static int usage(void) 63 | { 64 | fprintf(stderr, 65 | "Usage: lltop [OPTION]... FILESYSTEM\n" 66 | " or: lltop [OPTION]... 
-l SERVER...\n" 67 | "Report load by job for Lustre FILESYSTEM or SERVER(s).\n" 68 | "\n" 69 | "Mandatory arguments to long options are mandatory for short options too.\n" 70 | " -f, --fqdn use fully qualified domain names for clients\n" 71 | " -g, --get-host=COMMAND use COMMAND for reverse DNS lookups\n" 72 | " -h, --help display this help and exit\n" 73 | " -i, --interval=NUMBER report load over NUMBER seconds\n" 74 | " -j, --get-job=COMMAND use COMMAND for job lookup\n" 75 | " -l, --server-list report load on servers given as arguments\n" 76 | " -m, --job-map=COMMAND use COMMAND to get job map\n" 77 | " -n, --limit=NUMBER limit output to NUMBER jobs\n" 78 | " --no-header do not display header\n" 79 | " --lltop-serv=PATH use lltop-serv at PATH on servers\n" 80 | " --remote-shell=PATH use remote shell at PATH to execute lltop-serv\n" 81 | " --execd-spool=PATH use execd_spool directory PATH for job lookup\n" 82 | "\n" 83 | /* TODO Describe function, document default argument values. */ 84 | /* TODO "Report lltop bugs to ...\n" */ 85 | "lltop GitHub repository: \n"); 86 | exit(1); 87 | } 88 | 89 | int lltop_config(int argc, char *argv[], char ***serv_list, int *serv_count) 90 | { 91 | struct option opts[] = { 92 | { "fqdn", 0, 0, 'f' }, /* Set getnameinfo_use_fqdn. */ 93 | { "get-host", 1, 0, 'g' }, /* get_host_path */ 94 | { "help", 0, 0, 'h' }, /* Call usage(). */ 95 | { "interval", 1, 0, 'i' }, /* lltop_intvl */ 96 | { "get-job", 1, 0, 'j' }, /* get_job_path */ 97 | { "server-list", 0, 0, 'l' }, /* Set serv_list_from_args. */ 98 | { "job-map", 1, 0, 'm' }, /* job_map_cmd */ 99 | { "limit", 1, 0, 'n' }, /* print_limit */ 100 | { "no-header", 0, &print_header, 0 }, /* Unset print_header. 
*/ 101 | { "lltop-serv", 1, 0, 256 }, /* lltop_serv_path */ 102 | { "remote-shell", 1, 0, 257 }, /* lltop_ssh_path */ 103 | { "execd-spool", 1, 0, 258 }, 104 | { 0, 0, 0, 0, }, 105 | }; 106 | 107 | int c; 108 | while ((c = getopt_long(argc, argv, "fg:hi:j:lm:n:", opts, 0)) != -1) { 109 | switch (c) { 110 | case 'f': 111 | getnameinfo_use_fqdn = 1; 112 | break; 113 | case 'g': 114 | get_host_path = optarg; 115 | lltop_get_host = &external_get_host; 116 | break; 117 | case 'h': 118 | usage(); 119 | break; 120 | case 'i': 121 | lltop_intvl = atoi(optarg); 122 | if (lltop_intvl <= 0) 123 | FATAL("invalid sleep interval \"%s\"\n", optarg); 124 | break; 125 | case 'j': 126 | get_job_path = optarg; 127 | lltop_get_job = &external_get_job; 128 | break; 129 | case 'l': 130 | serv_list_from_args = 1; 131 | break; 132 | case 'm': 133 | job_map_cmd = optarg; 134 | lltop_job_map = &external_job_map; 135 | break; 136 | case 'n': 137 | print_limit = atoi(optarg); 138 | break; 139 | case 256: 140 | lltop_serv_path = optarg; 141 | break; 142 | case 257: 143 | lltop_ssh_path = optarg; 144 | break; 145 | case 258: 146 | execd_spool_path = optarg; 147 | lltop_get_job = &execd_spool_get_job; 148 | break; 149 | case '?': 150 | fprintf(stderr, "Try `lltop --help' for more information.\n"); 151 | exit(1); 152 | } 153 | } 154 | 155 | if (optind >= argc) 156 | FATAL("missing filesystem or server list argument(s)\n" 157 | "Try `lltop --help' for more information.\n"); 158 | 159 | if (serv_list_from_args) { 160 | *serv_list = argv + optind; 161 | *serv_count = argc - optind; 162 | } else if (get_serv_list(argv[optind], serv_list, serv_count) < 0) { 163 | FATAL("cannot get server list for %s: %m\n", argv[optind]); 164 | } 165 | 166 | /* BLECH. */ 167 | if (lltop_get_host == NULL) 168 | lltop_get_host = &getnameinfo_get_host; 169 | 170 | /* BLECH. 
*/ 171 | if (lltop_get_job == NULL && lltop_job_map == NULL) 172 | lltop_get_job = &execd_spool_get_job; 173 | 174 | return 0; 175 | } 176 | 177 | static int get_serv_list(const char *fs_name, char ***serv_list, int *serv_count) 178 | { 179 | /* Get the server list for filesystem named fs_name and store in 180 | * *serv_list, assigning its length in *serv_count. 181 | * 182 | * Order does not matter, but make sure that the list returned is 183 | * free of duplicates. Also, note that these will be passed to ssh 184 | * as the host argument, so it's OK to return IP addresses. If you 185 | * need to specify a user name to ssh then you should prepend 186 | * 'user@' to each server name. 187 | * 188 | * Here we just switch on fs_name to get a hard coded range of 189 | * servers. For example, scratch is mds3, oss23,..., oss72. 190 | * Pretty gross, huh? */ 191 | 192 | int scratch_r[] = { 3, 3, 23, 72, }; 193 | int share_r[] = { 1, 1, 1, 6, }; 194 | int work_r[] = { 5, 5, 7, 20, }; 195 | 196 | /* Allow user to specify '/scratch' or 'scratch'. */ 197 | if (fs_name[0] == '/') 198 | fs_name++; 199 | 200 | int *fs_r; 201 | if (strcmp(fs_name, "scratch") == 0) 202 | fs_r = scratch_r; 203 | else if (strcmp(fs_name, "share") == 0) 204 | fs_r = share_r; 205 | else if (strcmp(fs_name, "work") == 0) 206 | fs_r = work_r; 207 | else 208 | FATAL("%s: unknown filesystem \"%s\"\n", __func__, fs_name); 209 | 210 | *serv_count = 2 - fs_r[0] + fs_r[1] - fs_r[2] + fs_r[3]; 211 | *serv_list = alloc(*serv_count * sizeof(char*)); 212 | 213 | int i; 214 | char **s = *serv_list; 215 | for (i = fs_r[0]; i <= fs_r[1]; i++) 216 | asprintf(s++, "mds%d", i); 217 | 218 | for (i = fs_r[2]; i <= fs_r[3]; i++) 219 | asprintf(s++, "oss%d", i); 220 | 221 | return 0; 222 | } 223 | 224 | void lltop_free_serv_list(char **serv_list, int serv_count) 225 | { 226 | /* Clean up the server list gotten by the last function. Utterly 227 | * pointless since we'll be exiting soon anyway.
*/ 228 | if (serv_list_from_args) 229 | return; 230 | 231 | int i; 232 | for (i = 0; i < serv_count; i++) 233 | free(serv_list[i]); 234 | 235 | free(serv_list); 236 | } 237 | 238 | void lltop_print_header(FILE *file) 239 | { 240 | /* Called once before lltop_print_name_stats(). This is your chance 241 | * to make a pretty header for your data. Try to keep field widths 242 | * consistent between header and stats. */ 243 | if (print_header) 244 | fprintf(file, "%-16s %8s %8s %8s\n", "JOBID", "WR_MB", "RD_MB", "REQS"); 245 | } 246 | 247 | void lltop_print_name_stats(FILE *file, const char *name, long wr_B, long rd_B, long reqs) 248 | { 249 | /* Called for each job to be output by lltop. Note we convert bytes 250 | * to MB, and that we don't print if all values would be zero. */ 251 | 252 | if (print_limit <= 0) 253 | return; 254 | 255 | long wr_MB = wr_B >> 20, rd_MB = rd_B >> 20; 256 | 257 | if (wr_MB != 0 || rd_MB != 0 || reqs != 0) { 258 | fprintf(file, "%-16s %8lu %8lu %8lu\n", name, wr_MB, rd_MB, reqs); 259 | print_limit--; 260 | } 261 | } 262 | 263 | static int command(const char *path, const char *arg, char *buf, size_t buf_size) 264 | { 265 | /* Helper to do basic command substitution. */ 266 | 267 | int fscanf_rc = -1, pclose_rc = -1; 268 | char *cmd = NULL; 269 | FILE *pipe = NULL; 270 | 271 | asprintf(&cmd, "%s %s", path, arg); 272 | pipe = popen(cmd, "r"); 273 | if (pipe == NULL) { 274 | ERROR("cannot execute '%s': %m\n", cmd); 275 | goto out; 276 | } 277 | 278 | /* Stupid fscanf() does not have direct support for variable maximum 279 | field width specifications. So we use snprintf() to create a 280 | format string with the right field width encoded. For example, 281 | if buf_size is 1234, then fscanf_fmt is "%1233s" (one byte is left for the terminating NUL). 282 | This is stupid, should just use getline().
283 | XXX -1 */ 284 | char fscanf_fmt[3 + 3 * sizeof(size_t)]; 285 | snprintf(fscanf_fmt, sizeof(fscanf_fmt), "%%%zus", buf_size - 1); 286 | 287 | /* CHECKME What is fscanf_rc when output is all whitespace? */ 288 | fscanf_rc = fscanf(pipe, fscanf_fmt, buf); 289 | if (fscanf_rc != 1) 290 | buf[0] = '\0'; 291 | 292 | if ((pclose_rc = pclose(pipe)) < 0) 293 | ERROR("cannot obtain termination status of %s: %m\n", cmd); 294 | 295 | out: 296 | free(cmd); 297 | return fscanf_rc == 1 && pclose_rc == 0 ? 0 : -1; 298 | } 299 | 300 | static int external_get_host(const char *addr, char *host, size_t host_size) 301 | { 302 | return command(get_host_path, addr, host, host_size); 303 | } 304 | 305 | static int getnameinfo_get_host(const char *addr, char *host, size_t host_size) 306 | { 307 | /* Find hostname for addr (a dotted-quad string) and store in buffer 308 | * host of size host_size. Return 0 if host was written, -1 309 | * otherwise. Note that host_size is the size of the buffer so you 310 | * can safely do snprintf(host, host_size, "%s", very_long_str). */ 311 | 312 | if (host_size < NI_MAXHOST) 313 | ERROR("%s: warning host_size %zu is less than NI_MAXHOST %zu\n", 314 | __func__, host_size, (size_t) NI_MAXHOST); 315 | 316 | struct sockaddr_in sin = { .sin_family = AF_INET, }; 317 | if (inet_pton(AF_INET, addr, &sin.sin_addr) < 1) { 318 | ERROR("%s: invalid IPv4 address \"%s\"\n", __func__, addr); 319 | return -1; 320 | } 321 | 322 | int ni_rc = getnameinfo((const struct sockaddr*) &sin, sizeof(sin), 323 | host, host_size, NULL, 0, NI_NAMEREQD); 324 | if (ni_rc != 0) { 325 | if (ni_rc != EAI_NONAME) 326 | ERROR("%s: cannot get name info for address \"%s\": %s\n", __func__, 327 | addr, gai_strerror(ni_rc)); 328 | return -1; 329 | } 330 | 331 | /* Setting NI_NOFQDN for the same effect doesn't seem to work here. 332 | * Maybe a problem with site config. We shouldn't have to worry 333 | * about truncating numerical addresses since we set NI_NAMEREQD. 
*/ 334 | if (!getnameinfo_use_fqdn) 335 | chop(host, '.'); 336 | 337 | return 0; 338 | } 339 | 340 | static int external_get_job(const char *host, char *job, size_t job_size) 341 | { 342 | return command(get_job_path, host, job, job_size); 343 | } 344 | 345 | static int execd_spool_get_job(const char *host, char *job, size_t job_size) 346 | { 347 | /* Find jobname for host and store in buffer job of size job_size. 348 | * Return 0 if job was written, -1 otherwise. Note that job_size is 349 | * the size of the buffer so you can safely do snprintf(job, 350 | * job_size, "%s", very_long_str). 351 | * 352 | * This works for the current TACC Ranger SGE setup. A running job 353 | * with jobid on host is associated with a directory 354 | * /share/sge6.2/execd_spool//active_jobs/.. */ 355 | 356 | int rc = -1; 357 | char *jobs_dir_path = NULL; 358 | asprintf(&jobs_dir_path, "%s/%s/active_jobs", execd_spool_path, host); 359 | 360 | DIR *jobs_dir = opendir(jobs_dir_path); 361 | if (jobs_dir == NULL) { 362 | /* Cannot find an active_jobs directory for host. This need not 363 | be an error. */ 364 | if (errno != ENOENT) 365 | ERROR("%s: cannot open %s: %m\n", __func__, jobs_dir_path); 366 | 367 | static int access_checked; 368 | if (!access_checked && access(execd_spool_path, R_OK|X_OK) < 0) 369 | ERROR("%s: cannot access %s: %m\n", __func__, execd_spool_path); 370 | access_checked = 1; 371 | goto out; 372 | } 373 | 374 | struct dirent *ent; 375 | while ((ent = readdir(jobs_dir))) { 376 | if (ent->d_type == DT_DIR && ent->d_name[0] != '.') { 377 | /* Chop off '. suffix. OK to modify d_name, right? 
*/ 378 | snprintf(job, job_size, "%s", chop(ent->d_name, '.')); 379 | rc = 0; 380 | break; 381 | } 382 | } 383 | 384 | out: 385 | if (jobs_dir != NULL) 386 | closedir(jobs_dir); 387 | free(jobs_dir_path); 388 | 389 | return rc; 390 | } 391 | 392 | static int external_job_map(void) 393 | { 394 | int pclose_rc = -1; 395 | FILE *pipe = NULL; 396 | 397 | pipe = popen(job_map_cmd, "r"); 398 | if (pipe == NULL) { 399 | ERROR("cannot execute '%s': %m\n", job_map_cmd); 400 | return -1; 401 | } 402 | 403 | char host[MAXNAME + 1]; 404 | char job[MAXNAME + 1]; 405 | /* XXX MAXNAME */ 406 | 407 | char *line = NULL; 408 | size_t line_size = 0; 409 | 410 | while (getline(&line, &line_size, pipe) >= 0) { 411 | if (sscanf(line, "%1024s %1024s\n", host, job) != 2) { 412 | ERROR("invalid line \"%s\"\n", chop(line, '\n')); 413 | continue; 414 | } 415 | 416 | lltop_set_job(host, job); 417 | } 418 | free(line); 419 | 420 | if ((pclose_rc = pclose(pipe)) < 0) 421 | ERROR("cannot obtain termination status of %s: %m\n", job_map_cmd); 422 | 423 | /* XXX We may be returning -1 with errno unset. */ 424 | return pclose_rc == 0 ? 
0 : -1; 425 | } 426 | -------------------------------------------------------------------------------- /hooks.h: -------------------------------------------------------------------------------- 1 | #ifndef _HOOKS_H_ 2 | #define _HOOKS_H_ 3 | #include 4 | 5 | extern int lltop_intvl; 6 | extern const char *lltop_ssh_path; 7 | extern const char *lltop_serv_path; 8 | extern int (*lltop_get_host)(const char *addr, char *host, size_t host_size); 9 | extern int (*lltop_get_job)(const char *host, char *job, size_t job_size); 10 | extern int (*lltop_job_map)(void); 11 | 12 | void lltop_set_job(const char *host, const char *job); 13 | int lltop_config(int argc, char *argv[], char ***serv_list, int *serv_count); 14 | void lltop_free_serv_list(char **serv_list, int serv_count); 15 | void lltop_print_header(FILE *file); 16 | void lltop_print_name_stats(FILE *file, const char *name, long wr_B, long rd_B, long reqs); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /job-map.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define ERROR(fmt,arg...) \ 13 | fprintf(stderr, "%s: "fmt, program_invocation_short_name, ##arg) 14 | 15 | #define FATAL(fmt,arg...) 
do { \ 16 | ERROR(fmt,##arg); \ 17 | exit(1); \ 18 | } while (0) 19 | 20 | #define JOB_NONE "0" 21 | 22 | struct linux_dirent64 { 23 | uint64_t d_ino; 24 | int64_t d_off; 25 | unsigned short d_reclen; 26 | unsigned char d_type; 27 | char d_name[0]; 28 | }; 29 | 30 | int main(int argc, char *argv[]) 31 | { 32 | const char *execd_spool_path = "/share/sge6.2/execd_spool"; 33 | /* const char *out_path = NULL; */ 34 | int sleep_interval = 0; 35 | /* int rename = 0; */ 36 | FILE *out_file = stdout; 37 | 38 | if (chdir(execd_spool_path) < 0) 39 | FATAL("cannot change to `%s': %m\n", execd_spool_path); 40 | 41 | DIR *execd_spool_dir = opendir("."); 42 | if (execd_spool_dir == NULL) 43 | FATAL("cannot open `%s': %m\n", execd_spool_path); 44 | 45 | while (1) { 46 | struct dirent *host_de; 47 | while ((host_de = readdir(execd_spool_dir)) != NULL) { 48 | if (host_de->d_name[0] == '.') 49 | continue; 50 | 51 | const char *host = host_de->d_name; 52 | char active_jobs_path[HOST_NAME_MAX + 1 + 20]; 53 | snprintf(active_jobs_path, sizeof(active_jobs_path), "%s/active_jobs", host); 54 | 55 | int active_jobs_fd = open(active_jobs_path, O_RDONLY|O_DIRECTORY); 56 | if (active_jobs_fd < 0) { 57 | if (errno != ENOENT) 58 | ERROR("cannot open `%s/%s': %m\n", execd_spool_path, active_jobs_path); 59 | continue; 60 | } 61 | 62 | char de_buf[1024]; 63 | int de_n = syscall(SYS_getdents64, active_jobs_fd, de_buf, sizeof(de_buf)); 64 | if (de_n < 0) { 65 | if (errno != ENOENT) 66 | ERROR("cannot read `%s/%s': %m\n", execd_spool_path, active_jobs_path); 67 | goto next; 68 | } 69 | 70 | const char *job_name = JOB_NONE; 71 | char *de_pos = de_buf, *de_end = de_buf + de_n; 72 | while (de_pos < de_end) { 73 | struct linux_dirent64 *job_de = (struct linux_dirent64 *) de_pos; 74 | if (job_de->d_name[0] != '.') { 75 | job_name = job_de->d_name; 76 | break; 77 | } 78 | de_pos += job_de->d_reclen; 79 | } 80 | 81 | fprintf(out_file, "%s %s\n", host, job_name); 82 | next: 83 | close(active_jobs_fd); 84 | } 
85 | 86 | fflush(out_file); 87 | 88 | if (sleep_interval <= 0) 89 | break; 90 | 91 | rewinddir(execd_spool_dir); 92 | } 93 | 94 | closedir(execd_spool_dir); 95 | 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /list.h: -------------------------------------------------------------------------------- 1 | #ifndef _LINUX_LIST_H 2 | #define _LINUX_LIST_H 3 | /* 4 | * Simple doubly linked list implementation. 5 | * 6 | * Some of the internal functions ("__xxx") are useful when 7 | * manipulating whole lists rather than single entries, as 8 | * sometimes we already know the next/prev entries and we can 9 | * generate better code by using them directly rather than 10 | * using the generic single-entry routines. 11 | */ 12 | 13 | #ifndef container_of 14 | #define container_of(ptr, type, member) ({ \ 15 | const typeof(((type *) 0)->member ) *__mptr = (ptr); \ 16 | (type *) ((char *) __mptr - offsetof(type,member)); \ 17 | }) 18 | #endif 19 | 20 | #define prefetch(x) ((void) (x)) 21 | #define LIST_POISON1 0 22 | #define LIST_POISON2 0 23 | 24 | struct list_head { 25 | struct list_head *next, *prev; 26 | }; 27 | 28 | #define LIST_HEAD_INIT(name) { &(name), &(name) } 29 | 30 | #define LIST_HEAD(name) \ 31 | struct list_head name = LIST_HEAD_INIT(name) 32 | 33 | static inline void INIT_LIST_HEAD(struct list_head *list) 34 | { 35 | list->next = list; 36 | list->prev = list; 37 | } 38 | 39 | /* 40 | * Insert a new entry between two known consecutive entries. 41 | * 42 | * This is only for internal list manipulation where we know 43 | * the prev/next entries already! 
44 | */ 45 | static inline void __list_add(struct list_head *new, 46 | struct list_head *prev, 47 | struct list_head *next) 48 | { 49 | next->prev = new; 50 | new->next = next; 51 | new->prev = prev; 52 | prev->next = new; 53 | } 54 | 55 | /** 56 | * list_add - add a new entry 57 | * @new: new entry to be added 58 | * @head: list head to add it after 59 | * 60 | * Insert a new entry after the specified head. 61 | * This is good for implementing stacks. 62 | */ 63 | static inline void list_add(struct list_head *new, struct list_head *head) 64 | { 65 | __list_add(new, head, head->next); 66 | } 67 | 68 | /** 69 | * list_add_tail - add a new entry 70 | * @new: new entry to be added 71 | * @head: list head to add it before 72 | * 73 | * Insert a new entry before the specified head. 74 | * This is useful for implementing queues. 75 | */ 76 | static inline void list_add_tail(struct list_head *new, struct list_head *head) 77 | { 78 | __list_add(new, head->prev, head); 79 | } 80 | 81 | /* 82 | * Insert a new entry between two known consecutive entries. 83 | * 84 | * This is only for internal list manipulation where we know 85 | * the prev/next entries already! 86 | */ 87 | static inline void __list_add_rcu(struct list_head * new, 88 | struct list_head * prev, struct list_head * next) 89 | { 90 | new->next = next; 91 | new->prev = prev; 92 | next->prev = new; 93 | prev->next = new; 94 | } 95 | 96 | /** 97 | * list_add_rcu - add a new entry to rcu-protected list 98 | * @new: new entry to be added 99 | * @head: list head to add it after 100 | * 101 | * Insert a new entry after the specified head. 102 | * This is good for implementing stacks. 103 | * 104 | * The caller must take whatever precautions are necessary 105 | * (such as holding appropriate locks) to avoid racing 106 | * with another list-mutation primitive, such as list_add_rcu() 107 | * or list_del_rcu(), running on this same list. 
108 | * However, it is perfectly legal to run concurrently with 109 | * the _rcu list-traversal primitives, such as 110 | * list_for_each_entry_rcu(). 111 | */ 112 | static inline void list_add_rcu(struct list_head *new, struct list_head *head) 113 | { 114 | __list_add_rcu(new, head, head->next); 115 | } 116 | 117 | /** 118 | * list_add_tail_rcu - add a new entry to rcu-protected list 119 | * @new: new entry to be added 120 | * @head: list head to add it before 121 | * 122 | * Insert a new entry before the specified head. 123 | * This is useful for implementing queues. 124 | * 125 | * The caller must take whatever precautions are necessary 126 | * (such as holding appropriate locks) to avoid racing 127 | * with another list-mutation primitive, such as list_add_tail_rcu() 128 | * or list_del_rcu(), running on this same list. 129 | * However, it is perfectly legal to run concurrently with 130 | * the _rcu list-traversal primitives, such as 131 | * list_for_each_entry_rcu(). 132 | */ 133 | static inline void list_add_tail_rcu(struct list_head *new, 134 | struct list_head *head) 135 | { 136 | __list_add_rcu(new, head->prev, head); 137 | } 138 | 139 | /* 140 | * Delete a list entry by making the prev/next entries 141 | * point to each other. 142 | * 143 | * This is only for internal list manipulation where we know 144 | * the prev/next entries already! 145 | */ 146 | static inline void __list_del(struct list_head * prev, struct list_head * next) 147 | { 148 | next->prev = prev; 149 | prev->next = next; 150 | } 151 | 152 | /** 153 | * list_del - deletes entry from list. 154 | * @entry: the element to delete from the list. 155 | * Note: list_empty on entry does not return true after this, the entry is 156 | * in an undefined state. 
157 | */ 158 | static inline void list_del(struct list_head *entry) 159 | { 160 | __list_del(entry->prev, entry->next); 161 | entry->next = LIST_POISON1; 162 | entry->prev = LIST_POISON2; 163 | } 164 | 165 | /** 166 | * list_del_rcu - deletes entry from list without re-initialization 167 | * @entry: the element to delete from the list. 168 | * 169 | * Note: list_empty on entry does not return true after this, 170 | * the entry is in an undefined state. It is useful for RCU based 171 | * lockfree traversal. 172 | * 173 | * In particular, it means that we can not poison the forward 174 | * pointers that may still be used for walking the list. 175 | * 176 | * The caller must take whatever precautions are necessary 177 | * (such as holding appropriate locks) to avoid racing 178 | * with another list-mutation primitive, such as list_del_rcu() 179 | * or list_add_rcu(), running on this same list. 180 | * However, it is perfectly legal to run concurrently with 181 | * the _rcu list-traversal primitives, such as 182 | * list_for_each_entry_rcu(). 183 | * 184 | * Note that the caller is not permitted to immediately free 185 | * the newly deleted entry. Instead, either synchronize_rcu() 186 | * or call_rcu() must be used to defer freeing until an RCU 187 | * grace period has elapsed. 188 | */ 189 | static inline void list_del_rcu(struct list_head *entry) 190 | { 191 | __list_del(entry->prev, entry->next); 192 | entry->prev = LIST_POISON2; 193 | } 194 | 195 | /** 196 | * list_replace - replace old entry by new one 197 | * @old : the element to be replaced 198 | * @new : the new element to insert 199 | * Note: if 'old' was empty, it will be overwritten. 
*/
static inline void list_replace(struct list_head *old,
				struct list_head *new)
{
	/*
	 * The neighbours are re-read through @new after each store rather
	 * than cached up front.  When @old is an empty (self-linked) head
	 * this makes @new end up self-linked as well, so the statement
	 * order here must not be changed.
	 */
	new->next = old->next;
	new->next->prev = new;
	new->prev = old->prev;
	new->prev->next = new;
}

/*
 * list_replace_init - replace old entry by new one and reinitialise old
 * @old : the element to be replaced
 * @new : the new element to insert
 *
 * Same as list_replace(), after which @old is reset with
 * INIT_LIST_HEAD().
 */
static inline void list_replace_init(struct list_head *old,
					struct list_head *new)
{
	list_replace(old, new);
	INIT_LIST_HEAD(old);
}

/*
 * list_replace_rcu - replace old entry by new one
 * @old : the element to be replaced
 * @new : the new element to insert
 *
 * The old entry will be replaced with the new entry atomically.
 * Note: 'old' should not be empty.
 *
 * Both links of @new are filled in before the neighbours are switched
 * over to it.  Only @old's backward link is poisoned; its forward link
 * is left untouched.  No memory barrier is issued in this function.
 */
static inline void list_replace_rcu(struct list_head *old,
				struct list_head *new)
{
	new->next = old->next;
	new->prev = old->prev;
	new->next->prev = new;
	new->prev->next = new;
	old->prev = LIST_POISON2;
}

/**
 * list_del_init - deletes entry from list and reinitialize it.
 * @entry: the element to delete from the list.
238 | */ 239 | static inline void list_del_init(struct list_head *entry) 240 | { 241 | __list_del(entry->prev, entry->next); 242 | INIT_LIST_HEAD(entry); 243 | } 244 | 245 | /** 246 | * list_move - delete from one list and add as another's head 247 | * @list: the entry to move 248 | * @head: the head that will precede our entry 249 | */ 250 | static inline void list_move(struct list_head *list, struct list_head *head) 251 | { 252 | __list_del(list->prev, list->next); 253 | list_add(list, head); 254 | } 255 | 256 | /** 257 | * list_move_tail - delete from one list and add as another's tail 258 | * @list: the entry to move 259 | * @head: the head that will follow our entry 260 | */ 261 | static inline void list_move_tail(struct list_head *list, 262 | struct list_head *head) 263 | { 264 | __list_del(list->prev, list->next); 265 | list_add_tail(list, head); 266 | } 267 | 268 | /** 269 | * list_is_last - tests whether @list is the last entry in list @head 270 | * @list: the entry to test 271 | * @head: the head of the list 272 | */ 273 | static inline int list_is_last(const struct list_head *list, 274 | const struct list_head *head) 275 | { 276 | return list->next == head; 277 | } 278 | 279 | /** 280 | * list_empty - tests whether a list is empty 281 | * @head: the list to test. 282 | */ 283 | static inline int list_empty(const struct list_head *head) 284 | { 285 | return head->next == head; 286 | } 287 | 288 | /** 289 | * list_empty_careful - tests whether a list is empty and not being modified 290 | * @head: the list to test 291 | * 292 | * Description: 293 | * tests whether a list is empty _and_ checks that no other CPU might be 294 | * in the process of modifying either member (next or prev) 295 | * 296 | * NOTE: using list_empty_careful() without synchronization 297 | * can only be safe if the only activity that can happen 298 | * to the list entry is list_del_init(). Eg. it cannot be used 299 | * if another CPU could re-list_add() it. 
300 | */ 301 | static inline int list_empty_careful(const struct list_head *head) 302 | { 303 | struct list_head *next = head->next; 304 | return (next == head) && (next == head->prev); 305 | } 306 | 307 | static inline void __list_splice(struct list_head *list, 308 | struct list_head *head) 309 | { 310 | struct list_head *first = list->next; 311 | struct list_head *last = list->prev; 312 | struct list_head *at = head->next; 313 | 314 | first->prev = head; 315 | head->next = first; 316 | 317 | last->next = at; 318 | at->prev = last; 319 | } 320 | 321 | /** 322 | * list_splice - join two lists 323 | * @list: the new list to add. 324 | * @head: the place to add it in the first list. 325 | */ 326 | static inline void list_splice(struct list_head *list, struct list_head *head) 327 | { 328 | if (!list_empty(list)) 329 | __list_splice(list, head); 330 | } 331 | 332 | /** 333 | * list_splice_init - join two lists and reinitialise the emptied list. 334 | * @list: the new list to add. 335 | * @head: the place to add it in the first list. 336 | * 337 | * The list at @list is reinitialised 338 | */ 339 | static inline void list_splice_init(struct list_head *list, 340 | struct list_head *head) 341 | { 342 | if (!list_empty(list)) { 343 | __list_splice(list, head); 344 | INIT_LIST_HEAD(list); 345 | } 346 | } 347 | 348 | /** 349 | * list_entry - get the struct for this entry 350 | * @ptr: the &struct list_head pointer. 351 | * @type: the type of the struct this is embedded in. 352 | * @member: the name of the list_struct within the struct. 353 | */ 354 | #define list_entry(ptr, type, member) \ 355 | container_of(ptr, type, member) 356 | 357 | /** 358 | * list_for_each - iterate over a list 359 | * @pos: the &struct list_head to use as a loop cursor. 360 | * @head: the head for your list. 
361 | */ 362 | #define list_for_each(pos, head) \ 363 | for (pos = (head)->next; prefetch(pos->next), pos != (head); \ 364 | pos = pos->next) 365 | 366 | /** 367 | * __list_for_each - iterate over a list 368 | * @pos: the &struct list_head to use as a loop cursor. 369 | * @head: the head for your list. 370 | * 371 | * This variant differs from list_for_each() in that it's the 372 | * simplest possible list iteration code, no prefetching is done. 373 | * Use this for code that knows the list to be very short (empty 374 | * or 1 entry) most of the time. 375 | */ 376 | #define __list_for_each(pos, head) \ 377 | for (pos = (head)->next; pos != (head); pos = pos->next) 378 | 379 | /** 380 | * list_for_each_prev - iterate over a list backwards 381 | * @pos: the &struct list_head to use as a loop cursor. 382 | * @head: the head for your list. 383 | */ 384 | #define list_for_each_prev(pos, head) \ 385 | for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \ 386 | pos = pos->prev) 387 | 388 | /** 389 | * list_for_each_safe - iterate over a list safe against removal of list entry 390 | * @pos: the &struct list_head to use as a loop cursor. 391 | * @n: another &struct list_head to use as temporary storage 392 | * @head: the head for your list. 393 | */ 394 | #define list_for_each_safe(pos, n, head) \ 395 | for (pos = (head)->next, n = pos->next; pos != (head); \ 396 | pos = n, n = pos->next) 397 | 398 | /** 399 | * list_for_each_entry - iterate over list of given type 400 | * @pos: the type * to use as a loop cursor. 401 | * @head: the head for your list. 402 | * @member: the name of the list_struct within the struct. 403 | */ 404 | #define list_for_each_entry(pos, head, member) \ 405 | for (pos = list_entry((head)->next, typeof(*pos), member); \ 406 | prefetch(pos->member.next), &pos->member != (head); \ 407 | pos = list_entry(pos->member.next, typeof(*pos), member)) 408 | 409 | /** 410 | * list_for_each_entry_reverse - iterate backwards over list of given type. 
411 | * @pos: the type * to use as a loop cursor. 412 | * @head: the head for your list. 413 | * @member: the name of the list_struct within the struct. 414 | */ 415 | #define list_for_each_entry_reverse(pos, head, member) \ 416 | for (pos = list_entry((head)->prev, typeof(*pos), member); \ 417 | prefetch(pos->member.prev), &pos->member != (head); \ 418 | pos = list_entry(pos->member.prev, typeof(*pos), member)) 419 | 420 | /** 421 | * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue 422 | * @pos: the type * to use as a start point 423 | * @head: the head of the list 424 | * @member: the name of the list_struct within the struct. 425 | * 426 | * Prepares a pos entry for use as a start point in list_for_each_entry_continue. 427 | */ 428 | #define list_prepare_entry(pos, head, member) \ 429 | ((pos) ? : list_entry(head, typeof(*pos), member)) 430 | 431 | /** 432 | * list_for_each_entry_continue - continue iteration over list of given type 433 | * @pos: the type * to use as a loop cursor. 434 | * @head: the head for your list. 435 | * @member: the name of the list_struct within the struct. 436 | * 437 | * Continue to iterate over list of given type, continuing after 438 | * the current position. 439 | */ 440 | #define list_for_each_entry_continue(pos, head, member) \ 441 | for (pos = list_entry(pos->member.next, typeof(*pos), member); \ 442 | prefetch(pos->member.next), &pos->member != (head); \ 443 | pos = list_entry(pos->member.next, typeof(*pos), member)) 444 | 445 | /** 446 | * list_for_each_entry_from - iterate over list of given type from the current point 447 | * @pos: the type * to use as a loop cursor. 448 | * @head: the head for your list. 449 | * @member: the name of the list_struct within the struct. 450 | * 451 | * Iterate over list of given type, continuing from current position. 
452 | */ 453 | #define list_for_each_entry_from(pos, head, member) \ 454 | for (; prefetch(pos->member.next), &pos->member != (head); \ 455 | pos = list_entry(pos->member.next, typeof(*pos), member)) 456 | 457 | /** 458 | * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry 459 | * @pos: the type * to use as a loop cursor. 460 | * @n: another type * to use as temporary storage 461 | * @head: the head for your list. 462 | * @member: the name of the list_struct within the struct. 463 | */ 464 | #define list_for_each_entry_safe(pos, n, head, member) \ 465 | for (pos = list_entry((head)->next, typeof(*pos), member), \ 466 | n = list_entry(pos->member.next, typeof(*pos), member); \ 467 | &pos->member != (head); \ 468 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) 469 | 470 | /** 471 | * list_for_each_entry_safe_continue 472 | * @pos: the type * to use as a loop cursor. 473 | * @n: another type * to use as temporary storage 474 | * @head: the head for your list. 475 | * @member: the name of the list_struct within the struct. 476 | * 477 | * Iterate over list of given type, continuing after current point, 478 | * safe against removal of list entry. 479 | */ 480 | #define list_for_each_entry_safe_continue(pos, n, head, member) \ 481 | for (pos = list_entry(pos->member.next, typeof(*pos), member), \ 482 | n = list_entry(pos->member.next, typeof(*pos), member); \ 483 | &pos->member != (head); \ 484 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) 485 | 486 | /** 487 | * list_for_each_entry_safe_from 488 | * @pos: the type * to use as a loop cursor. 489 | * @n: another type * to use as temporary storage 490 | * @head: the head for your list. 491 | * @member: the name of the list_struct within the struct. 492 | * 493 | * Iterate over list of given type from current point, safe against 494 | * removal of list entry. 
495 | */ 496 | #define list_for_each_entry_safe_from(pos, n, head, member) \ 497 | for (n = list_entry(pos->member.next, typeof(*pos), member); \ 498 | &pos->member != (head); \ 499 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) 500 | 501 | /** 502 | * list_for_each_entry_safe_reverse 503 | * @pos: the type * to use as a loop cursor. 504 | * @n: another type * to use as temporary storage 505 | * @head: the head for your list. 506 | * @member: the name of the list_struct within the struct. 507 | * 508 | * Iterate backwards over list of given type, safe against removal 509 | * of list entry. 510 | */ 511 | #define list_for_each_entry_safe_reverse(pos, n, head, member) \ 512 | for (pos = list_entry((head)->prev, typeof(*pos), member), \ 513 | n = list_entry(pos->member.prev, typeof(*pos), member); \ 514 | &pos->member != (head); \ 515 | pos = n, n = list_entry(n->member.prev, typeof(*n), member)) 516 | 517 | /** 518 | * list_for_each_rcu - iterate over an rcu-protected list 519 | * @pos: the &struct list_head to use as a loop cursor. 520 | * @head: the head for your list. 521 | * 522 | * This list-traversal primitive may safely run concurrently with 523 | * the _rcu list-mutation primitives such as list_add_rcu() 524 | * as long as the traversal is guarded by rcu_read_lock(). 525 | */ 526 | #define list_for_each_rcu(pos, head) \ 527 | for (pos = (head)->next; \ 528 | prefetch(rcu_dereference(pos)->next), pos != (head); \ 529 | pos = pos->next) 530 | 531 | #define __list_for_each_rcu(pos, head) \ 532 | for (pos = (head)->next; \ 533 | rcu_dereference(pos) != (head); \ 534 | pos = pos->next) 535 | 536 | /** 537 | * list_for_each_safe_rcu 538 | * @pos: the &struct list_head to use as a loop cursor. 539 | * @n: another &struct list_head to use as temporary storage 540 | * @head: the head for your list. 541 | * 542 | * Iterate over an rcu-protected list, safe against removal of list entry. 
543 | * 544 | * This list-traversal primitive may safely run concurrently with 545 | * the _rcu list-mutation primitives such as list_add_rcu() 546 | * as long as the traversal is guarded by rcu_read_lock(). 547 | */ 548 | #define list_for_each_safe_rcu(pos, n, head) \ 549 | for (pos = (head)->next; \ 550 | n = rcu_dereference(pos)->next, pos != (head); \ 551 | pos = n) 552 | 553 | /** 554 | * list_for_each_entry_rcu - iterate over rcu list of given type 555 | * @pos: the type * to use as a loop cursor. 556 | * @head: the head for your list. 557 | * @member: the name of the list_struct within the struct. 558 | * 559 | * This list-traversal primitive may safely run concurrently with 560 | * the _rcu list-mutation primitives such as list_add_rcu() 561 | * as long as the traversal is guarded by rcu_read_lock(). 562 | */ 563 | #define list_for_each_entry_rcu(pos, head, member) \ 564 | for (pos = list_entry((head)->next, typeof(*pos), member); \ 565 | prefetch(rcu_dereference(pos)->member.next), \ 566 | &pos->member != (head); \ 567 | pos = list_entry(pos->member.next, typeof(*pos), member)) 568 | 569 | 570 | /** 571 | * list_for_each_continue_rcu 572 | * @pos: the &struct list_head to use as a loop cursor. 573 | * @head: the head for your list. 574 | * 575 | * Iterate over an rcu-protected list, continuing after current point. 576 | * 577 | * This list-traversal primitive may safely run concurrently with 578 | * the _rcu list-mutation primitives such as list_add_rcu() 579 | * as long as the traversal is guarded by rcu_read_lock(). 580 | */ 581 | #define list_for_each_continue_rcu(pos, head) \ 582 | for ((pos) = (pos)->next; \ 583 | prefetch(rcu_dereference((pos))->next), (pos) != (head); \ 584 | (pos) = (pos)->next) 585 | 586 | /* 587 | * Double linked lists with a single pointer list head. 588 | * Mostly useful for hash tables where the two pointer list head is 589 | * too wasteful. 590 | * You lose the ability to access the tail in O(1). 
591 | */ 592 | 593 | struct hlist_head { 594 | struct hlist_node *first; 595 | }; 596 | 597 | struct hlist_node { 598 | struct hlist_node *next, **pprev; 599 | }; 600 | 601 | #define HLIST_HEAD_INIT { .first = NULL } 602 | #define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } 603 | #define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) 604 | static inline void INIT_HLIST_NODE(struct hlist_node *h) 605 | { 606 | h->next = NULL; 607 | h->pprev = NULL; 608 | } 609 | 610 | static inline int hlist_unhashed(const struct hlist_node *h) 611 | { 612 | return !h->pprev; 613 | } 614 | 615 | static inline int hlist_empty(const struct hlist_head *h) 616 | { 617 | return !h->first; 618 | } 619 | 620 | static inline void __hlist_del(struct hlist_node *n) 621 | { 622 | struct hlist_node *next = n->next; 623 | struct hlist_node **pprev = n->pprev; 624 | *pprev = next; 625 | if (next) 626 | next->pprev = pprev; 627 | } 628 | 629 | static inline void hlist_del(struct hlist_node *n) 630 | { 631 | __hlist_del(n); 632 | n->next = LIST_POISON1; 633 | n->pprev = LIST_POISON2; 634 | } 635 | 636 | /** 637 | * hlist_del_rcu - deletes entry from hash list without re-initialization 638 | * @n: the element to delete from the hash list. 639 | * 640 | * Note: list_unhashed() on entry does not return true after this, 641 | * the entry is in an undefined state. It is useful for RCU based 642 | * lockfree traversal. 643 | * 644 | * In particular, it means that we can not poison the forward 645 | * pointers that may still be used for walking the hash list. 646 | * 647 | * The caller must take whatever precautions are necessary 648 | * (such as holding appropriate locks) to avoid racing 649 | * with another list-mutation primitive, such as hlist_add_head_rcu() 650 | * or hlist_del_rcu(), running on this same list. 651 | * However, it is perfectly legal to run concurrently with 652 | * the _rcu list-traversal primitives, such as 653 | * hlist_for_each_entry(). 
654 | */ 655 | static inline void hlist_del_rcu(struct hlist_node *n) 656 | { 657 | __hlist_del(n); 658 | n->pprev = LIST_POISON2; 659 | } 660 | 661 | static inline void hlist_del_init(struct hlist_node *n) 662 | { 663 | if (!hlist_unhashed(n)) { 664 | __hlist_del(n); 665 | INIT_HLIST_NODE(n); 666 | } 667 | } 668 | 669 | /* 670 | * hlist_replace_rcu - replace old entry by new one 671 | * @old : the element to be replaced 672 | * @new : the new element to insert 673 | * 674 | * The old entry will be replaced with the new entry atomically. 675 | */ 676 | static inline void hlist_replace_rcu(struct hlist_node *old, 677 | struct hlist_node *new) 678 | { 679 | struct hlist_node *next = old->next; 680 | 681 | new->next = next; 682 | new->pprev = old->pprev; 683 | 684 | if (next) 685 | new->next->pprev = &new->next; 686 | *new->pprev = new; 687 | old->pprev = LIST_POISON2; 688 | } 689 | 690 | static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) 691 | { 692 | struct hlist_node *first = h->first; 693 | n->next = first; 694 | if (first) 695 | first->pprev = &n->next; 696 | h->first = n; 697 | n->pprev = &h->first; 698 | } 699 | 700 | 701 | /** 702 | * hlist_add_head_rcu 703 | * @n: the element to add to the hash list. 704 | * @h: the list to add to. 705 | * 706 | * Description: 707 | * Adds the specified element to the specified hlist, 708 | * while permitting racing traversals. 709 | * 710 | * The caller must take whatever precautions are necessary 711 | * (such as holding appropriate locks) to avoid racing 712 | * with another list-mutation primitive, such as hlist_add_head_rcu() 713 | * or hlist_del_rcu(), running on this same list. 714 | * However, it is perfectly legal to run concurrently with 715 | * the _rcu list-traversal primitives, such as 716 | * hlist_for_each_entry_rcu(), used to prevent memory-consistency 717 | * problems on Alpha CPUs. 
* Regardless of the type of CPU, the
 * list-traversal primitive must be guarded by rcu_read_lock().
 */
static inline void hlist_add_head_rcu(struct hlist_node *n,
					struct hlist_head *h)
{
	struct hlist_node *first = h->first;
	/* Fill in both of @n's links before anything points at it. */
	n->next = first;
	n->pprev = &h->first;

	if (first)
		first->pprev = &n->next;
	/* Storing into h->first is what makes @n reachable from the head. */
	h->first = n;
}

/*
 * hlist_add_before - insert @n immediately in front of @next
 * @n: the new node
 * @next: an existing node already on a list
 *
 * next must be != NULL
 */
static inline void hlist_add_before(struct hlist_node *n,
					struct hlist_node *next)
{
	n->pprev = next->pprev;
	n->next = next;
	next->pprev = &n->next;
	/* Redirect whatever used to point at @next so it points at @n. */
	*(n->pprev) = n;
}

/*
 * hlist_add_after - insert a node immediately behind an existing one
 * @n: the existing node already on a list
 * @next: the new node, which ends up as @n's successor
 *
 * Note the naming: despite being called @next, the second argument is
 * the node being inserted.
 */
static inline void hlist_add_after(struct hlist_node *n,
					struct hlist_node *next)
{
	next->next = n->next;
	n->next = next;
	next->pprev = &n->next;

	/* Fix the back reference of the node that now follows @next. */
	if(next->next)
		next->next->pprev  = &next->next;
}

/**
 * hlist_add_before_rcu
 * @n: the new element to add to the hash list.
 * @next: the existing element to add the new element before.
 *
 * Description:
 * Adds the specified element to the specified hlist
 * before the specified node while permitting racing traversals.
 *
 * The caller must take whatever precautions are necessary
 * (such as holding appropriate locks) to avoid racing
 * with another list-mutation primitive, such as hlist_add_head_rcu()
 * or hlist_del_rcu(), running on this same list.
 * However, it is perfectly legal to run concurrently with
 * the _rcu list-traversal primitives, such as
 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
 * problems on Alpha CPUs.
770 | */ 771 | static inline void hlist_add_before_rcu(struct hlist_node *n, 772 | struct hlist_node *next) 773 | { 774 | n->pprev = next->pprev; 775 | n->next = next; 776 | 777 | next->pprev = &n->next; 778 | *(n->pprev) = n; 779 | } 780 | 781 | /** 782 | * hlist_add_after_rcu 783 | * @prev: the existing element to add the new element after. 784 | * @n: the new element to add to the hash list. 785 | * 786 | * Description: 787 | * Adds the specified element to the specified hlist 788 | * after the specified node while permitting racing traversals. 789 | * 790 | * The caller must take whatever precautions are necessary 791 | * (such as holding appropriate locks) to avoid racing 792 | * with another list-mutation primitive, such as hlist_add_head_rcu() 793 | * or hlist_del_rcu(), running on this same list. 794 | * However, it is perfectly legal to run concurrently with 795 | * the _rcu list-traversal primitives, such as 796 | * hlist_for_each_entry_rcu(), used to prevent memory-consistency 797 | * problems on Alpha CPUs. 798 | */ 799 | static inline void hlist_add_after_rcu(struct hlist_node *prev, 800 | struct hlist_node *n) 801 | { 802 | n->next = prev->next; 803 | n->pprev = &prev->next; 804 | 805 | prev->next = n; 806 | if (n->next) 807 | n->next->pprev = &n->next; 808 | } 809 | 810 | #define hlist_entry(ptr, type, member) container_of(ptr,type,member) 811 | 812 | #define hlist_for_each(pos, head) \ 813 | for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ 814 | pos = pos->next) 815 | 816 | #define hlist_for_each_safe(pos, n, head) \ 817 | for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ 818 | pos = n) 819 | 820 | /** 821 | * hlist_for_each_entry - iterate over list of given type 822 | * @tpos: the type * to use as a loop cursor. 823 | * @pos: the &struct hlist_node to use as a loop cursor. 824 | * @head: the head for your list. 825 | * @member: the name of the hlist_node within the struct. 
826 | */ 827 | #define hlist_for_each_entry(tpos, pos, head, member) \ 828 | for (pos = (head)->first; \ 829 | pos && ({ prefetch(pos->next); 1;}) && \ 830 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 831 | pos = pos->next) 832 | 833 | /** 834 | * hlist_for_each_entry_continue - iterate over a hlist continuing after current point 835 | * @tpos: the type * to use as a loop cursor. 836 | * @pos: the &struct hlist_node to use as a loop cursor. 837 | * @member: the name of the hlist_node within the struct. 838 | */ 839 | #define hlist_for_each_entry_continue(tpos, pos, member) \ 840 | for (pos = (pos)->next; \ 841 | pos && ({ prefetch(pos->next); 1;}) && \ 842 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 843 | pos = pos->next) 844 | 845 | /** 846 | * hlist_for_each_entry_from - iterate over a hlist continuing from current point 847 | * @tpos: the type * to use as a loop cursor. 848 | * @pos: the &struct hlist_node to use as a loop cursor. 849 | * @member: the name of the hlist_node within the struct. 850 | */ 851 | #define hlist_for_each_entry_from(tpos, pos, member) \ 852 | for (; pos && ({ prefetch(pos->next); 1;}) && \ 853 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 854 | pos = pos->next) 855 | 856 | /** 857 | * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry 858 | * @tpos: the type * to use as a loop cursor. 859 | * @pos: the &struct hlist_node to use as a loop cursor. 860 | * @n: another &struct hlist_node to use as temporary storage 861 | * @head: the head for your list. 862 | * @member: the name of the hlist_node within the struct. 
863 | */ 864 | #define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ 865 | for (pos = (head)->first; \ 866 | pos && ({ n = pos->next; 1; }) && \ 867 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 868 | pos = n) 869 | 870 | /** 871 | * hlist_for_each_entry_rcu - iterate over rcu list of given type 872 | * @tpos: the type * to use as a loop cursor. 873 | * @pos: the &struct hlist_node to use as a loop cursor. 874 | * @head: the head for your list. 875 | * @member: the name of the hlist_node within the struct. 876 | * 877 | * This list-traversal primitive may safely run concurrently with 878 | * the _rcu list-mutation primitives such as hlist_add_head_rcu() 879 | * as long as the traversal is guarded by rcu_read_lock(). 880 | */ 881 | #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ 882 | for (pos = (head)->first; \ 883 | rcu_dereference(pos) && ({ prefetch(pos->next); 1;}) && \ 884 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 885 | pos = pos->next) 886 | 887 | #endif 888 | -------------------------------------------------------------------------------- /lltop-ev.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "string1.h" 22 | #include "lltop.h" 23 | #include "dict.h" 24 | #include "list.h" 25 | 26 | const char *job_mapper_cmd = "cat /tmp/lltop/mapper-fifo"; 27 | const char *nid_file_path = "/tmp/lltop/client-nids"; 28 | 29 | #define BIND_HOST "0.0.0.0" /* INADDR_ANY */ 30 | #define BIND_PORT "9909" 31 | #define NR_STATS 3 32 | #define NR_CLIENTS_HINT 4096 33 | #define NR_JOBS_HINT 256 34 | #define NR_SERVS_HINT 128 35 | #define RX_BUF_SIZE 8096 36 | #define JOB_NONE "0" 37 | #define 
FE_AGE_LIMIT 32 38 | #define REFRESH_INTERVAL 10.0 39 | #define SERV_INTERVAL 10.0 40 | 41 | static size_t nr_jobs; 42 | static struct dict name_job_dict; 43 | static struct dict name_client_dict; 44 | static struct dict name_serv_dict; 45 | static struct dict nid_client_dict; 46 | 47 | struct pair { 48 | void *p_value; 49 | char p_key[]; 50 | }; 51 | 52 | struct rx_buf { 53 | char *r_buf; 54 | size_t r_seen, r_count, r_buf_size; 55 | unsigned int r_overflow:1; 56 | }; 57 | 58 | struct job_mapper { 59 | struct ev_child jm_child_w; 60 | struct ev_io jm_io_w; 61 | struct ev_timer jm_timer_w; /* TODO */ 62 | struct rx_buf jm_rx_buf; 63 | const char *jm_cmd; /* Or cmdline. */ 64 | pid_t jm_pid; 65 | }; 66 | 67 | struct job_struct { 68 | long j_stats[NR_STATS]; 69 | struct list_head j_client_list; 70 | struct list_head j_frame_list; 71 | char *j_owner, *j_dir; 72 | hash_t j_hash; 73 | char j_name[]; 74 | }; 75 | 76 | struct client_struct { 77 | struct job_struct *c_job; 78 | struct list_head c_job_link; 79 | char c_name[]; 80 | }; 81 | 82 | struct frame_entry { 83 | struct job_struct *fe_job; 84 | struct list_head fe_job_link; 85 | long fe_stats[2][NR_STATS]; 86 | unsigned int fe_gen; 87 | char fe_name[]; 88 | }; 89 | 90 | /* TODO Add s_fs. */ 91 | struct serv_struct { 92 | struct ev_io s_io_w; 93 | struct ev_timer s_timer_w; 94 | struct rx_buf s_rx_buf; 95 | struct dict s_frame; 96 | struct sockaddr_storage s_addr; 97 | socklen_t s_addrlen; 98 | /* TODO long s_stats[NR_STATS]; */ 99 | unsigned int s_gen; 100 | unsigned int s_connected:1; 101 | char s_name[]; 102 | }; 103 | 104 | #define OOM() FATAL("cannot allocate memory\n"); 105 | 106 | #define may_ignore_errno() (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) 107 | 108 | #define GET_NAMED(ptr,member,name) \ 109 | (ptr) = (typeof(ptr)) (((char *) name) - offsetof(typeof(*ptr), member)) 110 | 111 | /* TODO Try using offsetof() in malloc() below. 
*/ 112 | 113 | #define ALLOC_NAMED(ptr,member,name) do { \ 114 | ptr = malloc(sizeof(*ptr) + strlen(name) + 1); \ 115 | if (ptr == NULL) \ 116 | OOM(); \ 117 | memset(ptr, 0, sizeof(*ptr)); \ 118 | strcpy(ptr->member, name); \ 119 | } while (0) 120 | 121 | static inline void fd_set_nonblock(int fd) 122 | { 123 | int flags = fcntl(fd, F_GETFL); 124 | fcntl(fd, F_SETFL, flags|O_NONBLOCK); 125 | } 126 | 127 | int rx_buf_init(struct rx_buf *rb, size_t size) 128 | { 129 | memset(rb, 0, sizeof(*rb)); 130 | 131 | rb->r_buf = malloc(size); 132 | if (rb->r_buf == NULL) 133 | return -1; 134 | 135 | rb->r_buf_size = size; 136 | return 0; 137 | } 138 | 139 | ssize_t rx_buf_read(int fd, struct rx_buf *rb) 140 | { 141 | char *read_pos; 142 | ssize_t nr_avail, nr_read; 143 | 144 | if (rb->r_seen > 0) { 145 | rb->r_count -= rb->r_seen; 146 | memmove(rb->r_buf, rb->r_buf + rb->r_seen, rb->r_count); 147 | rb->r_seen = 0; 148 | } 149 | 150 | again: 151 | read_pos = rb->r_buf + rb->r_count; 152 | nr_avail = rb->r_buf_size - rb->r_count; 153 | 154 | if (nr_avail <= 0) { 155 | rb->r_count = 0; 156 | rb->r_overflow = 1; 157 | goto again; 158 | } 159 | 160 | nr_read = read(fd, read_pos, nr_avail); 161 | if (nr_read > 0) 162 | rb->r_count += nr_read; 163 | 164 | return nr_read; 165 | } 166 | 167 | char *rx_buf_iter(struct rx_buf *rb) 168 | { 169 | char *pos, *sep, *end; 170 | 171 | again: 172 | pos = rb->r_buf + rb->r_seen; 173 | end = rb->r_buf + rb->r_count; 174 | sep = memchr(pos, '\n', end - pos); /* XXX '\n' */ 175 | 176 | if (sep == NULL) 177 | return NULL; 178 | 179 | rb->r_seen += sep - pos + 1; 180 | 181 | if (rb->r_overflow) { 182 | rb->r_overflow = 0; 183 | goto again; 184 | } 185 | 186 | *sep = 0; 187 | 188 | return pos; 189 | } 190 | 191 | struct job_struct *job_lookup(const char *name, int create) 192 | { 193 | struct job_struct *job; 194 | hash_t hash = dict_strhash(name); 195 | struct dict_entry *de = dict_entry_ref(&name_job_dict, hash, name); 196 | if (de->d_key != NULL) 
{ 197 | GET_NAMED(job, j_name, de->d_key); 198 | return job; 199 | } 200 | 201 | if (!create) 202 | return NULL; 203 | 204 | TRACE("creating job `%s'\n", name); 205 | ALLOC_NAMED(job, j_name, name); 206 | INIT_LIST_HEAD(&job->j_client_list); 207 | INIT_LIST_HEAD(&job->j_frame_list); 208 | job->j_hash = hash; 209 | /* TODO owner, workdir. */ 210 | 211 | if (dict_entry_set(&name_job_dict, de, hash, job->j_name) < 0) 212 | OOM(); 213 | 214 | nr_jobs++; 215 | 216 | return job; 217 | } 218 | 219 | void job_put(struct job_struct *job) 220 | { 221 | struct dict_entry *de; 222 | 223 | if (!list_empty(&job->j_client_list)) 224 | return; 225 | 226 | if (!list_empty(&job->j_frame_list)) 227 | return; 228 | 229 | TRACE("freeing job `%s'\n", job->j_name); 230 | de = dict_entry_ref(&name_job_dict, job->j_hash, job->j_name); 231 | dict_entry_remv(&name_job_dict, de, 1); /* XXX Resize. */ 232 | free(job); 233 | 234 | nr_jobs--; 235 | } 236 | 237 | 238 | struct client_struct *client_lookup_by_name(const char *name, int create) 239 | { 240 | struct client_struct *cli; 241 | hash_t hash = dict_strhash(name); 242 | struct dict_entry *de = dict_entry_ref(&name_client_dict, hash, name); 243 | if (de->d_key != NULL) { 244 | GET_NAMED(cli, c_name, de->d_key); 245 | return cli; 246 | } 247 | 248 | if (!create) 249 | return NULL; 250 | 251 | ALLOC_NAMED(cli, c_name, name); /* XXX */ 252 | INIT_LIST_HEAD(&cli->c_job_link); 253 | 254 | if (dict_entry_set(&name_client_dict, de, hash, cli->c_name) < 0) 255 | OOM(); 256 | 257 | return cli; 258 | } 259 | 260 | void client_set_job_by_name(struct client_struct *cli, const char *job_name) 261 | { 262 | struct job_struct *cur_job, *new_job; 263 | 264 | if (strcmp(job_name, JOB_NONE) == 0) 265 | job_name = cli->c_name; 266 | 267 | cur_job = cli->c_job; 268 | if (cur_job != NULL && strcmp(job_name, cur_job->j_name) == 0) 269 | return; 270 | 271 | TRACE("adding client `%s' to job `%s'\n", cli->c_name, job_name); 272 | new_job = job_lookup(job_name, 1); 
273 | if (new_job == NULL) 274 | OOM(); 275 | 276 | list_move(&cli->c_job_link, &new_job->j_client_list); 277 | cli->c_job = new_job; 278 | 279 | if (cur_job != NULL) 280 | job_put(cur_job); 281 | } 282 | 283 | static inline struct job_struct * 284 | client_get_job(struct client_struct *cli, int create) 285 | { 286 | if (cli->c_job == NULL && create) /* Use client name as job name. */ 287 | client_set_job_by_name(cli, cli->c_name); 288 | 289 | return cli->c_job; 290 | } 291 | 292 | void client_add_nid(struct client_struct *cli, const char *nid) 293 | { 294 | struct pair *p; 295 | hash_t hash = dict_strhash(nid); 296 | struct dict_entry *de = dict_entry_ref(&nid_client_dict, hash, nid); 297 | if (de->d_key != NULL) { 298 | GET_NAMED(p, p_key, de->d_key); 299 | if (cli != p->p_value) 300 | ERROR("NID `%s' assigned to clients `%s' and `%s'\n", 301 | nid, ((struct client_struct *) (p->p_value))->c_name, cli->c_name); 302 | p->p_value = cli; /* Most recent wins. */ 303 | return; 304 | } 305 | 306 | TRACE("adding NID `%s' to client `%s'\n", nid, cli->c_name); 307 | ALLOC_NAMED(p, p_key, nid); 308 | p->p_value = cli; 309 | 310 | if (dict_entry_set(&nid_client_dict, de, hash, p->p_key) < 0) 311 | OOM(); 312 | } 313 | 314 | struct client_struct *client_lookup_by_nid(const char *nid, int create) 315 | { 316 | struct pair *p; 317 | hash_t hash = dict_strhash(nid); 318 | struct dict_entry *de = dict_entry_ref(&nid_client_dict, hash, nid); 319 | if (de->d_key != NULL) { 320 | GET_NAMED(p, p_key, de->d_key); 321 | goto have_p; 322 | } 323 | 324 | if (!create) 325 | return NULL; 326 | 327 | /* Fake a client, using NID for name. 
*/ 328 | ALLOC_NAMED(p, p_key, nid); 329 | p->p_value = client_lookup_by_name(nid, 1); 330 | if (p->p_value == NULL) 331 | OOM(); 332 | 333 | if (dict_entry_set(&nid_client_dict, de, hash, p->p_key) < 0) 334 | OOM(); 335 | 336 | have_p: 337 | return p->p_value; 338 | } 339 | 340 | static void job_mapper_child_cb(EV_P_ ev_child *w, int revents) 341 | { 342 | /* TODO Check revents. */ 343 | 344 | struct job_mapper *jm = container_of(w, struct job_mapper, jm_child_w); 345 | 346 | ev_child_stop(EV_A_ w); 347 | ERROR("job mapper `%s', pid %d exited with status %x\n", 348 | jm->jm_cmd, w->rpid, w->rstatus); /* TODO WIFEXITED, ... */ 349 | 350 | jm->jm_pid = 0; 351 | } 352 | 353 | static void job_mapper_io_cb(EV_P_ ev_io *w, int revents) 354 | { 355 | /* TODO EV_ERROR. */ 356 | 357 | struct job_mapper *jm = container_of(w, struct job_mapper, jm_io_w); 358 | struct rx_buf *rb = &jm->jm_rx_buf; 359 | 360 | ssize_t nr_read = rx_buf_read(w->fd, rb); 361 | if (nr_read < 0) { 362 | if (may_ignore_errno()) 363 | return; 364 | /* TODO Add pid. Restart. 
*/ 365 | FATAL("cannot read from job mapper `%s': %m\n", jm->jm_cmd); 366 | } 367 | 368 | char *msg, *cli_name, *job_name; 369 | while ((msg = rx_buf_iter(rb)) != NULL) { 370 | cli_name = wsep(&msg); 371 | job_name = wsep(&msg); 372 | 373 | if (cli_name == NULL || job_name == NULL) 374 | continue; 375 | 376 | struct client_struct *cli = client_lookup_by_name(cli_name, 1); 377 | if (cli == NULL) 378 | OOM(); 379 | 380 | client_set_job_by_name(cli, job_name); 381 | } 382 | } 383 | 384 | int job_mapper_init(EV_P_ struct job_mapper *jm, const char *cmd) 385 | { 386 | int pfd[2]; 387 | if (pipe(pfd) < 0) 388 | FATAL("cannot create pipe: %m\n"); 389 | 390 | pid_t pid = fork(); 391 | if (pid < 0) 392 | FATAL("cannot start job mapper: %m\n"); 393 | 394 | if (pid == 0) { 395 | close(pfd[0]); 396 | dup2(pfd[1], 1); 397 | signal(SIGPIPE, SIG_DFL); 398 | setpgid(0, 0); 399 | execl("/bin/sh", "sh", "-c", cmd, (char *) NULL); 400 | ERROR("cannot execute command `%s': %m\n", cmd); 401 | exit(255); 402 | } 403 | 404 | close(pfd[1]); 405 | 406 | fd_set_nonblock(pfd[0]); 407 | 408 | memset(jm, 0, sizeof(*jm)); 409 | 410 | ev_io_init(&jm->jm_io_w, &job_mapper_io_cb, pfd[0], EV_READ); 411 | ev_io_start(EV_A_ &jm->jm_io_w); 412 | 413 | ev_child_init(&jm->jm_child_w, &job_mapper_child_cb, pid, 0); 414 | ev_child_start(EV_A_ &jm->jm_child_w); 415 | 416 | if (rx_buf_init(&jm->jm_rx_buf, RX_BUF_SIZE) < 0) 417 | OOM(); 418 | 419 | jm->jm_cmd = cmd; 420 | jm->jm_pid = pid; 421 | 422 | return 0; 423 | } 424 | 425 | static void serv_io_cb(EV_P_ ev_io *w, int revents); 426 | static void serv_timer_cb(EV_P_ ev_timer *w, int revents); 427 | 428 | struct serv_struct * 429 | serv_create(const char *name, ev_tstamp offset, ev_tstamp interval) 430 | { 431 | struct serv_struct *serv; 432 | hash_t hash = dict_strhash(name); 433 | struct dict_entry *de = dict_entry_ref(&name_serv_dict, hash, name); 434 | if (de->d_key != NULL) { 435 | GET_NAMED(serv, s_name, de->d_key); 436 | ERROR("server struct `%s' 
already exists\n", name); 437 | return serv; 438 | } 439 | 440 | TRACE("creating serv `%s', offset %f, interval %f\n", name, offset, interval); 441 | ALLOC_NAMED(serv, s_name, name); 442 | ev_init(&serv->s_io_w, &serv_io_cb); /* Don't start IO. */ 443 | ev_timer_init(&serv->s_timer_w, &serv_timer_cb, offset, interval); 444 | 445 | if (rx_buf_init(&serv->s_rx_buf, RX_BUF_SIZE) < 0) 446 | OOM(); 447 | if (dict_init(&serv->s_frame, NR_JOBS_HINT) < 0) 448 | OOM(); 449 | 450 | return serv; 451 | } 452 | 453 | static void serv_disconnect(EV_P_ struct serv_struct *serv) 454 | { 455 | if (!serv->s_connected) 456 | return; 457 | 458 | TRACE("disconnection server `%s'\n", serv->s_name); 459 | ev_io_stop(EV_A_ &serv->s_io_w); 460 | close(serv->s_io_w.fd); 461 | serv->s_io_w.fd = -1; 462 | /* Clear frames? */ 463 | /* ... */ 464 | 465 | serv->s_connected = 0; 466 | } 467 | 468 | static struct serv_struct * 469 | serv_lookup(const char *name, int create) 470 | { 471 | struct serv_struct *serv = NULL; 472 | hash_t hash = dict_strhash(name); 473 | struct dict_entry *de = dict_entry_ref(&name_serv_dict, hash, name); 474 | if (de->d_key != NULL) { 475 | GET_NAMED(serv, s_name, de->d_key); 476 | goto have_serv; 477 | } 478 | 479 | /* TODO port. 
*/ 480 | if (create) 481 | serv = serv_create(name, 0., SERV_INTERVAL); 482 | 483 | have_serv: 484 | return serv; 485 | } 486 | 487 | static void serv_connect(EV_P_ struct serv_struct *serv, 488 | int sfd, struct sockaddr *addr, socklen_t addrlen) 489 | { 490 | serv_disconnect(EV_A_ serv); 491 | TRACE("connecting server `%s'\n", serv->s_name); 492 | memcpy(&serv->s_addr, addr, addrlen); 493 | serv->s_addrlen = addrlen; 494 | ev_io_set(&serv->s_io_w, sfd, EV_READ); 495 | ev_io_start(EV_A_ &serv->s_io_w); 496 | ev_timer_start(EV_A_ &serv->s_timer_w); 497 | serv->s_connected = 1; 498 | } 499 | 500 | static void serv_error(EV_P_ struct serv_struct *serv) 501 | { 502 | ERROR("event error from server `%s': %m\n", serv->s_name); 503 | serv_disconnect(EV_A_ serv); 504 | } 505 | 506 | static void serv_msg(struct serv_struct *serv, char *msg) 507 | { 508 | char *cli_nid = wsep(&msg); 509 | if (cli_nid == NULL || msg == NULL) 510 | return; 511 | 512 | /* TODO Use NR_STATS, strtol(). */ 513 | long stats[NR_STATS]; 514 | if (sscanf(msg, "%ld %ld %ld", &stats[0], &stats[1], &stats[2]) != 3) 515 | return; 516 | 517 | struct client_struct *cli = client_lookup_by_nid(cli_nid, 1); 518 | if (cli == NULL) 519 | OOM(); 520 | 521 | struct job_struct *job = client_get_job(cli, 1); 522 | if (job == NULL) 523 | OOM(); 524 | 525 | struct dict_entry *de; 526 | struct frame_entry *fe; 527 | de = dict_entry_ref(&serv->s_frame, job->j_hash, job->j_name); 528 | if (de->d_key != NULL) { 529 | GET_NAMED(fe, fe_name, de->d_key); 530 | goto have_fe; 531 | } 532 | 533 | ALLOC_NAMED(fe, fe_name, job->j_name); 534 | fe->fe_job = job; 535 | list_add(&fe->fe_job_link, &job->j_frame_list); 536 | fe->fe_gen = serv->s_gen; 537 | 538 | if (dict_entry_set(&serv->s_frame, de, job->j_hash, fe->fe_name) < 0) 539 | FATAL("dict_entry_set: %m\n"); 540 | 541 | have_fe: 542 | TRACE("serv `%s', s_gen %u, job `%s', fe_gen %u\n", 543 | serv->s_name, serv->s_gen, fe->fe_job->j_name, fe->fe_gen); 544 | if (fe->fe_gen == 
serv->s_gen) 545 | /* OK */; 546 | else if (fe->fe_gen == serv->s_gen - 1) 547 | memset(fe->fe_stats[serv->s_gen % 2], 0, NR_STATS * sizeof(long)); 548 | else 549 | memset(fe->fe_stats, 0, 2 * NR_STATS * sizeof(long)); 550 | 551 | fe->fe_gen = serv->s_gen; 552 | 553 | int i; 554 | for (i = 0; i < NR_STATS; i++) 555 | fe->fe_stats[fe->fe_gen % 2][i] += stats[i]; 556 | 557 | TRACE("fe_stats %ld %ld %ld\n", 558 | fe->fe_stats[fe->fe_gen % 2][0], 559 | fe->fe_stats[fe->fe_gen % 2][1], 560 | fe->fe_stats[fe->fe_gen % 2][2]); 561 | } 562 | 563 | static void serv_io_cb(EV_P_ ev_io *w, int revents) 564 | { 565 | struct serv_struct *serv = container_of(w, struct serv_struct, s_io_w); 566 | struct rx_buf *rb = &serv->s_rx_buf; 567 | 568 | if (revents & EV_ERROR) { 569 | /* ... */ 570 | serv_error(EV_A_ serv); 571 | return; 572 | } 573 | 574 | ssize_t nr_read = rx_buf_read(w->fd, rb); 575 | if (nr_read < 0) { 576 | if (may_ignore_errno()) 577 | return; 578 | ERROR("cannot read from server `%s': %m\n", serv->s_name); 579 | serv_error(EV_A_ serv); 580 | return; 581 | } 582 | 583 | char *msg; 584 | while ((msg = rx_buf_iter(rb)) != NULL) 585 | serv_msg(serv, msg); 586 | } 587 | 588 | static void serv_timer_cb(EV_P_ ev_timer *w, int revents) 589 | { 590 | struct serv_struct *serv = container_of(w, struct serv_struct, s_timer_w); 591 | if (!serv->s_connected) 592 | /* TODO */; 593 | 594 | size_t de_iter = 0; 595 | struct dict_entry *de; 596 | while ((de = dict_for_each_ref(&serv->s_frame, &de_iter)) != NULL) { 597 | struct frame_entry *fe; 598 | GET_NAMED(fe, fe_name, de->d_key); 599 | 600 | int fe_age = serv->s_gen - fe->fe_gen; 601 | if (fe_age > FE_AGE_LIMIT) { 602 | list_del(&fe->fe_job_link); 603 | job_put(fe->fe_job); 604 | dict_entry_remv(&serv->s_frame, de, 0); 605 | free(fe); 606 | continue; 607 | } 608 | 609 | long *s_prev = fe->fe_stats[(serv->s_gen - 1) % 2]; 610 | long *s_next = fe->fe_stats[(serv->s_gen - 0) % 2]; 611 | 612 | int i; 613 | for (i = 0; i < NR_STATS; 
i++) { 614 | if (fe_age == 0) 615 | fe->fe_job->j_stats[i] += s_next[i] - s_prev[i]; 616 | else if (fe_age == 1) 617 | fe->fe_job->j_stats[i] -= s_prev[i]; 618 | } 619 | } 620 | dict_allow_resize(&serv->s_frame, NR_JOBS_HINT); 621 | 622 | serv->s_gen++; 623 | } 624 | 625 | static int 626 | job_stats_cmp(const void *v1, const void *v2) 627 | { 628 | const struct job_struct **j1 = (void *) v1, **j2 = (void *) v2; 629 | const long *s1 = (*j1)->j_stats, *s2 = (*j2)->j_stats; 630 | 631 | /* Sort descending by writes, then reads, then requests. */ 632 | /* TODO Make sort rank configurable. */ 633 | int i; 634 | for (i = 0; i < NR_STATS; i++) { /* XXX ORDER */ 635 | long diff = s1[i] - s2[i]; 636 | if (diff != 0) 637 | return diff > 0 ? -1 : 1; 638 | } 639 | 640 | return 0; 641 | } 642 | 643 | static void refresh_display(void) 644 | { 645 | TRACE("refresh LINES %d, COLS %d\n", LINES, COLS); 646 | 647 | struct job_struct **job_list = NULL; 648 | job_list = calloc(nr_jobs, sizeof(job_list[0])); 649 | if (job_list == NULL) 650 | OOM(); 651 | 652 | size_t i = 0, j = 0; 653 | char *name; 654 | while ((name = dict_for_each(&name_job_dict, &i)) != NULL && j < nr_jobs) 655 | GET_NAMED(job_list[j++], j_name, name); 656 | 657 | if (j != nr_jobs) 658 | FATAL("internal error: j %zu, nr_jobs %zu\n", j, nr_jobs); 659 | 660 | qsort(job_list, nr_jobs, sizeof(job_list[0]), &job_stats_cmp); 661 | 662 | for (j = 0; j < nr_jobs && j < LINES; j++) { 663 | struct job_struct *job = job_list[j]; 664 | 665 | char buf[4096]; 666 | snprintf(buf, sizeof(buf), "%s %ld %ld %ld\n", job->j_name, 667 | job->j_stats[0], job->j_stats[1], job->j_stats[2]); 668 | mvaddnstr(j, 0, buf, -1); 669 | } 670 | 671 | free(job_list); 672 | 673 | refresh(); 674 | } 675 | 676 | int read_nid_file(const char *path) 677 | { 678 | int rc = -1; 679 | FILE *file = NULL; 680 | char *line = NULL; 681 | size_t line_size = 0; 682 | int line_nr = 0; 683 | 684 | file = fopen(path, "r"); 685 | if (file == NULL) { 686 | ERROR("cannot 
open `%s': %m\n", path); 687 | goto out; 688 | } 689 | 690 | while (getline(&line, &line_size, file) >= 0) { 691 | char *iter, *name, *nid; 692 | struct client_struct *cli; 693 | 694 | line_nr++; 695 | iter = line; 696 | name = wsep(&iter); 697 | if (name == NULL) 698 | continue; 699 | 700 | cli = client_lookup_by_name(name, 1); 701 | if (cli == NULL) 702 | continue; 703 | 704 | while ((nid = wsep(&iter)) != NULL) 705 | client_add_nid(cli, nid); 706 | } 707 | rc = 0; 708 | 709 | out: 710 | free(line); 711 | if (file != NULL) 712 | fclose(file); 713 | 714 | return rc; 715 | } 716 | 717 | static void listen_cb(EV_P_ ev_io *w, int revents) 718 | { 719 | /* TODO Handle EV_ERROR in revents. */ 720 | int sfd; 721 | struct sockaddr_storage addr; 722 | socklen_t addrlen = sizeof(addr); 723 | struct serv_struct *serv; 724 | 725 | sfd = accept(w->fd, (struct sockaddr *) &addr, &addrlen); 726 | if (sfd < 0) { 727 | if (may_ignore_errno()) 728 | return; 729 | FATAL("cannot accept connections: %m\n"); /* ... */ 730 | } 731 | fd_set_nonblock(sfd); 732 | 733 | char name[NI_MAXHOST], port[NI_MAXSERV]; 734 | int gni_rc = getnameinfo((struct sockaddr *) &addr, addrlen, 735 | name, sizeof(name), port, sizeof(port), 0); 736 | if (gni_rc != 0) { 737 | ERROR("cannot get name info for server connection: %s\n", gai_strerror(gni_rc)); 738 | goto err; 739 | } 740 | 741 | TRACE("received connection from host `%s', port `%s'\n", name, port); 742 | serv = serv_lookup(name, 1); /* XXX create. Port? 
*/ 743 | if (serv == NULL) 744 | goto err; 745 | 746 | serv_connect(EV_A_ serv, sfd, (struct sockaddr *) &addr, addrlen); 747 | return; 748 | 749 | err: 750 | close(sfd); 751 | } 752 | 753 | static void stdin_cb(EV_P_ ev_io *w, int revents) 754 | { 755 | int c = getch(); 756 | if (c == ERR) 757 | return; 758 | 759 | TRACE("got `%c' from stdin\n", c); 760 | switch (c) { 761 | case ' ': 762 | case '\n': 763 | refresh_display(); 764 | break; 765 | case 'q': 766 | ev_break(EV_A_ EVBREAK_ALL); 767 | break; 768 | default: 769 | ERROR("unknown command `%c': try `h' for help\n", c); /* TODO help. */ 770 | break; 771 | } 772 | } 773 | 774 | static void refresh_cb(EV_P_ ev_timer *w, int revents) 775 | { 776 | refresh_display(); 777 | } 778 | 779 | static void sigint_cb(EV_P_ ev_signal *w, int revents) 780 | { 781 | TRACE("handling signal %d `%s'\n", w->signum, strsignal(w->signum)); 782 | ev_break(EV_A_ EVBREAK_ALL); 783 | } 784 | 785 | static void sigwinch_cb(EV_P_ ev_signal *w, int revents) 786 | { 787 | TRACE("handling signal %d `%s'\n", w->signum, strsignal(w->signum)); 788 | struct winsize ws; 789 | 790 | int fd = open("/dev/tty", O_RDWR); 791 | if (fd < 0) { 792 | ERROR("cannot open `/dev/tty': %m\n"); 793 | goto out; 794 | } 795 | 796 | if (ioctl(fd, TIOCGWINSZ, &ws) < 0) { 797 | ERROR("cannot get window size: %m\n"); 798 | goto out; 799 | } 800 | 801 | LINES = ws.ws_row; 802 | COLS = ws.ws_col; 803 | 804 | refresh_display(); 805 | out: 806 | if (fd >= 0) 807 | close(fd); 808 | } 809 | 810 | int main(int argc, char *argv[]) 811 | { 812 | const char *bind_host = BIND_HOST, *bind_port = BIND_PORT; 813 | int listen_backlog = 128; /* XXX */ 814 | struct job_mapper mapper; 815 | 816 | signal(SIGPIPE, SIG_IGN); 817 | 818 | if (dict_init(&name_client_dict, NR_CLIENTS_HINT) < 0) 819 | OOM(); 820 | if (dict_init(&name_job_dict, NR_JOBS_HINT) < 0) 821 | OOM(); 822 | if (dict_init(&name_serv_dict, NR_SERVS_HINT) < 0) 823 | OOM(); 824 | if (dict_init(&nid_client_dict, 
NR_CLIENTS_HINT) < 0)
    OOM();

  if (read_nid_file(nid_file_path) < 0)
    FATAL("cannot read NID file `%s'\n", nid_file_path);

  if (job_mapper_init(EV_DEFAULT_ &mapper, job_mapper_cmd) < 0)
    FATAL("cannot start job mapper `%s'\n", job_mapper_cmd);

  /* Begin curses magic. */
  /* setlocale(LC_ALL, ""); */
  initscr();
  cbreak();
  noecho();
  nonl();
  intrflush(stdscr, 0);
  keypad(stdscr, 1);
  nodelay(stdscr, 1);

  struct addrinfo *info, *list, hints = {
    .ai_family = AF_INET, /* Still needed. */
    .ai_socktype = SOCK_STREAM,
    .ai_flags = AI_PASSIVE, /* Ignored if bind_host != NULL. */
  };

  int gai_rc = getaddrinfo(bind_host, bind_port, &hints, &list);
  if (gai_rc != 0)
    FATAL("cannot resolve host `%s', service `%s': %s\n",
          bind_host, bind_port, gai_strerror(gai_rc));

  /* Try each candidate address until one can be bound. */
  int lfd = -1;
  for (info = list; info != NULL; info = info->ai_next) {
    lfd = socket(info->ai_family, info->ai_socktype, info->ai_protocol);
    if (lfd < 0)
      continue;

    if (bind(lfd, info->ai_addr, info->ai_addrlen) == 0)
      break;

    close(lfd);
    lfd = -1;
  }
  /* Free the head of the list returned by getaddrinfo(), not the
     loop cursor: `info' is NULL when every bind failed and may point
     into the middle of the list when one succeeded. */
  freeaddrinfo(list);

  if (lfd < 0)
    FATAL("cannot bind to host `%s', service `%s': %m\n", bind_host, bind_port);

  fd_set_nonblock(lfd); /* SOCK_NONBLOCK */

  if (listen(lfd, listen_backlog) < 0)
    FATAL("cannot listen on `%s', service `%s': %m\n", bind_host, bind_port);

  struct ev_io listen_w;
  ev_io_init(&listen_w, &listen_cb, lfd, EV_READ);
  ev_io_start(EV_DEFAULT_ &listen_w);

  struct ev_io stdin_w;
  ev_io_init(&stdin_w, &stdin_cb, 0, EV_READ);
  ev_io_start(EV_DEFAULT_ &stdin_w);

  struct ev_timer refresh_w;
  ev_timer_init(&refresh_w, &refresh_cb, 0, REFRESH_INTERVAL);
  ev_timer_start(EV_DEFAULT_ &refresh_w);

  struct ev_signal
sigint_w; 889 | ev_signal_init(&sigint_w, &sigint_cb, SIGINT); 890 | ev_signal_start(EV_DEFAULT_ &sigint_w); 891 | 892 | struct ev_signal sigwinch_w; 893 | ev_signal_init(&sigwinch_w, &sigwinch_cb, SIGWINCH); 894 | ev_signal_start(EV_DEFAULT_ &sigwinch_w); 895 | 896 | ev_run(EV_DEFAULT_ 0); 897 | 898 | if (mapper.jm_pid > 0 && killpg(mapper.jm_pid, SIGTERM) < 0) 899 | ERROR("cannot kill job mapper `%s', pid %d: %m\n", 900 | mapper.jm_cmd, mapper.jm_pid); 901 | 902 | if (lfd > 0) 903 | shutdown(lfd, SHUT_RDWR); 904 | 905 | /* ... */ 906 | 907 | endwin(); /* TODO Call on OOM(). */ 908 | 909 | return 0; 910 | } 911 | -------------------------------------------------------------------------------- /lltop.h: -------------------------------------------------------------------------------- 1 | #ifndef _LLTOP_H_ 2 | #define _LLTOP_H_ 3 | #define _GNU_SOURCE 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define MAXNAME 1024 11 | #define DEFAULT_LLTOP_INTVL 10 12 | 13 | #ifdef DEBUG 14 | #include 15 | #define ERROR(fmt,arg...) do { \ 16 | struct timeval _tv; \ 17 | gettimeofday(&_tv, NULL); \ 18 | fprintf(stderr, "%s:%s:%d:%s:%ld.%06ld: "fmt, program_invocation_short_name, \ 19 | __FILE__, __LINE__, __func__, _tv.tv_sec, (long) _tv.tv_usec, ##arg); \ 20 | } while (0) 21 | #define TRACE ERROR 22 | #else 23 | #define ERROR(fmt,arg...) \ 24 | fprintf(stderr, "%s: "fmt, program_invocation_short_name, ##arg) 25 | #define TRACE(fmt,arg...) ((void) 0) 26 | #endif 27 | 28 | #define FATAL(fmt,arg...) 
do { \ 29 | ERROR(fmt, ##arg); \ 30 | exit(1); \ 31 | } while (0) 32 | 33 | static inline char *chop(char *s, int c) 34 | { 35 | char *p = strchr(s, c); 36 | if (p != NULL) 37 | *p = 0; 38 | return s; 39 | } 40 | 41 | static inline void *alloc(size_t size) 42 | { 43 | void *addr = malloc(size); 44 | 45 | if (size != 0 && addr == NULL) 46 | FATAL("out of memory\n"); 47 | 48 | return addr; 49 | } 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | /* lltop main.c 2 | * Copyright 2010 by John L. Hammond 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License as 6 | * published by the Free Software Foundation; either version 2 of the 7 | * License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, but 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 17 | * 02110-1301 USA. 
18 | */ 19 | #define _GNU_SOURCE 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "lltop.h" 29 | #include "hooks.h" 30 | #include "rbtree.h" 31 | 32 | struct name_stats { 33 | struct rb_node ns_node; 34 | long ns_wr, ns_rd, ns_reqs; 35 | char ns_name[]; 36 | }; 37 | 38 | struct cache_struct { 39 | struct rb_node c_node; 40 | struct name_stats *c_stats; 41 | char c_name[]; 42 | }; 43 | 44 | struct rb_root addr_cache_root = RB_ROOT; 45 | struct rb_root host_cache_root = RB_ROOT; 46 | struct rb_root name_stats_root = RB_ROOT; 47 | int name_stats_count = 0; 48 | 49 | static struct cache_struct *lookup(struct rb_root *root, const char *name, int create) 50 | { 51 | struct cache_struct *cache; 52 | struct rb_node **link, *parent; 53 | 54 | link = &root->rb_node; 55 | parent = NULL; 56 | 57 | while (*link != NULL) { 58 | cache = rb_entry(*link, struct cache_struct, c_node); 59 | parent = *link; 60 | 61 | int cmp = strcmp(name, cache->c_name); 62 | if (cmp < 0) 63 | link = &((*link)->rb_left); 64 | else if (cmp > 0) 65 | link = &((*link)->rb_right); 66 | else 67 | return cache; 68 | } 69 | 70 | if (!create) 71 | return NULL; 72 | 73 | cache = alloc(sizeof(*cache) + strlen(name) + 1); 74 | memset(cache, 0, sizeof(*cache)); 75 | rb_link_node(&cache->c_node, parent, link); 76 | rb_insert_color(&cache->c_node, root); 77 | strcpy(cache->c_name, name); 78 | return cache; 79 | } 80 | 81 | static struct name_stats *get_name_stats(const char *name) 82 | { 83 | struct name_stats *stats; 84 | struct rb_node **link, *parent; 85 | 86 | link = &name_stats_root.rb_node; 87 | parent = NULL; 88 | 89 | while (*link != NULL) { 90 | stats = rb_entry(*link, struct name_stats, ns_node); 91 | parent = *link; 92 | 93 | int cmp = strcmp(name, stats->ns_name); 94 | if (cmp < 0) 95 | link = &((*link)->rb_left); 96 | else if (cmp > 0) 97 | link = &((*link)->rb_right); 98 | else 99 | return stats; 100 | } 101 | 102 | stats = 
alloc(sizeof(*stats) + strlen(name) + 1); 103 | memset(stats, 0, sizeof(*stats)); 104 | rb_link_node(&stats->ns_node, parent, link); 105 | rb_insert_color(&stats->ns_node, &name_stats_root); 106 | strcpy(stats->ns_name, name); 107 | name_stats_count++; 108 | 109 | return stats; 110 | } 111 | 112 | void lltop_set_job(const char *host, const char *job) 113 | { 114 | struct cache_struct *cache; 115 | 116 | cache = lookup(&host_cache_root, host, 1); 117 | cache->c_stats = get_name_stats(job); 118 | } 119 | 120 | static void account(const char *addr, long wr, long rd, long reqs) 121 | { 122 | struct name_stats *stats = NULL; 123 | struct cache_struct *addr_cache; 124 | struct cache_struct *host_cache; 125 | char host[MAXNAME + 1]; 126 | char job[MAXNAME + 1]; 127 | 128 | addr_cache = lookup(&addr_cache_root, addr, 1); 129 | if (addr_cache->c_stats != NULL) { 130 | stats = addr_cache->c_stats; 131 | goto have_stats; 132 | } 133 | 134 | if (lltop_get_host == NULL || (*lltop_get_host)(addr, host, sizeof(host)) < 0) { 135 | stats = get_name_stats(addr); 136 | goto have_stats; 137 | } 138 | 139 | host_cache = lookup(&host_cache_root, host, 0); 140 | if (host_cache != NULL) { 141 | stats = host_cache->c_stats; 142 | goto have_stats; 143 | } 144 | 145 | if (lltop_get_job == NULL || (*lltop_get_job)(host, job, sizeof(job)) < 0) { 146 | stats = get_name_stats(host); 147 | goto have_stats; 148 | } 149 | 150 | stats = get_name_stats(job); 151 | 152 | have_stats: 153 | addr_cache->c_stats = stats; 154 | stats->ns_wr += wr; 155 | stats->ns_rd += rd; 156 | stats->ns_reqs += reqs; 157 | } 158 | 159 | static int name_stats_cmp(const struct name_stats **s1, const struct name_stats **s2) 160 | { 161 | /* Sort descending by writes, then reads, then requests. */ 162 | /* TODO Make sort rank configurable. */ 163 | long wr = (*s1)->ns_wr - (*s2)->ns_wr; 164 | if (wr != 0) 165 | return wr > 0 ? -1 : 1; 166 | 167 | long rd = (*s1)->ns_rd - (*s2)->ns_rd; 168 | if (rd != 0) 169 | return rd > 0 ? 
-1 : 1; 170 | 171 | long reqs = (*s1)->ns_reqs - (*s2)->ns_reqs; 172 | if (reqs != 0) 173 | return reqs > 0 ? -1 : 1; 174 | 175 | return 0; 176 | } 177 | 178 | int main(int argc, char *argv[]) 179 | { 180 | char **serv_list = NULL; 181 | int serv_count = 0; 182 | if (lltop_config(argc, argv, &serv_list, &serv_count) < 0) 183 | FATAL("lltop_config() failed\n"); 184 | 185 | char intvl_arg[80]; 186 | snprintf(intvl_arg, sizeof(intvl_arg), "--interval=%d", lltop_intvl); 187 | 188 | close(0); 189 | open("/dev/null", O_RDONLY); 190 | 191 | int fdv[2]; 192 | if (pipe(fdv) < 0) 193 | FATAL("cannot create pipe for lltop-serv subprocesses: %m\n"); 194 | 195 | TRACE("starting lltop-serv subprocesses\n"); 196 | 197 | int i; 198 | for (i = 0; i < serv_count; i++) { 199 | pid_t pid = fork(); 200 | if (pid < 0) { 201 | FATAL("cannot fork: %m\n"); 202 | } else if (pid == 0) { 203 | /* Close read end of pipe, redirect stdout to write end. */ 204 | close(fdv[0]); 205 | dup2(fdv[1], 1); 206 | close(fdv[1]); 207 | execl(lltop_ssh_path, lltop_ssh_path, serv_list[i], 208 | lltop_serv_path, intvl_arg, (char*) NULL); 209 | FATAL("cannot exec '%s': %m\n", lltop_ssh_path); 210 | } 211 | } 212 | lltop_free_serv_list(serv_list, serv_count); 213 | close(fdv[1]); 214 | 215 | if (lltop_job_map != NULL && (*lltop_job_map)() < 0) 216 | FATAL("cannot get job map: %m\n"); 217 | 218 | TRACE("reading lltop-serv output\n"); 219 | 220 | FILE *stats_pipe = fdopen(fdv[0], "r"); 221 | if (stats_pipe == NULL) 222 | FATAL("cannot create pipe: %m\n"); 223 | 224 | char *line = NULL; 225 | size_t line_size = 0; 226 | int line_count = 0; 227 | 228 | while (getline(&line, &line_size, stats_pipe) >= 0) { 229 | if (line_count++ == 0) 230 | TRACE("got first line from lltop-serv\n"); 231 | 232 | #if MAXNAME != 1024 233 | #error MAXNAME != 1024 may break sscanf(). 234 | #endif 235 | char addr[MAXNAME + 1]; 236 | long wr, rd, reqs; 237 | /* lltop-serv output is @ . 
*/ 238 | if (sscanf(line, "%1024s %ld %ld %ld", addr, &wr, &rd, &reqs) != 4) { 239 | ERROR("invalid line \"%s\"\n", chop(line, '\n')); 240 | continue; 241 | } 242 | 243 | /* Chop off '@' and account. */ 244 | account(chop(addr, '@'), wr, rd, reqs); 245 | } 246 | free(line); 247 | 248 | TRACE("read %d lines from lltop-serv\n", line_count); 249 | 250 | if (ferror(stats_pipe)) 251 | ERROR("error reading from pipe: %m\n"); 252 | 253 | if (fclose(stats_pipe) < 0) 254 | ERROR("error closing pipe: %m\n"); 255 | 256 | TRACE("sorting and printing stats\n"); 257 | 258 | /* OK, done reading, now sort and print. */ 259 | struct name_stats **stats_vec; 260 | stats_vec = alloc(name_stats_count * sizeof(struct name_stats*)); 261 | 262 | i = 0; 263 | struct rb_node *node; 264 | for (node = rb_first(&name_stats_root); node != NULL; node = rb_next(node)) 265 | stats_vec[i++] = rb_entry(node, struct name_stats, ns_node); 266 | 267 | qsort(stats_vec, name_stats_count, sizeof(struct name_stats*), 268 | (int (*)(const void*, const void*)) &name_stats_cmp); 269 | 270 | lltop_print_header(stdout); 271 | 272 | for (i = 0; i < name_stats_count; i++) { 273 | struct name_stats *s = stats_vec[i]; 274 | lltop_print_name_stats(stdout, s->ns_name, s->ns_wr, s->ns_rd, s->ns_reqs); 275 | } 276 | 277 | /* Cleanup is somewhat pointless since we're exiting right away. 
*/ 278 | #ifdef DEBUG 279 | rb_destroy(&addr_cache_root, offsetof(struct cache_struct, c_node), &free); 280 | rb_destroy(&host_cache_root, offsetof(struct cache_struct, c_node), &free); 281 | rb_destroy(&name_stats_root, offsetof(struct name_stats, ns_node), &free); 282 | free(stats_vec); 283 | #endif 284 | 285 | return 0; 286 | } 287 | -------------------------------------------------------------------------------- /qhost_job_map: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PATH=$PATH:XXX 4 | export SGE_CELL=default 5 | export SGE_EXECD_PORT=537 6 | export SGE_QMASTER_PORT=536 7 | export SGE_ROOT=XXX 8 | export SGE_CLUSTER_NAME=XXX 9 | 10 | # Translates qconf -j's busted output to sane ' ' 11 | # form. Not thoroughly tested, but works for me. 12 | 13 | qhost -j | awk '{ 14 | if ($0 ~ /^[[:alpha:]]/) { 15 | current_host = $1; 16 | need_job = 1; 17 | } else if (need_job && $1 ~ /^[0-9]+$/) { 18 | print current_host, $1; 19 | need_job = 0; 20 | } 21 | }' 22 | -------------------------------------------------------------------------------- /rbtree.c: -------------------------------------------------------------------------------- 1 | /* 2 | Red Black Trees 3 | (C) 1999 Andrea Arcangeli 4 | (C) 2002 David Woodhouse 5 | 6 | This program is free software; you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation; either version 2 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 
15 | 16 | You should have received a copy of the GNU General Public License 17 | along with this program; if not, write to the Free Software 18 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 | 20 | linux/lib/rbtree.c 21 | */ 22 | #include "rbtree.h" 23 | 24 | static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) 25 | { 26 | struct rb_node *right = node->rb_right; 27 | struct rb_node *parent = rb_parent(node); 28 | 29 | if ((node->rb_right = right->rb_left)) 30 | rb_set_parent(right->rb_left, node); 31 | right->rb_left = node; 32 | 33 | rb_set_parent(right, parent); 34 | 35 | if (parent) 36 | { 37 | if (node == parent->rb_left) 38 | parent->rb_left = right; 39 | else 40 | parent->rb_right = right; 41 | } 42 | else 43 | root->rb_node = right; 44 | rb_set_parent(node, right); 45 | } 46 | 47 | static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) 48 | { 49 | struct rb_node *left = node->rb_left; 50 | struct rb_node *parent = rb_parent(node); 51 | 52 | if ((node->rb_left = left->rb_right)) 53 | rb_set_parent(left->rb_right, node); 54 | left->rb_right = node; 55 | 56 | rb_set_parent(left, parent); 57 | 58 | if (parent) 59 | { 60 | if (node == parent->rb_right) 61 | parent->rb_right = left; 62 | else 63 | parent->rb_left = left; 64 | } 65 | else 66 | root->rb_node = left; 67 | rb_set_parent(node, left); 68 | } 69 | 70 | void rb_insert_color(struct rb_node *node, struct rb_root *root) 71 | { 72 | struct rb_node *parent, *gparent; 73 | 74 | while ((parent = rb_parent(node)) && rb_is_red(parent)) 75 | { 76 | gparent = rb_parent(parent); 77 | 78 | if (parent == gparent->rb_left) 79 | { 80 | { 81 | register struct rb_node *uncle = gparent->rb_right; 82 | if (uncle && rb_is_red(uncle)) 83 | { 84 | rb_set_black(uncle); 85 | rb_set_black(parent); 86 | rb_set_red(gparent); 87 | node = gparent; 88 | continue; 89 | } 90 | } 91 | 92 | if (parent->rb_right == node) 93 | { 94 | register struct rb_node *tmp; 95 | 
__rb_rotate_left(parent, root); 96 | tmp = parent; 97 | parent = node; 98 | node = tmp; 99 | } 100 | 101 | rb_set_black(parent); 102 | rb_set_red(gparent); 103 | __rb_rotate_right(gparent, root); 104 | } else { 105 | { 106 | register struct rb_node *uncle = gparent->rb_left; 107 | if (uncle && rb_is_red(uncle)) 108 | { 109 | rb_set_black(uncle); 110 | rb_set_black(parent); 111 | rb_set_red(gparent); 112 | node = gparent; 113 | continue; 114 | } 115 | } 116 | 117 | if (parent->rb_left == node) 118 | { 119 | register struct rb_node *tmp; 120 | __rb_rotate_right(parent, root); 121 | tmp = parent; 122 | parent = node; 123 | node = tmp; 124 | } 125 | 126 | rb_set_black(parent); 127 | rb_set_red(gparent); 128 | __rb_rotate_left(gparent, root); 129 | } 130 | } 131 | 132 | rb_set_black(root->rb_node); 133 | } 134 | 135 | static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, 136 | struct rb_root *root) 137 | { 138 | struct rb_node *other; 139 | 140 | while ((!node || rb_is_black(node)) && node != root->rb_node) 141 | { 142 | if (parent->rb_left == node) 143 | { 144 | other = parent->rb_right; 145 | if (rb_is_red(other)) 146 | { 147 | rb_set_black(other); 148 | rb_set_red(parent); 149 | __rb_rotate_left(parent, root); 150 | other = parent->rb_right; 151 | } 152 | if ((!other->rb_left || rb_is_black(other->rb_left)) && 153 | (!other->rb_right || rb_is_black(other->rb_right))) 154 | { 155 | rb_set_red(other); 156 | node = parent; 157 | parent = rb_parent(node); 158 | } 159 | else 160 | { 161 | if (!other->rb_right || rb_is_black(other->rb_right)) 162 | { 163 | struct rb_node *o_left; 164 | if ((o_left = other->rb_left)) 165 | rb_set_black(o_left); 166 | rb_set_red(other); 167 | __rb_rotate_right(other, root); 168 | other = parent->rb_right; 169 | } 170 | rb_set_color(other, rb_color(parent)); 171 | rb_set_black(parent); 172 | if (other->rb_right) 173 | rb_set_black(other->rb_right); 174 | __rb_rotate_left(parent, root); 175 | node = root->rb_node; 176 | 
break; 177 | } 178 | } 179 | else 180 | { 181 | other = parent->rb_left; 182 | if (rb_is_red(other)) 183 | { 184 | rb_set_black(other); 185 | rb_set_red(parent); 186 | __rb_rotate_right(parent, root); 187 | other = parent->rb_left; 188 | } 189 | if ((!other->rb_left || rb_is_black(other->rb_left)) && 190 | (!other->rb_right || rb_is_black(other->rb_right))) 191 | { 192 | rb_set_red(other); 193 | node = parent; 194 | parent = rb_parent(node); 195 | } 196 | else 197 | { 198 | if (!other->rb_left || rb_is_black(other->rb_left)) 199 | { 200 | register struct rb_node *o_right; 201 | if ((o_right = other->rb_right)) 202 | rb_set_black(o_right); 203 | rb_set_red(other); 204 | __rb_rotate_left(other, root); 205 | other = parent->rb_left; 206 | } 207 | rb_set_color(other, rb_color(parent)); 208 | rb_set_black(parent); 209 | if (other->rb_left) 210 | rb_set_black(other->rb_left); 211 | __rb_rotate_right(parent, root); 212 | node = root->rb_node; 213 | break; 214 | } 215 | } 216 | } 217 | if (node) 218 | rb_set_black(node); 219 | } 220 | 221 | void rb_erase(struct rb_node *node, struct rb_root *root) 222 | { 223 | struct rb_node *child, *parent; 224 | int color; 225 | 226 | if (!node->rb_left) 227 | child = node->rb_right; 228 | else if (!node->rb_right) 229 | child = node->rb_left; 230 | else { 231 | struct rb_node *old = node, *left; 232 | 233 | node = node->rb_right; 234 | while ((left = node->rb_left) != 0) 235 | node = left; 236 | child = node->rb_right; 237 | parent = rb_parent(node); 238 | color = rb_color(node); 239 | 240 | if (child) 241 | rb_set_parent(child, parent); 242 | if (parent == old) { 243 | parent->rb_right = child; 244 | parent = node; 245 | } else 246 | parent->rb_left = child; 247 | 248 | node->rb_parent_color = old->rb_parent_color; 249 | node->rb_right = old->rb_right; 250 | node->rb_left = old->rb_left; 251 | 252 | if (rb_parent(old)) { 253 | if (rb_parent(old)->rb_left == old) 254 | rb_parent(old)->rb_left = node; 255 | else 256 | 
rb_parent(old)->rb_right = node; 257 | } else 258 | root->rb_node = node; 259 | 260 | rb_set_parent(old->rb_left, node); 261 | if (old->rb_right) 262 | rb_set_parent(old->rb_right, node); 263 | goto color; 264 | } 265 | 266 | parent = rb_parent(node); 267 | color = rb_color(node); 268 | 269 | if (child) 270 | rb_set_parent(child, parent); 271 | 272 | if (parent) { 273 | if (parent->rb_left == node) 274 | parent->rb_left = child; 275 | else 276 | parent->rb_right = child; 277 | } else 278 | root->rb_node = child; 279 | 280 | color: 281 | if (color == RB_BLACK) 282 | __rb_erase_color(child, parent, root); 283 | } 284 | 285 | /* 286 | * This function returns the first node (in sort order) of the tree. 287 | */ 288 | struct rb_node *rb_first(struct rb_root *root) 289 | { 290 | struct rb_node *n; 291 | 292 | n = root->rb_node; 293 | if (!n) 294 | return 0; 295 | while (n->rb_left) 296 | n = n->rb_left; 297 | return n; 298 | } 299 | 300 | struct rb_node *rb_last(struct rb_root *root) 301 | { 302 | struct rb_node *n; 303 | 304 | n = root->rb_node; 305 | if (!n) 306 | return 0; 307 | while (n->rb_right) 308 | n = n->rb_right; 309 | return n; 310 | } 311 | 312 | struct rb_node *rb_next(struct rb_node *node) 313 | { 314 | struct rb_node *parent; 315 | 316 | /* If we have a right-hand child, go down and then left as far 317 | as we can. */ 318 | if (node->rb_right) { 319 | node = node->rb_right; 320 | while (node->rb_left) 321 | node=node->rb_left; 322 | return node; 323 | } 324 | 325 | /* No right-hand children. Everything down and left is 326 | smaller than us, so any 'next' node must be in the general 327 | direction of our parent. Go up the tree; any time the 328 | ancestor is a right-hand child of its parent, keep going 329 | up. First time it's a left-hand child of its parent, said 330 | parent is our 'next' node. 
*/ 331 | while ((parent = rb_parent(node)) && node == parent->rb_right) 332 | node = parent; 333 | 334 | return parent; 335 | } 336 | 337 | struct rb_node *rb_prev(struct rb_node *node) 338 | { 339 | struct rb_node *parent; 340 | 341 | /* If we have a left-hand child, go down and then right as far 342 | as we can. */ 343 | if (node->rb_left) { 344 | node = node->rb_left; 345 | while (node->rb_right) 346 | node=node->rb_right; 347 | return node; 348 | } 349 | 350 | /* No left-hand children. Go up till we find an ancestor which 351 | is a right-hand child of its parent */ 352 | while ((parent = rb_parent(node)) && node == parent->rb_left) 353 | node = parent; 354 | 355 | return parent; 356 | } 357 | 358 | void rb_replace_node(struct rb_node *victim, struct rb_node *new, 359 | struct rb_root *root) 360 | { 361 | struct rb_node *parent = rb_parent(victim); 362 | 363 | /* Set the surrounding nodes to point to the replacement */ 364 | if (parent) { 365 | if (victim == parent->rb_left) 366 | parent->rb_left = new; 367 | else 368 | parent->rb_right = new; 369 | } else { 370 | root->rb_node = new; 371 | } 372 | if (victim->rb_left) 373 | rb_set_parent(victim->rb_left, new); 374 | if (victim->rb_right) 375 | rb_set_parent(victim->rb_right, new); 376 | 377 | /* Copy the pointers/colour from the victim to the replacement */ 378 | *new = *victim; 379 | } 380 | -------------------------------------------------------------------------------- /rbtree.h: -------------------------------------------------------------------------------- 1 | #ifndef _LINUX_RBTREE_H 2 | #define _LINUX_RBTREE_H 3 | /* 4 | Red Black Trees 5 | (C) 1999 Andrea Arcangeli 6 | 7 | This program is free software; you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation; either version 2 of the License, or 10 | (at your option) any later version. 
11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program; if not, write to the Free Software 19 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | 21 | linux/include/linux/rbtree.h 22 | 23 | To use rbtrees you'll have to implement your own insert and search cores. 24 | This lets us avoid callbacks, which would dramatically reduce performance. 25 | I know it's not the cleanest way, but in C (not in C++) it is how to get 26 | both performance and genericity... 27 | 28 | Some examples of insert and search follow here. The search is a plain 29 | normal search over an ordered tree. The insert instead must be implemented 30 | in two steps: first the code must insert the element in 31 | order as a red leaf in the tree, then the support library function 32 | rb_insert_color() must be called. That function will do the 33 | non-trivial work of rebalancing the rbtree if necessary. 
34 | 35 | ----------------------------------------------------------------------- 36 | static inline struct page * rb_search_page_cache(struct inode * inode, 37 | unsigned long offset) 38 | { 39 | struct rb_node * n = inode->i_rb_page_cache.rb_node; 40 | struct page * page; 41 | 42 | while (n) 43 | { 44 | page = rb_entry(n, struct page, rb_page_cache); 45 | 46 | if (offset < page->offset) 47 | n = n->rb_left; 48 | else if (offset > page->offset) 49 | n = n->rb_right; 50 | else 51 | return page; 52 | } 53 | return 0; 54 | } 55 | 56 | static inline struct page * __rb_insert_page_cache(struct inode * inode, 57 | unsigned long offset, 58 | struct rb_node * node) 59 | { 60 | struct rb_node ** p = &inode->i_rb_page_cache.rb_node; 61 | struct rb_node * parent = 0; 62 | struct page * page; 63 | 64 | while (*p) 65 | { 66 | parent = *p; 67 | page = rb_entry(parent, struct page, rb_page_cache); 68 | 69 | if (offset < page->offset) 70 | p = &(*p)->rb_left; 71 | else if (offset > page->offset) 72 | p = &(*p)->rb_right; 73 | else 74 | return page; 75 | } 76 | 77 | rb_link_node(node, parent, p); 78 | 79 | return 0; 80 | } 81 | 82 | static inline struct page * rb_insert_page_cache(struct inode * inode, 83 | unsigned long offset, 84 | struct rb_node * node) 85 | { 86 | struct page * ret; 87 | if ((ret = __rb_insert_page_cache(inode, offset, node))) 88 | goto out; 89 | rb_insert_color(node, &inode->i_rb_page_cache); 90 | out: 91 | return ret; 92 | } 93 | ----------------------------------------------------------------------- 94 | */ 95 | /* Small modifications to compile out of kernel. Added rb_destroy(). 96 | Nov 2010, John L. Hammond, . */ 97 | #include /* For size_t. 
*/ 98 | 99 | struct rb_node { 100 | unsigned long rb_parent_color; 101 | #define RB_RED 0 102 | #define RB_BLACK 1 103 | struct rb_node *rb_right; 104 | struct rb_node *rb_left; 105 | } __attribute__((aligned(sizeof(long)))); 106 | /* The alignment might seem pointless, but allegedly CRIS needs it */ 107 | 108 | struct rb_root { 109 | struct rb_node *rb_node; 110 | }; 111 | 112 | /* rb_entry is just container_of */ 113 | #define rb_entry(ptr, type, member) ({ \ 114 | const typeof(((type*) 0)->member) *__m = (ptr); \ 115 | (type*) ((char*) __m - offsetof(type, member)); \ 116 | }) 117 | 118 | #define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) 119 | #define rb_color(r) ((r)->rb_parent_color & 1) 120 | #define rb_is_red(r) (!rb_color(r)) 121 | #define rb_is_black(r) rb_color(r) 122 | #define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) 123 | #define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) 124 | 125 | static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) 126 | { 127 | rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; 128 | } 129 | static inline void rb_set_color(struct rb_node *rb, int color) 130 | { 131 | rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; 132 | } 133 | 134 | #define RB_ROOT ((struct rb_root) { .rb_node = 0 }) 135 | 136 | #define RB_EMPTY_ROOT(root) ((root)->rb_node == 0) 137 | #define RB_EMPTY_NODE(node) (rb_parent(node) != node) 138 | #define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) 139 | 140 | extern void rb_insert_color(struct rb_node *, struct rb_root *); 141 | extern void rb_erase(struct rb_node *, struct rb_root *); 142 | 143 | /* Find logical next and previous nodes in a tree */ 144 | extern struct rb_node *rb_next(struct rb_node *); 145 | extern struct rb_node *rb_prev(struct rb_node *); 146 | extern struct rb_node *rb_first(struct rb_root *); 147 | extern struct rb_node *rb_last(struct rb_root *); 148 | 149 | /* Fast replacement of a single 
node without remove/rebalance/add/rebalance */ 150 | extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 151 | struct rb_root *root); 152 | 153 | static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, 154 | struct rb_node ** rb_link) 155 | { 156 | node->rb_parent_color = (unsigned long) parent; 157 | node->rb_left = node->rb_right = 0; 158 | 159 | *rb_link = node; 160 | } 161 | 162 | static inline void rb_destroy(struct rb_root *root, size_t offset, void (*dtor)(void*)) 163 | { 164 | struct rb_node *node = root->rb_node; 165 | *root = RB_ROOT; 166 | 167 | while (node != 0) { 168 | if (node->rb_left != 0) { 169 | struct rb_node *left_child = node->rb_left; 170 | node->rb_left = 0; 171 | node = left_child; 172 | } else if (node->rb_right != 0) { 173 | struct rb_node *right_child = node->rb_right; 174 | node->rb_right = 0; 175 | node = right_child; 176 | } else { 177 | void *container = ((char*) node) - offset; 178 | node = rb_parent(node); 179 | if (dtor != 0) 180 | (*dtor)(container); 181 | } 182 | } 183 | } 184 | 185 | #endif /* _LINUX_RBTREE_H */ 186 | -------------------------------------------------------------------------------- /serv-cts.c: -------------------------------------------------------------------------------- 1 | /* lltop serv-cts.c 2 | * Copyright 2010 by John L. Hammond 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License as 6 | * published by the Free Software Foundation; either version 2 of the 7 | * License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, but 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 17 | * 02110-1301 USA. 18 | */ 19 | /* TODO Error messages should include hostname. */ 20 | #define _GNU_SOURCE 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include "string1.h" 36 | #include "lltop.h" 37 | #include "dict.h" 38 | 39 | #define LLTOP_MSG_MAX 64000 /* UDP max minus stuff minus some other stuff. */ 40 | #define LLTOP_PORT "9907" 41 | #define NR_CLIENTS_HINT 4096 /* Initial dict size. */ 42 | 43 | struct dict name_stats_dict; 44 | 45 | #define NS_WR 0 /* MOVEME. */ 46 | #define NS_RD 1 47 | #define NS_REQS 2 48 | 49 | struct msg_buf { 50 | char *mb_buf; 51 | size_t mb_len, mb_size; 52 | int mb_fd; 53 | }; 54 | 55 | int msg_buf_init(struct msg_buf *mb, int fd, char *buf, size_t size) 56 | { 57 | mb->mb_fd = fd; 58 | mb->mb_len = 0; 59 | mb->mb_size = size; 60 | mb->mb_buf = buf; 61 | return 0; 62 | } 63 | 64 | int msg_buf_send(struct msg_buf *mb, const char *name, long wr, long rd, long reqs) 65 | { 66 | size_t avail, need; 67 | 68 | again: 69 | avail = mb->mb_size - mb->mb_len; 70 | need = snprintf(mb->mb_buf + mb->mb_len, avail, "%s %ld %ld %ld\n", name, wr, rd, reqs); 71 | 72 | if (need >= avail) { 73 | if (mb->mb_len == 0) { 74 | errno = ENAMETOOLONG; 75 | return -1; 76 | } 77 | 78 | if (send(mb->mb_fd, mb->mb_buf, mb->mb_len, 0) < 0) 79 | return -1; 80 | 81 | mb->mb_len = 0; 82 | goto again; 83 | } 84 | 85 | mb->mb_len += need; 86 | 87 | return 0; 88 | } 89 | 90 | int msg_buf_flush(struct msg_buf *mb) 91 | { 92 | if (mb->mb_len > 0) { 93 | if (send(mb->mb_fd, mb->mb_buf, mb->mb_len, 0) < 0) 94 | return -1; 95 | 96 | mb->mb_len = 0; 97 | } 98 | 99 | return 0; 100 | } 101 | 102 | struct 
name_stats { 103 | long ns_stats[2][3]; 104 | unsigned int ns_gen; 105 | char ns_name[]; 106 | }; 107 | 108 | static inline struct name_stats *key_ns(char *key) 109 | { 110 | /* As an exercise to the reader, figure out why offsetof() doesn't 111 | work here. Or does it? */ 112 | 113 | size_t ns_name_offset = ((struct name_stats *) NULL)->ns_name - (char *) NULL; 114 | return (struct name_stats *) (key - ns_name_offset); 115 | } 116 | 117 | struct lustre_target { 118 | char *name; 119 | char *export_dir_path; 120 | }; 121 | 122 | size_t nr_targets = 0; 123 | struct lustre_target *target_list = NULL; 124 | 125 | int de_is_subdir(const struct dirent *de) 126 | { 127 | return de->d_type == DT_DIR && de->d_name[0] != '.'; 128 | } 129 | 130 | int get_target_list(struct lustre_target **list, size_t *nr, const char *dir_path) 131 | { 132 | struct dirent **de = NULL; 133 | int i, j, new_nr, nr_de = 0, rc = -1; 134 | struct lustre_target *new_list = NULL; 135 | 136 | nr_de = scandir(dir_path, &de, &de_is_subdir, &alphasort); 137 | if (nr_de < 0) { 138 | ERROR("cannot scan `%s': %m\n", dir_path); 139 | goto out; 140 | } 141 | 142 | new_nr = *nr + nr_de; 143 | new_list = realloc(*list, new_nr * sizeof(*list[0])); 144 | if (new_list == NULL) { 145 | ERROR("cannot allocate target list: %m\n"); 146 | goto out; 147 | } 148 | *list = new_list; 149 | 150 | for (i = *nr, j = 0; i < new_nr; i++, j++) { 151 | (*list)[i].name = strdup(de[j]->d_name); 152 | /* XXX */ 153 | (*list)[i].export_dir_path = strf("%s/%s/exports", dir_path, de[j]->d_name); 154 | /* XXX */ 155 | } 156 | 157 | *nr = new_nr; 158 | rc = 0; 159 | 160 | out: 161 | for (j = 0; j < nr_de; j++) 162 | free(de[j]); 163 | free(de); 164 | 165 | return rc; 166 | } 167 | 168 | int read_client_stats(const char *cli_name, unsigned int gen) 169 | { 170 | char stats_path[80]; 171 | FILE *stats_file = NULL; 172 | char stats_file_buf[BUFSIZ]; 173 | char *line = NULL; 174 | size_t line_size = 0; 175 | long wr = 0, rd = 0, reqs = 0; 
176 | 177 | TRACE("cli_name %s, gen %d\n", cli_name, gen); 178 | 179 | snprintf(stats_path, sizeof(stats_path), "%s/stats", cli_name); 180 | 181 | stats_file = fopen(stats_path, "r"); 182 | if (stats_file == NULL) { 183 | ERROR("cannot open %s: %m\n", stats_path); 184 | goto out; 185 | } 186 | setvbuf(stats_file, stats_file_buf, _IOFBF, sizeof(stats_file_buf)); 187 | 188 | /* Skip the helpful snapshot_time. */ 189 | getline(&line, &line_size, stats_file); 190 | 191 | while (getline(&line, &line_size, stats_file) >= 0) { 192 | char ctr_name[80]; 193 | long ctr_samples, ctr_sum = 0; 194 | 195 | /* XXX Do we need to check ctr_units? */ 196 | if (sscanf(line, "%79s %ld samples [%*[^]]] %*d %*d %ld", 197 | ctr_name, &ctr_samples, &ctr_sum) < 2) { 198 | ERROR("invalid line \"%s\"\n", chop(line, '\n')); 199 | continue; 200 | } 201 | 202 | if (strcmp(ctr_name, "write_bytes") == 0) { 203 | wr = ctr_sum; 204 | } else if (strcmp(ctr_name, "read_bytes") == 0) { 205 | rd = ctr_sum; 206 | } else if (strcmp(ctr_name, "ping") != 0) { /* Ignore pings. */ 207 | reqs += ctr_samples; 208 | } 209 | } 210 | 211 | /* Look up cli_name. 
*/ 212 | struct name_stats *ns = NULL; 213 | hash_t hash = dict_strhash(cli_name); 214 | struct dict_entry *de = dict_entry_ref(&name_stats_dict, hash, cli_name); 215 | long *s = NULL; 216 | 217 | if (de->d_key != NULL) { 218 | ns = key_ns(de->d_key); 219 | goto have_ns; 220 | } 221 | 222 | ns = alloc(sizeof(*ns) + strlen(cli_name) + 1); 223 | memset(ns, 0, sizeof(*ns)); 224 | ns->ns_gen = gen; 225 | strcpy(ns->ns_name, cli_name); 226 | 227 | if (dict_entry_set(&name_stats_dict, de, hash, ns->ns_name) < 0) 228 | FATAL("dict_entry_set: %m\n"); 229 | 230 | have_ns: 231 | s = ns->ns_stats[gen % 2]; 232 | if (ns->ns_gen != gen) { 233 | memset(s, 0, 3 * sizeof(long)); 234 | ns->ns_gen = gen; 235 | } 236 | 237 | s[NS_WR] += wr; 238 | s[NS_RD] += rd; 239 | s[NS_REQS] += reqs; 240 | 241 | out: 242 | free(line); 243 | if (stats_file != NULL) 244 | fclose(stats_file); 245 | 246 | return 0; 247 | } 248 | 249 | int read_target_stats(struct lustre_target *target, unsigned int gen) 250 | { 251 | DIR *exp_dir = NULL; 252 | TRACE("target %s, gen %d\n", target->name, gen); 253 | 254 | if (chdir(target->export_dir_path) < 0) { 255 | ERROR("cannot chdir to `%s': %m\n", target->export_dir_path); 256 | /* TODO Invalidate target or something. 
*/ 257 | goto out; 258 | } 259 | 260 | exp_dir = opendir("."); 261 | if (exp_dir == NULL) { 262 | ERROR("cannot open `%s': %m\n", target->export_dir_path); 263 | goto out; 264 | } 265 | 266 | struct dirent *de; 267 | while ((de = readdir(exp_dir)) != NULL) { 268 | if (de_is_subdir(de)) 269 | read_client_stats(de->d_name, gen); 270 | } 271 | 272 | out: 273 | if (exp_dir != NULL) 274 | closedir(exp_dir); 275 | 276 | return 0; 277 | } 278 | 279 | int main(int argc, char *argv[]) 280 | { 281 | int daemonize = 0; 282 | int send_all = 0; 283 | int intvl = DEFAULT_LLTOP_INTVL; 284 | char *host_arg = NULL, *port_arg = LLTOP_PORT; 285 | int sfd = -1; 286 | struct msg_buf mb; 287 | char mb_buf[LLTOP_MSG_MAX]; 288 | 289 | struct option opts[] = { 290 | { "send-all", 0, NULL, 'a' }, 291 | { "daemon", 0, NULL, 'd' }, 292 | { "interval", 1, NULL, 'i' }, 293 | { "port", 1, NULL, 'p' }, 294 | { NULL, 0, NULL, 0 }, 295 | }; 296 | 297 | int c; 298 | while ((c = getopt_long(argc, argv, "adi:p:", opts, 0)) > 0) { 299 | switch (c) { 300 | case 'a': 301 | send_all = 1; 302 | continue; 303 | case 'd': 304 | daemonize = 1; 305 | continue; 306 | case 'i': 307 | intvl = atoi(optarg); 308 | if (intvl <= 0) 309 | FATAL("invalid sleep interval `%s'\n", optarg); 310 | continue; 311 | case 'p': 312 | port_arg = optarg; 313 | continue; 314 | case '?': 315 | FATAL("invalid option\n"); 316 | } 317 | } 318 | 319 | if (argc - optind <= 0) { 320 | fprintf(stderr, "Usage: %s [OPTIONS] HOST\n", program_invocation_short_name); 321 | exit(1); 322 | } 323 | host_arg = argv[optind]; 324 | 325 | struct addrinfo hints, *list, *info; 326 | hints = (struct addrinfo) { 327 | .ai_family = AF_INET, /* XXX */ 328 | .ai_socktype = SOCK_DGRAM, 329 | }; 330 | 331 | int gai_rc = getaddrinfo(host_arg, port_arg, &hints, &list); 332 | if (gai_rc != 0) 333 | FATAL("cannot resolve host `%s', service `%s': %s\n", 334 | host_arg, port_arg, gai_strerror(gai_rc)); 335 | 336 | for (info = list; info != 0; info = info->ai_next) { 
337 | if ((sfd = socket(info->ai_family, info->ai_socktype, info->ai_protocol)) < 0) 338 | continue; 339 | if (connect(sfd, info->ai_addr, info->ai_addrlen) == 0) 340 | break; 341 | close(sfd); 342 | sfd = -1; 343 | } 344 | 345 | freeaddrinfo(list); 346 | 347 | if (sfd < 0) 348 | FATAL("cannot connect to host `%s', service `%s': %m\n", host_arg, port_arg); 349 | 350 | if (msg_buf_init(&mb, sfd, mb_buf, sizeof(mb_buf)) < 0) 351 | FATAL("cannot create message buffer: %m\n"); 352 | 353 | if (get_target_list(&target_list, &nr_targets, "/proc/fs/lustre/mdt") < 0) 354 | exit(1); 355 | 356 | if (get_target_list(&target_list, &nr_targets, "/proc/fs/lustre/obdfilter") < 0) 357 | exit(1); 358 | 359 | if (nr_targets == 0) 360 | FATAL("no targets found\n"); 361 | 362 | if (dict_init(&name_stats_dict, NR_CLIENTS_HINT) < 0) 363 | FATAL("cannot create client dictionary: %m\n"); 364 | 365 | struct timespec intvl_spec; 366 | if (clock_gettime(CLOCK_MONOTONIC, &intvl_spec) < 0) 367 | FATAL("cannot get current time: %m\n"); 368 | 369 | if (daemonize && daemon(0, 0) < 0) 370 | FATAL("cannot daemonize: %m\n"); 371 | 372 | unsigned int gen; 373 | for (gen = 0; ; gen++) { 374 | int i; 375 | for (i = 0; i < nr_targets; i++) 376 | read_target_stats(&target_list[i], gen); 377 | 378 | if (daemonize) 379 | chdir("/"); 380 | 381 | if (gen == 0) 382 | goto sleep; 383 | 384 | size_t de_iter = 0; 385 | struct dict_entry *de; 386 | while ((de = dict_for_each_ref(&name_stats_dict, &de_iter)) != NULL) { 387 | struct name_stats *ns = key_ns(de->d_key); 388 | long *s0, *s1, wr, rd, reqs; 389 | 390 | if (ns->ns_gen != gen) { 391 | TRACE("stale stats found for client `%s', removing\n", ns->ns_name); 392 | dict_entry_remv(&name_stats_dict, de, 0); 393 | free(ns); 394 | continue; 395 | } 396 | 397 | s0 = ns->ns_stats[(gen - 1) % 2]; 398 | s1 = ns->ns_stats[gen % 2]; 399 | wr = s1[NS_WR] - s0[NS_WR]; 400 | rd = s1[NS_RD] - s0[NS_RD]; 401 | reqs = s1[NS_REQS] - s0[NS_REQS]; 402 | 403 | /* If any stats are 
negative then we assume that the client was 404 | evicted while we slept, so we skip it. */ 405 | if (!send_all && (wr < 0 || rd < 0 || reqs < 0)) { 406 | TRACE("skipping %s %ld %ld %ld\n", ns->ns_name, wr, rd, reqs); 407 | continue; 408 | } 409 | 410 | /* Skip client if all stats are zero. */ 411 | if (!send_all && wr == 0 && rd == 0 && reqs == 0) { 412 | TRACE("skipping %s %ld %ld %ld\n", ns->ns_name, wr, rd, reqs); 413 | continue; 414 | } 415 | 416 | if (msg_buf_send(&mb, ns->ns_name, wr, rd, reqs) < 0) { 417 | if (errno == ENAMETOOLONG) 418 | ERROR("skipping client `%s': name too long\n", ns->ns_name); 419 | else 420 | FATAL("cannot send to host `%s', service `%s': %m\n", host_arg, port_arg); 421 | } 422 | } 423 | 424 | if (msg_buf_flush(&mb) < 0) 425 | FATAL("cannot send to host `%s', service `%s': %m\n", host_arg, port_arg); 426 | 427 | sleep: 428 | intvl_spec.tv_sec += intvl; 429 | errno = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &intvl_spec, NULL); 430 | if (errno != 0) 431 | FATAL("cannot sleep: %m\n"); 432 | } 433 | } 434 | -------------------------------------------------------------------------------- /serv.c: -------------------------------------------------------------------------------- 1 | /* lltop serv.c 2 | * Copyright 2010 by John L. Hammond 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License as 6 | * published by the Free Software Foundation; either version 2 of the 7 | * License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, but 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 17 | * 02110-1301 USA. 18 | */ 19 | /* TODO Consider scanning /proc/fs/lustre/{mds,obdfilter}//exports//ldlm_stats. */ 20 | /* TODO Error messages should include hostname. */ 21 | #define _GNU_SOURCE 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "lltop.h" 31 | #include "rbtree.h" 32 | 33 | const char *filter_path[2] = { 34 | "/proc/fs/lustre/mds", 35 | "/proc/fs/lustre/obdfilter", 36 | }; 37 | 38 | struct name_stats { 39 | struct rb_node ns_node; 40 | long ns_wr, ns_rd, ns_reqs; 41 | char ns_name[]; 42 | }; 43 | 44 | struct rb_root name_stats_root = RB_ROOT; 45 | 46 | int get_client_stats(const char *cli_name, int which) 47 | { 48 | /* Parameter which is 0 or 1 depending on which pass we are in. If 49 | * which is 0 then subtract wr/rd/reqs from stats, otherwise add. */ 50 | TRACE("cli_name %s, which %d\n", cli_name, which); 51 | 52 | char stats_path[80]; 53 | snprintf(stats_path, sizeof(stats_path), "%s/stats", cli_name); 54 | 55 | FILE* stats_file = fopen(stats_path, "r"); 56 | if (stats_file == NULL) { 57 | ERROR("cannot open %s: %m\n", stats_path); 58 | return -1; 59 | } 60 | 61 | char *line = NULL; 62 | size_t line_size = 0; 63 | 64 | /* Skip first line with its busted snapshot_time. */ 65 | getline(&line, &line_size, stats_file); 66 | 67 | long wr = 0, rd = 0, reqs = 0; 68 | 69 | while (getline(&line, &line_size, stats_file) >= 0) { 70 | char ctr_name[80]; 71 | long ctr_samples, ctr_sum = 0; 72 | 73 | /* XXX Do we need to check ctr_units? 
*/ 74 | if (sscanf(line, "%79s %ld samples [%*[^]]] %*d %*d %ld", 75 | ctr_name, &ctr_samples, &ctr_sum) < 2) { 76 | ERROR("invalid line \"%s\"\n", chop(line, '\n')); 77 | continue; 78 | } 79 | 80 | if (strcmp(ctr_name, "write_bytes") == 0) { 81 | wr = ctr_sum; 82 | } else if (strcmp(ctr_name, "read_bytes") == 0) { 83 | rd = ctr_sum; 84 | } else if (strcmp(ctr_name, "ping") != 0) { /* Ignore pings. */ 85 | reqs += ctr_samples; 86 | } 87 | } 88 | free(line); 89 | fclose(stats_file); 90 | 91 | /* Look up name_stats for cli_name. */ 92 | struct name_stats *stats = NULL; 93 | struct rb_node **link, *parent; 94 | 95 | link = &(name_stats_root.rb_node); 96 | parent = NULL; 97 | 98 | while (*link != NULL) { 99 | stats = rb_entry(*link, struct name_stats, ns_node); 100 | parent = *link; 101 | 102 | int cmp = strcmp(cli_name, stats->ns_name); 103 | if (cmp < 0) { 104 | link = &((*link)->rb_left); 105 | } else if (cmp > 0) { 106 | link = &((*link)->rb_right); 107 | } else { 108 | goto have_stats; 109 | } 110 | } 111 | 112 | /* Create name_stats, link, and initialize. */ 113 | stats = alloc(sizeof(*stats) + strlen(cli_name) + 1); 114 | memset(stats, 0, sizeof(*stats)); 115 | rb_link_node(&stats->ns_node, parent, link); 116 | rb_insert_color(&stats->ns_node, &name_stats_root); 117 | strcpy(stats->ns_name, cli_name); 118 | 119 | have_stats: 120 | stats->ns_wr += which ? wr : -wr; 121 | stats->ns_rd += which ? rd : -rd; 122 | stats->ns_reqs += which ? 
reqs : -reqs; 123 | return 0; 124 | } 125 | 126 | int get_target_stats(const char *tgt_name, int which) 127 | { 128 | TRACE("tgt_name %s, which %d\n", tgt_name, which); 129 | 130 | char exp_dir_path[80]; 131 | snprintf(exp_dir_path, sizeof(exp_dir_path), "%s/exports", tgt_name); 132 | 133 | if (chdir(exp_dir_path) < 0) { 134 | ERROR("cannot open %s: %m\n", exp_dir_path); 135 | return -1; 136 | } 137 | 138 | DIR *exp_dir = opendir("."); 139 | if (exp_dir == NULL) 140 | FATAL("cannot open %s: %m\n", exp_dir_path); 141 | 142 | struct dirent *ent; 143 | while ((ent = readdir(exp_dir)) != NULL) { 144 | if (ent->d_type == DT_DIR && ent->d_name[0] != '.') 145 | get_client_stats(ent->d_name, which); 146 | } 147 | closedir(exp_dir); 148 | 149 | return 0; 150 | } 151 | 152 | int main(int argc, char *argv[]) 153 | { 154 | int intvl = DEFAULT_LLTOP_INTVL; 155 | struct timespec intvl_spec; 156 | 157 | struct option opts[] = { 158 | { "interval", 1, 0, 'i' }, 159 | { 0, 0, 0, 0}, 160 | }; 161 | 162 | int c; 163 | while ((c = getopt_long(argc, argv, "i:", opts, 0)) > 0) { 164 | switch (c) { 165 | case 'i': 166 | intvl = atoi(optarg); 167 | if (intvl <= 0) 168 | FATAL("invalid sleep interval \"%s\"\n", optarg); 169 | continue; 170 | case '?': 171 | FATAL("invalid option\n"); 172 | } 173 | } 174 | 175 | /* Set stdout line buffered so the lines from different lltop-servs 176 | * don't clobber each other. Can't find a guarantee that ssh won't 177 | * break up writes, but it seems to work. */ 178 | setlinebuf(stdout); 179 | 180 | if (clock_gettime(CLOCK_MONOTONIC, &intvl_spec) < 0) 181 | FATAL("cannot read monotonic clock: %m\n"); 182 | 183 | TRACE("scanning stats files\n"); 184 | 185 | int which, type, found = 0; 186 | for (which = 0; which < 2; which++) { 187 | /* Before we start pass 1, we wait until at least intvl seconds 188 | have elapsed since the start of pass 0. 
*/ 189 | if (which == 1) { 190 | intvl_spec.tv_sec += intvl; 191 | errno = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &intvl_spec, NULL); 192 | if (errno != 0) 193 | FATAL("clock_nanosleep() failed: %m\n"); 194 | } 195 | 196 | for (type = 0; type < 2; type++) { 197 | DIR *dir = opendir(filter_path[type]); 198 | if (dir == NULL) { 199 | if (errno != ENOENT) 200 | FATAL("cannot open %s: %m\n", filter_path[type]); 201 | continue; 202 | } 203 | found++; 204 | 205 | struct dirent *ent; 206 | while ((ent = readdir(dir)) != NULL) { 207 | if (ent->d_type == DT_DIR && ent->d_name[0] != '.') { 208 | chdir(filter_path[type]); 209 | get_target_stats(ent->d_name, which); 210 | } 211 | } 212 | closedir(dir); 213 | } 214 | 215 | /* At the end of pass 0, if neither dir exists then we bail. */ 216 | if (found == 0) { 217 | errno = ENOENT; 218 | FATAL("cannot access %s or %s: %m\n", filter_path[0], filter_path[1]); 219 | } 220 | } 221 | 222 | TRACE("done scanning stats files\n"); 223 | 224 | struct rb_node *node; 225 | for (node = rb_first(&name_stats_root); node != NULL; node = rb_next(node)) { 226 | struct name_stats *s = rb_entry(node, struct name_stats, ns_node); 227 | 228 | /* If any stats are negative then we assume that the client was 229 | evicted while we slept, so we skip it. */ 230 | if (s->ns_wr < 0 || s->ns_rd < 0 || s->ns_reqs < 0) { 231 | TRACE("skipping %s %ld %ld %ld\n", s->ns_name, s->ns_wr, s->ns_rd, s->ns_reqs); 232 | continue; 233 | } 234 | 235 | /* As an optimization, skip this client if all stats are zero. 
*/ 236 | if (s->ns_wr == 0 && s->ns_rd == 0 && s->ns_reqs == 0) { 237 | TRACE("skipping %s %ld %ld %ld\n", s->ns_name, s->ns_wr, s->ns_rd, s->ns_reqs); 238 | continue; 239 | } 240 | 241 | printf("%s %ld %ld %ld\n", s->ns_name, s->ns_wr, s->ns_rd, s->ns_reqs); 242 | } 243 | 244 | #ifdef DEBUG 245 | rb_destroy(&name_stats_root, offsetof(struct name_stats, ns_node), &free); 246 | #endif 247 | 248 | return 0; 249 | } 250 | -------------------------------------------------------------------------------- /string1.h: -------------------------------------------------------------------------------- 1 | #ifndef _STRING1_H_ 2 | #define _STRING1_H_ 3 | #include 4 | #include 5 | #include 6 | 7 | static inline char *strsep_ne(char **ref, const char *delim) 8 | { 9 | char *str; 10 | do 11 | str = strsep(ref, delim); 12 | while (str != NULL && *str == 0); 13 | return str; 14 | } 15 | 16 | static inline char *wsep(char **ref) 17 | { 18 | return strsep_ne(ref, " \t\n\v\f\r"); 19 | } 20 | 21 | static inline char *strf(const char *fmt, ...) 22 | { 23 | char *str = NULL; 24 | va_list args; 25 | 26 | va_start(args, fmt); 27 | if (vasprintf(&str, fmt, args) < 0) 28 | str = NULL; 29 | va_end(args); 30 | return str; 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /tacc_lltop: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # tacc_lltop: TACC lltop wrapper, passes all options to lltop, queries 4 | # SGE for job owner and workdir, appends to lltop output: 5 | # 6 | # JOBID WR_MB RD_MB REQS OWNER WORKDIR 7 | # 12345 2835 0 6105 alice /work/99999/alice/jumble 8 | # login3 2065 2482 11173 9 | # 1687244 1335 0 2980 bob /work/99995/bob/sudoku-solvr 10 | # i101-101 23 0 17 11 | # ... 12 | 13 | if [ $# -eq 0 ]; then 14 | echo "$(basename $0): must specify a filesystem" 15 | echo "Try $(basename $0) scratch|share|work, or pass --help for more information." 
16 | exit 1 17 | fi 18 | 19 | if [ $UID -ne 0 ]; then 20 | echo "$(basename $0): must be run as root" 21 | exit 1 22 | fi 23 | 24 | lltop "$@" | \ 25 | while read name wr_mb rd_mb reqs; do 26 | case $name in 27 | JOBID) 28 | # Header: add owner and workdir labels. 29 | owner="OWNER" 30 | workdir="WORKDIR" 31 | ;; 32 | [0-9]*) 33 | # Job: get job owner and workdir from qstat. 34 | qstat=$(qstat -j "$name" 2> /dev/null) 35 | owner=$(awk '$1 == "owner:" { print $2 }' <<< "$qstat") 36 | workdir=$(awk '$1 == "sge_o_workdir:" { print $2 }' <<< "$qstat") 37 | ;; 38 | *) 39 | # Non-job. 40 | owner="" 41 | workdir="" 42 | ;; 43 | esac 44 | printf "%-16s %8s %8s %8s %10s %s\n" $name $wr_mb $rd_mb $reqs $owner $workdir 45 | done 46 | --------------------------------------------------------------------------------