├── LICENSE ├── README.md ├── additional_scripts └── install_singularity.sh ├── configuration_settings.txt ├── dependencies ├── etc │ ├── cron.monthly │ │ └── logstash_geoip_update │ ├── filebeat │ │ └── filebeat.yml │ ├── genders │ ├── init.d │ │ └── nvidia │ ├── logrotate.d │ │ └── slurm │ ├── logstash │ │ └── conf.d │ │ │ ├── 10-beats-input.conf │ │ │ ├── 20-syslog-filters.conf │ │ │ ├── 90-elasticsearch-output.conf │ │ │ └── 91-additional-output.conf │ ├── microway │ │ └── mcms_database.conf │ ├── nhc │ │ ├── compute-node-checks.conf │ │ ├── compute-node-checks_blocking-io.conf │ │ └── compute-node-checks_intense.conf │ ├── nvidia-healthmon.conf │ ├── slurm │ │ ├── cgroup.conf │ │ ├── cgroup_allowed_devices_file.conf │ │ ├── gres.conf │ │ ├── plugstack.conf │ │ ├── plugstack.conf.d │ │ │ └── x11.conf │ │ ├── scripts │ │ │ ├── slurm.epilog │ │ │ ├── slurm.healthcheck │ │ │ ├── slurm.healthcheck_long │ │ │ ├── slurm.jobstart_messages.sh │ │ │ ├── slurmctld.power_nodes_off │ │ │ ├── slurmctld.power_nodes_on │ │ │ ├── slurmctld.power_nodes_on_as_root │ │ │ ├── slurmctld.prolog │ │ │ └── slurmd.gres_init │ │ ├── slurm.conf │ │ └── slurmdbd.conf │ └── sysconfig │ │ ├── nhc │ │ └── nvidia ├── opt │ └── ohpc │ │ └── pub │ │ └── modulefiles │ │ └── cuda.lua ├── usr │ └── lib │ │ └── systemd │ │ └── system │ │ └── nvidia-gpu.service └── var │ └── spool │ └── slurmd │ └── validate-ssh-command ├── install_head_node.sh ├── install_login_server.sh └── install_monitoring_server.sh /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. 
And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 
163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. 
This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 
344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 
408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. 
For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 
520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 
641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | 676 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MCMS for OpenHPC Recipe 2 | 3 | [![Join the chat at https://gitter.im/Microway/MCMS-OpenHPC-Recipe](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Microway/MCMS-OpenHPC-Recipe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 4 | 5 | ## This is an experimental work in progress - it is not ready for production 6 | 7 | MCMS is Microway's Cluster Management Software. This is not the production-ready 8 | version of MCMS. This is an ongoing project to bring Microway's expertise and 9 | software tools to the recently-announced OpenHPC collaborative framework. 10 | 11 | ### Purpose 12 | This recipe contains many of the same elements as the official OpenHPC recipe, 13 | but offers a variety of customizations and enhancements, including: 14 | 15 | * Automated power-down of idle compute nodes 16 | * Support for Mellanox InfiniBand 17 | * Support for NVIDIA GPU accelerators 18 | * Monitoring of many additional metrics **(WIP)** 19 | * More sophisticated log collection and analysis **(WIP)** 20 | * Nagios-compatible monitoring with a more modern interface **(WIP)** 21 | 22 | ### Installation 23 | *Given a vanilla CentOS 7.x installation, this collection of scripts will stand 24 | up an OpenHPC cluster. 
These scripts are only tested against fresh installations - 25 | attempting to run them on an installation that has had a lot of changes may break things.* 26 | 27 | ``` 28 | # Use your favorite text editor to customize the install 29 | vim configuration_settings.txt 30 | 31 | # Run the installation on the new Head Node 32 | ./install_head_node.sh 33 | ``` 34 | 35 | ### More Information 36 | If you would like to purchase professional support/services for an OpenHPC 37 | cluster, or to fund development of a new feature, please visit: 38 | https://www.microway.com/contact/ 39 | 40 | To learn more about OpenHPC or to view the official installation recipe, visit: 41 | http://www.openhpc.community/ 42 | 43 | -------------------------------------------------------------------------------- /additional_scripts/install_singularity.sh: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | ################################################################################ 26 | ## 27 | ## Instructions for setting up Singularity on a CentOS system 28 | ## 29 | ################################################################################ 30 | 31 | # Grab the machine architecture (most commonly x86_64) 32 | machine_arch=$(uname -m) 33 | 34 | # Set the default node VNFS chroot if one is not already set 35 | node_chroot="${node_chroot:-/opt/ohpc/admin/images/centos-7/}" 36 | 37 | git clone -b master --depth 1 https://github.com/gmkurtzer/singularity.git 38 | 39 | cd singularity/ 40 | sh ./autogen.sh 41 | make dist 42 | rpmbuild -ta singularity-[0-9]*.tar.gz 43 | cd ../ 44 | rm -Rf singularity 45 | 46 | yum -y install ~/rpmbuild/RPMS/${machine_arch}/singularity-[0-9]*.rpm 47 | yum -y --installroot=${node_chroot} install ~/rpmbuild/RPMS/${machine_arch}/singularity-[0-9]*.rpm 48 | 49 | -------------------------------------------------------------------------------- /configuration_settings.txt: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2015-2016 by Microway, Inc.
6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | 26 | ################################################################################ 27 | ## 28 | ## This file defines the configuration settings for an OpenHPC cluster install. 29 | ## Configure the settings below before starting the cluster installation script. 30 | ## 31 | ## The System Management Server (SMS) is often called the Head or Master Node. 32 | ## 33 | ################################################################################ 34 | 35 | 36 | 37 | ################################################################################ 38 | # Mandatory settings - the default passwords are not acceptable! 39 | ################################################################################ 40 | 41 | # Number of compute nodes to initialize 42 | compute_node_count=4 43 | 44 | # Root password for the databases (MariaDB/MongoDB) 45 | db_root_password="ChangeMe" 46 | 47 | # Management password for the databases (will be used by Warewulf/SLURM) 48 | db_mgmt_password="ChangeMe" 49 | 50 | # BMC username and password for use by IPMI 51 | # Warewulf will add this user to the BMC on each compute node 52 | bmc_username="wwipmi" 53 | bmc_password="ChangeMe" 54 | 55 | # A mail server to which the cluster may forward notices, alerts, etc. 56 | # Most commonly, this will be the mail server used on your internal network. 57 | mail_server="mailserver.example.com" 58 | 59 | 60 | ################################################################################ 61 | # Optional settings 62 | ################################################################################ 63 | 64 | # Install InfiniBand drivers and tools 65 | enable_infiniband="true" 66 | 67 | # Install NVIDIA GPU drivers and tools 68 | enable_nvidia_gpu="true" 69 | 70 | # Install Intel Xeon Phi coprocessor drivers and tools 71 | # (disabled by default due to the additional steps necessary for Phi) 72 | enable_phi_coprocessor="false" 73 | 74 | # Restrict users from logging into any node via SSH (unless a job is running) 75 | restrict_user_ssh_logins="true" 76 | 77 | 78 | # This information is used to set up the hierarchy for SLURM accounting. It is 79 | # easy to add more accounts after installation using the sacctmgr utility. 80 | # 81 | # TAKE NOTE: SLURM wants lower-case all-one-word organization and account names! 
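#
# As an illustration only (nothing in these scripts runs this), additional
# accounts could later be added with sacctmgr roughly as follows -- the
# 'physics' account and 'alice' user below are hypothetical examples:
#
#   sacctmgr -i add account physics Description="Physics Department" \
#       Organization=unnamed_organization
#   sacctmgr -i add user alice Account=physics
#
# (The -i/--immediate flag applies the change without a confirmation prompt.)
#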
82 | # 83 | declare -A cluster_acct_hierarchy 84 | cluster_acct_hierarchy['cluster_name']="microway_hpc" 85 | cluster_acct_hierarchy['default_organization']="unnamed_organization" 86 | cluster_acct_hierarchy['default_organization_description']="Unnamed Organization" 87 | cluster_acct_hierarchy['default_account']="default_account" 88 | cluster_acct_hierarchy['default_account_description']="Default User Account" 89 | 90 | 91 | # MAC addresses for compute nodes 92 | # 93 | # If you don't know the MAC addresses, leave the defaults. However, you will 94 | # need to update these later using the Warewulf wwsh tool. Compute Nodes will 95 | # not boot correctly until each MAC address is registered within Warewulf. 96 | declare -A c_mac 97 | # 98 | # For now, we'll generate bogus MAC addresses 99 | for ((i=0; i<${compute_node_count}; i++)); do 100 | # This algorithm supports up to 10^6 compute nodes (one decimal digit per octet) 101 | first_octet=$(( ($i+1) / 100000 % 10 )) 102 | second_octet=$(( ($i+1) / 10000 % 10 )) 103 | third_octet=$(( ($i+1) / 1000 % 10 )) 104 | fourth_octet=$(( ($i+1) / 100 % 10 )) 105 | fifth_octet=$(( ($i+1) / 10 % 10 )) 106 | sixth_octet=$(( ($i+1) % 10 )) 107 | c_mac[$i]=0${first_octet}:0${second_octet}:0${third_octet}:0${fourth_octet}:0${fifth_octet}:0${sixth_octet} 108 | done 109 | # 110 | # Set these values to have the nodes come up on boot: 111 | # 112 | # c_mac[0]=01:02:03:04:05:06 113 | # c_mac[1]=02:02:03:04:05:06 114 | # c_mac[2]=03:02:03:04:05:06 115 | # c_mac[3]=04:02:03:04:05:06 116 | # 117 | 118 | 119 | 120 | # MCMS cluster hosts 121 | # ================== 122 | # 123 | # A cluster needs a head node: 124 | # 125 | # head 126 | # 127 | # For redundancy, it needs two: 128 | # 129 | # head-a 130 | # head-b 131 | # 132 | # 133 | # There may also be storage and login/session nodes. For example: 134 | # 135 | # metadata1 136 | # metadata2 137 | # 138 | # storage1 139 | # storage2 140 | # ... 141 | # storage63 142 | # 143 | # login-a 144 | # login-b 145 | # 146 | # 147 | # Compute node names can vary as needed, but should not include any of the names 148 | # listed above. Keep it simple and end with a number (which allows admins to 149 | # specify node ranges such as node[1-20] node[2,4,6,8] node[1,10-20] ). 150 | # 151 | # node1 152 | # node2 153 | # ... 154 | # node32768 155 | # 156 | # 157 | # 158 | # Network address ranges 159 | # ====================== 160 | # 161 | # In a modern cluster, each server/node contains several IP-enabled devices, 162 | # such as Ethernet, InfiniBand, IPMI, etc. Microway recommends that a class B 163 | # network be devoted to each type of traffic (for simplicity and scaling). 164 | # 165 | # By default, Microway recommends that one of the following subnets be 166 | # divided up and used for the cluster traffic. Choose whichever does not 167 | # conflict with your existing private networks: 168 | # 169 | # 10.0.0.0/8 default (supports IPs 10.0.0.1 through 10.255.255.254) 170 | # 172.16.0.0/12 (supports IPs 172.16.0.1 through 172.31.255.254) 171 | # 172 | # 173 | # The following subnets are recommended: 174 | # ====================================== 175 | # 176 | # 10.0.0.1 - 10.0.255.254 (Ethernet) 177 | # 10.10.0.1 - 10.10.255.254 (InfiniBand) 178 | # 10.13.0.1 - 10.13.255.254 (IPMI) 179 | # 180 | # For clusters with IP-enabled accelerators (such as Xeon Phi), use numbering: 181 | # 10.100.0.1 + (Accelerator #0) 182 | # 10.101.0.1 + (Accelerator #1) 183 | # ...
184 | # 10.10N.0.1 + (Accelerator #N) 185 | # 186 | 187 | # The network prefix and subnet netmask for the internal network 188 | internal_subnet_prefix=10.0 189 | internal_netmask=255.255.0.0 190 | 191 | # Network Prefix and Subnet Netmask for internal IPoIB (if IB is enabled) 192 | ipoib_network_prefix=10.10 193 | ipoib_netmask=255.255.0.0 194 | 195 | # The network prefix and subnet netmask for the BMC/IPMI network 196 | bmc_subnet_prefix=10.13 197 | bmc_netmask=255.255.0.0 198 | 199 | # The network interface on the compute nodes which will download the node image 200 | eth_provision=eth0 201 | 202 | # The first part of each node's name. Node numbers will be appended, so if the 203 | # prefix is set to 'star' then the nodes will be: star1, star2, star3, etc. 204 | compute_node_name_prefix="node" 205 | 206 | # A regular expression which will capture all compute node names. This value is 207 | # almost always safe (unless exotic and irregular names are selected). 208 | compute_regex="${compute_node_name_prefix}*" 209 | 210 | # OpenHPC repo location 211 | ohpc_repo=https://github.com/openhpc/ohpc/releases/download/v1.1.GA/ohpc-release-centos7.2-1.1-1.x86_64.rpm 212 | 213 | # Local NTP server for time synchronization - this is typically only necessary 214 | # if the cluster doesn't have access to NTP servers from the Internet. 215 | ntp_server="" 216 | 217 | # Additional arguments to send to the Linux kernel on compute nodes 218 | kargs="" 219 | 220 | # Set up a Lustre filesystem mount 221 | enable_lustre_client="false" 222 | 223 | # Lustre MGS mount name (if Lustre is enabled) 224 | mgs_fs_name="${mgs_fs_name:-10.0.255.254@o2ib:/lustre}" 225 | 226 | 227 | 228 | 229 | ################################################################################ 230 | # Settings which should be set up before the installation script is executed. In 231 | # other words, you must set the SMS/Head Node's hostname and IP addresses before 232 | # beginning. The values you have set are loaded in at the end of this file. 233 | # 234 | # sms_name Hostname for SMS server 235 | # sms_ip Internal IP address on SMS server 236 | # sms_eth_internal Internal Ethernet interface on SMS 237 | # sms_ipoib IPoIB address for SMS server 238 | # 239 | ################################################################################ 240 | 241 | sms_name=$(hostname --short) 242 | sms_ip=$(ip route get ${internal_subnet_prefix}.0.1 | head -n1 | sed 's/.*src //' | tr -d '[[:space:]]') 243 | sms_eth_internal=$(ip route get ${internal_subnet_prefix}.0.1 | head -n1 | sed 's/.*dev \([^ ]*\) .*/\1/') 244 | sms_ipoib=${ipoib_network_prefix}.$(echo ${sms_ip} | sed -r 's/[0-9]+\.[0-9]+\.//') 245 | 246 | # How would we get to Google? That defines the external interface. 247 | sms_eth_external=$(ip route get 8.8.8.8 | head -n1 | sed 's/.*dev \([^ ]*\) .*/\1/') 248 | 249 | 250 | 251 | 252 | ################################################################################ 253 | # Settings which will be auto-calculated during execution: 254 | # 255 | # c_ip[0], c_ip[1], ... Desired compute node addresses 256 | # c_bmc[0], c_bmc[1], ... BMC addresses for compute nodes 257 | # c_ipoib[0], c_ipoib[1], ... IPoIB addresses for computes 258 | # 259 | ################################################################################ 260 | 261 | 262 | # Compute node IP addresses will start at x.x.0.1 and increase from there (with 263 | # a maximum limit of x.x.255.254, at which point the addresses will wrap).
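#
# As a worked example using the defaults above (compute_node_count=4,
# internal_subnet_prefix=10.0, bmc_subnet_prefix=10.13, ipoib_network_prefix=10.10),
# the three loops below produce:
#
#   c_ip[0]=10.0.0.1    c_bmc[0]=10.13.0.1    c_ipoib[0]=10.10.0.1
#   c_ip[1]=10.0.0.2    c_bmc[1]=10.13.0.2    c_ipoib[1]=10.10.0.2
#   c_ip[2]=10.0.0.3    c_bmc[2]=10.13.0.3    c_ipoib[2]=10.10.0.3
#   c_ip[3]=10.0.0.4    c_bmc[3]=10.13.0.4    c_ipoib[3]=10.10.0.4
#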
264 | declare -A c_ip 265 | for ((i=0; i<${compute_node_count}; i++)); do 266 | third_octet=$(( ($i+1) / 255 )) 267 | fourth_octet=$(( ($i+1) % 255 )) 268 | c_ip[$i]=${internal_subnet_prefix}.${third_octet}.${fourth_octet} 269 | done 270 | 271 | 272 | # Node BMC IP addresses will start at x.x.0.1 and increase from there (with 273 | # a maximum limit of x.x.255.254, at which point the addresses will wrap). 274 | declare -A c_bmc 275 | for ((i=0; i<${compute_node_count}; i++)); do 276 | third_octet=$(( ($i+1) / 255 )) 277 | fourth_octet=$(( ($i+1) % 255 )) 278 | c_bmc[$i]=${bmc_subnet_prefix}.${third_octet}.${fourth_octet} 279 | done 280 | 281 | 282 | # Node IPoIB addresses will start at x.x.0.1 and increase from there (with 283 | # a maximum limit of x.x.255.254, at which point the addresses will wrap). 284 | declare -A c_ipoib 285 | for ((i=0; i<${compute_node_count}; i++)); do 286 | third_octet=$(( ($i+1) / 255 )) 287 | fourth_octet=$(( ($i+1) % 255 )) 288 | c_ipoib[$i]=${ipoib_network_prefix}.${third_octet}.${fourth_octet} 289 | done 290 | 291 | -------------------------------------------------------------------------------- /dependencies/etc/cron.monthly/logstash_geoip_update: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015-2016 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | 27 | ################################################################################ 28 | # 29 | # Update the GeoLiteCity database which is used for GeoIP lookups in Logstash 30 | # 31 | ################################################################################ 32 | 33 | db_url="http://geolite.maxmind.com/download/geoip/database/GeoLiteCity.dat.gz" 34 | 35 | tmp_file="/etc/logstash/GeoLiteCity.dat.tmp" 36 | db_file="/etc/logstash/GeoLiteCity.dat" 37 | 38 | 39 | # Download the new database file with some fairly lenient settings (to survive 40 | # interruptions and to minimize network load). 41 | wget_options="--quiet --tries=20 --waitretry=100 --retry-connrefused --limit-rate=50k" 42 | 43 | 44 | wget_path=$(which wget) 45 | if [[ "$?" -gt "0" ]]; then 46 | echo "Unable to locate wget utility" 47 | exit 1 48 | fi 49 | 50 | 51 | # Sleep a random amount (up to 8 hours) to prevent multiple 52 | # clusters from DDOSing the GeoIP database site. 
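# (For reference: bash's $RANDOM ranges from 0 to 32767, so the expression below
# sleeps between 0 and 28799 seconds -- i.e. just under the full 8 hours.)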
53 | sleep $(( $RANDOM % (60 * 60 * 8) )) 54 | 55 | 56 | $wget_path $wget_options --output-document=- $db_url | gunzip > $tmp_file 57 | 58 | 59 | # Assuming the update completed successfully, move the new file into place 60 | RETVAL=$? 61 | if [[ "$RETVAL" -eq "0" ]]; then 62 | mv --force $tmp_file $db_file 63 | else 64 | echo "Unable to update GeoLiteCity IP geolocation database for Logstash" 65 | exit $RETVAL 66 | fi 67 | -------------------------------------------------------------------------------- /dependencies/etc/filebeat/filebeat.yml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | 26 | ################################################################################ 27 | ## 28 | ## The following log files will be monitored and their contents forwarded to 29 | ## the specified logstash server(s). Log parsing takes place on those host(s). 30 | ## 31 | ################################################################################ 32 | 33 | 34 | filebeat: 35 | prospectors: 36 | - 37 | paths: 38 | - /var/log/cron 39 | - /var/log/maillog 40 | - /var/log/messages 41 | - /var/log/secure 42 | input_type: log 43 | document_type: syslog 44 | - 45 | paths: 46 | - /var/log/slurm/*.log 47 | input_type: log 48 | document_type: slurm 49 | 50 | registry_file: /var/lib/filebeat/registry 51 | 52 | output: 53 | logstash: 54 | hosts: ["{sms_ip}:5044"] 55 | 56 | tls: 57 | certificate_authorities: ["/etc/pki/tls/certs/logstash-forwarder.crt"] 58 | -------------------------------------------------------------------------------- /dependencies/etc/genders: -------------------------------------------------------------------------------- 1 | # /etc/genders 2 | # 3 | # Defines cluster components (Head Node, Compute Nodes, etc.) 
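# Example entries (hypothetical hostnames - adjust to match this cluster's
# actual node names and the node types recommended below):
#
#   hn01      head,sms
#   login1    login
#   node1     compute
#   node2     compute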
4 | # 5 | # Used by genders library for PDSH, PDCP and other utilities 6 | # 7 | # 8 | # Recommended types to define in every cluster: 9 | # head (sms) (the Head/Master/System Management Server) 10 | # login (the Login Nodes of the cluster) 11 | # compute (the Compute Nodes of the cluster) 12 | # 13 | ############################################################################## 14 | -------------------------------------------------------------------------------- /dependencies/etc/init.d/nvidia: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # nvidia Set up NVIDIA GPU Compute Accelerators 4 | # 5 | # chkconfig: 2345 55 25 6 | # description: NVIDIA GPUs provide additional compute capability. \ 7 | # This service sets the GPUs into the desired state. 8 | # 9 | # config: /etc/sysconfig/nvidia 10 | 11 | ### BEGIN INIT INFO 12 | # Provides: nvidia 13 | # Required-Start: $local_fs $network $syslog 14 | # Required-Stop: $local_fs $syslog 15 | # Should-Start: $syslog 16 | # Should-Stop: $network $syslog 17 | # Default-Start: 2 3 4 5 18 | # Default-Stop: 0 1 6 19 | # Short-Description: Set GPUs into the desired state 20 | # Description: NVIDIA GPUs provide additional compute capability. 21 | # This service sets the GPUs into the desired state. 22 | ### END INIT INFO 23 | 24 | 25 | ################################################################################ 26 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 27 | ################################################################################ 28 | # 29 | # Copyright (c) 2015-2016 by Microway, Inc. 30 | # 31 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 32 | # 33 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 34 | # it under the terms of the GNU General Public License as published by 35 | # the Free Software Foundation, either version 3 of the License, or 36 | # (at your option) any later version. 37 | # 38 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 39 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 40 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 41 | # GNU General Public License for more details. 42 | # 43 | # You should have received a copy of the GNU General Public License 44 | # along with MCMS. If not, see 45 | # 46 | ################################################################################ 47 | 48 | 49 | # source function library 50 | . /etc/rc.d/init.d/functions 51 | 52 | # Some definitions to make the below more readable 53 | NVSMI=/usr/bin/nvidia-smi 54 | NVCONFIG=/etc/sysconfig/nvidia 55 | prog="nvidia" 56 | 57 | # default settings 58 | NVIDIA_ACCOUNTING=1 59 | NVIDIA_PERSISTENCE_MODE=1 60 | NVIDIA_COMPUTE_MODE=0 61 | NVIDIA_CLOCK_SPEEDS=max 62 | # pull in sysconfig settings 63 | [ -f $NVCONFIG ] && . $NVCONFIG 64 | 65 | RETVAL=0 66 | 67 | 68 | # Determine the maximum graphics and memory clock speeds for each GPU. 69 | # Create an array of clock speed pairs (memory,graphics) to be passed to nvidia-smi 70 | declare -a MAX_CLOCK_SPEEDS 71 | get_max_clocks() 72 | { 73 | GPU_QUERY="$NVSMI --query-gpu=clocks.max.memory,clocks.max.graphics --format=csv,noheader,nounits" 74 | 75 | MAX_CLOCK_SPEEDS=( $($GPU_QUERY | awk '{print $1 $2}') ) 76 | } 77 | 78 | 79 | start() 80 | { 81 | /sbin/lspci | grep -qi nvidia 82 | if [ $? -ne 0 ] ; then 83 | echo -n $"No NVIDIA GPUs present. Skipping NVIDIA GPU tuning." 
84 | warning 85 | echo 86 | exit 0 87 | fi 88 | 89 | echo -n $"Starting $prog: " 90 | 91 | # If the nvidia-smi utility is missing, this script can't do its job 92 | [ -x $NVSMI ] || exit 5 93 | 94 | # A configuration file is not required 95 | if [ ! -f $NVCONFIG ] ; then 96 | echo -n $"No GPU config file present ($NVCONFIG) - using defaults" 97 | echo 98 | fi 99 | 100 | # Set persistence mode first to speed things up 101 | echo -n "persistence" 102 | $NVSMI --persistence-mode=$NVIDIA_PERSISTENCE_MODE 1> /dev/null 103 | RETVAL=$? 104 | 105 | if [ ! $RETVAL -gt 0 ]; then 106 | echo -n " accounting" 107 | $NVSMI --accounting-mode=$NVIDIA_ACCOUNTING 1> /dev/null 108 | RETVAL=$? 109 | fi 110 | 111 | if [ ! $RETVAL -gt 0 ]; then 112 | echo -n " compute" 113 | $NVSMI --compute-mode=$NVIDIA_COMPUTE_MODE 1> /dev/null 114 | RETVAL=$? 115 | fi 116 | 117 | 118 | if [ ! $RETVAL -gt 0 ]; then 119 | echo -n " clocks" 120 | if [ -n "$NVIDIA_CLOCK_SPEEDS" ]; then 121 | # If the requested clock speed value is "max", 122 | # work through each GPU and set to max speed. 123 | if [ "$NVIDIA_CLOCK_SPEEDS" == "max" ]; then 124 | get_max_clocks 125 | 126 | GPU_COUNTER=0 127 | GPUS_SKIPPED=0 128 | while [ "$GPU_COUNTER" -lt ${#MAX_CLOCK_SPEEDS[*]} ] && [ ! $RETVAL -gt 0 ]; do 129 | if [[ ${MAX_CLOCK_SPEEDS[$GPU_COUNTER]} =~ Supported ]] ; then 130 | if [ $GPUS_SKIPPED -eq 0 ] ; then 131 | echo 132 | GPUS_SKIPPED=1 133 | fi 134 | echo "Skipping non-boostable GPU" 135 | else 136 | $NVSMI -i $GPU_COUNTER --applications-clocks=${MAX_CLOCK_SPEEDS[$GPU_COUNTER]} 1> /dev/null 137 | fi 138 | RETVAL=$? 139 | 140 | GPU_COUNTER=$(( $GPU_COUNTER + 1 )) 141 | done 142 | else 143 | # This sets all GPUs to the same clock speeds (which only works 144 | # if the GPUs in this system are all the same). 145 | $NVSMI --applications-clocks=$NVIDIA_CLOCK_SPEEDS 1> /dev/null 146 | fi 147 | else 148 | $NVSMI --reset-applications-clocks 1> /dev/null 149 | fi 150 | RETVAL=$? 151 | fi 152 | 153 | if [ ! $RETVAL -gt 0 ]; then 154 | if [ -n "$NVIDIA_POWER_LIMIT" ]; then 155 | echo -n " power-limit" 156 | $NVSMI --power-limit=$NVIDIA_POWER_LIMIT 1> /dev/null 157 | RETVAL=$? 158 | fi 159 | fi 160 | 161 | if [ ! $RETVAL -gt 0 ]; then 162 | success 163 | else 164 | failure 165 | fi 166 | echo 167 | return $RETVAL 168 | } 169 | 170 | stop() 171 | { 172 | /sbin/lspci | grep -qi nvidia 173 | if [ $? -ne 0 ] ; then 174 | echo -n $"No NVIDIA GPUs present. Skipping NVIDIA GPU tuning." 175 | warning 176 | echo 177 | exit 0 178 | fi 179 | 180 | echo -n $"Stopping $prog: " 181 | [ -x $NVSMI ] || exit 5 182 | 183 | $NVSMI --persistence-mode=0 1> /dev/null && success || failure 184 | RETVAL=$? 185 | echo 186 | return $RETVAL 187 | } 188 | 189 | restart() { 190 | stop 191 | start 192 | } 193 | 194 | force_reload() { 195 | restart 196 | } 197 | 198 | status() { 199 | $NVSMI 200 | } 201 | 202 | case "$1" in 203 | start) 204 | start 205 | ;; 206 | stop) 207 | stop 208 | ;; 209 | restart) 210 | restart 211 | ;; 212 | force-reload) 213 | force_reload 214 | ;; 215 | status) 216 | status 217 | RETVAL=$? 
218 | ;; 219 | *) 220 | echo $"Usage: $0 {start|stop|restart|force-reload|status}" 221 | RETVAL=2 222 | esac 223 | exit $RETVAL 224 | -------------------------------------------------------------------------------- /dependencies/etc/logrotate.d/slurm: -------------------------------------------------------------------------------- 1 | /var/log/slurm/*.log { 2 | weekly 3 | compress 4 | missingok 5 | nocopytruncate 6 | nodelaycompress 7 | nomail 8 | notifempty 9 | noolddir 10 | rotate 7 11 | sharedscripts 12 | size 10M 13 | postrotate 14 | /etc/init.d/slurm reconfig 15 | endscript 16 | } 17 | -------------------------------------------------------------------------------- /dependencies/etc/logstash/conf.d/10-beats-input.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | 26 | ################################################################################ 27 | ## 28 | ## This host will listen for FileBeats traffic 29 | ## 30 | ################################################################################ 31 | 32 | 33 | input { 34 | beats { 35 | port => 5044 36 | ssl => true 37 | ssl_certificate => "/etc/pki/tls/certs/logstash-forwarder.crt" 38 | ssl_key => "/etc/pki/tls/private/logstash-forwarder.key" 39 | } 40 | } 41 | 42 | -------------------------------------------------------------------------------- /dependencies/etc/logstash/conf.d/20-syslog-filters.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2015-2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | # MCMS tag documentation 26 | # ======================== 27 | # 28 | # The following event tags are interpreted and monitored by MCMS: 29 | # 30 | # * user_account_event creation or deletion of a user 31 | # * group_account_event creation or deletion of a group 32 | # 33 | # * remote_login_event a user logged in from a remote location 34 | # * local_login_event a user logged in from a local console 35 | # * remote_logout_event a user logged out from a remote location 36 | # * local_logout_event a user logged out from a local console 37 | # 38 | # * auth_failure_event security issue: a user failed to authenticate 39 | # 40 | # * config_error_event a configuration issue is causing errors 41 | # 42 | # * hardware_event an event occurred in the hardware 43 | # 44 | # 45 | # 46 | # When something must be reported, assign a value to the field "report". 47 | # The following values are interpreted and escalated by MCMS: 48 | # 49 | # * emergency The system is no longer functioning or is unusable. 50 | # Most likely, all staff on call should be alerted. 51 | # Example: Users are unable to read /home 52 | # 53 | # * alert A major issue has occurred which requires immediate 54 | # attention. A staff member should be alerted. 55 | # Example: the primary network uplink is down 56 | # 57 | # * critical An issue requires immediate attention, but is 58 | # related to a secondary system. 59 | # Example: the secondary network uplink is down 60 | # 61 | # * error A non-urgent error has occurred. An administrator 62 | # will need to take action, but can do so during the 63 | # next business day. 64 | # Example: one compute node has gone down 65 | # 66 | # * warning Warn of a condition which may result in an error if 67 | # action is not taken soon - not urgent. 68 | # Example: a filesystem is nearing 90% full 69 | # 70 | # * notice An unusual event has occurred, but it was not an 71 | # error. A message should be sent to administrators 72 | # for follow-up. 73 | # 74 | # * informational System is operating normally - no action required. 75 | # Often harvested for reporting or measuring purposes. 76 | # 77 | # * debug Not used during normal operations. This message 78 | # could be useful to developers when debugging. 79 | # 80 | # 81 | # These severity levels are the same as those used by syslog: 82 | # http://en.wikipedia.org/wiki/Syslog#Severity_levels 83 | 84 | 85 | filter { 86 | if [type] == "syslog" or [type] == "cron" or [type] == "mail" or [type] == "secure" { 87 | mutate { add_field => ["format", "syslog"] } 88 | 89 | # Feb 23 04:48:16 head slurmctld[3166]: completing job 36 90 | grok { 91 | overwrite => "message" 92 | # We're not saving the hostname - it's grabbed by logstash-forwarder 93 | match => [ 94 | "message", "^(?:<%{POSINT:syslog_pri}>)?%{SYSLOGTIMESTAMP:timestamp} %{SYSLOGHOST} (?:%{SYSLOGPROG}: )?%{GREEDYDATA:message}" 95 | ] 96 | } 97 | syslog_pri { } 98 | date { 99 | match => [ "timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss", "ISO8601" ] 100 | } 101 | } 102 | 103 | 104 | if [type] == "audit" { 105 | # type=USER_END msg=audit(1392782461.768:2961): user pid=75557 uid=0 auid=0 ses=447 msg='op=PAM:session_close acct="root" exe="/usr/sbin/crond" hostname=? addr=? 
terminal=cron res=success' 106 | grok { 107 | overwrite => "message" 108 | match => [ "message", "type=%{WORD:audit_type} msg=audit\(%{BASE10NUM:timestamp}:%{POSINT:audit_id}\): %{GREEDYDATA:message}" ] 109 | add_tag => [ "grokked" ] 110 | } 111 | date { 112 | match => [ "timestamp", "UNIX_MS" ] 113 | } 114 | } 115 | 116 | 117 | else if [type] == "apache-access" { 118 | # 10.0.0.3 - - [16/Feb/2014:05:22:51 -0500] "GET /WW/vnfs?hwaddr=00:25:90:6b:bb:5c HTTP/1.1" 500 620 "-" "Wget" 119 | grok { 120 | overwrite => "message" 121 | match => [ "message", "%{COMBINEDAPACHELOG}" ] 122 | add_tag => [ "grokked" ] 123 | } 124 | } 125 | 126 | 127 | else if [type] == "apache-error" { 128 | # [Sun Feb 16 09:32:44 2014] [notice] Apache/2.2.15 (Unix) DAV/2 PHP/5.3.3 mod_perl/2.0.4 Perl/v5.10.1 configured -- resuming normal operations 129 | # 130 | # [Sun Feb 16 03:22:27 2014] [error] [client 127.0.0.1] PHP Warning: date(): It is not safe to rely on the system's timezone settings. 131 | grok { 132 | overwrite => "message" 133 | match => [ 134 | "message", "\[%{GREEDYDATA:timestamp}\] \[%{WORD:loglevel}\](?: \[%{WORD:originator} %{IP:remote_ip}\])? %{GREEDYDATA:message}" 135 | ] 136 | add_tag => [ "grokked" ] 137 | } 138 | date { 139 | match => [ "timestamp", "EEE MMM dd HH:mm:ss YYYY" ] 140 | } 141 | } 142 | 143 | 144 | else if [type] == "cron" { 145 | # Uses syslog format, so timestamp/host/program/pid were parsed above 146 | 147 | # (root) CMD (run-parts /etc/cron.hourly) 148 | # run-parts(/etc/cron.hourly)[12910]: starting 0anacron 149 | grok { 150 | overwrite => "message" 151 | match => [ 152 | "message", "\(%{USERNAME:user}\) %{CRON_ACTION:action} \(%{DATA:message}\)", 153 | "message", "%{PROG:program}\(%{UNIXPATH:cron_stage}\)(?:\[%{POSINT:pid}\])?: %{GREEDYDATA:message}" 154 | ] 155 | add_tag => [ "grokked" ] 156 | } 157 | } 158 | 159 | 160 | # else if [type] == "mail" { 161 | # Uses syslog format, so timestamp/host/program/pid were parsed above 162 | # } 163 | 164 | 165 | # else if [type] == "mcelog" { 166 | # 167 | # } 168 | 169 | 170 | # else if [type] == "mysqld" { 171 | # 172 | # } 173 | 174 | 175 | # else if [type] == "opensm" { 176 | # 177 | # } 178 | 179 | 180 | else if [type] == "secure" { 181 | # Uses syslog format, so timestamp/host/program/pid were parsed above 182 | 183 | if [program] == "groupadd" { 184 | # new group: name=elasticsearch, GID=494 185 | grok { 186 | overwrite => "message" 187 | match => [ "message", "new group: name=%{WORD:group_name}, GID=%{NONNEGINT:gid}" ] 188 | add_tag => [ "group_account_event", "grokked" ] 189 | } 190 | # group added to /etc/group: name=elasticsearch, GID=494 191 | grok { 192 | overwrite => "message" 193 | match => [ "message", "group added to %{UNIXPATH:group_file} name=%{WORD:group_name}(?:, GID=%{NONNEGINT:gid})?" 
] 194 | add_tag => [ "grokked" ] 195 | } 196 | } 197 | else if [program] == "groupdel" { 198 | # group 'testgroup' removed 199 | grok { 200 | overwrite => "message" 201 | match => [ "message", "group '%{WORD:group_name}' removed" ] 202 | add_tag => [ "group_account_event", "grokked" ] 203 | } 204 | # group 'testgroup' removed from /etc/group 205 | grok { 206 | overwrite => "message" 207 | match => [ "message", "group '%{WORD:group_name}' removed from %{UNIXPATH:group_file}" ] 208 | add_tag => [ "grokked" ] 209 | } 210 | } 211 | else if [program] == "useradd" { 212 | # new user: name=logstash, UID=494, GID=493, home=/opt/logstash, shell=/sbin/nologin 213 | grok { 214 | overwrite => "message" 215 | # Careful below: *nix PATHs can include commas 216 | match => [ "message", "new user: name=%{WORD:username}, UID=%{NONNEGINT:uid}, GID=%{NONNEGINT:gid}, home=%{GREEDYDATA:home_dir}, shell=%{UNIXPATH:shell}" ] 217 | add_tag => [ "user_account_event", "grokked" ] 218 | } 219 | # add 'eliot' to group 'benchmark' 220 | # add 'eliot' to shadow group 'benchmark' 221 | grok { 222 | overwrite => "message" 223 | match => [ "message", "add '%{WORD:username}' to(?: %{WORD:group_file})? group '%{WORD:group_name}'" ] 224 | add_tag => [ "grokked" ] 225 | } 226 | } 227 | else if [program] == "userdel" { 228 | # delete user 'test' 229 | grok { 230 | overwrite => "message" 231 | match => [ "message", "delete user '%{WORD:username}'" ] 232 | add_tag => [ "user_account_event", "grokked" ] 233 | } 234 | # TODO: removed group 'slurm' owned by 'slurm' 235 | } 236 | 237 | # else if [program] == "slurm" { 238 | # 239 | # } 240 | 241 | # else if [program] == "slurmctld" { 242 | # 243 | # } 244 | 245 | else if [program] == "sshd" { 246 | if "grokked" not in [tags] { 247 | # Accepted password for tiwa from 72.83.55.11 port 58019 ssh2 248 | grok { 249 | overwrite => "message" 250 | match => [ "message", "Accepted %{WORD:auth_method} for %{USERNAME:user} from %{IPORHOST:remote_ip} port %{POSINT:remote_port} %{WORD:remote_utility}" ] 251 | add_tag => [ "remote_login_event", "grokked" ] 252 | } 253 | } 254 | 255 | if "grokked" not in [tags] { 256 | # pam_unix(sshd:session): session opened for user coma by (uid=0) 257 | grok { 258 | overwrite => "message" 259 | match => [ "message", "%{WORD:pam_service}\(%{NOTSPACE:session_type}\): session opened for user %{USERNAME:username} by \(uid=%{NONNEGINT:uid}\)" ] 260 | add_tag => [ "remote_login_event", "grokked" ] 261 | } 262 | } 263 | 264 | if "grokked" not in [tags] { 265 | # pam_lastlog(sshd:session): unable to open /var/log/lastlog: No such file or directory 266 | # lastlog_openseek: Couldn't stat /var/log/lastlog: No such file or directory 267 | grok { 268 | overwrite => "message" 269 | match => [ "message", "%{NOTSPACE:calling_function}: (unable to open|Couldn't stat) %{GREEDYDATA:missing_file}: No such file or directory" ] 270 | add_tag => [ "config_error_event", "grokked" ] 271 | } 272 | } 273 | 274 | if "grokked" not in [tags] { 275 | # subsystem request for sftp 276 | grok { 277 | match => [ "message", "subsystem request for %{WORD:subsystem_type}" ] 278 | add_tag => [ "remote_login_event", "grokked" ] 279 | } 280 | } 281 | 282 | if "grokked" not in [tags] { 283 | # Failed password for eliot from 76.19.216.82 port 40415 ssh2 284 | # Failed password for invalid user cordm from 50.159.20.141 port 51799 ssh2 285 | grok { 286 | overwrite => "message" 287 | match => [ "message", "Failed %{WORD:auth_method} for (?:%{WORD:user_status} user )?%{USERNAME:username} from 
%{IPORHOST:remote_ip} port %{POSINT:remote_port} %{WORD:remote_utility}" ] 288 | add_tag => [ "auth_failure_event", "invalid_password", "grokked" ] 289 | } 290 | } 291 | 292 | if "grokked" not in [tags] { 293 | # pam_unix(sshd:auth): check pass; user unknown 294 | grok { 295 | match => [ "message", "%{WORD:pam_service}\(%{NOTSPACE:session_type}\): check pass; user unknown" ] 296 | add_tag => [ "auth_failure_event", "invalid_username", "grokked" ] 297 | } 298 | } 299 | 300 | if "grokked" not in [tags] { 301 | # input_userauth_request: invalid user cordm 302 | grok { 303 | match => [ "message", "%{WORD:auth_request}: invalid user %{USERNAME:username}" ] 304 | add_tag => [ "auth_failure_event", "invalid_username", "grokked" ] 305 | } 306 | } 307 | 308 | if "grokked" not in [tags] { 309 | # Invalid user cordm from 50.159.20.141 310 | grok { 311 | match => [ "message", "Invalid user %{USERNAME:username} from %{IPORHOST:remote_ip}" ] 312 | add_tag => [ "auth_failure_event", "invalid_username", "grokked" ] 313 | } 314 | } 315 | 316 | if "grokked" not in [tags] { 317 | # pam_succeed_if(sshd:auth): error retrieving information about user cordm 318 | grok { 319 | match => [ "message", "%{WORD:pam_service}\(%{NOTSPACE:session_type}\): error retrieving information about user %{USERNAME:username}" ] 320 | add_tag => [ "auth_failure_event", "invalid_username", "grokked" ] 321 | } 322 | } 323 | 324 | if "grokked" not in [tags] { 325 | # pam_unix(sshd:auth): authentication failure; logname= uid=0 euid=0 tty=ssh ruser= rhost=10.50.1.1 user=root 326 | # pam_unix(sshd:auth): authentication failure; logname= uid=0 euid=0 tty=ssh ruser= rhost=c-50-159-20-141.hsd1.wa.comcast.net 327 | grok { 328 | overwrite => "message" 329 | match => [ "message", "%{WORD:pam_service}\(%{NOTSPACE:session_type}\): authentication failure; logname=(?:%{WORD:logname})? uid=%{NONNEGINT:uid} euid=%{NONNEGINT:euid} tty=%{WORD:tty} ruser=(?:%{USERNAME:remote_user})? rhost=%{IPORHOST:remote_ip}(?:%{SPACE}user=%{USERNAME:username})?" ] 330 | add_tag => [ "auth_failure_event", "grokked" ] 331 | } 332 | } 333 | 334 | if "grokked" not in [tags] { 335 | # PAM 1 more authentication failure; logname= uid=0 euid=0 tty=ssh ruser= rhost=c-50-159-20-141.hsd1.wa.comcast.net 336 | # PAM 2 more authentication failures; logname= uid=0 euid=0 tty=ssh ruser= rhost=c-76-19-216-82.hsd1.ma.comcast.net user=eliot 337 | grok { 338 | overwrite => "message" 339 | match => [ "message", "PAM %{POSINT:failure_count} more authentication failure(?:s)?; logname=(?:%{WORD:logname})? uid=%{NONNEGINT:uid} euid=%{NONNEGINT:euid} tty=%{WORD:tty} ruser=(?:%{USERNAME:remote_user})? rhost=%{IPORHOST:remote_ip}(?:%{SPACE}user=%{USERNAME:username})?" 
] 340 | add_tag => [ "auth_failure_event", "grokked" ] 341 | } 342 | } 343 | 344 | if "grokked" not in [tags] { 345 | # Connection closed by 10.0.0.4 346 | grok { 347 | match => [ "message", "Connection closed by %{IPORHOST:remote_ip}" ] 348 | add_tag => [ "remote_logout_event", "grokked" ] 349 | } 350 | } 351 | 352 | if "grokked" not in [tags] { 353 | # pam_unix(sshd:session): session closed for user coma 354 | grok { 355 | overwrite => "message" 356 | match => [ "message", "%{WORD:pam_service}\(%{NOTSPACE:session_type}\): session closed for user %{USERNAME:username}" ] 357 | add_tag => [ "remote_logout_event", "grokked" ] 358 | } 359 | } 360 | 361 | if "grokked" not in [tags] { 362 | # Received disconnect from 10.0.0.254: 11: disconnected by user 363 | grok { 364 | match => [ "message", "Received disconnect from %{IPORHOST:remote_ip}" ] 365 | add_tag => [ "remote_logout_event", "grokked" ] 366 | } 367 | } 368 | } 369 | 370 | else if [program] == "sudo" { 371 | # pam_unix(sudo:auth): authentication failure; logname=root uid=500 euid=0 tty=/dev/pts/5 ruser=eliot rhost= user=eliot 372 | # eliot : user NOT in sudoers ; TTY=pts/5 ; PWD=/home/eliot ; USER=root ; COMMAND=/bin/ls / 373 | grok { 374 | overwrite => "message" 375 | match => [ 376 | "message", "%{WORD:pam_service}\(%{NOTSPACE:session_type}\): authentication failure; logname=%{USERNAME:logname} uid=%{NONNEGINT:uid} euid=%{NONNEGINT:euid} tty=%{TTY:tty} ruser=%{USERNAME:remote_username} rhost=(?:%{IPORHOST:remote_ip})? user=%{USERNAME:username}", 377 | "message", "%{USERNAME:username} : user NOT in sudoers ; TTY=%{GREEDYDATA:tty} ; PWD=%{UNIXPATH:pwd} ; USER=%{USERNAME:sudo_username} ; COMMAND=%{GREEDYDATA:command}" 378 | ] 379 | add_tag => [ "auth_failure_event", "grokked" ] 380 | } 381 | } 382 | } 383 | 384 | # else if [type] == "spooler" { 385 | # 386 | # } 387 | 388 | # else if [type] == "yum" { 389 | # 390 | # } 391 | 392 | 393 | ################################################################ 394 | # Mark type of host (compute node events are less critical) 395 | ################################################################ 396 | if [host] =~ /^node.+/ { 397 | mutate { add_field => ["node_type", "compute"] } 398 | } 399 | else if [host] =~ /^storage.+/ { 400 | mutate { add_field => ["node_type", "storage"] } 401 | } 402 | else { 403 | mutate { add_field => ["node_type", "management"] } 404 | } 405 | 406 | 407 | ################################################################ 408 | # Lookup Geolocation data for remote hosts 409 | ################################################################ 410 | if [remote_ip] =~ /(.+)/ { 411 | # Make sure we didn't pick up a hostname instead of an IP. 412 | # If so, overwrite the hostname with the IP address. 413 | if ! ([remote_ip] =~ /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/) { 414 | dns { 415 | action => "replace" 416 | resolve => "remote_ip" 417 | } 418 | } 419 | 420 | geoip { 421 | source => "remote_ip" 422 | target => "geoip" 423 | database => "/etc/logstash/GeoLiteCity.dat" 424 | add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ] 425 | add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ] 426 | } 427 | mutate { 428 | convert => [ "[geoip][coordinates]", "float"] 429 | } 430 | } 431 | 432 | 433 | ################################################################ 434 | # Mark hardware events (likely to be more critical) if they 435 | # have not already been assigned a severity level. 436 | ################################################################ 437 | if ! 
([report] =~ /(.+)/) { 438 | if [type] == "mcelog" { 439 | mutate { add_field => ["report", "error"] } 440 | } 441 | else if [type] == "syslog" { 442 | if [program] =~ /.*ipmiseld/ { 443 | mutate { add_field => ["report", "error"] } 444 | } 445 | } 446 | } 447 | 448 | 449 | ################################################################ 450 | # Set up events which will be forwarded to Shinken/Nagios 451 | ################################################################ 452 | if [report] =~ /(.+)/ { 453 | mutate { 454 | add_field => [ "nagios_host", "%{host}", 455 | "nagios_service", "%{program}" ] 456 | } 457 | } 458 | 459 | 460 | ################################################################ 461 | # Clean up after ourselves - remove any internal tags/fields 462 | ################################################################ 463 | if [format] =~ /(.+)/ { 464 | mutate { remove_field => ["format"] } 465 | } 466 | if "_grokparsefailure" in [tags] and "grokked" in [tags] { 467 | mutate { remove_tag => ["_grokparsefailure"] } 468 | } 469 | } 470 | 471 | -------------------------------------------------------------------------------- /dependencies/etc/logstash/conf.d/90-elasticsearch-output.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | ################################################################################# 25 | ## 26 | ## All data will be written to the local ElasticSearch database 27 | ## 28 | ################################################################################# 29 | 30 | 31 | output { 32 | elasticsearch { 33 | hosts => ["localhost:9200"] 34 | sniffing => true 35 | manage_template => false 36 | index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}" 37 | document_type => "%{[@metadata][type]}" 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /dependencies/etc/logstash/conf.d/91-additional-output.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2016 by Microway, Inc. 
6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | ################################################################################# 25 | ## 26 | ## Additional optional outputs 27 | ## 28 | ################################################################################# 29 | 30 | 31 | output { 32 | ################################################################ 33 | # Optionally, elect to forward items to Graphite/Statsd 34 | ################################################################ 35 | # if "hardware_event" in [tags] or "user_account_event" in [tags] or "group_account_event" in [tags] { 36 | # graphite { 37 | # host => "10.0.0.254" 38 | # } 39 | # statsd { } 40 | # } 41 | 42 | 43 | ################################################################ 44 | # Optionally, elect to forward items to Shinken/Nagios 45 | ################################################################ 46 | # if [report] =~ /(.+)/ { 47 | # nagios { } 48 | # } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /dependencies/etc/microway/mcms_database.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | ################################################################################ 26 | # 27 | # This file contains the credentials for accessing the MCMS database. 
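# (A hypothetical sketch of how a bash script might consume these settings --
#  the actual MCMS tools are the intended consumers:
#
#      . /etc/microway/mcms_database.conf
#      mysql --user="${mcms_database_user}" --password="${mcms_database_password}" mcms
#
#  The database name "mcms" and the MySQL client invocation above are
#  illustrative only.)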
28 | # 29 | # NORMAL USERS SHOULD NOT BE ABLE TO READ THIS FILE: 30 | # 31 | # chmod 600 /etc/microway/mcms_database.conf 32 | # 33 | ################################################################################ 34 | 35 | 36 | ################################################################################ 37 | # This file uses BASH syntax (and is sourced by a bash shell). If you need to, 38 | # you can use shell programming and advanced features inside this file. 39 | ################################################################################ 40 | 41 | mcms_database_user='mcmsDBAdmin' 42 | 43 | mcms_database_password='ChangeMe' 44 | 45 | -------------------------------------------------------------------------------- /dependencies/etc/nhc/compute-node-checks.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2015-2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | ################################################################################ 26 | # 27 | # Node Health Check (NHC) configuration file 28 | # 29 | # Checks to be run on compute nodes only 30 | # 31 | # These tests are typically run by the batch scheduler (e.g., Torque, SLURM) to 32 | # ensure that the compute nodes are in a healthy state. A variety of checks are 33 | # executed, including hardware health, software health and filesystem health. 34 | # 35 | # All tests in this file should run very fast and should not use system 36 | # resources as they will be running at the same time as compute jobs. Tests must 37 | # not access any network filesystems, as they can hang. 
Longer-running health 38 | # checks and filesystem checks should be put in one of the following files: 39 | # 40 | # compute-node-checks_intense.conf (resource-intensive checks) 41 | # compute-node-checks_blocking-io.conf (checks which could lock up) 42 | # 43 | # 44 | # Lines are in the form "||" 45 | # Hostmask is a glob, /regexp/, or {noderange} 46 | # Comments begin with '#' 47 | # 48 | ################################################################################ 49 | 50 | 51 | ####################################################################### 52 | ### 53 | ### NHC Configuration Variables 54 | ### 55 | 56 | # 57 | # NHC-wide configuration settings (such as PATH and resource manager) 58 | # are set system-wide in the file: 59 | # 60 | # /etc/sysconfig/nhc 61 | # 62 | 63 | 64 | ####################################################################### 65 | ### 66 | ### CPU & Memory Hardware checks 67 | ### 68 | 69 | # Set these to your correct CPU socket, core, and thread counts 70 | * || check_hw_cpuinfo 2 28 28 71 | 72 | # Compares the accumulated CPU time (in seconds) between kswapd kernel threads 73 | # to make sure there's no imbalance among different NUMA nodes (which could be 74 | # an early symptom of failure). 75 | # 76 | # Max 500 CPU hours; 100x discrepancy limit 77 | * || check_ps_kswapd 1800000 100 log syslog 78 | 79 | # Check that the correct amount of memory is present (with a fudge factor) 80 | * || check_hw_physmem 256GB 256GB 2% 81 | 82 | # Check that at least 1MB of physical memory is free 83 | * || check_hw_physmem_free 1MB 84 | 85 | # If less than 100MB of Memory+SWAP is free, things will die soon 86 | * || check_hw_mem_free 100MB 87 | 88 | # Make sure swap is present (without being too picky on the capacity) 89 | * || check_hw_swap 2G 1TB 90 | 91 | # If less than 1GB of SWAP is free, things will be moving slowly! 92 | * || check_hw_swap_free 1GB 93 | 94 | # Make sure the memory is running at the correct frequency / bus rate 95 | * || check_dmi_data_match -t "Memory Device" "*Speed: 2400 MHz" 96 | 97 | # Check for MCEs (memory warnings and errors) 98 | * || check_hw_mcelog 99 | 100 | # Ensure nodes are not overloaded. The rule of thumb is that load should remain 101 | # below 2-times the number of CPU cores, but we'll allow for short bursts. The 102 | # 1-minute load can be up 4xCoreCount; 5-minute load must be below 2xCoreCount: 103 | * || check_ps_loadavg $((4*$HW_CORES)) $((2*$HW_CORES)) 104 | 105 | 106 | ####################################################################### 107 | ### 108 | ### Network checks 109 | ### 110 | 111 | # Check that there's an active ethernet interface named "eth0" 112 | * || check_hw_eth eth0 113 | 114 | # Check for an IB interface that shows LinkUp (with the specified datarate) 115 | * || check_hw_ib 56 116 | 117 | 118 | ####################################################################### 119 | ### 120 | ### Filesystem checks 121 | ### 122 | 123 | # Filesystems which should be mounted (simply check for their presence) 124 | * || check_fs_mount_rw -f / 125 | * || check_fs_mount_rw -f /tmp 126 | * || check_fs_mount_rw -f /home 127 | * || check_fs_mount_rw /dev/pts '/(none|devpts)/' devpts 128 | 129 | # 130 | # Check for modest amounts of free space in the important places. 131 | # Free inodes are also important. 132 | # 133 | # Only check local filesystems in this file! 
Checking network filesystems 134 | # can hang badly, so such things must be checked via this file: 135 | # 136 | # /etc/nhc/compute-node-checks_blocking-io.conf 137 | # 138 | 139 | * || export DF_FLAGS="-Tkal" 140 | * || export DFI_FLAGS="-Tkal" 141 | 142 | * || check_fs_free / 3% 143 | * || check_fs_ifree / 1k 144 | 145 | * || check_fs_free /tmp 3% 146 | * || check_fs_ifree /tmp 1k 147 | 148 | * || check_fs_free /var 3% 149 | * || check_fs_ifree /var 1k 150 | 151 | * || check_fs_free /var/tmp 3% 152 | * || check_fs_ifree /var/tmp 1k 153 | 154 | * || check_fs_free /var/log 3% 155 | * || check_fs_ifree /var/log 1k 156 | 157 | 158 | ####################################################################### 159 | ### 160 | ### File/metadata checks 161 | ### 162 | 163 | # These should always be directories and always be read/write/execute and sticky. 164 | * || check_file_test -r -w -x -d -k /tmp /var/tmp 165 | 166 | # Assert common properties for devices which occasionally get clobbered 167 | * || check_file_test -c -r -w /dev/null /dev/zero 168 | * || check_file_stat -m 0666 -u 0 -g 0 -t 1 -T 3 /dev/null 169 | 170 | # These should always be readable and should never be empty. 171 | * || check_file_test -r -s /etc/passwd /etc/group 172 | 173 | # Validate a couple important accounts in the passwd and group files 174 | * || check_file_contents /etc/passwd "/^root:x:0:0:/" "sshd:*" 175 | * || check_file_contents /etc/group "/^root:x:0:/" 176 | 177 | # Make sure there's relatively recent (~2 hours) activity from the syslog 178 | * || check_file_stat -n 7200 /var/log/messages 179 | 180 | 181 | ####################################################################### 182 | ### 183 | ### Process checks 184 | ### 185 | 186 | # Ensure the SSH daemon is running (and start it if not) 187 | * || check_ps_service -u root -S sshd 188 | 189 | # Processes which should be running (restart them, if necessary) 190 | * || check_ps_service -u root -r crond 191 | * || check_ps_service -u ganglia -r gmond 192 | * || check_ps_service -u root -r ipmiseld 193 | * || check_ps_service -u root -r filebeat 194 | * || check_ps_service -u root -r mcelog 195 | * || check_ps_service -u ntp -r ntpd 196 | 197 | # SLURM Resource Manager / Batch Scheduler Processes 198 | * || check_ps_service -u munge -r munged 199 | * || check_ps_service -u root -r slurmd 200 | 201 | # TORQUE Resource Manager / Batch Scheduler Processes 202 | # * || check_ps_service -u root -r trqauthd 203 | # * || check_ps_service -u root -r pbs_mom 204 | 205 | # Most systems also need NFS locking services. 206 | # * || check_ps_service -d rpc.statd -r nfslock 207 | 208 | # The audit daemon can sometimes disappear if things get hairy. 209 | # * || check_ps_service -r auditd 210 | 211 | # This is only valid for RHEL6 and similar/newer systems. 212 | # * || check_ps_service -d rsyslogd -r rsyslog 213 | 214 | # In the case of MySQL, it's typically better to cycle. 215 | # * || check_ps_service -c mysqld 216 | 217 | # If desired, watch for users manually running commands and log them. 218 | # * || check_ps_unauth_users log syslog 219 | 220 | # If desired, make sure no users are SSH'd in, but don't kill them. 
221 | # * || check_ps_blacklist sshd '!root' 222 | 223 | 224 | ####################################################################### 225 | ### 226 | ### GPU checks 227 | ### 228 | 229 | # This is a fast-running, less-intense run of the GPU health test 230 | * || NVIDIA_HEALTHMON_ARGS="-v" 231 | * || check_nv_healthmon 232 | 233 | -------------------------------------------------------------------------------- /dependencies/etc/nhc/compute-node-checks_blocking-io.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2015-2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | ################################################################################ 26 | # 27 | # Node Health Check (NHC) configuration file 28 | # 29 | # Checks to be run on compute nodes only 30 | # 31 | # These tests are started when Compute Nodes are idle (before a job starts). All 32 | # filesystem-intensive checks (including checks of network filesystems) should 33 | # be performed here. These tests should be written with the understanding that 34 | # they may lock up if a filesystem hangs (e.g., if the NFS server goes down). 35 | # 36 | # 37 | # Lines are in the form "||" 38 | # Hostmask is a glob, /regexp/, or {noderange} 39 | # Comments begin with '#' 40 | # 41 | ################################################################################ 42 | 43 | 44 | ####################################################################### 45 | ### 46 | ### NHC Configuration Variables 47 | ### 48 | 49 | # 50 | # NHC-wide configuration settings (such as PATH and resource manager) 51 | # are set system-wide in the file: 52 | # 53 | # /etc/sysconfig/nhc 54 | # 55 | 56 | 57 | ####################################################################### 58 | ### 59 | ### Filesystem checks 60 | ### 61 | 62 | # 63 | # Check for modest amounts of free space. Free inodes are also important. 
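# For example, "check_fs_free /home 3%" marks the node unhealthy when less than
# 3% of the filesystem's capacity remains free, and "check_fs_ifree /home 1k"
# does the same when roughly fewer than a thousand inodes remain free.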
64 | # 65 | 66 | * || export DF_FLAGS="-Tka" 67 | * || export DFI_FLAGS="-Tka" 68 | 69 | * || check_fs_free / 3% 70 | * || check_fs_ifree / 1k 71 | 72 | * || check_fs_free /home 3% 73 | * || check_fs_ifree /home 1k 74 | 75 | * || check_fs_free /opt 3% 76 | * || check_fs_ifree /opt 1k 77 | 78 | * || check_fs_free /tmp 3% 79 | * || check_fs_ifree /tmp 1k 80 | 81 | * || check_fs_free /var 3% 82 | * || check_fs_ifree /var 1k 83 | 84 | * || check_fs_free /var/tmp 3% 85 | * || check_fs_ifree /var/tmp 1k 86 | 87 | * || check_fs_free /var/log 3% 88 | * || check_fs_ifree /var/log 1k 89 | 90 | -------------------------------------------------------------------------------- /dependencies/etc/nhc/compute-node-checks_intense.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2015-2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | ################################################################################ 26 | # 27 | # Node Health Check (NHC) configuration file 28 | # 29 | # Checks to be run on compute nodes only 30 | # 31 | # These tests are started when Compute Nodes are idle (before a job starts). All 32 | # resource-intensive checks (excluding filesystems) should be performed here. 33 | # Example subsystems to test would be: CPU, memory, GPUs, accelerators, etc. 
34 | # 35 | # 36 | # Lines are in the form "||" 37 | # Hostmask is a glob, /regexp/, or {noderange} 38 | # Comments begin with '#' 39 | # 40 | ################################################################################ 41 | 42 | 43 | ####################################################################### 44 | ### 45 | ### NHC Configuration Variables 46 | ### 47 | 48 | # 49 | # NHC-wide configuration settings (such as PATH and resource manager) 50 | # are set system-wide in the file: 51 | # 52 | # /etc/sysconfig/nhc 53 | # 54 | 55 | 56 | ####################################################################### 57 | ### 58 | ### GPU checks 59 | ### 60 | 61 | # This test performs an examination of GPU health and bus transfer speeds 62 | * || NVIDIA_HEALTHMON_ARGS="-e -v" 63 | * || check_nv_healthmon 64 | 65 | -------------------------------------------------------------------------------- /dependencies/etc/nvidia-healthmon.conf: -------------------------------------------------------------------------------- 1 | ;;; 2 | ;;; The global section contains configurations that apply to all devices 3 | ;;; 4 | [global] 5 | 6 | ;; 7 | ;; Enable this setting to ensure that the expected number of Tesla brand GPUs 8 | ;; are detected by the NVML library. 9 | ;; 10 | ;; This count only includes Tesla brand GPUs that the nvidia-healthmon 11 | ;; process has sufficient permission to access. 12 | ;; 13 | ;; If this setting is not configured, then checks that require it will skip 14 | ;; 15 | ; devices.tesla.count = 1 16 | 17 | ;; 18 | ;; nvidia-healthmon checks the system for drivers that have been known to 19 | ;; cause issues with NVIDIA hardware, drivers, and software. The following 20 | ;; list contains the names of drivers which are known to cause problems. 21 | ;; If nvidia-healthmon detects any blacklisted drivers it will not 22 | ;; execute further tests. 23 | ;; 24 | ;; You may add/remove drivers on this list at your own risk. 25 | ;; 26 | ;; If this setting is not configured, then checks that require it will skip 27 | ;; 28 | ; 29 | drivers.blacklist = nouveau 30 | 31 | ;;; 32 | ;;; The configuration in each device section only applies to devices of that SKU 33 | ;;; Below is an explanation of all fields that can be set in the device section 34 | ;;; 35 | ; 36 | ;[Tesla K20m] 37 | ; 38 | ;; Each device section starts with the name of the device 39 | ;; Run nvidia-smi to determine the name of your GPU 40 | ; 41 | ;; Bandwidth configuration 42 | ; 43 | ;; nvidia-healthmon can check the PCIe bandwidth between the pinned host 44 | ;; memory and GPU memory 45 | ;; If the bandwidth from the host to GPU or from the GPU to the host is 46 | ;; below this value (in MB/s), nvidia-healthmon will generate a warning 47 | ;; 48 | ;; If this setting is not configured, then checks that require it will skip 49 | ; 50 | ;bandwidth.warn = 1500 51 | ; 52 | ; 53 | ;; nvidia-healthmon can check the PCIe bandwidth between the pinned host 54 | ;; memory and GPU memory 55 | ;; If the bandwidth from the host to GPU or from the GPU to the host is 56 | ;; below this value (in MB/s), nvidia-healthmon will generate an error 57 | ;; 58 | ;; If this setting is not configured, then checks that require it will skip 59 | ; 60 | ;bandwidth.min = 100 61 | ; 62 | ; 63 | ;; Peer to Peer configuration 64 | ; 65 | ;; nvidia-healthmon can check whether peer to peer access is supported between 66 | ;; GPUs on the same host. It can then run a bandwidth test between two GPUs. 
67 | ;; In the case that peer access is supported, if the bandwidth from one GPU to 68 | ;; the other GPU is supported is below this value (in MB/s), nvidia-healthmon 69 | ;; will generate a warning. If peer to peer access is not supported, the 70 | ;; bandwidth test is still run, but no comparison to the minimum bandwidth is 71 | ;; done. 72 | ; 73 | ;peer.bandwidth.warn = 8000 74 | ; 75 | ; 76 | ;; nvidia-healthmon can check whether peer to peer access is supported between 77 | ;; GPUs on the same host. It can then run a bandwidth test between two GPUs. 78 | ;; In the case that peer access is supported, if the bandwidth from one GPU to 79 | ;; the other GPU is supported is below this value (in MB/s), nvidia-healthmon 80 | ;; will generate an error. If peer to peer access is not supported, the 81 | ;; bandwidth test is still run, but no comparison to the minimum bandwidth is 82 | ;; done. 83 | ; 84 | ;peer.bandwidth.min = 5000 85 | ; 86 | ;; PCIe link configuration 87 | ; 88 | ;; nvidia-healthmon can compare the maximum PCIe link generation for the PCIe 89 | ;; link closest to the GPU chip against the value specified here. 90 | ;; 91 | ;; If this setting is not configured, then checks that require it will skip 92 | ;; An error will be generated if there is a mismatch 93 | ;; 94 | ;; For GPU board that contain multiple GPU chips, this value will reflect 95 | ;; the PCIe link generation between the GPU chip and an on board PCIe switch. 96 | ;; For single GPU boards this value reflects the link width between the GPU 97 | ;; chip and the PCIe slot the GPU is connected to. 98 | ;; Note that additional PCIe links upstream from the GPU may have a 99 | ;; different link generation. Those links are not considered here. 100 | ;; 101 | ; 102 | ;pci.gen = 1 103 | ; 104 | ; 105 | ;; nvidia-healthmon can compare the maximum PCIe link width for the PCIe 106 | ;; link closest to the GPU chip against the value specified here. 107 | ;; 108 | ;; If this setting is not configured, then checks that require it will skip 109 | ;; An error will be generated if there is a mismatch 110 | ;; 111 | ;; For GPU board that contain multiple GPU chips, this value will reflect 112 | ;; the PCIe link width between the GPU chip and an on board PCIe switch. 113 | ;; For single GPU boards this value reflects the link width between the GPU 114 | ;; chip and the PCIe slot the GPU is connected to. 115 | ;; Note that additional PCIe links upstream from the GPU may have a 116 | ;; different link width. Those links are not considered here 117 | ; 118 | ;pci.width = 16 119 | ; 120 | ;; nvidia-healthmon can compare the current temperature to a warning level in 121 | ;; degrees Celsius. 
A warning will be generated if the current temperature is 122 | ;; at or above the warning level 123 | ;; 124 | ;; Note that the desired temperature may vary based on the cooling system used 125 | ;; 126 | ;; If this setting is not configured, then checks that require it will skip 127 | ; 128 | ;temperature.warn = 95 129 | ; 130 | 131 | ;;; 132 | ;;; NVIDIA provides default configuration for various settings of various GPUs 133 | ;;; 134 | ;;; Some fields provide conservative maximum expected values 135 | ;;; Some fields are highly system specific, so no default is provided 136 | ;;; Please adjust these values as needed based on local system configuration 137 | ;;; 138 | 139 | [Tesla K10.G1.8GB] 140 | ; This value is affected by a number of factors 141 | ; Let's assume a PCIe Gen 2 system with 8x lane width 142 | ; If your system supports only Gen 1 or <8x lane width, this estimate may be 143 | ; too high. If your system supports Gen 3 or >8x lane width, this estimate 144 | ; be too low 145 | ; The theoretical bandwidth for such a link will be: 146 | ; * 147 | ; PCIe Gen 2 has 500 MB/s per lane 148 | ; So the max theoretical bandwidth is 500 * 8 = 149 | ; 4000 MB/s 150 | ; In reality we can't hit the max, so we need some value lower 151 | ; Other processes running on other GPUs, processes running on the CPU, and 152 | ; processes communicating over the PCIe bus can affect the measured bandwidth 153 | ; 154 | bandwidth.warn = 9500 155 | ; Set this based on your local system configuration 156 | ;bandwidth.min = 8000 157 | 158 | ; The bandwidth between peers is also subject to the above 159 | ; estimation in the worst case. The best case will be much faster. 160 | peer.bandwidth.warn = 9500 161 | ; Set this based on your local system configuration 162 | peer.bandwidth.min = 8000 163 | 164 | ; The on link in question here is the one between the GPU and the PCI switch 165 | ; on the GPU board. This is a PCIe Gen 3 link even if the link to the system 166 | ; is lower. Additionally, this is a 16x wide link. 167 | pci.gen = 3 168 | pci.width = 16 169 | 170 | ; This is an intentionally high default. Set it lower based on your system 171 | ; thermal configuration. 
172 | temperature.warn = 90 173 | 174 | 175 | [Tesla K10.G2.8GB] 176 | ; See [Tesla K10.G1.8GB] section for an explanation of defaults 177 | bandwidth.warn = 9500 178 | bandwidth.min = 8000 179 | peer.bandwidth.warn = 9500 180 | peer.bandwidth.min = 8000 181 | pci.gen = 3 182 | pci.width = 16 183 | temperature.warn = 90 184 | 185 | 186 | [Tesla K20] 187 | bandwidth.warn = 5000 188 | bandwidth.min = 4500 189 | peer.bandwidth.warn = 5000 190 | peer.bandwidth.min = 4500 191 | pci.gen = 2 192 | pci.width = 16 193 | temperature.warn = 90 194 | 195 | 196 | [Tesla K20X] 197 | bandwidth.warn = 5000 198 | bandwidth.min = 4500 199 | peer.bandwidth.warn = 5000 200 | peer.bandwidth.min = 4500 201 | pci.gen = 2 202 | pci.width = 16 203 | temperature.warn = 90 204 | 205 | 206 | [Tesla K20Xm] 207 | bandwidth.warn = 5000 208 | bandwidth.min = 4500 209 | peer.bandwidth.warn = 5000 210 | peer.bandwidth.min = 4500 211 | pci.gen = 2 212 | pci.width = 16 213 | temperature.warn = 90 214 | 215 | 216 | [Tesla K20c] 217 | bandwidth.warn = 5000 218 | bandwidth.min = 4500 219 | peer.bandwidth.warn = 5000 220 | peer.bandwidth.min = 4500 221 | pci.gen = 2 222 | pci.width = 16 223 | temperature.warn = 90 224 | 225 | 226 | [Tesla K20s] 227 | bandwidth.warn = 5000 228 | bandwidth.min = 4500 229 | peer.bandwidth.warn = 5000 230 | peer.bandwidth.min = 4500 231 | pci.gen = 2 232 | pci.width = 16 233 | temperature.warn = 90 234 | 235 | 236 | [Tesla K20m] 237 | bandwidth.warn = 5000 238 | bandwidth.min = 4500 239 | peer.bandwidth.warn = 5000 240 | peer.bandwidth.min = 4500 241 | pci.gen = 2 242 | pci.width = 16 243 | temperature.warn = 90 244 | 245 | 246 | [Tesla K40] 247 | bandwidth.warn = 9500 248 | bandwidth.min = 8000 249 | peer.bandwidth.warn = 9500 250 | peer.bandwidth.min = 8000 251 | pci.gen = 3 252 | pci.width = 16 253 | temperature.warn = 90 254 | 255 | 256 | [Tesla K40c] 257 | bandwidth.warn = 9500 258 | bandwidth.min = 8000 259 | peer.bandwidth.warn = 9500 260 | peer.bandwidth.min = 8000 261 | pci.gen = 3 262 | pci.width = 16 263 | temperature.warn = 90 264 | 265 | 266 | [Tesla K40m] 267 | bandwidth.warn = 9500 268 | bandwidth.min = 8000 269 | peer.bandwidth.warn = 9500 270 | peer.bandwidth.min = 8000 271 | pci.gen = 3 272 | pci.width = 16 273 | temperature.warn = 90 274 | 275 | 276 | [Tesla K80] 277 | bandwidth.warn = 9500 278 | bandwidth.min = 8000 279 | peer.bandwidth.warn = 9500 280 | peer.bandwidth.min = 8000 281 | pci.gen = 3 282 | pci.width = 16 283 | temperature.warn = 90 284 | 285 | 286 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/cgroup.conf: -------------------------------------------------------------------------------- 1 | ################################################################################# 2 | ######################### Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################# 4 | # 5 | # Slurm cgroup support configuration file 6 | # 7 | # See man slurm.conf and man cgroup.conf for further 8 | # information on cgroup configuration parameters 9 | # 10 | # 11 | # This file must be present on all nodes of your cluster. 12 | # See the slurm.conf man page for more information. 
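#
# Rough sketch of how the settings below fit together (not authoritative):
# with ConstrainDevices=yes, a job is limited to the device nodes listed in
# AllowedDevicesFile plus whatever generic resources (e.g. GPUs declared in
# gres.conf) were allocated to it, so a job granted one GPU would typically
# see its /dev/nvidiaN device but be denied access to the node's other GPUs.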
13 | # 14 | ################################################################################# 15 | 16 | CgroupAutomount=yes 17 | CgroupReleaseAgentDir="/etc/slurm/cgroup" 18 | CgroupMountpoint="/sys/fs/cgroup" 19 | 20 | ConstrainCores=yes 21 | ConstrainRAMSpace=yes 22 | ConstrainDevices=yes 23 | AllowedDevicesFile="/etc/slurm/cgroup_allowed_devices_file.conf" 24 | 25 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/cgroup_allowed_devices_file.conf: -------------------------------------------------------------------------------- 1 | /dev/null 2 | /dev/urandom 3 | /dev/zero 4 | /dev/sd* 5 | /dev/vd* 6 | /dev/cpu/*/* 7 | /dev/pts/* 8 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/gres.conf: -------------------------------------------------------------------------------- 1 | # Defines "generic resources" to be used by SLURM 2 | # 3 | # Each compute node must have its own file in /etc/slurm/gres.conf to define the 4 | # resources that it provides. 5 | # 6 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/plugstack.conf: -------------------------------------------------------------------------------- 1 | ################################################################################# 2 | ######################### Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################# 4 | # 5 | # Configuration for SLURM plug-ins 6 | # 7 | # 8 | # This file must be present on all nodes of your cluster. 9 | # 10 | ################################################################################# 11 | 12 | include /etc/slurm/plugstack.conf.d/* 13 | 14 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/plugstack.conf.d/x11.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # X11 SLURM spank plugin configuration file 3 | # 4 | # this plugin can be used to add X11 support in slurm jobs using ssh X11 5 | # tunneling capabilities 6 | # 7 | # The following configuration parameters are available (the character | 8 | # replaces the space in compound options) : 9 | # 10 | # ssh_cmd : can be used to modify the ssh binary to use. 11 | # default corresponds to ssh_cmd=ssh 12 | # ssh_args : can be used to modify the ssh arguments to use. 13 | # default corresponds to ssh_cmd= 14 | # helpertask_cmd: can be used to add a trailing argument to the helper task 15 | # responsible for setting up the ssh tunnel 16 | # default corresponds to helpertask_cmd= 17 | # an interesting value can be helpertask_cmd=2>/tmp/log to 18 | # capture the stderr of the helper task 19 | # 20 | # Users can ask for X11 support for both interactive (srun) and batch (sbatch) 21 | # jobs using parameter --x11=[batch|first|last|all] or the SLURM_SPANK_X11 22 | # environment variable set to the required value. 23 | # 24 | # In interactive mode (srun), values can be first to establish a tunnel with 25 | # the first allocated node, last for the last one and all for all nodes. 26 | # 27 | # In batch mode (sbatch), only "batch" mode can be used but batch script can 28 | # be used first|last|all values with srun. 
In batch mode, the first allocated 29 | # node will contact the submission node using ssh to establish the tunnel 30 | # from the submission node to itself. As a result, the user must kept its 31 | # initial connection to the submission host as long as it wants to be able to 32 | # forward its X11 display to batch execution node. 33 | # 34 | #------------------------------------------------------------------------------- 35 | optional /usr/lib64/slurm/x11.so 36 | #------------------------------------------------------------------------------- 37 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/scripts/slurm.epilog: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015-2016 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # This epilog script is run on each compute node after a user's job has 29 | # completed. Other jobs (from this user or another user) could still be running. 30 | # 31 | # If no other jobs from this user are running on the node, we will ensure all 32 | # their processes are terminated and all temporary/scratch files are removed. 33 | # 34 | # Without this script, a user can login to a node while their job is running and 35 | # that session will persist even after their job has finished. 36 | # 37 | ################################################################################ 38 | 39 | 40 | # The default SLURM path can be replaced, if necessary 41 | SLURM_BIN_DIR=/usr/slurm/16.05/bin/ 42 | 43 | 44 | # 45 | # List of temporary directories which should be cleaned after all of 46 | # a user's jobs have completed. You can find all such locations on your 47 | # systems by running this command (it is I/O intensive!): 48 | # 49 | # find / -type d -perm 1777 50 | # 51 | TMP_DIRS="/dev/shm /tmp /usr/tmp /var/tmp" 52 | 53 | 54 | # Exit if this script isn't actually running within a SLURM context 55 | if [[ -z "$SLURM_JOB_UID" ]] || [[ -z "$SLURM_JOB_ID" ]]; then 56 | echo "Do not run this script manually - it is used by SLURM" 57 | exit 1 58 | fi 59 | 60 | 61 | # 62 | # Don't try to kill user root or system daemon jobs. 63 | # 64 | # Note that the maximum system UID varies by distro (499 for older RHEL; 65 | # 999 for Debian and newer versions of RHEL). 
66 | # 67 | # See UID_MIN in /etc/login.defs: 68 | # 69 | # awk '/^UID_MIN/ {print $2}' /etc/login.defs 70 | # 71 | if [[ $SLURM_JOB_UID -lt 1000 ]]; then 72 | exit 0 73 | fi 74 | 75 | 76 | # Pull the list of jobs this user is currently running on this node. 77 | job_list=$(${SLURM_BIN_DIR}squeue --noheader --format=%A --user=$SLURM_JOB_UID --node=localhost) 78 | squeue_retval=$? 79 | 80 | # If squeue failed, we probably have the wrong PATH or SLURM is down... 81 | if [[ $squeue_retval -gt 0 ]]; then 82 | exit $squeue_retval 83 | fi 84 | 85 | # Look through each job running on this node 86 | for job_id in $job_list; do 87 | # If the user still has a job on this node, stop here. 88 | if [[ $job_id -ne $SLURM_JOB_ID ]]; then 89 | exit 0 90 | fi 91 | done 92 | 93 | 94 | # Drop clean caches (recommended by OpenHPC) 95 | echo 3 > /proc/sys/vm/drop_caches 96 | 97 | 98 | # 99 | # No other SLURM jobs found - purge all remaining processes of this user. 100 | # 101 | # Note: the user can have other processes exiting, especially if they have 102 | # an interactive session (e.g., ssh with SPANK plugins). We may need to be more 103 | # descriminating in which processes are killed... 104 | # 105 | pkill -KILL -U $SLURM_JOB_UID 106 | 107 | 108 | # Remove any remaining temporary files the user created. 109 | for tmpdir in $TMP_DIRS; do 110 | find "$tmpdir" -uid $SLURM_JOB_UID -exec rm -Rf {} + 111 | find_retval=$? 112 | 113 | if [[ $find_retval -gt 0 ]]; then 114 | echo "Epilog error - unable to clean up temp files in $tmpdir" 115 | exit $find_retval 116 | fi 117 | done 118 | 119 | 120 | # If we've gotten to the end cleanly, everything should have worked 121 | exit 0 122 | 123 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/scripts/slurm.healthcheck: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # Start a node health check. 29 | # 30 | # This should run very fast and not use system resources, as it will be running 31 | # at the same time as compute jobs. Longer-term health checks may be run inside 32 | # the slurm.healthcheck_long script. 
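#
# For reference, slurm.conf (dependencies/etc/slurm/slurm.conf) wires this
# script in as the periodic health check, roughly as follows:
#
#   HealthCheckProgram=/etc/slurm/scripts/slurm.healthcheck
#   HealthCheckInterval=900
#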
33 | # 34 | ################################################################################ 35 | 36 | 37 | source '/mcms/scripts/util/lib.lockfile.sh' 38 | 39 | 40 | NHC=/usr/sbin/nhc 41 | 42 | NHC_FILE="/etc/nhc/compute-node-checks.conf" 43 | 44 | 45 | # Other scripts can also spawn health checks, so we need a lock file 46 | MCMS_LOCKFILE="node-health-check" 47 | 48 | # Attempt to get the NHC lock 49 | mcms_get_lock 50 | 51 | # If unable to get the lock, we'll pass. Assume a longer health test is running. 52 | if [[ -z "$MCMS_RECEIVED_LOCK" ]]; then 53 | exit 0 54 | fi 55 | 56 | 57 | # Execute Node Health Check 58 | eval $NHC -c $NHC_FILE 59 | nhc_retval=$? 60 | 61 | 62 | exit $nhc_retval 63 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/scripts/slurm.healthcheck_long: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # Start longer-running node health checks. 29 | # 30 | # This script expects to be spawned from the front-end SLURM server before a 31 | # user's job is started. This script is run as root. 32 | # 33 | # During execution of this script, the nodes have state POWER_UP/CONFIGURING. 34 | # This gives us time to run longer health tests than are normally allowed in 35 | # prolog and epilog scripts. 36 | # 37 | # Each NHC script is expected to finish within 2 minutes. If an error (such as 38 | # a broken NFS mount) causes the script to run beyond 2 minutes, it will be 39 | # terminated (which results in an error condition and drains the compute node). 
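#
# For context: this script is launched by /etc/slurm/scripts/slurmctld.prolog,
# which runs it in parallel across a job's compute nodes over SSH (pdsh with
# the .healthcheck-ssh-key) and keeps a per-node timestamp cache so that the
# intensive checks are repeated roughly once per LONG_HEALTH_CHECK_INTERVAL
# (one day by default).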
40 | # 41 | ################################################################################ 42 | 43 | 44 | source '/mcms/scripts/util/lib.lockfile.sh' 45 | 46 | 47 | NHC=/usr/sbin/nhc 48 | 49 | # 50 | # NHC files to run: 51 | # * normal (quick-running) tests 52 | # * more intensive tests (e.g., accelerator memory transfer performance) 53 | # * filesystem checks that could lock up (e.g., checking free space) 54 | # 55 | NHC_FILES="/etc/nhc/compute-node-checks.conf \ 56 | /etc/nhc/compute-node-checks_intense.conf \ 57 | /etc/nhc/compute-node-checks_blocking-io.conf" 58 | 59 | 60 | # Other scripts can also spawn health checks, so we need a lock file 61 | MCMS_LOCKFILE="node-health-check" 62 | 63 | # Attempt to get the NHC lock. 64 | # Because the other health tests are quick-running, we should wait a bit. 65 | attempts=3 66 | while [[ $attempts -gt 0 ]] 67 | do 68 | mcms_get_lock 69 | 70 | if [[ -n "$MCMS_RECEIVED_LOCK" ]]; then 71 | break 72 | fi 73 | 74 | sleep 0.5s 75 | 76 | attempts=$(( $attempts - 1 )) 77 | done 78 | 79 | # If unable to get the lock, we'll pass. Assume a longer health test is running. 80 | if [[ -z "$MCMS_RECEIVED_LOCK" ]]; then 81 | exit 0 82 | fi 83 | 84 | 85 | # Execute Node Health Checks 86 | for nhc_file in $NHC_FILES 87 | do 88 | eval $NHC -c $nhc_file 89 | nhc_retval=$? 90 | 91 | if [[ $nhc_retval -gt 0 ]]; then 92 | exit $nhc_retval 93 | fi 94 | done 95 | 96 | 97 | # If we've gotten to the end cleanly, everything should have worked 98 | exit 0 99 | 100 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/scripts/slurm.jobstart_messages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015-2016 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # Provide helpful messages for the start and end of a batch job. 
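#
# How this file is hooked in is site-specific (slurm.conf in this repository
# leaves TaskProlog commented out). One plausible, hypothetical way to use it
# is to source it near the top of a batch script so the messages and the
# exit-status trap end up in the job's output file, e.g.:
#
#   #!/bin/bash
#   #SBATCH --nodes=2 --ntasks=16 --cpus-per-task=1
#   source /etc/slurm/scripts/slurm.jobstart_messages.sh
#   srun ./my_application
#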
29 | # 30 | ################################################################################ 31 | 32 | # Exit if this script isn't actually running within a SLURM context 33 | if [[ -z "$SLURM_JOB_UID" ]] || [[ -z "$SLURM_JOB_ID" ]]; then 34 | echo "Do not run this script manually - it is used by SLURM" 35 | exit 1 36 | fi 37 | 38 | 39 | echo " 40 | ################################################################################ 41 | # JOB DETAILS 42 | # 43 | # Job started at $(date +"%F %T") 44 | # Job ID number: $SLURM_JOBID 45 | # 46 | # Starting from host: $(hostname) 47 | # The following compute nodes will be used: $SLURM_NODELIST 48 | #" 49 | 50 | NPROCS=$(( $SLURM_NTASKS * $SLURM_CPUS_PER_TASK )) 51 | NODES=$SLURM_JOB_NUM_NODES 52 | NUM_SOCKETS=$((`grep 'physical id' /proc/cpuinfo | sort -u | tail -n1 | cut -d" " -f3` + 1)) 53 | NUM_CORES=$(grep siblings /proc/cpuinfo | head -n1 | cut -d" " -f2) 54 | 55 | echo "# 56 | # Using $NPROCS processes across $NODES nodes. 57 | # Reserving $SLURM_MEM_PER_NODE MB of memory per node. 58 | # 59 | # The node starting this job has: 60 | # 61 | # $NUM_SOCKETS CPU sockets with $NUM_CORES cores each -- $(grep -m1 'model name' /proc/cpuinfo) 62 | # System memory: $(awk '/MemTotal/ {print $2 $3}' /proc/meminfo) 63 | #" 64 | 65 | # Check for GPUs and print their status 66 | if [[ -n "$CUDA_VISIBLE_DEVICES" && "$CUDA_VISIBLE_DEVICES" != "NoDevFiles" ]]; then 67 | GPUS_PER_NODE=$(echo $CUDA_VISIBLE_DEVICES | sed 's/,/ /g' | wc --words) 68 | 69 | first_index=$(echo $CUDA_VISIBLE_DEVICES | sed 's/,.*//') 70 | GPU_TYPE=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader --id=$first_index | sed 's/ /_/g') 71 | 72 | echo "# 73 | # NVIDIA CUDA device IDs in use: $CUDA_VISIBLE_DEVICES 74 | # 75 | # Full list of GPU devices 76 | # $(nvidia-smi) 77 | #" 78 | fi 79 | 80 | # Check for Xeon Phi Coprocessors and print their status 81 | if [[ -n "$OFFLOAD_DEVICES" ]]; then 82 | echo "# 83 | # Xeon Phi device IDs in use: $OFFLOAD_DEVICES 84 | # 85 | # $(micinfo) 86 | #" 87 | fi 88 | 89 | 90 | # Check for storage devices 91 | STORAGE_DEVICES=$(awk '!/Attached devices/' /proc/scsi/scsi) 92 | if [[ -n "$STORAGE_DEVICES" ]]; then 93 | echo "# 94 | # Storage devices attached to this node: 95 | # $STORAGE_DEVICES 96 | #" 97 | else 98 | echo "# 99 | # No storage devices are attached to this node. 100 | #" 101 | fi 102 | 103 | 104 | echo "# 105 | # Changing to working directory $SLURM_SUBMIT_DIR 106 | # 107 | ################################################################################ 108 | 109 | " 110 | 111 | 112 | ################################################################################ 113 | # 114 | # The section below will be run when the job has finished 115 | # 116 | ################################################################################ 117 | 118 | # Trap all exits (both with and without errors) 119 | trap exit_handler EXIT 120 | 121 | # Remap errors and interrupts to exit (to prevent two calls to the handler) 122 | trap exit ERR INT TERM 123 | 124 | exit_handler() { 125 | local error_code="$?" 126 | local exit_time=$(date +'%F %T') 127 | 128 | # If there was an error, report it. 129 | if [ "$error_code" -gt 0 ]; then 130 | echo " 131 | 132 | ################################################################################ 133 | # 134 | # WARNING! 
Job exited abnormally at $exit_time with error code: $error_code 135 | # 136 | ################################################################################" 137 | 138 | # If the job completed successfully, report success. 139 | else 140 | echo " 141 | 142 | ################################################################################ 143 | # 144 | # Job finished successfully at $exit_time 145 | # 146 | ################################################################################" 147 | fi 148 | 149 | exit $error_code 150 | } 151 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/scripts/slurmctld.power_nodes_off: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # This script is run from the front-end SLURM server when powered-up nodes are 29 | # idle and just wasting power. This script is run as the SlurmUser, not as root. 30 | # 31 | ################################################################################ 32 | 33 | 34 | NODELIST="$1" 35 | 36 | 37 | # Exit if this script isn't actually running within a SLURM context 38 | if [[ -z "$SLURM_CONF" ]]; then 39 | echo "Do not run this script manually - it is used by SLURM" 40 | exit 1 41 | fi 42 | 43 | 44 | logger "SLURM is suspending node(s): $NODELIST" 45 | 46 | 47 | ################################################################################ 48 | # Parse the short-form node list information from SLURM. scontrol can do this, 49 | # but we should try not to shell out. 
Example: node[1,4-7,18] 50 | # 51 | full_node_list=( ) 52 | 53 | nodename_prefix=${NODELIST%%\[*} 54 | nodename_postfix=${NODELIST##*\]} 55 | short_list=${NODELIST##*\[} 56 | short_list=${short_list%%\]*} 57 | 58 | # If the 'node list' is a single node, we're done 59 | if [[ "$nodename_prefix" == "$nodename_postfix" ]]; then 60 | full_node_list[0]=$NODELIST 61 | else 62 | # Break down the comma-separated list 63 | OLD_IFS=$IFS 64 | IFS=, 65 | for item in $short_list; do 66 | range_begin=${item%%-*} 67 | range_end=${item##*-} 68 | 69 | # Add in each node in the specified node range (even if it's just one node) 70 | for (( i=$range_begin; i<$(($range_end+1)); i++ )); do 71 | full_node_list[${#full_node_list[@]}]=${nodename_prefix}${i}${nodename_postfix} 72 | done 73 | done 74 | IFS=$OLD_IFS 75 | fi 76 | ################################################################################ 77 | 78 | 79 | ################################################################################ 80 | # Power off the nodes 81 | # 82 | 83 | # Specify arguments to pass to SSH 84 | # Slurm will use a private SSH key to login as root on each compute node. 85 | SSH_EXECUTABLE=${SSH_EXECUTABLE:-/usr/bin/ssh} 86 | ssh_arguments="-i /var/spool/slurmd/.ssh/.poweroff-ssh-key -2 -a -x -lroot" 87 | 88 | # Power off all idle nodes 89 | for (( i=0; i<${#full_node_list[@]}; i++ )); do 90 | $SSH_EXECUTABLE $ssh_arguments ${full_node_list[$i]} /sbin/poweroff 91 | ssh_retval=$? 92 | 93 | if [[ $ssh_retval -gt 0 ]]; then 94 | exit $ssh_retval 95 | fi 96 | done 97 | ################################################################################ 98 | 99 | 100 | exit 0 101 | 102 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/scripts/slurmctld.power_nodes_on: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2016 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # This script is run from the front-end SLURM server when powered-down nodes 29 | # need to be re-activated and powered back up. This script is run as the 30 | # SlurmUser, not as root. 31 | # 32 | # During execution of this script, the nodes have state POWER_UP/CONFIGURING. 
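#
# For reference, slurm.conf (dependencies/etc/slurm/slurm.conf) wires the
# power-saving scripts up as follows:
#
#   SuspendProgram=/etc/slurm/scripts/slurmctld.power_nodes_off
#   ResumeProgram=/etc/slurm/scripts/slurmctld.power_nodes_on
#   SuspendTime=14400    # idle seconds before a node is powered off (4 hours)
#   ResumeTimeout=300    # seconds we expect the node boot process to take
#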
33 | # 34 | ################################################################################ 35 | 36 | 37 | NODELIST="$1" 38 | 39 | 40 | # Exit if this script isn't actually running within a SLURM context 41 | if [[ -z "$SLURM_CONF" ]]; then 42 | echo "Do not run this script manually - it is used by SLURM" 43 | exit 1 44 | fi 45 | 46 | 47 | logger "SLURM is resuming node(s) $NODELIST" 48 | 49 | 50 | ################################################################################ 51 | # Power on the nodes 52 | # 53 | POWERUP_EXECUTABLE=${POWERUP_EXECUTABLE:-/etc/slurm/scripts/slurmctld.power_nodes_on_as_root} 54 | SSH_EXECUTABLE=${SSH_EXECUTABLE:-/usr/bin/ssh} 55 | 56 | # Specify arguments to pass to SSH 57 | # Slurm will use a private SSH key to login to the master as root 58 | ssh_arguments="-i /var/spool/slurmd/.ssh/.poweroff-ssh-key -2 -a -x -lroot" 59 | 60 | 61 | # We pass the node list in via stdin so that our SSH key checking can securely 62 | # verify the exact script which is running with root privileges. 63 | echo $NODELIST | $SSH_EXECUTABLE $ssh_arguments localhost $POWERUP_EXECUTABLE 64 | powerup_retval=$? 65 | 66 | # The 'wwsh' utility (which starts the nodes) returns 1 even upon success 67 | if [[ $powerup_retval -gt 1 ]]; then 68 | exit $powerup_retval 69 | fi 70 | ################################################################################ 71 | 72 | 73 | ################################################################################ 74 | # Parse the short-form node list information from SLURM. scontrol can do this, 75 | # but we should try not to shell out. Example: node[1,4-7,18] 76 | # 77 | full_node_list=( ) 78 | 79 | nodename_prefix=${NODELIST%%\[*} 80 | nodename_postfix=${NODELIST##*\]} 81 | short_list=${NODELIST##*\[} 82 | short_list=${short_list%%\]*} 83 | 84 | # If the 'node list' is a single node, we're done 85 | if [[ "$nodename_prefix" == "$nodename_postfix" ]]; then 86 | full_node_list[0]=$NODELIST 87 | else 88 | # Break down the comma-separated list 89 | OLD_IFS=$IFS 90 | IFS=, 91 | for item in $short_list; do 92 | range_begin=${item%%-*} 93 | range_end=${item##*-} 94 | 95 | # Add in each node in the specified node range (even if it's just one node) 96 | for (( i=$range_begin; i<$(($range_end+1)); i++ )); do 97 | full_node_list[${#full_node_list[@]}]=${nodename_prefix}${i}${nodename_postfix} 98 | done 99 | done 100 | IFS=$OLD_IFS 101 | fi 102 | ################################################################################ 103 | 104 | 105 | ################################################################################ 106 | # Wait for the nodes to complete the boot process. 107 | # To start, we'll try one node at random. As soon as more than one node is 108 | # responding, we'll exit and SLURM can verify they are actually up. 109 | # 110 | # SSH will wait up to 5 seconds per attempt; we'll wait up to another 5 seconds 111 | retry_interval="5s" 112 | 113 | # Specify arguments to pass to SSH 114 | # Slurm will use a private SSH key to login as root on each compute node. 
115 | SSH_EXECUTABLE=${SSH_EXECUTABLE:-/usr/bin/ssh} 116 | ssh_arguments="-i /var/spool/slurmd/.poweroff-ssh-key -2 -a -x -lroot -oConnectTimeout=${retry_interval}" 117 | 118 | # Each retry will last between 5 and 10 seconds (we'll wait 5 to 10 minutes) 119 | retry_attempts=60 120 | ssh_retval=999 121 | nodes_responding=0 122 | while [[ $ssh_retval -gt 0 ]] && 123 | [[ $retry_attempts -gt 0 ]] && 124 | [[ $nodes_responding -lt 2 ]]; 125 | do 126 | sleep $retry_interval 127 | 128 | random_node_index=$(( $RANDOM % ${#full_node_list[@]} )) 129 | random_node=${full_node_list[$random_node_index]} 130 | 131 | $SSH_EXECUTABLE $ssh_arguments $random_node echo 132 | ssh_retval=$? 133 | 134 | # Once nodes start responding, count them 135 | if [[ $ssh_retval -eq 0 ]]; then 136 | nodes_responding=$(( $nodes_responding + 1 )) 137 | fi 138 | 139 | retry_attempts=$(( $retry_attempts - 1 )) 140 | done 141 | 142 | # If we waited the whole time and no nodes are responding, error out 143 | if [[ $ssh_retval -gt 0 ]] && [[ $nodes_responding -lt 2 ]]; then 144 | logger "SLURM was not able to successfully power up all requested nodes" 145 | exit $ssh_retval 146 | fi 147 | ################################################################################ 148 | 149 | 150 | exit 0 151 | 152 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/scripts/slurmctld.power_nodes_on_as_root: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # This script is called via SSH from the front-end SLURM server when powered- 29 | # down nodes need to be re-activated and powered back up. This script will be 30 | # executed remotely via an SSH connection (using SLURM's private SSH key). 31 | # 32 | # We expect the calling script to pass in the nodelist via stdin. 33 | # 34 | ################################################################################ 35 | 36 | 37 | # Exit if this script isn't actually running within a SLURM context. Because 38 | # we expect to be called via SSH (which strips much of the SLURM context), check 39 | # to be sure that this script was called directly and not from a user session. 40 | if [[ ! 
"$SSH_ORIGINAL_COMMAND" =~ "slurmctld.power_nodes_on_as_root" ]]; then 41 | echo "Do not run this script manually - it is used by SLURM" 42 | exit 1 43 | fi 44 | 45 | 46 | read NODELIST 47 | 48 | 49 | ################################################################################ 50 | # Power on the nodes 51 | # 52 | WWSH_EXECUTABLE=${WWSH_EXECUTABLE:-wwsh} 53 | 54 | $WWSH_EXECUTABLE ipmi poweron $NODELIST 55 | wwsh_retval=$? 56 | 57 | # The 'wwsh' utility returns 1 even upon success 58 | if [[ $wwsh_retval -gt 1 ]]; then 59 | exit $wwsh_retval 60 | fi 61 | ################################################################################ 62 | 63 | 64 | exit 0 65 | 66 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/scripts/slurmctld.prolog: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # This prolog script is run from the front-end SLURM server before a user's job 29 | # is allocated compute nodes. This script is run as the SlurmUser, not as root. 30 | # 31 | # During execution of this script, the nodes have state POWER_UP/CONFIGURING. 32 | # This gives us time to run longer health tests than are normally allowed in 33 | # prolog and epilog scripts. 34 | # 35 | # Each NHC script is expected to finish within 2 minutes. If an error (such as 36 | # a broken NFS mount) causes the script to run beyond 2 minutes, it will be 37 | # terminated (which results in an error condition and drains the compute node). 38 | # 39 | ################################################################################ 40 | 41 | 42 | # Exit if this script isn't actually running within a SLURM context 43 | if [[ -z "$SLURM_JOB_UID" ]] || [[ -z "$SLURM_JOB_ID" ]]; then 44 | echo "Do not run this script manually - it is used by SLURM" 45 | exit 1 46 | fi 47 | 48 | 49 | ################################################################################ 50 | # Parse the short-form node list information from SLURM. scontrol can do this, 51 | # but we should try not to shell out. 
Example: node[1,4-7,18] 52 | # 53 | full_node_list=( ) 54 | 55 | nodename_prefix=${SLURM_JOB_NODELIST%%\[*} 56 | nodename_postfix=${SLURM_JOB_NODELIST##*\]} 57 | short_list=${SLURM_JOB_NODELIST##*\[} 58 | short_list=${short_list%%\]*} 59 | 60 | # If the 'node list' is a single node, we're done 61 | if [[ "$nodename_prefix" == "$nodename_postfix" ]]; then 62 | full_node_list[0]=$SLURM_JOB_NODELIST 63 | else 64 | # Break down the comma-separated list 65 | OLD_IFS=$IFS 66 | IFS=, 67 | for item in $short_list; do 68 | range_begin=${item%%-*} 69 | range_end=${item##*-} 70 | 71 | # Add in each node in the specified node range (even if it's just one node) 72 | for (( i=$range_begin; i<$(($range_end+1)); i++ )); do 73 | full_node_list[${#full_node_list[@]}]=${nodename_prefix}${i}${nodename_postfix} 74 | done 75 | done 76 | IFS=$OLD_IFS 77 | fi 78 | ################################################################################ 79 | 80 | 81 | # We may have a pause here if SLURM is getting nodes ready (either by powering 82 | # up nodes that are powered off and/or running long health checks). 83 | 84 | 85 | ################################################################################ 86 | # Wait for the nodes to complete the boot process. 87 | # To start, we'll try one node at random. As soon as more than one node is 88 | # responding, we'll exit and SLURM can verify they are actually up. 89 | # 90 | # SSH will wait up to 5 seconds per attempt; we'll wait up to another 5 seconds 91 | retry_interval="5s" 92 | 93 | # Specify arguments to pass to SSH 94 | # Slurm will use a private SSH key to login as root on each compute node. 95 | SSH_EXECUTABLE=${SSH_EXECUTABLE:-/usr/bin/ssh} 96 | ssh_arguments="-i /var/spool/slurmd/.ssh/.poweroff-ssh-key -2 -a -x -lroot -oConnectTimeout=${retry_interval}" 97 | 98 | # Each retry will last between 5 and 10 seconds (we'll wait 5 to 10 minutes) 99 | retry_attempts=60 100 | ssh_retval=999 101 | nodes_responding=0 102 | while [[ $ssh_retval -gt 0 ]] && 103 | [[ $retry_attempts -gt 0 ]] && 104 | [[ $nodes_responding -lt 2 ]]; 105 | do 106 | sleep $retry_interval 107 | 108 | random_node_index=$(( $RANDOM % ${#full_node_list[@]} )) 109 | random_node=${full_node_list[$random_node_index]} 110 | 111 | $SSH_EXECUTABLE $ssh_arguments $random_node echo 112 | ssh_retval=$? 113 | 114 | # Once nodes start responding, count them 115 | if [[ $ssh_retval -eq 0 ]]; then 116 | nodes_responding=$(( $nodes_responding + 1 )) 117 | fi 118 | 119 | retry_attempts=$(( $retry_attempts - 1 )) 120 | done 121 | 122 | # If we waited the whole time and no nodes are responding, error out 123 | if [[ $ssh_retval -gt 0 ]] && [[ $nodes_responding -lt 2 ]]; then 124 | exit $ssh_retval 125 | fi 126 | ################################################################################ 127 | 128 | 129 | 130 | ################################################################################ 131 | # Prevent long tests from running over and over on compute nodes. While many 132 | # cluster jobs are long-running, SLURM also supports large numbers of short- 133 | # running jobs. We don't want multi-minute tests between each job. 
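# As an illustration of the caching scheme implemented below: each node gets
# a small timestamp file under /dev/shm (for a hypothetical host "node17"
# that would be .../.slurmctld_health_check_cache/1/node17, holding the
# seconds-since-epoch of its last intensive check), and that timestamp is
# compared against LONG_HEALTH_CHECK_INTERVAL to decide which nodes are due
# for another intensive check.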
134 | 135 | # Assume that one intensive health test per day will be sufficient 136 | LONG_HEALTH_CHECK_INTERVAL=${LONG_HEALTH_CHECK_INTERVAL:-$((60*60*24))} 137 | 138 | # Number of seconds since epoch 139 | current_time=$(printf "%(%s)T" "-1") 140 | 141 | # List of nodes which need to be checked 142 | check_node_list=( ) 143 | 144 | # Store the health check cache in memory - requires 4KB per node 145 | cache_dir=/dev/shm/.slurmctld_health_check_cache 146 | 147 | for compute_node in ${full_node_list[@]}; do 148 | # Split node cache files into several directories 149 | node_number=${compute_node##*[a-zA-Z-_]} 150 | node_dir="${cache_dir}/${node_number:0:1}" 151 | node_cache_file="${node_dir}/${compute_node}" 152 | 153 | # See if the node has ever been checked 154 | last_tested=0 155 | if [[ -f "$node_cache_file" ]]; then 156 | last_tested=$(< $node_cache_file) 157 | fi 158 | 159 | if (( $current_time > ($last_tested + $LONG_HEALTH_CHECK_INTERVAL) )); then 160 | # Node was not checked recently. Check it now. 161 | check_node_list[${#check_node_list[@]}]=$compute_node 162 | fi 163 | done 164 | ################################################################################ 165 | 166 | 167 | 168 | ################################################################################ 169 | # Start the long healthcheck script on the compute nodes in parallel. 170 | # 171 | LONG_HEALTH_CHECK_SCRIPT=${LONG_HEALTH_CHECK_SCRIPT:-/etc/slurm/scripts/slurm.healthcheck_long} 172 | 173 | # Specify arguments to pass to SSH - slurm will use a private SSH key to login 174 | # as root on each compute node. Note that the username parameter must be set 175 | # twice (once with '%u' and once with 'root') to prevent PDSH from overwriting 176 | # this setting with the SLURM username. 177 | export PDSH_SSH_ARGS="-i /var/spool/slurmd/.ssh/.healthcheck-ssh-key -2 -a -x -l%u -lroot %h" 178 | export PDSH_EXECUTABLE=${PDSH_EXECUTABLE:-/usr/bin/pdsh} 179 | 180 | # Execute Node Health Checks on all nodes assigned to this job 181 | $PDSH_EXECUTABLE -Sw $SLURM_JOB_NODELIST $LONG_HEALTH_CHECK_SCRIPT 182 | pdsh_retval=$? 183 | 184 | if [[ $pdsh_retval -gt 0 ]]; then 185 | exit $pdsh_retval 186 | fi 187 | ################################################################################ 188 | 189 | 190 | 191 | ################################################################################ 192 | # If we've gotten to the end cleanly, everything should have worked. 193 | # 194 | # Mark the compute nodes as checked. 195 | # 196 | for compute_node in ${check_node_list[@]}; do 197 | # Split node cache files into several directories 198 | node_number=${compute_node##*[a-zA-Z-_]} 199 | node_dir="${cache_dir}/${node_number:0:1}" 200 | node_cache_file="${node_dir}/${compute_node}" 201 | 202 | if [[ ! 
-d "$node_dir" ]]; then 203 | mkdir -p "$node_dir" 204 | fi 205 | 206 | echo $current_time > "$node_cache_file" 207 | done 208 | ################################################################################ 209 | 210 | 211 | exit 0 212 | 213 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/scripts/slurmd.gres_init: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015-2016 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # Additional SLURM configuration 29 | # 30 | # This script is run by SLURM during startup to iterate through GPUs and Phis 31 | # 32 | ################################################################################ 33 | 34 | 35 | ############## Discover "Generic Resources" and Populate gres.conf ############# 36 | gresfile=/etc/slurm/gres.conf 37 | 38 | 39 | ########### Add GPUs (if present) ########## 40 | if [[ -n "$(lspci | grep NVIDIA)" ]]; then 41 | # If GPU settings are already present, assume they've been set elsewhere, 42 | # or that SLURM is restarting and set them up the last time it started. 43 | if [[ -z "$(grep '/dev/nvidia' $gresfile 2> /dev/null)" ]]; then 44 | 45 | # Test for the presence of NVIDIA GPU devices 46 | if [[ -c /dev/nvidia0 ]]; then 47 | 48 | # SLURM desires that we also inform it which CPUs are 49 | # allowed to use each GPU. By default, we allow all CPUs: 50 | cpu_list=( $(awk '/processor/ {print $3}' /proc/cpuinfo) ) 51 | first_cpu=${cpu_list[0]} 52 | final_cpu=${cpu_list[${#cpu_list[@]}-1]} 53 | 54 | # Determine the ordering of the GPUs as seen by NVIDIA CUDA 55 | check_for_smi=$(which nvidia-smi) 56 | if [[ -z "$check_for_smi" ]]; then 57 | echo "SLURM startup - the nvidia-smi tool is unavailable, so unable to set GPU types" 58 | 59 | # If we were not able to grab the GPU ordering, we pass a group 60 | # of generic GPU devices to SLURM. SLURM won't know their types. 
61 | # 62 | # Loop through each NVIDIA device in PCI order 63 | for gpu_device in $(find /dev/ -type c -name "nvidia[0-9]*" | sort); do 64 | echo "Name=gpu File=${gpu_device} CPUs=${first_cpu}-${final_cpu}" >> $gresfile 65 | done 66 | else 67 | # Determine the ordering and types of GPUs 68 | for gpu_device in $(find /dev/ -type c -name "nvidia[0-9]*" | sort); do 69 | gpu_id=${gpu_device#*nvidia} 70 | gpu_name=$(nvidia-smi --format=csv,noheader --query-gpu=gpu_name --id=${gpu_id} | sed 's/ /-/g') 71 | 72 | # If we were able to grab the name, we provide the additional 73 | # information to SLURM. If not, SLURM will just see a group of 74 | # generic GPU devices (without knowing their type) 75 | if [[ -n "${gpu_name}" ]]; then 76 | echo "Name=gpu Type=${gpu_name} File=${gpu_device} CPUs=${first_cpu}-${final_cpu}" >> $gresfile 77 | else 78 | echo "SLURM startup - unable to read the name of GPU ${gpu_device}" 79 | echo "Name=gpu File=${gpu_device} CPUs=${first_cpu}-${final_cpu}" >> $gresfile 80 | fi 81 | done 82 | fi 83 | else 84 | echo "SLURM startup - unable to add GPUs to SLURM $gresfile file - lspci reports GPUs, but they are not in /dev !" 85 | fi 86 | fi 87 | else 88 | echo "SLURM startup - no NVIDIA GPUs detected..." 89 | fi 90 | 91 | 92 | ########### Add Intel Xeon Phi MIC coprocessors (if present) ########## 93 | if [[ -n "$(lspci | grep 'Xeon Phi coprocessor')" ]]; then 94 | # If MIC settings are already present, assume they've been set elsewhere, 95 | # or that SLURM is restarting and set them up the last time it started. 96 | if [[ -z "$(grep '/dev/mic' $gresfile 2> /dev/null)" ]]; then 97 | if [[ -c /dev/mic0 ]]; then 98 | for mic in $(find /dev/ -type c -name "mic[0-9]*" | sort); do 99 | echo "Name=mic File=$mic" >> $gresfile 100 | done 101 | else 102 | echo "SLURM startup - unable to add Xeon Phi coprocessors to SLURM $gresfile file - lspci reports PHIs, but they are not in /dev !" 103 | fi 104 | fi 105 | else 106 | echo "SLURM startup - no Intel Xeon PHIs detected..." 107 | fi 108 | 109 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/slurm.conf: -------------------------------------------------------------------------------- 1 | ################################################################################# 2 | ######################### Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################# 4 | # 5 | # Configuration for SLURM Resource Manager 6 | # 7 | # 8 | # This file must be present on all nodes of your cluster. 9 | # See the slurm.conf man page for more information. 
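#
# Note: values in curly braces below (e.g. {clusterName} and {headName}) are
# placeholders, presumably filled in with site-specific values during
# installation; they are not valid literal settings as-is.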
10 | # 11 | ################################################################################# 12 | 13 | ClusterName={clusterName} 14 | 15 | ControlMachine={headName} 16 | #ControlAddr= 17 | 18 | # Specify a backup SLURM control server 19 | # (note that both control servers must share a SLURM state filesystem) 20 | #BackupController= 21 | #BackupAddr= 22 | 23 | SlurmdPort=6818 24 | SlurmctldPort=6817 25 | SlurmdPidFile=/var/run/slurmd.pid 26 | SlurmctldPidFile=/var/run/slurmctld.pid 27 | SlurmdSpoolDir=/var/spool/slurmd 28 | StateSaveLocation=/var/lib/slurmd 29 | 30 | SlurmUser=slurm 31 | #SlurmdUser=root 32 | 33 | 34 | 35 | ################################################################################# 36 | # PROLOG, EPILOG AND HEALTH SCRIPTS 37 | 38 | # Prepare for user jobs (must run very quickly) 39 | #Prolog= 40 | 41 | # Clean up after user jobs 42 | Epilog=/etc/slurm/scripts/slurm.epilog 43 | 44 | #SrunProlog= 45 | #SrunEpilog= 46 | 47 | #TaskProlog= 48 | #TaskEpilog= 49 | 50 | # Prepare nodes for use (periodically run a longer health check) 51 | PrologSlurmctld=/etc/slurm/scripts/slurmctld.prolog 52 | 53 | #EpilogSlurmctld= 54 | 55 | # Check health of all nodes in the cluster. This program must run very 56 | # quickly, because it is automatically terminated after 60 seconds. 57 | HealthCheckProgram=/etc/slurm/scripts/slurm.healthcheck 58 | 59 | # Run health check every 15 minutes (900 seconds) 60 | HealthCheckInterval=900 61 | 62 | 63 | 64 | ################################################################################# 65 | AuthType=auth/munge 66 | CryptoType=crypto/munge 67 | #JobCredentialPrivateKey= 68 | #JobCredentialPublicCertificate= 69 | 70 | CacheGroups=0 71 | #GroupUpdateForce=0 72 | #GroupUpdateTime=600 73 | 74 | #DisableRootJobs=NO 75 | 76 | # Rather than confuse users by accepting impossibly-sized jobs, such requests 77 | # will be rejected when the user submits the job. 
78 | EnforcePartLimits=YES 79 | 80 | #FirstJobId=1 81 | 82 | # This is the most active jobs SLURM will support at a time 83 | MaxJobCount=25000 84 | #MaxJobId=999999 85 | 86 | GresTypes=gpu,mic 87 | 88 | #CheckpointType=checkpoint/none 89 | #JobCheckpointDir=/var/slurm/checkpoint 90 | 91 | #JobFileAppend=0 92 | #JobRequeue=1 93 | #JobSubmitPlugins=1 94 | #KillOnBadExit=0 95 | #LaunchType=launch/slurm 96 | 97 | # If licenses need to be tracked on this cluster, list them here: 98 | #Licenses=foo*4,bar 99 | 100 | MailProg=/usr/bin/mailq 101 | 102 | #MaxStepCount=40000 103 | #MaxTasksPerNode=128 104 | 105 | MpiDefault=none 106 | #MpiParams=ports=12000-12999 107 | 108 | #PluginDir=/usr/local/lib/slurm:/etc/slurm/plugins 109 | PlugStackConfig=/etc/slurm/plugstack.conf 110 | 111 | # Prevent users from seeing: 112 | # * reservations they cannot use 113 | # * other user's usage information 114 | PrivateData=reservations,usage 115 | 116 | ProctrackType=proctrack/cgroup 117 | #PropagatePrioProcess=0 118 | #PropagateResourceLimits= 119 | PropagateResourceLimitsExcept=MEMLOCK 120 | 121 | RebootProgram="/sbin/shutdown --reboot +1 SLURM is rebooting this node" 122 | 123 | # Set this value to 2 if you want downed nodes to be returned to service, 124 | # regardless of why they were set DOWN (e.g., unexpected reboots) 125 | ReturnToService=1 126 | 127 | #SallocDefaultCommand= 128 | 129 | SwitchType=switch/none 130 | 131 | TaskPlugin=task/cgroup 132 | #TaskPluginParam= 133 | 134 | #TopologyPlugin=topology/tree 135 | 136 | # Specify which directory on compute nodes is considered temporary storage 137 | #TmpFS=/tmp 138 | 139 | #TrackWCKey=no 140 | #TreeWidth= 141 | #UnkillableStepProgram= 142 | #UsePAM=0 143 | 144 | 145 | 146 | ################################################################################# 147 | # TIMERS 148 | 149 | #BatchStartTimeout=10 150 | #CompleteWait=0 151 | #EpilogMsgTime=2000 152 | #GetEnvTimeout=2 153 | InactiveLimit=0 154 | KillWait=30 155 | MessageTimeout=30 156 | MinJobAge=300 157 | #ResvOverRun=0 158 | SlurmctldTimeout=120 159 | SlurmdTimeout=300 160 | #UnkillableStepTimeout=60 161 | #VSizeFactor=0 162 | Waittime=0 163 | 164 | # Allow jobs to run 5 minutes longer than the time that was allocated to them 165 | OverTimeLimit=5 166 | 167 | 168 | 169 | ################################################################################# 170 | # SCHEDULING 171 | 172 | # It's important to set a default in case users don't list their memory needs. 
173 | # On modern HPC clusters it's unusual to have less than 1GB per core 174 | DefMemPerCPU=1024 175 | 176 | FastSchedule=0 177 | #MaxMemPerCPU=0 178 | #SchedulerRootFilter=1 179 | #SchedulerTimeSlice=30 180 | SchedulerType=sched/backfill 181 | SchedulerPort=7321 182 | 183 | # Use 'select/linear' if you want to allocate whole nodes to each job 184 | SelectType=select/cons_res 185 | 186 | SelectTypeParameters=CR_Core_Memory 187 | 188 | # By default, we set the following scheduling options: 189 | # 190 | # * bf_interval sets how often SLURM works on backfilling jobs 191 | # * bf_continue is enabled to improve the ability of SLURM to backfill jobs 192 | # * bf_resolution is increased from 1 to 10 minutes, which will increase system 193 | # utilization (but may cause some jobs to start a few minutes late) 194 | # * bf_window is increased from 1 to 2 days, which causes SLURM to look further 195 | # into the future to determine when and where jobs can start 196 | # * bf_max_job_test sets the maximum number of jobs to try backfilling 197 | # * bf_max_job_part limits the number of jobs to backfill from one partition 198 | # * bf_max_job_user limits the number of backfilled jobs for any given user 199 | # * bf_max_job_start limits the number of backfill jobs to start at a time 200 | # 201 | SchedulerParameters=bf_interval=60,bf_continue,bf_resolution=600,bf_window=2880,bf_max_job_test=5000,bf_max_job_part=1000,bf_max_job_user=10,bf_max_job_start=100 202 | 203 | # Allow higher-priority jobs to take resources from lower-priority jobs 204 | PreemptType=preempt/partition_prio 205 | 206 | # By default, a job which is preempted will simply be paused 207 | PreemptMode=SUSPEND,GANG 208 | 209 | 210 | 211 | ################################################################################# 212 | # JOB PRIORITY 213 | 214 | PriorityFlags=FAIR_TREE,SMALL_RELATIVE_TO_TIME 215 | PriorityType=priority/multifactor 216 | PriorityDecayHalfLife=14-0 217 | #PriorityCalcPeriod= 218 | PriorityFavorSmall=NO 219 | 220 | # All jobs more than a week old will be given the same age priority 221 | PriorityMaxAge=7-0 222 | #PriorityUsageResetPeriod= 223 | 224 | # These values set the relative importance of each priority factor 225 | PriorityWeightAge=1000 226 | PriorityWeightFairshare=20000000 227 | PriorityWeightJobSize=1000 228 | PriorityWeightPartition=100000000 229 | PriorityWeightQOS=1000000000 230 | 231 | 232 | 233 | ################################################################################# 234 | # LOGGING AND ACCOUNTING 235 | 236 | AccountingStorageHost={headName} 237 | AccountingStorageEnforce=limits 238 | AccountingStorageType=accounting_storage/slurmdbd 239 | AccountingStoragePass=/var/run/munge/munge.socket.2 240 | 241 | # If tracking GRES usage is desired, the names of the devices must be included: 242 | AccountingStorageTRES=gres/Tesla-K20m,gres/Tesla-K40m,gres/Tesla-K80,gres/Tesla-M40,gres/Tesla-P100-PCIE-16GB 243 | 244 | AccountingStoreJobComment=YES 245 | 246 | #DebugFlags= 247 | 248 | # We do not worry about this plugin because SLURMDBD offers the same capability 249 | JobCompType=jobcomp/none 250 | #JobCompHost= 251 | #JobCompLoc= 252 | #JobCompPass= 253 | #JobCompPort= 254 | #JobCompUser= 255 | 256 | JobAcctGatherType=jobacct_gather/linux 257 | JobAcctGatherFrequency=30 258 | 259 | SlurmctldLogFile=/var/log/slurm/slurmctld.log 260 | SlurmctldDebug=3 261 | 262 | SlurmdLogFile=/var/log/slurm/slurmd.log 263 | SlurmdDebug=3 264 | 265 | # By default, the scheduler does not write logs (can be enabled with 
scontrol) 266 | SlurmSchedLogFile=/var/log/slurm/slurmsched.log 267 | SlurmSchedLogLevel=0 268 | 269 | 270 | 271 | ################################################################################# 272 | # POWER SAVE SUPPORT FOR IDLE NODES 273 | SuspendProgram=/etc/slurm/scripts/slurmctld.power_nodes_off 274 | ResumeProgram=/etc/slurm/scripts/slurmctld.power_nodes_on 275 | 276 | # How long a node must be idle before it will be powered off (in seconds) 277 | SuspendTime=14400 # Four hours 278 | 279 | SuspendTimeout=30 # Number of seconds we expect the node shutdown to take 280 | ResumeTimeout=300 # Number of seconds we expect the node boot process to take 281 | ResumeRate=100 # Number of nodes we're willing to turn on at a time 282 | SuspendRate=100 # Number of nodes we're willing to power off at a time 283 | 284 | # Nodes and Partitions which should not be powered off 285 | #SuspendExcNodes= 286 | #SuspendExcParts= 287 | 288 | 289 | 290 | ################################################################################# 291 | # COMPUTE NODES 292 | 293 | NodeName=DEFAULT Sockets=2 CoresPerSocket=8 ThreadsPerCore=2 State=UNKNOWN 294 | 295 | # CPU-only compute nodes 296 | NodeName=node[1-3] 297 | 298 | # Compute Nodes with Accelerators/Coprocessors 299 | # 300 | # There's no need to set specific information about GPUs or Phis here - it will 301 | # be automatically detected by each node during startup 302 | # 303 | #NodeName=node[4-32] Gres=gpu 304 | #NodeName=node[33-64] Gres=mic 305 | 306 | # Identify any nodes which are temporarily down 307 | # 308 | #DownNodes=node56 State=DOWN Reason=fan 309 | 310 | 311 | 312 | ################################################################################# 313 | # PARTITION / QUEUE DEFINITIONS 314 | 315 | # Set the number of nodes here - it will apply to the partitions below 316 | PartitionName=DEFAULT Nodes=node[1-3] 317 | 318 | # SLURM will use this value when users do not specify their job's length 319 | PartitionName=DEFAULT DefaultTime=4:00:00 320 | 321 | # By default we will not want SLURM to suspend jobs 322 | PartitionName=DEFAULT PreemptMode=off 323 | 324 | # If there are multiple login nodes, each must be listed here 325 | #PartitionName=DEFAULT AllocNodes=login[1-2] 326 | 327 | 328 | # Send most jobs here 329 | # 330 | # Take note that because SelectType is set to 'select/cons_res', this partition 331 | # will schedule multiple jobs on each compute node. However, it will not force 332 | # jobs to share CPU cores - they'll each receive their own dedicated CPU cores. 333 | # 334 | PartitionName=normal Nodes=ALL Priority=10 Default=YES MaxTime=7-0:00:00 Shared=FORCE:1 335 | 336 | 337 | # Users debugging their runs should submit here 338 | PartitionName=debug Nodes=ALL Priority=12 DefaultTime=30:00 MaxTime=30:00 MaxNodes=4 339 | 340 | 341 | # Interactive/Realtime sessions are higher priority than queued batch jobs. 342 | PartitionName=interactive Priority=14 Nodes=node[1-3] DefaultTime=4:00:00 MaxTime=8:00:00 MaxNodes=4 MaxCPUsPerNode=2 MaxMemPerNode=4096 343 | 344 | 345 | # Administrators/Operators can test and debug the cluster here - regular users 346 | # will not be able to submit jobs to this partition 347 | PartitionName=admin Nodes=ALL Priority=14 DefaultTime=30:00 MaxTime=30:00 AllowGroups=hpc-admin 348 | 349 | 350 | # Pre-emptable Partitions 351 | # 352 | # SLURM allows users to submit jobs that are later paused, interrupted or 353 | # rescheduled. 
This helps give users immediate access to resources they might 354 | # not otherwise have access to (with the understanding that those resources may 355 | # only be available to them for a short period of time). Although other options 356 | # are available with SLURM, the following preemption QOS options are used below: 357 | # 358 | # * Requeue: Requeue the job (it will be killed and then started again later) 359 | # * Suspend: Suspend the lower priority job and automatically resume it when 360 | # the higher priority job terminates (re-uses some gang scheduling logic). 361 | # For this to work, memory use must be managed/monitored by SLURM. 362 | # 363 | # These partitions have very lax restrictions, but do not guarantee that the job 364 | # will have uninterrupted access to the resources. The GraceTime parameter 365 | # allows preempted jobs to clean themselves up before being cancelled (note that 366 | # the application must cleanly handle SIGCONT and SIGTERM for this to work). 367 | # 368 | PartitionName=DEFAULT Priority=2 MaxTime=14-0:00:00 Shared=FORCE:1 GraceTime=30 369 | 370 | # Jobs which are designed cleanly (which means handling SIGCONT and SIGTERM) 371 | # should be submitted to 'reschedulable'. When preempted, they will be 372 | # cancelled (freeing all resources) and rescheduled for later execution. We want 373 | # to incentivize users to build this type of job, as it is the cleanest method. 374 | PartitionName=reschedulable Nodes=ALL PreemptMode=REQUEUE 375 | 376 | # Jobs which cannot properly be cancelled and rescheduled should be submitted 377 | # to 'pausable'. When preempted, they will be paused/suspended (but will remain 378 | # in memory). Once the high-priority jobs are finished, these jobs will be 379 | # resumed. This is fairly clean, but may cause contention for memory. 380 | PartitionName=pausable Nodes=ALL PreemptMode=SUSPEND 381 | 382 | -------------------------------------------------------------------------------- /dependencies/etc/slurm/slurmdbd.conf: -------------------------------------------------------------------------------- 1 | ################################################################################# 2 | ######################### Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################# 4 | # 5 | # Configuration for SLURM Resource Manager's database daemon 'slurmdbd' 6 | # 7 | # 8 | # This file need only be present on the SLURM management server. 9 | # See the slurmdbd.conf man page for more information. 
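#
# As a hedged illustration (the account and user names below are hypothetical,
# and the exact workflow varies by site): once slurmdbd and its MySQL/MariaDB
# backend are running, the cluster and its accounts are normally registered
# with sacctmgr, for example:
#
#   sacctmgr add cluster {clusterName}
#   sacctmgr add account research Description="Research group"
#   sacctmgr add user jdoe Account=research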
10 | # 11 | ################################################################################# 12 | 13 | # Archive info 14 | ArchiveEvents=yes 15 | ArchiveJobs=yes 16 | ArchiveResvs=yes 17 | ArchiveSteps=no 18 | ArchiveSuspend=no 19 | ArchiveDir="/tmp" 20 | #ArchiveScript= 21 | 22 | 23 | # Purge individual records after the retention periods below 24 | # Aggregate data is always kept permanently 25 | PurgeEventAfter=1month 26 | PurgeJobAfter=12month 27 | PurgeResvAfter=1month 28 | PurgeStepAfter=1month 29 | PurgeSuspendAfter=1month 30 | 31 | 32 | # Authentication info 33 | AuthType=auth/munge 34 | #AuthInfo=/var/run/munge/munge.socket.2 35 | 36 | # slurmDBD info 37 | DbdAddr=localhost 38 | DbdHost=localhost 39 | #DbdPort=7031 40 | SlurmUser=slurm 41 | #MessageTimeout=300 42 | DebugLevel=4 43 | #DefaultQOS=normal,standby 44 | LogFile=/var/log/slurm/slurmdbd.log 45 | PidFile=/var/run/slurmdbd.pid 46 | #PluginDir=/usr/lib/slurm 47 | 48 | # Prevent users from seeing: 49 | # * reservations they cannot use 50 | # * other users' usage information 51 | PrivateData=reservations,usage 52 | 53 | #TrackWCKey=yes 54 | 55 | # Database info 56 | StorageType=accounting_storage/mysql 57 | StorageHost=localhost 58 | #StoragePort=1234 59 | StoragePass={ChangeMe} 60 | StorageUser=slurm 61 | StorageLoc=slurm_acct_db 62 | 63 | -------------------------------------------------------------------------------- /dependencies/etc/sysconfig/nhc: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2015-2016 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS.
If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | ################################################################################ 26 | # 27 | # Node Health Check (NHC) configuration file 28 | # 29 | ####################################################################### 30 | 31 | 32 | ####################################################################### 33 | ### 34 | ### NHC Configuration Variables 35 | ### 36 | 37 | # If you are having trouble with NHC, uncomment the following lines 38 | # to get a full verbose log of the situation: 39 | # 40 | # VERBOSE=1 41 | # DEBUG=1 42 | 43 | # Uncomment to let nodes continue running jobs (even when problems are found) 44 | # MARK_OFFLINE=0 45 | 46 | # Uncomment to run ALL checks (instead of exiting upon the first failure) 47 | # NHC_CHECK_ALL=1 48 | 49 | # If necessary, additional directories may be added to PATH 50 | # PATH="/opt/example/bin:$PATH" 51 | 52 | # Set the resource manager/workload manager to SLURM 53 | PATH="/usr/slurm/16.05/bin:$PATH" 54 | NHC_RM=slurm 55 | 56 | -------------------------------------------------------------------------------- /dependencies/etc/sysconfig/nvidia: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 3 | ################################################################################ 4 | # 5 | # Copyright (c) 2015 by Microway, Inc. 6 | # 7 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 8 | # 9 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with MCMS. If not, see 21 | # 22 | ################################################################################ 23 | 24 | 25 | # Define start-up configuration settings for NVIDIA GPUs. 26 | # These settings are enabled by the 'nvidia' init.d service. 27 | 28 | 29 | # Supported products: 30 | # - Full Support 31 | # - NVIDIA Tesla Line: Fermi, Kepler and later generations 32 | # - NVIDIA Quadro Line: Fermi, Kepler and later generations 33 | # - NVIDIA GRID Line: All generations 34 | # - NVIDIA GeForce Line: None 35 | # 36 | # - Limited Support 37 | # - NVIDIA Tesla and Quadro Line: Generations before Fermi-architecture 38 | # - NVIDIA GeForce Line 39 | 40 | 41 | # Enables or disables GPU Accounting. With GPU Accounting one can keep track of 42 | # usage of resources throughout the lifespan of each process running on the GPU. 43 | # 44 | # Accounting data can be queried by any user. Execute: 45 | # nvidia-smi -q -d ACCOUNTING 46 | # 47 | # Set accounting: 48 | # 0/DISABLED, 1/ENABLED 49 | # 50 | NVIDIA_ACCOUNTING=1 51 | 52 | # A flag that indicates whether persistence mode is enabled for the GPU. Value 53 | # is either "Enabled" or "Disabled". 
When persistence mode is enabled the NVIDIA 54 | # driver remains loaded even when no active clients, such as X11 or nvidia-smi, 55 | # exist. This minimizes the driver load latency associated with running 56 | # dependent apps, such as CUDA programs. There is a modest power usage penalty. 57 | # 58 | # Set persistence mode: 59 | # 0/DISABLED, 1/ENABLED 60 | # 61 | NVIDIA_PERSISTENCE_MODE=1 62 | 63 | 64 | # The compute mode flag indicates whether individual or multiple compute 65 | # applications may run on the GPU. 66 | # 67 | # "Default" means multiple contexts are allowed per device. 68 | # 69 | # "Exclusive Thread" means only one context is allowed per device, usable from 70 | # one thread at a time. 71 | # 72 | # "Exclusive Process" means only one context is allowed per device, usable from 73 | # multiple threads at a time. 74 | # 75 | # "Prohibited" means no contexts are allowed per device (no compute apps). 76 | # 77 | # "EXCLUSIVE_PROCESS" was added in CUDA 4.0. Prior CUDA releases 78 | # supported only one exclusive mode, which is equivalent to "EXCLUSIVE_THREAD" 79 | # in CUDA 4.0 and beyond. 80 | # 81 | # Set MODE for compute applications: 82 | # 0/DEFAULT, 1/EXCLUSIVE_THREAD, 83 | # 2/PROHIBITED, 3/EXCLUSIVE_PROCESS 84 | # 85 | NVIDIA_COMPUTE_MODE=0 86 | 87 | 88 | # Specifies maximum clocks as a pair (e.g. 2000,800) 89 | # that defines GPU’s speed while running applications on a GPU. Values need to 90 | # be one of the available options as reported by: 91 | # 92 | # nvidia-smi -q -d SUPPORTED_CLOCKS 93 | # 94 | # If set to value "max" the maximum speed of each GPU will be queried and set. 95 | # If not set, default clock speeds are used. 96 | # 97 | # For example, to set memory to 3004 MHz and graphics to 875 MHz: 98 | # NVIDIA_CLOCK_SPEEDS=3004,875 99 | # 100 | NVIDIA_CLOCK_SPEEDS=max 101 | 102 | 103 | # Specifies maximum power limit (in watts). Accepts integer and floating point 104 | # numbers. Value needs to be between Min and Max Power Limit as reported by: 105 | # 106 | # nvidia-smi --query-gpu=power.min_limit,power.max_limit --format=csv 107 | # 108 | # If not set, GPUs will run at their normal TDP (the default) 109 | # 110 | # For example, to limit the consumption of each GPU to 200 Watts (or less): 111 | # NVIDIA_POWER_LIMIT=200 112 | # 113 | -------------------------------------------------------------------------------- /dependencies/opt/ohpc/pub/modulefiles/cuda.lua: -------------------------------------------------------------------------------- 1 | help( 2 | [[ 3 | 4 | This module provides the environment for NVIDIA CUDA. 5 | CUDA tools and libraries must be in your path in order 6 | to take advantage of NVIDIA GPU compute capabilities. 7 | 8 | {version} 9 | ]]) 10 | 11 | 12 | whatis("Name: CUDA") 13 | whatis("Version: {version}") 14 | whatis("Category: library, runtime support") 15 | whatis("Description: NVIDIA CUDA libraries and tools for GPU acceleration") 16 | whatis("URL: https://developer.nvidia.com/cuda-downloads") 17 | 18 | 19 | family("cuda") 20 | 21 | 22 | local version = "{version}" 23 | local base = "/usr/local/cuda-{version}" 24 | 25 | 26 | setenv("CUDA_HOME", base) 27 | setenv("CUDA_VERSION", "{version}") 28 | 29 | prepend_path("PATH", pathJoin(base, "bin")) 30 | prepend_path("INCLUDE", pathJoin(base, "include")) 31 | prepend_path("LD_LIBRARY_PATH", pathJoin(base, "lib64")) 32 | 33 | -- Having the CUDA SDK samples available can be useful. 
34 | prepend_path("PATH", pathJoin(base, 'samples-bin')) 35 | 36 | -- Push the 64-bit NVIDIA libraries into the front of the LD path. 37 | -- Necessary to fix applications which stupidly look in /usr/lib/ first. 38 | prepend_path("LD_LIBRARY_PATH", "/usr/lib64/nvidia") 39 | 40 | 41 | -- 42 | -- No man files included with CUDA 43 | -- 44 | 45 | -------------------------------------------------------------------------------- /dependencies/usr/lib/systemd/system/nvidia-gpu.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=NVIDIA GPU Initialization 3 | After=remote-fs.target 4 | 5 | [Service] 6 | Type=oneshot 7 | RemainAfterExit=yes 8 | ExecStart=/etc/init.d/nvidia start 9 | ExecStop=/etc/init.d/nvidia stop 10 | 11 | [Install] 12 | WantedBy=multi-user.target 13 | 14 | -------------------------------------------------------------------------------- /dependencies/var/spool/slurmd/validate-ssh-command: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015-2016 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | ################################################################################ 27 | # 28 | # Ensures public-key logins for the slurm user can only run allowed commands: 29 | # * long-running health check script 30 | # * power off idle nodes 31 | # * check availability of node (by echoing a newline) 32 | # 33 | ################################################################################ 34 | 35 | 36 | rejection_message="The provided SSH key does not have permission to execute these commands." 
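# A hedged illustration (key type, key material and trailing comment are
# placeholders): this validator is typically enforced through a forced-command
# entry in the slurm user's ~/.ssh/authorized_keys on each node, e.g.:
#
#   command="/var/spool/slurmd/validate-ssh-command",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty ssh-rsa AAAA...placeholder... slurm@{headName}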
37 | 38 | case "$SSH_ORIGINAL_COMMAND" in 39 | *\&*) 40 | echo $rejection_message 41 | ;; 42 | *\(*) 43 | echo $rejection_message 44 | ;; 45 | *\{*) 46 | echo $rejection_message 47 | ;; 48 | *\;*) 49 | echo $rejection_message 50 | ;; 51 | *\<*) 52 | echo $rejection_message 53 | ;; 54 | *\`*) 55 | echo $rejection_message 56 | ;; 57 | *\|*) 58 | echo $rejection_message 59 | ;; 60 | /etc/slurm/scripts/slurm.healthcheck_long) 61 | /etc/slurm/scripts/slurm.healthcheck_long 62 | ;; 63 | /sbin/poweroff) 64 | /sbin/poweroff 65 | ;; 66 | echo) 67 | echo 68 | ;; 69 | *) 70 | echo $rejection_message 71 | ;; 72 | esac 73 | -------------------------------------------------------------------------------- /install_login_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2016 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | 27 | ################################################################################ 28 | ## 29 | ## This script sets up a Login server for an OpenHPC cluster. A Login server 30 | ## sits on your network and accepts user logins (logging them into the cluster). 31 | ## 32 | ## 33 | ## This script should be run on the cluster's Head Node/SMS Server. It will 34 | ## create an OpenHPC/Warewulf image that can be deployed to the login server(s). 35 | ## 36 | ## The Login servers should be connected to two networks: 37 | ## * the internal cluster network (to communicate with the Head/Compute Nodes) 38 | ## * the campus/institute's network (for user access) 39 | ## 40 | ## 41 | ## Please note that certain design/configuration choices are made by this script 42 | ## which may not be compatible with all sites. Efforts are made to maintain 43 | ## portability, but compatibility cannot be guaranteed. 
44 | ## 45 | ################################################################################ 46 | 47 | 48 | # Set the default names of the VNFS images 49 | export node_chroot_name=centos-7 50 | export login_chroot_name=login 51 | 52 | 53 | 54 | ################################################################################ 55 | # Create the new VNFS 56 | ################################################################################ 57 | ohpc_vnfs_clone ${node_chroot_name} ${login_chroot_name} 58 | export login_chroot=/opt/ohpc/admin/images/${login_chroot_name} 59 | 60 | # Hybridize some paths which commonly bloat the images 61 | echo " 62 | 63 | # We will be mounting this from the Head Node via NFS 64 | exclude += /opt/ohpc 65 | 66 | # These paths will be made available to nodes via NFS 67 | hybridize += /usr/local 68 | hybridize += /usr/lib/golang 69 | hybridize += /usr/lib/jvm 70 | hybridize += /usr/lib64/nvidia 71 | hybridize += /usr/lib64/firefox 72 | 73 | " >> /etc/warewulf/vnfs/${login_chroot_name}.conf 74 | 75 | 76 | 77 | ################################################################################ 78 | # Disable the services that are only needed on Compute Nodes 79 | ################################################################################ 80 | chroot ${login_chroot} systemctl disable slurmd.service 81 | 82 | 83 | 84 | ################################################################################ 85 | # Ensure all users can login to this system 86 | ################################################################################ 87 | sed -i 's/- : ALL EXCEPT root hpc-admin : ALL//' ${login_chroot}/etc/security/access.conf 88 | sed -i 's/# Reject users who do not have jobs running on this node//' ${login_chroot}/etc/pam.d/sshd 89 | sed -i 's/account required pam_slurm.so//' ${login_chroot}/etc/pam.d/sshd 90 | 91 | 92 | 93 | ################################################################################ 94 | # Configure the second network interface 95 | ################################################################################ 96 | mkdir -p /etc/warewulf/files/login_servers/ 97 | echo " 98 | DEVICE=eth0 99 | BOOTPROTO=static 100 | ONBOOT=yes 101 | ZONE=trusted 102 | IPADDR=%{NETDEVS::ETH0::IPADDR} 103 | NETMASK=%{NETDEVS::ETH0::NETMASK} 104 | GATEWAY=%{NETDEVS::ETH0::GATEWAY} 105 | HWADDR=%{NETDEVS::ETH0::HWADDR} 106 | MTU=%{NETDEVS::ETH0::MTU} 107 | " > /etc/warewulf/files/login_servers/ifcfg-eth0.ww 108 | wwsh file import /etc/warewulf/files/login_servers/ifcfg-eth0.ww \ 109 | --name=loginServers_ifcfg-eth0 \ 110 | --path=/etc/sysconfig/network-scripts/ifcfg-eth0 111 | echo " 112 | DEVICE=eth1 113 | BOOTPROTO=static 114 | ONBOOT=yes 115 | ZONE=public 116 | IPADDR=%{NETDEVS::ETH1::IPADDR} 117 | NETMASK=%{NETDEVS::ETH1::NETMASK} 118 | GATEWAY=%{NETDEVS::ETH1::GATEWAY} 119 | HWADDR=%{NETDEVS::ETH1::HWADDR} 120 | MTU=%{NETDEVS::ETH1::MTU} 121 | " > /etc/warewulf/files/login_servers/ifcfg-eth1.ww 122 | wwsh file import /etc/warewulf/files/login_servers/ifcfg-eth1.ww \ 123 | --name=loginServers_ifcfg-eth1 \ 124 | --path=/etc/sysconfig/network-scripts/ifcfg-eth1 125 | 126 | 127 | 128 | ################################################################################ 129 | # Configure the firewall 130 | ################################################################################ 131 | # Ensure the firewall is active (it's not usually enabled on compute nodes) 132 | yum -y --installroot=${login_chroot} install firewalld 133 | chroot ${login_chroot} systemctl enable 
firewalld.service 134 | 135 | # By default, only SSH is allowed in on the public-facing network interface. 136 | # We can allow more services here, if desired: 137 | # 138 | # chroot ${login_chroot} firewall-offline-cmd --zone=public --add-service=http 139 | # chroot ${login_chroot} firewall-offline-cmd --zone=public --add-port=4000/tcp 140 | # 141 | 142 | 143 | 144 | ################################################################################ 145 | # If remote graphical access is desired, NoMachine works well. It is a licensed 146 | # product, but it is stable and performs better than typical open-source remote 147 | # desktop tools. Microway can help you select a version with the capabilities you require: 148 | # https://www.microway.com/technologies/software/responsive-enterprise-class-remote-desktops-nomachine/ 149 | ################################################################################ 150 | 151 | # Install common desktop environments 152 | yum -y --installroot=${login_chroot} groups install "GNOME Desktop" 153 | yum -y --installroot=${login_chroot} groups install "KDE Plasma Workspaces" 154 | 155 | # Disable SELinux 156 | setenforce 0 157 | sed -i 's/SELINUX=enforcing/SELINUX=disabled/' ${login_chroot}/etc/selinux/config 158 | 159 | # ############################################################################## 160 | # # Synchronize the built-in users/groups between the Head and the Login Nodes 161 | # # 162 | # # If not done now, the users created by the following packages will have 163 | # # different UIDs and GIDs on the Login Nodes than on the Head Node. 164 | # ############################################################################## 165 | # 166 | # groupadd nx 167 | # groupadd nxhtd 168 | # 169 | # useradd --home-dir '/var/NX/nx' \ 170 | # --password '*' \ 171 | # --gid nx \ 172 | # --shell /bin/false \ 173 | # --system \ 174 | # nx 175 | # 176 | # useradd --home-dir '/var/NX/nxhtd' \ 177 | # --password '*' \ 178 | # --gid nxhtd \ 179 | # --shell /bin/false \ 180 | # --system \ 181 | # nxhtd 182 | # 183 | # cp -af /etc/passwd ${login_chroot}/etc/ 184 | # cp -af /etc/group ${login_chroot}/etc/ 185 | # wwsh file sync 186 | # 187 | # 188 | # An admin will need to manually install the selected NoMachine services 189 | # 190 | # 191 | # chroot ${login_chroot} systemctl enable nxserver.service 192 | 193 | 194 | 195 | ################################################################################ 196 | # Re-assemble Login server VNFS with all the changes 197 | ################################################################################ 198 | # Clear out the stray shell history created while chrooting into the VNFS image 199 | > ${login_chroot}/root/.bash_history 200 | 201 | # Rebuild the VNFS 202 | wwvnfs -y --chroot ${login_chroot} 203 | 204 | 205 | 206 | echo " 207 | 208 | The Login server software image is now ready for use.
To deploy to a server, you 209 | should run something similar to the commands below: 210 | 211 | wwsh node clone node1 login1 212 | 213 | wwsh provision set login1 --fileadd=loginServers_ifcfg-eth0 --vnfs=login 214 | wwsh provision set login1 --fileadd=loginServers_ifcfg-eth1 --vnfs=login 215 | 216 | wwsh node set login1 --netdev=eth0 --ipaddr=10.0.254.253 --netmask=255.255.0.0 --hwaddr=00:aa:bb:cc:dd:ee --mtu=9000 --fqdn=login1.hpc.example.com 217 | wwsh node set login1 --netdev=eth1 --ipaddr= --netmask= --gateway= --mtu=9000 --hwaddr=00:aa:bb:cc:dd:ef 218 | wwsh node set login1 --netdev=ib0 --ipaddr=10.10.254.253 --netmask=255.255.0.0 219 | wwsh node set login1 --domain=hpc.example.com 220 | wwsh ipmi set login1 --ipaddr=10.13.254.253 221 | 222 | " 223 | -------------------------------------------------------------------------------- /install_monitoring_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | ######################## Microway Cluster Management Software (MCMS) for OpenHPC 4 | ################################################################################ 5 | # 6 | # Copyright (c) 2015-2016 by Microway, Inc. 7 | # 8 | # This file is part of Microway Cluster Management Software (MCMS) for OpenHPC. 9 | # 10 | # MCMS for OpenHPC is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # MCMS for OpenHPC is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with MCMS. If not, see 22 | # 23 | ################################################################################ 24 | 25 | 26 | 27 | ################################################################################ 28 | ## 29 | ## This script sets up a monitoring server for an OpenHPC cluster 30 | ## 31 | ## 32 | ## This script should be run on a monitoring server on the same network as the 33 | ## cluster's Head/Master Node - also referred to as the System Management Server 34 | ## (SMS). This script presumes that a Red Hat derivative (CentOS, SL, etc) has 35 | ## just been installed (with vanilla configuration). 36 | ## 37 | ## 38 | ## Please note that certain design/configuration choices are made by this script 39 | ## which may not be compatible with all sites. Efforts are made to maintain 40 | ## portability, but compatibility cannot be guaranteed. 41 | ## 42 | ################################################################################ 43 | 44 | 45 | 46 | ################################################################################ 47 | # Determine where this script is running from (so we can locate patches, etc.) 48 | ################################################################################ 49 | install_script_dir="$( dirname "$( readlink -f "$0" )" )" 50 | 51 | dependencies_dir=${install_script_dir}/dependencies 52 | config_file=${install_script_dir}/configuration_settings.txt 53 | 54 | 55 | # Ensure the settings have been completed 56 | if [[ ! 
-r ${config_file} ]]; then 57 | echo " 58 | 59 | This script requires you to provide configuration settings. Please ensure 60 | that the file ${config_file} exists and has been fully completed. 61 | " 62 | exit 1 63 | else 64 | source ${config_file} 65 | fi 66 | 67 | if [[ ! -z "$(egrep "^[^#].*ChangeMe" ${config_file})" ]]; then 68 | echo " 69 | 70 | For security, you *must* change the passwords in the configuration file. 71 | Please double-check your settings in ${config_file} 72 | " 73 | exit 1 74 | fi 75 | 76 | 77 | 78 | ################################################################################ 79 | # Currently, only RHEL/SL/CentOS 7 is supported for the bootstrap 80 | ################################################################################ 81 | distribution=$(egrep "CentOS Linux 7|Scientific Linux 7|Red Hat Enterprise Linux Server release 7" /etc/*-release) 82 | centos_check=$? 83 | 84 | if [[ ${centos_check} -ne 0 ]]; then 85 | echo " 86 | 87 | Currently, only RHEL, Scientific and CentOS Linux 7 are supported 88 | " 89 | exit 1 90 | else 91 | echo "RHEL/SL/CentOS 7 was detected. Continuing..." 92 | fi 93 | 94 | 95 | 96 | ################################################################################ 97 | # Update system packages and EPEL package repo 98 | ################################################################################ 99 | yum -y update 100 | yum -y install epel-release 101 | 102 | 103 | 104 | ################################################################################ 105 | # If enabled, disable auto-update on this server. 106 | ################################################################################ 107 | if [[ -r /etc/sysconfig/yum-autoupdate ]]; then 108 | sed -i 's/ENABLED="true"/ENABLED="false"/' /etc/sysconfig/yum-autoupdate 109 | fi 110 | 111 | 112 | 113 | ################################################################################ 114 | # Disable SELinux 115 | ################################################################################ 116 | setenforce 0 117 | sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config 118 | 119 | 120 | 121 | ################################################################################ 122 | # Enable NTP (particularly important for things like SLURM and Ceph) 123 | ################################################################################ 124 | yum -y install ntp ntpdate ntp-doc 125 | 126 | if [[ ! 
-z "${ntp_server}" ]]; then 127 | sed -i 's/^server /#server /' /etc/ntp.conf 128 | echo -e " 129 | 130 | server ${ntp_server} 131 | 132 | " >> /etc/ntp.conf 133 | 134 | ntpdate ${ntp_server} 135 | else 136 | ntpdate 0.rhel.pool.ntp.org \ 137 | 1.rhel.pool.ntp.org \ 138 | 2.rhel.pool.ntp.org \ 139 | 3.rhel.pool.ntp.org 140 | fi 141 | 142 | # Because some clusters are not connected to the Internet, we need to enable 143 | # orphan mode as described here: 144 | # 145 | # https://www.eecis.udel.edu/~mills/ntp/html/miscopt.html#tos 146 | # 147 | echo " 148 | 149 | tos orphan 5 150 | 151 | " >> /etc/ntp.conf 152 | hwclock --systohc --utc 153 | systemctl enable ntpd.service 154 | systemctl start ntpd.service 155 | 156 | 157 | 158 | ################################################################################ 159 | # Disable X-Windows since this is typically a headless server 160 | ################################################################################ 161 | systemctl set-default multi-user.target 162 | 163 | 164 | 165 | ################################################################################ 166 | # Install SaltStack, which provides distribution-agnostic configuration mgmt 167 | ################################################################################ 168 | yum -y install salt-minion 169 | systemctl enable salt-minion 170 | systemctl start salt-minion 171 | 172 | 173 | 174 | ################################################################################ 175 | # Create a group for HPC administrators 176 | ################################################################################ 177 | groupadd hpc-admin 178 | 179 | 180 | 181 | ################################################################################ 182 | # Install the monitoring tools (Shinken/Thruk/Check_MK) 183 | ################################################################################ 184 | useradd --system shinken --home-dir /tmp --no-create-home 185 | 186 | yum -y install python-pip 187 | pip install shinken 188 | 189 | systemctl enable shinken.service 190 | systemctl start shinken.service 191 | 192 | --------------------------------------------------------------------------------