├── .coveragerc
├── .flake8
├── .gitignore
├── .gitlab-ci.yml
├── .vscode
│   ├── launch.json
│   └── settings.json
├── LICENSE
├── README.md
├── coverage_report.sh
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── bsub.rst
│       ├── conf.py
│       ├── index.rst
│       ├── job.rst
│       ├── pbs.rst
│       ├── pbs_batch.rst
│       └── scripts.rst
├── examples
│   ├── basic
│   │   ├── launch.py
│   │   └── nas.py
│   ├── batch_no_limit
│   │   └── launch.py
│   ├── batch_with_job_limit
│   │   └── launch.py
│   ├── dependency_chain
│   │   └── launch.py
│   ├── hybrid_openmp_mpi
│   │   └── launch.py
│   └── job_array
│       └── write_pbs_file.py
├── pbs4py
│   ├── __init__.py
│   ├── bsub.py
│   ├── directory_utils.py
│   ├── fake_pbs.py
│   ├── job.py
│   ├── launcher_base.py
│   ├── pbs.py
│   ├── pbs_batch.py
│   ├── scripts
│   │   ├── __init__.py
│   │   ├── job_dir.py
│   │   └── qdel_user_jobs.py
│   └── slurm.py
├── pyproject.toml
└── tests
    ├── job_test
    │   └── empty_file
    ├── pbs_test_files
    │   ├── golden0.lsf
    │   ├── golden0.pbs
    │   └── golden0.slurm
    ├── test_bsub.py
    ├── test_bsub_regression.py
    ├── test_fake_pbs.py
    ├── test_job.py
    ├── test_launch_base.py
    ├── test_output_files
    │   └── .empty
    ├── test_pbs.py
    ├── test_pbs_batch.py
    ├── test_pbs_batch_job.py
    ├── test_pbs_header.py
    ├── test_pbs_regression.py
    ├── test_slurm_header.py
    ├── test_slurm_regression.py
    └── testing_bashrc

--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source=pbs4py
3 | omit=pbs4py/directory_utils.py
4 |     pbs4py/scripts/*.py
5 | 
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 120
3 | 
4 | extend-ignore =
5 |     # black
6 |     E203,
7 |     # too many leading '#' for block comment
8 |     E266,
9 |     # expected 2 blank lines, found 1
10 |     E302,
11 |     # do not use mutable data structures for argument defaults (too many false positives)
12 |     B006,
13 |     # ===== TODO: to be fixed:
14 |     # invalid escape sequence, necessary for sphinx directives in docstrings but should switch to raw string
15 |     W605,
16 |     # line length, exceeded by some docstrings
17 |     E501,
18 |     # Function definition does not bind loop variable, happens everywhere in our code
19 |     B023,
20 |     # pydocstyle
21 |     D
22 | 
23 | # Only add patterns here that are not included by the defaults of flake8 or other plugins
24 | # extend-select =
25 | 
26 | # flake8-docstrings
27 | docstring-convention = numpy
28 | 
29 | # flake8-rst-docstrings
30 | rst-roles =
31 |     class,
32 |     func,
33 |     ref,
34 |     meth,
35 | 
36 | rst-directives =
37 |     # Custom directives defined in the sphinx_mdolab_theme
38 |     embed-compare,
39 |     embed-bibtex,
40 |     embed-code,
41 |     embed-shell-cmd,
42 |     embed-n2,
43 | 
44 | # mccabe complexity
45 | # max-complexity = 10
46 | 
47 | # ignored files/directories
48 | # we use exclude here and extend-exclude in repo-specific config files
49 | # so that we can pass both to flake8 directly without needing to merge them first
50 | exclude =
51 |     # No need to traverse the git directory
52 |     .git,
53 |     # There's no value in checking cache directories
54 |     __pycache__,
55 |     # The conf file is mostly autogenerated, ignore it
56 |     doc/conf.py,
57 |     # No need for init and setup files
58 |     __init__.py,
59 |     setup.py,
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *pyc
2 | *swp
3 | *html
4 | *egg
5 | *egg-info
6 | build
7 | .vscode/.ropeproject/objectdb
8 | .vscode/.ropeproject/config.py
9 |
tests/test_output_files 10 | htmlcov 11 | .coverage 12 | .DS_Store 13 | sample*txt 14 | 15 | *pbs 16 | *log 17 | *out 18 | cov.xml 19 | docs/source/_build 20 | *tar.gz 21 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - checkout 3 | - test 4 | - docs 5 | - deploy 6 | 7 | variables: 8 | user_name: fun3d 9 | build_machine: k4 10 | build_directory: /hpnobackup2/fun3d/component-ci 11 | project_repo: git@gitlab.larc.nasa.gov:kejacob1/pbs4py.git 12 | project_name: pbs4py 13 | build_tag: pbs4py-$CI_PIPELINE_ID 14 | project_dir: ${build_directory}/pbs4py-$CI_PIPELINE_ID/${project_name} 15 | 16 | 17 | checkout_branch: 18 | only: 19 | refs: 20 | - main 21 | - merge_requests 22 | stage: checkout 23 | variables: 24 | branch_name: $CI_COMMIT_REF_NAME 25 | checkout_sha: $CI_COMMIT_SHA 26 | tags: 27 | - gitlab_runner 28 | script: 29 | - echo $PWD 30 | - hostname 31 | - whoami 32 | - "ssh -o LogLevel=error ${user_name}@${build_machine} \"cd ${build_directory} && mkdir -p ${build_tag} 33 | && cd ${build_tag} && git clone ${project_repo} ${project_name} \" " 34 | - "ssh -o LogLevel=error ${user_name}@${build_machine} \"cd ${project_dir} 35 | && git checkout ${branch_name} && git checkout ${checkout_sha} \" " 36 | 37 | unit_tests: 38 | stage: test 39 | only: 40 | - merge_requests 41 | tags: 42 | - gitlab_runner 43 | script: 44 | - "ssh -o LogLevel=error ${user_name}@${build_machine} \"cd ${project_dir} 45 | && module load Python_3.7.1 && setenv PYTHONPATH `pwd`:$PYTHONPATH && pytest -vs \" " 46 | 47 | test_doc_build: 48 | stage: docs 49 | only: 50 | - merge_requests 51 | tags: 52 | - gitlab_runner 53 | script: 54 | - "ssh -o LogLevel=error ${user_name}@${build_machine} \"cd ${project_dir} 55 | && module load Python_3.7.1 && setenv PYTHONPATH `pwd`:$PYTHONPATH 56 | && setenv PATH /u/fun3d/.local/bin:$PATH && cd docs 57 | && make html SPHINXOPTS='-W --keep-going' 58 | && cd build && tar zcvf pbs4py_html.tgz html \" " 59 | 60 | build_docs: 61 | stage: docs 62 | only: 63 | refs: 64 | - main 65 | tags: 66 | - gitlab_runner 67 | script: 68 | - "ssh -o LogLevel=error ${user_name}@${build_machine} \"cd ${project_dir} 69 | && module load Python_3.7.1 && setenv PYTHONPATH `pwd`:$PYTHONPATH 70 | && setenv PATH /u/fun3d/.local/bin:$PATH && cd docs && make html 71 | && cd build && tar zcvf pbs4py_html.tgz html \" " 72 | after_script: 73 | - "scp -q ${user_name}@${build_machine}:${project_dir}/docs/build/pbs4py_html.tgz . || true" 74 | artifacts: 75 | paths: 76 | - pbs4py_html.tgz 77 | expire_in: 1 week 78 | 79 | pages: 80 | stage: deploy 81 | only: 82 | refs: 83 | - main 84 | tags: 85 | - gitlab_runner 86 | script: 87 | - rm -rf public 88 | - tar xzvf pbs4py_html.tgz 89 | - mv html public 90 | artifacts: 91 | paths: 92 | - public 93 | expire_in: 1 week 94 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Current File", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal", 13 | "cwd": "${fileDirname}" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.defaultFormatter": "ms-python.autopep8", 4 | "editor.formatOnSave": true, 5 | }, 6 | "flake8.args": [ 7 | "--config", 8 | ".flake8" 9 | ], 10 | "autopep8.args": [ 11 | "--max-line-length", 12 | "100", 13 | "--experimental" 14 | ], 15 | "python.testing.pytestArgs": [ 16 | "tests" 17 | ], 18 | "python.testing.unittestEnabled": false, 19 | "python.testing.pytestEnabled": true 20 | } 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | NASA OPEN SOURCE AGREEMENT VERSION 1.3 2 | 3 | THIS OPEN SOURCE AGREEMENT (“AGREEMENT”) DEFINES THE RIGHTS OF USE, REPRODUCTION, DISTRIBUTION, MODIFICATION AND REDISTRIBUTION OF CERTAIN COMPUTER SOFTWARE ORIGINALLY RELEASED BY THE UNITED STATES GOVERNMENT AS REPRESENTED BY THE GOVERNMENT AGENCY LISTED BELOW ("GOVERNMENT AGENCY"). THE UNITED STATES GOVERNMENT, AS REPRESENTED BY GOVERNMENT AGENCY, IS AN INTENDED THIRD-PARTY BENEFICIARY OF ALL SUBSEQUENT DISTRIBUTIONS OR REDISTRIBUTIONS OF THE SUBJECT SOFTWARE. ANYONE WHO USES, REPRODUCES, DISTRIBUTES, MODIFIES OR REDISTRIBUTES THE SUBJECT SOFTWARE, AS DEFINED HEREIN, OR ANY PART THEREOF, IS, BY THAT ACTION, ACCEPTING IN FULL THE RESPONSIBILITIES AND OBLIGATIONS CONTAINED IN THIS AGREEMENT. 4 | 5 | Government Agency: National Aeronautics and Space Administration 6 | Government Agency Original Software Designation: LAR-20193-1 7 | Government Agency Original Software Title: pbs4py - python utilities for submitting jobs on high performance computing systems 8 | Government Agency Point of Contact for Original Software: kevin.e.jacobson@nasa.gov 9 | 10 | 11 | 1. DEFINITIONS 12 | 13 | A. “Contributor” means Government Agency, as the developer of the Original Software, and any entity that makes a Modification. 14 | B. “Covered Patents” mean patent claims licensable by a Contributor that are necessarily infringed by the use or sale of its Modification alone or when combined with the Subject Software. 15 | C. “Display” means the showing of a copy of the Subject Software, either directly or by means of an image, or any other device. 16 | D. “Distribution” means conveyance or transfer of the Subject Software, regardless of means, to another. 17 | E. “Larger Work” means computer software that combines Subject Software, or portions thereof, with software separate from the Subject Software that is not governed by the terms of this Agreement. 18 | F. “Modification” means any alteration of, including addition to or deletion from, the substance or structure of either the Original Software or Subject Software, and includes derivative works, as that term is defined in the Copyright Statute, 17 USC 101. However, the act of including Subject Software as part of a Larger Work does not in and of itself constitute a Modification. 19 | G. 
“Original Software” means the computer software first released under this Agreement by Government Agency with Government Agency designation LAR-20193-1 and entitled pbs4py - python utilities for submitting jobs on high performance computing systems including source code, object code and accompanying documentation, if any. 20 | H. “Recipient” means anyone who acquires the Subject Software under this Agreement, including all Contributors. 21 | I. “Redistribution” means Distribution of the Subject Software after a Modification has been made. 22 | J. “Reproduction” means the making of a counterpart, image or copy of the Subject Software. 23 | K. “Sale” means the exchange of the Subject Software for money or equivalent value. 24 | L. “Subject Software” means the Original Software, Modifications, or any respective parts thereof. 25 | M. “Use” means the application or employment of the Subject Software for any purpose. 26 | 27 | 2. GRANT OF RIGHTS 28 | 29 | A. Under Non-Patent Rights: Subject to the terms and conditions of this Agreement, each Contributor, with respect to its own contribution to the Subject Software, hereby grants to each Recipient a non-exclusive, world-wide, royalty-free license to engage in the following activities pertaining to the Subject Software: 30 | 31 | 1. Use 32 | 2. Distribution 33 | 3. Reproduction 34 | 4. Modification 35 | 5. Redistribution 36 | 6. Display 37 | 38 | B. Under Patent Rights: Subject to the terms and conditions of this Agreement, each Contributor, with respect to its own contribution to the Subject Software, hereby grants to each Recipient under Covered Patents a non-exclusive, world-wide, royalty-free license to engage in the following activities pertaining to the Subject Software: 39 | 40 | 1. Use 41 | 2. Distribution 42 | 3. Reproduction 43 | 4. Sale 44 | 5. Offer for Sale 45 | 46 | C. The rights granted under Paragraph B. also apply to the combination of a Contributor’s Modification and the Subject Software if, at the time the Modification is added by the Contributor, the addition of such Modification causes the combination to be covered by the Covered Patents. It does not apply to any other combinations that include a Modification. 47 | 48 | D. The rights granted in Paragraphs A. and B. allow the Recipient to sublicense those same rights. Such sublicense must be under the same terms and conditions of this Agreement. 49 | 50 | 3. OBLIGATIONS OF RECIPIENT 51 | 52 | A. Distribution or Redistribution of the Subject Software must be made under this Agreement except for additions covered under paragraph 3H. 53 | 54 | 1. Whenever a Recipient distributes or redistributes the Subject Software, a copy of this Agreement must be included with each copy of the Subject Software; and 55 | 2. If Recipient distributes or redistributes the Subject Software in any form other than source code, Recipient must also make the source code freely available, and must provide with each copy of the Subject Software information on how to 56 | obtain the source code in a reasonable manner on or through a medium customarily used for software exchange. 57 | 58 | B. Each Recipient must ensure that the following copyright notice appears prominently in the Subject Software: 59 | This software may be used, reproduced, and provided to others only as permitted under the terms of the agreement under which it was acquired from the U.S. Government. 60 | Neither title to, nor ownership of, the software is hereby transferred. This notice shall remain on all copies of the software. 
61 | 62 | Copyright 2022 United States Government as represented by the Administrator of the National Aeronautics and Space Administration. No copyright is claimed in the United States under Title 17, U.S. Code. All Other Rights Reserved. 63 | 64 | Third Party Software: 65 | 66 | This software calls the following third party software, which is subject to the terms and conditions of its licensor, as applicable at the time of licensing. Third party software is not bundled with this software, but may be available from the licensor. License hyperlinks are provided here for information purposes only: numpy, BSD 3-Clause "New" or "Revised" License, https://github.com/numpy/numpy/blob/main/LICENSE.txt 67 | Third Party Software: 68 | This software derives analyses using Google Earth Engine's (GEE’s) free and publicly accessible data catalog. GEE is not bundled with this software, but users of this software must obtain their own account at code.earthengine.google.com, which is subject to the terms and conditions of its licensor, as applicable at the time of licensing. License hyperlink is provided here for information purposes only: https://earthengine.google.com/terms/. 69 | 70 | C. Each Contributor must characterize its alteration of the Subject Software as a Modification and must identify itself as the originator of its Modification in a manner that reasonably allows subsequent Recipients to identify the originator of the Modification. In fulfillment of these requirements, Contributor must include a file (e.g., a change log file) that describes the alterations made and the date of the alterations, identifies Contributor as originator of the alterations, and consents to characterization of the alterations as a Modification, for example, by including a statement that the Modification is derived, directly or indirectly, from Original Software provided by Government Agency. Once consent is granted, it may not thereafter be revoked. 71 | 72 | D. A Contributor may add its own copyright notice to the Subject Software. Once a copyright notice has been added to the Subject Software, a Recipient may not remove it without the express permission of the Contributor who added the notice. 73 | 74 | E. A Recipient may not make any representation in the Subject Software or in any promotional, advertising or other material that may be construed as an endorsement by Government Agency or by any prior Recipient of any product or service provided by Recipient, or that may seek to obtain commercial advantage by the fact of Government Agency's or a prior Recipient’s participation in this Agreement. 75 | 76 | F. In an effort to track usage and maintain accurate records of the Subject Software, each Recipient, upon receipt of the Subject Software, is requested to provide Government Agency, by e-mail to the Government Agency Point of Contact listed in clause 5.F., the following information: First and Last Name; Email Address; and Affiliation. Recipient’s name and personal information shall be used for statistical purposes only. Once a Recipient makes a Modification available, it is requested that the Recipient 77 | inform Government Agency, by e-mail to the Government Agency Point of Contact listed in clause 5.F., how to access the Modification. 78 | 79 | G. 
Each Contributor represents that that its Modification is believed to be Contributor’s original creation and does not violate any existing agreements, regulations, statutes or rules, and further that Contributor has sufficient rights to grant the rights conveyed by this Agreement. 80 | 81 | H. A Recipient may choose to offer, and to charge a fee for, warranty, support, indemnity and/or liability obligations to one or more other Recipients of the Subject Software. A Recipient may do so, however, only on its own behalf and not on behalf of Government Agency or any other Recipient. Such a Recipient must make it absolutely clear that any such warranty, support, indemnity and/or liability obligation is offered by that Recipient alone. Further, such Recipient agrees to indemnify Government Agency and every other Recipient for any liability incurred by them as a result of warranty, support, indemnity and/or liability offered by such Recipient. 82 | 83 | I. A Recipient may create a Larger Work by combining Subject Software with separate software not governed by the terms of this agreement and distribute the Larger Work as a single product. In such case, the Recipient must make sure Subject Software, or portions thereof, included in the Larger Work is subject to this Agreement. 84 | 85 | J. Notwithstanding any provisions contained herein, Recipient is hereby put on notice that export of any goods or technical data from the United States may require some form of export license from the U.S. Government. Failure to obtain necessary export licenses may result in criminal liability under U.S. laws. Government Agency neither represents that a license shall not be required nor that, if required, it shall be issued. Nothing granted herein provides any such export license. 86 | 87 | 4. DISCLAIMER OF WARRANTIES AND LIABILITIES; WAIVER AND INDEMNIFICATION 88 | 89 | A. No Warranty: THE SUBJECT SOFTWARE IS PROVIDED “AS IS” WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE, AND DISTRIBUTES IT “AS IS.” 90 | B. Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT. 91 | 92 | 5. GENERAL TERMS 93 | 94 | A. 
Termination: This Agreement and the rights granted hereunder will terminate automatically if a Recipient fails to comply with these terms and conditions, and fails to cure such noncompliance within thirty (30) days of becoming aware of such noncompliance. Upon termination, a Recipient agrees to immediately cease use and distribution of the Subject Software. All sublicenses to the Subject Software properly granted by the breaching Recipient shall survive any such termination of this Agreement.
95 | 
96 | B. Severability: If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement.
97 | 
98 | C. Applicable Law: This Agreement shall be subject to United States federal law only for all purposes, including, but not limited to, determining the validity of this Agreement, the meaning of its provisions and the rights, obligations and remedies of the parties.
99 | 
100 | D. Entire Understanding: This Agreement constitutes the entire understanding and agreement of the parties relating to release of the Subject Software and may not be superseded, modified or amended except by further written agreement duly executed by the parties.
101 | 
102 | E. Binding Authority: By accepting and using the Subject Software under this Agreement, a Recipient affirms its authority to bind the Recipient to all terms and conditions of this Agreement and that that Recipient hereby agrees to all terms and conditions herein.
103 | 
104 | F. Point of Contact: Any Recipient contact with Government Agency is to be directed to the designated representative as follows:
105 | 
106 | Maxine Saunders
107 | Software Release Authority
108 | MS 020, NASA Langley Research Center Hampton, VA 23681
109 | Phone: 757-864-2025
110 | 
111 | Email: larc-sra@mail.nasa.gov
112 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Description
2 | 
3 | pbs4py is a Python module for automating submission of compute jobs on High Performance
4 | Computing clusters, such as those that use the Portable Batch System (PBS).
5 | It includes pre-configured launchers for common NASA HPC systems: the Langley K cluster
6 | and NASA Advanced Supercomputing (NAS) systems.
7 | 
8 | Example uses are uncertainty quantification, where many jobs are submitted
9 | simultaneously, and optimization, where sequences of jobs need to be scripted.
10 | 
11 | pbs4py also includes scripts for performing tasks associated with PBS jobs,
12 | such as a script that, given a job number, prints the directory from which the job was launched,
13 | and a script that can delete multiple jobs based on filters.
14 | 
15 | 
16 | # Documentation
17 | [Documentation is hosted using Github Pages](https://nasa.github.io/pbs4py/)
18 | 
19 | The pbs4py documentation is generated from the source code with [Sphinx](https://www.sphinx-doc.org/en/master/).
20 | Once you have installed pbs4py, the documentation is built by running `make html` in the docs directory.
21 | The generated documentation will be in `docs/build/html`.
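For example, assuming Sphinx and pbs4py are already installed:

```
cd docs
make html
```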
22 | 23 | # Installation 24 | pbs4py can be installed with 25 | 26 | ``` 27 | pip install pbs4py 28 | ``` 29 | 30 | # Quick Start 31 | 32 | After installation, 33 | 34 | On the K cluster: 35 | ```python 36 | from pbs4py import PBS 37 | pbs = PBS.k4() 38 | pbs.requested_number_of_nodes = 1 39 | pbs.launch(job_name='example_job',job_body=['echo "Hello World"']) 40 | ``` 41 | 42 | On NAS: 43 | ```python 44 | from pbs4py import PBS 45 | group = 'a1111' # your project ID to charge here 46 | pbs = PBS.nas(group, proc_type='san', queue='devel', time=1) 47 | pbs.launch(job_name='example_job',job_body=['echo "Hello World"']) 48 | ``` 49 | 50 | # License Notices and Disclaimers 51 | Notices: 52 | Copyright 2022 United States Government as represented by the Administrator of 53 | the National Aeronautics and Space Administration. No copyright is claimed in 54 | the United States under Title 17, U.S. Code. All Other Rights Reserved. 55 | 56 | Third Party Software: 57 | 58 | This software calls the following third party software, which is subject to the 59 | terms and conditions of its licensor, as applicable at the time of licensing. 60 | Third party software is not bundled with this software, but may be available 61 | from the licensor. License hyperlinks are provided here for information purposes 62 | only: numpy, BSD 3-Clause "New" or "Revised" License, 63 | https://github.com/numpy/numpy/blob/main/LICENSE.txt. 64 | 65 | Disclaimers 66 | No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF 67 | ANY KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED 68 | TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY 69 | IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR 70 | FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR 71 | FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE 72 | SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN 73 | ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, 74 | RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER APPLICATIONS 75 | RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY 76 | DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF 77 | PRESENT IN THE ORIGINAL SOFTWARE, AND DISTRIBUTES IT "AS IS." 78 | 79 | Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE 80 | UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY 81 | PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE RESULTS IN ANY 82 | LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, 83 | INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE 84 | OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED 85 | STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR 86 | RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH 87 | MATTER SHALL BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT. 
88 | -------------------------------------------------------------------------------- /coverage_report.sh: -------------------------------------------------------------------------------- 1 | pytest --cov-report xml:cov.xml --cov pbs4py 2 | coverage report -m 3 | coverage html 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/bsub.rst: -------------------------------------------------------------------------------- 1 | .. _bsub_section: 2 | 3 | BSUB Job Launcher 4 | %%%%%%%%%%%%%%%%% 5 | 6 | The BSUB class is a tool to write and launch job scripts on DOE HPC systems. 7 | 8 | 9 | BSUB Class 10 | ========== 11 | 12 | .. automodule:: pbs4py.bsub 13 | 14 | .. autoclass:: BSUB 15 | :members: 16 | :inherited-members: 17 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('../../'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'pbs4py'
21 | copyright = '2021, NASA'
22 | author = 'NASA'
23 | 
24 | 
25 | # -- General configuration ---------------------------------------------------
26 | 
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon',
31 |               'sphinx_autodoc_typehints', 'sphinxarg.ext']
32 | 
33 | # Add any paths that contain templates here, relative to this directory.
34 | templates_path = ['_templates']
35 | 
36 | # List of patterns, relative to source directory, that match files and
37 | # directories to ignore when looking for source files.
38 | # This pattern also affects html_static_path and html_extra_path.
39 | exclude_patterns = []
40 | 
41 | autoclass_content = 'both'
42 | autodoc_member_order = 'bysource'
43 | autodoc_default_flags = ['members', 'inherited-members']
44 | 
45 | # -- Options for HTML output -------------------------------------------------
46 | 
47 | # The theme to use for HTML and HTML Help pages.  See the documentation for
48 | # a list of builtin themes.
49 | #
50 | html_theme = 'sphinxdoc'
51 | 
52 | # Add any paths that contain custom static files (such as style sheets) here,
53 | # relative to this directory. They are copied after the builtin static files,
54 | # so a file named "default.css" will overwrite the builtin "default.css".
55 | #html_static_path = ['_static']
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | pbs4py
2 | ======
3 | 
4 | Python scripting for launching and managing PBS jobs.
5 | 
6 | 
7 | Launching jobs
8 | --------------
9 | 
10 | .. toctree::
11 |     :maxdepth: 1
12 | 
13 |     pbs.rst
14 |     pbs_batch.rst
15 |     bsub.rst
16 | 
17 | 
18 | Managing jobs
19 | -------------
20 | .. toctree::
21 |     :maxdepth: 2
22 | 
23 |     job.rst
24 |     scripts.rst
25 | 
26 | Indices and tables
27 | ==================
28 | 
29 | * :ref:`genindex`
30 | * :ref:`modindex`
31 | * :ref:`search`
--------------------------------------------------------------------------------
/docs/source/job.rst:
--------------------------------------------------------------------------------
1 | .. _pbs_job_section:
2 | 
3 | PBS Job Class
4 | %%%%%%%%%%%%%
5 | 
6 | .. automodule:: pbs4py.job
7 | 
8 | .. autoclass:: PBSJob
9 |     :members:
10 |
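11 | Example
12 | =======
13 | A short usage sketch; the job id here is hypothetical and would normally come
14 | from the output of a previously launched job:
15 | 
16 | .. code-block :: python
17 | 
18 |     from pbs4py.job import PBSJob
19 | 
20 |     job = PBSJob('12345')
21 |     print(job.state, job.workdir)
22 |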
--------------------------------------------------------------------------------
/docs/source/pbs.rst:
--------------------------------------------------------------------------------
1 | .. _pbs_section:
2 | 
3 | PBS Job Launcher
4 | %%%%%%%%%%%%%%%%
5 | 
6 | The PBS class is a tool to define properties of the PBS setup you want to use,
7 | write PBS scripts, and launch jobs.
8 | The ``PBS`` class has several classmethods that serve as alternate constructors which fill in properties of some NASA HPC systems and queues.
9 | Examples of instantiating with these methods are shown below.
10 | For systems or queues not covered by these classmethods, the basic queue attributes are
11 | set in the standard constructor, and less common ones can be adjusted by
12 | changing the attributes of the object.
13 | 
14 | 
15 | PBS Class
16 | =========
17 | 
18 | .. automodule:: pbs4py.pbs
19 | 
20 | .. autoclass:: PBS
21 |     :members:
22 |     :inherited-members:
23 | 
24 | PBS's classmethod constructors
25 | ------------------------------
26 | 
27 | 
28 | .. code-block :: python
29 | 
30 |     from pbs4py import PBS
31 | 
32 |     k4 = PBS.k4(time=48)
33 |     k3 = PBS.k3()
34 |     k3a = PBS.k3a()
35 |     nas = PBS.nas(group_list='n1337', proc_type='skylake', time=72)
36 | 
37 | FakePBS Class
38 | =============
39 | Some scripts may originally be set up with the PBS job handler, but you may want to run
40 | the script within an existing PBS job without launching new PBS jobs.
41 | The FakePBS object appears to driving scripts to be a standard PBS object,
42 | but it directly runs the commands instead of putting them into a PBS job and launching the job.
43 | 
44 | .. automodule:: pbs4py.fake_pbs
45 | 
46 | .. autoclass:: FakePBS
47 |     :members:
--------------------------------------------------------------------------------
/docs/source/pbs_batch.rst:
--------------------------------------------------------------------------------
1 | .. _pbs_batch_section:
2 | 
3 | PBS Job Batch Submission
4 | %%%%%%%%%%%%%%%%%%%%%%%%
5 | 
6 | The PBSBatch class is a tool to launch many jobs simultaneously.
7 | 
8 | The basic steps are:
9 | 
10 | 1. Instantiating a :class:`~pbs4py.pbs.PBS` that will be used to submit the jobs.
11 | 2. Creating a list of :class:`~pbs4py.pbs_batch.BatchJob` objects that hold the name of the job and a list of the commands to run.
12 | 3. Setting up the job directories with the appropriate input files.
13 | 4. Giving the ``PBS`` object and list of ``BatchJob`` objects to the :class:`~pbs4py.pbs_batch.PBSBatch` constructor and then calling one of the launch methods.
14 | 
15 | Setting up the Job Directories
16 | ==============================
17 | By default, jobs are launched in directories with the same name as the job.
18 | This prevents concurrent jobs in the batch from overwriting each other's output files.
19 | 
20 | To set up a job, these directories can be created and populated with code like this:
21 | 
22 | .. code-block :: python
23 | 
24 |     batch = PBSBatch(pbs,jobs)
25 | 
26 |     batch.create_directories()
27 |     common_inputs_to_copy = ['fun3d.nml','*.cfg']
28 | 
29 |     for job in jobs:
30 |         for input in common_inputs_to_copy:
31 |             os.system(f'cp {input} {job.name}')
32 | 
33 | 
34 | Launch Methods
35 | ==============
36 | The batch jobs can be submitted with two different methods of the :class:`~pbs4py.pbs_batch.PBSBatch` class.
37 | 
38 | :func:`~pbs4py.pbs_batch.PBSBatch.launch_jobs_with_limit` will launch every job in the list,
39 | but it will only allow a certain number of jobs to be active in the queue system
40 | (queued, running, held) at a time. This is the preferred launch method if
41 | you have many jobs and, as a courtesy to your fellow HPC users, don't want to
42 | submit hundreds of jobs into the queue at once. A minimal example is shown below.
43 | 
44 | :func:`~pbs4py.pbs_batch.PBSBatch.launch_all_jobs` will launch every job in the list.
45 | It has an optional argument to either wait for the jobs to finish before returning or
46 | return immediately after all of the jobs are submitted to the queue.
47 |
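48 | A minimal sketch of the limited launch, assuming ``batch`` was created as above:
49 | 
50 | .. code-block :: python
51 | 
52 |     # submit everything, but keep at most 5 jobs queued or running at a time
53 |     batch.launch_jobs_with_limit(max_jobs_at_a_time=5)
54 |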
55 | Batch Job Class
56 | ===============
57 | .. automodule:: pbs4py.pbs_batch
58 | 
59 | .. autoclass:: BatchJob
60 |     :members:
61 | 
62 | PBSBatch Class
63 | ==============
64 | 
65 | .. autoclass:: PBSBatch
66 |     :members:
--------------------------------------------------------------------------------
/docs/source/scripts.rst:
--------------------------------------------------------------------------------
1 | .. _pbs_script_section:
2 | 
3 | Scripts
4 | %%%%%%%
5 | 
6 | Job Directory Script
7 | ====================
8 | 
9 | .. argparse::
10 |     :ref: pbs4py.scripts.job_dir.arg_parser
11 |     :prog: job_dir.py
12 | 
13 | qdir alias to cd to job's directory
14 | -----------------------------------
15 | 
16 | This script to print the job directory can be used in combination with bash
17 | aliases to create a ``qdir`` alias for moving to the directory a job is running in:
18 | 
19 | .. code-block:: bash
20 | 
21 |     qdirfun() { cd `job_dir.py $1`;}
22 |     alias qdir=qdirfun
23 | 
24 | Then in the shell instance you can do ``qdir {job_id}`` to move to the job's run directory.
25 | 
26 | Qdel for User Jobs Script
27 | =========================
28 | 
29 | 
30 | .. argparse::
31 |     :ref: pbs4py.scripts.qdel_user_jobs.arg_parser
32 |     :prog: qdel_user_jobs.py
33 | 
34 | 
35 | Example
36 | -------
37 | The following command would delete the current user's jobs that meet these conditions: PBS ids between 1000 and 2400,
38 | in the K3-standard queue, and ``crm`` in the job name. By default, the list of jobs will be
39 | printed to the screen, asking the user for confirmation; adding ``--no-confirm`` skips this step.
40 | 
41 | .. code-block:: bash
42 | 
43 |     qdel_user_jobs.py --id_range 1000 2400 --queue K3-standard --name crm
--------------------------------------------------------------------------------
/examples/basic/launch.py:
--------------------------------------------------------------------------------
1 | from pbs4py import PBS
2 | 
3 | k4 = PBS.k4(time=48)
4 | k4.mpiexec = 'mpiexec_mpt'
5 | k4.requested_number_of_nodes = 3
6 | 
7 | fun3d_command = 'nodet_mpi --gamma 1.14'
8 | fun3d_mpi_command = k4.create_mpi_command(fun3d_command, output_root_name='dog')
9 | 
10 | # list of commands that will be run in the pbs script
11 | pbs_commands = ['echo Start', fun3d_mpi_command, 'echo Done']
12 | 
13 | # submit and move on
14 | job_name = 'test_job'
15 | k4.launch(job_name, pbs_commands, blocking=False)
16 | 
17 | # submit and wait for job to finish before continuing script
18 | job_name = 'blocking_job'
19 | k4.launch(job_name, pbs_commands)
--------------------------------------------------------------------------------
/examples/basic/nas.py:
--------------------------------------------------------------------------------
1 | from pbs4py import PBS
2 | 
3 | group = 'a1234'  # replace with your charge number
4 | nas = PBS.nas(group, proc_type='bro', queue_name='devel', time=2)
5 | commands = [nas.create_mpi_command('nodet_mpi', 'debug')]
6 | nas.requested_number_of_nodes = 4
7 | nas.write_job_file('devel.pbs', 'debug', commands)
--------------------------------------------------------------------------------
/examples/batch_no_limit/launch.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pbs4py import PBS, BatchJob, PBSBatch
3 | 
4 | 
5 | pbs = PBS.k3()
6 | pbs.requested_number_of_nodes = 1
7 | 
8 | jobs = []
9 | for ijob in range(10):
10 |     name = f'sleep{ijob}'
11 |     commands = [f'sleep {ijob*10}',
12 |                 f'cat {name}.txt']
13 |     jobs.append(BatchJob(name, commands))
14 | 
15 | batch = PBSBatch(pbs, jobs)
16 | batch.create_directories()
17 | 
18 | for job in jobs:
19 |     # use job as a context manager to enter the directory with the name job.name and write a file
20 |     with job:
21 |         os.system(f'echo "hello world" > {job.name}.txt')
22 | 
23 | batch.launch_all_jobs(wait_for_jobs_to_finish=True)
24 | print('Done.')
25 |
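26 | # A sketch of the non-blocking alternative described in the docs: submit every
27 | # job and return immediately instead of waiting here, e.g.,
28 | # batch.launch_all_jobs(wait_for_jobs_to_finish=False)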
-------------------------------------------------------------------------------- /examples/batch_with_job_limit/launch.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pbs4py import PBS, PBSBatch, BatchJob 3 | 4 | 5 | pbs = PBS.k3a() 6 | pbs.requested_number_of_nodes = 1 7 | 8 | jobs = [] 9 | for ijob in range(10): 10 | name = f'sample{ijob}' 11 | commands = [f'sleep {ijob*60}', 12 | f'cat {name}.txt'] 13 | jobs.append(BatchJob(name, commands)) 14 | 15 | batch = PBSBatch(pbs, jobs) 16 | batch.create_directories() 17 | 18 | for job in jobs: 19 | # use job as context manager to enter directory with the name of job.name and write a file 20 | with job: 21 | os.system(f'echo "hello world" > {job.name}.txt') 22 | 23 | batch.launch_jobs_with_limit(max_jobs_at_a_time=3) 24 | -------------------------------------------------------------------------------- /examples/dependency_chain/launch.py: -------------------------------------------------------------------------------- 1 | from pbs4py import PBS 2 | 3 | k3 = PBS.k3(time=1) 4 | k3.mem = '4gb' 5 | k3.requested_number_of_nodes = 1 6 | 7 | # test_job2 will wait until test_job1 is done before running 8 | pbs_commands = ['echo Start', 'sleep 1m', 'echo Done'] 9 | pbs1_id = k3.launch('test_job1', pbs_commands, blocking=False) 10 | 11 | new_commands = ['echo Start 2', 'sleep 2m', 'echo Done 2'] 12 | k3.launch('test_job2', new_commands, blocking=False, dependency=pbs1_id) 13 | -------------------------------------------------------------------------------- /examples/hybrid_openmp_mpi/launch.py: -------------------------------------------------------------------------------- 1 | from pbs4py import PBS 2 | 3 | k4 = PBS.k4(time=48) 4 | k4.mpiexec = 'mpiexec_mpt' 5 | k4.requested_number_of_nodes = 2 6 | 7 | fun3d_command = 'nodet_mpi' 8 | fun3d_mpi_command = k4.create_mpi_command(fun3d_command, 'dog', openmp_threads=20) 9 | 10 | # commands that will be run in the pbs script 11 | pbs_commands = [fun3d_mpi_command] 12 | 13 | # submit and wait for job to finish before continuing script 14 | k4.launch('omp_job', pbs_commands) 15 | -------------------------------------------------------------------------------- /examples/job_array/write_pbs_file.py: -------------------------------------------------------------------------------- 1 | from pbs4py import PBS 2 | 3 | k4 = PBS.k4(time=48) 4 | k4.mpiexec = 'mpiexec_mpt' 5 | k4.requested_number_of_nodes = 1 6 | 7 | k4.array_range = '1-4' 8 | 9 | command_list = [f'echo "Array job index = ${{PBS_ARRAY_INDEX}}"'] 10 | 11 | k4.write_job_file('test_array.pbs', 'test_array', command_list) 12 | -------------------------------------------------------------------------------- /pbs4py/__init__.py: -------------------------------------------------------------------------------- 1 | from .pbs import PBS 2 | from .fake_pbs import FakePBS 3 | from .pbs_batch import BatchJob, PBSBatch 4 | -------------------------------------------------------------------------------- /pbs4py/bsub.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | from pbs4py.launcher_base import Launcher 5 | 6 | 7 | class BSUB(Launcher): 8 | def __init__(self, 9 | project: str, 10 | ngpus_per_node: int = 6, 11 | queue_node_limit: int = 1_000_000, 12 | time: int = 72, 13 | profile_filename: str = '~/.bashrc', 14 | requested_number_of_nodes: int = 1): 15 | """ 16 | A Class for creating and running jobs using the Department of 
Energy 17 | batch system. 18 | 19 | Parameters 20 | ---------- 21 | project: 22 | The project which to charge for submitted jobs 23 | ngpu_per_node: 24 | The number of GPUs per compute node 25 | time: 26 | The requested wall time for the job(s) in hours 27 | profile_filename: 28 | The file setting the environment to source inside the PBS job 29 | """ 30 | super().__init__(ngpus_per_node, ngpus_per_node, queue_node_limit, 31 | time, profile_filename, requested_number_of_nodes) 32 | 33 | #: The project which to charge for submitted jobs 34 | self.project: str = project 35 | 36 | #: Mail a job report when complete 37 | self.mail_when_complete: bool = True 38 | 39 | self.profile_filename = profile_filename 40 | self.workdir_env_variable = '$LS_SUBCWD' 41 | self.batch_file_extension = 'lsf' 42 | self.mpiexec = 'jsrun' 43 | 44 | def create_mpi_command(self, command: str, 45 | output_root_name: str, 46 | openmp_threads: int = 1, 47 | ranks_per_node: int = None) -> str: 48 | num_mpi_procs = self.requested_number_of_nodes * self.ngpus_per_node 49 | redirect_output = self._redirect_shell_output(f'{output_root_name}.out') 50 | command = f'{self.mpiexec} -n {num_mpi_procs} -a 1 -c {openmp_threads} -g 1 {command} {redirect_output}' 51 | return command 52 | 53 | def _create_list_of_standard_header_options(self, job_name: str) -> List[str]: 54 | header_lines = [self._create_hashbang(), 55 | self._create_project_line_of_header(), 56 | self._create_job_name_line_of_header(job_name), 57 | self._create_number_of_nodes_line_of_header(), 58 | self._create_wall_time_line_of_header()] 59 | return header_lines 60 | 61 | def _create_project_line_of_header(self) -> str: 62 | return f'#BSUB -P {self.project}' 63 | 64 | def _create_job_name_line_of_header(self, job_name: str) -> str: 65 | return f'#BSUB -J {job_name}' 66 | 67 | def _create_number_of_nodes_line_of_header(self) -> str: 68 | return f'#BSUB -nnodes {self.requested_number_of_nodes}' 69 | 70 | def _create_wall_time_line_of_header(self) -> str: 71 | return f'#BSUB -W {self.time}:00' 72 | 73 | def _create_list_of_optional_header_lines(self, dependency: str) -> List[str]: 74 | header_lines = [] 75 | header_lines.extend(self._create_job_dependency_header_line(dependency)) 76 | header_lines.extend(self._create_mail_header_line()) 77 | return header_lines 78 | 79 | def _create_job_dependency_header_line(self, dependency: str) -> List[str]: 80 | if dependency is not None: 81 | return [f'#BSUB -w ended({dependency})'] 82 | else: 83 | return [] 84 | 85 | def _create_mail_header_line(self) -> List[str]: 86 | if self.mail_when_complete: 87 | return ['#BSUB -N'] 88 | else: 89 | return [] 90 | 91 | def _run_job(self, job_filename: str, blocking: bool, print_command_output: bool = True) -> str: 92 | if blocking: 93 | print('Warning: Blocking for bsub not implemented') 94 | 95 | command = f'bsub {job_filename}' 96 | if print_command_output: 97 | print(command) 98 | return os.popen(command).read() 99 | 100 | def _parse_job_id_out_of_bsub_output(self, bsub_output: str) -> int: 101 | return int(bsub_output.split('>')[0].split('<')[-1]) 102 | -------------------------------------------------------------------------------- /pbs4py/directory_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | 4 | class cd: 5 | """Context manager for changing the current working directory""" 6 | def __init__(self, newPath): 7 | self.newPath = os.path.expanduser(newPath) 8 | 9 | def __enter__(self): 10 | 
self.savedPath = os.getcwd()
11 |         os.chdir(self.newPath)
12 | 
13 |     def __exit__(self, etype, value, traceback):
14 |         os.chdir(self.savedPath)
15 | 
--------------------------------------------------------------------------------
/pbs4py/fake_pbs.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | import subprocess
3 | from pbs4py import PBS
4 | 
5 | 
6 | class FakePBS(PBS):
7 |     """
8 |     A fake PBS class for directly running commands while still calling as
9 |     if it were a standard PBS driver.
10 |     This can be used to seamlessly switch between modes where PBS jobs are
11 |     launched for each "job" and modes where you don't want to launch a new
12 |     PBS job for each "job", e.g., when driving a script from within an
13 |     existing PBS job.
14 |     """
15 | 
16 |     def __init__(self, profile_filename='', stop_at_first_failure=False):
17 |         super().__init__(profile_filename=profile_filename)
18 |         self.stop_at_first_failure = stop_at_first_failure
19 | 
20 |     def launch(self, job_name: str, job_body: List[str],
21 |                blocking: bool = True, dependency: str = None) -> str:
22 |         """
23 |         Runs the commands in the job_body and determines whether any failed
24 |         based on their return codes
25 | 
26 |         Parameters
27 |         ----------
28 |         job_name:
29 |             [ignored]
30 |         job_body:
31 |             List of commands to run
32 |         blocking:
33 |             [ignored]
34 |         dependency:
35 |             [ignored]
36 | 
37 |         Returns
38 |         -------
39 |         pbs_command_output: str
40 |             A stand-in for the true PBS launch output: 'FakePBS.{number_of_failures}'
41 |         """
42 | 
43 |         number_of_failures = 0
44 |         for line in job_body:
45 |             print(line)
46 |             process = subprocess.Popen(line, shell=True)
47 |             process.wait()
48 | 
49 |             if process.returncode != 0:
50 |                 number_of_failures += 1
51 |                 if self.stop_at_first_failure:
52 |                     break
53 |         return f'FakePBS.{number_of_failures}'
--------------------------------------------------------------------------------
/pbs4py/job.py:
--------------------------------------------------------------------------------
1 | import time
2 | import os
3 | from typing import List, Union
4 | import subprocess
5 | 
6 | 
7 | class PBSJob:
8 |     def __init__(self, id: str):
9 |         """
10 |         A class for querying information about and managing a particular submitted
11 |         PBS job. For the id number given in the constructor, the qstat command will
12 |         be used to populate the attributes of the job.
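        If qstat does not recognize the id, the attributes are left empty.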
13 | 
14 |         Parameters
15 |         ----------
16 |         id:
17 |             The id of the PBS job
18 |         """
19 | 
20 |         #: The ID of the PBS job
21 |         self.id: str = id
22 | 
23 |         #: The name of the job
24 |         self.name: str = ""
25 | 
26 |         #: The model attribute on the select line from the job submission
27 |         self.model: str = ""
28 | 
29 |         #: The number of nodes on the select line
30 |         self.requested_number_of_nodes: int = 0
31 | 
32 |         #: The number of cpus per node
33 |         self.ncpus_per_node = 0
34 | 
35 |         #: The queue which this job was submitted to
36 |         self.queue: str = ""
37 | 
38 |         #: Whether the job is queued, running, or finished
39 |         self.state: str = ""
40 | 
41 |         #: The value of $PBS_O_WORKDIR
42 |         self.workdir: str = ""
43 | 
44 |         #: The exit status of the pbs job
45 |         self.exit_status: int = None
46 | 
47 |         self.read_properties_from_qstat()
48 | 
49 |     def read_properties_from_qstat(self):
50 |         """
51 |         Use qstat to get the current attributes of this job
52 |         """
53 |         if "FakePBS" in self.id:
54 |             self._read_properties_from_fake_pbs_launcher_job()
55 |         else:
56 |             self._read_properties_of_real_pbs_job()
57 | 
58 |     def _read_properties_of_real_pbs_job(self):
59 |         qstat_output = self._run_qstat_to_get_full_job_attributes()
60 |         if self._is_a_known_job(qstat_output):
61 |             self._parse_attributes_from_qstat_output(qstat_output)
62 |         else:
63 |             self._set_empty_attributes()
64 | 
65 |     def _read_properties_from_fake_pbs_launcher_job(self):
66 |         self.exit_status = int(self.id.split(".")[-1])
67 | 
68 |     def qdel(self, echo_command: bool = True) -> str:
69 |         """
70 |         Call qdel to delete this job
71 | 
72 |         Parameters
73 |         ----------
74 |         echo_command:
75 |             Whether to print the command before running it
76 | 
77 |         Returns
78 |         -------
79 |         command_output: str
80 |             The output of the shell command
81 |         """
82 |         command = f"qdel {self.id}"
83 |         if echo_command:
84 |             print(command)
85 |         return os.popen(command).read()
86 | 
87 |     def tail_file_until_job_is_finished(self, file_to_tail: str):
88 |         if self._this_job_was_launched_from_fake_pbs():
89 |             # cat the file
90 |             with open(file_to_tail, "r") as file:
91 |                 for line in file:
92 |                     print(line)
93 |         else:
94 |             # touch the file first
95 |             if not os.path.exists(file_to_tail):
96 |                 open(file_to_tail, "w").close()
97 | 
98 |             with open(file_to_tail, "r") as file:
99 |                 for line in file:
100 |                     print(line)
101 |                 while True:
102 |                     line = file.readline()
103 |                     if line:
104 |                         print(line)
105 |                     else:
106 |                         # Sleep for a bit to avoid wasting resources
107 |                         time.sleep(0.1)
108 |                         if self._job_is_still_running_or_queued():
109 |                             continue
110 |                         else:
111 |                             for line in file:
112 |                                 print(line)
113 |                             break
114 | 
115 |     def update_job_state(self):
116 |         """
117 |         Refresh the job's status after it has been submitted by re-reading qstat,
118 |         which updates ``state`` with the job_state entry, e.g.,
119 |         'Q', 'R', 'F', 'H', etc.
120 | 121 | """ 122 | self.read_properties_from_qstat() 123 | 124 | def get_exit_status(self) -> int: 125 | qstat_output = self._run_qstat_to_get_full_job_attributes() 126 | qstat_dict = self._convert_qstat_output_to_a_dictionary(qstat_output) 127 | return qstat_dict.get("Exit_status") 128 | 129 | def _this_job_was_launched_from_fake_pbs(self): 130 | return "FakePBS" in self.id 131 | 132 | def _job_is_still_running_or_queued(self): 133 | self.update_job_state() 134 | if self.state == "Q" or self.state == "R": 135 | return True 136 | else: 137 | return False 138 | 139 | def _run_qstat_to_get_full_job_attributes(self) -> Union[List[str], str]: 140 | result = subprocess.run( 141 | ["qstat", "-xf", str(self.id)], 142 | stdout=subprocess.PIPE, 143 | stderr=subprocess.PIPE, 144 | text=False, # Disable automatic decoding 145 | ) 146 | return result.stdout.decode("utf-8", errors="replace").split("\n") 147 | 148 | def _is_a_known_job(self, qstat_output): 149 | return "Unknown Job Id" not in qstat_output 150 | 151 | def _parse_attributes_from_qstat_output(self, qstat_output: List[str]): 152 | qstat_dict = self._convert_qstat_output_to_a_dictionary(qstat_output) 153 | 154 | self.name: str = qstat_dict["Job_Name"] 155 | self.queue: str = qstat_dict["queue"] 156 | self.state: str = qstat_dict["job_state"] 157 | self.workdir = self._parse_workdir(qstat_dict) 158 | 159 | if "model" in qstat_dict["Resource_List.select"]: 160 | self.model = qstat_dict["Resource_List.select"].split("model=")[-1] 161 | else: 162 | self.model = "" 163 | self.requested_number_of_nodes = int(qstat_dict["Resource_List.select"].split(":")[0]) 164 | self.ncpus_per_node = int( 165 | qstat_dict["Resource_List.select"].split("ncpus=")[-1].split(":")[0]) 166 | 167 | self.exit_status: int = qstat_dict.get("Exit_status") 168 | if self.exit_status is not None: 169 | self.exit_status = int(self.exit_status) 170 | 171 | self.walltime_requested = self._convert_walltime_to_seconds( 172 | qstat_dict["Resource_List.walltime"]) 173 | if self.state != "Q": 174 | self.hostname = qstat_dict["exec_host"].split("/")[0] 175 | self.walltime_used = qstat_dict.get("resources_used.walltime") 176 | if self.walltime_used is not None: 177 | self.walltime_used = self._convert_walltime_to_seconds(self.walltime_used) 178 | self.walltime_remaining = self.walltime_requested - self.walltime_used 179 | else: 180 | self.walltime_remaining = None 181 | 182 | def _convert_walltime_to_seconds(self, walltime: str): 183 | walltime_split = walltime.split(":") 184 | return 3600 * int(walltime_split[0]) + 60 * int(walltime_split[1]) + int(walltime_split[2]) 185 | 186 | def _set_empty_attributes(self): 187 | self.name = "" 188 | self.model = "" 189 | self.queue = "" 190 | self.state = "" 191 | self.workdir = "" 192 | self.requested_number_of_nodes = 0 193 | self.ncpus_per_node = 0 194 | self.exit_status = None 195 | 196 | def _parse_workdir(self, qstat_dict: dict) -> str: 197 | return qstat_dict["Variable_List"].split("PBS_O_WORKDIR=")[-1].split(",")[0] 198 | 199 | def _convert_qstat_output_to_a_dictionary(self, qstat_output: List[str]) -> dict: 200 | qstat_dict = {} 201 | current_key = None 202 | current_value = [] 203 | 204 | for line in qstat_output[1:]: 205 | if len(line) == 0: 206 | continue 207 | 208 | if not self._is_a_continued_qstat_line(line): 209 | split_line = line.split("=", 1) 210 | current_key = split_line[0].strip() 211 | current_value = split_line[1].strip() 212 | qstat_dict[current_key] = current_value 213 | else: 214 | qstat_dict[current_key] += 
215 | 
216 |         return qstat_dict
217 | 
218 |     def _is_a_continued_qstat_line(self, line):
219 |         return line[0] == "\t"
220 | 
--------------------------------------------------------------------------------
/pbs4py/launcher_base.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | from typing import List
4 | import numpy as np
5 | 
6 | 
7 | class Launcher:
8 |     def __init__(self, ncpus_per_node: int, ngpus_per_node: int, queue_node_limit: int, time: int,
9 |                  profile_filename: str, requested_number_of_nodes: int):
10 | 
11 |         #: The hashbang line which sets the shell for the PBS script.
12 |         #: If unset, the default is ``#!/usr/bin/env {self.shell}``.
13 |         self.hashbang: str = None
14 | 
15 |         #: The shell flavor to use in the PBS job
16 |         self.shell = 'bash'
17 | 
18 |         #: The maximum number of nodes allowed by the queue
19 |         self.queue_node_limit: int = queue_node_limit
20 | 
21 |         #: The number of compute nodes requested
22 |         self.requested_number_of_nodes: int = requested_number_of_nodes
23 | 
24 |         #: The requested wall time for the pbs job(s) in hours
25 |         self.time: int = time
26 | 
27 |         #: The number of CPU cores per node.
28 |         self.ncpus_per_node: int = ncpus_per_node
29 | 
30 |         #: The number of GPUs per node.
31 |         self.ngpus_per_node: int = ngpus_per_node
32 | 
33 |         #: The mpi execution command name: mpiexec, mpirun, mpiexec_mpt, etc.
34 |         self.mpiexec: str = "mpiexec"
35 |         self.mpiprocs_per_node = None
36 | 
37 |         #: Command line option for mpiexec to specify the number of MPI ranks per host/node.
38 |         #: Default is to set it based on the mpiexec version.
39 |         self.ranks_per_node_flag: str = None
40 | 
41 |         # these are properties that users typically don't need to set, but are
42 |         # specific to each queueing software
43 |         self.workdir_env_variable: str = ''
44 |         self.profile_filename: str = ''
45 |         self.batch_file_extension: str = ''
46 | 
47 |         #: If true, the output of mpi commands is piped through tee instead of redirected to a file
48 |         self.tee_output: bool = False
49 | 
50 |         self.profile_filename = profile_filename
51 | 
52 |     @property
53 |     def requested_number_of_nodes(self):
54 |         """
55 |         The number of nodes to request. That is, the 'select' number in the
56 |         ``#PBS -l select={requested_number_of_nodes}:ncpus=40:mpiprocs=40``.
57 | 
58 |         :type: int
59 |         """
60 |         return self._requested_number_of_nodes
61 | 
62 |     @requested_number_of_nodes.setter
63 |     def requested_number_of_nodes(self, number_of_nodes):
64 |         self._requested_number_of_nodes = np.min((number_of_nodes, self.queue_node_limit))
65 | 
66 |     @property
67 |     def mpiprocs_per_node(self):
68 |         """
69 |         The number of requested mpiprocs per node. If not set, the launcher will default
70 |         to the number of cpus per node.
71 |         ``#PBS -l select=1:ncpus=40:mpiprocs={mpiprocs_per_node}``.
72 | 
73 |         :type: int
74 |         """
75 |         if self._mpiprocs_per_node is None:
76 |             return self.ncpus_per_node
77 |         else:
78 |             return self._mpiprocs_per_node
79 | 
80 |     @mpiprocs_per_node.setter
81 |     def mpiprocs_per_node(self, mpiprocs):
82 |         self._mpiprocs_per_node = mpiprocs
83 | 
84 |     @property
85 |     def profile_filename(self):
86 |         """
87 |         The file to source at the start of the pbs script to set the environment.
88 |         Typical names include '~/.profile', '~/.bashrc', and '~/.cshrc'.
89 |         If you do not wish to source a file, set to ''.
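
        Illustrative usage (``pbs`` is any launcher instance; the setter raises
        FileNotFoundError if the file does not exist):

        >>> pbs.profile_filename = '~/.bashrc'  # sourced at the top of each job script
        >>> pbs.profile_filename = ''           # skip sourcing a file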
90 | 
91 |         :type: str
92 |         """
93 |         return self._profile_filename
94 | 
95 |     @profile_filename.setter
96 |     def profile_filename(self, profile_filename):
97 |         if (profile_filename == '' or
98 |                 os.path.isfile(os.path.expanduser(profile_filename))):
99 |             self._profile_filename = profile_filename
100 |         else:
101 |             raise FileNotFoundError('Unable to set profile file.')
102 | 
103 |     def create_mpi_command(
104 |             self, command: str, output_root_name: str = None, openmp_threads: int = None,
105 |             ranks_per_node: int = None) -> str:
106 |         """
107 |         Wrap a command with mpiexec and route its standard and error output to a file
108 | 
109 |         Parameters
110 |         ----------
111 |         command:
112 |             The command that needs to run in parallel
113 |         output_root_name:
114 |             The root name of the output file, {output_root_name}.out.
115 |         openmp_threads:
116 |             The number of openmp threads per mpi process.
117 |         ranks_per_node:
118 |             The number of MPI ranks per compute node.
119 | 
120 |         Returns
121 |         -------
122 |         full_command: str
123 |             The full command string.
124 |         """
125 |         omp_env_vars = self._determine_omp_settings(openmp_threads)
126 |         ranks_per_node_info = self._set_ranks_per_node_info(openmp_threads, ranks_per_node)
127 |         openmp_info = self._set_openmp_info(openmp_threads)
128 | 
129 |         full_command = [omp_env_vars, self.mpiexec, ranks_per_node_info, openmp_info, command]
130 |         if output_root_name is not None:
131 |             redirect_output = self._redirect_shell_output(f"{output_root_name}.out")
132 |             full_command.append(redirect_output)
133 |         return self._filter_empty_strings_from_list_and_combine(full_command)
134 | 
135 |     def launch(self, job_name: str, job_body: List[str],
136 |                blocking: bool = True, dependency: str = None) -> str:
137 |         """
138 |         Create a job script and launch the job
139 | 
140 |         Parameters
141 |         ----------
142 |         job_name:
143 |             The name of the job.
144 |         job_body:
145 |             List of commands to run in the body of the job.
146 |         blocking:
147 |             If true, this function will wait for the job to complete before returning.
148 |             If false, this function will launch the job but not wait for it to finish.
149 |         dependency:
150 |             Jobs that this one depends on. For PBS, these are colon separated in the string
151 | 
152 |         Returns
153 |         -------
154 |         command_output: str
155 |             The stdout of the launch command. If the job is successfully launched,
156 |             this will be the job id.
157 |         """
158 |         filename = f'{job_name}.{self.batch_file_extension}'
159 |         self.write_job_file(filename, job_name, job_body, dependency)
160 |         return self._run_job(filename, blocking)
161 | 
162 |     def write_job_file(self, job_filename: str, job_name: str,
163 |                        job_body: List[str], dependency: str = None):
164 |         """
165 |         Create a launch script file in the current directory for the commands defined in ``job_body``.
166 | 
167 |         Parameters
168 |         ----------
169 |         job_filename:
170 |             name of the file to write to
171 |         job_name:
172 |             The name of the job.
173 |         job_body:
174 |             List of commands to run in the body of the job.
175 |         dependency:
176 |             Jobs that this one depends on.
For PBS, these are colon separated in the string 177 | """ 178 | with open(job_filename, mode='w') as fh: 179 | header = self._create_header(job_name, dependency) 180 | for line in header: 181 | fh.write(line + '\n') 182 | 183 | for _ in range(2): 184 | fh.write('\n') 185 | 186 | fh.write(f'cd {self.workdir_env_variable}\n') 187 | if len(self.profile_filename) > 0: 188 | fh.write(f'source {self.profile_filename}\n') 189 | 190 | for _ in range(1): 191 | fh.write('\n') 192 | 193 | for line in job_body: 194 | fh.write(line + '\n') 195 | 196 | def _create_header(self, job_name: str, dependency: str = None) -> List[str]: 197 | header = self._create_list_of_standard_header_options(job_name) 198 | header.extend(self._create_list_of_optional_header_lines(dependency)) 199 | return header 200 | 201 | def _create_hashbang(self): 202 | if self.hashbang is not None: 203 | return self.hashbang 204 | else: 205 | return f'#!/usr/bin/env {self.shell}' 206 | 207 | def _create_list_of_standard_header_options(self, job_name: str) -> List[str]: 208 | return [''] 209 | 210 | def _create_list_of_optional_header_lines(self, dependency: str) -> List[str]: 211 | return [''] 212 | 213 | def _run_job(self, job_filename: str, blocking: bool, print_command_output: bool = True) -> str: 214 | raise NotImplementedError('Launcher must implement a _run_job method') 215 | 216 | def _redirect_shell_output(self, output_filename): 217 | if self.tee_output: 218 | return f'2>&1 | tee {output_filename}' 219 | 220 | if self.shell == 'tcsh': 221 | return f'>& {output_filename}' 222 | else: 223 | return f'&> {output_filename}' 224 | 225 | def _use_omplace_command(self) -> bool: 226 | return self._using_mpt() 227 | 228 | def _use_openmp(self, openmp_threads: int = None): 229 | if openmp_threads is not None: 230 | if openmp_threads > 1: 231 | return True 232 | return False 233 | 234 | def _using_mpt(self) -> bool: 235 | if self.mpiexec == "mpiexec_mpt": 236 | return True 237 | 238 | try: 239 | output = subprocess.run( 240 | [self.mpiexec, "--version"], 241 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) 242 | return "MPT" in output.stderr or "MPT" in output.stdout 243 | except FileNotFoundError: 244 | print(f"Executable '{self.mpiexec}' not found") 245 | return False 246 | 247 | def _get_ranks_per_node_flag(self): 248 | if self.ranks_per_node_flag is not None: 249 | return self.ranks_per_node_flag 250 | else: 251 | if self._using_mpt(): 252 | return "-perhost" 253 | else: 254 | return "--npernode" 255 | 256 | def _determine_omp_settings(self, openmp_threads: int) -> str: 257 | if openmp_threads is None: 258 | return "" 259 | 260 | omp_env_vars = [f"OMP_NUM_THREADS={openmp_threads}"] 261 | if not self._use_omplace_command(): 262 | omp_env_vars.extend(["OMP_PLACES=cores", "OMP_PROC_BIND=close"]) 263 | return self._filter_empty_strings_from_list_and_combine(omp_env_vars) 264 | 265 | def _filter_empty_strings_from_list_and_combine(self, lis: List[str]) -> str: 266 | filtered_for_empty_strings = filter(None, lis) 267 | return " ".join(filtered_for_empty_strings) 268 | 269 | def _set_ranks_per_node_info(self, openmp_threads: int, ranks_per_node: int) -> str: 270 | if ranks_per_node is None and openmp_threads is None: 271 | return "" 272 | elif ranks_per_node is not None: 273 | mpi_procs_per_node = ranks_per_node 274 | else: # openmp_threads is not None: 275 | mpi_procs_per_node = self.ncpus_per_node // openmp_threads 276 | 277 | ranks_per_node_flag = self._get_ranks_per_node_flag() 278 | ranks_per_proc_info = 
f"{ranks_per_node_flag} {mpi_procs_per_node}" 279 | return ranks_per_proc_info 280 | 281 | def _set_openmp_info(self, openmp_threads: int) -> str: 282 | if not self._use_openmp(openmp_threads): 283 | return "" 284 | 285 | openmp_info = "" 286 | if self._use_omplace_command(): 287 | proc_num_list = ",".join([str(i) for i in range(self.ncpus_per_node)]) 288 | openmp_info = f'omplace -c "{proc_num_list}" -nt {openmp_threads} -vv' 289 | return openmp_info 290 | -------------------------------------------------------------------------------- /pbs4py/pbs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import subprocess 4 | from typing import List, Union 5 | import numpy as np 6 | 7 | from pbs4py.launcher_base import Launcher 8 | 9 | 10 | class PBS(Launcher): 11 | def __init__( 12 | self, 13 | queue_name: str = "K4-route", 14 | ncpus_per_node: int = 40, 15 | ngpus_per_node: int = 0, 16 | queue_node_limit: int = 10, 17 | time: int = 72, 18 | mem: str = None, 19 | profile_filename: str = "~/.bashrc", 20 | requested_number_of_nodes: int = 1, 21 | ): 22 | """ 23 | | A class for creating and running pbs jobs. Default queue properties are for K4. 24 | | Defaults not set during instantiation can be adjusted by directly modifying attributes. 25 | 26 | Parameters 27 | ---------- 28 | queue_name: 29 | Queue name which goes on the "#PBS -N {name}" line of the pbs header 30 | ncpus_per_node: 31 | Number of CPU cores per node 32 | ngpus_per_node: 33 | Number of GPUs per node 34 | queue_node_limit: 35 | Maximum number of nodes allowed in this queue 36 | time: 37 | The requested job walltime in hours 38 | mem: 39 | The requested memory size. String to allow specifying in G, MB, etc. 40 | profile_file: 41 | The file setting the environment to source inside the PBS job. Set to 42 | '' if you do not wish to source a file. 43 | requested_number_of_nodes: 44 | The number of compute nodes to request 45 | """ 46 | super().__init__(ncpus_per_node, ngpus_per_node, queue_node_limit, 47 | time, profile_filename, requested_number_of_nodes) 48 | 49 | #: The name of the queue which goes on the ``#PBS -N {queue_name}`` 50 | #: line of the pbs header 51 | self.queue_name: str = queue_name 52 | 53 | #: The processor model if it needs to be specified. 54 | #: The associated PBS header line is ``#PBS -l select=#:ncpus=#:mpiprocs=#:model={model}`` 55 | #: If left as `None`, the ``:model={mode}`` will not be added to the header line 56 | self.model: Union[str, None] = None 57 | 58 | #: The group for the group_list entry of the pbs header if necessary. 59 | #: The associated PBS header line is ``#PBS -W group_list={group_list}`` 60 | self.group_list: Union[str, None] = None 61 | 62 | #: Requested memory size on the select line. Need to include units in the str. 63 | #: The associated PBS header line is ``#PBS -l select=#:mem={mem}`` 64 | self.mem: Union[str, None] = mem 65 | 66 | #: Index range for PBS array of jobs 67 | #: The associated PBS header line is ``#PBS -J {array_range}`` 68 | self.array_range: Union[str, None] = None 69 | 70 | #: ``pbs -m`` mail options. 'e' at exit, 'b' at beginning, 'a' at abort 71 | self.mail_options: str = None 72 | 73 | #: ``pbs -M`` mail list. Who to email when mail_options are triggered 74 | self.mail_list: Union[str, None] = None 75 | 76 | #: Type of dependency if dependency active. 77 | #: Default is 'afterok' which only launches the new job if the previous one was successful. 
78 | self.dependency_type: str = "afterok" 79 | 80 | self.mpiexec: str = "mpiexec" 81 | self.ranks_per_node_flag = None 82 | 83 | self.workdir_env_variable = "$PBS_O_WORKDIR" 84 | self.batch_file_extension = "pbs" 85 | self.requested_number_of_nodes = requested_number_of_nodes 86 | 87 | def _create_list_of_standard_header_options(self, job_name: str) -> List[str]: 88 | header_lines = [ 89 | self._create_hashbang(), 90 | self._create_job_line_of_header(job_name), 91 | self._create_queue_line_of_header(), 92 | self._create_select_line_of_header(), 93 | self._create_walltime_line_of_header(), 94 | self._create_log_name_line_of_header(job_name), 95 | self._create_header_line_to_join_standard_and_error_output(), 96 | self._create_header_line_to_set_that_job_is_not_rerunnable(), 97 | ] 98 | return header_lines 99 | 100 | def _create_job_line_of_header(self, job_name: str) -> str: 101 | return f"#PBS -N {job_name}" 102 | 103 | def _create_queue_line_of_header(self) -> str: 104 | return f"#PBS -q {self.queue_name}" 105 | 106 | def _create_select_line_of_header(self) -> str: 107 | select = f"select={self.requested_number_of_nodes}" 108 | ncpus = f"ncpus={self.ncpus_per_node}" 109 | mpiprocs = f"mpiprocs={self.mpiprocs_per_node}" 110 | 111 | select_line = f"#PBS -l {select}:{ncpus}" 112 | if self.ngpus_per_node > 0: 113 | select_line += f":ngpus={self.ngpus_per_node}" 114 | select_line += f":{mpiprocs}" 115 | if self.mem is not None: 116 | select_line += f":mem={self.mem}" 117 | if self.model is not None: 118 | select_line += f":model={self.model}" 119 | return select_line 120 | 121 | def _create_walltime_line_of_header(self) -> str: 122 | return f"#PBS -l walltime={self.time}:00:00" 123 | 124 | def _create_log_name_line_of_header(self, job_name: str) -> str: 125 | return f"#PBS -o {job_name}_pbs.log" 126 | 127 | def _create_header_line_to_join_standard_and_error_output(self): 128 | return "#PBS -j oe" 129 | 130 | def _create_header_line_to_set_that_job_is_not_rerunnable(self) -> str: 131 | return "#PBS -r n" 132 | 133 | def _create_list_of_optional_header_lines(self, dependency): 134 | header_lines = [] 135 | header_lines.extend(self._create_group_list_header_line()) 136 | header_lines.extend(self._create_array_range_header_line()) 137 | header_lines.extend(self._create_mail_options_header_lines()) 138 | header_lines.extend(self._create_job_dependencies_header_line(dependency)) 139 | return header_lines 140 | 141 | def _create_group_list_header_line(self) -> List[str]: 142 | if self.group_list is not None: 143 | return [f"#PBS -W group_list={self.group_list}"] 144 | else: 145 | return [] 146 | 147 | def _create_array_range_header_line(self) -> List[str]: 148 | if self.array_range is not None: 149 | return [f"#PBS -J {self.array_range}"] 150 | else: 151 | return [] 152 | 153 | def _create_mail_options_header_lines(self) -> List[str]: 154 | header_lines = [] 155 | if self.mail_options is not None: 156 | header_lines.append(f"#PBS -m {self.mail_options}") 157 | if self.mail_list is not None: 158 | header_lines.append(f"#PBS -M {self.mail_list}") 159 | return header_lines 160 | 161 | def _create_job_dependencies_header_line(self, dependency) -> List[str]: 162 | if dependency is not None: 163 | return [f"#PBS -W depend={self.dependency_type}:{dependency}"] 164 | else: 165 | return [] 166 | 167 | def _run_job(self, job_filename: str, blocking: bool, print_command_output=True) -> str: 168 | options = "" 169 | if blocking: 170 | options += "-W block=true" 171 | command_output = os.popen(f"qsub {options} 
{job_filename}").read().strip() 172 | if print_command_output: 173 | print(command_output) 174 | return command_output 175 | 176 | # Alternate constructors for NASA HPC queues 177 | @classmethod 178 | def k4(cls, time: int = 72, profile_filename: str = "~/.bashrc", requested_number_of_nodes: int = 1): 179 | """ 180 | Constructor for the K4 queues on LaRC's K cluster including K4-standard-512. 181 | 182 | Parameters 183 | ---------- 184 | time: 185 | The requested job walltime in hours 186 | profile_file: 187 | The file setting the environment to source inside the PBS job 188 | requested_number_of_nodes: 189 | The number of compute nodes to request 190 | """ 191 | return cls( 192 | queue_name="K4-route", 193 | ncpus_per_node=40, 194 | queue_node_limit=16, 195 | time=time, 196 | profile_filename=profile_filename, 197 | requested_number_of_nodes=requested_number_of_nodes, 198 | ) 199 | 200 | @classmethod 201 | def k3c(cls, time: int = 72, profile_filename: str = "~/.bashrc", requested_number_of_nodes: int = 1): 202 | """ 203 | Constructor for the K3b queues on LaRC's K cluster. 204 | 205 | Parameters 206 | ---------- 207 | time: 208 | The requested job walltime in hours 209 | profile_file: 210 | The file setting the environment to source inside the PBS job 211 | requested_number_of_nodes: 212 | The number of compute nodes to request 213 | """ 214 | return cls( 215 | queue_name="K3c-route", 216 | ncpus_per_node=28, 217 | queue_node_limit=74, 218 | time=time, 219 | profile_filename=profile_filename, 220 | requested_number_of_nodes=requested_number_of_nodes, 221 | ) 222 | 223 | @classmethod 224 | def k3b(cls, time: int = 72, profile_filename: str = "~/.bashrc", requested_number_of_nodes: int = 1): 225 | """ 226 | Constructor for the K3b queues on LaRC's K cluster. 227 | 228 | Parameters 229 | ---------- 230 | time: 231 | The requested job walltime in hours 232 | profile_file: 233 | The file setting the environment to source inside the PBS job 234 | requested_number_of_nodes: 235 | The number of compute nodes to request 236 | """ 237 | return cls( 238 | queue_name="K3b-route", 239 | ncpus_per_node=28, 240 | queue_node_limit=74, 241 | time=time, 242 | profile_filename=profile_filename, 243 | requested_number_of_nodes=requested_number_of_nodes, 244 | ) 245 | 246 | @classmethod 247 | def k3a(cls, time: int = 72, profile_filename: str = "~/.bashrc", requested_number_of_nodes: int = 1): 248 | """ 249 | Constructor for the K3a queue on LaRC's K cluster. 
250 | 
251 |         Parameters
252 |         ----------
253 |         time:
254 |             The requested job walltime in hours
255 |         profile_filename:
256 |             The file setting the environment to source inside the PBS job
257 |         requested_number_of_nodes:
258 |             The number of compute nodes to request
259 |         """
260 |         return cls(
261 |             queue_name="K3a-route",
262 |             ncpus_per_node=16,
263 |             queue_node_limit=25,
264 |             time=time,
265 |             profile_filename=profile_filename,
266 |             requested_number_of_nodes=requested_number_of_nodes,
267 |         )
268 | 
269 |     @classmethod
270 |     def k4_v100(
271 |         cls,
272 |         time: int = 72,
273 |         ncpus_per_node=0,
274 |         ngpus_per_node=4,
275 |         mem="200G",
276 |         profile_filename: str = "~/.bashrc",
277 |         requested_number_of_nodes: int = 1,
278 |     ):
279 |         if ncpus_per_node == 0:
280 |             ncpus_per_node = ngpus_per_node
281 |         return cls(
282 |             queue_name="K4-V100",
283 |             ncpus_per_node=ncpus_per_node,
284 |             ngpus_per_node=ngpus_per_node,
285 |             queue_node_limit=4,
286 |             time=time,
287 |             mem=mem,
288 |             profile_filename=profile_filename,
289 |             requested_number_of_nodes=requested_number_of_nodes,
290 |         )
291 | 
292 |     @classmethod
293 |     def k5_a100_80(
294 |         cls,
295 |         time: int = 72,
296 |         ncpus_per_node=0,
297 |         ngpus_per_node=8,
298 |         mem="700G",
299 |         profile_filename: str = "~/.bashrc",
300 |         requested_number_of_nodes: int = 1,
301 |     ):
302 |         if ncpus_per_node == 0:
303 |             ncpus_per_node = ngpus_per_node
304 |         return cls(
305 |             queue_name="K5-A100-80",
306 |             ncpus_per_node=ncpus_per_node,
307 |             ngpus_per_node=ngpus_per_node,
308 |             queue_node_limit=2,
309 |             time=time,
310 |             mem=mem,
311 |             profile_filename=profile_filename,
312 |             requested_number_of_nodes=requested_number_of_nodes,
313 |         )
314 | 
315 |     @classmethod
316 |     def k5_a100_40(
317 |         cls,
318 |         time: int = 72,
319 |         ncpus_per_node=0,
320 |         ngpus_per_node=8,
321 |         mem="700G",
322 |         profile_filename: str = "~/.bashrc",
323 |         requested_number_of_nodes: int = 1,
324 |     ):
325 |         if ncpus_per_node == 0:
326 |             ncpus_per_node = ngpus_per_node
327 |         return cls(
328 |             queue_name="K5-A100-40",
329 |             ncpus_per_node=ncpus_per_node,
330 |             ngpus_per_node=ngpus_per_node,
331 |             queue_node_limit=2,
332 |             time=time,
333 |             mem=mem,
334 |             profile_filename=profile_filename,
335 |             requested_number_of_nodes=requested_number_of_nodes,
336 |         )
337 | 
338 |     @classmethod
339 |     def nas(
340 |         cls,
341 |         group_list: str,
342 |         proc_type: str = "broadwell",
343 |         queue_name: str = "long",
344 |         time: int = 72,
345 |         mem: str = None,
346 |         profile_filename: str = "~/.bashrc",
347 |         requested_number_of_nodes: int = 1,
348 |     ):
349 |         """
350 |         Constructor for the queues at NAS. Must specify the group_list
351 | 
352 |         Parameters
353 |         ----------
354 |         group_list:
355 |             The charge number or group for the group_list entry of the pbs header.
356 |             The associated PBS header line is "#PBS -W group_list={group_list}".
357 |         proc_type:
358 |             The type of processor to submit to. Full model names or the first 3 letters are accepted:
359 |             'cas', 'sky', 'bro', 'has', 'ivy', 'san', 'rom', 'mil', or GPU models such as 'sky_gpu' or 'mil_a100'.
360 |         queue_name:
361 |             Which queue to submit to: devel, debug, normal, long, etc.
362 |         time:
363 |             The requested job walltime in hours
364 |         profile_filename:
365 |             The file setting the environment to source inside the PBS job
366 |         """
367 |         if "sky_gpu" in proc_type.lower():
368 |             ncpus_per_node = 36
369 |             ngpus_per_node = 4
370 |             model = "sky_gpu"
371 |             mem = "200G"
372 |         elif "cas_gpu" in proc_type.lower():
373 |             ncpus_per_node = 48
374 |             ngpus_per_node = 4
375 |             model = "cas_gpu"
376 |             mem = "200G"
377 |         elif "rom_gpu" in proc_type.lower():
378 |             ncpus_per_node = 128
379 |             ngpus_per_node = 8
380 |             model = "rom_gpu"
381 |             mem = "700G"
382 |         elif "mil_a100" in proc_type.lower():
383 |             ncpus_per_node = 64
384 |             ngpus_per_node = 4
385 |             model = "mil_a100"
386 |             mem = "500G"
387 |         elif "cas" in proc_type.lower():
388 |             ncpus_per_node = 40
389 |             ngpus_per_node = 0
390 |             model = "cas_ait"
391 |         elif "sky" in proc_type.lower():
392 |             ncpus_per_node = 40
393 |             ngpus_per_node = 0
394 |             model = "sky_ele"
395 |         elif "bro" in proc_type.lower():
396 |             ncpus_per_node = 28
397 |             ngpus_per_node = 0
398 |             model = "bro"
399 |         elif "has" in proc_type.lower():
400 |             ncpus_per_node = 24
401 |             ngpus_per_node = 0
402 |             model = "has"
403 |         elif "ivy" in proc_type.lower():
404 |             ncpus_per_node = 20
405 |             ngpus_per_node = 0
406 |             model = "ivy"
407 |         elif "san" in proc_type.lower():
408 |             ncpus_per_node = 16
409 |             ngpus_per_node = 0
410 |             model = "san"
411 |         elif "rom" in proc_type.lower():
412 |             ncpus_per_node = 128
413 |             ngpus_per_node = 0
414 |             model = "rom_ait"
415 |         elif "mil" in proc_type.lower():
416 |             ncpus_per_node = 128
417 |             ngpus_per_node = 0
418 |             model = "mil_ait"
419 |         else:
420 |             raise ValueError("Unknown NAS processor selection")
421 | 
422 |         pbs = cls(
423 |             queue_name=queue_name,
424 |             ncpus_per_node=ncpus_per_node,
425 |             ngpus_per_node=ngpus_per_node,
426 |             queue_node_limit=int(1e6),
427 |             time=time,
428 |             mem=mem,
429 |             profile_filename=profile_filename,
430 |             requested_number_of_nodes=requested_number_of_nodes,
431 |         )
432 | 
433 |         pbs.group_list = group_list
434 |         pbs.model = model
435 |         return pbs
436 | 
437 |     @classmethod
438 |     def cf1(
439 |         cls,
440 |         account: str,
441 |         queue_name: str = "normal",
442 |         queue_node_limit: int = 30,
443 |         time: int = 24,
444 |         ncpus_per_node=64,
445 |         profile_filename: str = "~/.bashrc",
446 |         requested_number_of_nodes: int = 2,
447 |     ):
448 |         pbs = cls(
449 |             queue_name=queue_name,
450 |             queue_node_limit=queue_node_limit,
451 |             ncpus_per_node=ncpus_per_node,
452 |             time=time,
453 |             profile_filename=profile_filename,
454 |             requested_number_of_nodes=requested_number_of_nodes,
455 |         )
456 |         pbs.group_list = account
457 |         pbs.workdir_env_variable = "$SLURM_SUBMIT_DIR"
458 |         return pbs
459 | 
--------------------------------------------------------------------------------
/pbs4py/pbs_batch.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import time
4 | from datetime import datetime
5 | from typing import List
6 | from .pbs import PBS
7 | from .directory_utils import cd
8 | 
9 | 
10 | class BatchJob:
11 |     def __init__(self, name: str, body: List[str]):
12 |         """
13 |         Class for individual PBS jobs within a batch of jobs
14 | 
15 |         Can be used as a context manager to enter/exit a directory
16 |         with the job's name
17 |         """
18 | 
19 |         #: str: Name of the job.
20 |         self.name = name
21 | 
22 |         #: List[str]: list of commands to run in PBS job
23 |         self.body = body
24 | 
25 |         #: str: pbs job identifier returned by qsub
26 |         self.id = None
27 | 
28 |     def get_pbs_job_state(self) -> str:
29 |         """
30 |         Get the job's status after it has been submitted.
31 |         Returns the entry of job_state in the qstat information, e.g.,
32 |         'Q', 'R', 'F', 'H', etc.
33 | 
34 |         """
35 |         if self.id is not None:
36 |             return os.popen(f'qstat -xf {self.id} | grep job_state').read().split()[-1]
37 |         else:
38 |             return ''
39 | 
40 |     def __enter__(self):
41 |         self.savedPath = os.getcwd()
42 |         os.chdir(self.name)
43 | 
44 |     def __exit__(self, etype, value, traceback):
45 |         os.chdir(self.savedPath)
46 | 
47 | 
48 | class PBSBatch:
49 |     def __init__(self, pbs: PBS, jobs: List[BatchJob], use_separate_directories: bool = True):
50 |         """
51 |         Batch of PBS jobs. Assumes all jobs require the same
52 |         job request size. By default, separate directories with the job's name
53 |         will be used to separate output files.
54 | 
55 |         Parameters
56 |         ----------
57 |         pbs:
58 |             PBS handler that will be used to submit the jobs
59 |         jobs:
60 |             List of Job objects that will be run
61 |         use_separate_directories:
62 |             whether to run each job in a separate directory with the job's name
63 |         """
64 |         self.pbs = pbs
65 |         self.jobs = jobs
66 |         self.use_separate_directories = use_separate_directories
67 | 
68 |     def create_directories(self):
69 |         """
70 |         Create the set of directories with the jobs' names
71 |         """
72 |         for job in self.jobs:
73 |             if not os.path.exists(job.name):
74 |                 os.mkdir(job.name)
75 | 
76 |     def launch_all_jobs(self, wait_for_jobs_to_finish: bool = False,
77 |                         check_frequency_in_secs: float = 30):
78 |         """
79 |         Launch all of the jobs in the list. Stores the pbs
80 |         job id in the job objects.
81 | 
82 |         Parameters
83 |         ----------
84 |         wait_for_jobs_to_finish:
85 |             If True, the jobs will be submitted, and this function will not return
86 |             until all of the jobs are finished.
87 | 
88 |         check_frequency_in_secs:
89 |             Time interval to wait before checking if all jobs are done. Only relevant
90 |             if ``wait_for_jobs_to_finish`` is True.
91 |         """
92 |         self._launch_jobs_in_a_list(self.jobs)
93 |         if wait_for_jobs_to_finish:
94 |             self.wait_for_all_jobs_to_finish(check_frequency_in_secs=check_frequency_in_secs)
95 | 
96 |     def launch_jobs_with_limit(self, max_jobs_at_a_time: int = 20,
97 |                                check_frequency_in_secs: float = 30):
98 |         """
99 |         The "courteous" version of ``launch_all_jobs(wait_for_jobs_to_finish=True)``: it caps
100 |         the number of jobs queued or running at a time, since some people
101 |         may not like it if you submit 1000 jobs at once.
102 | 
103 |         Parameters
104 |         ----------
105 |         max_jobs_at_a_time:
106 |             Limit for number of jobs to have queued, running, or held at a time
107 | 
108 |         check_frequency_in_secs:
109 |             Time interval to wait before checking the jobs' statuses.
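
        Illustrative usage (job names and bodies are hypothetical; ``pbs`` is a PBS instance):

        >>> jobs = [BatchJob(f'case{i}', ['echo run']) for i in range(100)]
        >>> batch = PBSBatch(pbs, jobs)
        >>> batch.create_directories()
        >>> batch.launch_jobs_with_limit(max_jobs_at_a_time=10)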
110 | """ 111 | total_num_of_jobs = len(self.jobs) 112 | 113 | next_job_to_submit = 0 114 | while True: 115 | states = self._get_job_states(self.jobs[:next_job_to_submit]) 116 | num_active_jobs = self._count_number_of_jobs_running_queued_or_held(states) 117 | if num_active_jobs < max_jobs_at_a_time: 118 | end_index = np.min( 119 | (total_num_of_jobs, next_job_to_submit + max_jobs_at_a_time - num_active_jobs)) 120 | self._launch_jobs_in_a_list(self.jobs[next_job_to_submit:end_index]) 121 | next_job_to_submit = end_index 122 | 123 | states = self._get_job_states(self.jobs[:next_job_to_submit]) 124 | self._print_summary_of_job_states(states) 125 | if self._all_jobs_submitted(next_job_to_submit): 126 | if not self._any_jobs_are_still_running_queued_or_held(states): 127 | break 128 | time.sleep(check_frequency_in_secs) 129 | 130 | def wait_for_all_jobs_to_finish(self, check_frequency_in_secs: float = 30): 131 | """ 132 | A blocking check for all the jobs in the batch to finish. Can be paired with 133 | ``launch_all_jobs``. 134 | 135 | Parameters 136 | ---------- 137 | check_frequency_in_secs: 138 | How often to check and print the jobs' states 139 | """ 140 | while True: 141 | states = self._get_job_states(self.jobs) 142 | self._print_summary_of_job_states(states) 143 | if self._any_jobs_are_still_running_queued_or_held(states): 144 | time.sleep(check_frequency_in_secs) 145 | else: 146 | break 147 | 148 | def _launch_jobs_in_a_list(self, jobs: List[BatchJob]): 149 | for job in jobs: 150 | dirname = job.name if self.use_separate_directories else '.' 151 | with cd(dirname): 152 | job.id = self.pbs.launch(job.name, job.body, blocking=False) 153 | 154 | def _all_jobs_submitted(self, next_job_to_submit): 155 | return next_job_to_submit == len(self.jobs) 156 | 157 | def _get_job_states(self, jobs: List[BatchJob]) -> List[str]: 158 | states = [] 159 | for job in jobs: 160 | states.append(job.get_pbs_job_state()) 161 | return states 162 | 163 | def _count_number_of_jobs_running_queued_or_held(self, pbs_states: List[str]): 164 | return pbs_states.count('R') + pbs_states.count('Q') + pbs_states.count('H') 165 | 166 | def _any_jobs_are_still_running_queued_or_held(self, pbs_states): 167 | return self._count_number_of_jobs_running_queued_or_held(pbs_states) > 0 168 | 169 | def _print_summary_of_job_states(self, states: List[str]): 170 | running = states.count('R') 171 | queued = states.count('Q') 172 | finished = states.count('F') 173 | other = len(states) - running - queued - finished 174 | print(f'Job states at {datetime.now().isoformat()}:') 175 | print(f' Queued: {queued}') 176 | print(f' Running: {running}') 177 | print(f' Finished: {finished}') 178 | 179 | num_of_jobs_not_yet_submitted = len(self.jobs)-len(states) 180 | if num_of_jobs_not_yet_submitted > 0: 181 | print(f' Yet to submit: {num_of_jobs_not_yet_submitted}') 182 | print(f' Other: {other}') 183 | -------------------------------------------------------------------------------- /pbs4py/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/pbs4py/19a130db07d21358fd02954ef79ca38c61b8c811/pbs4py/scripts/__init__.py -------------------------------------------------------------------------------- /pbs4py/scripts/job_dir.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | A script to print the directory that a job is running in. 
4 | """ 5 | import argparse 6 | from pbs4py.job import PBSJob 7 | 8 | 9 | def arg_parser() -> argparse.ArgumentParser: 10 | parser = argparse.ArgumentParser( 11 | description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) 12 | 13 | parser.add_argument('job_id', help='The ID number of the job') 14 | return parser 15 | 16 | 17 | def main(): 18 | parser = arg_parser() 19 | 20 | args = parser.parse_args() 21 | job = PBSJob(args.job_id) 22 | print(job.workdir) 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /pbs4py/scripts/qdel_user_jobs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | A script to delete active PBS jobs of the current user. 4 | The list of jobs to be deleted can be filtered by id range, job name substring, and queue. 5 | For safety, the default behavior is to show the user which jobs will be deleted and ask for confirmation 6 | before any jobs are deleted. 7 | """ 8 | import os 9 | import argparse 10 | import re 11 | from typing import List 12 | 13 | from pbs4py.job import PBSJob 14 | 15 | 16 | def arg_parser() -> argparse.ArgumentParser: 17 | parser = argparse.ArgumentParser( 18 | description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) 19 | 20 | parser.add_argument('--id_range', 21 | nargs=2, 22 | default=(-1, -1), 23 | help='Delete jobs in a range of id numbers, [min id, max id]') 24 | parser.add_argument('--queue', 25 | default='', 26 | help='Delete jobs in a specific queue') 27 | parser.add_argument('--name', 28 | default='', 29 | help='Delete jobs in a specific string in the name') 30 | parser.add_argument('--confirm', 31 | action='store_true', 32 | dest='confirm', 33 | help='Whether to prompt the user for confirmation of before deleting') 34 | parser.add_argument('--no-confirm', dest='confirm', action='store_false') 35 | parser.set_defaults(confirm=True) 36 | return parser 37 | 38 | 39 | def get_active_jobs_for_user(): 40 | user_name = os.environ.get('USER') 41 | qstat_output = os.popen(f'qstat -u {user_name}').read().split('\n') 42 | 43 | # remove header from qstat command 44 | qstat_output = qstat_output[3:] 45 | 46 | jobs = [] 47 | for line in qstat_output: 48 | if line: 49 | id = int(re.match('\s*[0-9]+', line)[0]) 50 | jobs.append(PBSJob(str(id))) 51 | return jobs 52 | 53 | 54 | def filter_jobs_to_delete_by_id_range(user_jobs: List[PBSJob], min_id: int, max_id: int): 55 | return [job for job in user_jobs if (job.id >= min_id and job.id <= max_id)] 56 | 57 | 58 | def filter_jobs_to_delete_by_queue(user_jobs: List[PBSJob], queue: str): 59 | return [job for job in user_jobs if job.queue == queue] 60 | 61 | 62 | def filter_jobs_to_delete_by_name_substring(user_jobs: List[PBSJob], name_substring: str): 63 | return [job for job in user_jobs if name_substring in job.name] 64 | 65 | 66 | def delete_jobs(jobs: List[PBSJob]): 67 | for job in jobs: 68 | job.qdel(echo_command=True) 69 | 70 | 71 | def print_jobs_that_will_be_deleted(jobs_to_delete: List[PBSJob]): 72 | print('Found the following jobs:') 73 | print('------------------------') 74 | for job in jobs_to_delete: 75 | print(f'Job: id = {job.id}, name = {job.name}, queue: {job.queue}') 76 | 77 | 78 | def user_confirms(): 79 | prompt = 'Delete these jobs? 
[y/n]' 80 | valid = {"yes": True, "y": True, "no": False, "n": False} 81 | 82 | while True: 83 | print(prompt) 84 | choice = input().lower() 85 | if choice in valid: 86 | return valid[choice] 87 | else: 88 | print("Please respond with 'yes' or 'no' ", "(or 'y' or 'n').\n") 89 | 90 | 91 | def main(): 92 | parser = arg_parser() 93 | 94 | args = parser.parse_args() 95 | confirm = args.confirm 96 | min_id = int(args.id_range[0]) 97 | max_id = int(args.id_range[1]) 98 | queue = args.queue 99 | name_substring = args.name 100 | 101 | jobs_to_delete = get_active_jobs_for_user() 102 | 103 | if min_id > 0 and max_id > 0: 104 | print('Filtering by id range') 105 | jobs_to_delete = filter_jobs_to_delete_by_id_range(jobs_to_delete, min_id, max_id) 106 | if queue: 107 | print('Filtering by queue') 108 | jobs_to_delete = filter_jobs_to_delete_by_queue(jobs_to_delete, queue) 109 | if name_substring: 110 | print('Filtering by name') 111 | jobs_to_delete = filter_jobs_to_delete_by_name_substring(jobs_to_delete, name_substring) 112 | 113 | if len(jobs_to_delete) == 0: 114 | print(f'No active jobs found for user with specified filters') 115 | exit() 116 | 117 | if confirm: 118 | print_jobs_that_will_be_deleted(jobs_to_delete) 119 | if user_confirms(): 120 | delete_jobs(jobs_to_delete) 121 | else: 122 | print('Skipping') 123 | 124 | else: 125 | delete_jobs(jobs_to_delete) 126 | 127 | 128 | if __name__ == '__main__': 129 | main() 130 | -------------------------------------------------------------------------------- /pbs4py/slurm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | This is a SLURM class to be used with pbs4py and was modified from pbs.py 5 | 6 | Written/Copied by Matt Opgenorth 7 | ''' 8 | import os 9 | from typing import List, Union 10 | 11 | from pbs4py.launcher_base import Launcher 12 | 13 | 14 | class SLURM(Launcher): 15 | def __init__( 16 | self, 17 | queue_name: str = "normal", 18 | ncpus_per_node: int = 64, 19 | ngpus_per_node: int = 0, 20 | queue_node_limit: int = 30, 21 | time: int = 24, 22 | mem: str = None, 23 | profile_filename: str = "~/.bashrc", 24 | requested_number_of_nodes: int = 1, 25 | ): 26 | """ 27 | | A class for creating and running slurm jobs. 28 | | Defaults not set during instantiation can be adjusted by directly modifying attributes. 29 | 30 | Parameters 31 | ---------- 32 | queue_name: 33 | Queue name which goes on the "#SBATCH --partition {queue_name}" line of the slurm header 34 | ncpus_per_node: 35 | Number of CPU cores per node 36 | ngpus_per_node: 37 | Number of GPUs per node 38 | queue_node_limit: 39 | Maximum number of nodes allowed in this queue 40 | time: 41 | The requested job walltime in hours 42 | mem: 43 | The requested memory size. String to allow specifying in G, MB, etc. 44 | profile_file: 45 | The file setting the environment to source inside the SLURM job. Set to 46 | '' if you do not wish to source a file. 47 | requested_number_of_nodes: 48 | The number of compute nodes to request 49 | """ 50 | super().__init__(ncpus_per_node, ngpus_per_node, queue_node_limit, 51 | time, profile_filename, requested_number_of_nodes) 52 | 53 | #: The name of the queue which goes on the ``#SBATCH --partition {queue_name}`` 54 | #: line of the slurm header 55 | self.queue_name: str = queue_name 56 | 57 | #: The account for the account entry of the slurm header if necessary. 
58 | #: The associated SLURM header line is ``#SBATCH --account={account}`` 59 | self.account: str = None 60 | 61 | #: Requested memory size on the select line. Need to include units in the str. 62 | #: The associated SLURM header line is ``#SBATCH --mem={mem}`` 63 | self.mem: Union[str, None] = mem 64 | 65 | #: Index range for SLURM array of jobs 66 | #: The associated SLURM header line is ``#SBATCH --array={array_range}`` 67 | self.array_range: Union[str, None] = None 68 | 69 | #: ``sbatch --mail-type`` mail options. BEGIN, END, FAIL 70 | self.mail_options: str = None 71 | 72 | #: ``sbatch --mail-user`` mail list. Who to email when mail_options are triggered 73 | self.mail_list: Union[str, None] = None 74 | 75 | #: Type of dependency if dependency active. 76 | #: Default is 'afterok' which only launches the new job if the previous one was successful. 77 | self.dependency_type: str = "afterok" 78 | 79 | self.mpiexec: str = "mpiexec" 80 | self.ranks_per_node_flag: str = None 81 | 82 | self.workdir_env_variable = "$SLURM_SUBMIT_DIR" 83 | self.batch_file_extension = "slurm" 84 | self.mpiprocs_per_node = None 85 | self.requested_number_of_nodes = requested_number_of_nodes 86 | 87 | #: nodelist 88 | self.nodelist: str = None 89 | 90 | def _create_list_of_standard_header_options(self, job_name: str) -> List[str]: 91 | header_lines = [ 92 | self._create_hashbang(), 93 | self._create_job_line_of_header(job_name), 94 | self._create_queue_line_of_header(), 95 | self._create_nodes_line_of_header(), 96 | self._create_tasks_per_node_line_of_header(), 97 | self._create_walltime_line_of_header(), 98 | self._create_log_name_line_of_header(job_name), 99 | self._create_header_line_to_error_output(job_name), 100 | self._create_header_line_to_set_that_job_is_not_rerunnable(), 101 | ] 102 | return header_lines 103 | 104 | def _create_job_line_of_header(self, job_name: str) -> str: 105 | return f"#SBATCH --job-name={job_name}" 106 | 107 | def _create_queue_line_of_header(self) -> str: 108 | return f"#SBATCH --partition={self.queue_name}" 109 | 110 | def _create_nodes_line_of_header(self) -> str: 111 | return f'#SBATCH --nodes={self.requested_number_of_nodes}' 112 | 113 | def _create_tasks_per_node_line_of_header(self) -> str: 114 | return f'#SBATCH --ntasks-per-node={self.ncpus_per_node}' 115 | 116 | def _create_walltime_line_of_header(self) -> str: 117 | return f"#SBATCH --time={self.time}:00:00" 118 | 119 | def _create_log_name_line_of_header(self, job_name: str) -> str: 120 | return f"#SBATCH --output=qlog_{job_name}" 121 | 122 | def _create_header_line_to_error_output(self, job_name: str): 123 | return f"#SBATCH --error=err_{job_name}" 124 | 125 | def _create_header_line_to_set_that_job_is_not_rerunnable(self) -> str: 126 | return "#SBATCH --no-requeue" 127 | 128 | def _create_list_of_optional_header_lines(self, dependency): 129 | header_lines = [] 130 | header_lines.extend(self._create_account_header_line()) 131 | header_lines.extend(self._create_array_range_header_line()) 132 | header_lines.extend(self._create_mail_options_header_lines()) 133 | header_lines.extend(self._create_job_dependencies_header_line(dependency)) 134 | header_lines.extend(self._create_nodelist_header_line()) 135 | return header_lines 136 | 137 | def _create_account_header_line(self) -> List[str]: 138 | if self.account is not None: 139 | return [f"#SBATCH --account={self.account}"] 140 | else: 141 | return [] 142 | 143 | def _create_array_range_header_line(self) -> List[str]: 144 | if self.array_range is not None: 145 | return 
[f"#SBATCH --array={self.array_range}"] 146 | else: 147 | return [] 148 | 149 | def _create_mail_options_header_lines(self) -> List[str]: 150 | header_lines = [] 151 | if self.mail_options is not None: 152 | header_lines.append(f"#SBATCH --mail-type={self.mail_options}") 153 | if self.mail_list is not None: 154 | header_lines.append(f"#SBATCH --mail-user={self.mail_list}") 155 | return header_lines 156 | 157 | def _create_job_dependencies_header_line(self, dependency) -> List[str]: 158 | if dependency is not None: 159 | return [f"#SBATCH --dependency={self.dependency_type}:{dependency}"] 160 | else: 161 | return [] 162 | 163 | def _create_nodelist_header_line(self) -> List[str]: 164 | if self.nodelist is not None: 165 | return [f"#SBATCH --nodelist={self.nodelist}"] 166 | else: 167 | return [] 168 | 169 | def _run_job(self, job_filename: str, blocking: bool, print_command_output=True) -> str: 170 | options = "" 171 | if blocking: 172 | options += "-W" 173 | command_output = os.popen(f"sbatch {options} {job_filename}").read().strip() 174 | if print_command_output: 175 | print(command_output) 176 | return command_output 177 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "pbs4py" 7 | version = "1.1.0" 8 | description = "PBS scripting utilities" 9 | readme = "README.md" 10 | requires-python = ">=3.6" 11 | authors = [{ name = "Kevin Jacobson", email = "kevin.e.jacobson@nasa.gov" }] 12 | dependencies = ["numpy"] 13 | license = { file = "LICENSE"} 14 | 15 | [project.scripts] 16 | "qdel_user_jobs.py" = "pbs4py.scripts.qdel_user_jobs:main" 17 | "job_dir.py" = "pbs4py.scripts.job_dir:main" 18 | -------------------------------------------------------------------------------- /tests/job_test/empty_file: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/pbs4py/19a130db07d21358fd02954ef79ca38c61b8c811/tests/job_test/empty_file -------------------------------------------------------------------------------- /tests/pbs_test_files/golden0.lsf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env tcsh 2 | #BSUB -P ard149 3 | #BSUB -J test_job 4 | #BSUB -nnodes 2 5 | #BSUB -W 24:00 6 | #BSUB -N 7 | 8 | 9 | cd $LS_SUBCWD 10 | source_line 11 | 12 | command1 13 | command2 14 | -------------------------------------------------------------------------------- /tests/pbs_test_files/golden0.pbs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | #PBS -N test_job 3 | #PBS -q queue 4 | #PBS -l select=1:ncpus=5:mpiprocs=5 5 | #PBS -l walltime=24:00:00 6 | #PBS -o test_job_pbs.log 7 | #PBS -j oe 8 | #PBS -r n 9 | 10 | 11 | cd $PBS_O_WORKDIR 12 | source_line 13 | 14 | command1 15 | command2 16 | -------------------------------------------------------------------------------- /tests/pbs_test_files/golden0.slurm: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | #SBATCH --job-name=test_job 3 | #SBATCH --partition=queue 4 | #SBATCH --nodes=4 5 | #SBATCH --ntasks-per-node=20 6 | #SBATCH --time=13:00:00 7 | #SBATCH --output=qlog_test_job 8 | #SBATCH --error=err_test_job 9 | #SBATCH --no-requeue 10 | 11 | 12 | cd $SLURM_SUBMIT_DIR 13 | 
source_line 14 | 15 | command1 16 | command2 17 | -------------------------------------------------------------------------------- /tests/test_bsub.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from typing import List 4 | from pbs4py.bsub import BSUB 5 | 6 | test_directory = os.path.dirname(os.path.abspath(__file__)) 7 | test_profile = f'{test_directory}/testing_bashrc' 8 | 9 | 10 | def check_list_of_strings(actual: List[str], expected: List[str]): 11 | assert len(actual) == len(expected) 12 | for a, e in zip(actual, expected): 13 | assert a == e 14 | 15 | 16 | @pytest.fixture 17 | def bsub_header_test(): 18 | project = 'ard149' 19 | ngpu = 5 20 | time = 24 21 | hashbang = '#!/usr/bin/tcsh' 22 | bsub_header_test = BSUB(project, ngpus_per_node=ngpu, time=time, 23 | profile_filename=test_profile) 24 | bsub_header_test.hashbang = hashbang 25 | bsub_header_test.requested_number_of_nodes = 2 26 | return bsub_header_test 27 | 28 | 29 | def test_walltime_line(bsub_header_test: BSUB): 30 | bsub_header_test.time = 5 31 | line = bsub_header_test._create_wall_time_line_of_header() 32 | assert line == '#BSUB -W 5:00' 33 | 34 | 35 | def test_number_of_nodes_line(bsub_header_test: BSUB): 36 | line = bsub_header_test._create_number_of_nodes_line_of_header() 37 | assert line == '#BSUB -nnodes 2' 38 | 39 | 40 | def test_job_line(bsub_header_test: BSUB): 41 | job_name = 'test' 42 | line = bsub_header_test._create_job_name_line_of_header(job_name) 43 | assert line == '#BSUB -J test' 44 | 45 | 46 | def test_project_line(bsub_header_test: BSUB): 47 | bsub_header_test.project = 'ard149' 48 | line = bsub_header_test._create_project_line_of_header() 49 | assert line == '#BSUB -P ard149' 50 | 51 | 52 | def test_mail_header(bsub_header_test: BSUB): 53 | bsub_header_test.mail_when_complete = False 54 | header = bsub_header_test._create_mail_header_line() 55 | check_list_of_strings(header, []) 56 | 57 | bsub_header_test.mail_when_complete = True 58 | header = bsub_header_test._create_mail_header_line() 59 | check_list_of_strings(header, ['#BSUB -N']) 60 | 61 | 62 | def test_job_dependency_header(bsub_header_test: BSUB): 63 | header = bsub_header_test._create_job_dependency_header_line(None) 64 | check_list_of_strings(header, []) 65 | 66 | header = bsub_header_test._create_job_dependency_header_line('1234') 67 | check_list_of_strings(header, ['#BSUB -w ended(1234)']) 68 | 69 | 70 | def test_parse_job_id_from_bsub_output(bsub_header_test: BSUB): 71 | output = 'Job <1983914> is submitted to default queue .' 
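    # the job id is the integer between the angle brackets of the bsub output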
72 | id = bsub_header_test._parse_job_id_out_of_bsub_output(output) 73 | assert id == 1983914 74 | 75 | 76 | def test_create_command(bsub_header_test: BSUB): 77 | bsub_header_test.ngpus_per_node = 3 78 | bsub_header_test.requested_number_of_nodes = 2 79 | command = bsub_header_test.create_mpi_command('a.out', 'dog', openmp_threads=2) 80 | assert command == 'jsrun -n 6 -a 1 -c 2 -g 1 a.out &> dog.out' 81 | -------------------------------------------------------------------------------- /tests/test_bsub_regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import filecmp 3 | from pbs4py.bsub import BSUB 4 | 5 | test_directory = os.path.dirname(os.path.abspath(__file__)) 6 | test_profile = f'{test_directory}/testing_bashrc' 7 | 8 | 9 | def modify_golden_file_to_have_right_path_for_profile(golden_file: str, profile_filename: str): 10 | with open(golden_file, 'r') as fh: 11 | golden_file_contents = fh.readlines() 12 | golden_file_with_profile = [] 13 | for line in golden_file_contents: 14 | if line == 'source_line\n': 15 | golden_file_with_profile.append(f'source {profile_filename}\n') 16 | else: 17 | golden_file_with_profile.append(line) 18 | golden_mod = f'{test_directory}/test_output_files/golden_mod.lsf' 19 | with open(golden_mod, 'w') as fh: 20 | fh.writelines(golden_file_with_profile) 21 | return golden_mod 22 | 23 | 24 | def test_write_job_file_regression_check(): 25 | golden_file = f'{test_directory}/pbs_test_files/golden0.lsf' 26 | project = 'ard149' 27 | time = 24 28 | shell = 'tcsh' 29 | bsub = BSUB(project, time=time, profile_filename=test_profile) 30 | bsub.shell = shell 31 | bsub.requested_number_of_nodes = 2 32 | 33 | job_name = 'test_job' 34 | job_body = ['command1', 'command2'] 35 | bsub_file = f'{test_directory}/test_output_files/test.lsf' 36 | bsub.write_job_file(bsub_file, job_name, job_body) 37 | 38 | golden_mod = modify_golden_file_to_have_right_path_for_profile( 39 | golden_file, bsub.profile_filename) 40 | 41 | assert filecmp.cmp(bsub_file, golden_mod) 42 | 43 | 44 | if __name__ == '__main__': 45 | test_write_job_file_regression_check() 46 | -------------------------------------------------------------------------------- /tests/test_fake_pbs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pbs4py import FakePBS 3 | 4 | test_directory = os.path.dirname(os.path.abspath(__file__)) 5 | 6 | 7 | def test_fakePBS(): 8 | pbs = FakePBS(profile_filename=f'{test_directory}/testing_bashrc') 9 | job_name = 'test' 10 | 11 | file = 'fake_file.txt' 12 | assert not os.path.isfile(f'{test_directory}/{file}') 13 | job_body = [f'touch {test_directory}/{file}'] 14 | job_id = pbs.launch(job_name, job_body) 15 | assert os.path.isfile(f'{test_directory}/{file}') 16 | assert job_id == 'FakePBS.0' 17 | os.system(f'rm {test_directory}/{file}') 18 | -------------------------------------------------------------------------------- /tests/test_job.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pbs4py.job import PBSJob 3 | 4 | class FakeKJob(PBSJob): 5 | def _run_qstat_to_get_full_job_attributes(self): 6 | stand_in_output = [ 7 | "Job Id: 4259576.pbssrv1", 8 | " Job_Name = oat_steady_l6", 9 | " Job_Owner = kejacob1@k4-li1-ib0", 10 | " job_state = Q", 11 | " queue = K4-standard", 12 | " server = pbssrv1", 13 | " Checkpoint = u", 14 | " ctime = Thu Oct 24 12:26:31 2024", 15 | " Error_Path = 
k4-li1-ib0:/lustre3/hpnobackup2/kejacob1/projects/rca/buffet/c", 16 | " ases/oat15a/ncfv_rans_pointwise_revised_grid_cc/grid_l6/aoa3.6/steady/o", 17 | " at_steady_l6.e4259576", 18 | " Hold_Types = n", 19 | " Join_Path = oe", 20 | " Keep_Files = n", 21 | " Mail_Points = a", 22 | " mtime = Thu Oct 24 12:26:32 2024", 23 | " Output_Path = k4-li1-ib0.ccf-beowulf.ndc.nasa.gov:/lustre3/hpnobackup2/keja", 24 | " cob1/projects/rca/buffet/cases/oat15a/ncfv_rans_pointwise_revised_grid_", 25 | " cc/grid_l6/aoa3.6/steady/oat_steady_l6_pbs.log", 26 | " Priority = 0", 27 | " qtime = Thu Oct 24 12:26:31 2024", 28 | " Rerunable = False", 29 | " Resource_List.mem = 96000mb", 30 | " Resource_List.mpiprocs = 400", 31 | " Resource_List.ncpus = 400", 32 | " Resource_List.nodect = 10", 33 | " Resource_List.nodegroup = K4-open", 34 | " Resource_List.place = scatter:excl", 35 | " Resource_List.select = 10:ncpus=40:mpiprocs=40", 36 | " Resource_List.walltime = 72:00:00", 37 | " substate = 10", 38 | " Variable_List = PBS_O_HOME=/u/kejacob1,PBS_O_LANG=C,PBS_O_LOGNAME=kejacob1,", 39 | " PBS_O_PATH=/hpnobackup2/kejacob1/projects/cbse/cbse_clean/cbse/build_o", 40 | " pt/bin:/u/kejacob1/.local/bin:/u/kejacob1/bin:/usr/local/pkgs-viz/cuda_", 41 | " 12.2.2/bin:/usr/local/pkgs-viz/cuda_12.2.2/nvvm/bin:/usr/local/pkgs-viz", 42 | " /cuda_12.2.2/nsight-systems-2023.2.3/bin:/u/shared/fun3d/fun3d_users/mo", 43 | " dules/ParMETIS/4.0.3-mpt-2.25-intel_2019.5.281/bin:/opt/hpe/hpc/mpt/mpt", 44 | " -2.25/bin:/usr/local/pkgs-modules/intel_2019/inspector/bin64:/usr/local", 45 | " /pkgs-modules/intel_2019/advisor/bin64:/usr/local/pkgs-modules/intel_20", 46 | " 19/compilers_and_libraries_2019.5.281/linux/bin/intel64:/usr/local/pkgs", 47 | " -modules/intel_2019/vtune_amplifier/bin64:/hpnobackup2/shared/kejacob1/", 48 | " modules/gdb/python_3.9.5/bin:/usr/local/pkgs-modules/Python_3.9.5/bin:/", 49 | " hpnobackup2/shared/kejacob1/modules/clang-format/16.0.6/bin:/usr/local/", 50 | " pkgs-modules/gcc_8.2.0/bin:/usr/local/pkgs-modules/autoconf_2.72/bin:/u", 51 | " sr/local/pkgs/modules_4.2.4/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:", 52 | " /sbin,PBS_O_MAIL=/var/spool/mail/kejacob1,PBS_O_SHELL=/bin/bash,", 53 | " PBS_O_WORKDIR=/lustre3/hpnobackup2/kejacob1/projects/rca/buffet/cases/", 54 | " oat15a/ncfv_rans_pointwise_revised_grid_cc/grid_l6/aoa3.6/steady,", 55 | " PBS_O_SYSTEM=Linux,PBS_O_QUEUE=K4-route,PBS_O_HOST=k4-li1-ib0", 56 | " comment = Not Running: Queue K4-standard per-user limit reached on resource", 57 | " ncpus", 58 | " etime = Thu Oct 24 12:26:31 2024", 59 | " eligible_time = 00:00:01", 60 | " Submit_arguments = oat_steady_l6.pbs", 61 | " project = _pbs_project_default", 62 | " Submit_Host = k4-li1-ib0"] 63 | return stand_in_output 64 | 65 | class FakeKJobOld(PBSJob): 66 | def _run_qstat_to_get_full_job_attributes(self): 67 | stand_in_output = [ 68 | " Job: 2493765.pbssrv2", "Job_Name = sample0", "Job_Owner = kejacob1@k4-li1-ib0", 69 | "resources_used.cpupercent = 100", "resources_used.cput = 00:00:02", 70 | "resources_used.mem = 1528kb", "resources_used.ncpus = 16", 71 | "resources_used.vmem = 15936kb", "resources_used.walltime = 00:00:02", "job_state = F", 72 | "queue = K3a-standard", "server = pbssrv2", "Checkpoint = u", 73 | "ctime = 1649348639 (Thu Apr 07 12:23:59 EDT 2022)", 74 | "Error_Path = k4-li1-ib0:/lustre3/hpnobackup2/kejacob1/projects/cad_to_solution/pbs4py/examples/batch_with_job_limit/sample0/sample0.e2493765", 75 | "exec_host = k3ar5n1/0*16", "exec_vnode = (k3ar5n1:ncpus=16)", "Hold_Types = n", 76 | "Join_Path 
= oe", "Keep_Files = n", "Mail_Points = a", 77 | "mtime = 1649348653 (Thu Apr 07 12:24:13 EDT 2022)", 78 | "Output_Path = k4-li1-ib0.ccf-beowulf.larc.nasa.gov:/lustre3/hpnobackup2/kejacob1/projects/cad_to_solution/pbs4py/examples/batch_with_job_limit/sample0/sample0_pbs.log", 79 | "Priority = 0", "qtime = 1649348639 (Thu Apr 07 12:23:59 EDT 2022)", 80 | "Rerunable = False", "Resource_List.mem = 31gb", "Resource_List.mpiprocs = 16", 81 | "Resource_List.ncpus = 16", "Resource_List.nodect = 1", 82 | "Resource_List.nodegroup = K3a-open", "Resource_List.place = scatter:excl", 83 | "Resource_List.select = 1:ncpus=16:mpiprocs=16", "Resource_List.walltime = 72:00:00", 84 | "stime = 1649348640 (Thu Apr 07 12:24:00 EDT 2022)", "session_id = 22053", 85 | "jobdir = /u/kejacob1", "substate = 92", 86 | "Variable_List = PBS_O_SYSTEM=Linux,PBS_O_SHELL=/bin/bash,PBS_O_HOME=/u/kejacob1,PBS_O_HOST=k4-li1-ib0,PBS_O_LOGNAME=kejacob1,PBS_O_WORKDIR=/lustre3/hpnobackup2/kejacob1/projects/cad_to_solution/pbs4py/examples/batch_with_job_limit/sample0,PBS_O_LANG=C,PBS_O_PATH=/usr/local/pkgs-viz/cuda_11.0.167/bin:/usr/local/pkgs-viz/cuda_11.0.167/nvvm/bin:/u/kejacob1/bin/gdb/bin:/u/kejacob1/.local/bin:/u/kejacob1/bin:/usr/local/pkgs-modules/cmake_3.6.3/bin:/usr/local/pkgs-modules/intel_2018.0.033/inspector/bin64:/usr/local/pkgs-modules/intel_2018.0.033/advisor/bin64:/usr/local/pkgs-modules/intel_2018.0.033/compilers_and_libraries_2018.3.222/linux/bin/intel64:/usr/local/pkgs-modules/intel_2018.0.033/vtune_amplifier/bin64:/usr/local/pkgs-modules/intel_2018.0.033/compilers_and_libraries_2018.3.222/debugger_2018/gdb/intel64/bin:/usr/local/pkgs-modules/openmpi_3.0.1_intel_2018/bin:/usr/local/pkgs-modules/gcc_6.2.0/bin:/usr/local/pkgs-modules/tecplot360ex-2018R1/bin:/usr/local/pkgs-modules/tecplot360ex-2018R2/360ex_2018r2/bin:/lustre3/hpnobackup2/kejacob1/projects/post2/post2_env/bin:/usr/local/pkgs-modules/Python_3.7.1/bin:/usr/local/pkgs/modules_4.2.4/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin,PBS_O_QUEUE=K3a-route,PBS_O_MAIL=/var/spool/mail/kejacob1", 87 | "comment = Job run at Thu Apr 07 at 12:24 on (k3ar5n1:ncpus=16) and finished", 88 | "etime = 1649348639 (Thu Apr 07 12:23:59 EDT 2022)", "run_count = 1", 89 | "eligible_time = 00:00:00", "Stageout_status = 1", "Exit_status = 0", 90 | "Submit_arguments = sample0.pbs", 91 | "history_timestamp = 1649348653", "project = _pbs_project_default"] 92 | return stand_in_output 93 | 94 | 95 | class FakeNASJob(PBSJob): 96 | def _run_qstat_to_get_full_job_attributes(self): 97 | stand_in_output = [ 98 | "Job: 13198744.pbspl1.nas.nasa.gov", 99 | " Job_Name = C006ste", 100 | " Job_Owner = kejacob1@pfe23.nas.nasa.gov", 101 | " job_state = Q", 102 | " queue = devel", 103 | " server = pbspl1.nas.nasa.gov", 104 | " Checkpoint = u", 105 | " ctime = 1649355753 (Thu Apr 07 11:22:33 PDT 2022)", 106 | " Error_Path = pfe23.nas.nasa.gov:/nobackup/kejacob1/projects/sfe/support/C006ste.e13198744", 107 | " group_list = c1454", 108 | " Hold_Types = n", 109 | " Join_Path = oe", 110 | " Keep_Files = n", 111 | " Mail_Points = a", 112 | " mtime = 1649355753 (Thu Apr 07 11:22:33 PDT 2022)", 113 | " Output_Path = pfe23.nas.nasa.gov:/nobackup/kejacob1/projects/sfe/support/C006ste.o13198744", 114 | " Priority = 0", 115 | " qtime = 1649355753 (Thu Apr 07 11:22:33 PDT 2022)", 116 | " Rerunable = False", 117 | " Resource_List.mpiprocs = 640", 118 | " Resource_List.ncpus = 640", 119 | " Resource_List.nobackupp2 = 1", 120 | " Resource_List.nodect = 16", 121 | " Resource_List.place = scatter:excl", 122 | " 
Resource_List.select = 16:ncpus=40:mpiprocs=40:model=sky_ele", 123 | " Resource_List.walltime = 02:00:00", 124 | " schedselect = 16:ncpus=40:mpiprocs=40:model=sky_ele:aoe=toss3:bigmem=False:reboot=free", 125 | " substate = 10", 126 | " Variable_List = PBS_O_MAIL=/var/mail/kejacob1,PBS_O_PATH=/home1/kejacob1/.local/bin:/home1/kejacob1/bin:/nasa/pkgsrc/toss3/2021Q2/views/python/3.9.5/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/bin:/usr/X11R6/bin:/PBS/bin:/usr/sbin:/sbin:/opt/c3/bin:/opt/sgi/sbin:/opt/sgi/bin,PBS_O_HOME=/home1/kejacob1,PBS_O_SHELL=/bin/bash,PBS_O_TZ=PST8PDT,PBS_O_SYSTEM=Linux,PBS_O_LOGNAME=kejacob1,PBS_O_LANG=C,PBS_O_WORKDIR=/nobackup/kejacob1/projects/sfe/support,PBS_O_QUEUE=devel,PBS_O_HOST=pfe23.nas.nasa.gov", 127 | " euser = kejacob1", 128 | " egroup = c1454", 129 | " queue_type = E", 130 | " etime = 1649355753 (Thu Apr 07 11:22:33 PDT 2022)", 131 | " eligible_time = 00:00:00", 132 | " Submit_arguments = pfe.pbs", 133 | " project = _pbs_project_default", 134 | " Submit_Host = pfe23.nas.nasa.gov"] 135 | return stand_in_output 136 | 137 | 138 | class FakeUnknownJob(PBSJob): 139 | def _run_qstat_to_get_full_job_attributes(self) -> str: 140 | return 'qstat: Unknown Job Id 123456.pbssrv2' 141 | 142 | 143 | def test_read_K_properties_from_qstat(): 144 | job = FakeKJob('2493761') 145 | 146 | assert job.id == '2493761' 147 | assert job.name == 'oat_steady_l6' 148 | assert job.queue == 'K4-standard' 149 | assert job.state == 'Q' 150 | assert job.workdir == '/lustre3/hpnobackup2/kejacob1/projects/rca/buffet/cases/oat15a/ncfv_rans_pointwise_revised_grid_cc/grid_l6/aoa3.6/steady' 151 | assert job.model == '' 152 | assert job.ncpus_per_node == 40 153 | assert job.requested_number_of_nodes == 10 154 | 155 | def test_read_K_old_properties_from_qstat(): 156 | job = FakeKJobOld('2493765') 157 | 158 | assert job.id == '2493765' 159 | assert job.name == 'sample0' 160 | assert job.queue == 'K3a-standard' 161 | assert job.state == 'F' 162 | assert job.workdir == '/lustre3/hpnobackup2/kejacob1/projects/cad_to_solution/pbs4py/examples/batch_with_job_limit/sample0' 163 | assert job.model == '' 164 | assert job.ncpus_per_node == 16 165 | assert job.requested_number_of_nodes == 1 166 | 167 | 168 | def test_read_NAS_properties_from_qstat(): 169 | job = FakeNASJob('13198744') 170 | 171 | assert job.id == '13198744' 172 | assert job.name == 'C006ste' 173 | assert job.queue == 'devel' 174 | assert job.state == 'Q' 175 | assert job.workdir == '/nobackup/kejacob1/projects/sfe/support' 176 | assert job.model == 'sky_ele' 177 | assert job.ncpus_per_node == 40 178 | assert job.requested_number_of_nodes == 16 179 | 180 | 181 | def test_unknown_job(): 182 | job = FakeUnknownJob('123456') 183 | assert job.id == '123456' 184 | assert job.name == '' 185 | assert job.queue == '' 186 | assert job.state == '' 187 | assert job.workdir == '' 188 | assert job.model == '' 189 | assert job.ncpus_per_node == 0 190 | assert job.requested_number_of_nodes == 0 191 | -------------------------------------------------------------------------------- /tests/test_launch_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from pbs4py.launcher_base import Launcher 4 | 5 | test_directory = os.path.dirname(os.path.abspath(__file__)) 6 | test_profile = f'{test_directory}/testing_bashrc' 7 | 8 | 9 | @pytest.fixture 10 | def launcher(): 11 | return Launcher(1, 1, 1, 1, test_profile, 1) 12 | 13 | 14 | def test_profile_file_checking(): 15 | real_file = 
'pbs4py_unit_test_dummy.txt' 16 | os.system(f'touch {real_file}') 17 | 18 | launcher = Launcher(1, 1, 1, 1, real_file, 1) 19 | assert launcher.profile_filename == real_file 20 | os.system(f'rm {real_file}') 21 | 22 | nonexistent_file = "i_am_not_a_file.xyz" 23 | with pytest.raises(FileNotFoundError): 24 | launcher.profile_filename = nonexistent_file 25 | 26 | 27 | def test_output_redirection(launcher: Launcher): 28 | launcher.shell = 'tcsh' 29 | assert launcher._redirect_shell_output('dog.out') == '>& dog.out' 30 | 31 | launcher.shell = 'bash' 32 | assert launcher._redirect_shell_output('dog.out') == '&> dog.out' 33 | 34 | launcher.tee_output = True 35 | assert launcher._redirect_shell_output('dog.out') == '2>&1 | tee dog.out' 36 | 37 | 38 | def test_create_mpi_command_openmpi(launcher: Launcher): 39 | launcher.ncpus_per_node = 30 40 | launcher.mpiexec = 'mpirun' 41 | dummy_command = 'foo' 42 | output_root_name = 'dog' 43 | 44 | if not launcher._using_mpt(): 45 | mpi_command = launcher.create_mpi_command(dummy_command, output_root_name) 46 | expected_command = 'mpirun foo &> dog.out' 47 | assert mpi_command == expected_command 48 | 49 | mpi_command = launcher.create_mpi_command(dummy_command, output_root_name, openmp_threads=5) 50 | expected_command = 'OMP_NUM_THREADS=5 OMP_PLACES=cores OMP_PROC_BIND=close mpirun --npernode 6 foo &> dog.out' 51 | assert mpi_command == expected_command 52 | 53 | mpi_command = launcher.create_mpi_command(dummy_command, output_root_name, ranks_per_node=3) 54 | expected_command = 'mpirun --npernode 3 foo &> dog.out' 55 | assert mpi_command == expected_command 56 | 57 | 58 | def test_mpiprocs(launcher: Launcher): 59 | launcher.ncpus_per_node = 20 60 | assert launcher.mpiprocs_per_node == 20 61 | 62 | launcher.ncpus_per_node = 40 63 | assert launcher.mpiprocs_per_node == 40 64 | 65 | launcher.mpiprocs_per_node = 4 66 | assert launcher.mpiprocs_per_node == 4 67 | assert launcher.ncpus_per_node == 40 68 | -------------------------------------------------------------------------------- /tests/test_output_files/.empty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/pbs4py/19a130db07d21358fd02954ef79ca38c61b8c811/tests/test_output_files/.empty -------------------------------------------------------------------------------- /tests/test_pbs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from pbs4py import PBS 4 | 5 | test_directory = os.path.dirname(os.path.abspath(__file__)) 6 | test_profile = f'{test_directory}/testing_bashrc' 7 | 8 | 9 | def test_k3a_class_method(): 10 | k3a = PBS.k3a(profile_filename=test_profile) 11 | assert k3a.queue_name == 'K3a-route' 12 | assert k3a.ncpus_per_node == 16 13 | assert k3a.queue_node_limit == 25 14 | 15 | 16 | def test_k3b_class_method(): 17 | k3 = PBS.k3b(profile_filename=test_profile) 18 | assert k3.queue_name == 'K3b-route' 19 | assert k3.ncpus_per_node == 28 20 | assert k3.queue_node_limit == 74 21 | 22 | 23 | def test_k3c_class_method(): 24 | k3 = PBS.k3c(profile_filename=test_profile) 25 | assert k3.queue_name == 'K3c-route' 26 | assert k3.ncpus_per_node == 28 27 | assert k3.queue_node_limit == 74 28 | 29 | 30 | def test_k4_class_method(): 31 | k4 = PBS.k4(profile_filename=test_profile) 32 | assert k4.queue_name == 'K4-route' 33 | assert k4.ncpus_per_node == 40 34 | assert k4.queue_node_limit == 16 35 | 36 | 37 | def test_k4_v100_class_method(): 38 | k4v100 = 
PBS.k4_v100(profile_filename=test_profile) 39 | assert k4v100.queue_name == 'K4-V100' 40 | assert k4v100.ncpus_per_node == 4 41 | assert k4v100.queue_node_limit == 4 42 | 43 | 44 | def test_k5_a100_40_class_method(): 45 | k5 = PBS.k5_a100_40(profile_filename=test_profile) 46 | assert k5.queue_name == 'K5-A100-40' 47 | assert k5.ncpus_per_node == 8 48 | assert k5.queue_node_limit == 2 49 | 50 | 51 | def test_k5_a100_80_class_method(): 52 | k5 = PBS.k5_a100_80(profile_filename=test_profile) 53 | assert k5.queue_name == 'K5-A100-80' 54 | assert k5.ncpus_per_node == 8 55 | assert k5.queue_node_limit == 2 56 | 57 | 58 | def test_nas_cascadelake_class_method(): 59 | nas = PBS.nas('n1337', 'cas', profile_filename=test_profile) 60 | assert nas.group_list == 'n1337' 61 | assert nas.ncpus_per_node == 40 62 | assert nas.model == 'cas_ait' 63 | 64 | 65 | def test_nas_skylake_class_method(): 66 | nas = PBS.nas('n1337', 'skylake', profile_filename=test_profile) 67 | assert nas.group_list == 'n1337' 68 | assert nas.ncpus_per_node == 40 69 | assert nas.model == 'sky_ele' 70 | 71 | 72 | def test_nas_broadwell_class_method(): 73 | nas = PBS.nas('n1337', 'bro', profile_filename=test_profile) 74 | assert nas.group_list == 'n1337' 75 | assert nas.ncpus_per_node == 28 76 | assert nas.model == 'bro' 77 | 78 | 79 | def test_nas_haswell_class_method(): 80 | nas = PBS.nas('n1337', 'has', profile_filename=test_profile) 81 | assert nas.group_list == 'n1337' 82 | assert nas.ncpus_per_node == 24 83 | assert nas.model == 'has' 84 | 85 | 86 | def test_nas_ivybridge_class_method(): 87 | nas = PBS.nas('n1337', 'ivy', profile_filename=test_profile) 88 | assert nas.group_list == 'n1337' 89 | assert nas.ncpus_per_node == 20 90 | assert nas.model == 'ivy' 91 | 92 | 93 | def test_nas_sandybridge_class_method(): 94 | nas = PBS.nas('n1337', 'san', profile_filename=test_profile) 95 | assert nas.group_list == 'n1337' 96 | assert nas.ncpus_per_node == 16 97 | assert nas.model == 'san' 98 | 99 | 100 | def test_nas_mil_class_method(): 101 | nas = PBS.nas('n1337', 'mil', profile_filename=test_profile) 102 | assert nas.group_list == 'n1337' 103 | assert nas.ncpus_per_node == 128 104 | assert nas.model == 'mil_ait' 105 | 106 | 107 | def test_nas_rom_class_method(): 108 | nas = PBS.nas('n1337', 'rom', profile_filename=test_profile) 109 | assert nas.group_list == 'n1337' 110 | assert nas.ncpus_per_node == 128 111 | assert nas.model == 'rom_ait' 112 | 113 | 114 | def test_nas_mil_a100_class_method(): 115 | nas = PBS.nas('n1337', 'mil_a100', profile_filename=test_profile) 116 | assert nas.group_list == 'n1337' 117 | assert nas.ncpus_per_node == 64 118 | assert nas.ngpus_per_node == 4 119 | assert nas.mem == '500G' 120 | assert nas.model == 'mil_a100' 121 | 122 | 123 | def test_nas_sky_gpu_class_method(): 124 | nas = PBS.nas('n1337', 'sky_gpu', profile_filename=test_profile) 125 | assert nas.group_list == 'n1337' 126 | assert nas.ncpus_per_node == 36 127 | assert nas.ngpus_per_node == 4 128 | assert nas.mem == '200G' 129 | assert nas.model == 'sky_gpu' 130 | 131 | 132 | def test_nas_cas_gpu_class_method(): 133 | nas = PBS.nas('n1337', 'cas_gpu', profile_filename=test_profile) 134 | assert nas.group_list == 'n1337' 135 | assert nas.ncpus_per_node == 48 136 | assert nas.ngpus_per_node == 4 137 | assert nas.mem == '200G' 138 | assert nas.model == 'cas_gpu' 139 | 140 | 141 | def test_nas_rom_gpu_class_method(): 142 | nas = PBS.nas('n1337', 'rom_gpu', profile_filename=test_profile) 143 | assert nas.group_list == 'n1337' 144 | assert 
nas.ncpus_per_node == 128 145 | assert nas.ngpus_per_node == 8 146 | assert nas.mem == '700G' 147 | assert nas.model == 'rom_gpu' 148 | 149 | 150 | def test_nas_class_method_with_bad_queue_name(): 151 | with pytest.raises(ValueError): 152 | PBS.nas('n1337', 'not_a_queue', profile_filename=test_profile) 153 | 154 | 155 | def test_cf1_class_method(): 156 | cf1 = PBS.cf1('acct', profile_filename=test_profile) 157 | assert cf1.queue_name == "normal" 158 | assert cf1.group_list == 'acct' 159 | assert cf1.workdir_env_variable == "$SLURM_SUBMIT_DIR" 160 | assert cf1.queue_node_limit == 30 161 | assert cf1.ncpus_per_node == 64 162 | -------------------------------------------------------------------------------- /tests/test_pbs_batch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from typing import List 4 | 5 | from pbs4py import PBSBatch, BatchJob 6 | from pbs4py.directory_utils import cd 7 | 8 | test_directory = os.path.dirname(os.path.abspath(__file__)) 9 | 10 | 11 | class MockJob(BatchJob): 12 | def __init__(self, name: str, body: List[str]): 13 | super().__init__(name, body) 14 | self.state_check_count = 0 15 | 16 | def get_pbs_job_state(self) -> str: 17 | self.state_check_count += 1 18 | if self.state_check_count == 3: 19 | return 'F' 20 | elif self.state_check_count == 2: 21 | return 'R' 22 | elif self.state_check_count == 1: 23 | return 'Q' 24 | 25 | 26 | class MockPBS: 27 | def __init__(self): 28 | self.id_counter = -1 29 | 30 | def launch(self, job_name, job_body, blocking=True): 31 | self.id_counter += 1 32 | return str(self.id_counter) 33 | 34 | 35 | @pytest.fixture 36 | def batch(): 37 | jobs = [MockJob('job0', ['ls']), 38 | MockJob('job1', ['echo Hello World!']), 39 | MockJob('job2', ['pwd'])] 40 | return PBSBatch(MockPBS(), jobs, use_separate_directories=False) 41 | 42 | 43 | def test_create_directories(batch: PBSBatch): 44 | expected_dirs = ['job0', 'job1', 'job2'] 45 | 46 | with cd(test_directory): 47 | for d in expected_dirs: 48 | assert not os.path.exists(d) 49 | 50 | batch.create_directories() 51 | 52 | for d in expected_dirs: 53 | assert os.path.exists(d) 54 | os.system(f'rm -r {d}') 55 | 56 | 57 | def test_launch(batch: PBSBatch): 58 | batch.launch_all_jobs() 59 | for i, job in enumerate(batch.jobs): 60 | assert str(i) == job.id 61 | 62 | 63 | def test_wait_for_all_jobs_to_finish(batch: PBSBatch): 64 | batch.wait_for_all_jobs_to_finish(check_frequency_in_secs=0.1) 65 | 66 | for job in batch.jobs: 67 | assert job.state_check_count == 3 68 | 69 | 70 | def test_all_jobs_submitted(batch: PBSBatch): 71 | assert batch._all_jobs_submitted(3) 72 | assert not batch._all_jobs_submitted(2) 73 | -------------------------------------------------------------------------------- /tests/test_pbs_batch_job.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | 4 | from pbs4py import BatchJob 5 | 6 | test_directory = os.path.dirname(os.path.abspath(__file__)) 7 | 8 | 9 | @pytest.fixture 10 | def job(): 11 | return BatchJob('job_test', ['ls']) 12 | 13 | 14 | def test_job_directory_context_manager(job: BatchJob): 15 | cwd = os.getcwd() 16 | os.chdir(test_directory) 17 | 18 | test_file = 'empty_file' 19 | assert not os.path.exists(test_file) 20 | with job: 21 | assert os.path.exists(test_file) 22 | 23 | assert not os.path.exists(test_file) 24 | 25 | os.chdir(cwd) 26 | 27 | 28 | def test_job_state_before_launch(job: BatchJob): 29 | assert 
job.get_pbs_job_state() == '' 30 | -------------------------------------------------------------------------------- /tests/test_pbs_header.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from typing import List 4 | from pbs4py import PBS 5 | 6 | test_directory = os.path.dirname(os.path.abspath(__file__)) 7 | test_profile = f'{test_directory}/testing_bashrc' 8 | 9 | 10 | def check_list_of_strings(actual: List[str], expected: List[str]): 11 | assert len(actual) == len(expected) 12 | for a, e in zip(actual, expected): 13 | assert a == e 14 | 15 | 16 | @pytest.fixture 17 | def pbs_header_test(): 18 | queue_name = 'queue' 19 | ncpus_per_node = 5 20 | queue_node_limit = 10 21 | time = 24 22 | hashbang = '#!/usr/bin/tcsh' 23 | pbs_header_test = PBS(queue_name=queue_name, ncpus_per_node=ncpus_per_node, 24 | queue_node_limit=queue_node_limit, time=time, 25 | profile_filename=test_profile) 26 | pbs_header_test.hashbang = hashbang 27 | pbs_header_test.requested_number_of_nodes = 2 28 | return pbs_header_test 29 | 30 | 31 | def test_walltime_line(pbs_header_test: PBS): 32 | pbs_header_test.time = 5 33 | line = pbs_header_test._create_walltime_line_of_header() 34 | assert line == '#PBS -l walltime=5:00:00' 35 | 36 | 37 | def test_log_line(pbs_header_test: PBS): 38 | job_name = 'test' 39 | line = pbs_header_test._create_log_name_line_of_header(job_name) 40 | assert line == '#PBS -o test_pbs.log' 41 | 42 | 43 | def test_join_output_line(pbs_header_test: PBS): 44 | line = pbs_header_test._create_header_line_to_join_standard_and_error_output() 45 | assert line == '#PBS -j oe' 46 | 47 | 48 | def test_rerunnable_line(pbs_header_test: PBS): 49 | line = pbs_header_test._create_header_line_to_set_that_job_is_not_rerunnable() 50 | assert line == '#PBS -r n' 51 | 52 | 53 | def test_select_line_with_no_model_or_mem_defined(pbs_header_test: PBS): 54 | header = pbs_header_test._create_select_line_of_header() 55 | expected = "#PBS -l select=2:ncpus=5:mpiprocs=5" 56 | assert header == expected 57 | 58 | 59 | def test_select_line_with_model_defined(pbs_header_test: PBS): 60 | pbs_header_test.model = 'bro' 61 | header = pbs_header_test._create_select_line_of_header() 62 | expected = "#PBS -l select=2:ncpus=5:mpiprocs=5:model=bro" 63 | assert header == expected 64 | 65 | 66 | def test_select_line_with_gpus(pbs_header_test: PBS): 67 | pbs_header_test.ngpus_per_node = 2 68 | header = pbs_header_test._create_select_line_of_header() 69 | expected = "#PBS -l select=2:ncpus=5:ngpus=2:mpiprocs=5" 70 | assert header == expected 71 | 72 | 73 | def test_select_line_with_mpiprocs_defined(pbs_header_test: PBS): 74 | pbs_header_test.mpiprocs_per_node = 3 75 | header = pbs_header_test._create_select_line_of_header() 76 | expected = "#PBS -l select=2:ncpus=5:mpiprocs=3" 77 | assert header == expected 78 | 79 | 80 | def test_select_line_with_mem_defined(pbs_header_test: PBS): 81 | pbs_header_test.mem = '245gb' 82 | header = pbs_header_test._create_select_line_of_header() 83 | expected = "#PBS -l select=2:ncpus=5:mpiprocs=5:mem=245gb" 84 | assert header == expected 85 | 86 | 87 | def test_pbs_header_with_group_name_not_defined(pbs_header_test: PBS): 88 | header = pbs_header_test._create_group_list_header_line() 89 | expected = [] 90 | check_list_of_strings(header, expected) 91 | 92 | 93 | def test_pbs_header_with_group_name_defined(pbs_header_test: PBS): 94 | pbs_header_test.group_list = 'n1337' 95 | header = pbs_header_test._create_group_list_header_line() 96 | 
expected = ["#PBS -W group_list=n1337"] 97 | check_list_of_strings(header, expected) 98 | 99 | 100 | def test_pbs_header_email_option(pbs_header_test: PBS): 101 | pbs_header_test.mail_options = 'be' 102 | pbs_header_test.mail_list = 'kevin@nasa.gov' 103 | header = pbs_header_test._create_mail_options_header_lines() 104 | expected = ['#PBS -m be', '#PBS -M kevin@nasa.gov'] 105 | check_list_of_strings(header, expected) 106 | 107 | 108 | def test_job_line_of_header(pbs_header_test: PBS): 109 | job_name = 'test_job' 110 | assert '#PBS -N test_job' == pbs_header_test._create_job_line_of_header(job_name) 111 | 112 | 113 | def test_queue_line_of_header(pbs_header_test: PBS): 114 | pbs_header_test.queue_name = 'K4-standard' 115 | assert '#PBS -q K4-standard' == pbs_header_test._create_queue_line_of_header() 116 | 117 | 118 | def test_array_range_line_of_header_default_is_off(pbs_header_test: PBS): 119 | assert [] == pbs_header_test._create_array_range_header_line() 120 | 121 | 122 | def test_array_range_line_of_header(pbs_header_test: PBS): 123 | pbs_header_test.array_range = '1-24' 124 | assert ['#PBS -J 1-24'] == pbs_header_test._create_array_range_header_line() 125 | pbs_header_test.array_range = None 126 | assert [] == pbs_header_test._create_array_range_header_line() 127 | 128 | 129 | def test_job_dependency_line_of_header(pbs_header_test: PBS): 130 | assert [] == pbs_header_test._create_job_dependencies_header_line(dependency=None) 131 | assert [ 132 | '#PBS -W depend=afterok:a.1234'] == pbs_header_test._create_job_dependencies_header_line(dependency='a.1234') 133 | 134 | pbs_header_test.dependency_type = 'before' 135 | assert [ 136 | '#PBS -W depend=before:b.4321'] == pbs_header_test._create_job_dependencies_header_line(dependency='b.4321') 137 | -------------------------------------------------------------------------------- /tests/test_pbs_regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import filecmp 3 | from pbs4py import PBS 4 | 5 | test_directory = os.path.dirname(os.path.abspath(__file__)) 6 | test_profile = f'{test_directory}/testing_bashrc' 7 | 8 | 9 | def modify_golden_file_to_have_right_path_for_profile(golden_file: str, profile_filename: str): 10 | with open(golden_file, 'r') as fh: 11 | golden_file_contents = fh.readlines() 12 | golden_file_with_profile = [] 13 | for line in golden_file_contents: 14 | if line == 'source_line\n': 15 | golden_file_with_profile.append(f'source {profile_filename}\n') 16 | else: 17 | golden_file_with_profile.append(line) 18 | golden_mod = f'{test_directory}/test_output_files/golden_mod.pbs' 19 | with open(golden_mod, 'w') as fh: 20 | fh.writelines(golden_file_with_profile) 21 | return golden_mod 22 | 23 | 24 | def test_write_job_file_regression_check(): 25 | golden_file = f'{test_directory}/pbs_test_files/golden0.pbs' 26 | queue_name = 'queue' 27 | ncpus_per_node = 5 28 | queue_node_limit = 10 29 | time = 24 30 | hashbang = '#!/usr/bin/bash' 31 | pbs = PBS(queue_name=queue_name, ncpus_per_node=ncpus_per_node, 32 | queue_node_limit=queue_node_limit, time=time, 33 | profile_filename=test_profile) 34 | pbs.hashbang = hashbang 35 | 36 | job_name = 'test_job' 37 | job_body = ['command1', 'command2'] 38 | pbs_file = f'{test_directory}/test_output_files/test.pbs' 39 | pbs.write_job_file(pbs_file, job_name, job_body) 40 | 41 | golden_mod = modify_golden_file_to_have_right_path_for_profile( 42 | golden_file, pbs.profile_filename) 43 | 44 | assert filecmp.cmp(pbs_file, golden_mod) 45 | 46 | 47 
| if __name__ == '__main__': 48 | test_write_job_file_regression_check() 49 | -------------------------------------------------------------------------------- /tests/test_slurm_header.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from pbs4py.slurm import SLURM 4 | 5 | test_directory = os.path.dirname(os.path.abspath(__file__)) 6 | test_profile = f'{test_directory}/testing_bashrc' 7 | 8 | 9 | @pytest.fixture 10 | def slurm_header_test(): 11 | queue_name = 'queue' 12 | ncpus_per_node = 5 13 | queue_node_limit = 20 14 | time = 12 15 | hashbang = '#!/usr/bin/tcsh' 16 | pbs_header_test = SLURM(queue_name=queue_name, ncpus_per_node=ncpus_per_node, 17 | queue_node_limit=queue_node_limit, time=time, 18 | profile_filename=test_profile) 19 | pbs_header_test.hashbang = hashbang 20 | pbs_header_test.requested_number_of_nodes = 4 21 | return pbs_header_test 22 | 23 | 24 | def test_job_line(slurm_header_test: SLURM): 25 | line = slurm_header_test._create_job_line_of_header("dog") 26 | assert line == "#SBATCH --job-name=dog" 27 | 28 | 29 | def test_queue_line(slurm_header_test: SLURM): 30 | line = slurm_header_test._create_queue_line_of_header() 31 | assert line == "#SBATCH --partition=queue" 32 | 33 | 34 | def test_nodes_line(slurm_header_test: SLURM): 35 | line = slurm_header_test._create_nodes_line_of_header() 36 | assert line == "#SBATCH --nodes=4" 37 | 38 | 39 | def test_tasks_per_node_line(slurm_header_test: SLURM): 40 | line = slurm_header_test._create_tasks_per_node_line_of_header() 41 | assert line == "#SBATCH --ntasks-per-node=5" 42 | 43 | 44 | def test_walltime_line(slurm_header_test: SLURM): 45 | slurm_header_test.time = 16 46 | line = slurm_header_test._create_walltime_line_of_header() 47 | assert line == "#SBATCH --time=16:00:00" 48 | 49 | 50 | def test_log_line(slurm_header_test: SLURM): 51 | line = slurm_header_test._create_log_name_line_of_header("dog") 52 | assert line == "#SBATCH --output=qlog_dog" 53 | 54 | 55 | def test_error_log_line(slurm_header_test: SLURM): 56 | line = slurm_header_test._create_header_line_to_error_output("dog") 57 | assert line == "#SBATCH --error=err_dog" 58 | 59 | 60 | def test_not_rerunnable_line(slurm_header_test: SLURM): 61 | line = slurm_header_test._create_header_line_to_set_that_job_is_not_rerunnable() 62 | assert line == "#SBATCH --no-requeue" 63 | 64 | 65 | def test_account_line(slurm_header_test: SLURM): 66 | lines = slurm_header_test._create_account_header_line() 67 | assert len(lines) == 0 68 | 69 | slurm_header_test.account = "a123" 70 | lines = slurm_header_test._create_account_header_line() 71 | assert len(lines) == 1 72 | assert lines[0] == "#SBATCH --account=a123" 73 | 74 | 75 | def test_array_range_header_line(slurm_header_test: SLURM): 76 | lines = slurm_header_test._create_array_range_header_line() 77 | assert len(lines) == 0 78 | 79 | slurm_header_test.array_range = '1-2' 80 | lines = slurm_header_test._create_array_range_header_line() 81 | assert len(lines) == 1 82 | assert lines[0] == "#SBATCH --array=1-2" 83 | 84 | 85 | def test_mail_options_lines(slurm_header_test: SLURM): 86 | lines = slurm_header_test._create_mail_options_header_lines() 87 | assert len(lines) == 0 88 | 89 | slurm_header_test.mail_options = "BEGIN" 90 | slurm_header_test.mail_list = "test@nasa.gov" 91 | lines = slurm_header_test._create_mail_options_header_lines() 92 | assert len(lines) == 2 93 | assert lines[0] == "#SBATCH --mail-type=BEGIN" 94 | assert lines[1] == "#SBATCH 
--mail-user=test@nasa.gov" 95 | 96 | 97 | def test_dependency_lines(slurm_header_test: SLURM): 98 | lines = slurm_header_test._create_job_dependencies_header_line(None) 99 | assert len(lines) == 0 100 | 101 | lines = slurm_header_test._create_job_dependencies_header_line("a123") 102 | assert len(lines) == 1 103 | assert lines[0] == "#SBATCH --dependency=afterok:a123" 104 | 105 | 106 | def test_nodelist_line(slurm_header_test: SLURM): 107 | lines = slurm_header_test._create_nodelist_header_line() 108 | assert len(lines) == 0 109 | 110 | slurm_header_test.nodelist = '1,2,3,4' 111 | lines = slurm_header_test._create_nodelist_header_line() 112 | assert len(lines) == 1 113 | assert lines[0] == "#SBATCH --nodelist=1,2,3,4" 114 | -------------------------------------------------------------------------------- /tests/test_slurm_regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import filecmp 3 | from pbs4py.slurm import SLURM 4 | 5 | test_directory = os.path.dirname(os.path.abspath(__file__)) 6 | test_profile = f'{test_directory}/testing_bashrc' 7 | 8 | 9 | def modify_golden_file_to_have_right_path_for_profile(golden_file: str, profile_filename: str): 10 | with open(golden_file, 'r') as fh: 11 | golden_file_contents = fh.readlines() 12 | golden_file_with_profile = [] 13 | for line in golden_file_contents: 14 | if line == 'source_line\n': 15 | golden_file_with_profile.append(f'source {profile_filename}\n') 16 | else: 17 | golden_file_with_profile.append(line) 18 | golden_mod = f'{test_directory}/test_output_files/golden_mod.slurm' 19 | with open(golden_mod, 'w') as fh: 20 | fh.writelines(golden_file_with_profile) 21 | return golden_mod 22 | 23 | 24 | def test_write_job_file_regression_check(): 25 | golden_file = f'{test_directory}/pbs_test_files/golden0.slurm' 26 | queue_name = 'queue' 27 | ncpus_per_node = 20 28 | queue_node_limit = 10 29 | time = 13 30 | hashbang = '#!/usr/bin/bash' 31 | requested_number_of_nodes = 4 32 | pbs = SLURM(queue_name=queue_name, ncpus_per_node=ncpus_per_node, 33 | queue_node_limit=queue_node_limit, time=time, 34 | profile_filename=test_profile, 35 | requested_number_of_nodes=requested_number_of_nodes) 36 | pbs.hashbang = hashbang 37 | 38 | job_name = 'test_job' 39 | job_body = ['command1', 'command2'] 40 | pbs_file = f'{test_directory}/test_output_files/test.slurm' 41 | pbs.write_job_file(pbs_file, job_name, job_body) 42 | 43 | golden_mod = modify_golden_file_to_have_right_path_for_profile( 44 | golden_file, pbs.profile_filename) 45 | 46 | assert filecmp.cmp(pbs_file, golden_mod) 47 | 48 | 49 | if __name__ == '__main__': 50 | test_write_job_file_regression_check() 51 | -------------------------------------------------------------------------------- /tests/testing_bashrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/pbs4py/19a130db07d21358fd02954ef79ca38c61b8c811/tests/testing_bashrc --------------------------------------------------------------------------------
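--------------------------------------------------------------------------------
Taken together, the tests above pin down the public surface of pbs4py: preconfigured PBS launchers (test_pbs.py), job script generation (test_pbs_regression.py), MPI command construction (test_launch_base.py), batch submission (test_pbs_batch.py), and qstat parsing (the FakeKJob tests). The sketch below assembles only calls exercised by those tests into a hypothetical launch script; the profile path, job names, commands, and the PBSJob import path are illustrative assumptions, not values taken from the repository.

# Hypothetical usage sketch assembled from the test suite above; paths,
# job names, and commands are placeholders, not repository values.
from pbs4py import PBS, PBSBatch, BatchJob
from pbs4py.job import PBSJob  # import path assumed from the pbs4py/job.py layout

# Preconfigured launcher for the K4 cluster, as exercised in test_pbs.py.
pbs = PBS.k4(profile_filename='/path/to/a_bashrc')
pbs.requested_number_of_nodes = 2

# Write a job script without submitting it, as in test_pbs_regression.py.
pbs.write_job_file('demo.pbs', 'demo_job', ['command1', 'command2'])

# Build an MPI command line; test_launch_base.py exercises this on the
# Launcher base class, so PBS inheriting that helper is an assumption here.
mpi_command = pbs.create_mpi_command('solver', 'solver_output', openmp_threads=5)

# Submit a set of jobs in separate directories and wait for them to finish,
# mirroring the calls that test_pbs_batch.py drives through mocks.
jobs = [BatchJob(f'sample{i}', ['echo Hello World!']) for i in range(3)]
batch = PBSBatch(pbs, jobs, use_separate_directories=True)
batch.create_directories()
batch.launch_all_jobs()
batch.wait_for_all_jobs_to_finish(check_frequency_in_secs=30)

# Query qstat attributes of a submitted job, as in the FakeKJob tests.
job = PBSJob(jobs[0].id)
print(job.name, job.state, job.workdir, job.ncpus_per_node)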