├── .flake8 ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.rst ├── docs ├── Makefile ├── conf.py ├── devguide │ ├── #dev_docs.rst# │ ├── .#dev_docs.rst │ ├── changelog.rst │ ├── design.rst │ ├── dev_docs.rst │ └── packaging.rst ├── index.rst ├── libsubmit_art │ ├── README.txt │ ├── multi_node.png │ ├── multi_node.svg │ ├── multi_worker.png │ ├── multi_worker.svg │ ├── single_worker.png │ └── single_worker.svg ├── quick │ └── quickstart.rst ├── reference.rst ├── stubs │ ├── libsubmit.providers.aws.aws.EC2Provider.rst │ ├── libsubmit.providers.cobalt.cobalt.Cobalt.rst │ ├── libsubmit.providers.condor.condor.Condor.rst │ ├── libsubmit.providers.googlecloud.googlecloud.GoogleCloud.rst │ ├── libsubmit.providers.gridEngine.gridEngine.GridEngine.rst │ ├── libsubmit.providers.jetstream.jetstream.Jetstream.rst │ ├── libsubmit.providers.local.local.Local.rst │ ├── libsubmit.providers.provider_base.ExecutionProvider.rst │ ├── libsubmit.providers.slurm.slurm.Slurm.rst │ └── libsubmit.providers.torque.torque.Torque.rst └── userguide │ ├── configuring.rst │ ├── index.rst │ └── overview.rst ├── libsubmit ├── __init__.py ├── channels │ ├── __init__.py │ ├── channel_base.py │ ├── errors.py │ ├── local │ │ ├── __init__.py │ │ └── local.py │ ├── ssh │ │ ├── __init__.py │ │ └── ssh.py │ └── ssh_il │ │ ├── __init__.py │ │ └── ssh_il.py ├── error.py ├── launchers │ ├── __init__.py │ └── launchers.py ├── providers │ ├── __init__.py │ ├── aws │ │ ├── __init__.py │ │ ├── aws.py │ │ └── template.py │ ├── azure │ │ ├── __init__.py │ │ ├── azure.py │ │ ├── azureconf.json │ │ └── deployer.py │ ├── cluster_provider.py │ ├── cobalt │ │ ├── __init__.py │ │ ├── cobalt.py │ │ └── template.py │ ├── condor │ │ ├── __init__.py │ │ ├── condor.py │ │ └── template.py │ ├── googlecloud │ │ ├── __init__.py │ │ └── googlecloud.py │ ├── grid_engine │ │ ├── __init__.py │ │ ├── grid_engine.py │ │ └── template.py │ ├── jetstream │ │ ├── __init__.py │ │ ├── jetstream.py │ │ └── setup_first_time.sh │ ├── kubernetes │ │ ├── __init__.py │ │ ├── kube.py │ │ └── template.py │ ├── local │ │ ├── __init__.py │ │ └── local.py │ ├── provider_base.py │ ├── slurm │ │ ├── __init__.py │ │ ├── slurm.py │ │ └── template.py │ └── torque │ │ ├── __init__.py │ │ ├── template.py │ │ └── torque.py ├── tests │ ├── setup_path.sh │ ├── test_channels │ │ ├── remote_run.sh │ │ ├── test_channels.py │ │ ├── test_local_channel.py │ │ ├── test_scp_1.py │ │ ├── test_ssh_1.py │ │ ├── test_ssh_errors.py │ │ ├── test_ssh_file_transport.py │ │ └── test_ssh_interactive.py │ ├── test_integration │ │ └── test_ssh │ │ │ ├── test_ssh_beagle.py │ │ │ ├── test_ssh_condor_earth.py │ │ │ ├── test_ssh_cori.py │ │ │ └── test_ssh_swan.py │ └── test_providers │ │ └── ec2 │ │ └── test_ec2.py ├── utils.py └── version.py ├── requirements.txt ├── setup.py └── test-requirements.txt /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # D203: 1 blank line required before class docstring 3 | # E124: closing bracket does not match visual indentation 4 | # E126: continuation line over-indented for hanging indent 5 | # F403: ‘from module import *’ used; unable to detect undefined names 6 | # F405: name may be undefined, or defined from star imports: module 7 | # Ignoring the next one for valid tests 8 | # F811: redefinition of unused name from line N 9 | # This one is bad. Sometimes ordering matters, conditional imports 10 | # setting env vars necessary etc. 
11 | # E402: module level import not at top of file
12 | ignore = D203, E124, E126, F403, F405, F811, E402, W605
13 | max-line-length = 160
14 | exclude = parsl/executors/serialize/, parsl/libsubmit/
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Emacs temp files
  2 | *~
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | env/
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | 
 31 | # PyInstaller
 32 | # Usually these files are written by a python script from a template
 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | .hypothesis/
 51 | 
 52 | # Translations
 53 | *.mo
 54 | *.pot
 55 | 
 56 | # Django stuff:
 57 | *.log
 58 | local_settings.py
 59 | 
 60 | # Flask stuff:
 61 | instance/
 62 | .webassets-cache
 63 | 
 64 | # Scrapy stuff:
 65 | .scrapy
 66 | 
 67 | # Sphinx documentation
 68 | docs/_build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # celery beat schedule file
 80 | celerybeat-schedule
 81 | 
 82 | # SageMath parsed files
 83 | *.sage.py
 84 | 
 85 | # dotenv
 86 | .env
 87 | 
 88 | # virtualenv
 89 | .venv
 90 | venv/
 91 | ENV/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # emacs buffers
107 | \#*
108 | 
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |     - "3.4"
 4 |     - "3.5"
 5 |     - "3.6"
 6 | 
 7 | # command to install dependencies
 8 | install:
 9 |     - pip install -r requirements.txt
10 |     - pip install flake8
11 |     - python setup.py install
12 | 
13 | # command to run tests
14 | script:
15 |     - pip install -r test-requirements.txt
16 |     - flake8 libsubmit/
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | Libsubmit - Scheduler abstraction
 2 | =================================
 3 | |licence| |build-status| |docs|
 4 | 
 5 | **Libsubmit** provides a uniform interface to submit arbitrary bash scripts to a
 6 | variety of execution systems such as clouds, grids, clusters, and supercomputers.
 7 | This library is designed to simplify submission of pilot systems such as ipython-parallel
 8 | to a variety of compute resources.
 9 | 
10 | The latest version available on PyPI is v0.4.1.
11 | 
12 | .. |licence| image:: https://img.shields.io/badge/License-Apache%202.0-blue.svg
13 |    :target: https://github.com/Parsl/libsubmit/blob/master/LICENSE
14 |    :alt: Apache Licence V2.0
15 | .. |build-status| image:: https://travis-ci.org/Parsl/libsubmit.svg?branch=master
16 |    :target: https://travis-ci.org/Parsl/libsubmit
17 |    :alt: Build status
18 | .. |docs| image:: https://readthedocs.org/projects/libsubmit/badge/?version=latest
19 |    :target: http://libsubmit.readthedocs.io/en/latest/?badge=latest
20 |    :alt: Documentation Status
21 | 
22 | 
23 | Note
24 | ^^^^
25 | 
26 | As of December 20th, 2018 (Parsl v0.7.0), the libsubmit repository has been merged into Parsl
27 | to reduce overheads on maintenance with respect to documentation, testing, and release
28 | synchronization. The components offered by libsubmit are now available in Parsl as:
29 | `parsl.channels`, `parsl.launchers` and `parsl.providers`.
30 | 
31 | 
32 | Documentation
33 | =============
34 | 
35 | Developer documentation for libsubmit is available `here <http://libsubmit.readthedocs.io/en/latest/>`_.
36 | Since libsubmit is designed primarily to be used by `Parsl <https://github.com/Parsl/parsl>`_ as its resource provider, most of the user documentation is blended into the Parsl documentation `here <http://parsl.readthedocs.io>`_.
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = python3 -m sphinx
 7 | SPHINXPROJ    = libsubmit
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # libsubmit documentation build configuration file, created by
  5 | # sphinx-quickstart on Mon Oct  2 13:39:42 2017.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | # If extensions (or modules to document with autodoc) are in another directory,
 17 | # add these directories to sys.path here. If the directory is relative to the
 18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 19 | #
 20 | import os
 21 | import sys
 22 | sys.path.insert(0, os.path.abspath('../'))
 23 | 
 24 | 
 25 | 
 26 | # -- General configuration ------------------------------------------------
 27 | 
 28 | # If your documentation needs a minimal Sphinx version, state it here.
 29 | #
 30 | # needs_sphinx = '1.0'
 31 | 
 32 | # Add any Sphinx extension module names here, as strings. They can be
 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
34 | extensions = [ 35 | 'nbsphinx', 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.autosummary', 38 | 'sphinx.ext.intersphinx', 39 | 'sphinx.ext.linkcode', 40 | 'sphinx.ext.napoleon' 41 | ] 42 | 43 | 44 | def linkcode_resolve(domain, info): 45 | if domain != 'py': 46 | return None 47 | if not info['module']: 48 | return None 49 | filename = info['module'].replace('.', '/') 50 | return "http://github.com/Parsl/libsubmit/blob/master/{}.py".format(filename) 51 | 52 | intersphinx_mapping = { 53 | 'python': ('https://docs.python.org/3', None), 54 | } 55 | 56 | # Add any paths that contain templates here, relative to this directory. 57 | templates_path = ['_templates'] 58 | 59 | # The suffix(es) of source filenames. 60 | # You can specify multiple suffix as a list of string: 61 | # 62 | # source_suffix = ['.rst', '.md'] 63 | source_suffix = '.rst' 64 | 65 | # The master toctree document. 66 | master_doc = 'index' 67 | 68 | # General information about the project. 69 | project = u'libsubmit' 70 | copyright = u'2017, Yadu Nand Babuji' 71 | author = u'Yadu Nand Babuji' 72 | 73 | # The version info for the project you're documenting, acts as replacement for 74 | # |version| and |release|, also used in various other places throughout the 75 | # built documents. 76 | # 77 | # The short X.Y version. 78 | version = u'0.1.0' 79 | # The full version, including alpha/beta/rc tags. 80 | release = u'0.1.0' 81 | 82 | # The language for content autogenerated by Sphinx. Refer to documentation 83 | # for a list of supported languages. 84 | # 85 | # This is also used if you do content translation via gettext catalogs. 86 | # Usually you set "language" from the command line for these cases. 87 | language = None 88 | 89 | # List of patterns, relative to source directory, that match files and 90 | # directories to ignore when looking for source files. 91 | # This patterns also effect to html_static_path and html_extra_path 92 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 93 | 94 | # The name of the Pygments (syntax highlighting) style to use. 95 | pygments_style = 'sphinx' 96 | 97 | # If true, `todo` and `todoList` produce output, else they produce nothing. 98 | todo_include_todos = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | # 106 | #html_theme = 'alabaster' 107 | html_theme = 'sphinx_rtd_theme' 108 | 109 | # Theme options are theme-specific and customize the look and feel of a theme 110 | # further. For a list of options available for each theme, see the 111 | # documentation. 112 | # 113 | # html_theme_options = {} 114 | 115 | # Add any paths that contain custom static files (such as style sheets) here, 116 | # relative to this directory. They are copied after the builtin static files, 117 | # so a file named "default.css" will overwrite the builtin "default.css". 118 | html_static_path = ['_static'] 119 | 120 | 121 | # -- Options for HTMLHelp output ------------------------------------------ 122 | 123 | # Output file base name for HTML help builder. 124 | htmlhelp_basename = 'libsubmitdoc' 125 | 126 | 127 | # -- Options for LaTeX output --------------------------------------------- 128 | 129 | latex_elements = { 130 | # The paper size ('letterpaper' or 'a4paper'). 131 | # 132 | # 'papersize': 'letterpaper', 133 | 134 | # The font size ('10pt', '11pt' or '12pt'). 
135 | #
136 | # 'pointsize': '10pt',
137 | 
138 | # Additional stuff for the LaTeX preamble.
139 | #
140 | # 'preamble': '',
141 | 
142 | # Latex figure (float) alignment
143 | #
144 | # 'figure_align': 'htbp',
145 | }
146 | 
147 | # Grouping the document tree into LaTeX files. List of tuples
148 | # (source start file, target name, title,
149 | #  author, documentclass [howto, manual, or own class]).
150 | latex_documents = [
151 |     (master_doc, 'libsubmit.tex', u'libsubmit Documentation',
152 |      u'Yadu Nand Babuji', 'manual'),
153 | ]
154 | 
155 | 
156 | # -- Options for manual page output ---------------------------------------
157 | 
158 | # One entry per manual page. List of tuples
159 | # (source start file, name, description, authors, manual section).
160 | man_pages = [
161 |     (master_doc, 'libsubmit', u'libsubmit Documentation',
162 |      [author], 1)
163 | ]
164 | 
165 | 
166 | # -- Options for Texinfo output -------------------------------------------
167 | 
168 | # Grouping the document tree into Texinfo files. List of tuples
169 | # (source start file, target name, title, author,
170 | #  dir menu entry, description, category)
171 | texinfo_documents = [
172 |     (master_doc, 'libsubmit', u'libsubmit Documentation',
173 |      author, 'libsubmit', 'One line description of project.',
174 |      'Miscellaneous'),
175 | ]
176 | 
177 | 
178 | 
--------------------------------------------------------------------------------
/docs/devguide/changelog.rst:
--------------------------------------------------------------------------------
 1 | Changelog
 2 | =========
 3 | 
 4 | Libsubmit 0.4.1
 5 | ---------------
 6 | 
 7 | Released. June 18th, 2018.
 8 | This release folds in massive contributions from @annawoodard.
 9 | 
10 | New functionality
11 | ^^^^^^^^^^^^^^^^^
12 | 
13 | * Several code cleanups, doc improvements, and consistent naming
14 | 
15 | * All providers have the initialization and actual start of resources decoupled.
16 | 
17 | 
18 | 
19 | Libsubmit 0.4.0
20 | ---------------
21 | 
22 | Released. May 15th, 2018.
23 | This release folds in contributions from @ahayschi, @annawoodard, @yadudoc
24 | 
25 | New functionality
26 | ^^^^^^^^^^^^^^^^^
27 | 
28 | * Several enhancements and fixes to the AWS cloud provider (#44, #45, #50)
29 | 
30 | * Added support for python3.4
31 | 
32 | 
33 | Bug Fixes
34 | ^^^^^^^^^
35 | 
36 | * Condor jobs left in queue with X state at end of completion `issue#26 <https://github.com/Parsl/libsubmit/issues/26>`_
37 | 
38 | * Worker launches on Cori seem to fail from broken ENV `issue#27 <https://github.com/Parsl/libsubmit/issues/27>`_
39 | 
40 | * EC2 provider throwing an exception at initial run `issue#46 <https://github.com/Parsl/libsubmit/issues/46>`_
41 | 
--------------------------------------------------------------------------------
/docs/devguide/design.rst:
--------------------------------------------------------------------------------
 1 | Design
 2 | ======
 3 | 
 4 | Under construction.
 5 | 
--------------------------------------------------------------------------------
/docs/devguide/dev_docs.rst:
--------------------------------------------------------------------------------
 1 | Developer documentation
 2 | ***********************
 3 | 
 4 | .. automodule:: libsubmit
 5 |    :no-undoc-members:
 6 | 
 7 | .. autofunction:: set_stream_logger
 8 | 
 9 | .. autofunction:: set_file_logger
10 | 
11 | ExecutionProviders
12 | ------------------
13 | 
14 | An execution provider is an adapter to various types of execution resources. The providers abstract
15 | away the interfaces provided by various systems to request, monitor, and cancel compute resources.
16 | 
17 | .. autoclass:: libsubmit.execution_provider_base.ExecutionProvider
18 |    :members: __init__, submit, status, cancel, scaling_enabled, channels_required
19 | 
20 | 
21 | Slurm
22 | ^^^^^
23 | 
24 | .. autoclass:: libsubmit.providers.slurm.slurm.Slurm
25 |    :members: __init__, submit, status, cancel, _status, scaling_enabled, _write_submit_script, current_capacity, channels_required
26 | 
27 | Cobalt
28 | ^^^^^^
29 | 
30 | .. autoclass:: libsubmit.providers.cobalt.cobalt.Cobalt
31 |    :members: __init__, submit, status, cancel, _status, scaling_enabled, _write_submit_script, current_capacity, channels_required
32 | 
33 | Condor
34 | ^^^^^^
35 | 
36 | .. autoclass:: libsubmit.providers.condor.condor.Condor
37 |    :members: __init__, submit, status, cancel, _status, scaling_enabled, _write_submit_script, current_capacity, channels_required
38 | 
39 | Torque
40 | ^^^^^^
41 | 
42 | .. autoclass:: libsubmit.providers.torque.torque.Torque
43 |    :members: __init__, submit, status, cancel, _status, scaling_enabled, _write_submit_script, current_capacity, channels_required
44 | 
45 | Local
46 | ^^^^^
47 | 
48 | .. autoclass:: libsubmit.providers.local.local.Local
49 |    :members: __init__, submit, status, cancel, scaling_enabled, current_capacity, channels_required
50 | 
51 | AWS
52 | ^^^
53 | 
54 | .. autoclass:: libsubmit.providers.aws.aws.EC2Provider
55 |    :members: __init__, submit, status, cancel, scaling_enabled, current_capacity, channels_required, create_vpc, read_state_file, write_state_file, create_session, security_group
56 | 
57 | 
58 | 
59 | Channels
60 | --------
61 | 
62 | For certain resources, such as campus clusters or supercomputers at research laboratories, access
63 | may require authentication. For instance, some resources allow access to their job schedulers only from
64 | their login nodes, which require you to authenticate through SSH, GSI-SSH, and sometimes even
65 | two-factor authentication. Channels are simple abstractions that enable the ExecutionProvider component to talk
66 | to the resource managers of compute facilities. The simplest channel, *LocalChannel*, simply executes commands
67 | locally in a shell, while the *SshChannel* authenticates you to remote systems.
68 | 
69 | .. autoclass:: libsubmit.channels.channel_base.Channel
70 |    :members: execute_wait, script_dir, execute_no_wait, push_file, close
71 | 
72 | LocalChannel
73 | ^^^^^^^^^^^^
74 | .. autoclass:: libsubmit.channels.local.local.LocalChannel
75 |    :members: __init__, execute_wait, execute_no_wait, push_file, script_dir, close
76 | 
77 | SshChannel
78 | ^^^^^^^^^^^^
79 | .. autoclass:: libsubmit.channels.ssh.ssh.SshChannel
80 |    :members: __init__, execute_wait, execute_no_wait, push_file, pull_file, script_dir, close
81 | 
82 | SshILChannel
83 | ^^^^^^^^^^^^
84 | .. autoclass:: libsubmit.channels.ssh_il.ssh_il.SshILChannel
85 |    :members: __init__, execute_wait, execute_no_wait, push_file, pull_file, script_dir, close
86 | 
87 | 
88 | 
89 | Launchers
90 | ---------
91 | 
92 | Launchers are essentially wrappers for user-submitted scripts as they are submitted to
93 | a specific execution resource.
94 | 
95 | .. autofunction:: libsubmit.launchers.singleNodeLauncher
96 | 
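97 | Example
98 | -------
99 | 
100 | A minimal sketch of driving a channel directly (assuming a local shell and a
101 | writable ``/tmp/scripts``; the same calls work on any concrete Channel):
102 | 
103 | .. code-block:: python
104 | 
105 |     from libsubmit.channels.local.local import LocalChannel
106 | 
107 |     channel = LocalChannel(script_dir='/tmp/scripts')
108 |     retcode, stdout, stderr = channel.execute_wait('echo hello', walltime=10)
109 |     assert retcode == 0
110 | 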
--------------------------------------------------------------------------------
/docs/devguide/packaging.rst:
--------------------------------------------------------------------------------
 1 | Packaging
 2 | ---------
 3 | 
 4 | Currently packaging is managed by Yadu.
 5 | 
 6 | Here are the steps:
 7 | 
 8 | .. code:: bash
 9 | 
10 |    # Depending on permission all of the following might have to be run as root.
11 |    sudo su
12 | 
13 |    # Make sure to have twine installed
14 |    pip3 install twine
15 | 
16 |    # Create a source distribution
17 |    python3 setup.py sdist
18 | 
19 |    # Create a wheel package, which is a prebuilt package
20 |    python3 setup.py bdist_wheel
21 | 
22 |    # Upload the package with twine
23 |    # This step will ask for username and password for the PyPi account.
24 |    twine upload dist/*
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. libsubmit documentation master file, created by
 2 |    sphinx-quickstart on Mon Oct  2 13:39:42 2017.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to libsubmit's documentation!
 7 | =====================================
 8 | 
 9 | Libsubmit is responsible for managing execution resources with a Local Resource
10 | Manager (LRM). For instance, campus clusters and supercomputers generally have
11 | schedulers such as Slurm, PBS, or Condor. Clouds, on the other hand, have API
12 | interfaces that allow much more fine-grained composition of an execution environment.
13 | An execution provider abstracts these resources and provides a single uniform
14 | interface to them.
15 | 
16 | This module provides the following functionality:
17 | 
18 | 1. A standard interface to schedulers
19 | 2. Support for submitting, monitoring and cancelling jobs
20 | 3. A modular design, making it simple to add support for new resources.
21 | 4. Support for pushing files from the client side to resources.
22 | 
23 | 
24 | .. toctree::
25 | 
26 |    quick/quickstart
27 |    userguide/index
28 |    reference
29 |    devguide/changelog
30 |    devguide/dev_docs
31 |    devguide/packaging
32 | 
33 | 
34 | Indices and tables
35 | ==================
36 | 
37 | * :ref:`genindex`
38 | * :ref:`modindex`
39 | * :ref:`search`
--------------------------------------------------------------------------------
/docs/libsubmit_art/README.txt:
--------------------------------------------------------------------------------
1 | Editable diagrams embedded in this google doc:
2 | https://docs.google.com/document/d/193LBq7H-dtxrYUER7oZqs0ZlOcLa2fGFPGLOHRS1c5U/edit?usp=sharing
3 | 
--------------------------------------------------------------------------------
/docs/libsubmit_art/multi_node.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/docs/libsubmit_art/multi_node.png
--------------------------------------------------------------------------------
/docs/libsubmit_art/multi_worker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/docs/libsubmit_art/multi_worker.png
--------------------------------------------------------------------------------
/docs/libsubmit_art/single_worker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/docs/libsubmit_art/single_worker.png
--------------------------------------------------------------------------------
/docs/libsubmit_art/single_worker.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/docs/libsubmit_art/single_worker.svg
--------------------------------------------------------------------------------
/docs/quick/quickstart.rst:
--------------------------------------------------------------------------------
 1 | Quickstart
 2 | ==========
 3 | 
 4 | Libsubmit is an adapter to a variety of computational resources such as clouds, campus clusters, and supercomputers. This Python module is designed to simplify and expose
 5 | a uniform interface to a seemingly diverse class of resource schedulers. This library
 6 | originated in Parsl, the parallel scripting library, and is designed to bring dynamic
 7 | resource management capabilities to it.
 8 | 
 9 | 
10 | Installing
11 | ----------
12 | 
13 | Libsubmit is now available on PyPI, but first make sure you have Python 3.5+:
14 | 
15 | >>> python3 --version
16 | 
17 | 
18 | Installing on Linux
19 | ^^^^^^^^^^^^^^^^^^^
20 | 
21 | 1. Install Libsubmit::
22 | 
23 |    $ python3 -m pip install libsubmit
24 | 
25 | 
26 | 2. Libsubmit supports a variety of computational resources via specific libraries. You might only need a subset of these, which can be installed by specifying the resource names::
27 | 
28 |    $ python3 -m pip install libsubmit[<resource1>,<resource2>,<resource3>]
29 | 
30 | 
31 | Installing on Mac OS
32 | ^^^^^^^^^^^^^^^^^^^^
33 | 
34 | 1. Install Conda and set up Python 3.6 following the instructions `here `_::
35 | 
36 |    $ conda create --name libsubmit_py36 python=3.6
37 |    $ source activate libsubmit_py36
38 | 
39 | 2. Install Libsubmit::
40 | 
41 |    $ python3 -m pip install libsubmit[<optional_packages>]
42 | 
43 | 
44 | For Developers
45 | --------------
46 | 
47 | 1. Download Libsubmit::
48 | 
49 |    $ git clone https://github.com/Parsl/libsubmit
50 | 
51 | 2. Install::
52 | 
53 |    $ cd libsubmit
54 |    $ python3 setup.py install
55 | 
56 | 3. 
Use Libsubmit! 57 | 58 | Requirements 59 | ============ 60 | 61 | Libsubmit requires the following : 62 | 63 | * Python 3.5+ 64 | * paramiko 65 | * ipyparallel 66 | * boto3 - for AWS 67 | * azure, haikunator - for Azure 68 | * python-novaclient - for jetstream 69 | 70 | For testing: 71 | 72 | * nose 73 | * coverage 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /docs/reference.rst: -------------------------------------------------------------------------------- 1 | Reference guide 2 | *************** 3 | 4 | .. autosummary:: 5 | :toctree: stubs 6 | :nosignatures: 7 | 8 | libsubmit.channels.local.local.LocalChannel 9 | libsubmit.channels.ssh.ssh.SshChannel 10 | libsubmit.providers.aws.aws.EC2Provider 11 | libsubmit.providers.azureProvider.azureProvider.AzureProvider 12 | libsubmit.providers.cobalt.cobalt.Cobalt 13 | libsubmit.providers.condor.condor.Condor 14 | libsubmit.providers.googlecloud.googlecloud.GoogleCloud 15 | libsubmit.providers.gridEngine.gridEngine.GridEngine 16 | libsubmit.providers.jetstream.jetstream.Jetstream 17 | libsubmit.providers.local.local.Local 18 | libsubmit.providers.sge.sge.GridEngine 19 | libsubmit.providers.slurm.slurm.Slurm 20 | libsubmit.providers.torque.torque.Torque 21 | libsubmit.providers.provider_base.ExecutionProvider 22 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.aws.aws.EC2Provider.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.aws.aws.EC2Provider 2 | ======================================= 3 | 4 | .. currentmodule:: libsubmit.providers.aws.aws 5 | 6 | .. autoclass:: EC2Provider 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | 16 | ~EC2Provider.__init__ 17 | ~EC2Provider.cancel 18 | ~EC2Provider.config_route_table 19 | ~EC2Provider.create_session 20 | ~EC2Provider.create_vpc 21 | ~EC2Provider.get_instance_state 22 | ~EC2Provider.goodbye 23 | ~EC2Provider.initialize_boto_client 24 | ~EC2Provider.ipyparallel_configuration 25 | ~EC2Provider.read_state_file 26 | ~EC2Provider.security_group 27 | ~EC2Provider.show_summary 28 | ~EC2Provider.shut_down_instance 29 | ~EC2Provider.spin_up_instance 30 | ~EC2Provider.status 31 | ~EC2Provider.submit 32 | ~EC2Provider.teardown 33 | ~EC2Provider.write_state_file 34 | ~EC2Provider.xstr 35 | 36 | 37 | 38 | 39 | 40 | .. rubric:: Attributes 41 | 42 | .. autosummary:: 43 | 44 | ~EC2Provider.channels_required 45 | ~EC2Provider.current_capacity 46 | ~EC2Provider.scaling_enabled 47 | 48 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.cobalt.cobalt.Cobalt.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.cobalt.cobalt.Cobalt 2 | ======================================== 3 | 4 | .. currentmodule:: libsubmit.providers.cobalt.cobalt 5 | 6 | .. autoclass:: Cobalt 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | 16 | ~Cobalt.__init__ 17 | ~Cobalt.cancel 18 | ~Cobalt.status 19 | ~Cobalt.submit 20 | 21 | 22 | 23 | 24 | 25 | .. rubric:: Attributes 26 | 27 | .. 
autosummary:: 28 | 29 | ~Cobalt.channels_required 30 | ~Cobalt.current_capacity 31 | ~Cobalt.scaling_enabled 32 | 33 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.condor.condor.Condor.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.condor.condor.Condor 2 | ======================================== 3 | 4 | .. currentmodule:: libsubmit.providers.condor.condor 5 | 6 | .. autoclass:: Condor 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | 16 | ~Condor.__init__ 17 | ~Condor.cancel 18 | ~Condor.status 19 | ~Condor.submit 20 | 21 | 22 | 23 | 24 | 25 | .. rubric:: Attributes 26 | 27 | .. autosummary:: 28 | 29 | ~Condor.channels_required 30 | ~Condor.current_capacity 31 | ~Condor.scaling_enabled 32 | 33 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.googlecloud.googlecloud.GoogleCloud.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.googlecloud.googlecloud.GoogleCloud 2 | ======================================================= 3 | 4 | .. currentmodule:: libsubmit.providers.googlecloud.googlecloud 5 | 6 | .. autoclass:: GoogleCloud 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | 16 | ~GoogleCloud.__init__ 17 | ~GoogleCloud.bye 18 | ~GoogleCloud.cancel 19 | ~GoogleCloud.create_instance 20 | ~GoogleCloud.delete_instance 21 | ~GoogleCloud.get_correct_zone 22 | ~GoogleCloud.status 23 | ~GoogleCloud.submit 24 | 25 | 26 | 27 | 28 | 29 | .. rubric:: Attributes 30 | 31 | .. autosummary:: 32 | 33 | ~GoogleCloud.channels_required 34 | ~GoogleCloud.current_capacity 35 | ~GoogleCloud.scaling_enabled 36 | 37 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.gridEngine.gridEngine.GridEngine.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.gridEngine.gridEngine.GridEngine 2 | ==================================================== 3 | 4 | .. currentmodule:: libsubmit.providers.gridEngine.gridEngine 5 | 6 | .. autoclass:: GridEngine 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | 16 | ~GridEngine.__init__ 17 | ~GridEngine.cancel 18 | ~GridEngine.execute_wait 19 | ~GridEngine.get_configs 20 | ~GridEngine.status 21 | ~GridEngine.submit 22 | 23 | 24 | 25 | 26 | 27 | .. rubric:: Attributes 28 | 29 | .. autosummary:: 30 | 31 | ~GridEngine.channels_required 32 | ~GridEngine.current_capacity 33 | ~GridEngine.scaling_enabled 34 | 35 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.jetstream.jetstream.Jetstream.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.jetstream.jetstream.Jetstream 2 | ================================================= 3 | 4 | .. currentmodule:: libsubmit.providers.jetstream.jetstream 5 | 6 | .. autoclass:: Jetstream 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. 
autosummary:: 15 | 16 | ~Jetstream.__init__ 17 | ~Jetstream.scale_in 18 | ~Jetstream.scale_out 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.local.local.Local.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.local.local.Local 2 | ===================================== 3 | 4 | .. currentmodule:: libsubmit.providers.local.local 5 | 6 | .. autoclass:: Local 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | 16 | ~Local.__init__ 17 | ~Local.cancel 18 | ~Local.status 19 | ~Local.submit 20 | 21 | 22 | 23 | 24 | 25 | .. rubric:: Attributes 26 | 27 | .. autosummary:: 28 | 29 | ~Local.channels_required 30 | ~Local.current_capacity 31 | ~Local.scaling_enabled 32 | 33 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.provider_base.ExecutionProvider.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.provider\_base.ExecutionProvider 2 | ==================================================== 3 | 4 | .. currentmodule:: libsubmit.providers.provider_base 5 | 6 | .. autoclass:: ExecutionProvider 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | 16 | ~ExecutionProvider.cancel 17 | ~ExecutionProvider.status 18 | ~ExecutionProvider.submit 19 | 20 | 21 | 22 | 23 | 24 | .. rubric:: Attributes 25 | 26 | .. autosummary:: 27 | 28 | ~ExecutionProvider.channels_required 29 | ~ExecutionProvider.scaling_enabled 30 | 31 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.slurm.slurm.Slurm.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.slurm.slurm.Slurm 2 | ===================================== 3 | 4 | .. currentmodule:: libsubmit.providers.slurm.slurm 5 | 6 | .. autoclass:: Slurm 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | 16 | ~Slurm.__init__ 17 | ~Slurm.cancel 18 | ~Slurm.execute_wait 19 | ~Slurm.get_configs 20 | ~Slurm.status 21 | ~Slurm.submit 22 | 23 | 24 | 25 | 26 | 27 | .. rubric:: Attributes 28 | 29 | .. autosummary:: 30 | 31 | ~Slurm.channels_required 32 | ~Slurm.current_capacity 33 | ~Slurm.scaling_enabled 34 | 35 | -------------------------------------------------------------------------------- /docs/stubs/libsubmit.providers.torque.torque.Torque.rst: -------------------------------------------------------------------------------- 1 | libsubmit.providers.torque.torque.Torque 2 | ======================================== 3 | 4 | .. currentmodule:: libsubmit.providers.torque.torque 5 | 6 | .. autoclass:: Torque 7 | 8 | 9 | .. automethod:: __init__ 10 | 11 | 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | 16 | ~Torque.__init__ 17 | ~Torque.cancel 18 | ~Torque.status 19 | ~Torque.submit 20 | 21 | 22 | 23 | 24 | 25 | .. rubric:: Attributes 26 | 27 | .. 
autosummary::
28 | 
29 |       ~Torque.channels_required
30 |       ~Torque.current_capacity
31 |       ~Torque.scaling_enabled
32 | 
33 | 
--------------------------------------------------------------------------------
/docs/userguide/configuring.rst:
--------------------------------------------------------------------------------
 1 | Configuration
 2 | =============
 3 | 
 4 | The primary mode by which you interact with libsubmit is by instantiating an ExecutionProvider
 5 | with a configuration data structure, and optional Channel objects if the ExecutionProvider requires them.
 6 | 
 7 | The configuration data structure expected by an ExecutionProvider, as well as provider-specific options, is
 8 | described below.
 9 | 
10 | The config structure looks like this:
11 | 
12 | .. code-block:: python
13 | 
14 |     config = { "poolName" : <string>,
15 |                "provider" : <string>,
16 |                "scriptDir" : <string>,
17 |                "minBlocks" : <int>,
18 |                "maxBlocks" : <int>,
19 |                "initBlocks" : <int>,
20 |                "block" : { # Specify the shape of the block
21 |                    "nodes" : <int>,
22 |                    "taskBlocks" : <int>,
23 |                    "walltime" : <string>,
24 |                    "options" : { # These are provider specific options
25 |                        "partition" : <string>,
26 |                        "account" : <string>,
27 |                        "overrides" : <string>
28 |                    }
29 |                }
30 |     }
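31 | 
32 | For example, a filled-in config for a hypothetical Slurm allocation (every
33 | value below is an illustrative placeholder):
34 | 
35 | .. code-block:: python
36 | 
37 |     config = { "poolName" : "debug_pool",
38 |                "provider" : "slurm",
39 |                "scriptDir" : ".scripts",
40 |                "minBlocks" : 0,
41 |                "maxBlocks" : 2,
42 |                "initBlocks" : 1,
43 |                "block" : { "nodes" : 1,
44 |                            "taskBlocks" : 4,
45 |                            "walltime" : "00:30:00",
46 |                            "options" : { "partition" : "debug",
47 |                                          "account" : "my_allocation",
48 |                                          "overrides" : "#SBATCH --constraint=haswell"
49 |                            }
50 |                }
51 |     }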
--------------------------------------------------------------------------------
/docs/userguide/index.rst:
--------------------------------------------------------------------------------
1 | User guide
2 | ==========
3 | 
4 | .. toctree::
5 |    :maxdepth: 5
6 | 
7 |    overview
8 |    configuring
--------------------------------------------------------------------------------
/docs/userguide/overview.rst:
--------------------------------------------------------------------------------
1 | Overview
2 | ========
3 | 
4 | Under construction. Please refer to the developer documentation while this section
5 | is being built.
6 | 
--------------------------------------------------------------------------------
/libsubmit/__init__.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Libsubmit
  3 | =========
  4 | 
  5 | Uniform interface to a diverse and multi-lingual set of computational resources.
  6 | 
  7 | '''
  8 | import logging
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | from libsubmit.version import VERSION
 12 | from libsubmit.providers import LocalProvider
 13 | 
 14 | from libsubmit.providers import CobaltProvider
 15 | from libsubmit.providers import CondorProvider
 16 | from libsubmit.providers import GridEngineProvider
 17 | from libsubmit.providers import SlurmProvider
 18 | from libsubmit.providers import TorqueProvider
 19 | 
 20 | from libsubmit.providers import AWSProvider
 21 | from libsubmit.providers import AzureProvider
 22 | from libsubmit.providers import GoogleCloudProvider
 23 | from libsubmit.providers import JetstreamProvider
 24 | 
 25 | from libsubmit.providers import KubernetesProvider
 26 | 
 27 | from libsubmit.channels import SSHChannel
 28 | from libsubmit.channels import SSHInteractiveLoginChannel
 29 | from libsubmit.channels import LocalChannel
 30 | 
 31 | from libsubmit.launchers import SimpleLauncher, SingleNodeLauncher, SrunLauncher, \
 32 |     AprunLauncher, SrunMPILauncher
 33 | 
 34 | 
 35 | __author__ = 'Yadu Nand Babuji'
 36 | __version__ = VERSION
 37 | 
 38 | __all__ = ['LocalProvider',
 39 |            'CobaltProvider',
 40 |            'CondorProvider',
 41 |            'GridEngineProvider',
 42 |            'SlurmProvider',
 43 |            'TorqueProvider',
 44 |            'AWSProvider',
 45 |            'AzureProvider',
 46 |            'GoogleCloudProvider',
 47 |            'JetstreamProvider',
 48 |            'KubernetesProvider',
 49 |            'LocalChannel',
 50 |            'SSHChannel',
 51 |            'SSHInteractiveLoginChannel',
 52 |            'SimpleLauncher',
 53 |            'SingleNodeLauncher',
 54 |            'SrunLauncher',
 55 |            'AprunLauncher',
 56 |            'SrunMPILauncher']
 57 | 
 58 | 
 59 | 
 60 | def set_stream_logger(name='libsubmit', level=logging.DEBUG, format_string=None):
 61 |     '''
 62 |     Add a stream log handler
 63 | 
 64 |     Args:
 65 |          - name (string) : Set the logger name.
 66 |          - level (logging.LEVEL) : Set to logging.DEBUG by default.
 67 |          - format_string (string) : Set to None by default.
 68 | 
 69 |     Returns:
 70 |          - None
 71 |     '''
 72 | 
 73 |     if format_string is None:
 74 |         format_string = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
 75 | 
 76 |     logger = logging.getLogger(name)
 77 |     logger.setLevel(level)
 78 |     handler = logging.StreamHandler()
 79 |     handler.setLevel(level)
 80 |     formatter = logging.Formatter(format_string)
 81 |     handler.setFormatter(formatter)
 82 |     logger.addHandler(handler)
 83 | 
 84 | 
 85 | def set_file_logger(filename, name='libsubmit', level=logging.DEBUG, format_string=None):
 86 |     ''' Add a file log handler
 87 | 
 88 |     Args:
 89 |         - filename (string): Name of the file to write logs to
 90 |         - name (string): Logger name
 91 |         - level (logging.LEVEL): Set the logging level.
 92 |         - format_string (string): Set the format string
 93 | 
 94 |     Returns:
 95 |         - None
 96 |     '''
 97 | 
 98 |     if format_string is None:
 99 |         format_string = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
100 | 
101 |     logger = logging.getLogger(name)
102 |     logger.setLevel(level)
103 |     handler = logging.FileHandler(filename)
104 |     handler.setLevel(level)
105 |     formatter = logging.Formatter(format_string)
106 |     handler.setFormatter(formatter)
107 |     logger.addHandler(handler)
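108 | 
109 | 
110 | # Usage sketch (a hypothetical caller; nothing here runs on import):
111 | #
112 | #   import logging
113 | #   import libsubmit
114 | #   libsubmit.set_stream_logger(level=logging.INFO)
115 | #   libsubmit.set_file_logger('/tmp/libsubmit.log', level=logging.DEBUG)
116 | 
117 | 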
118 | class NullHandler(logging.Handler):
119 |     ''' Set up default logging to /dev/null, since this is a library.
120 | 
121 |     '''
122 | 
123 |     def emit(self, record):
124 |         pass
125 | 
126 | 
127 | logging.getLogger('libsubmit').addHandler(NullHandler())
128 | 
--------------------------------------------------------------------------------
/libsubmit/channels/__init__.py:
--------------------------------------------------------------------------------
1 | from libsubmit.channels.ssh.ssh import SSHChannel
2 | from libsubmit.channels.local.local import LocalChannel
3 | from libsubmit.channels.ssh_il.ssh_il import SSHInteractiveLoginChannel
4 | 
5 | __all__ = ['SSHChannel', 'LocalChannel', 'SSHInteractiveLoginChannel']
--------------------------------------------------------------------------------
/libsubmit/channels/channel_base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod, abstractproperty
 2 | 
 3 | 
 4 | class Channel(metaclass=ABCMeta):
 5 |     """ Define the interface to all channels. Channels are usually called via the execute_wait function.
 6 |     For channels that execute remotely, a push_file function allows you to copy over files.
 7 | 
 8 |     .. code:: python
 9 | 
10 |                            +------------------
11 |                            |
12 |           cmd, wtime    ------->|  execute_wait
13 |           (ec, stdout, stderr)<-|---+
14 |                            |
15 |           cmd, wtime    ------->|  execute_no_wait
16 |           (ec, stdout, stderr)<-|---+
17 |                            |
18 |           src, dst_dir  ------->|  push_file
19 |              dst_path  <--------|----+
20 |                            |
21 |           dst_script_dir <------|  script_dir
22 |                            |
23 |                            +-------------------
24 | 
25 |     """
26 | 
27 |     @abstractmethod
28 |     def execute_wait(self, cmd, walltime, envs={}, *args, **kwargs):
29 |         ''' Executes the cmd, with a defined walltime.
30 | 
31 |         Args:
32 |             - cmd (string): Command string to execute over the channel
33 |             - walltime (int) : Timeout in seconds
34 | 
35 |         KWargs:
36 |             - envs (dict) : Environment variables to push to the remote side
37 | 
38 |         Returns:
39 |             - (exit_code, stdout, stderr) (int, string, string)
40 |         '''
41 |         pass
42 | 
43 |     @abstractproperty
44 |     def script_dir(self):
45 |         ''' This is a property. Returns the directory assigned for storing all internal scripts such as
46 |         scheduler submit scripts. This is usually where error logs from the scheduler would reside on the
47 |         channel destination side.
48 | 
49 |         Args:
50 |             - None
51 | 
52 |         Returns:
53 |             - Channel script dir
54 |         '''
55 |         pass
56 | 
57 |     @abstractmethod
58 |     def execute_no_wait(self, cmd, walltime, envs={}, *args, **kwargs):
59 |         ''' Optional. This is infrequently used.
60 | 
61 |         Args:
62 |             - cmd (string): Command string to execute over the channel
63 |             - walltime (int) : Timeout in seconds
64 | 
65 |         KWargs:
66 |             - envs (dict) : Environment variables to push to the remote side
67 | 
68 |         Returns:
69 |             - (exit_code(None), stdout, stderr) (int, file-like, file-like)
70 |         '''
71 |         pass
72 | 
73 |     @abstractmethod
74 |     def push_file(self, source, dest_dir):
75 |         ''' Channel will take care of moving the file from source to the destination
76 |         directory
77 | 
78 |         Args:
79 |             source (string) : Full filepath of the file to be moved
80 |             dest_dir (string) : Absolute path of the directory to move to
81 | 
82 |         Returns:
83 |             destination_path (string)
84 |         '''
85 |         pass
86 | 
87 |     @abstractmethod
88 |     def close(self):
89 |         ''' Closes the channel. Clean out any auth credentials.
90 | 
91 |         Args:
92 |             None
93 | 
94 |         Returns:
95 |             Bool
96 | 
97 |         '''
98 |         pass
99 | 
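100 | 
101 | # Sketch of a minimal concrete Channel (hypothetical, illustration only;
102 | # a real subclass must implement every abstract method and property above):
103 | #
104 | #   class EchoChannel(Channel):
105 | #       def execute_wait(self, cmd, walltime, envs={}, *args, **kwargs):
106 | #           return (0, cmd, '')   # pretend every command echoes back cleanly
107 | #       # ... plus execute_no_wait, push_file, close and script_dir ...
108 | 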
--------------------------------------------------------------------------------
/libsubmit/channels/errors.py:
--------------------------------------------------------------------------------
  1 | ''' Exceptions raised by channels.
  2 | '''
  3 | 
  4 | 
  5 | class ChannelError(Exception):
  6 |     """ Base class for all channel exceptions.
  7 | 
  8 |     Only to be invoked when a more specific error is not available.
  9 |     """
 10 |     def __repr__(self):
 11 |         return "Hostname:{0}, Reason:{1}".format(self.hostname, self.reason)
 12 | 
 13 |     def __str__(self):
 14 |         return self.__repr__()
 15 | 
 16 | 
 17 | class BadHostKeyException(ChannelError):
 18 |     ''' SSH channel could not be created since server's host keys could not
 19 |     be verified
 20 | 
 21 |     Contains:
 22 |     reason(string)
 23 |     e (paramiko exception object)
 24 |     hostname (string)
 25 |     '''
 26 | 
 27 |     def __init__(self, e, hostname):
 28 |         super().__init__()
 29 |         self.reason = "SSH channel could not be created since server's host keys could not be verified"
 30 |         self.hostname = hostname
 31 |         self.e = e
 32 | 
 33 | 
 34 | class BadScriptPath(ChannelError):
 35 |     ''' Raised when the remote script directory is inaccessible or missing.
 36 | 
 37 |     Contains:
 38 |     reason(string)
 39 |     e (paramiko exception object)
 40 |     hostname (string)
 41 |     '''
 42 | 
 43 |     def __init__(self, e, hostname):
 44 |         super().__init__()
 45 |         self.reason = "Inaccessible remote script dir. Specify script_dir"
 46 |         self.hostname = hostname
 47 |         self.e = e
 48 | 
 49 | 
 50 | class BadPermsScriptPath(ChannelError):
 51 |     ''' User does not have permissions to access the script_dir on the remote site
 52 | 
 53 |     Contains:
 54 |     reason(string)
 55 |     e (paramiko exception object)
 56 |     hostname (string)
 57 |     '''
 58 | 
 59 |     def __init__(self, e, hostname):
 60 |         super().__init__()
 61 |         self.reason = "User does not have permissions to access the script_dir"
 62 |         self.hostname = hostname
 63 |         self.e = e
 64 | 
 65 | 
 66 | class FileExists(ChannelError):
 67 |     ''' Push or pull of a file over the channel fails because a file of that name
 68 |     already exists on the destination.
 69 | 
 70 |     Contains:
 71 |     reason(string)
 72 |     e (paramiko exception object)
 73 |     hostname (string)
 74 |     '''
 75 | 
 76 |     def __init__(self, e, hostname, filename=None):
 77 |         super().__init__()
 78 |         self.reason = "File name collision in channel transport phase: {}".format(filename)
 79 |         self.hostname = hostname
 80 |         self.e = e
 81 | 
 82 | 
 83 | class AuthException(ChannelError):
 84 |     ''' Raised when authentication to the remote server fails.
 85 | 
 86 |     Contains:
 87 |     reason(string)
 88 |     e (paramiko exception object)
 89 |     hostname (string)
 90 |     '''
 91 | 
 92 |     def __init__(self, e, hostname):
 93 |         super().__init__()
 94 |         self.reason = "Authentication to remote server failed"
 95 |         self.hostname = hostname
 96 |         self.e = e
 97 | 
 98 | 
 99 | class SSHException(ChannelError):
100 |     ''' Raised when any other error occurs while connecting or establishing an SSH session.
101 | 
102 |     Contains:
103 |     reason(string)
104 |     e (paramiko exception object)
105 |     hostname (string)
106 |     '''
107 | 
108 |     def __init__(self, e, hostname):
109 |         super().__init__()
110 |         self.reason = "Error connecting or establishing an SSH session"
111 |         self.hostname = hostname
112 |         self.e = e
113 | 
114 | 
115 | class FileCopyException(ChannelError):
116 |     ''' File copy operation failed
117 | 
118 |     Contains:
119 |     reason(string)
120 |     e (paramiko exception object)
121 |     hostname (string)
122 |     '''
123 | 
124 |     def __init__(self, e, hostname):
125 |         super().__init__()
126 |         self.reason = "File copy failed due to {0}".format(e)
127 |         self.hostname = hostname
128 |         self.e = e
129 | 
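130 | 
131 | # Usage sketch: channel operations raise subclasses of ChannelError, so a
132 | # caller can catch the base class (the host and paths here are hypothetical):
133 | #
134 | #   try:
135 | #       channel.push_file('/tmp/job.sh', '/home/user/scripts')
136 | #   except ChannelError as e:
137 | #       print(e)   # -> "Hostname:<host>, Reason:<reason>"
138 | 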
--------------------------------------------------------------------------------
/libsubmit/channels/local/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/channels/local/__init__.py
--------------------------------------------------------------------------------
/libsubmit/channels/local/local.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import errno
  3 | import logging
  4 | import os
  5 | import shutil
  6 | import subprocess
  7 | 
  8 | from libsubmit.channels.channel_base import Channel
  9 | from libsubmit.channels.errors import *
 10 | from libsubmit.utils import RepresentationMixin
 11 | 
 12 | logger = logging.getLogger(__name__)
 13 | 
 14 | 
 15 | class LocalChannel(Channel, RepresentationMixin):
 16 |     ''' This is not even really a channel: opening a local shell is cheap
 17 |     and done so infrequently that no persistent channel is needed.
 18 |     '''
 19 | 
 20 |     def __init__(self, userhome=".", envs={}, script_dir="./.scripts", **kwargs):
 21 |         ''' Initialize the local channel. script_dir is required but set to a default.
 22 | 
 23 |         KwArgs:
 24 |             - userhome (string): (default='.') This is provided as a way to override and set a specific userhome
 25 |             - envs (dict) : A dictionary of env variables to be set when launching the shell
 26 |             - script_dir (string): (default="./.scripts") Directory to place scripts
 27 |         '''
 28 |         self.userhome = os.path.abspath(userhome)
 29 |         self.hostname = "localhost"
 30 |         self.envs = envs
 31 |         local_env = os.environ.copy()
 32 |         self._envs = copy.deepcopy(local_env)
 33 |         self._envs.update(envs)
 34 |         self._script_dir = os.path.abspath(script_dir)
 35 |         try:
 36 |             os.makedirs(self._script_dir)
 37 |         except OSError as e:
 38 |             if e.errno != errno.EEXIST:
 39 |                 logger.error("Failed to create script_dir : {0}".format(script_dir))
 40 |                 raise BadScriptPath(e, self.hostname)
 41 | 
 42 |     @property
 43 |     def script_dir(self):
 44 |         return self._script_dir
 45 | 
 46 |     def execute_wait(self, cmd, walltime, envs={}):
 47 |         ''' Synchronously execute a commandline string on the shell.
 48 | 
 49 |         Args:
 50 |             - cmd (string) : Commandline string to execute
 51 |             - walltime (int) : walltime in seconds, this is not really used now.
 52 | 
 53 |         Kwargs:
 54 |             - envs (dict) : Dictionary of env variables. This will be used
 55 |               to override the envs set at channel initialization.
 56 | 
 57 |         Returns:
 58 |             - retcode : Return code from the execution, -1 on fail
 59 |             - stdout  : stdout string
 60 |             - stderr  : stderr string
 61 | 
 62 |         Raises:
 63 |             None.
 64 |         '''
 65 |         retcode = -1
 66 |         stdout = None
 67 |         stderr = None
 68 | 
 69 |         current_env = copy.deepcopy(self._envs)
 70 |         current_env.update(envs)
 71 | 
 72 |         try:
 73 |             proc = subprocess.Popen(
 74 |                 cmd,
 75 |                 stdout=subprocess.PIPE,
 76 |                 stderr=subprocess.PIPE,
 77 |                 cwd=self.userhome,
 78 |                 env=current_env,
 79 |                 shell=True
 80 |             )
 81 |             proc.wait(timeout=walltime)
 82 |             stdout = proc.stdout.read()
 83 |             stderr = proc.stderr.read()
 84 |             retcode = proc.returncode
 85 | 
 86 |         except Exception as e:
 87 |             print("Caught exception : {0}".format(e))
 88 |             logger.warn("Execution of command [%s] failed due to \n %s ", cmd, e)
 89 |             # Set retcode to non-zero so that this can be handled in the provider.
 90 |             if retcode == 0:
 91 |                 retcode = -1
 92 |             return (retcode, None, None)
 93 | 
 94 |         return (retcode, stdout.decode("utf-8"), stderr.decode("utf-8"))
 95 | 
 96 |     def execute_no_wait(self, cmd, walltime, envs={}):
 97 |         ''' Asynchronously execute a commandline string on the shell.
 98 | 
 99 |         Args:
100 |             - cmd (string) : Commandline string to execute
101 |             - walltime (int) : walltime in seconds, this is not really used now.
102 | 
103 |         Returns:
104 | 
105 |             - pid : Process id of the spawned process
106 |             - proc : The subprocess.Popen object for the spawned process
107 | 
108 |         Raises:
109 |             None.
110 |         '''
111 |         current_env = copy.deepcopy(self._envs)
112 |         current_env.update(envs)
113 | 
114 |         try:
115 |             proc = subprocess.Popen(
116 |                 cmd,
117 |                 stdout=subprocess.PIPE,
118 |                 stderr=subprocess.PIPE,
119 |                 cwd=self.userhome,
120 |                 env=current_env,
121 |                 shell=True,
122 |                 preexec_fn=os.setpgrp
123 |             )
124 |             pid = proc.pid
125 | 
126 |         except Exception as e:
127 |             print("Caught exception : {0}".format(e))
128 |             logger.warn("Execution of command [%s] failed due to \n %s ", cmd, e)
129 |             raise  # re-raise; otherwise pid would be unbound below
130 | 
131 |         return pid, proc
132 | 
133 |     def push_file(self, source, dest_dir):
134 |         ''' If the source file's dirpath is the same as dest_dir, a copy
135 |         is not necessary, and nothing is done. Otherwise a copy is made.
136 | 
137 |         Args:
138 |             - source (string) : Path to the source file
139 |             - dest_dir (string) : Path to the directory to which the file is to be copied
140 | 
141 |         Returns:
142 |             - destination_path (String) : Absolute path of the destination file
143 | 
144 |         Raises:
145 |             - FileCopyException : If file copy failed.
146 |         '''
147 | 
148 |         local_dest = dest_dir + '/' + os.path.basename(source)
149 | 
150 |         # Only attempt to copy if the target dir and source dir are different
151 |         if os.path.dirname(source) != dest_dir:
152 |             try:
153 |                 shutil.copyfile(source, local_dest)
154 |                 os.chmod(local_dest, 0o777)
155 | 
156 |             except OSError as e:
157 |                 raise FileCopyException(e, self.hostname)
158 | 
159 |         return local_dest
160 | 
161 |     def close(self):
162 |         ''' There's nothing to close here, and this really doesn't do anything.
163 | 
164 |         Returns:
165 |             - False, because it really did not "close" this channel.
166 |         '''
167 |         return False
168 | 
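169 | 
170 | # Usage sketch for both execution modes (assuming './.scripts' is writable):
171 | #
172 | #   ch = LocalChannel()
173 | #   rc, out, err = ch.execute_wait('echo hello', walltime=5)   # blocks, returns strings
174 | #   pid, proc = ch.execute_no_wait('sleep 60', walltime=5)     # returns immediately
175 | #   proc.poll()   # None while the command is still running
176 | 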
166 | ''' 167 | return False 168 | -------------------------------------------------------------------------------- /libsubmit/channels/ssh/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/channels/ssh/__init__.py -------------------------------------------------------------------------------- /libsubmit/channels/ssh/ssh.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import getpass 3 | import logging 4 | import os 5 | 6 | import paramiko 7 | from libsubmit.channels.errors import * 8 | from libsubmit.utils import RepresentationMixin 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class SSHChannel(RepresentationMixin): 14 | ''' SSH persistent channel. This enables remote execution on sites 15 | accessible via ssh. It is assumed that the user has setup host keys 16 | so as to ssh to the remote host. Which goes to say that the following 17 | test on the commandline should work : 18 | 19 | >>> ssh @ 20 | 21 | ''' 22 | 23 | def __init__(self, hostname, username=None, password=None, script_dir=None, envs=None, **kwargs): 24 | ''' Initialize a persistent connection to the remote system. 25 | We should know at this point whether ssh connectivity is possible 26 | 27 | Args: 28 | - hostname (String) : Hostname 29 | 30 | KWargs: 31 | - username (string) : Username on remote system 32 | - password (string) : Password for remote system 33 | - script_dir (string) : Full path to a script dir where 34 | generated scripts could be sent to. 35 | - envs (dict) : A dictionary of environment variables to be set when executing commands 36 | 37 | Raises: 38 | ''' 39 | 40 | self.hostname = hostname 41 | self.username = username 42 | self.password = password 43 | self.kwargs = kwargs 44 | 45 | self.ssh_client = paramiko.SSHClient() 46 | self.ssh_client.load_system_host_keys() 47 | self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) 48 | 49 | if script_dir: 50 | self._script_dir = script_dir 51 | else: 52 | self._script_dir = "/tmp/{0}/scripts/".format(getpass.getuser()) 53 | 54 | self.envs = {} 55 | if envs is not None: 56 | self.envs = envs 57 | 58 | try: 59 | self.ssh_client.connect( 60 | hostname, 61 | username=username, 62 | password=password, 63 | allow_agent=True 64 | ) 65 | t = self.ssh_client.get_transport() 66 | self.sftp_client = paramiko.SFTPClient.from_transport(t) 67 | 68 | except paramiko.BadHostKeyException as e: 69 | raise BadHostKeyException(e, self.hostname) 70 | 71 | except paramiko.AuthenticationException as e: 72 | raise AuthException(e, self.hostname) 73 | 74 | except paramiko.SSHException as e: 75 | raise SSHException(e, self.hostname) 76 | 77 | except Exception as e: 78 | raise SSHException(e, self.hostname) 79 | 80 | @property 81 | def script_dir(self): 82 | return self._script_dir 83 | 84 | def prepend_envs(self, cmd, env={}): 85 | env.update(self.envs) 86 | 87 | if len(env.keys()) > 0: 88 | env_vars = ' '.join(['{}={}'.format(key, value) for key, value in env.items()]) 89 | return 'env {0} {1}'.format(env_vars, cmd) 90 | return cmd 91 | 92 | def execute_wait(self, cmd, walltime=2, envs={}): 93 | ''' Synchronously execute a commandline string on the shell. 94 | 95 | Args: 96 | - cmd (string) : Commandline string to execute 97 | - walltime (int) : walltime in seconds, this is not really used now. 
98 | 99 | Kwargs: 100 | - envs (dict) : Dictionary of env variables 101 | 102 | Returns: 103 | - retcode : Return code from the execution, -1 on fail 104 | - stdout : stdout string 105 | - stderr : stderr string 106 | 107 | Raises: 108 | None. 109 | ''' 110 | 111 | # Execute the command 112 | stdin, stdout, stderr = self.ssh_client.exec_command( 113 | self.prepend_envs(cmd, envs), bufsize=-1, timeout=walltime 114 | ) 115 | # Block on exit status from the command 116 | exit_status = stdout.channel.recv_exit_status() 117 | return exit_status, stdout.read().decode("utf-8"), stderr.read().decode("utf-8") 118 | 119 | def execute_no_wait(self, cmd, walltime=2, envs={}): 120 | ''' Execute asynchronousely without waiting for exitcode 121 | 122 | Args: 123 | - cmd (string): Commandline string to be executed on the remote side 124 | - walltime (int): timeout to exec_command 125 | 126 | KWargs: 127 | - envs (dict): A dictionary of env variables 128 | 129 | Returns: 130 | - None, stdout (readable stream), stderr (readable stream) 131 | 132 | Raises: 133 | - ChannelExecFailed (reason) 134 | ''' 135 | 136 | # Execute the command 137 | stdin, stdout, stderr = self.ssh_client.exec_command( 138 | self.prepend_envs(cmd, envs), bufsize=-1, timeout=walltime 139 | ) 140 | # Block on exit status from the command 141 | return None, stdout, stderr 142 | 143 | def push_file(self, local_source, remote_dir): 144 | ''' Transport a local file to a directory on a remote machine 145 | 146 | Args: 147 | - local_source (string): Path 148 | - remote_dir (string): Remote path 149 | 150 | Returns: 151 | - str: Path to copied file on remote machine 152 | 153 | Raises: 154 | - BadScriptPath : if script path on the remote side is bad 155 | - BadPermsScriptPath : You do not have perms to make the channel script dir 156 | - FileCopyException : FileCopy failed. 157 | 158 | ''' 159 | remote_dest = remote_dir + '/' + os.path.basename(local_source) 160 | 161 | try: 162 | self.sftp_client.mkdir(remote_dir) 163 | except IOError as e: 164 | if e.errno is None: 165 | logger.info( 166 | "Copying {0} into existing directory {1}".format(local_source, remote_dir) 167 | ) 168 | else: 169 | logger.exception("Pushing {0} to {1} failed".format(local_source, remote_dir)) 170 | if e.errno == 2: 171 | raise BadScriptPath(e, self.hostname) 172 | elif e.errno == 13: 173 | raise BadPermsScriptPath(e, self.hostname) 174 | else: 175 | logger.exception("File push failed due to SFTP client failure") 176 | raise FileCopyException(e, self.hostname) 177 | 178 | try: 179 | self.sftp_client.put(local_source, remote_dest, confirm=True) 180 | # Set perm because some systems require the script to be executable 181 | self.sftp_client.chmod(remote_dest, 0o777) 182 | except Exception as e: 183 | logger.exception("File push from local source {} to remote destination {} failed".format( 184 | local_source, remote_dest)) 185 | raise FileCopyException(e, self.hostname) 186 | 187 | return remote_dest 188 | 189 | def pull_file(self, remote_source, local_dir): 190 | ''' Transport file on the remote side to a local directory 191 | 192 | Args: 193 | - remote_source (string): remote_source 194 | - local_dir (string): Local directory to copy to 195 | 196 | 197 | Returns: 198 | - str: Local path to file 199 | 200 | Raises: 201 | - FileExists : Name collision at local directory. 202 | - FileCopyException : FileCopy failed. 
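        Example (editorial addition, not part of the original docstring; the
        host and paths are placeholders):

            channel = SSHChannel('login.example.edu', username='me')
            local_copy = channel.pull_file('/home/me/job.stdout', '/tmp/results')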
203 |         '''
204 | 
205 |         local_dest = local_dir + '/' + os.path.basename(remote_source)
206 | 
207 |         try:
208 |             os.makedirs(local_dir)
209 |         except OSError as e:
210 |             if e.errno != errno.EEXIST:
211 |                 logger.exception("Failed to create local_dir: {0}".format(local_dir))
212 |                 raise BadScriptPath(e, self.hostname)
213 | 
214 |         # Easier to check this than to waste time trying to pull file and
215 |         # realize there's a problem.
216 |         if os.path.exists(local_dest):
217 |             logger.error("Remote file copy will overwrite a local file:{0}".format(local_dest))
218 |             raise FileExists(None, self.hostname, filename=local_dest)
219 | 
220 |         try:
221 |             self.sftp_client.get(remote_source, local_dest)
222 |         except Exception as e:
223 |             logger.exception("File pull failed")
224 |             raise FileCopyException(e, self.hostname)
225 | 
226 |         return local_dest
227 | 
228 |     def close(self):
229 |         return self.ssh_client.close()
--------------------------------------------------------------------------------
/libsubmit/channels/ssh_il/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/channels/ssh_il/__init__.py
--------------------------------------------------------------------------------
/libsubmit/channels/ssh_il/ssh_il.py:
--------------------------------------------------------------------------------
1 | import getpass
2 | import logging
3 | 
4 | import paramiko
5 | from libsubmit.channels.ssh.ssh import SSHChannel
6 | 
7 | logger = logging.getLogger(__name__)
8 | 
9 | 
10 | class SSHInteractiveLoginChannel(SSHChannel):
11 |     """SSH persistent channel. This enables remote execution on sites
12 |     accessible via ssh. This channel supports interactive login and is appropriate when
13 |     keys are not set up.
14 |     """
15 | 
16 |     def __init__(self, hostname, username=None, password=None, script_dir=None, envs=None, **kwargs):
17 |         ''' Initialize a persistent connection to the remote system.
18 |         We should know at this point whether ssh connectivity is possible.
19 | 
20 |         Args:
21 |             - hostname (String) : Hostname
22 | 
23 |         KWargs:
24 |             - username (string) : Username on remote system
25 |             - password (string) : Password for remote system
26 |             - script_dir (string) : Full path to a script dir where
27 |               generated scripts could be sent to.
28 | - envs (dict) : A dictionary of env variables to be set when executing commands 29 | 30 | Raises: 31 | ''' 32 | self.hostname = hostname 33 | self.username = username 34 | self.password = password 35 | self.kwargs = kwargs 36 | 37 | self.ssh_client = paramiko.SSHClient() 38 | self.ssh_client.load_system_host_keys() 39 | self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) 40 | 41 | if script_dir: 42 | self._script_dir = script_dir 43 | else: 44 | self._script_dir = "/tmp/{0}/scripts/".format(getpass.getuser()) 45 | 46 | self.envs = {} 47 | if envs is not None: 48 | self.envs = envs 49 | 50 | try: 51 | self.ssh_client.connect( 52 | hostname, username=username, password=password, allow_agent=True 53 | ) 54 | 55 | except Exception: 56 | logger.debug("Caught the SSHException in SSHInteractive") 57 | pass 58 | ''' 59 | except paramiko.BadHostKeyException as e: 60 | raise BadHostKeyException(e, self.hostname) 61 | 62 | except paramiko.AuthenticationException as e: 63 | raise AuthException(e, self.hostname) 64 | 65 | except paramiko.SSHException as e: 66 | logger.debug("Caught the SSHException in SSHInteractive") 67 | pass 68 | 69 | except Exception as e: 70 | raise SSHException(e, self.hostname) 71 | ''' 72 | 73 | transport = self.ssh_client.get_transport() 74 | 75 | il_password = getpass.getpass('Enter {0} Logon password :'.format(hostname)) 76 | transport.auth_password(username, il_password) 77 | 78 | self.sftp_client = paramiko.SFTPClient.from_transport(transport) 79 | -------------------------------------------------------------------------------- /libsubmit/error.py: -------------------------------------------------------------------------------- 1 | class ConfigurationError(Exception): 2 | """Error raised when a class constructor has not been initialized correctly.""" 3 | pass 4 | 5 | 6 | class ExecutionProviderException(Exception): 7 | """ Base class for all exceptions 8 | Only to be invoked when only a more specific error is not available. 
9 | 
10 |     """
11 |     pass
12 | 
13 | 
14 | class SchedulerMissingArgs(ExecutionProviderException):
15 |     ''' Error raised when the template used to compose the submit script to the local resource manager is missing required arguments
16 |     '''
17 | 
18 |     def __init__(self, missing_keywords, sitename):
19 |         self.missing_keywords = missing_keywords
20 |         self.sitename = sitename
21 | 
22 |     def __repr__(self):
23 |         return "SchedulerMissingArgs: Pool:{0} Arg:{1}".format(self.sitename, self.missing_keywords)
24 | 
25 | 
26 | class ScriptPathError(ExecutionProviderException):
27 |     ''' Error raised when the submit script could not be written out to the script path
28 |     '''
29 | 
30 |     def __init__(self, script_path, reason):
31 |         self.script_path = script_path
32 |         self.reason = reason
33 | 
34 |     def __repr__(self):
35 |         return "Unable to write submit script:{0} Reason:{1}".format(self.script_path, self.reason)
36 | 
37 | 
38 | class BadLauncher(ExecutionProviderException):
39 |     ''' Error raised when a non-callable object is provided as the launcher
40 |     '''
41 | 
42 |     def __init__(self, launcher, reason):
43 |         self.launcher = launcher
44 |         self.reason = reason
45 | 
46 |     def __repr__(self):
47 |         return "Bad Launcher provided:{0} Reason:{1}".format(self.launcher, self.reason)
48 | 
49 | 
50 | class OptionalModuleMissing(ExecutionProviderException):
51 |     ''' Error raised when a required module is missing for an optional/extra provider
52 |     '''
53 | 
54 |     def __init__(self, module_names, reason):
55 |         self.module_names = module_names
56 |         self.reason = reason
57 | 
58 |     def __repr__(self):
59 |         return "Unable to initialize provider. Missing: {0}, Reason: {1}".format(
60 |             self.module_names, self.reason
61 |         )
62 | 
63 | 
64 | class ChannelRequired(ExecutionProviderException):
65 |     ''' Execution provider requires a channel.
66 |     '''
67 | 
68 |     def __init__(self, provider, reason):
69 |         self.provider = provider
70 |         self.reason = reason
71 | 
72 |     def __repr__(self):
73 |         return "Unable to initialize provider. Provider: {0}, Reason: {1}".format(
74 |             self.provider, self.reason
75 |         )
76 | 
77 | 
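# --- Illustrative sketch (editorial addition, not part of the original file) ---
# How provider code is expected to surface these errors; the site name and
# missing keyword below are made up:
#
#     try:
#         raise SchedulerMissingArgs(['walltime'], 'my_site')
#     except ExecutionProviderException as e:
#         print(repr(e))  # -> SchedulerMissingArgs: Pool:my_site Arg:['walltime']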
78 | class ScaleOutFailed(ExecutionProviderException):
79 |     ''' Generic catch-all: scale out failed in the submit phase on the provider side.
80 |     '''
81 | 
82 |     def __init__(self, provider, reason):
83 |         self.provider = provider
84 |         self.reason = reason
85 | 
86 |     def __repr__(self):
87 |         return "Unable to scale out provider: {0}, Reason: {1}".format(
88 |             self.provider, self.reason
89 |         )
90 | 
--------------------------------------------------------------------------------
/libsubmit/launchers/__init__.py:
--------------------------------------------------------------------------------
1 | from libsubmit.launchers.launchers import SimpleLauncher, SingleNodeLauncher, \
2 |     SrunLauncher, AprunLauncher, SrunMPILauncher, \
3 |     GnuParallelLauncher, MpiExecLauncher
4 | 
5 | __all__ = ['SimpleLauncher',
6 |            'SingleNodeLauncher',
7 |            'SrunLauncher',
8 |            'AprunLauncher',
9 |            'SrunMPILauncher',
10 |            'GnuParallelLauncher',
11 |            'MpiExecLauncher']
--------------------------------------------------------------------------------
/libsubmit/launchers/launchers.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | 
3 | from libsubmit.utils import RepresentationMixin
4 | 
5 | 
6 | class Launcher(RepresentationMixin, metaclass=ABCMeta):
7 |     """ Launcher base class to enforce launcher interface
8 |     """
9 |     @abstractmethod
10 |     def __call__(self, command, tasks_per_node, nodes_per_block, walltime=None):
11 |         """ Wraps the command with the Launcher calls.
12 |         *MUST* be implemented by the concrete child classes
13 |         """
14 |         pass
15 | 
16 | 
17 | class SimpleLauncher(Launcher):
18 |     """ Does no wrapping. Just returns the command as-is
19 |     """
20 | 
21 |     def __call__(self, command, tasks_per_node, nodes_per_block, walltime=None):
22 |         """
23 |         Args:
24 |             - command (string): The command string to be launched
25 |             - tasks_per_node (int), nodes_per_block (int) : Accepted for interface compatibility; unused here.
26 | 
27 |         KWargs:
28 |             - walltime (int) : This is not used by this launcher.
29 |         """
30 |         return command
31 | 
32 | 
33 | class SingleNodeLauncher(Launcher):
34 |     """ Worker launcher that wraps the user's command with the framework to
35 |     launch multiple command invocations in parallel. This wrapper sets the
36 |     bash env variable CORES to the number of cores on the machine. By setting
37 |     task_blocks to an integer or to a bash expression the number of invocations
38 |     of the command to be launched can be controlled.
39 |     """
40 |     def __call__(self, command, tasks_per_node, nodes_per_block, walltime=None):
41 |         """
42 |         Args:
43 |             - command (string): The command string to be launched
44 |             - tasks_per_node (int), nodes_per_block (int) : Used to size the number of invocations.
45 | 
46 |         KWargs:
47 |             - walltime (int) : This is not used by this launcher.
48 |         """
49 |         task_blocks = tasks_per_node * nodes_per_block
50 | 
51 |         x = '''export CORES=$(getconf _NPROCESSORS_ONLN)
52 | echo "Found cores : $CORES"
53 | WORKERCOUNT={1}
54 | 
55 | CMD ( ) {{
56 | {0}
57 | }}
58 | for COUNT in $(seq 1 1 $WORKERCOUNT)
59 | do
60 |     echo "Launching worker: $COUNT"
61 |     CMD &
62 | done
63 | wait
64 | echo "All workers done"
65 | '''.format(command, task_blocks)
66 |         return x
67 | 
68 | 
69 | class GnuParallelLauncher(Launcher):
70 |     """ Worker launcher that wraps the user's command with the framework to
71 |     launch multiple command invocations via GNU parallel sshlogin.
72 | 
73 |     This wrapper sets the bash env variable CORES to the number of cores on the
74 |     machine.
75 | 76 | This launcher makes the following assumptions: 77 | - GNU parallel is installed and can be located in $PATH 78 | - Paswordless SSH login is configured between the controller node and the 79 | target nodes. 80 | - The provider makes available the $PBS_NODEFILE environment variable 81 | """ 82 | def __call__(self, command, tasks_per_node, nodes_per_block, walltime=None): 83 | """ 84 | Args: 85 | - command (string): The command string to be launched 86 | - task_block (string) : bash evaluated string. 87 | 88 | KWargs: 89 | - walltime (int) : This is not used by this launcher. 90 | """ 91 | task_blocks = tasks_per_node * nodes_per_block 92 | 93 | x = '''export CORES=$(getconf _NPROCESSORS_ONLN) 94 | echo "Found cores : $CORES" 95 | WORKERCOUNT={3} 96 | 97 | # Deduplicate the nodefile 98 | SSHLOGINFILE="$JOBNAME.nodes" 99 | if [ -z "$PBS_NODEFILE" ]; then 100 | echo "localhost" > $SSHLOGINFILE 101 | else 102 | sort -u $PBS_NODEFILE > $SSHLOGINFILE 103 | fi 104 | 105 | cat << PARALLEL_CMD_EOF > cmd_$JOBNAME.sh 106 | {0} 107 | PARALLEL_CMD_EOF 108 | chmod u+x cmd_$JOBNAME.sh 109 | 110 | #file to contain the commands to parallel 111 | PFILE=cmd_${{JOBNAME}}.sh.parallel 112 | 113 | # Truncate the file 114 | cp /dev/null $PFILE 115 | 116 | for COUNT in $(seq 1 1 $WORKERCOUNT) 117 | do 118 | echo "sh cmd_$JOBNAME.sh" >> $PFILE 119 | done 120 | 121 | parallel --env _ --joblog "$JOBNAME.sh.parallel.log" \ 122 | --sshloginfile $SSHLOGINFILE --jobs {1} < $PFILE 123 | 124 | echo "All workers done" 125 | '''.format(command, tasks_per_node, nodes_per_block, task_blocks) 126 | return x 127 | 128 | 129 | class MpiExecLauncher(Launcher): 130 | """ Worker launcher that wraps the user's command with the framework to 131 | launch multiple command invocations via mpiexec. 132 | 133 | This wrapper sets the bash env variable CORES to the number of cores on the 134 | machine. 135 | 136 | This launcher makes the following assumptions: 137 | - mpiexec is installed and can be located in $PATH 138 | - The provider makes available the $PBS_NODEFILE environment variable 139 | """ 140 | def __call__(self, command, tasks_per_node, nodes_per_block, walltime=None): 141 | """ 142 | Args: 143 | - command (string): The command string to be launched 144 | - task_block (string) : bash evaluated string. 145 | 146 | KWargs: 147 | - walltime (int) : This is not used by this launcher. 148 | """ 149 | task_blocks = tasks_per_node * nodes_per_block 150 | 151 | x = '''export CORES=$(getconf _NPROCESSORS_ONLN) 152 | echo "Found cores : $CORES" 153 | WORKERCOUNT={3} 154 | 155 | # Deduplicate the nodefile 156 | HOSTFILE="$JOBNAME.nodes" 157 | if [ -z "$PBS_NODEFILE" ]; then 158 | echo "localhost" > $HOSTFILE 159 | else 160 | sort -u $PBS_NODEFILE > $HOSTFILE 161 | fi 162 | 163 | cat << MPIEXEC_EOF > cmd_$JOBNAME.sh 164 | {0} 165 | MPIEXEC_EOF 166 | chmod u+x cmd_$JOBNAME.sh 167 | 168 | mpiexec --bind-to none -n $WORKERCOUNT --hostfile $HOSTFILE /usr/bin/sh cmd_$JOBNAME.sh 169 | 170 | echo "All workers done" 171 | '''.format(command, tasks_per_node, nodes_per_block, task_blocks) 172 | return x 173 | 174 | 175 | class SrunLauncher(Launcher): 176 | """ Worker launcher that wraps the user's command with the SRUN launch framework 177 | to launch multiple cmd invocations in parallel on a single job allocation. 
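    Example (editorial addition; an approximation of the generated wrapper):

        SrunLauncher()("echo hello", tasks_per_node=2, nodes_per_block=1)
        # returns a bash snippet that writes the command into
        # cmd_$SLURM_JOB_NAME.sh and runs
        # `srun --ntasks 2 -l bash cmd_$SLURM_JOB_NAME.sh`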
178 | """ 179 | 180 | def __init__(self): 181 | pass 182 | 183 | def __call__(self, command, tasks_per_node, nodes_per_block, walltime=None): 184 | """ 185 | Args: 186 | - command (string): The command string to be launched 187 | - task_block (string) : bash evaluated string. 188 | 189 | KWargs: 190 | - walltime (int) : This is not used by this launcher. 191 | """ 192 | task_blocks = tasks_per_node * nodes_per_block 193 | x = '''export CORES=$SLURM_CPUS_ON_NODE 194 | export NODES=$SLURM_JOB_NUM_NODES 195 | 196 | echo "Found cores : $CORES" 197 | echo "Found nodes : $NODES" 198 | WORKERCOUNT={1} 199 | 200 | cat << SLURM_EOF > cmd_$SLURM_JOB_NAME.sh 201 | {0} 202 | SLURM_EOF 203 | chmod a+x cmd_$SLURM_JOB_NAME.sh 204 | 205 | TASKBLOCKS={1} 206 | 207 | srun --ntasks $TASKBLOCKS -l bash cmd_$SLURM_JOB_NAME.sh 208 | 209 | echo "Done" 210 | '''.format(command, task_blocks) 211 | return x 212 | 213 | 214 | class SrunMPILauncher(Launcher): 215 | """Worker launcher that wraps the user's command with the SRUN launch framework 216 | to launch multiple cmd invocations in parallel on a single job allocation. 217 | 218 | """ 219 | def __call__(self, command, tasks_per_node, nodes_per_block, walltime=None): 220 | """ 221 | Args: 222 | - command (string): The command string to be launched 223 | - task_block (string) : bash evaluated string. 224 | 225 | KWargs: 226 | - walltime (int) : This is not used by this launcher. 227 | """ 228 | task_blocks = tasks_per_node * nodes_per_block 229 | x = '''export CORES=$SLURM_CPUS_ON_NODE 230 | export NODES=$SLURM_JOB_NUM_NODES 231 | 232 | echo "Found cores : $CORES" 233 | echo "Found nodes : $NODES" 234 | WORKERCOUNT={1} 235 | 236 | cat << SLURM_EOF > cmd_$SLURM_JOB_NAME.sh 237 | {0} 238 | SLURM_EOF 239 | chmod a+x cmd_$SLURM_JOB_NAME.sh 240 | 241 | TASKBLOCKS={1} 242 | 243 | # If there are more taskblocks to be launched than nodes use 244 | if (( "$TASKBLOCKS" > "$NODES" )) 245 | then 246 | echo "TaskBlocks:$TASKBLOCKS > Nodes:$NODES" 247 | CORES_PER_BLOCK=$(($NODES * $CORES / $TASKBLOCKS)) 248 | for blk in $(seq 1 1 $TASKBLOCKS): 249 | do 250 | srun --ntasks $CORES_PER_BLOCK -l bash cmd_$SLURM_JOB_NAME.sh & 251 | done 252 | wait 253 | else 254 | # A Task block could be integer multiples of Nodes 255 | echo "TaskBlocks:$TASKBLOCKS <= Nodes:$NODES" 256 | NODES_PER_BLOCK=$(( $NODES / $TASKBLOCKS )) 257 | for blk in $(seq 1 1 $TASKBLOCKS): 258 | do 259 | srun --exclusive --nodes $NODES_PER_BLOCK -l bash cmd_$SLURM_JOB_NAME.sh & 260 | done 261 | wait 262 | 263 | fi 264 | 265 | 266 | echo "Done" 267 | '''.format(command, task_blocks) 268 | return x 269 | 270 | 271 | class AprunLauncher(Launcher): 272 | """ Worker launcher that wraps the user's command with the Aprun launch framework 273 | to launch multiple cmd invocations in parallel on a single job allocation 274 | 275 | """ 276 | def __init__(self, overrides=''): 277 | self.overrides = overrides 278 | 279 | def __call__(self, command, tasks_per_node, nodes_per_block, walltime=None): 280 | """ 281 | Args: 282 | - command (string): The command string to be launched 283 | - tasks_per_node (int) : Workers to launch per node 284 | - nodes_per_block (int) : Number of nodes in a block 285 | 286 | KWargs: 287 | - walltime (int) : This is not used by this launcher. 
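        Example (editorial addition; an approximation of the generated wrapper):

            AprunLauncher()("echo hello", tasks_per_node=2, nodes_per_block=2)
            # returns a bash snippet ending in
            # `aprun -n 4 -N 2  /bin/bash cmd_$JOBNAME.sh &` followed by `wait`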
288 | """ 289 | 290 | tasks_per_block = tasks_per_node * nodes_per_block 291 | x = ''' 292 | WORKERCOUNT={1} 293 | 294 | cat << APRUN_EOF > cmd_$JOBNAME.sh 295 | {0} 296 | APRUN_EOF 297 | chmod a+x cmd_$JOBNAME.sh 298 | 299 | aprun -n {tasks_per_block} -N {tasks_per_node} {overrides} /bin/bash cmd_$JOBNAME.sh & 300 | wait 301 | 302 | echo "Done" 303 | '''.format(command, tasks_per_block, 304 | tasks_per_block=tasks_per_block, 305 | tasks_per_node=tasks_per_node, 306 | overrides=self.overrides) 307 | return x 308 | 309 | 310 | if __name__ == '__main__': 311 | 312 | s = SingleNodeLauncher() 313 | wrapped = s("hello", 1, 1) 314 | print(wrapped) 315 | -------------------------------------------------------------------------------- /libsubmit/providers/__init__.py: -------------------------------------------------------------------------------- 1 | # Workstation Provider 2 | from libsubmit.providers.local.local import LocalProvider 3 | 4 | # Cluster Providers 5 | 6 | from libsubmit.providers.cobalt.cobalt import CobaltProvider 7 | from libsubmit.providers.condor.condor import CondorProvider 8 | from libsubmit.providers.grid_engine.grid_engine import GridEngineProvider 9 | from libsubmit.providers.slurm.slurm import SlurmProvider 10 | from libsubmit.providers.torque.torque import TorqueProvider 11 | 12 | # Cloud Providers 13 | from libsubmit.providers.aws.aws import AWSProvider 14 | from libsubmit.providers.googlecloud.googlecloud import GoogleCloudProvider 15 | from libsubmit.providers.azure.azure import AzureProvider 16 | from libsubmit.providers.jetstream.jetstream import JetstreamProvider 17 | 18 | # Kubernetes 19 | from libsubmit.providers.kubernetes.kube import KubernetesProvider 20 | 21 | __all__ = ['LocalProvider', 22 | 'CobaltProvider', 23 | 'CondorProvider', 24 | 'GridEngineProvider', 25 | 'SlurmProvider', 26 | 'TorqueProvider', 27 | 'AWSProvider', 28 | 'GoogleCloudProvider', 29 | 'AzureProvider', 30 | 'JetstreamProvider', 31 | 'KubernetesProvider'] 32 | -------------------------------------------------------------------------------- /libsubmit/providers/aws/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/aws/__init__.py -------------------------------------------------------------------------------- /libsubmit/providers/aws/template.py: -------------------------------------------------------------------------------- 1 | template_string = """#!/bin/bash 2 | #sed -i 's/us-east-2\.ec2\.//g' /etc/apt/sources.list 3 | cd ~ 4 | apt-get update -y 5 | apt-get install -y python3 python3-pip libffi-dev g++ libssl-dev 6 | pip3 install numpy scipy parsl 7 | $overrides 8 | 9 | $user_script 10 | 11 | # Shutdown the instance as soon as the worker scripts exits 12 | # or times out to avoid EC2 costs. 13 | if ! 
$linger 14 | then 15 | halt 16 | fi 17 | """ 18 | -------------------------------------------------------------------------------- /libsubmit/providers/azure/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/azure/__init__.py -------------------------------------------------------------------------------- /libsubmit/providers/azure/azure.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | from libsubmit.error import * 5 | from libsubmit.providers.provider_base import ExecutionProvider 6 | from libsubmit.utils import RepresentationMixin 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | try: 11 | from azure.common.credentials import UserPassCredentials 12 | from libsubmit.azure.azure_deployer import Deployer 13 | 14 | except ImportError: 15 | _azure_enabled = False 16 | else: 17 | _azure_enabled = True 18 | 19 | translate_table = { 20 | 'PD': 'PENDING', 21 | 'R': 'RUNNING', 22 | 'CA': 'CANCELLED', 23 | 'CF': 'PENDING', # (configuring), 24 | 'CG': 'RUNNING', # (completing), 25 | 'CD': 'COMPLETED', 26 | 'F': 'FAILED', # (failed), 27 | 'TO': 'TIMEOUT', # (timeout), 28 | 'NF': 'FAILED', # (node failure), 29 | 'RV': 'FAILED', # (revoked) and 30 | 'SE': 'FAILED' 31 | } # (special exit state 32 | 33 | template_string = """ 34 | cd ~ 35 | sudo apt-get update -y 36 | sudo apt-get install -y python3 python3-pip ipython 37 | sudo pip3 install ipyparallel parsl 38 | """ 39 | 40 | 41 | class AzureProvider(ExecutionProvider, RepresentationMixin): 42 | """A provider for using Azure resources. 43 | 44 | Parameters 45 | ---------- 46 | profile : str 47 | Profile to be used if different from the standard Azure config file ~/.azure/config. 48 | template_file : str 49 | Location of template file for Azure instance. Default is 'templates/template.json'. 50 | walltime : str 51 | Walltime requested per block in HH:MM:SS. 52 | azure_template_file : str 53 | Path to the template file for the Azure instance. 54 | init_blocks : int 55 | Number of blocks to provision at the start of the run. Default is 1. 56 | min_blocks : int 57 | Minimum number of blocks to maintain. Default is 0. 58 | max_blocks : int 59 | Maximum number of blocks to maintain. Default is 10. 60 | nodes_per_block : int 61 | Nodes to provision per block. Default is 1. 
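    Example (editorial addition; the credential values are placeholders):

        provider = AzureProvider(subscription_id='...',
                                 username='user@example.com',
                                 password='...')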
62 |     """
63 | 
64 |     def __init__(self,
65 |                  subscription_id,
66 |                  username,
67 |                  password,
68 |                  label='azure',
69 |                  template_file='template.json',
70 |                  init_blocks=1,
71 |                  min_blocks=0,
72 |                  max_blocks=1,
73 |                  nodes_per_block=1,
74 |                  state_file=None):
75 |         if not _azure_enabled:
76 |             raise OptionalModuleMissing(['azure'], "Azure Provider requires the azure module.")
77 | 
78 |         # Imported here so that the module stays importable when the optional
79 |         # azure dependency is not installed.
80 |         from azure.mgmt.resource import ResourceManagementClient
81 |         from azure.mgmt.storage import StorageManagementClient
82 | 
83 |         self.label = label
84 |         credentials = UserPassCredentials(username, password)
85 | 
86 |         self.resource_client = ResourceManagementClient(credentials, subscription_id)
87 |         self.storage_client = StorageManagementClient(credentials, subscription_id)
88 | 
89 |         self.resource_group_name = 'my_resource_group'
90 |         # read_configs() is an assumed helper that loads the Azure config
91 |         # (see azureconf.json) for the Deployer.
92 |         self.config = self.read_configs(template_file)
93 |         self.deployer = Deployer(subscription_id, self.resource_group_name, self.config)
94 | 
95 |         self.provisioned_blocks = 0
96 |         self.resources = {}
97 |         self.instances = []
98 | 
99 |         self.init_blocks = init_blocks
100 |         self.min_blocks = min_blocks
101 |         self.max_blocks = max_blocks
102 |         self.max_nodes = max_blocks * nodes_per_block
103 | 
104 |         try:
105 |             if state_file is None:
106 |                 state_file = '.azure_{}.json'.format(self.label)
107 |             self.read_state_file(state_file)
108 | 
109 |         except Exception:
110 |             logger.info("No state file. Cannot load previous options. Creating new infrastructure.")
111 |             self.write_state_file()
112 | 
113 |     def submit(self, command='sleep 1', blocksize=1, job_name="parsl.auto"):
114 |         """Submit command to an Azure instance.
115 | 
116 |         Submit returns an ID that corresponds to the task that was just submitted.
117 | 
118 |         Parameters
119 |         ----------
120 |         command : str
121 |             Command to be invoked on the remote side.
122 |         blocksize : int
123 |             Number of blocks requested.
124 |         job_name : str
125 |             Prefix for job name.
126 | 
127 |         Returns
128 |         -------
129 |         None or str
130 |             If at capacity (no more can be provisioned), None is returned. Otherwise,
131 |             an identifier for the job is returned.
132 |         """
133 | 
134 |         job_name = "parsl.auto.{0}".format(time.time())
135 |         [instance, *rest] = self.deployer.deploy(command=command, job_name=job_name, blocksize=1)
136 | 
137 |         if not instance:
138 |             logger.error("Failed to submit request to Azure")
139 |             return None
140 | 
141 |         logger.debug("Started instance_id: {0}".format(instance.instance_id))
142 | 
143 |         state = translate_table.get(instance.state['Name'], "PENDING")
144 | 
145 |         self.resources[instance.instance_id] = {"job_id": instance.instance_id, "instance": instance, "status": state}
146 | 
147 |         return instance.instance_id
148 | 
149 |     def status(self, job_ids):
150 |         """Get the status of a list of jobs identified by their ids.
151 | 
152 |         Parameters
153 |         ----------
154 |         job_ids : list of str
155 |             Identifiers for the jobs.
156 | 
157 |         Returns
158 |         -------
159 |         list of str
160 |             Status strings for each requested job.
161 |         """
162 |         states = []
163 |         statuses = self.deployer.get_vm_status([self.resources.get(job_id) for job_id in job_ids])
164 |         for status in statuses:
165 |             states.append(translate_table.get(status.state['Name'], "PENDING"))
166 |         return states
167 | 
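    # --- Illustrative lifecycle sketch (editorial addition, not part of the
    # original file) --- how an executor is expected to drive this provider;
    # the command and returned values are placeholders:
    #
    #     job_id = provider.submit('sleep 60', blocksize=1)
    #     provider.status([job_id])   # e.g. ['PENDING'], later ['RUNNING']
    #     provider.cancel([job_id])   # -> [True]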
168 |     def cancel(self, job_ids):
169 |         """Cancel jobs specified by a list of job ids.
170 | 
171 |         Parameters
172 |         ----------
173 |         job_ids : list of str
174 |             List of identifiers of jobs which should be canceled.
175 | 
176 |         Returns
177 |         -------
178 |         list of bool
179 |             For each entry, True if the cancel operation is successful, otherwise False.
180 |         """
181 |         statuses = []
182 |         for job_id in job_ids:
183 |             try:
184 |                 self.deployer.destroy([job_id])
185 |                 statuses.append(True)
186 |             except Exception as e:
187 |                 logger.error("Failed to cancel {}".format(repr(job_id)))
188 |                 logger.error(e)
189 |                 statuses.append(False)
190 |         return statuses
191 | 
192 |     @property
193 |     def scaling_enabled(self):
194 |         return True
195 | 
196 |     @property
197 |     def current_capacity(self):
198 |         """Returns the current blocksize."""
199 |         return len(self.instances)
200 | 
201 | 
202 | if __name__ == '__main__':
203 |     config = open("azureconf.json")
--------------------------------------------------------------------------------
/libsubmit/providers/azure/azureconf.json:
--------------------------------------------------------------------------------
1 | {
2 |     "site": "azure",
3 |     "nodeGranularity": 1,
4 |     "maxNodes": 5,
5 |     "AMIID": "ami-ae90b6cb",
6 |     "logFile": "azureprovider.log",
7 |     "username": "",
8 |     "pass": "",
9 |     "subscriptionId": "",
10 |     "location": "eastus",
11 |     "AZURE_CLIENT_ID": "0",
12 |     "AZURE_CLIENT_SECRET": "",
13 |     "AZURE_TENANT_ID": "0",
14 |     "azureTemplateFile": "template.json"
15 | }
16 | 
--------------------------------------------------------------------------------
/libsubmit/providers/azure/deployer.py:
--------------------------------------------------------------------------------
1 | """A deployer class to deploy a template on Azure"""
2 | import json
3 | import os.path
4 | 
5 | from azure.common.credentials import ServicePrincipalCredentials
6 | from azure.mgmt.resource import ResourceManagementClient
7 | from azure.mgmt.resource.resources.models import DeploymentMode
8 | # Assumed dependency: haikunator generates short human-readable unique names.
9 | from haikunator import Haikunator
10 | 
11 | 
12 | class Deployer(object):
13 |     """ Initialize the deployer class with subscription, resource group and public key.
14 | 
15 |     :raises IOError: If the public key path cannot be read (access or not exists)
16 |     :raises KeyError: If the AZURE_CLIENT_ID, AZURE_CLIENT_SECRET or AZURE_TENANT_ID env
17 |         variables are not defined
18 |     """
19 |     config = ""
20 | 
21 |     def __init__(self, subscription_id, resource_group, config,
22 |                  pub_ssh_key_path='~/.ssh/id_rsa.pub'):
23 |         self.config = config
24 |         self.subscription_id = subscription_id
25 |         self.resource_group = resource_group
26 |         self.name_generator = Haikunator()
27 |         self.dns_label_prefix = self.name_generator.haikunate()
28 |         self.location = self.config['location']
29 | 
30 |         pub_ssh_key_path = os.path.expanduser(pub_ssh_key_path)
31 |         # Will raise if file does not exist or permissions are insufficient
32 |         with open(pub_ssh_key_path, 'r') as pub_ssh_file_fd:
33 |             self.pub_ssh_key = pub_ssh_file_fd.read()
34 |         self.credentials = ServicePrincipalCredentials(
35 |             client_id=self.config['AZURE_CLIENT_ID'],
36 |             secret=self.config['AZURE_CLIENT_SECRET'],
37 |             tenant=self.config['AZURE_TENANT_ID']
38 |         )
39 |         self.client = ResourceManagementClient(
40 |             self.credentials, self.subscription_id)
41 | 
42 |     def deploy(self, job_name, command='', blocksize=1):
43 |         """Deploy the template to a resource group."""
44 |         instances = []
45 |         self.client.resource_groups.create_or_update(
46 |             self.resource_group,
47 |             {
48 |                 'location': self.location,
49 | 
50 |             }
51 |         )
52 | 
53 |         template_path = os.path.join(os.path.dirname(
54 |             __file__), 'templates', 'template.json')
55 |         with open(template_path, 'r') as template_file_fd:
56 |             template = json.load(template_file_fd)
57 | 
58 |         parameters = {
59 |             'sshKeyData': self.pub_ssh_key,
60 |             'vmName': 'azure-deployment-sample-vm',
61 |             'dnsLabelPrefix': self.dns_label_prefix
62 |         }
63 |         parameters = {k: {'value': v} for k, v in parameters.items()}
64 | 
65 |         deployment_properties = {
66 |             'mode': DeploymentMode.incremental,
67 |             'template': template,
68 |             'parameters': parameters
69 |         }
70 |         for i in range(blocksize):
71 |             deployment_async_operation = self.client.deployments.create_or_update(
72 |                 self.resource_group,
73 |                 'azure-sample',
74 |                 deployment_properties
75 |             )
76 |             instances.append(deployment_async_operation.wait())
77 |         return instances
78 | 
79 |     def destroy(self, job_ids):
80 |         """Destroy the resource group (all deployments in it are torn down)."""
81 |         for job_id in job_ids:
82 |             self.client.resource_groups.delete(self.resource_group)
83 | 
84 |     def get_vm(self, resource_group_name, vm_name):
85 |         '''
86 |         You need to retry this just in case the credentials token expires;
87 |         that's where the decorator comes in.
88 |         This will return all the data about the virtual machine.
89 |         '''
90 |         return self.client.virtual_machines.get(
91 |             resource_group_name, vm_name, expand='instanceView')
92 | 
93 |     def get_vm_status(self, vm_name, rgn):
94 |         '''
95 |         This will just return the status of the virtual machine.
96 |         Sometimes the status may be unknown, as shown by the azure portal;
97 |         in that case statuses[1] doesn't exist, hence retrying on IndexError.
98 |         Also, it may take on the order of minutes for the status to become
99 |         available, so the decorator will bang on it forever.
100 |         '''
101 |         rgn = rgn if rgn else self.resource_group
102 |         return self.client.virtual_machines.get(
103 |             rgn, vm_name).instance_view.statuses[1].display_status
104 | 
--------------------------------------------------------------------------------
/libsubmit/providers/cluster_provider.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | from string import Template
4 | 
5 | 
import libsubmit.error as ep_error 6 | from libsubmit.providers.provider_base import ExecutionProvider 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class ClusterProvider(ExecutionProvider): 12 | """ This class defines behavior common to all cluster/supercompute-style scheduler systems. 13 | 14 | Parameters 15 | ---------- 16 | label : str 17 | Label for this provider. 18 | channel : Channel 19 | Channel for accessing this provider. Possible channels include 20 | :class:`~libsubmit.channels.LocalChannel` (the default), 21 | :class:`~libsubmit.channels.SSHChannel`, or 22 | :class:`~libsubmit.channels.SSHInteractiveLoginChannel`. 23 | script_dir : str 24 | Relative or absolute path to a directory where intermediate scripts are placed. 25 | walltime : str 26 | Walltime requested per block in HH:MM:SS. 27 | launcher : str 28 | FIXME 29 | cmd_timeout : int 30 | Timeout for commands made to the scheduler in seconds 31 | 32 | .. code:: python 33 | 34 | +------------------ 35 | | 36 | script_string ------->| submit 37 | id <--------|---+ 38 | | 39 | [ ids ] ------->| status 40 | [statuses] <--------|----+ 41 | | 42 | [ ids ] ------->| cancel 43 | [cancel] <--------|----+ 44 | | 45 | [True/False] <--------| scaling_enabled 46 | | 47 | +------------------- 48 | """ 49 | 50 | def __init__(self, 51 | label, 52 | channel, 53 | script_dir, 54 | nodes_per_block, 55 | tasks_per_node, 56 | init_blocks, 57 | min_blocks, 58 | max_blocks, 59 | parallelism, 60 | walltime, 61 | launcher, 62 | cmd_timeout=10): 63 | 64 | self._scaling_enabled = True 65 | self.label = label 66 | self.channel = channel 67 | self.tasks_per_block = nodes_per_block * tasks_per_node 68 | self.nodes_per_block = nodes_per_block 69 | self.tasks_per_node = tasks_per_node 70 | self.init_blocks = init_blocks 71 | self.min_blocks = min_blocks 72 | self.max_blocks = max_blocks 73 | self.parallelism = parallelism 74 | self.provisioned_blocks = 0 75 | self.launcher = launcher 76 | self.walltime = walltime 77 | self.cmd_timeout = cmd_timeout 78 | if not callable(self.launcher): 79 | raise(ep_error.BadLauncher(self.launcher, 80 | "Launcher for executor:{} is of type:{}. Expects a libsubmit.launcher.launcher.Launcher or callable".format( 81 | label, 82 | type(self.launcher)))) 83 | 84 | self.script_dir = script_dir 85 | if not os.path.exists(self.script_dir): 86 | os.makedirs(self.script_dir) 87 | 88 | # Dictionary that keeps track of jobs, keyed on job_id 89 | self.resources = {} 90 | 91 | def execute_wait(self, cmd, timeout=None): 92 | t = self.cmd_timeout 93 | if timeout is not None: 94 | t = timeout 95 | return self.channel.execute_wait(cmd, t) 96 | 97 | def _write_submit_script(self, template, script_filename, job_name, configs): 98 | """Generate submit script and write it to a file. 
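        (Editorial example, not part of the original docstring.) The template is
        filled using string.Template semantics, e.g.:

            Template("echo $jobname").substitute(jobname="parsl.auto.0")
            # -> "echo parsl.auto.0"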
99 | 
100 |         Args:
101 |               - template (string) : The template string to be used for writing the submit script
102 |               - script_filename (string) : Name of the submit script
103 |               - job_name (string) : job name
104 |               - configs (dict) : configs that get pushed into the template
105 | 
106 |         Returns:
107 |               - True: on success
108 | 
109 |         Raises:
110 |               SchedulerMissingArgs : If template is missing args
111 |               ScriptPathError : Unable to write submit script out
112 |         """
113 | 
114 |         try:
115 |             submit_script = Template(template).substitute(jobname=job_name, **configs)
116 |             # submit_script = Template(template).safe_substitute(jobname=job_name, **configs)
117 |             with open(script_filename, 'w') as f:
118 |                 f.write(submit_script)
119 | 
120 |         except KeyError as e:
121 |             logger.error("Missing keys for submit script : %s", e)
122 |             raise (ep_error.SchedulerMissingArgs(e.args, self.label))
123 | 
124 |         except IOError as e:
125 |             logger.error("Failed writing to submit script: %s", script_filename)
126 |             raise (ep_error.ScriptPathError(script_filename, e))
127 |         except Exception as e:
128 |             logger.error("Template : %s", template)
129 |             logger.error("Args : %s", job_name)
130 |             logger.error("Kwargs : %s", configs)
131 |             logger.error("Uncategorized error: %s", e)
132 |             raise (e)
133 | 
134 |         return True
135 | 
136 |     def submit(self, cmd_string, blocksize, job_name="parsl.auto"):
137 |         ''' The submit method takes the command string to be executed upon
138 |         instantiation of a resource, most often to start a pilot (such as an IPP engine
139 |         or even Swift-T engines).
140 | 
141 |         Args :
142 |              - cmd_string (str) : The bash command string to be executed.
143 |              - blocksize (int) : Blocksize to be requested
144 | 
145 |         KWargs:
146 |              - job_name (str) : Human friendly name to be assigned to the job request
147 | 
148 |         Returns:
149 |              - A job identifier, this could be an integer, string etc
150 | 
151 |         Raises:
152 |              - ExecutionProviderExceptions or its subclasses
153 |         '''
154 |         raise NotImplementedError
155 | 
156 |     def _status(self):
157 |         raise NotImplementedError
158 | 
159 |     def status(self, job_ids):
160 |         """ Get the status of a list of jobs identified by the job identifiers
161 |         returned from the submit request.
162 | 
163 |         Args:
164 |              - job_ids (list) : A list of job identifiers
165 | 
166 |         Returns:
167 |              - A list of status from ['PENDING', 'RUNNING', 'CANCELLED', 'COMPLETED',
168 |                'FAILED', 'TIMEOUT'] corresponding to each job_id in the job_ids list.
169 | 
170 |         Raises:
171 |              - ExecutionProviderException or its subclasses
172 | 
173 |         """
174 |         if job_ids:
175 |             self._status()
176 |         return [self.resources[jid]['status'] for jid in job_ids]
177 | 
178 |     def cancel(self, job_ids):
179 |         """ Cancels the resources identified by the job_ids provided by the user.
180 | 
181 |         Args:
182 |              - job_ids (list): A list of job identifiers
183 | 
184 |         Returns:
185 |              - A list of status from cancelling the job which can be True, False
186 | 
187 |         Raises:
188 |              - ExecutionProviderException or its subclasses
189 |         """
190 | 
191 |         raise NotImplementedError
192 | 
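    # --- Illustrative sketch (editorial addition, not part of the original file) ---
    # A concrete scheduler provider subclasses ClusterProvider and fills in the
    # abstract pieces; the class name and commands below are placeholders:
    #
    #     class MyScheduler(ClusterProvider):
    #         def submit(self, cmd_string, blocksize, job_name="parsl.auto"):
    #             ...  # write a script via self._write_submit_script, then qsub/sbatch it
    #         def _status(self):
    #             ...  # refresh self.resources[job_id]['status'] for tracked jobs
    #         def cancel(self, job_ids):
    #             ...  # qdel/scancel the listed jobs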
193 |     @property
194 |     def scaling_enabled(self):
195 |         """ The callers of ParslExecutors need to differentiate between Executors
196 |         and Executors wrapped in a resource provider
197 | 
198 |         Returns:
199 |               - Status (Bool)
200 |         """
201 |         return self._scaling_enabled
202 | 
203 |     @property
204 |     def current_capacity(self):
205 |         """ Returns the currently provisioned blocks.
206 |         This may need to return more information in the future:
207 |         { minsize, maxsize, current_requested }
208 |         """
209 |         return self.provisioned_blocks
--------------------------------------------------------------------------------
/libsubmit/providers/cobalt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/cobalt/__init__.py
--------------------------------------------------------------------------------
/libsubmit/providers/cobalt/cobalt.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import time
4 | 
5 | import libsubmit.error as ep_error
6 | from libsubmit.channels import LocalChannel
7 | from libsubmit.launchers import AprunLauncher
8 | from libsubmit.providers.cobalt.template import template_string
9 | from libsubmit.providers.cluster_provider import ClusterProvider
10 | from libsubmit.utils import RepresentationMixin, wtime_to_minutes
11 | 
12 | logger = logging.getLogger(__name__)
13 | 
14 | translate_table = {
15 |     'QUEUED': 'PENDING',
16 |     'STARTING': 'PENDING',
17 |     'RUNNING': 'RUNNING',
18 |     'EXITING': 'COMPLETED',
19 |     'KILLING': 'COMPLETED'
20 | }
21 | 
22 | 
23 | class CobaltProvider(ClusterProvider, RepresentationMixin):
24 |     """ Cobalt Execution Provider
25 | 
26 |     This provider uses Cobalt to submit (qsub), obtain the status of (qstat), and cancel (qdel)
27 |     jobs. The script to be used is created from a template file in this
28 |     same module.
29 | 
30 |     Parameters
31 |     ----------
32 |     channel : Channel
33 |         Channel for accessing this provider. Possible channels include
34 |         :class:`~libsubmit.channels.LocalChannel` (the default),
35 |         :class:`~libsubmit.channels.SSHChannel`, or
36 |         :class:`~libsubmit.channels.SSHInteractiveLoginChannel`.
37 |     label : str
38 |         Label for this provider.
39 |     script_dir : str
40 |         Relative or absolute path to a directory where intermediate scripts are placed.
41 |     nodes_per_block : int
42 |         Nodes to provision per block.
43 |     tasks_per_node : int
44 |         Tasks to run per node.
45 |     min_blocks : int
46 |         Minimum number of blocks to maintain.
47 |     max_blocks : int
48 |         Maximum number of blocks to maintain.
49 |     walltime : str
50 |         Walltime requested per block in HH:MM:SS.
51 |     account : str
52 |         Account that the job will be charged against.
53 |     queue : str
54 |         Cobalt queue to request blocks from.
55 |     overrides : str
56 |         String to append to the Cobalt submit script on the scheduler.
57 |     launcher : Launcher
58 |         Launcher for this provider.
Possible launchers include 59 | :class:`~libsubmit.launchers.AprunLauncher` (the default) or, 60 | :class:`~libsubmit.launchers.SingleNodeLauncher` 61 | """ 62 | def __init__(self, 63 | channel=LocalChannel(), 64 | label='cobalt', 65 | script_dir='parsl_scripts', 66 | nodes_per_block=1, 67 | tasks_per_node=1, 68 | init_blocks=0, 69 | min_blocks=0, 70 | max_blocks=10, 71 | parallelism=1, 72 | walltime="00:10:00", 73 | account=None, 74 | queue=None, 75 | overrides='', 76 | launcher=AprunLauncher(), 77 | cmd_timeout=10): 78 | super().__init__(label, 79 | channel=channel, 80 | script_dir=script_dir, 81 | nodes_per_block=nodes_per_block, 82 | tasks_per_node=tasks_per_node, 83 | init_blocks=init_blocks, 84 | min_blocks=min_blocks, 85 | max_blocks=max_blocks, 86 | parallelism=parallelism, 87 | walltime=walltime, 88 | launcher=launcher, 89 | cmd_timeout=cmd_timeout) 90 | 91 | self.account = account 92 | self.queue = queue 93 | self.overrides = overrides 94 | 95 | def _status(self): 96 | """ Internal: Do not call. Returns the status list for a list of job_ids 97 | 98 | Args: 99 | self 100 | 101 | Returns: 102 | [status...] : Status list of all jobs 103 | """ 104 | 105 | jobs_missing = list(self.resources.keys()) 106 | 107 | retcode, stdout, stderr = super().execute_wait("qstat -u $USER") 108 | 109 | # Execute_wait failed. Do no update. 110 | if retcode != 0: 111 | return 112 | 113 | for line in stdout.split('\n'): 114 | if line.startswith('='): 115 | continue 116 | 117 | parts = line.upper().split() 118 | if parts and parts[0] != 'JOBID': 119 | job_id = parts[0] 120 | 121 | if job_id not in self.resources: 122 | continue 123 | 124 | status = translate_table.get(parts[4], 'UNKNOWN') 125 | 126 | self.resources[job_id]['status'] = status 127 | jobs_missing.remove(job_id) 128 | 129 | # squeue does not report on jobs that are not running. So we are filling in the 130 | # blanks for missing jobs, we might lose some information about why the jobs failed. 131 | for missing_job in jobs_missing: 132 | if self.resources[missing_job]['status'] in ['RUNNING', 'KILLING', 'EXITING']: 133 | self.resources[missing_job]['status'] = translate_table['EXITING'] 134 | 135 | def submit(self, command, blocksize, job_name="parsl.auto"): 136 | """ Submits the command onto an Local Resource Manager job of blocksize parallel elements. 137 | Submit returns an ID that corresponds to the task that was just submitted. 138 | 139 | If tasks_per_node < 1 : ! This is illegal. tasks_per_node should be integer 140 | 141 | If tasks_per_node == 1: 142 | A single node is provisioned 143 | 144 | If tasks_per_node > 1 : 145 | tasks_per_node * blocksize number of nodes are provisioned. 146 | 147 | Args: 148 | - command :(String) Commandline invocation to be made on the remote side. 149 | - blocksize :(float) 150 | 151 | Kwargs: 152 | - job_name (String): Name for job, must be unique 153 | 154 | Returns: 155 | - None: At capacity, cannot provision more 156 | - job_id: (string) Identifier for the job 157 | 158 | """ 159 | 160 | if self.provisioned_blocks >= self.max_blocks: 161 | logger.warn("[%s] at capacity, cannot add more blocks now", self.label) 162 | return None 163 | 164 | # Note: Fix this later to avoid confusing behavior. 
165 | # We should always allocate blocks in integer counts of node_granularity 166 | if blocksize < self.nodes_per_block: 167 | blocksize = self.nodes_per_block 168 | 169 | account_opt = '-A {}'.format(self.account) if self.account is not None else '' 170 | 171 | job_name = "parsl.{0}.{1}".format(job_name, time.time()) 172 | 173 | script_path = "{0}/{1}.submit".format(self.script_dir, job_name) 174 | script_path = os.path.abspath(script_path) 175 | 176 | job_config = {} 177 | job_config["overrides"] = self.overrides 178 | 179 | logger.debug("Requesting blocksize:%s nodes_per_block:%s tasks_per_node:%s", 180 | blocksize, self.nodes_per_block, self.tasks_per_node) 181 | 182 | # Wrap the command 183 | job_config["user_script"] = self.launcher(command, self.tasks_per_node, self.nodes_per_block) 184 | 185 | queue_opt = '-q {}'.format(self.queue) if self.queue is not None else '' 186 | 187 | logger.debug("Writing submit script") 188 | self._write_submit_script(template_string, script_path, job_name, job_config) 189 | 190 | channel_script_path = self.channel.push_file(script_path, self.channel.script_dir) 191 | 192 | command = 'qsub -n {0} {1} -t {2} {3} {4}'.format( 193 | self.nodes_per_block, queue_opt, wtime_to_minutes(self.walltime), account_opt, channel_script_path) 194 | logger.debug("Executing {}".format(command)) 195 | 196 | retcode, stdout, stderr = super().execute_wait(command) 197 | 198 | # TODO : FIX this block 199 | if retcode != 0: 200 | logger.error("Failed command: {0}".format(command)) 201 | logger.error("Launch failed stdout:\n{0} \nstderr:{1}\n".format(stdout, stderr)) 202 | 203 | logger.debug("Retcode:%s STDOUT:%s STDERR:%s", retcode, stdout.strip(), stderr.strip()) 204 | 205 | job_id = None 206 | 207 | if retcode == 0: 208 | # We should be getting only one line back 209 | job_id = stdout.strip() 210 | self.resources[job_id] = {'job_id': job_id, 'status': 'PENDING', 'blocksize': blocksize} 211 | else: 212 | logger.error("Submission of command to scale_out failed: {0}".format(stderr)) 213 | raise (ep_error.ScaleOutFailed(self.__class__, "Request to submit job to local scheduler failed")) 214 | 215 | logger.debug("Returning job id : {0}".format(job_id)) 216 | return job_id 217 | 218 | def cancel(self, job_ids): 219 | """ Cancels the jobs specified by a list of job ids 220 | 221 | Args: 222 | job_ids : [ ...] 223 | 224 | Returns : 225 | [True/False...] : If the cancel operation fails the entire list will be False. 
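            Example (editorial addition; the job id is a placeholder):
                provider.cancel(['123456'])  # -> [True]; the tracked status becomes 'COMPLETED'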
226 | """ 227 | 228 | job_id_list = ' '.join(job_ids) 229 | retcode, stdout, stderr = super().execute_wait("qdel {0}".format(job_id_list)) 230 | rets = None 231 | if retcode == 0: 232 | for jid in job_ids: 233 | self.resources[jid]['status'] = translate_table['KILLING'] # Setting state to cancelled 234 | rets = [True for i in job_ids] 235 | else: 236 | rets = [False for i in job_ids] 237 | 238 | return rets 239 | -------------------------------------------------------------------------------- /libsubmit/providers/cobalt/template.py: -------------------------------------------------------------------------------- 1 | template_string = '''#!/bin/bash -e 2 | $overrides 3 | 4 | echo "Starting Cobalt job script" 5 | 6 | echo "----Cobalt Nodefile: -----" 7 | cat $$COBALT_NODEFILE 8 | echo "--------------------------" 9 | 10 | export JOBNAME="${jobname}" 11 | 12 | $user_script 13 | 14 | echo "End of Cobalt job" 15 | ''' 16 | -------------------------------------------------------------------------------- /libsubmit/providers/condor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/condor/__init__.py -------------------------------------------------------------------------------- /libsubmit/providers/condor/template.py: -------------------------------------------------------------------------------- 1 | template_string = ''' 2 | universe = vanilla 3 | should_transfer_files = YES 4 | when_to_transfer_output = ON_EXIT_OR_EVICT 5 | Transfer_Executable = false 6 | transfer_input_files = ${input_files} 7 | machine_count = ${nodes} 8 | output = ${submit_script_dir}/${job_name}.stdout 9 | error = ${submit_script_dir}/${job_name}.stderr 10 | executable = /bin/bash 11 | arguments = ${job_script} 12 | requirements = ${requirements} 13 | +projectname = ${project} 14 | leave_in_queue = TRUE 15 | environment = "${environment}" 16 | 17 | ${overrides} 18 | 19 | queue 20 | 21 | ''' 22 | 23 | # for later, 24 | # if we want to remove on preemption, this might work: 25 | # PERIODIC_REMOVE = (NumJobstarts > 1) 26 | # or if the pilot can trap signals, then we can send a special exit code on 27 | # sigterm/sigkill and remove that way. 
but then we still need to be careful in
28 | # cases where the worker dies, for example -- in that case no signal is sent
29 | 
--------------------------------------------------------------------------------
/libsubmit/providers/googlecloud/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/googlecloud/__init__.py
--------------------------------------------------------------------------------
/libsubmit/providers/googlecloud/googlecloud.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import logging
3 | import os
4 | 
5 | logger = logging.getLogger(__name__)
6 | 
7 | try:
8 |     import googleapiclient.discovery
9 | 
10 | except ImportError:
11 |     _google_enabled = False
12 | else:
13 |     _google_enabled = True
14 | 
15 | translate_table = {
16 |     'PENDING': 'PENDING',
17 |     'PROVISIONING': 'PENDING',
18 |     "STAGING": "PENDING",
19 |     'RUNNING': 'RUNNING',
20 |     'DONE': 'COMPLETED',
21 |     'STOPPING': 'COMPLETED',
22 |     'STOPPED': 'COMPLETED',
23 |     'TERMINATED': 'COMPLETED',
24 |     'SUSPENDING': 'COMPLETED',
25 |     'SUSPENDED': 'COMPLETED',
26 | }
27 | 
28 | 
29 | class GoogleCloudProvider():
30 |     """A provider for using resources from the Google Compute Engine.
31 | 
32 |     Parameters
33 |     ----------
34 |     project_id : str
35 |         Project ID from Google compute engine.
36 |     key_file : str
37 |         Path to authorization private key json file. This is required for auth.
38 |         A new one can be generated here: https://console.cloud.google.com/apis/credentials
39 |     region : str
40 |         Region in which to start instances
41 |     os_project : str
42 |         OS project code for Google compute engine.
43 |     os_family : str
44 |         OS family to request.
45 |     label : str
46 |         A label for this executor. Default is 'google_cloud'.
47 |     google_version : str
48 |         Google compute engine version to use. Possibilities include 'v1' (default) or 'beta'.
49 |     instance_type : str
50 |         Google Compute Engine machine type, e.g. 'n1-standard-1' (the default).
51 |     script_dir : str
52 |         Relative or absolute path to a directory where intermediate scripts are placed.
53 |     init_blocks : int
54 |         Number of blocks to provision immediately. Default is 1.
55 |     min_blocks : int
56 |         Minimum number of blocks to maintain. Default is 0.
57 |     max_blocks : int
58 |         Maximum number of blocks to maintain. Default is 10.
59 |     parallelism : float
60 |         Ratio of provisioned task slots to active tasks. A parallelism value of 1 represents aggressive
61 |         scaling where as many resources as possible are used; parallelism close to 0 represents
62 |         the opposite situation in which as few resources as possible (i.e., min_blocks) are used.
63 | 
64 |     ..
code:: python 65 | 66 | +------------------ 67 | | 68 | script_string ------->| submit 69 | id <--------|---+ 70 | | 71 | [ ids ] ------->| status 72 | [statuses] <--------|----+ 73 | | 74 | [ ids ] ------->| cancel 75 | [cancel] <--------|----+ 76 | | 77 | [True/False] <--------| scaling_enabled 78 | | 79 | +------------------- 80 | """ 81 | 82 | def __init__(self, 83 | project_id, 84 | key_file, 85 | region, 86 | os_project, 87 | os_family, 88 | label='google_cloud', 89 | google_version='v1', 90 | instance_type='n1-standard-1', 91 | script_dir='parsl_scripts', 92 | init_blocks=1, 93 | min_blocks=0, 94 | max_blocks=10, 95 | parallelism=1): 96 | self.project_id = project_id 97 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_file 98 | self.zone = self.get_zone(region) 99 | self.os_project = os_project 100 | self.os_family = os_family 101 | self.label = label 102 | self.client = googleapiclient.discovery.build('compute', google_version) 103 | self.instance_type = instance_type 104 | self.script_dir = script_dir 105 | if not os.path.exists(self.script_dir): 106 | os.makedirs(self.script_dir) 107 | self.init_blocks = init_blocks 108 | self.min_blocks = min_blocks 109 | self.max_blocks = max_blocks 110 | self.parallelism = parallelism 111 | self.num_instances = 0 112 | 113 | # Dictionary that keeps track of jobs, keyed on job_id 114 | self.resources = {} 115 | self.provisioned_blocks = 0 116 | atexit.register(self.bye) 117 | 118 | def submit(self, command="", blocksize=1, job_name="parsl.auto"): 119 | ''' The submit method takes the command string to be executed upon 120 | instantiation of a resource most often to start a pilot. 121 | 122 | Args : 123 | - command (str) : The bash command string to be executed. 124 | - blocksize (int) : Blocksize to be requested 125 | 126 | KWargs: 127 | - job_name (str) : Human friendly name to be assigned to the job request 128 | 129 | Returns: 130 | - A job identifier, this could be an integer, string etc 131 | 132 | Raises: 133 | - ExecutionProviderException or its subclasses 134 | ''' 135 | instance, name = self.create_instance(command=command) 136 | self.provisioned_blocks += 1 137 | self.resources[name] = {"job_id": name, "status": translate_table[instance['status']]} 138 | return name 139 | 140 | def status(self, job_ids): 141 | ''' Get the status of a list of jobs identified by the job identifiers 142 | returned from the submit request. 143 | 144 | Args: 145 | - job_ids (list) : A list of job identifiers 146 | 147 | Returns: 148 | - A list of status from ['PENDING', 'RUNNING', 'CANCELLED', 'COMPLETED', 149 | 'FAILED', 'TIMEOUT'] corresponding to each job_id in the job_ids list. 150 | 151 | Raises: 152 | - ExecutionProviderException or its subclasses 153 | 154 | ''' 155 | statuses = [] 156 | for job_id in job_ids: 157 | instance = self.client.instances().get(instance=job_id, project=self.project_id, zone=self.zone).execute() 158 | self.resources[job_id]['status'] = translate_table[instance['status']] 159 | statuses.append(translate_table[instance['status']]) 160 | return statuses 161 | 162 | def cancel(self, job_ids): 163 | ''' Cancels the resources identified by the job_ids provided by the user. 
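Together with ``submit`` and ``status`` above, this completes the
        provider lifecycle. A minimal end-to-end sketch, assuming valid
        credentials (the project, key file, and image values here are
        hypothetical):

        .. code:: python

            provider = GoogleCloudProvider(project_id='my-project',       # hypothetical project
                                           key_file='/path/to/key.json',  # hypothetical key file
                                           region='us-central1',
                                           os_project='debian-cloud',
                                           os_family='debian-9')
            job = provider.submit(command='sleep 60')  # boots one instance
            print(provider.status([job]))              # e.g. ['PENDING'] or ['RUNNING']
            provider.cancel([job])                     # deletes the instance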
164 | 165 | Args: 166 | - job_ids (list): A list of job identifiers 167 | 168 | Returns: 169 | - A list of status from cancelling the job which can be True, False 170 | 171 | Raises: 172 | - ExecutionProviderException or its subclasses 173 | ''' 174 | statuses = [] 175 | for job_id in job_ids: 176 | try: 177 | self.delete_instance(job_id) 178 | statuses.append(True) 179 | self.provisioned_blocks -= 1 180 | except Exception: 181 | statuses.append(False) 182 | return statuses 183 | 184 | @property 185 | def scaling_enabled(self): 186 | ''' Scaling is enabled 187 | 188 | Returns: 189 | - Status (Bool) 190 | ''' 191 | return True 192 | 193 | @property 194 | def current_capacity(self): 195 | """Returns the number of currently provisioned blocks.""" 196 | return self.provisioned_blocks 197 | 198 | def bye(self): 199 | self.cancel([i for i in list(self.resources)]) 200 | 201 | def create_instance(self, command=""): 202 | name = "parslauto{}".format(self.num_instances) 203 | self.num_instances += 1 204 | compute = self.client 205 | project = self.project_id 206 | image_response = compute.images().getFromFamily( 207 | project=self.os_project, family=self.os_family).execute() 208 | source_disk_image = image_response['selfLink'] 209 | 210 | # Configure the machine 211 | machine_type = "zones/{}/machineTypes/{}".format(self.zone, self.instance_type) 212 | startup_script = command 213 | 214 | config = { 215 | 'name': name, 216 | 'machineType': machine_type, 217 | 218 | # Specify the boot disk and the image to use as a source. 219 | 'disks': [{ 220 | 'boot': True, 221 | 'autoDelete': True, 222 | 'initializeParams': { 223 | 'sourceImage': source_disk_image, 224 | } 225 | }], 226 | 'networkInterfaces': [{ 227 | 'network': 'global/networks/default', 228 | 'accessConfigs': [{ 229 | 'type': 'ONE_TO_ONE_NAT', 230 | 'name': 'External NAT' 231 | }] 232 | }], 233 | 'serviceAccounts': [{ 234 | 'email': 235 | 'default', 236 | 'scopes': [ 237 | 'https://www.googleapis.com/auth/devstorage.read_write', 238 | 'https://www.googleapis.com/auth/logging.write' 239 | ] 240 | }], 241 | 'metadata': { 242 | 'items': [{ 243 | # Startup script is automatically executed by the 244 | # instance upon startup. 
245 | 'key': 'startup-script', 246 | 'value': startup_script 247 | }] 248 | } 249 | } 250 | 251 | return compute.instances().insert(project=project, zone=self.zone, body=config).execute(), name 252 | 253 | def get_zone(self, region): 254 | res = self.client.zones().list(project=self.project_id).execute() 255 | for zone in res['items']: 256 | if region in zone['name'] and zone['status'] == "UP": 257 | return zone["name"] 258 | 259 | def delete_instance(self, name): 260 | 261 | compute = self.client 262 | project = self.project_id 263 | zone = self.zone 264 | 265 | return compute.instances().delete(project=project, zone=zone, instance=name).execute() 266 | -------------------------------------------------------------------------------- /libsubmit/providers/grid_engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/grid_engine/__init__.py -------------------------------------------------------------------------------- /libsubmit/providers/grid_engine/grid_engine.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import time 4 | 5 | from libsubmit.channels import LocalChannel 6 | from libsubmit.providers.cluster_provider import ClusterProvider 7 | from libsubmit.providers.grid_engine.template import template_string 8 | from libsubmit.launchers import SingleNodeLauncher 9 | from libsubmit.utils import RepresentationMixin, wtime_to_minutes 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | translate_table = { 14 | 'qw': 'PENDING', 15 | 'hqw': 'PENDING', 16 | 'hrwq': 'PENDING', 17 | 'r': 'RUNNING', 18 | 's': 'FAILED', # suspended 19 | 'ts': 'FAILED', 20 | 't': 'FAILED', # Suspended by alarm 21 | 'eqw': 'FAILED', # Error states 22 | 'ehqw': 'FAILED', # .. 23 | 'ehrqw': 'FAILED', # .. 24 | 'd': 'COMPLETED', 25 | 'dr': 'COMPLETED', 26 | 'dt': 'COMPLETED', 27 | 'drt': 'COMPLETED', 28 | 'ds': 'COMPLETED', 29 | 'drs': 'COMPLETED', 30 | } 31 | 32 | 33 | class GridEngineProvider(ClusterProvider, RepresentationMixin): 34 | """A provider for the Grid Engine scheduler. 35 | 36 | Parameters 37 | ---------- 38 | channel : Channel 39 | Channel for accessing this provider. Possible channels include 40 | :class:`~libsubmit.channels.LocalChannel` (the default), 41 | :class:`~libsubmit.channels.SSHChannel`, or 42 | :class:`~libsubmit.channels.SSHInteractiveLoginChannel`. 43 | label : str 44 | Label for this provider. 45 | script_dir : str 46 | Relative or absolute path to a directory where intermediate scripts are placed. 47 | nodes_per_block : int 48 | Nodes to provision per block. 49 | tasks_per_node : int 50 | Tasks to run per node. 51 | min_blocks : int 52 | Minimum number of blocks to maintain. 53 | max_blocks : int 54 | Maximum number of blocks to maintain. 55 | parallelism : float 56 | Ratio of provisioned task slots to active tasks. A parallelism value of 1 represents aggressive 57 | scaling where as many resources as possible are used; parallelism close to 0 represents 58 | the opposite situation in which as few resources as possible (i.e., min_blocks) are used. 59 | walltime : str 60 | Walltime requested per block in HH:MM:SS. 61 | overrides : str 62 | String to prepend to the #$ blocks in the submit script to the scheduler. 63 | launcher : Launcher 64 | Launcher for this provider. 
Possible launchers include 65 | :class:`~libsubmit.launchers.SingleNodeLauncher` (the default). 66 | """ 67 | 68 | def __init__(self, 69 | channel=LocalChannel(), 70 | label='grid_engine', 71 | script_dir='parsl_scripts', 72 | nodes_per_block=1, 73 | tasks_per_node=1, 74 | init_blocks=1, 75 | min_blocks=0, 76 | max_blocks=10, 77 | parallelism=1, 78 | walltime="00:10:00", 79 | overrides='', 80 | launcher=SingleNodeLauncher()): 81 | super().__init__(label, 82 | channel, 83 | script_dir, 84 | nodes_per_block, 85 | tasks_per_node, 86 | init_blocks, 87 | min_blocks, 88 | max_blocks, 89 | parallelism, 90 | walltime, 91 | launcher) 92 | self.overrides = overrides 93 | 94 | if launcher in ['srun', 'srun_mpi']: 95 | logger.warning("Use of {} launcher is usually appropriate for Slurm providers. " 96 | "Recommended options include 'single_node' or 'aprun'.".format(launcher)) 97 | 98 | def get_configs(self, command): 99 | """Compose a dictionary with information for writing the submit script.""" 100 | 101 | logger.debug("Requesting one block with {} nodes per block and {} tasks per node".format( 102 | self.nodes_per_block, self.tasks_per_node)) 103 | 104 | job_config = {} 105 | job_config["submit_script_dir"] = self.channel.script_dir 106 | job_config["nodes"] = self.nodes_per_block 107 | job_config["walltime"] = wtime_to_minutes(self.walltime) 108 | job_config["overrides"] = self.overrides 109 | job_config["user_script"] = command 110 | 111 | job_config["user_script"] = self.launcher(command, 112 | self.tasks_per_node, 113 | self.nodes_per_block) 114 | return job_config 115 | 116 | def submit(self, command="", blocksize=1, job_name="parsl.auto"): 117 | ''' The submit method takes the command string to be executed upon 118 | instantiation of a resource most often to start a pilot (such as IPP engine 119 | or even Swift-T engines). 120 | 121 | Args : 122 | - command (str) : The bash command string to be executed. 123 | - blocksize (int) : Blocksize to be requested 124 | 125 | KWargs: 126 | - job_name (str) : Human friendly name to be assigned to the job request 127 | 128 | Returns: 129 | - A job identifier, this could be an integer, string etc 130 | 131 | Raises: 132 | - ExecutionProviderException or its subclasses 133 | ''' 134 | 135 | # Note: Fix this later to avoid confusing behavior. 136 | # We should always allocate blocks in integer counts of node_granularity 137 | if blocksize < self.nodes_per_block: 138 | blocksize = self.nodes_per_block 139 | 140 | # Set job name 141 | job_name = "{0}.{1}".format(job_name, time.time()) 142 | 143 | # Set script path 144 | script_path = "{0}/{1}.submit".format(self.script_dir, job_name) 145 | script_path = os.path.abspath(script_path) 146 | 147 | job_config = self.get_configs(command) 148 | 149 | logger.debug("Writing submit script") 150 | self._write_submit_script(template_string, script_path, job_name, job_config) 151 | 152 | channel_script_path = self.channel.push_file(script_path, self.channel.script_dir) 153 | cmd = "qsub -terse {0}".format(channel_script_path) 154 | retcode, stdout, stderr = super().execute_wait(cmd, 10) 155 | 156 | if retcode == 0: 157 | for line in stdout.split('\n'): 158 | job_id = line.strip() 159 | if not job_id: 160 | continue 161 | self.resources[job_id] = {'job_id': job_id, 'status': 'PENDING', 'blocksize': blocksize} 162 | return job_id 163 | else: 164 | print("[WARNING!!] 
Submission of command to scale_out failed") 165 | logger.error("Retcode:%s STDOUT:%s STDERR:%s", retcode, stdout.strip(), stderr.strip()) 166 | 167 | def _status(self): 168 | ''' Get the status of a list of jobs identified by the job identifiers 169 | returned from the submit request. 170 | 171 | Returns: 172 | - A list of status from ['PENDING', 'RUNNING', 'CANCELLED', 'COMPLETED', 173 | 'FAILED', 'TIMEOUT'] corresponding to each job_id in the job_ids list. 174 | 175 | Raises: 176 | - ExecutionProviderException or its subclasses 177 | 178 | ''' 179 | 180 | cmd = "qstat" 181 | 182 | retcode, stdout, stderr = super().execute_wait(cmd) 183 | 184 | # Execute_wait failed. Do no update 185 | if retcode != 0: 186 | return 187 | 188 | jobs_missing = list(self.resources.keys()) 189 | for line in stdout.split('\n'): 190 | parts = line.split() 191 | if parts and parts[0].lower().lower() != 'job-id' \ 192 | and not parts[0].startswith('----'): 193 | job_id = parts[0] 194 | status = translate_table.get(parts[4].lower(), 'UNKNOWN') 195 | if job_id in self.resources: 196 | self.resources[job_id]['status'] = status 197 | jobs_missing.remove(job_id) 198 | 199 | # Filling in missing blanks for jobs that might have gone missing 200 | # we might lose some information about why the jobs failed. 201 | for missing_job in jobs_missing: 202 | if self.resources[missing_job]['status'] in ['PENDING', 'RUNNING']: 203 | self.resources[missing_job]['status'] = 'COMPLETED' 204 | 205 | def cancel(self, job_ids): 206 | ''' Cancels the resources identified by the job_ids provided by the user. 207 | 208 | Args: 209 | - job_ids (list): A list of job identifiers 210 | 211 | Returns: 212 | - A list of status from cancelling the job which can be True, False 213 | 214 | Raises: 215 | - ExecutionProviderException or its subclasses 216 | ''' 217 | 218 | job_id_list = ' '.join(job_ids) 219 | cmd = "qdel {}".format(job_id_list) 220 | retcode, stdout, stderr = super().execute_wait(cmd, 3) 221 | 222 | rets = None 223 | if retcode == 0: 224 | for jid in job_ids: 225 | self.resources[jid]['status'] = "COMPLETED" 226 | rets = [True for i in job_ids] 227 | else: 228 | rets = [False for i in job_ids] 229 | 230 | return rets 231 | -------------------------------------------------------------------------------- /libsubmit/providers/grid_engine/template.py: -------------------------------------------------------------------------------- 1 | template_string = """#!/bin/bash 2 | #$$ -S /bin/bash 3 | #$$ -o ${submit_script_dir}/${jobname}.submit.stdout 4 | #$$ -e ${submit_script_dir}/${jobname}.submit.stderr 5 | #$$ -cwd 6 | #$$ -l h_rt=${walltime} 7 | $overrides 8 | 9 | export JOBNAME="${jobname}" 10 | 11 | $user_script 12 | """ 13 | -------------------------------------------------------------------------------- /libsubmit/providers/jetstream/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/jetstream/__init__.py -------------------------------------------------------------------------------- /libsubmit/providers/jetstream/jetstream.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import configparser 3 | import logging 4 | import os 5 | 6 | from libsubmit.error import * 7 | 8 | try: 9 | from novaclient import api_versions 10 | from novaclient import client 11 | 12 | except ImportError: 13 | _nova_enabled = False 14 | else: 15 | 
_nova_enabled = True 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | setup_script = '''#!/bin/bash 20 | LOG=/root/userdata.logs 21 | echo "Userdata logs " > $LOG 22 | apt-get update &>> $LOG 23 | yes | aptdcon --hide-terminal --install python3-pip &>> $LOG 24 | pip3 install jupyter ipyparallel parsl &>> $LOG 25 | cat <<EOF > ipcontroller-engine.json 26 | {engine_config} 27 | EOF 28 | ipengine --file=ipcontroller-engine.json &>> $LOG 29 | ''' 30 | 31 | 32 | class JetstreamProvider(object): 33 | def __init__(self, config, poolname): 34 | self.config = config 35 | self.blocks = {} 36 | self.pool = poolname 37 | controller_file = "~/.ipython/profile_default/security/ipcontroller-engine.json" 38 | 39 | if not _nova_enabled: 40 | raise OptionalModuleMissing(['python-novaclient'], 41 | "Jetstream Provider requires the python-novaclient module.") 42 | 43 | self.client = client.Client( 44 | api_versions.APIVersion("2.0"), 45 | config['sites.jetstream']['OS_USERNAME'], 46 | config['sites.jetstream']['OS_PASSWORD'], 47 | project_id=config['sites.jetstream']['OS_PROJECT_ID'], 48 | project_name=config['sites.jetstream']['OS_PROJECT_NAME'], 49 | auth_url=config['sites.jetstream']['OS_AUTH_URL'], 50 | insecure=False, 51 | region_name=config['sites.jetstream']['OS_REGION_NAME'], 52 | user_domain_name=config['sites.jetstream']['OS_USER_DOMAIN_NAME']) 53 | 54 | api_version = api_versions.get_api_version("2.0") 55 | api_version = api_versions.discover_version(self.client, api_version) 56 | client.discover_extensions(api_version) 57 | 58 | logger.debug(self.client.has_neutron()) 59 | self.server_manager = self.client.servers 60 | 61 | try: 62 | with open(os.path.expanduser(controller_file), 'r') as f: 63 | self.engine_config = f.read() 64 | 65 | except FileNotFoundError: 66 | logger.error("No controller_file found at : %s. Cannot proceed", controller_file) 67 | exit(-1) 68 | 69 | except Exception as e: 70 | 71 | logger.error("Caught exception while reading from the ipcontroller_engine.json") 72 | raise e 73 | 74 | try: 75 | # Check if the authentication worked by forcing a call 76 | self.server_manager.list() 77 | 78 | except Exception as e: 79 | logger.error("Caught exception : %s", e) 80 | raise e 81 | 82 | flavors = self.client.flavors.list() 83 | 84 | try: 85 | self.flavor = [f for f in flavors if f.name == config['sites.jetstream.{0}'.format(poolname)]['flavor']][0] 86 | except Exception as e: 87 | logger.error("Caught exception : %s", e) 88 | raise e 89 | 90 | self.sec_groups = ast.literal_eval(config['sites.jetstream.{0}'.format(poolname)]['sec_groups']) 91 | self.nics = ast.literal_eval(config['sites.jetstream.{0}'.format(poolname)]['nics']) 92 | 93 | def scale_out(self, blocks=1, block_size=1): 94 | ''' Scale out the existing resources. 
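        KWargs:
            - blocks (int) : Number of blocks to add; only ``blocks == 1`` is handled.
            - block_size (int) : Number of instances to boot per block.

        A sketch of driving this provider, following the ``__main__`` block at
        the bottom of this file (the config path is hypothetical):

        .. code:: python

            import configparser

            cfg = configparser.ConfigParser()
            cfg.read('/path/to/parsl.config')           # hypothetical config path
            provider = JetstreamProvider(cfg, 'pool1')  # 'pool1' must match a config section
            provider.scale_out(blocks=1, block_size=2)  # boot two instances
            provider.scale_in(machines=2)               # tear them down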
95 | ''' 96 | self.config['sites.jetstream.{0}'.format(self.pool)]['flavor'] 97 | count = 0 98 | if blocks == 1: 99 | block_id = len(self.blocks) 100 | self.blocks[block_id] = [] 101 | for instance_id in range(0, block_size): 102 | instances = self.server_manager.create( 103 | 'parsl-{0}-{1}'.format(block_id, instance_id), # Name 104 | self.client.images.get('87e08a17-eae2-4ce4-9051-c561d9a54bde'), # Image_id 105 | self.client.flavors.list()[0], 106 | min_count=1, 107 | max_count=1, 108 | userdata=setup_script.format(engine_config=self.engine_config), 109 | key_name='TG-MCB090174-api-key', 110 | security_groups=['global-ssh'], 111 | nics=[{ 112 | "net-id": '724a50cf-7f11-4b3b-a884-cd7e6850e39e', 113 | "net-name": 'PARSL-priv-net', 114 | "v4-fixed-ip": '' 115 | }]) 116 | self.blocks[block_id].extend([instances]) 117 | count += 1 118 | 119 | return count 120 | 121 | def scale_in(self, blocks=0, machines=0, strategy=None): 122 | ''' Scale in resources 123 | ''' 124 | count = 0 125 | instances = self.client.servers.list() 126 | for instance in instances[0:machines]: 127 | print("Deleting : ", instance) 128 | instance.delete() 129 | count += 1 130 | 131 | return count 132 | 133 | 134 | if __name__ == '__main__': 135 | 136 | Config = configparser.ConfigParser() 137 | Config.read('/home/yadu/.ssh/parsl.config') 138 | 139 | print(Config['sites.jetstream']['OS_AUTH_URL']) 140 | foo = JetstreamProvider(Config, 'pool1') 141 | 142 | # foo.scale_out(blocks=1, block_size=2) 143 | foo.scale_in(machines=2) 144 | # foo.scale_out(blocks=1, block_size=2) 145 | -------------------------------------------------------------------------------- /libsubmit/providers/jetstream/setup_first_time.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | check_tools() { 4 | 5 | which nova 6 | if [[ $? != 0 ]] 7 | then 8 | echo "nova is missing. 
Try apt-get install nova" 9 | fi 10 | 11 | } 12 | 13 | setup_secgroups() { 14 | echo "Setting up sec groups" 15 | nova secgroup-create global-ssh "ssh & icmp enabled" 16 | nova secgroup-add-rule global-ssh tcp 22 22 0.0.0.0/0 17 | nova secgroup-add-rule global-ssh icmp -1 -1 0.0.0.0/0 18 | 19 | } 20 | 21 | setup_keypair() { 22 | ssh-keygen -b 2048 -t rsa -f ${OS_PROJECT_NAME}-api-key -P "" 23 | nova keypair-add --pub-key ${OS_PROJECT_NAME}-api-key.pub ${OS_PROJECT_NAME}-api-key 24 | } 25 | 26 | setup_network() { 27 | 28 | parsl_net="PARSL-priv-net" 29 | neutron net-create $parsl_net 30 | neutron net-list 31 | neutron subnet-create $parsl_net 10.0.0.0/24 --name parsl-api-subnet1 32 | neutron net-list 33 | neutron router-create parsl-api-router 34 | neutron router-interface-add parsl-api-router parsl-api-subnet1 35 | neutron router-gateway-set parsl-api-router public 36 | neutron router-show parsl-api-router 37 | } 38 | 39 | check_tools 40 | #setup_secgroups 41 | #setup_keypair 42 | setup_network 43 | nova boot parsl-executor-001 --flavor m1.small --image 87e08a17-eae2-4ce4-9051-c561d9a54bde --key-name TG-MCB090174-api-key --security-groups global-ssh --nic net-name=PARSL-priv-net 44 | -------------------------------------------------------------------------------- /libsubmit/providers/kubernetes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/kubernetes/__init__.py -------------------------------------------------------------------------------- /libsubmit/providers/kubernetes/kube.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from libsubmit.providers.kubernetes.template import template_string 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | from libsubmit.error import * 8 | from libsubmit.providers.provider_base import ExecutionProvider 9 | 10 | try: 11 | from kubernetes import client, config 12 | config.load_kube_config() 13 | _kubernetes_enabled = True 14 | except (ImportError, NameError, FileNotFoundError): 15 | _kubernetes_enabled = False 16 | 17 | 18 | class KubernetesProvider(ExecutionProvider): 19 | """ Kubernetes execution provider: 20 | 21 | TODO: put in a config 22 | """ 23 | 24 | def __repr__(self): 25 | return "<Kubernetes Execution Provider for site:{0}>".format(self.sitename) 26 | 27 | def __init__(self, config, channel=None): 28 | """ Initialize the Kubernetes execution provider class 29 | 30 | Args: 31 | - Config (dict): Dictionary with all the config options. 
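              A sketch of the expected shape, inferred from the keys read in
              the constructor body below (all values hypothetical):

              .. code:: python

                  config = {
                      'site': 'k8s-site',
                      'execution': {
                          'namespace': 'default',
                          'image': 'python:3.6',   # container image to launch
                          'block': {
                              'initBlocks': 1,
                              'minBlocks': 0,
                              'maxBlocks': 10,
                              'options': {'overrides': ''},
                          },
                          # optional keys: 'security', 'secret'
                      },
                  }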
32 | 33 | KWargs : 34 | - channel (channel object) : default=None A channel object 35 | """ 36 | 37 | self.channel = channel 38 | 39 | if not _kubernetes_enabled: 40 | raise OptionalModuleMissing(['kubernetes'], 41 | "Kubernetes provider requires kubernetes module and config.") 42 | 43 | self.kube_client = client.ExtensionsV1beta1Api() 44 | 45 | self.config = config 46 | self.sitename = self.config['site'] 47 | self.namespace = self.config['execution']['namespace'] 48 | self.image = self.config['execution']['image'] 49 | 50 | self.init_blocks = self.config["execution"]["block"]["initBlocks"] 51 | self.min_blocks = self.config["execution"]["block"]["minBlocks"] 52 | self.max_blocks = self.config["execution"]["block"]["maxBlocks"] 53 | 54 | self.user_id = None 55 | self.group_id = None 56 | self.run_as_non_root = None 57 | if 'security' in self.config['execution']: 58 | self.user_id = self.config["execution"]['security']["user_id"] 59 | self.group_id = self.config["execution"]['security']["group_id"] 60 | self.run_as_non_root = self.config["execution"]['security']["run_as_non_root"] 61 | 62 | self.secret = None 63 | if 'secret' in self.config['execution']: 64 | self.secret = self.config['execution']['secret'] 65 | 66 | # Dictionary that keeps track of jobs, keyed on job_id 67 | self.resources = {} 68 | 69 | def submit(self, cmd_string, blocksize, job_name="parsl.auto"): 70 | """ Submit a job 71 | 72 | Args: 73 | - cmd_string :(String) - Name of the container to initiate 74 | - blocksize :(float) - Number of replicas 75 | 76 | Kwargs: 77 | - job_name (String): Name for job, must be unique 78 | 79 | Returns: 80 | - None: At capacity, cannot provision more 81 | - job_id: (string) Identifier for the job 82 | 83 | """ 84 | if not self.resources: 85 | job_name = "{0}-{1}".format(job_name, time.time()).split(".")[0] 86 | 87 | self.deployment_name = '{}-{}-deployment'.format(job_name, 88 | str(time.time()).split('.')[0]) 89 | 90 | formatted_cmd = template_string.format(command=cmd_string, 91 | overrides=self.config["execution"]["block"]["options"].get("overrides", '')) 92 | 93 | print("Creating replicas :", self.init_blocks) 94 | self.deployment_obj = self._create_deployment_object(job_name, 95 | self.image, 96 | self.deployment_name, 97 | cmd_string=formatted_cmd, 98 | replicas=self.init_blocks) 99 | logger.debug("Deployment name :{}".format(self.deployment_name)) 100 | self._create_deployment(self.deployment_obj) 101 | self.resources[self.deployment_name] = {'status': 'RUNNING', 102 | 'pods': self.init_blocks} 103 | 104 | return self.deployment_name 105 | 106 | def status(self, job_ids): 107 | """ Get the status of a list of jobs identified by the job identifiers 108 | returned from the submit request. 109 | 110 | Args: 111 | - job_ids (list) : A list of job identifiers 112 | 113 | Returns: 114 | - A list of status from ['PENDING', 'RUNNING', 'CANCELLED', 'COMPLETED', 115 | 'FAILED', 'TIMEOUT'] corresponding to each job_id in the job_ids list. 116 | 117 | Raises: 118 | - ExecutionProviderExceptions or its subclasses 119 | 120 | """ 121 | self._status() 122 | # This is a hack 123 | return ['RUNNING' for jid in job_ids] 124 | 125 | def cancel(self, job_ids): 126 | """ Cancels the jobs specified by a list of job ids 127 | 128 | Args: 129 | job_ids : [ ...] 130 | 131 | Returns : 132 | [True/False...] : If the cancel operation fails the entire list will be False. 
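        A sketch of use, with a hypothetical pilot command (the identifier is
        the deployment name returned by ``submit`` above):

        .. code:: python

            name = provider.submit('ipengine', 1)  # hypothetical command
            provider.cancel([name])                # deletes the deployment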
133 | """ 134 | for job in job_ids: 135 | logger.debug("Terminating job/proc_id : {0}".format(job)) 136 | # Here we are assuming that for local, the job_ids are the process id's 137 | self._delete_deployment(job) 138 | 139 | self.resources[job]['status'] = 'CANCELLED' 140 | rets = [True for i in job_ids] 141 | 142 | return rets 143 | 144 | def _status(self): 145 | """ Internal: Do not call. Returns the status list for a list of job_ids 146 | 147 | Args: 148 | self 149 | 150 | Returns: 151 | [status...] : Status list of all jobs 152 | """ 153 | 154 | jobs_ids = list(self.resources.keys()) 155 | # TODO: fix this 156 | return jobs_ids 157 | # do something to get the deployment's status 158 | 159 | def _create_deployment_object(self, job_name, job_image, 160 | deployment_name, port=80, 161 | replicas=1, 162 | cmd_string=None, 163 | engine_json_file='~/.ipython/profile_default/security/ipcontroller-engine.json', 164 | engine_dir='.'): 165 | """ Create a kubernetes deployment for the job. 166 | 167 | Args: 168 | - job_name (string) : Name of the job and deployment 169 | - job_image (string) : Docker image to launch 170 | 171 | KWargs: 172 | - port (integer) : Container port 173 | - replicas : Number of replica containers to maintain 174 | 175 | Returns: 176 | - True: The deployment object to launch 177 | """ 178 | 179 | # sorry, quick hack that doesn't pass this stuff through to test it works. 180 | # TODO it also doesn't only add what is set :( 181 | security_context = None 182 | if 'security' in self.config['execution']: 183 | security_context = client.V1SecurityContext(run_as_group=self.group_id, 184 | run_as_user=self.user_id, 185 | run_as_non_root=self.run_as_non_root) 186 | # self.user_id = None 187 | # self.group_id = None 188 | # self.run_as_non_root = None 189 | # Create the enviornment variables and command to initiate IPP 190 | environment_vars = client.V1EnvVar(name="TEST", value="SOME DATA") 191 | 192 | launch_args = ["-c", "{0}; /app/deploy.sh;".format(cmd_string)] 193 | print(launch_args) 194 | 195 | # Configureate Pod template container 196 | container = None 197 | if security_context: 198 | container = client.V1Container( 199 | name=job_name, 200 | image=job_image, 201 | ports=[client.V1ContainerPort(container_port=port)], 202 | command=['/bin/bash'], 203 | args=launch_args, 204 | env=[environment_vars], 205 | security_context=security_context) 206 | else: 207 | container = client.V1Container( 208 | name=job_name, 209 | image=job_image, 210 | ports=[client.V1ContainerPort(container_port=port)], 211 | command=['/bin/bash'], 212 | args=launch_args, 213 | env=[environment_vars]) 214 | # Create a secret to enable pulling images from secure repositories 215 | secret = None 216 | if self.secret: 217 | secret = client.V1LocalObjectReference(name=self.secret) 218 | 219 | # Create and configurate a spec section 220 | template = client.V1PodTemplateSpec( 221 | metadata=client.V1ObjectMeta(labels={"app": job_name}), 222 | spec=client.V1PodSpec(containers=[container], image_pull_secrets=[secret])) 223 | 224 | # Create the specification of deployment 225 | spec = client.ExtensionsV1beta1DeploymentSpec(replicas=replicas, 226 | template=template) 227 | 228 | # Instantiate the deployment object 229 | deployment = client.ExtensionsV1beta1Deployment( 230 | api_version="extensions/v1beta1", 231 | kind="Deployment", 232 | metadata=client.V1ObjectMeta(name=deployment_name), 233 | spec=spec) 234 | 235 | return deployment 236 | 237 | def _create_deployment(self, deployment): 238 | """ Create the 
kubernetes deployment """ 239 | 240 | api_response = self.kube_client.create_namespaced_deployment( 241 | body=deployment, 242 | namespace=self.namespace) 243 | 244 | logger.debug("Deployment created. status='{0}'".format(str(api_response.status))) 245 | 246 | def _delete_deployment(self, deployment_name): 247 | """ Delete deployment """ 248 | 249 | api_response = self.kube_client.delete_namespaced_deployment( 250 | name=deployment_name, 251 | namespace=self.namespace, 252 | body=client.V1DeleteOptions( 253 | propagation_policy='Foreground', 254 | grace_period_seconds=5)) 255 | 256 | logger.debug("Deployment deleted. status='{0}'".format( 257 | str(api_response.status))) 258 | 259 | @property 260 | def scaling_enabled(self): 261 | return False 262 | 263 | @property 264 | def channels_required(self): 265 | return False 266 | -------------------------------------------------------------------------------- /libsubmit/providers/kubernetes/template.py: -------------------------------------------------------------------------------- 1 | template_string = """{overrides} 2 | 3 | {command} 4 | """ 5 | -------------------------------------------------------------------------------- /libsubmit/providers/local/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/local/__init__.py -------------------------------------------------------------------------------- /libsubmit/providers/local/local.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import signal 4 | import time 5 | 6 | import libsubmit.error as ep_error 7 | from libsubmit.channels import LocalChannel 8 | from libsubmit.launchers import SingleNodeLauncher 9 | from libsubmit.providers.provider_base import ExecutionProvider 10 | from libsubmit.utils import RepresentationMixin 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | translate_table = { 15 | 'PD': 'PENDING', 16 | 'R': 'RUNNING', 17 | 'CA': 'CANCELLED', 18 | 'CF': 'PENDING', # (configuring), 19 | 'CG': 'RUNNING', # (completing), 20 | 'CD': 'COMPLETED', 21 | 'F': 'FAILED', 22 | 'TO': 'TIMEOUT', 23 | 'NF': 'FAILED', # (node failure), 24 | 'RV': 'FAILED', # (revoked) and 25 | 'SE': 'FAILED' 26 | } # (special exit state 27 | 28 | 29 | class LocalProvider(ExecutionProvider, RepresentationMixin): 30 | """ Local Execution Provider 31 | 32 | This provider is used to provide execution resources from the localhost. 33 | 34 | Parameters 35 | ---------- 36 | 37 | min_blocks : int 38 | Minimum number of blocks to maintain. 39 | max_blocks : int 40 | Maximum number of blocks to maintain. 41 | parallelism : float 42 | Ratio of provisioned task slots to active tasks. A parallelism value of 1 represents aggressive 43 | scaling where as many resources as possible are used; parallelism close to 0 represents 44 | the opposite situation in which as few resources as possible (i.e., min_blocks) are used. 
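    A minimal sketch of typical use (the command is arbitrary):

    .. code:: python

        provider = LocalProvider(init_blocks=1)
        job = provider.submit('sleep 60', blocksize=1)  # runs via bash from script_dir
        print(provider.status([job]))                   # e.g. ['RUNNING']
        provider.cancel([job])                          # kills the process group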
45 | """ 46 | 47 | def __init__(self, 48 | channel=LocalChannel(), 49 | label='local', 50 | script_dir='parsl_scripts', 51 | tasks_per_node=1, 52 | nodes_per_block=1, 53 | launcher=SingleNodeLauncher(), 54 | init_blocks=4, 55 | min_blocks=0, 56 | max_blocks=10, 57 | walltime="00:15:00", 58 | parallelism=1): 59 | self.channel = channel 60 | self.label = label 61 | if not os.path.exists(script_dir): 62 | os.makedirs(script_dir) 63 | self.script_dir = script_dir 64 | self.provisioned_blocks = 0 65 | self.nodes_per_block = nodes_per_block 66 | self.tasks_per_node = tasks_per_node 67 | self.launcher = launcher 68 | self.init_blocks = init_blocks 69 | self.min_blocks = min_blocks 70 | self.max_blocks = max_blocks 71 | self.parallelism = parallelism 72 | self.walltime = walltime 73 | 74 | # Dictionary that keeps track of jobs, keyed on job_id 75 | self.resources = {} 76 | 77 | def status(self, job_ids): 78 | ''' Get the status of a list of jobs identified by their ids. 79 | 80 | Args: 81 | - job_ids (List of ids) : List of identifiers for the jobs 82 | 83 | Returns: 84 | - List of status codes. 85 | 86 | ''' 87 | 88 | logging.debug("Checking status of : {0}".format(job_ids)) 89 | for job_id in self.resources: 90 | poll_code = self.resources[job_id]['proc'].poll() 91 | if self.resources[job_id]['status'] in ['COMPLETED', 'FAILED']: 92 | continue 93 | 94 | if poll_code is None: 95 | self.resources[job_id]['status'] = 'RUNNING' 96 | elif poll_code == 0 and self.resources[job_id]['status'] != 'RUNNING': 97 | self.resources[job_id]['status'] = 'COMPLETED' 98 | elif poll_code < 0 and self.resources[job_id]['status'] != 'RUNNING': 99 | self.resources[job_id]['status'] = 'FAILED' 100 | 101 | return [self.resources[jid]['status'] for jid in job_ids] 102 | 103 | def _write_submit_script(self, script_string, script_filename): 104 | ''' 105 | Load the template string with config values and write the generated submit script to 106 | a submit script file. 107 | 108 | Args: 109 | - template_string (string) : The template string to be used for the writing submit script 110 | - script_filename (string) : Name of the submit script 111 | 112 | Returns: 113 | - True: on success 114 | 115 | Raises: 116 | SchedulerMissingArgs : If template is missing args 117 | ScriptPathError : Unable to write submit script out 118 | ''' 119 | 120 | try: 121 | with open(script_filename, 'w') as f: 122 | f.write(script_string) 123 | 124 | except KeyError as e: 125 | logger.error("Missing keys for submit script : %s", e) 126 | raise (ep_error.SchedulerMissingArgs(e.args, self.label)) 127 | 128 | except IOError as e: 129 | logger.error("Failed writing to submit script: %s", script_filename) 130 | raise (ep_error.ScriptPathError(script_filename, e)) 131 | 132 | return True 133 | 134 | def submit(self, command, blocksize, job_name="parsl.auto"): 135 | ''' Submits the command onto an Local Resource Manager job of blocksize parallel elements. 136 | Submit returns an ID that corresponds to the task that was just submitted. 137 | 138 | If tasks_per_node < 1: 139 | 1/tasks_per_node is provisioned 140 | 141 | If tasks_per_node == 1: 142 | A single node is provisioned 143 | 144 | If tasks_per_node > 1 : 145 | tasks_per_node * blocksize number of nodes are provisioned. 146 | 147 | Args: 148 | - command :(String) Commandline invocation to be made on the remote side. 
149 | - blocksize :(float) - Not really used for local 150 | 151 | Kwargs: 152 | - job_name (String): Name for job, must be unique 153 | 154 | Returns: 155 | - None: At capacity, cannot provision more 156 | - job_id: (string) Identifier for the job 157 | 158 | ''' 159 | 160 | job_name = "{0}.{1}".format(job_name, time.time()) 161 | 162 | # Set script path 163 | script_path = "{0}/{1}.sh".format(self.script_dir, job_name) 164 | script_path = os.path.abspath(script_path) 165 | 166 | wrap_command = self.launcher(command, self.tasks_per_node, self.nodes_per_block) 167 | 168 | self._write_submit_script(wrap_command, script_path) 169 | 170 | job_id, proc = self.channel.execute_no_wait('bash {0}'.format(script_path), 3) 171 | self.resources[job_id] = {'job_id': job_id, 'status': 'RUNNING', 'blocksize': blocksize, 'proc': proc} 172 | 173 | return job_id 174 | 175 | def cancel(self, job_ids): 176 | ''' Cancels the jobs specified by a list of job ids 177 | 178 | Args: 179 | job_ids : [ ...] 180 | 181 | Returns : 182 | [True/False...] : If the cancel operation fails the entire list will be False. 183 | ''' 184 | 185 | for job in job_ids: 186 | logger.debug("Terminating job/proc_id : {0}".format(job)) 187 | # Here we are assuming that for local, the job_ids are the process id's 188 | proc = self.resources[job]['proc'] 189 | os.killpg(os.getpgid(proc.pid), signal.SIGTERM) 190 | self.resources[job]['status'] = 'CANCELLED' 191 | rets = [True for i in job_ids] 192 | 193 | return rets 194 | 195 | @property 196 | def scaling_enabled(self): 197 | return True 198 | 199 | @property 200 | def current_capacity(self): 201 | return len(self.resources) 202 | 203 | 204 | if __name__ == "__main__": 205 | 206 | print("Nothing here") 207 | -------------------------------------------------------------------------------- /libsubmit/providers/provider_base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod, abstractproperty 2 | 3 | 4 | class ExecutionProvider(metaclass=ABCMeta): 5 | """ Define the strict interface for all Execution Provider 6 | 7 | .. code:: python 8 | 9 | +------------------ 10 | | 11 | script_string ------->| submit 12 | id <--------|---+ 13 | | 14 | [ ids ] ------->| status 15 | [statuses] <--------|----+ 16 | | 17 | [ ids ] ------->| cancel 18 | [cancel] <--------|----+ 19 | | 20 | [True/False] <--------| scaling_enabled 21 | | 22 | +------------------- 23 | """ 24 | 25 | @abstractmethod 26 | def submit(self, command, blocksize, job_name="parsl.auto"): 27 | ''' The submit method takes the command string to be executed upon 28 | instantiation of a resource most often to start a pilot (such as IPP engine 29 | or even Swift-T engines). 30 | 31 | Args : 32 | - command (str) : The bash command string to be executed. 33 | - blocksize (int) : Blocksize to be requested 34 | 35 | KWargs: 36 | - job_name (str) : Human friendly name to be assigned to the job request 37 | 38 | Returns: 39 | - A job identifier, this could be an integer, string etc 40 | 41 | Raises: 42 | - ExecutionProviderException or its subclasses 43 | ''' 44 | 45 | pass 46 | 47 | @abstractmethod 48 | def status(self, job_ids): 49 | ''' Get the status of a list of jobs identified by the job identifiers 50 | returned from the submit request. 
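        A provider-agnostic polling loop can be built on this method alone; a
        sketch (any concrete provider can stand in):

        .. code:: python

            import time

            def wait_all(provider, job_ids, interval=5):
                # Poll until every job has left the PENDING/RUNNING states.
                while True:
                    states = provider.status(job_ids)
                    if all(s not in ('PENDING', 'RUNNING') for s in states):
                        return states
                    time.sleep(interval)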
51 | 52 | Args: 53 | - job_ids (list) : A list of job identifiers 54 | 55 | Returns: 56 | - A list of status from ['PENDING', 'RUNNING', 'CANCELLED', 'COMPLETED', 57 | 'FAILED', 'TIMEOUT'] corresponding to each job_id in the job_ids list. 58 | 59 | Raises: 60 | - ExecutionProviderException or its subclasses 61 | 62 | ''' 63 | 64 | pass 65 | 66 | @abstractmethod 67 | def cancel(self, job_ids): 68 | ''' Cancels the resources identified by the job_ids provided by the user. 69 | 70 | Args: 71 | - job_ids (list): A list of job identifiers 72 | 73 | Returns: 74 | - A list of status from cancelling the job which can be True, False 75 | 76 | Raises: 77 | - ExecutionProviderException or its subclasses 78 | ''' 79 | 80 | pass 81 | 82 | @abstractproperty 83 | def scaling_enabled(self): 84 | ''' The callers of ParslExecutors need to differentiate between Executors 85 | and Executors wrapped in a resource provider 86 | 87 | Returns: 88 | - Status (Bool) 89 | ''' 90 | 91 | pass 92 | -------------------------------------------------------------------------------- /libsubmit/providers/slurm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/slurm/__init__.py -------------------------------------------------------------------------------- /libsubmit/providers/slurm/slurm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import time 4 | 5 | from libsubmit.channels import LocalChannel 6 | from libsubmit.launchers import SingleNodeLauncher 7 | from libsubmit.providers.cluster_provider import ClusterProvider 8 | from libsubmit.providers.slurm.template import template_string 9 | from libsubmit.utils import RepresentationMixin, wtime_to_minutes 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | translate_table = { 14 | 'PD': 'PENDING', 15 | 'R': 'RUNNING', 16 | 'CA': 'CANCELLED', 17 | 'CF': 'PENDING', # (configuring), 18 | 'CG': 'RUNNING', # (completing), 19 | 'CD': 'COMPLETED', 20 | 'F': 'FAILED', # (failed), 21 | 'TO': 'TIMEOUT', # (timeout), 22 | 'NF': 'FAILED', # (node failure), 23 | 'RV': 'FAILED', # (revoked) and 24 | 'SE': 'FAILED' 25 | } # (special exit state 26 | 27 | 28 | class SlurmProvider(ClusterProvider, RepresentationMixin): 29 | """Slurm Execution Provider 30 | 31 | This provider uses sbatch to submit, squeue for status and scancel to cancel 32 | jobs. The sbatch script to be used is created from a template file in this 33 | same module. 34 | 35 | Parameters 36 | ---------- 37 | partition : str 38 | Slurm partition to request blocks from. 39 | label : str 40 | Label for this provider. 41 | channel : Channel 42 | Channel for accessing this provider. Possible channels include 43 | :class:`~libsubmit.channels.LocalChannel` (the default), 44 | :class:`~libsubmit.channels.SSHChannel`, or 45 | :class:`~libsubmit.channels.SSHInteractiveLoginChannel`. 46 | script_dir : str 47 | Relative or absolute path to a directory where intermediate scripts are placed. 48 | nodes_per_block : int 49 | Nodes to provision per block. 50 | tasks_per_node : int 51 | Tasks to run per node. 52 | min_blocks : int 53 | Minimum number of blocks to maintain. 54 | max_blocks : int 55 | Maximum number of blocks to maintain. 56 | parallelism : float 57 | Ratio of provisioned task slots to active tasks. 
A parallelism value of 1 represents aggressive 58 | scaling where as many resources as possible are used; parallelism close to 0 represents 59 | the opposite situation in which as few resources as possible (i.e., min_blocks) are used. 60 | walltime : str 61 | Walltime requested per block in HH:MM:SS. 62 | overrides : str 63 | String to prepend to the #SBATCH blocks in the submit script to the scheduler. 64 | launcher : Launcher 65 | Launcher for this provider. Possible launchers include 66 | :class:`~libsubmit.launchers.SingleNodeLauncher` (the default), 67 | :class:`~libsubmit.launchers.SrunLauncher`, or 68 | :class:`~libsubmit.launchers.AprunLauncher` 69 | """ 70 | 71 | def __init__(self, 72 | partition, 73 | label='slurm', 74 | channel=LocalChannel(), 75 | script_dir='parsl_scripts', 76 | nodes_per_block=1, 77 | tasks_per_node=1, 78 | init_blocks=1, 79 | min_blocks=0, 80 | max_blocks=10, 81 | parallelism=1, 82 | walltime="00:10:00", 83 | overrides='', 84 | cmd_timeout=10, 85 | launcher=SingleNodeLauncher()): 86 | super().__init__(label, 87 | channel, 88 | script_dir, 89 | nodes_per_block, 90 | tasks_per_node, 91 | init_blocks, 92 | min_blocks, 93 | max_blocks, 94 | parallelism, 95 | walltime, 96 | cmd_timeout=cmd_timeout, 97 | launcher=launcher) 98 | self.partition = partition 99 | self.overrides = overrides 100 | 101 | def _status(self): 102 | ''' Internal: Do not call. Returns the status list for a list of job_ids 103 | 104 | Args: 105 | self 106 | 107 | Returns: 108 | [status...] : Status list of all jobs 109 | ''' 110 | job_id_list = ','.join(self.resources.keys()) 111 | cmd = "squeue --job {0}".format(job_id_list) 112 | 113 | retcode, stdout, stderr = super().execute_wait(cmd) 114 | 115 | # Execute_wait failed. Do no update 116 | if retcode != 0: 117 | return 118 | 119 | jobs_missing = list(self.resources.keys()) 120 | for line in stdout.split('\n'): 121 | parts = line.split() 122 | if parts and parts[0] != 'JOBID': 123 | job_id = parts[0] 124 | status = translate_table.get(parts[4], 'UNKNOWN') 125 | self.resources[job_id]['status'] = status 126 | jobs_missing.remove(job_id) 127 | 128 | # squeue does not report on jobs that are not running. So we are filling in the 129 | # blanks for missing jobs, we might lose some information about why the jobs failed. 130 | for missing_job in jobs_missing: 131 | if self.resources[missing_job]['status'] in ['PENDING', 'RUNNING']: 132 | self.resources[missing_job]['status'] = 'COMPLETED' 133 | 134 | def submit(self, command, blocksize, job_name="parsl.auto"): 135 | """Submit the command as a slurm job of blocksize parallel elements. 136 | 137 | Parameters 138 | ---------- 139 | command : str 140 | Command to be made on the remote side. 141 | blocksize : int 142 | Not implemented. 143 | job_name : str 144 | Name for the job (must be unique). 
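        As a sketch of what gets submitted: the ``${...}`` placeholders in
        ``libsubmit/providers/slurm/template.py`` follow ``string.Template``
        semantics (an assumption based on the ``$``/``$$`` escaping used by
        the templates), so the rendered script can be previewed with
        hypothetical values:

        .. code:: python

            from string import Template
            from libsubmit.providers.slurm.template import template_string

            body = Template(template_string).substitute(
                jobname='parsl.auto.1', submit_script_dir='/tmp',
                nodes=2, partition='debug', walltime='10',  # wtime_to_minutes('00:10:00')
                tasks_per_node=1, overrides='', user_script='echo hello')
            # body starts with '#!/bin/bash' followed by the #SBATCH header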
145 | 146 | Returns 147 | ------- 148 | None or str 149 | If at capacity, returns None; otherwise, a string identifier for the job 150 | """ 151 | 152 | if self.provisioned_blocks >= self.max_blocks: 153 | logger.warn("Slurm provider '{}' is at capacity (no more blocks will be added)".format(self.label)) 154 | return None 155 | 156 | job_name = "{0}.{1}".format(job_name, time.time()) 157 | 158 | script_path = "{0}/{1}.submit".format(self.script_dir, job_name) 159 | script_path = os.path.abspath(script_path) 160 | 161 | logger.debug("Requesting one block with {} nodes".format(self.nodes_per_block)) 162 | 163 | job_config = {} 164 | job_config["submit_script_dir"] = self.channel.script_dir 165 | job_config["nodes"] = self.nodes_per_block 166 | job_config["tasks_per_node"] = self.tasks_per_node 167 | job_config["walltime"] = wtime_to_minutes(self.walltime) 168 | job_config["overrides"] = self.overrides 169 | job_config["partition"] = self.partition 170 | job_config["user_script"] = command 171 | 172 | # Wrap the command 173 | job_config["user_script"] = self.launcher(command, 174 | self.tasks_per_node, 175 | self.nodes_per_block) 176 | 177 | logger.debug("Writing submit script") 178 | self._write_submit_script(template_string, script_path, job_name, job_config) 179 | 180 | channel_script_path = self.channel.push_file(script_path, self.channel.script_dir) 181 | 182 | retcode, stdout, stderr = super().execute_wait("sbatch {0}".format(channel_script_path)) 183 | 184 | job_id = None 185 | if retcode == 0: 186 | for line in stdout.split('\n'): 187 | if line.startswith("Submitted batch job"): 188 | job_id = line.split("Submitted batch job")[1].strip() 189 | self.resources[job_id] = {'job_id': job_id, 'status': 'PENDING', 'blocksize': blocksize} 190 | else: 191 | print("Submission of command to scale_out failed") 192 | logger.error("Retcode:%s STDOUT:%s STDERR:%s", retcode, stdout.strip(), stderr.strip()) 193 | return job_id 194 | 195 | def cancel(self, job_ids): 196 | ''' Cancels the jobs specified by a list of job ids 197 | 198 | Args: 199 | job_ids : [ ...] 200 | 201 | Returns : 202 | [True/False...] : If the cancel operation fails the entire list will be False. 
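        For example, a sketch:

        .. code:: python

            jobs = [provider.submit('sleep 600', 1) for _ in range(2)]
            provider.cancel(jobs)  # -> [True, True] if scancel returned 0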
203 | ''' 204 | 205 | job_id_list = ' '.join(job_ids) 206 | retcode, stdout, stderr = super().execute_wait("scancel {0}".format(job_id_list)) 207 | rets = None 208 | if retcode == 0: 209 | for jid in job_ids: 210 | self.resources[jid]['status'] = translate_table['CA'] # Setting state to cancelled 211 | rets = [True for i in job_ids] 212 | else: 213 | rets = [False for i in job_ids] 214 | 215 | return rets 216 | 217 | def _test_add_resource(self, job_id): 218 | self.resources[job_id] = {'job_id': job_id, 'status': 'PENDING', 'size': 1} 219 | return True 220 | 221 | 222 | if __name__ == "__main__": 223 | 224 | print("None") 225 | -------------------------------------------------------------------------------- /libsubmit/providers/slurm/template.py: -------------------------------------------------------------------------------- 1 | template_string = '''#!/bin/bash 2 | 3 | #SBATCH --job-name=${jobname} 4 | #SBATCH --output=${submit_script_dir}/${jobname}.submit.stdout 5 | #SBATCH --error=${submit_script_dir}/${jobname}.submit.stderr 6 | #SBATCH --nodes=${nodes} 7 | #SBATCH --partition=${partition} 8 | #SBATCH --time=${walltime} 9 | #SBATCH --ntasks-per-node=${tasks_per_node} 10 | 11 | $overrides 12 | 13 | export JOBNAME="${jobname}" 14 | 15 | $user_script 16 | ''' 17 | -------------------------------------------------------------------------------- /libsubmit/providers/torque/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parsl/libsubmit/5c2cbf0c31365050a83b98a93b77edf6b065adea/libsubmit/providers/torque/__init__.py -------------------------------------------------------------------------------- /libsubmit/providers/torque/template.py: -------------------------------------------------------------------------------- 1 | template_string = '''#!/bin/bash 2 | 3 | #PBS -S /bin/bash 4 | #PBS -N ${jobname} 5 | #PBS -m n 6 | #PBS -k eo 7 | #PBS -l walltime=$walltime 8 | #PBS -l nodes=${nodes_per_block}:ppn=${tasks_per_node} 9 | #PBS -o ${submit_script_dir}/${jobname}.submit.stdout 10 | #PBS -e ${submit_script_dir}/${jobname}.submit.stderr 11 | #PBS -v WORKER_LOGGING_LEVEL 12 | ${overrides} 13 | 14 | export JOBNAME="${jobname}" 15 | 16 | ${user_script} 17 | 18 | ''' 19 | -------------------------------------------------------------------------------- /libsubmit/providers/torque/torque.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import time 4 | 5 | from libsubmit.channels import LocalChannel 6 | from libsubmit.launchers import AprunLauncher 7 | from libsubmit.providers.torque.template import template_string 8 | from libsubmit.providers.cluster_provider import ClusterProvider 9 | from libsubmit.utils import RepresentationMixin 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | # From the man pages for qstat for PBS/Torque systems 14 | translate_table = { 15 | 'R': 'RUNNING', 16 | 'C': 'COMPLETED', # Completed after having run 17 | 'E': 'COMPLETED', # Exiting after having run 18 | 'H': 'HELD', # Held 19 | 'Q': 'PENDING', # Queued, and eligible to run 20 | 'W': 'PENDING', # Job is waiting for its execution time (-a option) to be reached 21 | 'S': 'HELD' 22 | } # Suspended 23 | 24 | 25 | class TorqueProvider(ClusterProvider, RepresentationMixin): 26 | """Torque Execution Provider 27 | 28 | This provider uses qsub to submit, qstat for status, and qdel to cancel 29 | jobs. 
The submit script to be used is created from a template file in this 30 | same module. 31 | 32 | Parameters 33 | ---------- 34 | channel : Channel 35 | Channel for accessing this provider. Possible channels include 36 | :class:`~libsubmit.channels.LocalChannel` (the default), 37 | :class:`~libsubmit.channels.SSHChannel`, or 38 | :class:`~libsubmit.channels.SSHInteractiveLoginChannel`. 39 | account : str 40 | Account the job will be charged against. 41 | queue : str 42 | Torque queue to request blocks from. 43 | label : str 44 | Label for this provider. 45 | script_dir : str 46 | Relative or absolute path to a directory where intermediate scripts are placed. 47 | nodes_per_block : int 48 | Nodes to provision per block. 49 | tasks_per_node : int 50 | Tasks to run per node. 51 | init_blocks : int 52 | Number of blocks to provision at the start of the run. Default is 1. 53 | min_blocks : int 54 | Minimum number of blocks to maintain. Default is 0. 55 | max_blocks : int 56 | Maximum number of blocks to maintain. 57 | parallelism : float 58 | Ratio of provisioned task slots to active tasks. A parallelism value of 1 represents aggressive 59 | scaling where as many resources as possible are used; parallelism close to 0 represents 60 | the opposite situation in which as few resources as possible (i.e., min_blocks) are used. 61 | walltime : str 62 | Walltime requested per block in HH:MM:SS. 63 | overrides : str 64 | String to prepend to the Torque submit script. 65 | launcher : Launcher 66 | Launcher for this provider. Possible launchers include 67 | :class:`~libsubmit.launchers.AprunLauncher` (the default), or 68 | :class:`~libsubmit.launchers.SingleNodeLauncher`. 69 | 70 | """ 71 | def __init__(self, 72 | channel=LocalChannel(), 73 | account=None, 74 | queue=None, 75 | overrides='', 76 | label='torque', 77 | script_dir='parsl_scripts', 78 | nodes_per_block=1, 79 | tasks_per_node=1, 80 | init_blocks=1, 81 | min_blocks=0, 82 | max_blocks=100, 83 | parallelism=1, 84 | launcher=AprunLauncher(), 85 | walltime="00:20:00"): 86 | super().__init__(label, 87 | channel, 88 | script_dir, 89 | nodes_per_block, 90 | tasks_per_node, 91 | init_blocks, 92 | min_blocks, 93 | max_blocks, 94 | parallelism, 95 | walltime, 96 | launcher) 97 | 98 | self.account = account 99 | self.queue = queue 100 | self.overrides = overrides 101 | self.provisioned_blocks = 0 102 | 103 | self.script_dir = script_dir 104 | if not os.path.exists(self.script_dir): 105 | os.makedirs(self.script_dir) 106 | 107 | # Dictionary that keeps track of jobs, keyed on job_id 108 | self.resources = {} 109 | 110 | def _status(self): 111 | ''' Internal: Do not call. Returns the status list for a list of job_ids 112 | 113 | Args: 114 | self 115 | 116 | Returns: 117 | [status...] : Status list of all jobs 118 | ''' 119 | 120 | job_id_list = ' '.join(self.resources.keys()) 121 | 122 | jobs_missing = list(self.resources.keys()) 123 | 124 | retcode, stdout, stderr = self.channel.execute_wait("qstat {0}".format(job_id_list), 3) 125 | for line in stdout.split('\n'): 126 | parts = line.split() 127 | if not parts or parts[0].upper().startswith('JOB') or parts[0].startswith('---'): 128 | continue 129 | job_id = parts[0] 130 | status = translate_table.get(parts[4], 'UNKNOWN') 131 | self.resources[job_id]['status'] = status 132 | jobs_missing.remove(job_id) 133 | 134 | # qstat does not report on jobs that are no longer in the queue. So we are filling in the 135 | # blanks for missing jobs, we might lose some information about why the jobs failed. 
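        # (A sketch of the qstat output parsed above: a line such as
        #      123.mgr   parsl.auto.1   user   00:00:10   R   batch
        #  splits so that parts[0] is the job id and parts[4] is the
        #  single-letter state looked up in translate_table.)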
136 | for missing_job in jobs_missing: 137 | if self.resources[missing_job]['status'] in ['PENDING', 'RUNNING']: 138 | self.resources[missing_job]['status'] = translate_table['E'] 139 | 140 | def submit(self, command, blocksize, job_name="parsl.auto"): 141 | ''' Submits the command onto an Local Resource Manager job of blocksize parallel elements. 142 | Submit returns an ID that corresponds to the task that was just submitted. 143 | 144 | If tasks_per_node < 1 : ! This is illegal. tasks_per_node should be integer 145 | 146 | If tasks_per_node == 1: 147 | A single node is provisioned 148 | 149 | If tasks_per_node > 1 : 150 | tasks_per_node * blocksize number of nodes are provisioned. 151 | 152 | Args: 153 | - command :(String) Commandline invocation to be made on the remote side. 154 | - blocksize :(float) 155 | 156 | Kwargs: 157 | - job_name (String): Name for job, must be unique 158 | 159 | Returns: 160 | - None: At capacity, cannot provision more 161 | - job_id: (string) Identifier for the job 162 | 163 | ''' 164 | 165 | if self.provisioned_blocks >= self.max_blocks: 166 | logger.warn("[%s] at capacity, cannot add more blocks now", self.label) 167 | return None 168 | 169 | # Note: Fix this later to avoid confusing behavior. 170 | # We should always allocate blocks in integer counts of node_granularity 171 | if blocksize < self.nodes_per_block: 172 | blocksize = self.nodes_per_block 173 | 174 | # Set job name 175 | job_name = "parsl.{0}.{1}".format(job_name, time.time()) 176 | 177 | # Set script path 178 | script_path = "{0}/{1}.submit".format(self.script_dir, job_name) 179 | script_path = os.path.abspath(script_path) 180 | 181 | logger.debug("Requesting blocksize:%s nodes_per_block:%s tasks_per_node:%s", blocksize, self.nodes_per_block, 182 | self.tasks_per_node) 183 | 184 | job_config = {} 185 | # TODO : script_path might need to change to accommodate script dir set via channels 186 | job_config["submit_script_dir"] = self.channel.script_dir 187 | job_config["nodes"] = self.nodes_per_block 188 | job_config["task_blocks"] = self.nodes_per_block * self.tasks_per_node 189 | job_config["nodes_per_block"] = self.nodes_per_block 190 | job_config["tasks_per_node"] = self.tasks_per_node 191 | job_config["walltime"] = self.walltime 192 | job_config["overrides"] = self.overrides 193 | job_config["user_script"] = command 194 | 195 | # Wrap the command 196 | job_config["user_script"] = self.launcher(command, 197 | self.tasks_per_node, 198 | self.nodes_per_block) 199 | 200 | logger.debug("Writing submit script") 201 | self._write_submit_script(template_string, script_path, job_name, job_config) 202 | 203 | channel_script_path = self.channel.push_file(script_path, self.channel.script_dir) 204 | 205 | submit_options = '' 206 | if self.queue is not None: 207 | submit_options = '{0} -q {1}'.format(submit_options, self.queue) 208 | if self.account is not None: 209 | submit_options = '{0} -A {1}'.format(submit_options, self.account) 210 | 211 | launch_cmd = "qsub {0} {1}".format(submit_options, channel_script_path) 212 | retcode, stdout, stderr = self.channel.execute_wait(launch_cmd, 10) 213 | 214 | job_id = None 215 | if retcode == 0: 216 | for line in stdout.split('\n'): 217 | if line.strip(): 218 | job_id = line.strip() 219 | self.resources[job_id] = {'job_id': job_id, 'status': 'PENDING', 'blocksize': blocksize} 220 | else: 221 | message = "Command '{}' failed with return code {}".format(launch_cmd, retcode) 222 | if (stdout is not None) and (stderr is not None): 223 | message += 
"\nstderr:{}\nstdout{}".format(stderr.strip(), stdout.strip()) 224 | logger.error(message) 225 | 226 | return job_id 227 | 228 | def cancel(self, job_ids): 229 | ''' Cancels the jobs specified by a list of job ids 230 | 231 | Args: 232 | job_ids : [ ...] 233 | 234 | Returns : 235 | [True/False...] : If the cancel operation fails the entire list will be False. 236 | ''' 237 | 238 | job_id_list = ' '.join(job_ids) 239 | retcode, stdout, stderr = self.channel.execute_wait("qdel {0}".format(job_id_list), 3) 240 | rets = None 241 | if retcode == 0: 242 | for jid in job_ids: 243 | self.resources[jid]['status'] = translate_table['E'] # Setting state to exiting 244 | rets = [True for i in job_ids] 245 | else: 246 | rets = [False for i in job_ids] 247 | 248 | return rets 249 | 250 | 251 | if __name__ == "__main__": 252 | 253 | print("None") 254 | -------------------------------------------------------------------------------- /libsubmit/tests/setup_path.sh: -------------------------------------------------------------------------------- 1 | export PATH=$PWD:$PATH 2 | export PYTHONPATH=$PWD/../../:$PYTHONPATH 3 | -------------------------------------------------------------------------------- /libsubmit/tests/test_channels/remote_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Hostname: $HOSTNAME" 3 | echo "Cpu info -----" 4 | cat /proc/cpuinfo 5 | echo "Done----------" 6 | -------------------------------------------------------------------------------- /libsubmit/tests/test_channels/test_channels.py: -------------------------------------------------------------------------------- 1 | from libsubmit.channels.local.local import LocalChannel 2 | 3 | 4 | def test_local(): 5 | 6 | channel = LocalChannel(None, None) 7 | 8 | ec, out, err = channel.execute_wait('echo "pwd: $PWD"', 2) 9 | 10 | assert ec == 0, "Channel execute failed" 11 | print("Stdout: ", out) 12 | print("Stderr: ", err) 13 | 14 | 15 | if __name__ == "__main__": 16 | 17 | test_local() 18 | -------------------------------------------------------------------------------- /libsubmit/tests/test_channels/test_local_channel.py: -------------------------------------------------------------------------------- 1 | from libsubmit.channels.local.local import LocalChannel 2 | 3 | 4 | def test_env(): 5 | ''' Regression testing for issue #27 6 | ''' 7 | 8 | lc = LocalChannel() 9 | rc, stdout, stderr = lc.execute_wait("env", 1) 10 | 11 | stdout = stdout.split('\n') 12 | x = [l for l in stdout if l.startswith("PATH=")] 13 | assert x, "PATH not found" 14 | 15 | x = [l for l in stdout if l.startswith("HOME=")] 16 | assert x, "HOME not found" 17 | 18 | print("RC:{} \nSTDOUT:{} \nSTDERR:{}".format(rc, stdout, stderr)) 19 | 20 | 21 | def test_env_mod(): 22 | ''' Testing for env update at execute time. 
23 | ''' 24 | 25 | lc = LocalChannel() 26 | rc, stdout, stderr = lc.execute_wait("env", 1, {'TEST_ENV': 'fooo'}) 27 | 28 | stdout = stdout.split('\n') 29 | x = [l for l in stdout if l.startswith("PATH=")] 30 | assert x, "PATH not found" 31 | 32 | x = [l for l in stdout if l.startswith("HOME=")] 33 | assert x, "HOME not found" 34 | 35 | x = [l for l in stdout if l.startswith("TEST_ENV=fooo")] 36 | assert x, "User set env missing" 37 | 38 | 39 | if __name__ == "__main__": 40 | 41 | test_env() 42 | test_env_mod() 43 | -------------------------------------------------------------------------------- /libsubmit/tests/test_channels/test_scp_1.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from libsubmit.channels.ssh.ssh import SSHChannel as SSH 4 | 5 | 6 | def connect_and_list(hostname, username): 7 | out = '' 8 | conn = SSH(hostname, username=username) 9 | conn.push_file(os.path.abspath('remote_run.sh'), '/home/davidk/') 10 | # ec, out, err = conn.execute_wait("ls /tmp/remote_run.sh; bash /tmp/remote_run.sh") 11 | conn.close() 12 | return out 13 | 14 | 15 | script = '''#!/bin/bash 16 | echo "Hostname: $HOSTNAME" 17 | echo "Cpu info -----" 18 | cat /proc/cpuinfo 19 | echo "Done----------" 20 | ''' 21 | 22 | 23 | def test_connect_1(): 24 | with open('remote_run.sh', 'w') as f: 25 | f.write(script) 26 | 27 | sites = { 28 | 'midway': { 29 | 'url': 'midway.rcc.uchicago.edu', 30 | 'uname': 'yadunand' 31 | }, 32 | 'swift': { 33 | 'url': 'swift.rcc.uchicago.edu', 34 | 'uname': 'yadunand' 35 | }, 36 | 'cori': { 37 | 'url': 'cori.nersc.gov', 38 | 'uname': 'yadunand' 39 | } 40 | } 41 | 42 | for site in sites.values(): 43 | out = connect_and_list(site['url'], site['uname']) 44 | print("Sitename :{0} hostname:{1}".format(site['url'], out)) 45 | 46 | 47 | if __name__ == "__main__": 48 | 49 | test_connect_1() 50 | -------------------------------------------------------------------------------- /libsubmit/tests/test_channels/test_ssh_1.py: -------------------------------------------------------------------------------- 1 | from libsubmit.channels.ssh.ssh import SSHChannel as SSH 2 | 3 | 4 | def connect_and_list(hostname, username): 5 | conn = SSH(hostname, username=username) 6 | ec, out, err = conn.execute_wait("echo $HOSTNAME") 7 | conn.close() 8 | return out 9 | 10 | 11 | def test_midway(): 12 | ''' Test ssh channels to midway 13 | ''' 14 | url = 'midway.rcc.uchicago.edu' 15 | uname = 'yadunand' 16 | out = connect_and_list(url, uname) 17 | print("Sitename :{0} hostname:{1}".format(url, out)) 18 | 19 | 20 | def test_beagle(): 21 | ''' Test ssh channels to beagle 22 | ''' 23 | url = 'login04.beagle.ci.uchicago.edu' 24 | uname = 'yadunandb' 25 | out = connect_and_list(url, uname) 26 | print("Sitename :{0} hostname:{1}".format(url, out)) 27 | 28 | 29 | def test_osg(): 30 | ''' Test ssh connectivity to osg 31 | ''' 32 | url = 'login.osgconnect.net' 33 | uname = 'yadunand' 34 | out = connect_and_list(url, uname) 35 | print("Sitename :{0} hostname:{1}".format(url, out)) 36 | 37 | 38 | def test_cori(): 39 | ''' Test ssh connectivity to cori 40 | ''' 41 | url = 'cori.nersc.gov' 42 | uname = 'yadunand' 43 | out = connect_and_list(url, uname) 44 | print("Sitename :{0} hostname:{1}".format(url, out)) 45 | 46 | 47 | if __name__ == "__main__": 48 | 49 | pass 50 | -------------------------------------------------------------------------------- /libsubmit/tests/test_channels/test_ssh_errors.py: 
-------------------------------------------------------------------------------- 1 | from libsubmit.channels.errors import * 2 | from libsubmit.channels.ssh.ssh import SSHChannel as SSH 3 | 4 | 5 | def connect_and_list(hostname, username): 6 | conn = SSH(hostname, username=username) 7 | ec, out, err = conn.execute_wait("echo $HOSTNAME") 8 | conn.close() 9 | return out 10 | 11 | 12 | def test_error_1(): 13 | try: 14 | connect_and_list("bad.url.gov", "ubuntu") 15 | except Exception as e: 16 | assert type(e) == SSHException, "Expected SSHException, got :{0}".format(e) 17 | 18 | 19 | def test_error_2(): 20 | try: 21 | connect_and_list("swift.rcc.uchicago.edu", "mango") 22 | except SSHException: 23 | print("Caught the right exception") 24 | else: 25 | raise Exception("Expected SSHException, but no exception was raised") 26 | 27 | 28 | def test_error_3(): 29 | ''' This should connect without raising 30 | ''' 31 | try: 32 | connect_and_list("login.mcs.anl.gov", "yadunand") 33 | except AuthException as e: 34 | print("Caught exception : ", e) 35 | else: 36 | pass # connection succeeded, which is the expected outcome 37 | 38 | 39 | def test_error_4(): 40 | ''' This should raise a BadHostKeyException 41 | ''' 42 | try: 43 | connect_and_list("edison.nersc.gov", "yadunand") 44 | except BadHostKeyException as e: 45 | print("Caught exception BadHostKeyException: ", e) 46 | else: 47 | assert False, "Expected BadHostKeyException, but no exception was raised" 48 | 49 | 50 | if __name__ == "__main__": 51 | 52 | tests = [test_error_1, test_error_2, test_error_3, test_error_4] 53 | 54 | for test in tests: 55 | print("---------Running : {0}---------------".format(test)) 56 | test() 57 | print("----------------------DONE--------------------------") 58 | -------------------------------------------------------------------------------- /libsubmit/tests/test_channels/test_ssh_file_transport.py: -------------------------------------------------------------------------------- 1 | import libsubmit 2 | from libsubmit.channels.ssh.ssh import SSHChannel as SSH 3 | 4 | 5 | def connect_and_list(hostname, username): 6 | conn = SSH(hostname, username=username) 7 | ec, out, err = conn.execute_wait("echo $HOSTNAME") 8 | conn.close() 9 | return out 10 | 11 | 12 | def test_push(conn, fname="test001.txt"): 13 | 14 | with open(fname, 'w') as f: 15 | f.write("Hello from parsl.ssh testing\n") 16 | 17 | conn.push_file(fname, "/tmp") 18 | ec, out, err = conn.execute_wait("ls /tmp/{0}".format(fname)) 19 | print(ec, out, err) 20 | 21 | 22 | def test_pull(conn, fname="test001.txt"): 23 | 24 | local = "foo" 25 | conn.pull_file("/tmp/{0}".format(fname), local) 26 | 27 | with open("{0}/{1}".format(local, fname), 'r') as f: 28 | print(f.readlines()) 29 | 30 | 31 | if __name__ == "__main__": 32 | 33 | libsubmit.set_stream_logger() 34 | 35 | # This is for testing 36 | conn = SSH("midway.rcc.uchicago.edu", username="yadunand") 37 | 38 | test_push(conn) 39 | test_pull(conn) 40 | 41 | conn.close() 42 | -------------------------------------------------------------------------------- /libsubmit/tests/test_channels/test_ssh_interactive.py: -------------------------------------------------------------------------------- 1 | import libsubmit 2 | from libsubmit.channels.ssh_il.ssh_il import SSHInteractiveLoginChannel as SSH 3 | 4 | 5 | def connect_and_list(hostname, username): 6 | conn = SSH(hostname, username=username) 7 | ec, out, err = conn.execute_wait("echo $HOSTNAME") 8 | conn.close() 9 | return out 10 | 11 | 12 | def test_cooley(): 13 | ''' Test the interactive ssh channel to cooley 14 | ''' 15 | url =
'cooley.alcf.anl.gov' 16 | uname = 'yadunand' 17 | out = connect_and_list(url, uname) 18 | print("Sitename :{0} hostname:{1}".format(url, out)) 19 | return 20 | 21 | 22 | if __name__ == "__main__": 23 | libsubmit.set_stream_logger() 24 | test_cooley() 25 | -------------------------------------------------------------------------------- /libsubmit/tests/test_integration/test_ssh/test_ssh_beagle.py: -------------------------------------------------------------------------------- 1 | import libsubmit 2 | from libsubmit import SSHChannel 3 | from libsubmit import Torque 4 | import time 5 | 6 | 7 | def test_1(): 8 | 9 | torque_config = { 10 | "site": "Beagle.CI", 11 | "execution": { 12 | "executor": "ipp", 13 | "provider": "torque", 14 | "channel": "ssh", 15 | "block": { 16 | "initBlocks": 1, 17 | "maxBlocks": 1, 18 | "minBlocks": 0, 19 | "taskBlocks": 1, 20 | "nodes": 1, 21 | "walltime": "00:25:00", 22 | "options": { 23 | "partition": "debug", 24 | "overrides": '''#SBATCH --constraint=haswell''' 25 | } 26 | } 27 | } 28 | } 29 | 30 | channel = SSHChannel( 31 | "login4.beagle.ci.uchicago.edu", 32 | "yadunandb", 33 | script_dir="/lustre/beagle2/yadunand/parsl_scripts") 34 | ec, out, err = channel.execute_wait("which qsub; echo $HOSTNAME; pwd") 35 | print("Stdout : ", out) 36 | 37 | provider = Torque(config=torque_config, channel=channel) 38 | 39 | x = provider.submit('''echo "sleeping" 40 | sleep 120 41 | echo "Done sleeping" ''', 1) 42 | time.sleep(3) 43 | 44 | y = provider.submit('''echo "sleeping" 45 | sleep 120 46 | echo "Done sleeping" ''', 1) 47 | time.sleep(3) 48 | 49 | stats = provider.status([x, y]) 50 | 51 | x = provider.cancel([x, y]) 52 | print(stats) 53 | print("Cancel stats : ", x) 54 | 55 | 56 | if __name__ == "__main__": 57 | libsubmit.set_stream_logger() 58 | test_1() 59 | -------------------------------------------------------------------------------- /libsubmit/tests/test_integration/test_ssh/test_ssh_condor_earth.py: -------------------------------------------------------------------------------- 1 | import os 2 | import libsubmit 3 | from libsubmit import SSHChannel, Condor 4 | import time 5 | 6 | 7 | def test_1(): 8 | config = { 9 | "site": "T3_US_NotreDame", 10 | "execution": { 11 | "script_dir": ".scripts", 12 | "environment": { 13 | 'CONDOR_CONFIG': '/opt/condor/RedHat6/etc/condor_config', 14 | 'CONDOR_LOCATION': '/opt/condor/RedHat6', 15 | 'PATH': '/opt/condor/RedHat6/bin:${PATH}' 16 | }, 17 | "block": { 18 | "environment": { 19 | 'foo': 'spacey "quoted" value', 20 | 'bar': "this 'works' too", 21 | 'baz': 2 22 | }, 23 | "nodes": 1, 24 | "walltime": "01:00:00", 25 | "options": { 26 | "project": "cms.org.nd", 27 | "condor_overrides": "", 28 | "requirements": "" 29 | } 30 | } 31 | } 32 | } 33 | channel = SSHChannel("earth.crc.nd.edu", os.environ['USER']) 34 | 35 | ec, out, err = channel.execute_wait("printenv", envs=config['execution']['environment']) 36 | print("current env:", out) 37 | 38 | ec, out, err = channel.execute_wait("which condor_submit", envs=config['execution']['environment']) 39 | print('which condor_submit? 
', out) 40 | 41 | provider = Condor(config=config, channel=channel) 42 | 43 | ids = provider.submit('''echo "sleeping" 44 | sleep 120 45 | echo "Done sleeping" ''', 1) 46 | time.sleep(3) 47 | 48 | ids += provider.submit('''echo "sleeping" 49 | sleep 120 50 | echo "Done sleeping" ''', 1) 51 | time.sleep(3) 52 | 53 | stats = provider.status(ids) 54 | print(stats) 55 | 56 | provider.cancel(ids) 57 | 58 | 59 | if __name__ == "__main__": 60 | libsubmit.set_stream_logger() 61 | test_1() 62 | -------------------------------------------------------------------------------- /libsubmit/tests/test_integration/test_ssh/test_ssh_cori.py: -------------------------------------------------------------------------------- 1 | import libsubmit 2 | from libsubmit import SSHChannel, Slurm 3 | import time 4 | 5 | 6 | def test_1(): 7 | 8 | slurm_config = { 9 | "site": "Cori/Nersc", 10 | "execution": { 11 | "executor": "ipp", 12 | "provider": "slurm", 13 | "channel": "local", 14 | "options": { 15 | "init_parallelism": 1, 16 | "max_parallelism": 1, 17 | "min_parallelism": 0, 18 | "tasks_per_node": 1, 19 | "node_granularity": 1, 20 | "partition": "debug", 21 | "walltime": "00:25:00", 22 | "submit_script_dir": ".scripts", 23 | "overrides": '''#SBATCH --constraint=haswell''' 24 | } 25 | } 26 | } 27 | 28 | channel = SSHChannel( 29 | "cori.nersc.gov", 30 | "yadunand", 31 | channel_script_dir="/global/homes/y/yadunand/parsl_scripts") 32 | ec, out, err = channel.execute_wait("which sbatch; echo $HOSTNAME; pwd") 33 | print("Stdout: ", out) 34 | 35 | provider = Slurm(config=slurm_config, channel=channel) 36 | 37 | x = provider.submit('''echo "sleeping" 38 | sleep 120 39 | echo "Done sleeping" ''', 1) 40 | time.sleep(3) 41 | 42 | y = provider.submit('''echo "sleeping" 43 | sleep 120 44 | echo "Done sleeping" ''', 1) 45 | time.sleep(3) 46 | 47 | stats = provider.status([x, y]) 48 | 49 | provider.cancel([x, y]) 50 | print(stats) 51 | 52 | 53 | if __name__ == "__main__": 54 | libsubmit.set_stream_logger() 55 | test_1() 56 | -------------------------------------------------------------------------------- /libsubmit/tests/test_integration/test_ssh/test_ssh_swan.py: -------------------------------------------------------------------------------- 1 | import libsubmit 2 | from libsubmit import SSHChannel 3 | from libsubmit import Torque 4 | import time 5 | 6 | 7 | def test_1(): 8 | 9 | torque_config = { 10 | "site": "Swan.CrayPN", 11 | "execution": { 12 | "executor": "ipp", 13 | "provider": "torque", 14 | "channel": "ssh", 15 | "block": { 16 | "initBlocks": 1, 17 | "maxBlocks": 1, 18 | "minBlocks": 0, 19 | "taskBlocks": 1, 20 | "nodes": 1, 21 | "walltime": "00:25:00", 22 | "options": { 23 | "partition": "debug", 24 | "queue": "ivb12", 25 | } 26 | } 27 | } 28 | } 29 | 30 | channel = SSHChannel("swan.cray.com", "p01953", script_dir="parsl_scripts") 31 | ec, out, err = channel.execute_wait("which qsub; echo $HOSTNAME; pwd") 32 | print("Stdout: ", out) 33 | 34 | provider = Torque(config=torque_config, channel=channel) 35 | 36 | x = provider.submit('''echo "sleeping" 37 | sleep 120 38 | echo "Done sleeping" ''', 1) 39 | time.sleep(3) 40 | 41 | y = provider.submit('''echo "sleeping" 42 | sleep 120 43 | echo "Done sleeping" ''', 1) 44 | time.sleep(3) 45 | 46 | stats = provider.status([x, y]) 47 | 48 | print("Trying to cancel : {0} {1}".format(x, y)) 49 | provider.cancel([x, y]) 50 | print(stats) 51 | 52 | 53 | if __name__ == "__main__": 54 | libsubmit.set_stream_logger() 55 | test_1() 56 | 
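The four SSH integration tests above all exercise the same provider lifecycle: open a channel, construct a provider, submit a block, poll its status, and cancel it. Below is a minimal sketch of that pattern using the keyword-based Torque constructor documented earlier in this listing; the hostname, username, queue name, and script directories are placeholder assumptions, and the remote side is assumed to have qsub on its PATH.

import time

import libsubmit
from libsubmit import SSHChannel, Torque

libsubmit.set_stream_logger()

# Placeholder host, user, and script_dir; substitute real values.
channel = SSHChannel("cluster.example.edu",
                     username="someuser",
                     script_dir="/home/someuser/parsl_scripts")

provider = Torque(channel=channel,
                  queue="debug",            # assumed queue name
                  nodes_per_block=1,
                  tasks_per_node=1,
                  init_blocks=1,
                  max_blocks=1,
                  walltime="00:05:00")

job_id = provider.submit('echo "sleeping"; sleep 60; echo "done"', 1)
time.sleep(3)

print(provider.status([job_id]))   # e.g. ['PENDING'] or ['RUNNING']
print(provider.cancel([job_id]))   # [True] once qdel succeeds

Note that submit() prefixes the job name with "parsl." plus a timestamp and returns the raw job id echoed by qsub, so the returned id can be passed straight back to status() and cancel().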
-------------------------------------------------------------------------------- /libsubmit/tests/test_providers/ec2/test_ec2.py: -------------------------------------------------------------------------------- 1 | import libsubmit 2 | from libsubmit import EC2Provider as ec2 3 | import time 4 | 5 | 6 | def test_1(): 7 | 8 | config = { 9 | "site": "ec2", 10 | "auth": { 11 | "foo": "foo" 12 | }, 13 | "execution": { 14 | "executor": "ipp", 15 | "provider": "ec2", 16 | "channel": None, 17 | "block": { 18 | "initBlocks": 1, 19 | "maxBlocks": 1, 20 | "minBlocks": 0, 21 | "taskBlocks": 1, 22 | "nodes": 1, 23 | "walltime": "00:25:00", 24 | "options": { 25 | "region": "us-east-2", 26 | "imageId": 'ami-82f4dae7', 27 | "stateFile": "awsproviderstate.json", 28 | "keyName": "parsl.test" 29 | } 30 | } 31 | } 32 | } 33 | 34 | provider = ec2(config=config, channel=None) 35 | 36 | x = provider.submit('''echo "sleeping" 37 | sleep 120 38 | echo "Done sleeping" ''', 1) 39 | time.sleep(3) 40 | 41 | y = provider.submit('''echo "sleeping" 42 | sleep 120 43 | echo "Done sleeping" ''', 1) 44 | time.sleep(3) 45 | 46 | stats = provider.status([x, y]) 47 | 48 | provider.cancel([x, y]) 49 | print(stats) 50 | 51 | 52 | def test_2(): 53 | 54 | config = { 55 | "site": "ec2", 56 | "auth": { 57 | "profile": "default" 58 | }, 59 | "execution": { 60 | "executor": "ipp", 61 | "provider": "ec2", 62 | "channel": None, 63 | "block": { 64 | "initBlocks": 1, 65 | "maxBlocks": 1, 66 | "minBlocks": 0, 67 | "taskBlocks": 1, 68 | "nodes": 1, 69 | "walltime": "00:25:00", 70 | "options": { 71 | "region": "us-east-2", 72 | "imageId": 'ami-82f4dae7', 73 | "stateFile": "awsproviderstate.json", 74 | "keyName": "parsl.test" 75 | } 76 | } 77 | } 78 | } 79 | 80 | provider = ec2(config=config, channel=None) 81 | 82 | x = provider.submit('''echo "sleeping" 83 | sleep 120 84 | echo "Done sleeping" ''', 1) 85 | time.sleep(3) 86 | 87 | y = provider.submit('''echo "sleeping" 88 | sleep 120 89 | echo "Done sleeping" ''', 1) 90 | time.sleep(3) 91 | 92 | print("X : ", x) 93 | print("Y : ", y) 94 | stats = provider.status([x, y]) 95 | print("Status : ", stats) 96 | 97 | provider.cancel([x, y]) 98 | 99 | # provider.teardown() 100 | 101 | 102 | if __name__ == "__main__": 103 | libsubmit.set_stream_logger() 104 | # test_1 () 105 | test_2() 106 | -------------------------------------------------------------------------------- /libsubmit/utils.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | 4 | def wtime_to_minutes(time_string): 5 | ''' wtime_to_minutes 6 | 7 | Convert standard wallclock time string to minutes. 8 | 9 | Args: 10 | - Time_string in HH:MM:SS format 11 | 12 | Returns: 13 | (int) minutes 14 | 15 | ''' 16 | hours, mins, seconds = time_string.split(':') 17 | return int(hours) * 60 + int(mins) + 1 18 | 19 | 20 | class RepresentationMixin(object): 21 | """A mixin class for adding a __repr__ method. 22 | 23 | The __repr__ method will return a string equivalent to the code used to instantiate 24 | the child class, with any defaults included explicitly. The __max_width__ class variable 25 | controls the maximum width of the representation string. If this width is exceeded, 26 | the representation string will be split up, with one argument or keyword argument per line. 27 | 28 | Any arguments or keyword arguments in the constructor must be defined as attributes, or 29 | an AttributeError will be raised. 
30 | 31 | Examples 32 | -------- 33 | >>> from libsubmit.utils import RepresentationMixin 34 | >>> class Foo(RepresentationMixin): 35 | def __init__(self, first, second, third='three', fourth='fourth'): 36 | self.first = first 37 | self.second = second 38 | self.third = third 39 | self.fourth = fourth 40 | >>> bar = Foo(1, 'two', fourth='baz') 41 | >>> bar 42 | Foo(1, 'two', third='three', fourth='baz') 43 | """ 44 | __max_width__ = 80 45 | 46 | def __repr__(self): 47 | argspec = inspect.getfullargspec(self.__init__) # getargspec is deprecated; getfullargspec is a drop-in here 48 | if len(argspec.args) > 1 and argspec.defaults: 49 | defaults = dict(zip(reversed(argspec.args), reversed(argspec.defaults))) 50 | else: 51 | defaults = {} 52 | 53 | for arg in argspec.args[1:]: 54 | if not hasattr(self, arg): 55 | template = 'class {} uses {} in the constructor, but does not define it as an attribute' 56 | raise AttributeError(template.format(self.__class__.__name__, arg)) 57 | 58 | args = [getattr(self, a) for a in argspec.args[1:len(argspec.args) - len(defaults)]] 59 | kwargs = {key: getattr(self, key) for key in defaults} 60 | 61 | def assemble_multiline(args, kwargs): 62 | def indent(text): 63 | lines = text.splitlines() 64 | if len(lines) <= 1: 65 | return text 66 | return "\n".join(" " + l for l in lines).strip() 67 | args = ["\n {},".format(indent(repr(a))) for a in args] 68 | kwargs = ["\n {}={}".format(k, indent(repr(v))) 69 | for k, v in sorted(kwargs.items())] 70 | 71 | info = "".join(args) + ", ".join(kwargs) 72 | return self.__class__.__name__ + "({}\n)".format(info) 73 | 74 | def assemble_line(args, kwargs): 75 | kwargs = ['{}={}'.format(k, repr(v)) for k, v in sorted(kwargs.items())] 76 | 77 | info = ", ".join([repr(a) for a in args] + kwargs) 78 | return self.__class__.__name__ + "({})".format(info) 79 | 80 | if len(assemble_line(args, kwargs)) <= self.__class__.__max_width__: 81 | return assemble_line(args, kwargs) 82 | else: 83 | return assemble_multiline(args, kwargs) 84 | -------------------------------------------------------------------------------- /libsubmit/version.py: -------------------------------------------------------------------------------- 1 | ''' Set module version 2 | <Major>.<Minor>.<maintenance>[-alpha/beta/..]
3 | ''' 4 | VERSION = '0.5.1-a3' 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | paramiko 2 | boto3 3 | azure-mgmt>=2.0.0 4 | python-novaclient 5 | google-api-python-client 6 | google-auth 7 | nbsphinx 8 | kubernetes>=6.0.0 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('libsubmit/version.py') as f: 4 | exec(f.read()) 5 | 6 | install_requires = [ 7 | 'paramiko' 8 | ] 9 | 10 | tests_require = [ 11 | 'paramiko', 12 | 'mock>=1.0.0', 13 | 'nose', 14 | 'pytest' 15 | ] 16 | 17 | setup( 18 | name='libsubmit', 19 | version=VERSION, 20 | description='Uniform interface to clouds, clusters, grids and supercomputers.', 21 | long_description='Submit, track and cancel arbitrary bash scripts on compute resources', 22 | url='https://github.com/Parsl/libsubmit', 23 | author='Yadu Nand Babuji', 24 | author_email='yadu@uchicago.edu', 25 | license='Apache 2.0', 26 | download_url = 'https://github.com/Parsl/libsubmit/archive/master.zip', 27 | package_data={'': ['LICENSE']}, 28 | packages=find_packages(), 29 | install_requires=install_requires, 30 | extras_require = { 31 | 'aws' : ['boto3'], 32 | 'azure' : ['azure-mgmt>=2.0.0', 'haikunator'], 33 | 'jetstream' : ['python-novaclient'] 34 | }, 35 | classifiers = [ 36 | # Maturity 37 | 'Development Status :: 3 - Alpha', 38 | # Intended audience 39 | 'Intended Audience :: Developers', 40 | # Licence, must match with licence above 41 | 'License :: OSI Approved :: Apache Software License', 42 | # Python versions supported 43 | 'Programming Language :: Python :: 3.5', 44 | 'Programming Language :: Python :: 3.6', 45 | ], 46 | keywords = ['Workflows', 'Scientific computing'], 47 | ) 48 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | nose 3 | pytest 4 | coverage 5 | mock>=1.0.0 6 | --------------------------------------------------------------------------------
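As a closing illustration of the two helpers defined in libsubmit/utils.py above, here is a small sketch; it assumes only that libsubmit is importable from this tree, and the Block class is a made-up example.

from libsubmit.utils import RepresentationMixin, wtime_to_minutes

# wtime_to_minutes always pads the result with one extra minute, so a
# 20-minute walltime becomes 21 scheduler minutes.
assert wtime_to_minutes("00:20:00") == 21
assert wtime_to_minutes("01:05:30") == 66   # 65 minutes + 1 of padding

# RepresentationMixin rebuilds the constructor call, defaults included,
# provided every constructor argument is stored as an attribute.
class Block(RepresentationMixin):
    def __init__(self, nodes, walltime="00:20:00"):
        self.nodes = nodes
        self.walltime = walltime

print(repr(Block(4)))   # Block(4, walltime='00:20:00')

The extras declared in setup.py work the usual way: for example, pip install libsubmit[aws] additionally pulls in boto3, and libsubmit[azure] adds azure-mgmt and haikunator.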