├── .gitignore ├── Dockerscripts └── DockerfileBase ├── LICENSE ├── MANIFEST.in ├── README.md ├── bin ├── burst ├── burst-config └── burst-monitor ├── burst ├── __init__.py ├── burst.py ├── burst_cli.py ├── config │ ├── __init__.py │ ├── config.py │ ├── config_file_utils.py │ ├── config_template.yml │ ├── configurers.py │ ├── cred_loaders.py │ ├── menus.py │ └── summary.py ├── lcloud.py ├── monitor │ └── monitor.py ├── runrun.py ├── verbos.py └── version.py ├── burst_examples ├── cifar10 │ ├── .burstignore │ ├── .dockerignore │ ├── CIFAR10_CNN.ipynb │ ├── Dockerfile │ ├── cifar_data_tools.py │ ├── ml_tools.py │ ├── requirements.txt │ └── trainCNN_CIFAR10.py ├── cifar10_conda │ ├── .burstignore │ ├── .dockerignore │ ├── CIFAR10_CNN.ipynb │ ├── Dockerfile │ ├── cifar_data_tools.py │ ├── ml_tools.py │ └── trainCNN_CIFAR10.py ├── hello_burst │ ├── .burstignore │ ├── .dockerignore │ ├── Dockerfile │ └── hello_burst.py ├── your_conda_project │ ├── .burstignore │ ├── .dockerignore │ ├── Dockerfile │ ├── Template.ipynb │ └── template.py └── your_python_project │ ├── .burstignore │ ├── .dockerignore │ ├── Dockerfile │ ├── Template.ipynb │ ├── requirements.txt │ └── template.py ├── docs ├── Makefile ├── make.bat └── source │ ├── _static │ └── temp │ ├── about.rst │ ├── conf.py │ ├── examples.rst │ ├── getting_started.rst │ ├── help.rst │ ├── index.rst │ ├── license.rst │ └── user_guide.rst ├── foo ├── release_notes.txt ├── setup.py ├── tests ├── .burstignore ├── .dockerignore ├── Dockerfile ├── buildtest.py ├── fulltest.py ├── fulltest_command.py ├── fulltest_ports.py ├── hello_burst.py ├── long_process.py ├── quicktest.py └── requirements.txt └── upload_pip.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | venv 3 | *~ 4 | docs/build/* 5 | burstable.egg-info/ 6 | burst_examples/cifar10/.burst-gpu 7 | burst_examples/cifar10_conda/.burst-gpu 8 | burst_examples/your_python_project/.burst-gpu 9 | 
burst_examples/your_conda_project/.burst-gpu 10 | 11 | -------------------------------------------------------------------------------- /Dockerscripts/DockerfileBase: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.3.0-devel-ubuntu20.04 2 | 3 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends wget nginx python3 python3-distutils nano fish git curl fuse 4 | 5 | RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py 6 | RUN pip3 install gputil ipython 7 | 8 | RUN wget https://github.com/rclone/rclone/releases/download/v1.53.1/rclone-v1.53.1-linux-amd64.deb 9 | RUN dpkg --install rclone-v1.53.1-linux-amd64.deb 10 | 11 | WORKDIR /home/burst/work 12 | CMD ["/bin/bash"] 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include burst/config/config_template.yml 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # burst 2 | ## A command-line tool to remotely execute code in the cloud 3 | 4 | ## Introduction 5 | 6 | `burst` lets you run your software remotely in the cloud, on powerful GPU's or multi-CPU hardware instances that are 7 | booted up and stopped automatically, so you only pay for the time you use. 8 | 9 | We support remote computing on Amazon Web Services and will be adding more 10 | (Google Cloud Platform support is currently in beta). 
11 | 12 | Documentation is available here: 13 | * [Overview](https://burstable.readthedocs.io/en/latest/index.html) 14 | * [Quickstart](https://burstable.readthedocs.io/en/latest/getting_started.html) 15 | * [Examples](https://burstable.readthedocs.io/en/latest/examples.html) 16 | * [Full User Guide](https://burstable.readthedocs.io/en/latest/user_guide.html) 17 | 18 | ## Dependencies 19 | 20 | * Python3 21 | * Docker version 19 or higher 22 | * A folder/project with a working `Dockerfile` 23 | * ssh keys 24 | * AWS or Google Cloud Services account and access keys 25 | 26 | _Note: if you want to contribute to the burst OSS project or just follow bleeding-edge development, install through 27 | gitHub as described [here](https://github.com/burstable-ai/burst/wiki/Contributing-To-Burst) instead._ 28 | -------------------------------------------------------------------------------- /bin/burst: -------------------------------------------------------------------------------- 1 | ../burst/burst_cli.py -------------------------------------------------------------------------------- /bin/burst-config: -------------------------------------------------------------------------------- 1 | ../burst/config/config.py -------------------------------------------------------------------------------- /bin/burst-monitor: -------------------------------------------------------------------------------- 1 | ../burst/monitor/monitor.py -------------------------------------------------------------------------------- /burst/__init__.py: -------------------------------------------------------------------------------- 1 | pass -------------------------------------------------------------------------------- /burst/burst.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, sys, argparse, subprocess, time, traceback, json, getpass 3 | # 4 | # the BDFL does not admire scripts which are also importable modules 5 | # well, 
frack him -- this is how we roll 6 | # 7 | #so absolute imports work in script mode, we need to import from the parent folder 8 | opath = os.path.abspath(".") 9 | abspath = os.path.abspath(__file__) 10 | # print ("BURST PATHS:", opath, abspath) 11 | abspath = abspath[:abspath.rfind('/') + 1] 12 | os.chdir(abspath) 13 | abspath = os.path.abspath("..") 14 | sys.path.insert(0, abspath) 15 | 16 | from burst.lcloud import * 17 | from burst.runrun import run 18 | from burst.version import version 19 | from burst.verbos import set_verbosity, get_verbosity, vprint, vvprint, v0print, get_piper, get_rsync_v, get_dockrunflags 20 | 21 | os.chdir(opath) 22 | 23 | DEFAULT_IMAGE = "burst_image" #FIXME: should be unique to folder structure 24 | 25 | install_burst_sh = "sudo bash -c 'rm -fr /var/lib/dpkg/lock*" \ 26 | " /var/cache/apt/archives/lock /var/lib/apt/lists/lock;" \ 27 | "sudo systemctl stop apt-daily* ; " \ 28 | "apt-get -y update; " \ 29 | "apt-get -y install python3-pip; " \ 30 | "python3 -m pip install --upgrade pip; " \ 31 | "python3 -m pip install easydict apache-libcloud python-dateutil; " \ 32 | "rm -fr burst; " \ 33 | "git clone -b monitor_1.2 https://github.com/burstable-ai/burst'" #for reals 34 | # "git clone -b jup_idle_164 https://github.com/danx0r/burst'" # for testing 35 | 36 | update_burst_sh = "cd burst; sudo bash -c 'git pull https://github.com/burstable-ai/burst monitor_1.2'" #for reals 37 | 38 | def do_ssh(url, cmd): 39 | ssh_cmd = f'ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=error {url} ' \ 40 | f'{cmd}' 41 | vvprint (ssh_cmd) 42 | return os.system(ssh_cmd) 43 | 44 | 45 | def ssh_tunnel(url, sshuser, ports, dockerdport): 46 | # set up ssh tunnel mapping docker socket, ports 47 | host_port_args = [] 48 | docker_port_args = "" 49 | if ports: 50 | for pa in ports: 51 | if ':' in pa: 52 | local_port, remote_port = pa.split(':') 53 | else: 54 | remote_port = local_port = pa 55 | docker_port_args += " -p 
{0}:{0}".format(remote_port) 56 | host_port_args.append("-L {0}:localhost:{1}".format(local_port, remote_port)) 57 | ssh_args = ["ssh", "-o StrictHostKeyChecking=no", "-o UserKnownHostsFile=/dev/null", 58 | "-o ExitOnForwardFailure=yes", 59 | "-o LogLevel=error", "-NL", "{0}:/var/run/docker.sock".format(dockerdport), 60 | "{0}@{1}".format(sshuser, url)] 61 | for arg in host_port_args: 62 | ssh_args.insert(3, arg) 63 | vvprint(ssh_args) 64 | tunnel = subprocess.Popen(ssh_args) 65 | vvprint("TUNNEL:", tunnel) 66 | time.sleep(2) 67 | return tunnel, docker_port_args 68 | 69 | 70 | def burst(args, sshuser=None, url=None, uuid=None, burst_user=None, gpu=False, ports=None, stop=False, 71 | image=None, vmtype=None, pubkey=None, dockerfile="Dockerfile", 72 | cloudmap="", dockerdport=2376, bgd=False, sync_only=False, conf=None): 73 | error = None 74 | tunnel = None 75 | try: 76 | if not os.path.exists(dockerfile): 77 | raise Exception("Dockerfile not found") 78 | if not os.path.exists(".dockerignore"): 79 | raise Exception(""" 80 | 81 | .dockerignore file not found. Burst requires a .dockerignore to avoid sending excess data to docker build. 82 | Because the working directory is rsync'd to the remote host, you typically only need to send the Dockerfile 83 | and files that are referred to (such as requirements.txt) to the build daemon. 84 | 85 | #Template .dockerignore 86 | #Default to ignoring everything: 87 | ** 88 | #exceptions (These will be sent to the docker daemon for building): 89 | !/Dockerfile* 90 | !requirements.txt 91 | """) 92 | 93 | if not os.path.exists(".burstignore"): 94 | raise Exception(""" 95 | 96 | .burstignore file not found. Burst requires a .burstignore to avoid synchronizing irrelevant data (such as 97 | hidden files) with the remote server. 
Here is a template, copy this to .burstignore in your project directory: 98 | 99 | .* 100 | venv 101 | __pycache__ 102 | """) 103 | 104 | #if url specified, split into user & IP 105 | if url: 106 | if not sshuser: 107 | sshuser, url = url.split('@') 108 | 109 | #launch, restart, or reconnect to node 110 | node = None 111 | 112 | #unless running --local: 113 | if url or uuid or burst_user: 114 | 115 | #if server does not exist, launch a fresh one 116 | fresh = False 117 | restart = False 118 | node = get_server(url=url, uuid=uuid, name=burst_user, conf=conf) 119 | if burst_user and not node: 120 | # print ("PUBKEY:", pubkey) 121 | node = launch_server(burst_user, pubkey=pubkey, vmtype=vmtype, image=image, conf=conf, user=sshuser, gpu=gpu) 122 | fresh = True 123 | restart = True 124 | if node: 125 | 126 | #if stopped, restart 127 | if node.state.lower() != "running": 128 | restart = True 129 | vprint ("Starting server") 130 | node = start_server(node) 131 | 132 | #by now we must have a public IP address 133 | url = node.public_ips[0] 134 | 135 | #wait for ssh daemon to be ready 136 | vprint ("Waiting for sshd") 137 | cmd = ["ssh", "-o StrictHostKeyChecking=no", "-o UserKnownHostsFile=/dev/null", "-o LogLevel=error", "{0}@{1}".format(sshuser, url), "echo", "'sshd responding'"] 138 | vvprint(cmd) 139 | good = False 140 | for z in range(10, -1, -1): 141 | ret = run(cmd, timeout=15) 142 | if ret[0].strip()[-15:]=='sshd responding': 143 | good = True 144 | break 145 | vprint ("still waiting on sshd (this can take a while) -- will try %d more times" % z) 146 | if z: 147 | time.sleep(5) 148 | if not good: 149 | raise Exception("error in ssh call: %s" % ret[0].strip()) 150 | vvprint ("SSH returns -->%s|%s<--" % ret) 151 | else: 152 | raise Exception("Error: node not found") 153 | 154 | docker_port_args = "" 155 | 156 | #we have a url unless running --local: 157 | if url: 158 | 159 | #if just launched, install docker 160 | if fresh: 161 | vprint("Configuring Docker") 162 | # 
'sudo apt-get -y update; sudo apt-get -y install docker.io; ' \ #images have docker installed 163 | cmd = 'ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=error {0}@{1} ' \ 164 | '"sudo usermod -a -G docker ubuntu; ' \ 165 | 'sudo systemctl unmask docker; sudo service docker start"'.format(sshuser, url) 166 | vvprint(cmd) 167 | os.system(cmd) 168 | 169 | vprint ("Connecting through ssh") 170 | tunnel, docker_port_args = ssh_tunnel(url, sshuser, ports, dockerdport) 171 | 172 | #path = absolute working directory on host 173 | relpath = os.path.abspath('.')[len(os.path.expanduser('~')):] 174 | relpath = "/_BURST" + relpath.replace('/', '_') #I can exlain 175 | locpath = os.path.abspath('.') 176 | path = "/home/{0}{1}".format(sshuser, relpath) 177 | 178 | if not sync_only: 179 | # part of check to see if docker is installed and running 180 | remote = "-H localhost:%s" % dockerdport 181 | cmd = ["docker", "{0}".format(remote), "ps", "--format", '{{json .}}'] 182 | vvprint(cmd) 183 | out, err = run(cmd) 184 | vvprint("PS returns:", out) 185 | running = len([x for x in out.strip().split("\n") if x]) 186 | if running: 187 | raise Exception("docker process already running -- burst does not support multiple processes") 188 | 189 | #prepare to build docker container 190 | vprint ("Removing topmost layer") #to avoid running stale image 191 | cmd = ["docker", "{0}".format(remote), "rmi", "--no-prune", DEFAULT_IMAGE] 192 | vvprint (cmd) 193 | out, err = run(cmd) 194 | if "no such image" in out.lower(): 195 | out = "Creating new burst_image" 196 | vvprint (out) 197 | 198 | vmtype, image = fix_vmtype_and_image(vmtype, image) 199 | if vmtype and vmtype != get_server_vmtype(node): #FIXME 200 | raise Exception("Cannot change vmtype (instance type) or gpu status -- need to re-launch") 201 | 202 | # get_server_image is broken, need to prompt better here 203 | # if image and image != get_server_image(node): 204 | # if image and image != 
get_server_image(node): 205 | # raise Exception("FIXME: cannot change host image -- need to terminate & re-launch server") 206 | 207 | vprint ("burst: name %s vmtype %s image %s url %s" % (node.name, vmtype, image, url)) 208 | 209 | #if using cloud storage (s3 etc), set up config & auth for rclone 210 | if cloudmap: 211 | if remote: 212 | stor = get_config()['storage'] 213 | if stor['provider'] == 'GCS': 214 | #create a keyfile & point to it 215 | srvacctf = ".rclone_key_%s.json" % stor['settings']['private_key']['private_key_id'] 216 | f = open(srvacctf, 'w') 217 | json.dump(stor['settings']['private_key'], f) 218 | f.close() 219 | stor['settings']['service_account_file'] = srvacctf 220 | 221 | # build & save rclone.conf 222 | s = f"[{stor['config']}]\n" 223 | for k, v in stor.items(): 224 | if k != 'settings': 225 | s += f"{k} = {v}\n" 226 | for k, v in stor['settings'].items(): 227 | s += f"{k} = {v}\n" 228 | f = open(".rclone.conf", 'w') 229 | f.write(s) 230 | f.close() 231 | 232 | rsync_ignore_path = os.path.abspath("./.burstignore") 233 | if not sync_only: #sync_only means from remote to local 234 | #sync local working data to host 235 | if not os.path.exists(rsync_ignore_path): 236 | vprint("creating empty .burstignore") 237 | os.system("touch .burstignore") 238 | cmd = 'rsync -rltzu{4} --del --include=.rclone.conf --exclude-from {5} -e "ssh -o StrictHostKeyChecking=no ' \ 239 | '-o UserKnownHostsFile=/dev/null -o LogLevel=error" {0}/. 
{3}@{1}:{2}/'.format(locpath, 240 | url, path, sshuser, get_rsync_v(), rsync_ignore_path) 241 | vprint ("Synchronizing project folders") 242 | vvprint (cmd) 243 | os.system(cmd) 244 | 245 | if get_config().provider == 'GCE': 246 | # sync service acct creds (for shutdown) 247 | cmd = 'rsync -rltzu{4} --relative -e "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=error" {0}/./.burst/{5} {3}@{1}:{2}/'.format(os.path.expanduser('~'), 248 | url, path, sshuser, get_rsync_v(), get_config().raw_secret) 249 | vprint("Synchronizing credentials for shutdown") 250 | vvprint (cmd) 251 | os.system(cmd) 252 | 253 | #if fresh launch, clone burst locally for monitor 254 | if fresh: 255 | vprint ("Installing burst on server") 256 | vvprint("Delay for apt-get to settle") 257 | time.sleep(30) #trust me this helps 258 | vvprint("Delay done") 259 | err = do_ssh(f"{sshuser}@{url}", '"%s"' % install_burst_sh) #notable quoteables 260 | if err: 261 | raise Exception("Failed to install burst on remote server") 262 | if restart: 263 | vprint ("updating burst installation for monitor") 264 | err = do_ssh(f"{sshuser}@{url}", '"%s"' % update_burst_sh) 265 | if err: 266 | raise Exception("Failed to update burst on remote server") 267 | vprint ("Starting monitor process for shutdown++") 268 | #run monitor (in detached screen) to check if user's burst OR rsync is still running 269 | conf = get_config() 270 | if conf.provider == "GCE": 271 | secret = ".burst/" + conf.raw_secret 272 | else: 273 | secret = conf.secret 274 | 275 | proj = ('--project ' + conf.project) if conf.project else '' 276 | cmd = f"screen -md bash -c 'cd {path}; /usr/bin/python3 ~/burst/burst/monitor/monitor.py" \ 277 | f" --ip {url} --access {conf.access} --provider {conf.provider}" \ 278 | f" --secret={secret} --region {conf.region} {proj} >> ~/burst_monitor.log'" 279 | vvprint (cmd) 280 | err = do_ssh(f"{sshuser}@{url}", '"%s"' % cmd) 281 | if err: 282 | raise Exception("Failed to initialize timeout 
monitor") 283 | 284 | else: 285 | vprint ("burst: running locally") 286 | remote = "" 287 | path = os.path.abspath('.') 288 | 289 | if not sync_only: 290 | #actually build container -- for reals 291 | vprint ("Building docker container") 292 | cmd = "docker {1} build . --file {2} -t {0} {3}".format(DEFAULT_IMAGE, remote, dockerfile, get_piper()) 293 | vvprint (cmd) 294 | os.system(cmd) 295 | 296 | jupyter = False 297 | if len(args): 298 | jupyter = args[0] == 'jupyter' 299 | 300 | #build argument list -- re-quote if whitespace 301 | s = "" 302 | for a in args: 303 | a = a.strip() 304 | if " " in a: 305 | if '"' in a: 306 | s += f"'{a}' " 307 | else: 308 | s += f'"{a}" ' 309 | else: 310 | s += f"{a} " 311 | args = s.rstrip() 312 | # print ("FINAL args:", args) 313 | # exit() 314 | 315 | if gpu: 316 | gpu_args = "--gpus all" 317 | else: 318 | gpu_args = "" 319 | 320 | #if mounting storage, add arguments & insert commands before (to mount) and after (to unmount) user-specified args 321 | cloud_args = "" 322 | if cloudmap: 323 | cloud, host = cloudmap.split(":") 324 | args = f"bash -c 'mkdir -p {host}; rclone mount --vfs-cache-mode writes --vfs-write-back 0 --config .rclone.conf {cloud}: {host} & sleep 3; {args}; umount {host}'" 325 | cloud_args = " --privileged" 326 | 327 | vprint ("Running docker container") 328 | background_args = "-td" if bgd else "-ti" 329 | 330 | if jupyter: 331 | if len(ports) == 0: 332 | raise Exception("jupyter requires -p (usually 8888)") 333 | jupargs = f"--label ai.burstable.jupyter={ports[0]}" #FIXME: document that 1st port is jupyter 334 | else: 335 | jupargs = "" 336 | 337 | cmd = f"docker {remote} run {gpu_args} {docker_port_args} --rm {background_args}" \ 338 | f" --label ai.burstable.shutdown={stop} {jupargs}" \ 339 | f" -v {path}:/home/burst/work {cloud_args} {DEFAULT_IMAGE} {args}" 340 | 341 | #run main task 342 | vvprint (cmd) 343 | vprint ("") 344 | v0print ("---------------------OUTPUT-----------------------") 345 | 
sys.stdout.flush() 346 | if bgd: 347 | cmd = cmd.split() 348 | docker_container, err = run(cmd) 349 | print ("Running in background mode. Container =", docker_container[:11]) 350 | else: 351 | os.system(cmd) 352 | sys.stdout.flush() 353 | v0print ("----------------------END-------------------------") 354 | sys.stdout.flush() 355 | 356 | #sync data on host back to local (note: we do not delete in this direction lest a fresh machine wipes our local workspace) 357 | if url and not bgd: 358 | vprint ("Synchronizing folders") 359 | cmd = "rsync -rltzu{4} --exclude-from {5} -e 'ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=error' '{3}@{1}:{2}/.' {0}/".format(locpath, 360 | url, path, sshuser, get_rsync_v(), rsync_ignore_path) 361 | vvprint (cmd) 362 | err = os.system(cmd + " " + get_piper()) 363 | # print ("RSYNC err:", err) 364 | if err: 365 | vvprint("rsync returns:", err) 366 | vprint("Your session has timed out. Run 'burst sync' to synchronize data") 367 | 368 | except Exception as ex: 369 | if get_verbosity() & 64: 370 | v0print ("--------------------------------") 371 | traceback.print_exc() 372 | v0print ("--------------------------------") 373 | else: 374 | print () 375 | print (ex) 376 | error = "Exception" 377 | if tunnel: 378 | tunnel.kill() 379 | return error 380 | -------------------------------------------------------------------------------- /burst/burst_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, sys, argparse, time, json, getpass 3 | # 4 | # the BDFL does not admire scripts which are also importable modules 5 | # well, frack him -- this is how we roll 6 | # 7 | #so absolute imports work in script mode, we need to import from the parent folder 8 | opath = os.path.abspath(".") 9 | abspath = os.path.abspath(__file__) 10 | # print ("CLI PATHS:", opath, abspath) 11 | abspath = abspath[:abspath.rfind('/') + 1] 12 | os.chdir(abspath) 13 | abspath 
#
# typing reduction act
#
def complete(x, a):
    """Prefix-match *x* against the keys of *a*.

    Returns (matches, count): the list of keys in *a* that start with *x*,
    and its length. Callers use count to distinguish no-match / unique /
    ambiguous abbreviations.
    """
    match = [k for k in a if k.startswith(x)]
    return match, len(match)

# action name -> one-line usage/help string (shown by 'burst actions')
actions = {
    # None,
    'build': "burst build |build project",
    'run': "burst run |run on remote server",
    'help': "burst help |print helpful information",
    'list-servers': "burst list-servers |list available servers; display time till automatic stop",
    'status': "burst status |show status of remote task (if running)",
    'stop-server': "burst stop-server |force-stop server (prompts for confirmation)",
    # FIX: help text previously misspelled the command as "termimate-server"
    'terminate-server': "burst terminate-server |terminate (delete) remote server (prompts for confirmation)",
    'attach': "burst attach |attach stdin, stdout, stderr to background process. ctl-C detaches",
    'sync': "burst sync |synchronize local directory to remote",
    'kill': "burst kill |stop docker process on remote",
    'actions': "burst actions |list available actions",
    'configure': "burst configure |Interactive configuration",
    'jupyter': "burst jupyter |Run jupyter lab (respects idle timeout)",
}

actions_keys_sorted = sorted(actions)

#
# This hack ensures we do not collect new, undocumented 'actions' (subcommands)
#
def switch(action, *args):
    """Return True iff *action* is one of *args*.

    A non-None action that is not a documented key of `actions` raises,
    so undocumented subcommands cannot sneak in.
    """
    if action is None:
        return False
    if action not in actions:
        raise Exception("Unknown action: %s try: 'burst help'" % action)
    return action in args
actions' to list available actions") 71 | add("--background", "-b", action="store_true", help="Run task in background mode") 72 | add("--compute-access", metavar="KEY", dest='access', help="libcloud username (aws: ACCESS_KEY)") 73 | add("--compute-provider", dest='provider',default='EC2', help="GCE, EC2 etc.") 74 | add("--compute-region", dest='region', help="libcloud location (aws: region)") 75 | add("--compute-secret", dest='secret', help="libcloud password (aws: SECRET)") 76 | add("--compute-service", dest='compute_config',metavar="COMPUTE_SERVICE", help="override default compute configuration service") 77 | add("--config-file", metavar="FILE", dest='configfile', help="override default config.yml") 78 | add("--disksize", type=int, metavar="GIGABYTES", help="disk size in gigabytes") 79 | add("--docker-file", dest='dockerfile', type=str, default="Dockerfile", metavar="FILE", 80 | help="Docker file (defaults to ./Dockerfile)") 81 | add("--docker-port", dest='dockerdport', type=int, default=2377, metavar="PORT", help="local port to map to remote host docker daemon" 82 | "(default: 2377)") 83 | add("--gcs-project", dest='project', help="Google Cloud project ID") 84 | add("--gpu", action="store_true", help="Build with gpu") 85 | add("--help", action="store_true", help="Print usage info") 86 | add("--local", action="store_true", help="run on local device") 87 | add("--no-gpu", action="store_true", help="Build without gpu") 88 | add("--pubkey-file", dest='pubkey', help="public key to access server (defaults to ~/.ssh/id_rsa.pub)") 89 | add("--session-name", metavar="NAME", dest='burst_user', help="Burst session name (defaults to burst-username; " 90 | "different sessions launch new machine instances)") 91 | add("--stop", type=int, default=900, metavar="SECONDS", help="seconds before server is stopped (default 900) " 92 | "0 means never. 
Use action 'stop' to force stop") 93 | add("--storage-mount", dest="cloudmap", type=str, default="", metavar="STORAGE:MOUNT", 94 | help="map (mount) burst storage service to local folder") 95 | add("--storage-service", dest="storage_config", metavar="STORAGE_SERVICE", help="override default storage configuration") 96 | add("--tunnel-port", "-p", dest='portmap', action="append", metavar="LOCAL[:REMOTE]", help="port mapping; example: -p 8080 or -p 8081:8080") 97 | add("--verbose", "-v", dest='verbosity', type=int, default=0, help="-1: just task output 0: status 1-255: more verbose " 98 | "(default: 0)") 99 | add("--version", action="store_true", help="Print version # & exit") 100 | add("--vm-image", dest='image', help="libcloud image (aws: ami image_id)") 101 | add("--vm-type", metavar="TYPE", help="aws: instance_type; gce: size)") 102 | add("--vm-username", dest='sshuser', default="ubuntu", help="remote server username for login") 103 | 104 | if len(sys.argv) < 2: 105 | parser.print_help() 106 | sys.exit(1) 107 | # 108 | # this got a bit tricky. 109 | # we want to parse args BEFORE the main command as burst options 110 | # and pass all args AFTER the main command to the command when it runs remotely 111 | # 112 | argv = sys.argv[1:] 113 | try: 114 | args, task_args = parser.parse_known_args(argv) 115 | except SystemExit: 116 | traceback.print_exc() 117 | print ("There was an error parsing arguments. 
If there is an argument conflict, try 'run -- yourcommands'") 118 | exit() 119 | set_verbosity(args.verbosity) 120 | 121 | if args.action == None: 122 | action = None 123 | else: 124 | action, matches = complete(args.action, actions) 125 | if matches > 1: 126 | raise Exception(f"Ambiguous action: {args.action} could be one of: {', '.join(action)}") 127 | elif matches == 0: 128 | raise Exception("Unknown action '%s'; try: 'burst --help'" % args.action) 129 | else: 130 | action = action[0] 131 | vvprint (f"Expanding action: {args.action} --> {action}") 132 | 133 | vvprint ("ARGV:", argv) 134 | vvprint ("BURST:") 135 | for k, v in args.__dict__.items(): 136 | if v: 137 | vvprint (f" {k}=={v}") 138 | vvprint ("TASK:") 139 | for k in task_args: 140 | vvprint (" ", k) 141 | 142 | if action == 'build' and args.verbosity < 1: 143 | set_verbosity(9) 144 | 145 | if args.help: 146 | parser.print_help() 147 | sys.exit(1) 148 | 149 | #override config credentials on command line: --access implies all must be provided 150 | if args.access: 151 | args_compute = dictobj() 152 | args_compute.access = args.access 153 | args_compute.secret = args.secret 154 | args_compute.region = args.region 155 | args_compute.project = args.project 156 | args_compute.provider = args.provider 157 | else: 158 | burst_conf = {} 159 | 160 | #command line overrides: 161 | if args.compute_config: 162 | burst_conf['compute_config'] = args.compute_config 163 | 164 | if args.storage_config: 165 | burst_conf['storage_config'] = args.storage_config 166 | 167 | if args.project: 168 | burst_conf['project'] = args.project 169 | 170 | if args.region: 171 | burst_conf['region'] = args.region 172 | 173 | if args.configfile: 174 | burst_conf['configfile'] = args.configfile 175 | 176 | if args.disksize: 177 | burst_conf['disksize'] = args.disksize 178 | 179 | # if args.local: 180 | # vprint (args) 181 | # parser.error("when specifying --local, do not set --vm-username or --session-name") 182 | # exit() 183 | 184 | 
#set default burst_user if necessary: 185 | if not (args.burst_user or args.local or args.version): 186 | burst_user = getpass.getuser() 187 | args.burst_user = "burst-" + burst_user 188 | vprint ("Session: %s" % args.burst_user) 189 | 190 | if action != 'run' and len(task_args) > 0: 191 | raise Exception(f"Unknown arguments: {task_args}") 192 | 193 | ############################################################################ 194 | # #master switch clause. First, stand-alone options 195 | if switch(action, 'help'): 196 | # print ("DBG:", action, args.action, argv) 197 | print ("type 'burst --help' for help, 'burst actions' for documentation on available actions") 198 | exit() 199 | 200 | elif switch(action, 'actions'): 201 | # print ("DBG:", action, args.action, argv) 202 | print (" " * 80 + "\r") 203 | print ("Available actions (note you can abbreviate if unambiguous, 'burst act'):") 204 | for act in actions_keys_sorted: 205 | print (f" {actions[act]}") 206 | exit() 207 | 208 | elif switch(action, 'list-servers'): 209 | init(burst_conf) 210 | # pprint(get_config()) 211 | cconf = get_config()['compute_config'] 212 | v0print ("-------------------------------------------------------------\nSessions with config %s & user %s:" % (cconf, args.burst_user)) 213 | for n, s in list_servers(args.burst_user, burst_conf): 214 | # print ("DBG:", n.public_ips[0]) 215 | print (s) 216 | if n.state.lower()=='running': 217 | cmd = f"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=error ubuntu@{n.public_ips[0]} 'tail -n {max(get_verbosity(), 1)} ~/burst_monitor.log'" 218 | os.system(cmd) 219 | v0print ("-------------------------------------------------------------") 220 | 221 | elif switch(action, 'stop-server'): 222 | v0print ("-------------------------------------------------------------") 223 | count = 0 224 | for node, s in list_servers(args.burst_user, burst_conf): 225 | if node.state == "stopped": 226 | continue 227 | count += 1 228 | yes = 
input("Stopping (warm shutdown) %s, are you sure? (y/n)" % s) 229 | if yes=='y': 230 | stop_server(node) 231 | else: 232 | print ("Aborted") 233 | if not count: 234 | print ("no servers to shut down") 235 | v0print ("-------------------------------------------------------------") 236 | 237 | elif switch(action, 'terminate-server'): 238 | v0print ("-------------------------------------------------------------") 239 | count = 0 240 | for node, s in list_servers(args.burst_user, burst_conf, terminated=False): 241 | count += 1 242 | yes = input("Terminating %s, are you sure? (y/n)" % s) 243 | if yes=='y': 244 | os.system("rm .burst-gpu") 245 | terminate_server(node) 246 | else: 247 | print ("Aborted") 248 | if not count: 249 | print ("no servers to terminate") 250 | v0print ("-------------------------------------------------------------") 251 | 252 | elif switch(action, 'attach'): 253 | tunnel = None 254 | init(burst_conf) 255 | cconf = get_config()['compute_config'] 256 | url = None 257 | for node, s in list_servers(args.burst_user, burst_conf): 258 | vvprint (node, s) 259 | if node.state.upper() == 'RUNNING': 260 | if url: 261 | raise Exception("multiple docker processes running, this is not supported") 262 | url = node.public_ips[0] 263 | break 264 | if not url: 265 | print ("No process running") 266 | else: 267 | vvprint (f"Attaching to docker process on {url}") 268 | tunnel, _ = ssh_tunnel(url, args.sshuser, args.portmap, args.dockerdport) 269 | vvprint ("Tunnel:", tunnel) 270 | cmd = ["docker", "-H localhost:%s" % args.dockerdport, "ps", "--format", '{{json .}}'] 271 | vvprint (cmd) 272 | out, err = run(cmd) 273 | vvprint("PS returns:", out) 274 | if not out: 275 | print ("\nNo Docker process found") 276 | else: 277 | try: 278 | did = json.loads(out) 279 | vprint ("Attaching to docker process", did['ID']) 280 | cmd = f"docker -H localhost:{args.dockerdport} attach --sig-proxy=false {did['ID']}" 281 | vvprint (cmd) 282 | v0print("ctrl-C only detaches; 'burst kill' 
to stop") 283 | v0print ("---------------------OUTPUT-----------------------") 284 | os.system(cmd) 285 | v0print ("----------------------END-------------------------") 286 | except: 287 | print ("\nFailed to attach:", out) 288 | sys.stdout.flush() 289 | if tunnel: 290 | tunnel.kill() 291 | 292 | elif switch(action, 'kill'): 293 | tunnel = None 294 | init(burst_conf) 295 | cconf = get_config()['compute_config'] 296 | url = None 297 | for node, s in list_servers(args.burst_user, burst_conf): 298 | vvprint (node, s) 299 | if node.state.upper() == 'RUNNING': 300 | if url: 301 | raise Exception("multiple docker processes running, this is not supported") 302 | url = node.public_ips[0] 303 | break 304 | if not url: 305 | print ("No process running") 306 | else: 307 | vvprint (f"Killing Docker process on {url}") 308 | tunnel, _ = ssh_tunnel(url, args.sshuser, args.portmap, args.dockerdport) 309 | vvprint ("Tunnel:", tunnel) 310 | cmd = ["docker", "-H localhost:%s" % args.dockerdport, "ps", "--format", '{{json .}}'] 311 | vvprint (cmd) 312 | out, err = run(cmd) 313 | vvprint("PS returns:", out) 314 | if not out: 315 | print ("\nNo Docker process found") 316 | else: 317 | try: 318 | did = json.loads(out) 319 | yes = input(f"Killing Docker process {did['ID']}, are you sure? 
(y/n)") 320 | if yes == 'y': 321 | cmd = f"docker -H localhost:{args.dockerdport} stop {did['ID']}" 322 | vvprint (cmd) 323 | os.system(cmd) 324 | print ("Process killed") 325 | else: 326 | print("Aborted") 327 | except: 328 | print ("\nError:", out) 329 | sys.stdout.flush() 330 | if tunnel: 331 | tunnel.kill() 332 | 333 | elif switch(action, 'status'): 334 | tunnel = None 335 | init(burst_conf) 336 | cconf = get_config()['compute_config'] 337 | url = None 338 | for node, s in list_servers(args.burst_user, burst_conf): 339 | vvprint (node, s) 340 | if node.state.upper() == 'RUNNING': 341 | if url: 342 | raise Exception("multiple docker processes running, this is not supported") 343 | url = node.public_ips[0] 344 | break 345 | if not url: 346 | v0print("-------------------------------------------------------------") 347 | print ("No remote host running") 348 | v0print("-------------------------------------------------------------") 349 | else: 350 | vvprint (f"Looking for docker process on {url}") 351 | tunnel, _ = ssh_tunnel(url, args.sshuser, args.portmap, args.dockerdport) 352 | vvprint ("Tunnel:", tunnel) 353 | cmd = ["docker", "-H localhost:%s" % args.dockerdport, "ps", "--no-trunc", "--format", '{{json .}}'] 354 | vvprint (cmd) 355 | out, err = run(cmd) 356 | vvprint("PS returns:", out) 357 | if not out: 358 | print ("\nNo Docker process found") 359 | else: 360 | try: 361 | did = json.loads(out) 362 | v0print("-------------------------------------------------------------") 363 | print (f"Docker process ID: {did['ID'][:12]}\n" 364 | f"Status: {did['Status']}\n" 365 | f"Command: {did['Command']}") 366 | # f"Mounts: {did['Mounts']}") 367 | v0print("-------------------------------------------------------------") 368 | except: 369 | print ("\nError:", out) 370 | sys.stdout.flush() 371 | if tunnel: 372 | tunnel.kill() 373 | 374 | elif args.version: 375 | print ("VERSION:", version) 376 | 377 | elif switch(action, 'configure'): 378 | if args.configfile: 379 | yam = 
args.configfile 380 | else: 381 | yam = os.environ['HOME'] + "/.burst/config.yml" 382 | os.system("burst-config --config_path %s" % yam) 383 | 384 | elif switch(action, 'build', 'run', 'sync', 'jupyter'): 385 | #no stand-alone options; do burst for reals 386 | pubkey = None 387 | if not args.local: 388 | if args.pubkey: 389 | file_name = args.pubkey 390 | else: 391 | file_name = os.path.expanduser("~") + "/.ssh/id_rsa.pub" 392 | try: 393 | if ".ssh" not in file_name: 394 | raise Exception ("Public keys (and their private parts) need to be in the ~/.ssh folder") 395 | f=open(file_name) #FIXME: a bit cheeky 396 | pubkey=f.read() 397 | f.close() 398 | except FileNotFoundError: 399 | raise Exception (f"Public key file {file_name} not found") 400 | 401 | if not os.path.exists(args.dockerfile): 402 | raise Exception("No Dockerfile found") 403 | #if we are launching, need to know gpu 404 | if not os.path.exists(".burst-gpu"): 405 | if not (args.gpu or args.no_gpu): 406 | raise Exception("Must specify --gpu or --no-gpu for initial build") 407 | f = open(".burst-gpu", 'w') 408 | f.write(f"{args.gpu}") 409 | f.close() 410 | f = open(".burst-gpu") 411 | gpu = f.read().strip().lower()=='true' 412 | f.close() 413 | 414 | #sanity clause 415 | if (gpu and args.no_gpu) or ((not gpu) and args.gpu): 416 | raise Exception("Gpu status can only be changed with fresh launch (terminate & rebuild)") 417 | 418 | #blech 419 | if gpu: 420 | if args.vm_type == None: 421 | vmtype = 'DEFAULT_GPU_VMTYPE' 422 | else: 423 | vmtype = args.vm_type 424 | if args.image == None: 425 | image = 'DEFAULT_GPU_IMAGE' 426 | else: 427 | image = args.image 428 | else: 429 | if args.vm_type == None: 430 | vmtype = 'DEFAULT_VMTYPE' 431 | else: 432 | vmtype = args.vm_type 433 | if args.image == None: 434 | image = 'DEFAULT_IMAGE' 435 | else: 436 | image = args.image 437 | 438 | if action == 'build': 439 | task_args = ['echo', 'Build phase 1 success'] 440 | 441 | elif action == 'jupyter': 442 | if args.portmap == 
def main():
    """Interactive entry point for burst configuration.

    Reads the config file, dispatches on the (optional) positional choices
    parsed by parse_arguments(), mutates the config dict in place, and
    writes it back. With no arguments the user is walked through menus.
    """
    args = parse_arguments()

    config = get_config(args.config_path)
    print('Welcome to the burst tool configuration!')
    # With no subcommand (or an explicit 'summary') show the current state first.
    if args.main_choice in [None, 'summary']:
        summary.all(config)

    # Positional arg wins; otherwise prompt via the interactive menu.
    main_selection = args.main_choice if args.main_choice else menus.main_menu()
    if main_selection == 'compute':
        second_selection = args.second_choice if args.second_choice else menus.main_service_menu('compute')
        if second_selection == 'add':
            alias, creds = configurers.new_compute(aws_path=args.aws_path)
            # warns and may sys.exit() if the alias already exists
            configurers.check_existance(config, 'compute', alias)
            config['compute']['configurations'][alias] = creds
            print(f'{alias} has been added to compute')
        elif second_selection == 'remove':
            remove_alias = configurers.remove_service(config, 'compute', remove_alias=args.third_choice)
            del config['compute']['configurations'][remove_alias]
            print(f'{remove_alias} has been removed from compute')

    elif main_selection == 'storage':
        second_selection = args.second_choice if args.second_choice else menus.main_service_menu('storage')
        if second_selection == 'add':
            alias, creds = configurers.new_storage(aws_path=args.aws_path)
            # NOTE(review): unlike the compute branch, no check_existance()
            # call here -- an existing storage alias is silently overwritten;
            # confirm whether that is intended
            config['storage']['configurations'][alias] = creds
            print(f'{alias} has been added to storage')
        elif second_selection == 'remove':
            remove_alias = configurers.remove_service(config, 'storage', remove_alias=args.third_choice)
            del config['storage']['configurations'][remove_alias]
            print(f'{remove_alias} has been removed from storage')

    elif main_selection == 'default':
        second_selection = args.second_choice if args.second_choice else menus.default_service_menu()
        if second_selection == 'compute':
            settings = configurers.set_default(config, 'compute')
            config['compute']['settings'] = settings
        elif second_selection == 'storage':
            settings = configurers.set_default(config, 'storage')
            config['storage']['settings'] = settings

    else:
        # 'summary' or 'exit': nothing to change, so nothing is written back
        return 0

    # Convenience: a sole configuration with no default yet becomes the default.
    if len(config.get('compute', {}).get('configurations', {})) == 1 and not config['compute']['settings']:
        settings = configurers.set_default(config, 'compute', list(config['compute']['configurations'])[0])
        config['compute']['settings'] = settings

    if len(config.get('storage', {}).get('configurations', {})) == 1 and not config['storage']['settings']:
        settings = configurers.set_default(config, 'storage', list(config['storage']['configurations'])[0])
        config['storage']['settings'] = settings

    write_config(config, args.config_path)


def parse_arguments():
    """Parse CLI arguments for the configuration tool.

    Positionals are optional: main_choice (compute/storage/default/summary),
    then a second/third choice whose validity depends on the first.
    Raises on invalid positional combinations.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument('main_choice', choices=['compute', 'storage', 'default', 'summary'], nargs='?')
    parser.add_argument('second_choice', nargs='?')
    parser.add_argument('third_choice', nargs='?')

    parser.add_argument('--aws_path', '-a', default='~/.aws', help='Specify location of AWS credentials (default "~/.aws")')
    # NOTE(review): default here is ~/.burst.config.yml, but burst_cli's
    # 'configure' action passes ~/.burst/config.yml explicitly -- confirm
    # whether the bare default is still reachable/intended
    parser.add_argument('--config_path', default="~/.burst.config.yml", help='Specify location of config file (default "~/.burst.config.yml")')

    args = parser.parse_args()

    if args.main_choice in ['compute', 'storage'] and args.second_choice not in ['add', 'remove', None]:
        raise Exception("Can only specify 'add' or 'remove' when configuring compute")
    elif args.main_choice == 'default' and args.second_choice not in ['compute', 'storage', None]:
        raise Exception("Can only specify 'compute' or 'storage' when configuring defaults")

    return args


if __name__ == '__main__':
    main()
yaml.add_representer(str, selective_representer)


def get_config(file_name):
    """Load the YAML config at *file_name* (user-expanded).

    A missing (or empty) file yields a fresh skeleton; the 'compute' and
    'storage' sections are guaranteed present on return so callers can
    index them directly.
    """
    config = {}

    path = os.path.expanduser(file_name)
    if os.path.exists(path):
        with open(path) as fp:
            # FIX: yaml.load returns None for an empty file, which previously
            # crashed the section checks below; fall back to {}
            config = yaml.load(fp, Loader=yaml.FullLoader) or {}

    # defines sections in dict for writing
    if 'compute' not in config:
        config['compute'] = {'configurations': {}, 'settings': {}}
    if 'storage' not in config:
        config['storage'] = {'configurations': {}, 'settings': {}}

    return config


def write_config(config, file_name):
    """Write *config* to *file_name* as YAML, creating parent dirs as needed.

    Note: empty sections are removed from the caller's dict before writing
    (this mutates *config*).
    """
    # Removes empty sections
    if len(config.get('storage', {}).get('configurations', {})) == 0:
        del config['storage']
    if len(config.get('compute', {}).get('configurations', {})) == 0:
        del config['compute']

    path = os.path.expanduser(file_name)
    # FIX: the old code did `os.mkdir(parent)` guarded by a check on the
    # *file* path, so it raised FileExistsError whenever the parent directory
    # already existed but the file did not, and could not create nested
    # directories. makedirs(exist_ok=True) handles both cases.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, 'w') as fp:
        yaml.dump(config, fp)
# absolute directory of this module, with trailing slash; used below to
# locate the bundled config_template.yml that sits next to this file
abspath = os.path.abspath(__file__)
abspath = abspath[:abspath.rfind('/') + 1]

# The alias used in the template file to set default images/sizes
TEMPLATE_AWS_ALIAS = 'burst_ec2'

def new_compute(aws_path='~/.aws'):
    """Interactively build an EC2 compute configuration.

    Starts from the template's burst_ec2 entry (default images/VM sizes),
    fills in AWS credentials via get_aws_creds (which may prompt), then
    prompts for an alias. Returns (alias, config_dict); the caller stores
    it under config['compute']['configurations'][alias].
    """

    print('\nSetting up EC2 on AWS')

    config = get_config(file_name='%sconfig_template.yml' % abspath)['compute']['configurations'][TEMPLATE_AWS_ALIAS]
    config['access'], config['settings']['secret'], config['region'] = get_aws_creds(aws_path)
    alias = input('\nPlease enter an alias (name) to reference these credentials: ')
    # ask if this should be default?

    return alias, config


def new_storage(aws_path='~/.aws'):
    """Interactively build an S3 storage configuration.

    Built from scratch (no template); credentials come from get_aws_creds
    (which may prompt). Returns (alias, config_dict) like new_compute.
    """
    print('\nSetting up S3 on AWS')

    config = {'settings': {}}
    config['provider'] = 'AWS'
    config['type'] = 's3'
    config['settings']['access_key_id'], config['settings']['secret_access_key'], config['settings']['region'] = get_aws_creds(aws_path)
    config['settings']['env_auth'] = False
    config['settings']['acl'] = 'private'
    # config['default_mount_folder'] = input("\nSet the default mount folder: ")

    alias = input('\nPlease enter an alias (name) to reference these credentials: ')
    return alias, config


def remove_service(config, service, remove_alias=None):
    """Resolve which alias to remove from *service* ('compute' or 'storage').

    Prompts (after printing the relevant summary) when remove_alias is not
    given. Raises if the alias is not configured. Returns the alias only --
    the caller performs the actual deletion.
    """

    if remove_alias is None:
        # summary.compute(config) or summary.storage(config), by name
        getattr(summary, service)(config)
        remove_alias = input('Enter name to remove: ')

    if remove_alias not in config[service].get('configurations', []):
        raise Exception("Name entered is not configured")

    return remove_alias


def set_default(config, service, default_service=None):
    """Pick the default configuration for *service* and return a settings dict.

    When default_service is None the user is prompted (invalid names raise);
    otherwise it is accepted as-is -- main() passes it when there is exactly
    one configuration. Returns {'default_compute': name} or
    {'default_storage': name} for the caller to store.
    """

    settings = {}

    if default_service is None:
        getattr(summary, service)(config)
        default_service = input('\nEnter the name to set for default compute service: ')
        if default_service not in config[service].get('configurations', []):
            raise Exception("Name entered is not configured")
    else:
        print(f'\nSince {default_service} is the only {service} configuration, it will be set to default.')

    if service == 'compute':
        # default_to_gpu = input("Default to GPU (y/n): ")
        # settings['default_to_gpu'] = True if default_to_gpu.lower() in ['y', 'yes'] else False
        settings['default_compute'] = default_service
    elif service == 'storage':
        # sync_policy = input('Enter default sync policy (hit enter to select recommended "lazy"): ')
        # settings['sync_policy'] = sync_policy if sync_policy else 'lazy'
        settings['default_storage'] = default_service

    return settings
def get_aws_creds(aws_path='~/.aws'):
    """Interactively obtain AWS credentials.

    Offers, in order: profiles found in <aws_path>/credentials and
    <aws_path>/config, credentials from AWS_* environment variables (only
    when all three are set), and manual entry. Returns
    (access_key, secret, region); if the chosen source has no region, falls
    back to the [default] profile's region or prompts for one.
    """
    config_parser = configparser.ConfigParser()

    aws_abspath = os.path.expanduser(aws_path)
    config_parser.read(f'{aws_abspath}/credentials')
    config_parser.read(f'{aws_abspath}/config')

    index_map = {}
    print('Select AWS credentials to use:')
    index = -1  # stays -1 when no profiles are found
    for index, section in enumerate(config_parser.sections()):
        print(f'{index+1}: use "{section}" profile from {aws_abspath}')
        index_map[str(index+1)] = section

    env_access_key = os.environ.get('AWS_ACCESS_KEY_ID')
    env_secret = os.environ.get('AWS_SECRET_ACCESS_KEY')
    env_region = os.environ.get('AWS_DEFAULT_REGION')
    env_available = env_access_key is not None and env_secret is not None and env_region is not None

    if env_available:
        index += 1
        # FIX: typo "enviornment" in user-facing message
        print(f'{index+1}: credentials in environment variables')

    print(f'{index+2}: Manually enter credentials')

    # NOTE(review): the prompt is hard-coded "1/2" even when more options are
    # listed above -- confirm whether it should show the real range
    selected_index = input('\n1/2> ')

    if selected_index and selected_index in index_map:
        section = config_parser[index_map[selected_index]]
        access_key = section['aws_access_key_id']
        secret = section['aws_secret_access_key']
        region = section.get('region')
    elif env_available and selected_index == str(index+1):
        access_key = env_access_key
        secret = env_secret
        region = env_region
    elif selected_index == str(index+2):
        access_key = input('Access Key> ')
        secret = input('Secret Access Key> ')
        region = input('Region> ')
    else:
        # FIX: typo "Inavlid" in user-facing message
        raise Exception('Invalid Selection')

    if region is None:
        print('This profile has no region')
        # FIX: dict(config_parser).get('default') is None when no [default]
        # section exists, and "'region' in None" raised TypeError; fall back
        # to an empty mapping instead.
        if 'region' in (dict(config_parser).get('default') or {}):
            default_region = config_parser['default']['region']
            choice = input(f'The "default" has region of {default_region}. Should we use this? (y/n)> ')
            if choice.lower() == 'y':
                region = default_region
        if region is None:
            region = input('Please enter the region> ')

    return access_key, secret, region


if __name__ == '__main__':
    print(get_aws_creds())  # used for testing
def compute(config):
    """Print the aliases of every configured compute service, or "(none)"."""
    print('\nThe following compute services are configured:')
    entries = config['compute'].get('configurations', []) if 'compute' in config else []
    if entries:
        for alias in entries:
            print(f' * {alias}')
    else:
        print('(none)')


def storage(config):
    """Print the aliases of every configured storage service, or "(none)"."""
    print('\nThe following storage services are configured:')
    entries = config['storage'].get('configurations', []) if 'storage' in config else []
    if entries:
        for alias in entries:
            print(f' * {alias}')
    else:
        print('(none)')


def all(config):
    """Print both the compute and storage summaries.

    NOTE: intentionally shadows the builtin all() inside this module; callers
    invoke it as summary.all(config), so the name cannot be changed safely.
    """
    compute(config)
    storage(config)
for local overriding .burst 29 | f = open(yam) 30 | yconf = yaml.load(f, Loader=yaml.FullLoader) 31 | f.close() 32 | # print("DBBG 1", yconf['compute']['configurations']['Ec2Beta']['disksize']) 33 | if 'compute_config' in conf: 34 | compute_config = conf['compute_config'] 35 | else: 36 | compute_config = yconf['compute']['settings']['default_compute'] 37 | #this got a bit strained. sorry 38 | storage_config = None 39 | if 'storage_config' in conf: #if storage_config passed in, use 40 | storage_config = conf['storage_config'] 41 | else: 42 | if 'storage' in yconf: #otherwise check in config.yml 43 | storage_config = yconf['storage']['settings']['default_storage'] 44 | if storage_config: #if it exists, 45 | storage = yconf['storage']['configurations'][storage_config] #use it 46 | storage['config'] = storage_config #and store the config name too 47 | yconf = yconf['compute']['configurations'][compute_config] 48 | # print ("DBBG 2", yconf['disksize']) 49 | yconf.update(yconf['settings']) #easier to deal with all attributes at top level 50 | yconf['compute_config']=compute_config 51 | if storage_config: #if specified, 52 | yconf['storage'] = storage #pull storage to top level for ease 53 | 54 | else: 55 | vprint ("config.yml not found") 56 | yconf = {} #dummy yconf 57 | 58 | if 'provider' in conf: 59 | config.provider = conf['provider'] 60 | else: 61 | if 'provider' in yconf: 62 | config.provider = yconf['provider'] 63 | else: 64 | raise Exception("Configuration file %s not available. 
def get_server(url=None, uuid=None, name=None, conf = None):
    """Find a single live (non-terminated) node by public IP, uuid prefix, or name.

    Exactly one of url/uuid/name should be supplied.  Returns the first
    matching node, None when nothing matches, or an error string when no
    selector was given (preserving the original contract).
    """
    init(conf)
    candidates = config.driver.list_nodes()

    if url:
        matches = lambda node: url in node.public_ips
    elif uuid:
        matches = lambda node: node.uuid.find(uuid) == 0
    elif name:
        matches = lambda node: node.name == name
    else:
        return "error: specify url, uuid, or name"

    for node in candidates:
        if node.state != 'terminated' and matches(node):
            return node
    return None
def get_server_state(srv):
    """Return the current libcloud state string for *srv*.

    The node list is re-fetched because libcloud node objects do not refresh
    their own state; a node that has vanished entirely is reported as
    'terminated'.
    """
    for node in config.driver.list_nodes():
        if node.uuid.find(srv.uuid) == 0:
            return node.state
    vprint ("Cannot find server to determine state; assuming terminated")
    return 'terminated'

def get_server_vmtype(srv):
    """Return the instance/machine type name for *srv* (provider-specific)."""
    if config.provider == 'EC2':
        return srv.extra['instance_type']
    elif config.provider == 'GCE':
        # GCE reports a full resource URL; keep only the trailing type name.
        full_type = srv.extra['machineType']
        return full_type.rsplit('/', 1)[-1]

# NOTE: a get_server_image(srv) helper used to live here; it stopped working
# once EC2 images were identified by full AMI name, so it was commented out.
#
# fill in default values for vmtype & image
#
def fix_vmtype_and_image(vmtype, image):
    """Resolve DEFAULT_* placeholder strings to values from the loaded config.

    Non-placeholder vmtype/image values pass through unchanged.
    Returns the (vmtype, image) pair.
    """
    image_defaults = {
        "DEFAULT_IMAGE": config.default_image,
        "DEFAULT_GPU_IMAGE": config.default_gpu_image,
    }
    vmtype_defaults = {
        "DEFAULT_VMTYPE": config.default_vmtype,
        "DEFAULT_GPU_VMTYPE": config.default_gpu_vmtype,
    }
    image = image_defaults.get(image, image)
    vmtype = vmtype_defaults.get(vmtype, vmtype)
    return vmtype, image
182 | images = [] 183 | for proj in ["deeplearning-platform-release", "ubuntu-os-cloud"]: 184 | try: 185 | im = config.driver.ex_get_image(image, ex_project_list=proj) 186 | images = [im] 187 | break 188 | except ResourceNotFoundError: 189 | pass 190 | else: 191 | ims = config.driver.list_images() 192 | images = [x for x in ims if x.name == image] 193 | if not images: 194 | raise Exception("Image %s not found" % image) 195 | image = images[0] 196 | 197 | vmtypes = [x for x in config.driver.list_sizes() if x.name == vmtype] 198 | if not vmtypes: 199 | raise Exception("Instance vmtype %s not found" % vmtype) 200 | vmtype = vmtypes[0] 201 | 202 | if 'disksize' not in config or config.disksize==None: 203 | raise Exception("Need to add disksize to config or specify (in gigabytes, eg --disksize=150)") 204 | 205 | vprint ("Launching instance image=%s, id=%s, session=%s, type=%s ram=%s disk=%s" % (image_full_path, image.id, name, vmtype.id, vmtype.ram, config.disksize)) 206 | 207 | if pubkey: 208 | if config.provider == 'EC2': #Everybody makes it up 209 | auth = NodeAuthSSHKey(pubkey) 210 | node = config.driver.create_node(name, vmtype, image, auth=auth, ex_blockdevicemappings=[ #So sue me 211 | {'Ebs.VolumeSize': config.disksize, 'DeviceName': '/dev/sda1'}]) 212 | elif config.provider == 'GCE': 213 | meta = { 214 | 'items': [ 215 | { 216 | 'key': 'sshKeys', 217 | 'value': '%s: %s' % (user, pubkey) 218 | } 219 | ] 220 | } 221 | if gpu: 222 | vprint ("Launching with GPU") 223 | node = config.driver.create_node(name, vmtype, image, ex_metadata=meta, ex_accelerator_type=config.default_gpu, 224 | ex_accelerator_count=1, ex_on_host_maintenance="TERMINATE") 225 | else: 226 | vprint ("Launching without GPU") 227 | node = config.driver.create_node(name, vmtype, image, ex_metadata=meta) 228 | else: 229 | raise Exception("Unsupported clown provider: %s" % config.provider) 230 | else: 231 | node = config.driver.create_node(name, vmtype, image) 232 | vprint ("Waiting for public IP 
def _await_state(srv, target):
    # Poll until the server reaches *target* state (2s interval, no timeout,
    # matching the original busy-wait behavior of stop/terminate).
    state = None
    while state != target:
        state = get_server_state(srv)
        time.sleep(2)
        vprint ("server state:", state)

def stop_server(srv):
    """Stop a running node and block until it reports 'stopped'.

    Returns "success", or an error string if the provider refused the stop.
    """
    result = srv.stop_node()
    if not result:
        return "error stopping server"
    _await_state(srv, 'stopped')
    return "success"

def terminate_server(srv):
    """Destroy a node and block until it reports 'terminated'.

    Returns "success", or an error string if the provider refused.
    """
    result = config.driver.destroy_node(srv)
    if not result:
        return "error terminating server"
    _await_state(srv, 'terminated')
    return "success"

def list_servers(name, conf = None, terminated=True):
    """Return [[node, description], ...] for nodes whose name equals *name*.

    terminated=True (default) includes terminated nodes in the listing.
    The description string annotates which configured image the node uses.
    """
    init(conf)
    ret = []
    for x in config.driver.list_nodes():
        x = get_server(uuid=x.uuid)  # refresh -- libcloud nodes cache state
        if not x:
            continue
        if (not terminated) and (x.state == 'terminated'):
            continue
        if x.name == name:
            img = x.extra['image_id'] if config.provider == 'EC2' else x.image
            if img == config.default_image:
                img += " (default_image)"
            elif img == config.default_gpu_image:
                img += " (default_gpu_image)"
            s = "IMAGE: %s STATE: %s IPs: %s ID: %s/%s" % (img, x.state, x.public_ips, config.provider, x.id)
            ret.append([x, s])
    return ret
def check_jupyter(port = 8888):
    """Return seconds since the most recent Jupyter kernel activity on *port*.

    Returns False when the kernel API could not be queried (non-200 response,
    connection failure, or timeout).  Callers must compare with `!= False`
    rather than truthiness, because 0.0 is a legitimate "busy right now"
    result.
    """
    now = datetime.datetime.utcnow().replace(tzinfo=dutz.tzutc())  # bah humbug
    # Sentinel "epoch": any real activity timestamp is later than this.
    recent = datetime.datetime(2021, 1, 6, tzinfo=dutz.tzutc())

    try:
        # FIX: no timeout here previously -- a hung Jupyter server would
        # block the whole monitor loop forever, and a connection error
        # would crash it.
        r = requests.get(f"http://127.0.0.1:{port}/api/kernels", timeout=10)
    except requests.exceptions.RequestException as e:
        print("Error:", e)
        return False

    if r.status_code != 200:
        print("Error:", r.status_code, r.content)
        return False

    for k in json.loads(r.content):
        if k['execution_state'] == 'busy':
            recent = now
            break
        last = duparse(k['last_activity'])
        if last > recent:
            recent = last
    return (now - recent).total_seconds()
continue 98 | j = json.loads(out) 99 | ids.append(j['ID']) 100 | # print ("ID:", j['ID']) 101 | for x in j['Labels'].split(','): 102 | if 'burstable' in x: 103 | key, val = x.split('=') 104 | # print ("LABEL: %s = %s" % (key, val)) 105 | if key == 'ai.burstable.shutdown': 106 | docker_busy = True 107 | # print ("docker container running") 108 | val = int(val) 109 | if val == 0: 110 | val = 10000000000 111 | max_val = max(val, max_val) 112 | elif key == 'ai.burstable.jupyter': 113 | juport = val 114 | elif key == 'ai.burstable.monitor': 115 | pass 116 | else: 117 | print ("ERROR -- unknown docker label %s=%s" % (key, val)) 118 | sys.stdout.flush() 119 | 120 | if docker_busy: 121 | really_busy = False 122 | for ID in ids: 123 | # check for root processes spawned inside container -- if none, busy=False 124 | # print ("docker instance: %s PIDs:" % ID) 125 | proc = subprocess.Popen([f"docker", "exec", "-ti", ID, "ps", "ax"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 126 | lines = proc.stdout.read().strip().split(b"\n") 127 | for out in lines: 128 | cmd = out.split()[4:] 129 | cmd = b" ".join(cmd) 130 | cmd = cmd.decode() 131 | # print("CMD:", cmd, "CHK:", cmd.split()[0].lower().split('/')[-1]) 132 | if cmd.split()[0].lower().split('/')[-1] not in ["command", "ps", "bash", "fish", "sh", "tsh", 133 | "zsh"] and 'jupyter-lab' not in cmd and 'ipykernel_launcher' not in cmd and "" not in cmd: 134 | really_busy = True 135 | print ("active process: %s" % cmd) 136 | break 137 | if really_busy: 138 | break 139 | 140 | # check for tty activity 141 | proc = subprocess.Popen([f"docker", "exec", "-ti", ID, "ls", "/dev/pts"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 142 | pts = proc.stdout.read().decode('utf8').strip().split() 143 | pts = [x for x in pts if x[0].isdigit()] 144 | # print ("PTS:", pts) 145 | for pty in pts[:-1]: #last tty is ours 146 | # print ("PTY:", pty) 147 | proc = subprocess.Popen(["docker", "exec", "-ti", ID, "stat", f"/dev/pts/{pty}"], 
stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 148 | lines = proc.stdout.read().strip().split(b"\n") 149 | for out in lines: 150 | out = out.decode() 151 | columns = out.split() 152 | # print ("COLS:", columns) 153 | if len(columns) == 4 and columns[0] in ["Access:", "Modify:", "Change:"]: 154 | t = duparse(f"{columns[1]} {columns[2]} {columns[3]}") 155 | # print ("STAT:", t, recent) 156 | if t > recent: 157 | print(f"tty activity {(now-t).total_seconds()} seconds ago") 158 | really_busy = True 159 | break 160 | if really_busy: 161 | break 162 | if really_busy: 163 | break 164 | 165 | # check for jupyter activity 166 | if juport: 167 | sec = check_jupyter(juport) 168 | if sec != False and sec < 15: 169 | print (f"jupyter activity {sec} seconds ago") 170 | really_busy = True 171 | 172 | if really_busy == None: 173 | busy = rsync_busy 174 | else: 175 | busy = really_busy 176 | 177 | # print ("BUSY:", busy) 178 | 179 | if max_val >= 0: 180 | delay = max_val 181 | 182 | now = datetime.datetime.utcnow() 183 | if busy: 184 | shuttime = now + datetime.timedelta(seconds=delay) 185 | 186 | remain = (shuttime-now).total_seconds() 187 | print ("Time:", now.strftime("%Y/%m/%d %H:%M:%S utc"), "Stop:", shuttime.strftime("%Y/%m/%d %H:%M:%S utc"), "in %d sec" % remain) 188 | sys.stdout.flush() 189 | if remain < 0: 190 | print ("Proceeding to shutdown {0}".format(args.ip)) 191 | sys.stdout.flush() 192 | try: 193 | stop_instance_by_url(args.ip, vars(args)) 194 | except: 195 | print ("SHUTDOWN FAIL") 196 | os.system("pwd") 197 | os.system("ls") 198 | traceback.print_exc() 199 | sys.stdout.flush() 200 | time.sleep(999999) 201 | break 202 | 203 | time.sleep(5) 204 | -------------------------------------------------------------------------------- /burst/runrun.py: -------------------------------------------------------------------------------- 1 | import subprocess, time, os, sys, re, socket 2 | from blessings import Terminal 3 | bless_term = Terminal() 4 | 5 | MAXLINES=100 6 | 7 | 
def print_red(s, **kw):
    """Print *s* in red using the blessings terminal."""
    print (bless_term.red(s), **kw)

def run(*args, **kw):
    """Run a subprocess, capturing combined stdout/stderr as text.

    Keyword options (consumed here, NOT passed to Popen):
      showoutput -- echo captured lines to our stdout as they arrive
      timeout    -- max seconds to wait; the process is killed on expiry
      maxlines   -- cap on lines echoed per poll cycle (default 100; this
                    generalizes the old hard-coded MAXLINES constant)

    Returns (output_text, completed) where completed is False when the
    process was killed because the timeout expired.
    """
    showoutput = kw.pop('showoutput', False)
    maxlines = kw.pop('maxlines', 100)
    if 'timeout' in kw:
        timeout = float(kw.pop('timeout'))
        if showoutput:
            print("running", args[0], "with timeout:", timeout, end=' ')
    else:
        timeout = 0
    try:
        if not timeout:
            timeout = 10**10      # "no timeout" == effectively forever
        proc = subprocess.Popen(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        t0 = time.time()
        out = ""
        complete = False
        while time.time() < t0 + timeout:
            line = proc.stdout.readline().decode('utf8')
            out += line
            i = 0
            while line != "":
                if showoutput:
                    sys.stdout.write(line)
                i += 1
                if i >= maxlines:   # don't starve the poll() check below
                    break
                line = proc.stdout.readline().decode('utf8')
                out += line
            if proc.poll() != None:
                complete = True
                # Process exited: drain whatever buffered output remains.
                line = proc.stdout.readline().decode('utf8')
                out += line
                while line != "":
                    if showoutput:
                        # FIX: this drain loop previously contained a
                        # duplicated sys.stdout.write(line), echoing each
                        # drained line twice.
                        sys.stdout.write(line)
                    line = proc.stdout.readline().decode('utf8')
                    out += line
                sys.stdout.flush()
                break
            time.sleep(0.2)
        if not complete:
            proc.kill()

    except subprocess.CalledProcessError as e:
        out = e.output
    return out, complete
82 | return out, not good 83 | 84 | 85 | def get_branch(): 86 | out, err = git("rev-parse --abbrev-ref HEAD") 87 | return out.strip() 88 | 89 | def get_repo(): 90 | out, err = git("remote -v") 91 | return out.split()[1] 92 | 93 | def get_author(): 94 | return git("log -1 --pretty=format:'%an'")[0].strip().replace("'", "") 95 | 96 | def get_username(): 97 | return git("config --get user.name")[0].strip() 98 | 99 | def git_status(show=False, debug=False): 100 | out, err = git("status --porcelain", show=show, debug=debug) 101 | changes=0 102 | for row in out.split("\n"): 103 | row = row.strip() 104 | if not row: 105 | continue 106 | if row[:2] != "??": 107 | changes += 1 108 | return changes 109 | 110 | import subprocess as sp 111 | from threading import Thread 112 | from queue import Queue, Empty 113 | import time 114 | 115 | 116 | def test_func(s): 117 | if not s: 118 | return "" 119 | # print ("----------PARSE------------") 120 | # print (s) 121 | # print ("~~~~~~~~~~~~~~~~~") 122 | N = 7 123 | for L in s.split(): 124 | try: 125 | N = int(L.strip()) 126 | except: 127 | pass 128 | # print ("RESULT:", N) 129 | # print ("----------/PARSE------------") 130 | return "BOOM " * N 131 | 132 | 133 | def stdout_thread(o, q): 134 | def getchar(): 135 | return o.read(1) 136 | 137 | for c in iter(getchar, b''): 138 | q.put(c) 139 | o.close() 140 | 141 | 142 | def get_sub_stdout(q): 143 | r = b'' 144 | while True: 145 | try: 146 | c = q.get(False) 147 | except Empty: 148 | # print (" EMPTY") 149 | break 150 | else: 151 | # print (" DATA") 152 | r += c 153 | return r 154 | 155 | def escape_ansi(line): 156 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 157 | # ansi_escape = re.compile(r'(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]') 158 | return ansi_escape.sub('', line) 159 | 160 | 161 | SLEEPYTIME = .1 162 | SSH_FORCE_TIMEOUT = 30 163 | class runner: 164 | def __init__(self, cmd): 165 | self.pobj = sp.Popen(cmd.split(), stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.STDOUT) 
166 | self.q = Queue() 167 | self.t = Thread(target=stdout_thread, args=(self.pobj.stdout, self.q)) 168 | self.t.daemon = True 169 | self.t.start() 170 | self.in_dat = '' 171 | self.t0 = time.time() 172 | 173 | # Use advanced machine learning algorithms to ascertain if we have a prompt: 174 | def has_prompt(self, s): # A proud moment in hell 175 | # print ("SSS:", s) 176 | if "\n" in s: 177 | s = s.split("\n")[-1] 178 | s = escape_ansi(s.strip()) 179 | i = s.find(self.prompt) 180 | if i<0 or i>12: 181 | # print ("FAIL") 182 | return False 183 | # print("PROMPT FOUND") 184 | return True 185 | 186 | # 187 | # call interact with user input, returns next process text+prompt 188 | # 189 | def interact(self, cmd=None, expect=None): 190 | if cmd != None: #typically None for first interaction to get prompt 191 | #if '', still need to write to stdin to keep rolling ball 192 | # print ("===%s==="%cmd) 193 | self.pobj.stdin.write(bytes(cmd, 'utf-8')) 194 | self.pobj.stdin.write(b'\n') 195 | try: 196 | self.pobj.stdin.flush() 197 | except: 198 | return '' 199 | self.in_dat = cmd 200 | 201 | if expect==None: 202 | expect=[] 203 | elif hasattr(expect, "lower"): 204 | expect = [expect] 205 | # print ("EXPECT:", expect) 206 | o_new = get_sub_stdout(self.q).decode('utf8') 207 | o_dat = o_new 208 | while not o_new: 209 | br = False 210 | for ex in expect: 211 | # print ("TEST:", ex, o_new, "||", ex in o_new, "|||") 212 | if ex in o_new: #additional triggers to return such as Y/n prompts 213 | br = True 214 | break 215 | if br: 216 | break 217 | o_new = get_sub_stdout(self.q).decode('utf8') 218 | o_dat += o_new 219 | time.sleep(SLEEPYTIME) 220 | # print ("DBG A") 221 | # remove echo: 222 | # if o_dat.find(self.in_dat+"\r\n")==0: 223 | # o_dat=o_dat[len(self.in_dat)+2:] 224 | return o_dat, self.has_prompt(o_dat) 225 | 226 | def first(self): 227 | o_dat = "" 228 | t0 = time.time() 229 | while True: 230 | # print (" FIRST:",o_dat) 231 | if time.time()-t0 > SSH_FORCE_TIMEOUT: 232 | 
import sys

# Bitmask verbosity level; see vprint/vvprint/get_piper below for which bits
# enable what.  0 means "single-line overwriting status updates only".
VERBOSITY = 0
# Fixed width of the self-overwriting status line written by _vprint_nolf.
LIMIT = 100

def set_verbosity(v):
    """Set the global verbosity bitmask."""
    global VERBOSITY
    VERBOSITY = v

def get_verbosity():
    """Return the global verbosity bitmask."""
    return VERBOSITY

def _vprint_nolf(*args, **kw):
    """Write a fixed-width, carriage-return-terminated status line so that
    successive messages overwrite each other on a single terminal line.

    Accepts an optional 'file' keyword (default sys.stdout).
    """
    # FIX: previously `file` was only assigned when 'file' was ABSENT from
    # kw and kw['file'] was never read -- so passing file=... raised
    # NameError.  Default it properly instead.
    file = kw.get('file', sys.stdout)
    sarg = ""
    for arg in args:
        sarg += (str(arg) + " ").rstrip()
    s = sarg[:LIMIT]
    s += " " * (LIMIT - len(s))      # pad so shorter lines fully overwrite longer ones
    s = s.replace("\n", "\r")
    s += "\r"                        # return carriage; no linefeed
    file.write("burst: %s" % s)

def v0print(*args, **kw):
    """Print at any non-negative verbosity (i.e. effectively always)."""
    if VERBOSITY >= 0:
        print(*args, **kw)

def vprint(*args, **kw):
    """Level-1 messages: overwriting status line at verbosity 0,
    full lines when bit 1 is set."""
    if VERBOSITY == 0:
        _vprint_nolf(*args, **kw)
    elif VERBOSITY > 0 and VERBOSITY & 1:
        print (*args, **kw)

def vvprint(*args, **kw):
    """Level-2 (debug) messages: printed only when bit 2 is set."""
    if VERBOSITY > 0 and VERBOSITY & 2:
        print (*args, **kw)

#docker verbosity
def get_piper():
    """Shell redirection suffix controlling how much docker output is shown
    (bit 8: everything; bit 4: stderr only; below 3: nothing)."""
    if VERBOSITY < 3:
        return ">/dev/null 2>/dev/null"
    elif VERBOSITY & 8:
        return ""
    elif VERBOSITY & 4:
        return ">/dev/null"
    return ""

def get_dockrunflags():
    """Docker run flags: bit 128 keeps the container in the foreground."""
    if VERBOSITY > 0 and VERBOSITY & 128:
        return "-ti" #run in foreground
    return "-d"

#rsync verbosity
def get_rsync_v():
    """rsync verbosity flags keyed off bits 16/32/64 (v/vv/vvv)."""
    if VERBOSITY > 0 and VERBOSITY & 16:
        return "v --progress"
    if VERBOSITY > 0 and VERBOSITY & 32:
        return "vv --progress"
    if VERBOSITY > 0 and VERBOSITY & 64:
        return "vvv --progress"
    if VERBOSITY >= 1:
        return " --progress"
    return ""
#--------------------------------------------
# Access and define the CIFAR-10 dataset
#--------------------------------------------

def load_CIFAR10_data(verbose=1):
    """Download (if needed) and return the CIFAR-10 train/test datasets.

    Training images get random flip/rotation augmentation on load; the test
    set is only converted to tensors.  Returns (train_dataset, test_dataset,
    label_dict) where label_dict maps target ints 0-9 to category names.
    """
    if verbose > 0:
        print('Loading CIFAR dataset...')

    # Augmentation transforms applied while loading training data.
    # NOTE: some of these are much slower than others!
    augment_trans = tv.transforms.Compose([
        tv.transforms.RandomHorizontalFlip(p=0.5),
        tv.transforms.RandomRotation(degrees=15),
        # Slower candidates, kept for reference:
        # tv.transforms.ColorJitter(brightness=0.2, contrast=0.2,
        #                           saturation=0.2, hue=0.2),
        # tv.transforms.RandomCrop(32,4),
        # tv.transforms.RandomAffine(0, translate=(0.1,0.1)),
        # tv.transforms.RandomPerspective(),
        tv.transforms.ToTensor(),
    ])

    train_dataset = tv.datasets.CIFAR10(
        root='.',
        train=True,
        transform=augment_trans,
        download=True
    )
    test_dataset = tv.datasets.CIFAR10(
        root='.',
        train=False,
        transform=tv.transforms.ToTensor(),  # no augmentation for test set
        download=True
    )

    # Map numeric targets (0-9) onto human-readable category names.
    label_dict = {0: 'Airplane', 1: 'Automobile', 2: 'Bird', 3: 'Cat', 4: 'Deer',
                  5: 'Dog', 6: 'Frog', 7: 'Horse', 8: 'Ship', 9: 'Truck'}

    # Report what was just loaded.
    if verbose > 1:
        print(' Training data shape: {}'.format(train_dataset.data.shape))
        print(' Test data shape: {}'.format(test_dataset.data.shape))
        print(' Data min,max values: {},{}'.format(train_dataset.data.min(), train_dataset.data.max()))
        label_str = ', '.join(['{}:{}'.format(k,v) for k,v in label_dict.items()])
        print(' Data labels ({} categories): {}'.format(len(label_dict), label_str))

    return train_dataset, test_dataset, label_dict
def get_dataloaders(train_dataset, test_dataset, batch_size=256):
    """Wrap the datasets in DataLoaders (shuffled train, ordered test).

    Batching means we never try to hold every training image in memory
    at once.  Returns (train_loader, test_loader).
    """
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)
    return train_loader, test_loader


#--------------------------------------------
# Make a plot of some example training data
#--------------------------------------------

def display_example_data(train_dataset, label_dict,
                         filename=None):
    """Plot the first training example from each category in a 2x5 grid.

    Useful to confirm label assignments and see what we are up against.
    Saves to *filename* when given, otherwise shows interactively.
    """
    fig = plt.figure(figsize=(16,8))
    axs = fig.subplots(2,5)
    for idx, label in enumerate(list(label_dict.keys())):
        first = list(train_dataset.targets).index(label)  # first example with this label
        image = train_dataset.data[first]
        row, col = idx // 5, idx % 5
        axs[row, col].imshow(image.reshape([32,32,3]))
        axs[row, col].set_title('Label={}: {}'.format(label, label_dict[label]))
    if filename:
        plt.savefig(filename)
    else:
        plt.show()
    return
import torch
import numpy as np
from datetime import datetime
import itertools

# Plotting/metrics dependencies are only needed by the reporting helpers in
# this module; keep them optional so train()/predict() work in minimal images.
try:
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt
except ImportError:  # plotting functions will fail loudly if actually called
    confusion_matrix = None
    plt = None

#--------------------------------------------
# Training loop --- all NN are the same!
#--------------------------------------------

def train(train_loader, model, optimizer, criterion,
          n_epochs=10, test_loader=None, device=None,
          reshape_inp=None, reshape_targ=None,
          verbose=1, print_every=1):
    '''
    Train a Neural Net model (all data are the same!).

    Returns (train_losses, test_losses): per-epoch mean losses;
    test_losses is empty when no test_loader is supplied.
    '''

    if device is not None: model = model.to(device)

    t0 = datetime.now()
    if verbose: print('Training NN through {} epochs. Start time: {}'.format(n_epochs, t0))
    train_losses, test_losses = [], []

    for e in range(n_epochs):
        t0 = datetime.now()
        train_loss = []

        for inputs, targets in train_loader:
            # move data to GPU (in batches!) and reshape
            if device is not None: inputs, targets = inputs.to(device), targets.to(device)
            if reshape_inp is not None:
                inputs = reshape_inp(inputs)
            if reshape_targ is not None:
                targets = reshape_targ(targets)

            # zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            train_loss.append(loss.item())  # store losses

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

        # Average loss for this epoch
        train_losses.append(np.mean(train_loss))

        # Get test losses for tracking.
        # no_grad(): we never backprop through the test pass, so skip building
        # the autograd graph (saves memory/time without changing the values).
        # NOTE(review): the model stays in train mode here, so dropout/batchnorm
        # still behave as in training -- confirm that is intended before adding
        # model.eval(), which WOULD change the reported numbers.
        if test_loader is not None:
            test_loss = []
            with torch.no_grad():
                for test_inp, test_targ in test_loader:
                    if device is not None: test_inp, test_targ = test_inp.to(device), test_targ.to(device)
                    if reshape_inp is not None:
                        test_inp = reshape_inp(test_inp)
                    if reshape_targ is not None:
                        test_targ = reshape_targ(test_targ)
                    test_out = model(test_inp)
                    test_loss.append(criterion(test_out, test_targ).item())
            test_losses.append(np.mean(test_loss))

        if verbose and e % print_every == 0:
            string = ' Iteration {:3d}, avg train_loss = {:2.3f}, '.format(e, np.mean(train_loss))
            if test_loader is not None:
                # BUGFIX: trailing space added so the duration field is not
                # glued to the test-loss value in the log line.
                string += 'avg test_loss = {:2.3f}, '.format(np.mean(test_loss))
            string += '1 epoch duration: {}'.format(datetime.now() - t0)
            print(string)
    if verbose: print(' Done training.')

    return train_losses, test_losses

#--------------------------------------------
# Plot losses to an output file
#--------------------------------------------

def plot_losses(train_losses, test_losses, filename=None):
    '''Plot train/test loss curves; save to `filename` or show interactively.'''
    fig = plt.figure(figsize=(7, 7))
    plt.plot(train_losses, label='train')
    plt.plot(test_losses, label='test')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    if filename:
        plt.savefig(filename)
        plt.close(fig)  # free the figure when batch-plotting
    else:
        plt.show()
    return
#--------------------------------------------
# Predict --- all NN are the same!
#--------------------------------------------

def predict(loader, model,
            binaryclass=False, multiclass=False,
            device=None,
            reshape_inp=None, reshape_targ=None):
    '''
    Iterate through the specified dataloader, apply the model,
    and return the full set of predictions and targets.

    multiclass  : take the argmax over the output logits
    binaryclass : threshold a single logit at 0
    (default)   : return the raw flattened outputs (regression)
    '''

    # BUGFIX: the original guard was `if device is None: model.to(device)`,
    # which skipped the move exactly when a device WAS requested.
    if device is not None: model = model.to(device)

    all_targets, all_predictions = [], []

    with torch.no_grad():  # inference only; no autograd graph needed
        for inputs, targets in loader:
            if device is not None: inputs, targets = inputs.to(device), targets.to(device)
            if reshape_inp is not None:
                inputs = reshape_inp(inputs)
            if reshape_targ is not None:
                targets = reshape_targ(targets)
            outputs = model(inputs)
            if multiclass:
                max_values, predictions = torch.max(outputs, 1)
                predictions = predictions.cpu().numpy()
            elif binaryclass:
                # BUGFIX: move to host before .numpy(); the original raised
                # on CUDA tensors in this branch.
                predictions = outputs.cpu().numpy().flatten() > 0
            else:
                predictions = outputs.cpu().numpy().flatten()
            all_targets.extend(targets.cpu().numpy())
            all_predictions.extend(predictions)

    return all_predictions, all_targets

#--------------------------------------------
# Model accuracy
#--------------------------------------------

def get_accuracy(train_predictions, train_targets,
                 test_predictions, test_targets, verbose=1):
    '''
    Compute and (optionally) display model accuracies.

    Now also returns (train_acc, test_acc) so callers can log them;
    callers that ignored the old None return are unaffected.
    '''
    train_acc = np.sum(np.array(train_predictions) == np.array(train_targets)) / len(train_targets)
    test_acc = np.sum(np.array(test_predictions) == np.array(test_targets)) / len(test_targets)
    if verbose:
        print('---------------------------------------')
        print('Training set accuracy: {:5.4f}'.format(train_acc))
        print(' Test set accuracy: {:5.4f}'.format(test_acc))
    return train_acc, test_acc

#-------------------------------------------------
# Indices of "correct" and "wrong" predictions
#-------------------------------------------------

def get_correct_wrong(test_predictions, test_targets,
                      verbose=1):
    '''
    Get the indices into the test dataset for all the images with
    correct predictions, and with wrong predictions.
    Returns (correct, wrong) as two integer index arrays.
    '''
    wrong = np.where(np.array(test_targets) != np.array(test_predictions))[0]
    correct = np.where(np.array(test_targets) == np.array(test_predictions))[0]
    if verbose:
        print('------------- Test Set: ---------------')
        print('# Correct predictions: {}'.format(len(correct)))
        print(' # Wrong predictions: {}'.format(len(wrong)))
        print('---------------------------------------')
    return correct, wrong


#--------------------------------------------
# Confusion matrix
#--------------------------------------------

def plot_confusion_matrix(targets, predictions,
                          labels=None, normalize=False, log_color=True,
                          title='Confusion matrix', cmap=None,
                          filename=None):
    '''
    Make a nice color-scale plot of the confusion matrix,
    using the targets and predictions.

    BUGFIX: the default was `cmap=plt.cm.Blues`, evaluated at import time --
    resolving it lazily keeps the module importable without matplotlib and
    preserves the old default behavior.
    '''
    if cmap is None:
        cmap = plt.cm.Blues
    fig = plt.figure(figsize=(7, 7))
    plt.rcParams.update({'font.size': 14})
    cm = confusion_matrix(targets, predictions)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    if log_color:
        plt.imshow(np.log10(np.clip(cm, 1, cm.max())),  # floor at 1 so log is defined
                   interpolation='nearest', cmap=cmap)
    else:
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    cbar = plt.colorbar()
    if log_color: cbar.set_label('log N')
    if labels:
        tick_marks = list(labels.keys())
        rotation = 90 if max([len(v) for v in labels.values()]) > 2 else 0
        plt.xticks(tick_marks, list(labels.values()), rotation=rotation, ha='center')
        plt.yticks(tick_marks, list(labels.values()))

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment='center',
                 color='white' if cm[i, j] > thresh else 'black')
    plt.tight_layout()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    if filename:
        plt.savefig(filename)
        plt.close(fig)
    else:
        plt.show()
    return

#--------------------------------------------
# Precision / Recall / F1
#--------------------------------------------

def precision_recall(targets, predictions, normalize=False):
    '''
    Display the precision and recall data for binary outcome predictions.

    Returns (precision, recall, f1). Degenerate cases (no positive
    predictions or no positive targets) report 0.0 instead of raising
    ZeroDivisionError as the original did.
    '''
    tarr = np.array(targets, dtype=int)
    parr = np.array(predictions, dtype=int)
    n_tp = int(np.sum(tarr * parr))
    n_fp = int(np.sum((1 - tarr) * parr))
    n_fn = int(np.sum(tarr * (1 - parr)))
    n_tn = int(np.sum((1 - tarr) * (1 - parr)))

    f_tp = n_tp / len(targets)
    f_fp = n_fp / len(targets)
    f_fn = n_fn / len(targets)
    f_tn = n_tn / len(targets)

    if not normalize:
        print()
        print(' | Truth ')
        print(' | Yes No')
        print('--------------------------------')
        print('Pred: Yes | {} {}'.format(n_tp, n_fp))
        print('Pred: No | {} {}'.format(n_fn, n_tn))
        print()

    if normalize:
        print()
        print(' | Truth ')
        print(' | Yes No')
        print('--------------------------------')
        print('Pred: Yes | {:4.2f} {:4.2f}'.format(f_tp, f_fp))
        print('Pred: No | {:4.2f} {:4.2f}'.format(f_fn, f_tn))
        print()

    # Guard the ratios: all-negative predictions or targets are legal inputs.
    precision = n_tp / (n_tp + n_fp) if (n_tp + n_fp) else 0.0
    recall = n_tp / (n_tp + n_fn) if (n_tp + n_fn) else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0
    print(' Precision: {:4.2f} (Purity)'.format(precision))
    print(' Recall: {:4.2f} (Completeness)'.format(recall))
    print(' F1: {:4.2f}'.format(f1))

    return precision, recall, f1
import torch
import torch.nn as nn
import torch.nn.functional as F

import argparse
import subprocess, os, sys

import cifar_data_tools as dt
import ml_tools as ml

OUTPUT_DIR = 'output/'
LOGFILE = OUTPUT_DIR + 'model_log.txt'

#--------------------------------------------
# Define the Convolutional Neural Net
#--------------------------------------------

class CNN(nn.Module):
    '''
    Three conv blocks (32 -> 64 -> 128 channels, each with two 3x3 convs,
    BatchNorm, and a 2x2 max-pool) followed by two fully-connected layers.
    K is the number of output classes.
    '''
    def __init__(self, K):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2)
        )
        # After three 2x2 pools a 32x32 input is reduced to 4x4 spatial size.
        self.fc1 = nn.Linear(128 * 4 * 4, 1024)
        self.fc2 = nn.Linear(1024, K)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)
        # BUGFIX: F.dropout defaults to training=True, so the original
        # applied dropout even during evaluation; tie it to self.training.
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.fc2(x)
        return x

    def summarize(self):
        # Print a layer-by-layer summary; torchsummary is imported lazily so
        # the rest of the module works without this optional dependency.
        from torchsummary import summary
        summary(self, (3, 32, 32))
        return

#----------------------------------------------------------
# Train the model and see how we do!
#----------------------------------------------------------

def main():
    '''Download CIFAR-10, train the CNN, and write plots/logs to OUTPUT_DIR.'''

    # Get command-line arguments
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--verbose', type=int, default=1,
                        help='Specify level of verbosity: 0=none, 1=default, 2=extra-verbose')
    parser.add_argument('--nepochs', type=int, default=2,
                        help='Specify the number of training epochs')
    args = parser.parse_args()

    # Create the output directory for plots/logs.
    # BUGFIX: os.makedirs replaces os.system('mkdir ...'), which spawned a
    # shell and silently ignored failures.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Set up logging: tee stdout/stderr to LOGFILE at the fd level so that
    # output from child processes is captured too.
    if args.verbose:
        tee = subprocess.Popen(["tee", LOGFILE], stdin=subprocess.PIPE)
        os.dup2(tee.stdin.fileno(), sys.stdout.fileno())
        os.dup2(tee.stdin.fileno(), sys.stderr.fileno())

    # Load train/test dataset
    train_dataset, test_dataset, label_dict = dt.load_CIFAR10_data(verbose=args.verbose)
    # Plot one example image from each category
    dt.display_example_data(train_dataset, label_dict,
                            filename=OUTPUT_DIR + 'training_example_images.png')

    # Construct the model
    model = CNN(len(label_dict))

    # Check to see if GPU is available and move model to GPU if it is
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.verbose:
        print('GPU is available?: {}'.format(torch.cuda.is_available()))
        print('Using device: {}'.format(device))
    model.to(device)

    # Define our loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # Use batch gradient descent; we don't want to load the whole dataset
    # into memory all at once!
    batch_size = 128
    train_loader, test_loader = dt.get_dataloaders(train_dataset, test_dataset,
                                                   batch_size=batch_size)

    # Execute the training loop
    train_losses, test_losses = ml.train(train_loader, model, optimizer, criterion,
                                         device=device,
                                         n_epochs=args.nepochs, test_loader=test_loader,
                                         verbose=bool(args.verbose), print_every=1)

    # Plot loss per epoch to output directory
    ml.plot_losses(train_losses, test_losses, filename=OUTPUT_DIR + 'model_losses.png')

    # Get model predictions
    train_predictions, train_targets = ml.predict(train_loader, model, device=device,
                                                  multiclass=True)
    test_predictions, test_targets = ml.predict(test_loader, model, device=device,
                                                multiclass=True)

    # Get overall accuracy
    ml.get_accuracy(train_predictions, train_targets,
                    test_predictions, test_targets,
                    verbose=args.verbose)

    # Plot examples of images we got wrong
    correct, wrong = ml.get_correct_wrong(test_predictions, test_targets, verbose=args.verbose)
    dt.display_pred_examples(wrong, test_dataset, test_targets, test_predictions,
                             label_dict, filename=OUTPUT_DIR + 'wrong_examples.png')

    # Make the confusion_matrix
    ml.plot_confusion_matrix(test_targets, test_predictions, labels=label_dict, log_color=True,
                             filename=OUTPUT_DIR + 'confusion_matrix.png')

    # Plot model summary info
    model.summarize()

    # Flush logging
    if args.verbose:
        print("\nstdout flushed", flush=True)
        print("stderr flushed", file=sys.stderr, flush=True)


if __name__ == '__main__':
    main()
import torch
import torchvision as tv

import numpy as np
import matplotlib.pyplot as plt
import random

import ml_tools as ml

#--------------------------------------------
# Access and define the CIFAR-10 dataset
#--------------------------------------------

def load_CIFAR10_data(verbose=1):
    '''
    Build the augmented CIFAR-10 training set and the plain test set,
    downloading the data on first use.

    Returns (train_dataset, test_dataset, label_dict), where label_dict
    maps the integer targets 0-9 onto human-readable category names.
    '''

    if verbose > 0:
        print('Loading CIFAR dataset...')

    # On-the-fly augmentation for training batches.
    # NOTE: other transforms (ColorJitter, RandomCrop, RandomAffine,
    # RandomPerspective) are available but some are much slower than others!
    augment_trans = tv.transforms.Compose([
        tv.transforms.RandomHorizontalFlip(p=0.5),
        tv.transforms.RandomRotation(degrees=15),
        tv.transforms.ToTensor(),
    ])

    train_dataset = tv.datasets.CIFAR10(root='.', train=True,
                                        transform=augment_trans, download=True)
    # No augmentation for the test set -- only convert images to tensors.
    test_dataset = tv.datasets.CIFAR10(root='.', train=False,
                                       transform=tv.transforms.ToTensor(), download=True)

    # Map the integer target labels (0-9) onto descriptive strings.
    names = ['Airplane', 'Automobile', 'Bird', 'Cat', 'Deer',
             'Dog', 'Frog', 'Horse', 'Ship', 'Truck']
    label_dict = dict(enumerate(names))

    # Report some basic facts about what was just loaded.
    if verbose > 1:
        print(' Training data shape: {}'.format(train_dataset.data.shape))
        print(' Test data shape: {}'.format(test_dataset.data.shape))
        print(' Data min,max values: {},{}'.format(train_dataset.data.min(), train_dataset.data.max()))
        label_str = ', '.join('{}:{}'.format(k, v) for k, v in label_dict.items())
        print(' Data labels ({} categories): {}'.format(len(label_dict), label_str))

    return train_dataset, test_dataset, label_dict

#-------------------------------------------------------
# Define the data loaders we will use to train a model
#------------------------------------------------------

def get_dataloaders(train_dataset, test_dataset, batch_size=256):
    '''
    Wrap the datasets in DataLoaders for batch gradient descent, so we are
    never holding every training image in memory at once. The training
    loader shuffles each epoch; the test loader keeps dataset order.
    '''
    make_loader = torch.utils.data.DataLoader
    train_loader = make_loader(dataset=train_dataset,
                               batch_size=batch_size,
                               shuffle=True)
    test_loader = make_loader(dataset=test_dataset,
                              batch_size=batch_size,
                              shuffle=False)
    return train_loader, test_loader
#--------------------------------------------
# Make a plot of some example training data
#--------------------------------------------

def display_example_data(train_dataset, label_dict,
                         filename=None):
    '''
    Plot one example from each of the training data categories.
    Use this to confirm label assignments and to see what we are up against!
    '''
    fig = plt.figure(figsize=(16, 8))
    axs = fig.subplots(2, 5)
    for i, label in enumerate(list(label_dict.keys())):
        # just get the first example for each label
        first = list(train_dataset.targets).index(label)
        image = train_dataset.data[first]
        row, col = divmod(i, 5)
        axs[row, col].imshow(image.reshape([32, 32, 3]))
        axs[row, col].set_title('Label={}: {}'.format(label, label_dict[label]))
    if filename:
        plt.savefig(filename)
    else:
        plt.show()
    return


#-------------------------------------------------
# Make a plot of examples with their predictions
#-------------------------------------------------

def display_pred_examples(example_indices, test_dataset,
                          test_targets, test_predictions,
                          label_dict,
                          filename=None):
    '''
    Given a set of indices for the objects we want to see examples of
    (e.g., "correct" predictions or "wrong" predictions), plot a
    set of images for inspection.
    '''
    fig = plt.figure(figsize=(15, 15))
    axs = fig.subplots(4, 4)
    for cell in range(16):
        idx = random.sample(list(example_indices), 1)[0]
        image = test_dataset.data[idx]
        row, col = divmod(cell, 4)
        axs[row, col].imshow(image.reshape([32, 32, 3]))
        axs[row, col].set_title('True={} Pred={}'.format(label_dict[test_targets[idx]],
                                                         label_dict[test_predictions[idx]]),
                                fontsize='small')
    if filename:
        plt.savefig(filename)
    else:
        plt.show()
    return
import torch
import numpy as np
from datetime import datetime
import itertools

# Plotting/metrics dependencies are only needed by the reporting helpers in
# this module; keep them optional so train()/predict() work in minimal images.
try:
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt
except ImportError:  # plotting functions will fail loudly if actually called
    confusion_matrix = None
    plt = None

#--------------------------------------------
# Training loop --- all NN are the same!
#--------------------------------------------

def train(train_loader, model, optimizer, criterion,
          n_epochs=10, test_loader=None, device=None,
          reshape_inp=None, reshape_targ=None,
          verbose=1, print_every=1):
    '''
    Train a Neural Net model (all data are the same!).

    Returns (train_losses, test_losses): per-epoch mean losses;
    test_losses is empty when no test_loader is supplied.
    '''

    if device is not None: model = model.to(device)

    t0 = datetime.now()
    if verbose: print('Training NN through {} epochs. Start time: {}'.format(n_epochs, t0))
    train_losses, test_losses = [], []

    for e in range(n_epochs):
        t0 = datetime.now()
        train_loss = []

        for inputs, targets in train_loader:
            # move data to GPU (in batches!) and reshape
            if device is not None: inputs, targets = inputs.to(device), targets.to(device)
            if reshape_inp is not None:
                inputs = reshape_inp(inputs)
            if reshape_targ is not None:
                targets = reshape_targ(targets)

            # zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            train_loss.append(loss.item())  # store losses

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

        # Average loss for this epoch
        train_losses.append(np.mean(train_loss))

        # Get test losses for tracking.
        # no_grad(): we never backprop through the test pass, so skip building
        # the autograd graph (saves memory/time without changing the values).
        # NOTE(review): the model stays in train mode here, so dropout/batchnorm
        # still behave as in training -- confirm that is intended before adding
        # model.eval(), which WOULD change the reported numbers.
        if test_loader is not None:
            test_loss = []
            with torch.no_grad():
                for test_inp, test_targ in test_loader:
                    if device is not None: test_inp, test_targ = test_inp.to(device), test_targ.to(device)
                    if reshape_inp is not None:
                        test_inp = reshape_inp(test_inp)
                    if reshape_targ is not None:
                        test_targ = reshape_targ(test_targ)
                    test_out = model(test_inp)
                    test_loss.append(criterion(test_out, test_targ).item())
            test_losses.append(np.mean(test_loss))

        if verbose and e % print_every == 0:
            string = ' Iteration {:3d}, avg train_loss = {:2.3f}, '.format(e, np.mean(train_loss))
            if test_loader is not None:
                # BUGFIX: trailing space added so the duration field is not
                # glued to the test-loss value in the log line.
                string += 'avg test_loss = {:2.3f}, '.format(np.mean(test_loss))
            string += '1 epoch duration: {}'.format(datetime.now() - t0)
            print(string)
    if verbose: print(' Done training.')

    return train_losses, test_losses

#--------------------------------------------
# Plot losses to an output file
#--------------------------------------------

def plot_losses(train_losses, test_losses, filename=None):
    '''Plot train/test loss curves; save to `filename` or show interactively.'''
    fig = plt.figure(figsize=(7, 7))
    plt.plot(train_losses, label='train')
    plt.plot(test_losses, label='test')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    if filename:
        plt.savefig(filename)
        plt.close(fig)  # free the figure when batch-plotting
    else:
        plt.show()
    return
#--------------------------------------------
# Predict --- all NN are the same!
#--------------------------------------------

def predict(loader, model,
            binaryclass=False, multiclass=False,
            device=None,
            reshape_inp=None, reshape_targ=None):
    '''
    Iterate through the specified dataloader, apply the model,
    and return the full set of predictions and targets.

    multiclass  : take the argmax over the output logits
    binaryclass : threshold a single logit at 0
    (default)   : return the raw flattened outputs (regression)
    '''

    # BUGFIX: the original guard was `if device is None: model.to(device)`,
    # which skipped the move exactly when a device WAS requested.
    if device is not None: model = model.to(device)

    all_targets, all_predictions = [], []

    with torch.no_grad():  # inference only; no autograd graph needed
        for inputs, targets in loader:
            if device is not None: inputs, targets = inputs.to(device), targets.to(device)
            if reshape_inp is not None:
                inputs = reshape_inp(inputs)
            if reshape_targ is not None:
                targets = reshape_targ(targets)
            outputs = model(inputs)
            if multiclass:
                max_values, predictions = torch.max(outputs, 1)
                predictions = predictions.cpu().numpy()
            elif binaryclass:
                # BUGFIX: move to host before .numpy(); the original raised
                # on CUDA tensors in this branch.
                predictions = outputs.cpu().numpy().flatten() > 0
            else:
                predictions = outputs.cpu().numpy().flatten()
            all_targets.extend(targets.cpu().numpy())
            all_predictions.extend(predictions)

    return all_predictions, all_targets

#--------------------------------------------
# Model accuracy
#--------------------------------------------

def get_accuracy(train_predictions, train_targets,
                 test_predictions, test_targets, verbose=1):
    '''
    Compute and (optionally) display model accuracies.

    Now also returns (train_acc, test_acc) so callers can log them;
    callers that ignored the old None return are unaffected.
    '''
    train_acc = np.sum(np.array(train_predictions) == np.array(train_targets)) / len(train_targets)
    test_acc = np.sum(np.array(test_predictions) == np.array(test_targets)) / len(test_targets)
    if verbose:
        print('---------------------------------------')
        print('Training set accuracy: {:5.4f}'.format(train_acc))
        print(' Test set accuracy: {:5.4f}'.format(test_acc))
    return train_acc, test_acc

#-------------------------------------------------
# Indices of "correct" and "wrong" predictions
#-------------------------------------------------

def get_correct_wrong(test_predictions, test_targets,
                      verbose=1):
    '''
    Get the indices into the test dataset for all the images with
    correct predictions, and with wrong predictions.
    Returns (correct, wrong) as two integer index arrays.
    '''
    wrong = np.where(np.array(test_targets) != np.array(test_predictions))[0]
    correct = np.where(np.array(test_targets) == np.array(test_predictions))[0]
    if verbose:
        print('------------- Test Set: ---------------')
        print('# Correct predictions: {}'.format(len(correct)))
        print(' # Wrong predictions: {}'.format(len(wrong)))
        print('---------------------------------------')
    return correct, wrong


#--------------------------------------------
# Confusion matrix
#--------------------------------------------

def plot_confusion_matrix(targets, predictions,
                          labels=None, normalize=False, log_color=True,
                          title='Confusion matrix', cmap=None,
                          filename=None):
    '''
    Make a nice color-scale plot of the confusion matrix,
    using the targets and predictions.

    BUGFIX: the default was `cmap=plt.cm.Blues`, evaluated at import time --
    resolving it lazily keeps the module importable without matplotlib and
    preserves the old default behavior.
    '''
    if cmap is None:
        cmap = plt.cm.Blues
    fig = plt.figure(figsize=(7, 7))
    plt.rcParams.update({'font.size': 14})
    cm = confusion_matrix(targets, predictions)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    if log_color:
        plt.imshow(np.log10(np.clip(cm, 1, cm.max())),  # floor at 1 so log is defined
                   interpolation='nearest', cmap=cmap)
    else:
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    cbar = plt.colorbar()
    if log_color: cbar.set_label('log N')
    if labels:
        tick_marks = list(labels.keys())
        rotation = 90 if max([len(v) for v in labels.values()]) > 2 else 0
        plt.xticks(tick_marks, list(labels.values()), rotation=rotation, ha='center')
        plt.yticks(tick_marks, list(labels.values()))

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment='center',
                 color='white' if cm[i, j] > thresh else 'black')
    plt.tight_layout()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    if filename:
        plt.savefig(filename)
        plt.close(fig)
    else:
        plt.show()
    return

#--------------------------------------------
# Precision / Recall / F1
#--------------------------------------------

def precision_recall(targets, predictions, normalize=False):
    '''
    Display the precision and recall data for binary outcome predictions.

    Returns (precision, recall, f1). Degenerate cases (no positive
    predictions or no positive targets) report 0.0 instead of raising
    ZeroDivisionError as the original did.
    '''
    tarr = np.array(targets, dtype=int)
    parr = np.array(predictions, dtype=int)
    n_tp = int(np.sum(tarr * parr))
    n_fp = int(np.sum((1 - tarr) * parr))
    n_fn = int(np.sum(tarr * (1 - parr)))
    n_tn = int(np.sum((1 - tarr) * (1 - parr)))

    f_tp = n_tp / len(targets)
    f_fp = n_fp / len(targets)
    f_fn = n_fn / len(targets)
    f_tn = n_tn / len(targets)

    if not normalize:
        print()
        print(' | Truth ')
        print(' | Yes No')
        print('--------------------------------')
        print('Pred: Yes | {} {}'.format(n_tp, n_fp))
        print('Pred: No | {} {}'.format(n_fn, n_tn))
        print()

    if normalize:
        print()
        print(' | Truth ')
        print(' | Yes No')
        print('--------------------------------')
        print('Pred: Yes | {:4.2f} {:4.2f}'.format(f_tp, f_fp))
        print('Pred: No | {:4.2f} {:4.2f}'.format(f_fn, f_tn))
        print()

    # Guard the ratios: all-negative predictions or targets are legal inputs.
    precision = n_tp / (n_tp + n_fp) if (n_tp + n_fp) else 0.0
    recall = n_tp / (n_tp + n_fn) if (n_tp + n_fn) else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0
    print(' Precision: {:4.2f} (Purity)'.format(precision))
    print(' Recall: {:4.2f} (Completeness)'.format(recall))
    print(' F1: {:4.2f}'.format(f1))

    return precision, recall, f1
import torch
import torch.nn as nn
import torch.nn.functional as F

import argparse
import subprocess, os, sys

import cifar_data_tools as dt
import ml_tools as ml

OUTPUT_DIR = 'output/'
LOGFILE = OUTPUT_DIR + 'model_log.txt'

#--------------------------------------------
# Define the Convolutional Neural Net
#--------------------------------------------

class CNN(nn.Module):
    '''
    Three conv blocks (32 -> 64 -> 128 channels, each with two 3x3 convs,
    BatchNorm, and a 2x2 max-pool) followed by two fully-connected layers.
    K is the number of output classes.
    '''
    def __init__(self, K):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2)
        )
        # After three 2x2 pools a 32x32 input is reduced to 4x4 spatial size.
        self.fc1 = nn.Linear(128 * 4 * 4, 1024)
        self.fc2 = nn.Linear(1024, K)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)
        # BUGFIX: F.dropout defaults to training=True, so the original
        # applied dropout even during evaluation; tie it to self.training.
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.fc2(x)
        return x

    def summarize(self):
        # Print a layer-by-layer summary; torchsummary is imported lazily so
        # the rest of the module works without this optional dependency.
        from torchsummary import summary
        summary(self, (3, 32, 32))
        return

#----------------------------------------------------------
# Train the model and see how we do!
#----------------------------------------------------------
#----------------------------------------------------------
# Train the model and see how we do!
#----------------------------------------------------------

def main():
    '''
    Train the CIFAR-10 CNN end to end and write diagnostics to OUTPUT_DIR.

    Parses --verbose and --nepochs from the command line, trains the model
    (on GPU when available), then saves loss curves, example images,
    wrong-prediction examples, and a confusion matrix as PNG files.
    '''
    # Get command-line arguments
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--verbose', type=int, default=1,
                        help='Specify level of verbosity: 0=none, 1=default, 2=extra-verbose')
    parser.add_argument('--nepochs', type=int, default=2,
                        help='Specify the number of training epochs')
    args = parser.parse_args()

    # Create the output directory for plots if needed.
    # BUGFIX: use os.makedirs(..., exist_ok=True) instead of
    # os.system('mkdir ' + OUTPUT_DIR), which shelled out, was
    # unportable, and silently ignored failures.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Set up logging: mirror stdout/stderr through `tee` so console
    # output is also captured in LOGFILE.
    if args.verbose:
        tee = subprocess.Popen(["tee", LOGFILE], stdin=subprocess.PIPE)
        os.dup2(tee.stdin.fileno(), sys.stdout.fileno())
        os.dup2(tee.stdin.fileno(), sys.stderr.fileno())

    # Load train/test dataset
    train_dataset, test_dataset, label_dict = dt.load_CIFAR10_data(verbose=args.verbose)
    # Plot one example image from each category
    dt.display_example_data(train_dataset, label_dict,
                            filename=OUTPUT_DIR + 'training_example_images.png')

    # Construct the model
    model = CNN(len(label_dict))

    # Check to see if GPU is available and move model to GPU if it is.
    # (Consistency fix: both branches now yield a torch.device object.)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.verbose:
        print('GPU is available?: {}'.format(torch.cuda.is_available()))
        print('Using device: {}'.format(device))
    model.to(device)

    # Define our loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # Use batch gradient descent; we don't want to load the whole dataset
    # into memory all at once!
    batch_size = 128
    train_loader, test_loader = dt.get_dataloaders(train_dataset, test_dataset,
                                                   batch_size=batch_size)

    # Execute the training loop
    train_losses, test_losses = ml.train(train_loader, model, optimizer, criterion,
                                         device=device,
                                         n_epochs=args.nepochs, test_loader=test_loader,
                                         verbose=bool(args.verbose), print_every=1)

    # Plot loss per epoch to output directory
    ml.plot_losses(train_losses, test_losses, filename=OUTPUT_DIR + 'model_losses.png')

    # Get model predictions
    train_predictions, train_targets = ml.predict(train_loader, model, device=device,
                                                  multiclass=True)
    test_predictions, test_targets = ml.predict(test_loader, model, device=device,
                                                multiclass=True)

    # Get overall accuracy
    ml.get_accuracy(train_predictions, train_targets,
                    test_predictions, test_targets,
                    verbose=args.verbose)

    # Plot examples of images we got wrong
    correct, wrong = ml.get_correct_wrong(test_predictions, test_targets, verbose=args.verbose)
    dt.display_pred_examples(wrong, test_dataset, test_targets, test_predictions,
                             label_dict, filename=OUTPUT_DIR + 'wrong_examples.png')

    # Make the confusion_matrix
    ml.plot_confusion_matrix(test_targets, test_predictions, labels=label_dict, log_color=True,
                             filename=OUTPUT_DIR + 'confusion_matrix.png')

    # Plot model summary info
    model.summarize()

    # Flush logging
    if args.verbose:
        print("\nstdout flushed", flush=True)
        print("stderr flushed", file=sys.stderr, flush=True)


if __name__ == '__main__':
    main()
# Report the resources available on the (possibly remote) machine:
# CPU count always, plus GPU names when drivers and devices are present.
import multiprocessing, GPUtil


print ("Welcome burstables! You're running a virtual machine with %i cpus" % (multiprocessing.cpu_count()))

try:
    gpus = GPUtil.getGPUs()
    if len(gpus):
        print ("The following GPUs are available:")
        for gpu in gpus:
            print (gpu.name)
    else:
        print ("GPU drivers are installed but no GPUs are available")
except Exception:
    # BUGFIX: narrowed from a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate; any GPUtil failure (missing drivers,
    # nvidia-smi errors) is still treated as "no drivers".
    print ("No GPU drivers available")
########################################################################################
# Set up burst and conda (you shouldn't need to modify these!)
########################################################################################

FROM burstableai/burst_base:ubu2004

RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
RUN bash ~/miniconda.sh -b -p $HOME/miniconda

# BUGFIX: the original value was "/root/miniconda/bin/:$PATH$" — the stray
# trailing "$" appended a literal dollar sign instead of cleanly expanding
# the existing PATH.
ENV PATH "/root/miniconda/bin/:$PATH"
ENV PYTHONPATH "/home/burst/work"

########################################################################################
# Install conda packages needed for your project -- MODIFY AS NEEDED
#
# Make sure you put all conda packages in the same install line, so that the conda
# package solver is able to work its magic!
########################################################################################

#----------------------------
# Install packages from conda
#----------------------------

RUN conda install -c conda-forge numpy matplotlib jupyterlab

#------------------------------------------------------------------------------------------
# Use pip to install any packages that are available through the pypi server, but not conda
# (for this example, there are none so the line is commented out, but the CIFAR10_conda
# example needs torchsummary)
#------------------------------------------------------------------------------------------

# RUN pip install torchsummary

########################################################################################
# Launch bash shell on new server (you shouldn't need to modify this!)
########################################################################################
36 | ######################################################################################## 37 | 38 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /burst_examples/your_conda_project/Template.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "bronze-schedule", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 18, 17 | "id": "communist-telephone", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "x = np.arange(10) \n", 22 | "y = x**2 " 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 19, 28 | "id": "applicable-compilation", 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "execution_count": 19, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | }, 41 | { 42 | "data": { 43 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAARy0lEQVR4nO3db4hd933n8fdnRzKZpLsdOxmENUpWhpgJpiFWOgRnXUJr2StnW+IhhOD0D6IY9CRtk7aosfqkLCwkQaVpHpSCsNsVbEiTuqpssiWqUR2WwuLu2HKr2K6w68aJRrI17WaabHbYyOp3H8wZSxqPozuae+fOb+b9AnPP+d17db8crA9Hv/M755uqQpLUnn8z7AIkSdfHAJekRhngktQoA1ySGmWAS1Kjtq3nj73jHe+o3bt3r+dPSlLznnrqqX+qqvHl4+sa4Lt372ZmZmY9f1KSmpfk5ZXGnUKRpEYZ4JLUKANckhplgEtSowxwSWrUuq5CkaSt5PipWQ6fOMO5+QV2jo1ycN8k03sm+vbnG+CSNADHT81y6NhpFi5eAmB2foFDx04D9C3EnUKRpAE4fOLM6+G9ZOHiJQ6fONO33+gpwJP8epJnk3wzyZeTvCXJLUmeTPJikq8kuaFvVUlS487NL6xq/HpcM8CTTAC/BkxV1U8AI8D9wOeBL1TVu4HvAg/0rSpJatzOsdFVjV+PXqdQtgGjSbYBbwXOA3cBj3TvHwWm+1aVJDXu4L5JRrePXDU2un2Eg/sm+/Yb1wzwqpoFfhf4NovB/S/AU8B8Vb3WfewssOKsfJIDSWaSzMzNzfWnakna4Kb3TPDZj76XibFRAkyMjfLZj753fVehJLkRuA+4BZgH/hS4t9cfqKojwBGAqakpG3BK2jKm90z0NbCX62UK5W7gH6tqrqouAseAO4GxbkoFYBcwO6AaJUkr6CXAvw3ckeStSQLsBZ4DngA+1n1mP/DoYEqUJK2klznwJ1m8WPk0cLr7zhHgM8BvJHkReDvw8ADrlCQt09OdmFX1O8DvLBt+CfhA3yuSJPXEOzElqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElq1DUDPMlkkmeu+O97ST6d5KYkjyd5oXu9cT0KliQt6qUjz5mqur2qbgd+Evi/wJ8DDwInq+pW4GS3L0laJ6udQtkL/ENVvcxip/qj3fhRYLqfhUmSfrTVBvj9wJe77R1Vdb7bfgXYsdIXkhxIMpNkZm5u7jrLlCQt13OAJ7kB+Ajwp8vfq6oCaqXvVdWRqpqqqqnx8fHrLlSSdLXVnIF/GHi6ql7t9l9NcjNA93qh38VJkt7cagL8E1yePgF4DNjfbe8HHu1XUZKka+spwJO8DbgHOHbF8OeAe5K8ANzd7UuS1sm2Xj5UVT8A3r5s7J9ZXJUiSRoC78SUpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjeppGaEkteT4qVkOnzjDufkFdo6NcnDfJNN7JoZdVt8Z4JI2leOnZjl07DQLFy8BMDu/wKFjpwE2XYg7hSJpUzl84szr4b1k4eIlDp84M6SKBscAl7SpnJtfWNV4ywxwSZvKzrHRVY23zACXtKkc3DfJ6PaRq8ZGt49wcN/kkCoaHC9iStpUli5UugpFkho0vWdiUwb2ck6hSFKjem3oMJbkkSR/n+T5JB9MclOSx5O80L3eOOhiJUmX9XoG/kXg61X1HuB9wPPAg8DJqroVONntS5LWyTUDPMmPAx8CHgaoqh9W1TxwH3C0+9hRYHpQRUqS3qiXM/BbgDngj5OcSvJQ1yNzR1Wd7z7zCrBjUEVKkt6olwDfBrwf+MOq2gP8gGXTJVVVQK305SQHkswkmZmbm1trvZKkTi8BfhY4W1VPdvuPsBjorya5GaB7vbDSl6vqSFVNVdXU+Ph4P2qWJNF
DgFfVK8B3kizdxrQXeA54DNjfje0HHh1IhZKkFfV6I8+vAl9KcgPwEvDLLIb/V5M8ALwMfHwwJUqSVtJTgFfVM8DUCm/t7W85kqReeSemJDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRPTV0SPIt4PvAJeC1qppKchPwFWA38C3g41X13cGUKUlabjVn4D9TVbdX1VJnngeBk1V1K3CSZZ3qJUmD1WtPzJXcB/x0t30U+AbwmTXWI6lxx0/NcvjEGc7NL7BzbJSD+yaZ3jMx7LI2pV7PwAv4yyRPJTnQje2oqvPd9ivAjpW+mORAkpkkM3Nzc2ssV9JGdvzULIeOnWZ2foECZucXOHTsNMdPzQ67tE2p1wD/qap6P/Bh4JNJPnTlm1VVLIb8G1TVkaqaqqqp8fHxtVUraUM7fOIMCxcvXTW2cPESh0+cGVJFm1tPAV5Vs93rBeDPgQ8Arya5GaB7vTCoIiW14dz8wqrGtTbXDPAkb0vyb5e2gf8IfBN4DNjffWw/8OigipTUhp1jo6sa19r0cga+A/jrJH8L/A3w36vq68DngHuSvADc3e1L2sIO7ptkdPvIVWOj20c4uG9ySBVtbtdchVJVLwHvW2H8n4G9gyhKUpuWVpu4CmV9rGUZoSS9wfSeCQN7nXgrvSQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqVM8BnmQkyakkX+v2b0nyZJIXk3wlyQ2DK1OStNxqzsA/BTx/xf7ngS9U1buB7wIP9LMwSdKP1lOAJ9kF/CzwULcf4C7gke4jR4HpQRQoSVpZr2fgvw/8FvCv3f7bgfmqeq3bPwus2IIjyYEkM0lm5ubm1lSsJOmyXrrS/xxwoaqeup4fqKojVTVVVVPj4+PX80dIklbQS0/MO4GPJPlPwFuAfwd8ERhLsq07C98FzA6uTEnSctc8A6+qQ1W1q6p2A/cDf1VVvwA8AXys+9h+4NGBVSlJeoO1rAP/DPAbSV5kcU784f6UJEnqRS9TKK+rqm8A3+i2XwI+0P+SJEm98E5MSWrUqs7AJW1cx0/NcvjEGc7NL7BzbJSD+yaZ3rPi6l5tEga4tAkcPzXLoWOnWbh4CYDZ+QUOHTsNYIhvYk6hSJvA4RNnXg/vJQsXL3H4xJkhVaT1YIBLm8C5+YVVjWtzMMClTWDn2OiqxrU5GODSJnBw3ySj20euGhvdPsLBfZNDqkjrwYuY0iawdKHSVShbiwEubRLTeyYM7C3GKRRJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhrVS1PjtyT5myR/m+TZJP+5G78lyZNJXkzylSQ3DL5cSdKSXs7A/x9wV1W9D7gduDfJHcDngS9U1buB7wIPDK5MSdJyvTQ1rqr6P93u9u6/Au4CHunGjwLTA6lQkrSinubAk4wkeQa4ADwO/AMwX1WvdR85C6x4D2+SA0lmkszMzc31o2ZJEj0GeFVdqqrbgV0sNjJ+T68/UFVHqmqqqqbGx8evs0xJ0nKrWoVSVfPAE8AHgbEkSw/D2gXM9rk2SdKP0MsqlPEkY932KHAP8DyLQf6x7mP7gUcHVaQk6Y16eZzszcDRJCMsBv5Xq+prSZ4D/iTJfwFOAQ8PsE5J0jLXDPCq+jtgzwrjL7E4Hy5JGgLvxJSkRhngktQoA1ySGmVPTGmNjp+atZmwhsIAl9bg+KlZDh07zcLFSwDMzi9w6NhpAENcA+cUirQGh0+ceT28lyxcvMThE2eGVJG2EgNcWoNz8wurGpf6yQCX1mDn2OiqxqV+MsClNTi4b5LR7SNXjY1uH+HgvskhVaStxIuY0hosXah0FYqGwQCX1mh6z4SBraFwCkWSGmWAS1KjDHBJapQBLkmN6qUjzzuTPJHkuST
PJvlUN35TkseTvNC93jj4ciVJS3o5A38N+M2qug24A/hkktuAB4GTVXUrcLLblyStk2sGeFWdr6qnu+3vs9gPcwK4DzjafewoMD2oIiVJb7SqOfAku1lsr/YksKOqzndvvQLseJPvHEgyk2Rmbm5uDaVKkq7Uc4An+THgz4BPV9X3rnyvqgqolb5XVUeqaqqqpsbHx9dUrCTpsp4CPMl2FsP7S1V1rBt+NcnN3fs3AxcGU6IkaSW9rEIJ8DDwfFX93hVvPQbs77b3A4/2vzxJ0pvp5VkodwK/BJxO8kw39tvA54CvJnkAeBn4+GBKlCSt5JoBXlV/DeRN3t7b33IkSb3yTkxJapSPk1Wz7Aavrc4AV5PsBi85haJG2Q1eMsDVKLvBSwa4GmU3eMkAV6PsBi95EVONshu8ZICrYXaD11bnFIokNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqVC8def4oyYUk37xi7KYkjyd5oXu9cbBlSpKW6+UM/L8C9y4bexA4WVW3Aie7fUnSOrpmgFfV/wD+97Lh+4Cj3fZRYLrPdUmSruF678TcUVXnu+1XgB1v9sEkB4ADAO9617uu8+e00dhMQRq+NV/ErKoC6ke8f6Sqpqpqanx8fK0/pw1gqZnC7PwCxeVmCsdPzQ67NGlLud4AfzXJzQDd64X+laSNzmYK0sZwvQH+GLC/294PPNqfctQCmylIG0Mvywi/DPxPYDLJ2SQPAJ8D7knyAnB3t68twmYK0sZwzYuYVfWJN3lrb59rUSMO7pu8qqEw2ExBGgafB65Vs5mCtDEY4LouNlOQhs9noUhSowxwSWqUAS5JjTLAJalRXsRsjM8gkbTEAG/I0jNIltZfLz2DBDDEpS3IKZSG+AwSSVcywBviM0gkXckAb4jPIJF0JQO8IQf3TTK6feSqMZ9BIm1dXsRsiM8gkXQlA7xHG2X5ns8gkbTEAO+By/ckbUTOgffA5XuSNqI1nYEnuRf4IjACPFRVfe/MsxGmLly+J2kjuu4z8CQjwB8AHwZuAz6R5LZ+FQYbp/u5y/ckbURrmUL5APBiVb1UVT8E/gS4rz9lLdooUxcu35O0Ea0lwCeA71yxf7Ybu0qSA0lmkszMzc2t6gc2ytTF9J4JPvvR9zIxNkqAibFRPvvR93oBU9JQDXwVSlUdAY4ATE1N1Wq+u3NslNkVwnoYUxcu35O00azlDHwWeOcV+7u6sb5x6kKS3txazsD/F3BrkltYDO77gZ/vS1Ud7zyUpDd33QFeVa8l+RXgBIvLCP+oqp7tW2Udpy4kaWVrmgOvqr8A/qJPtUiSVsE7MSWpUQa4JDXKAJekRhngktSoVK3q3pq1/VgyB7x8nV9/B/BPfSyndR6PyzwWV/N4XLZZjsW/r6rx5YPrGuBrkWSmqqaGXcdG4fG4zGNxNY/HZZv9WDiFIkmNMsAlqVEtBfiRYRewwXg8LvNYXM3jcdmmPhbNzIFLkq7W0hm4JOkKBrgkNaqJAE9yb5IzSV5M8uCw6xmWJO9M8kSS55I8m+RTw65pI0gykuRUkq8Nu5ZhSjKW5JEkf5/k+SQfHHZNw5Tk17u/J99M8uUkbxl2Tf224QN8PZonN+Q14Der6jbgDuCTW/hYXOlTwPPDLmID+CLw9ap6D/A+tvAxSTIB/BowVVU/weIjr+8fblX9t+EDnHVontyKqjpfVU93299n8S/oln5YepJdwM8CDw27lmFK8uPAh4CHAarqh1U1P9yqhm4bMJpkG/BW4NyQ6+m7FgK8p+bJW02S3cAe4MnhVjJ0vw/8FvCvwy5kyG4B5oA/7qaTHkrytmEXNSxVNQv8LvBt4DzwL1X1l8Otqv9aCHAtk+THgD8DPl1V3xt2PcOS5OeAC1X11LBr2QC2Ae8H/rCq9gA/ALby9aIbWfyX+i3ATuBtSX5xuFX1XwsBPvDmyS1Jsp3F8P5SVR0bdj1DdifwkSTfYnFq7a4k/224JQ3NWeBsVS39i+w
RFgN9q7ob+Meqmquqi8Ax4D8Muaa+ayHAX2+enOQGFi9EPDbkmoYiSVic43y+qn5v2PUMW1UdqqpdVbWbxf8v/qqqNt1ZVi+q6hXgO0kmu6G9wHNDLGnYvg3ckeSt3d+bvWzCi7pr6om5HtareXIj7gR+CTid5Jlu7Le73qTSrwJf6k50XgJ+ecj1DE1VPZnkEeBpFldvnWIT3lbvrfSS1KgWplAkSSswwCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1Kj/j+7nYPFK9cIQgAAAABJRU5ErkJggg==\n", 44 | "text/plain": [ 45 | "
" 46 | ] 47 | }, 48 | "metadata": { 49 | "needs_background": "light" 50 | }, 51 | "output_type": "display_data" 52 | } 53 | ], 54 | "source": [ 55 | "plt.scatter(x, y)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "banned-injury", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.6.9" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 5 88 | } 89 | -------------------------------------------------------------------------------- /burst_examples/your_conda_project/template.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | x = np.arange(10) 5 | y = x**2 6 | 7 | for i in range(len(x)): 8 | print('{} * {} = {}'.format(x[i], x[i], y[i])) 9 | 10 | plt.scatter(x, y) 11 | plt.savefig('example.png') 12 | 13 | -------------------------------------------------------------------------------- /burst_examples/your_python_project/.burstignore: -------------------------------------------------------------------------------- 1 | venv 2 | .* 3 | __pycache__ 4 | 5 | -------------------------------------------------------------------------------- /burst_examples/your_python_project/.dockerignore: -------------------------------------------------------------------------------- 1 | ** 2 | !/Dockerfile* 3 | !/requirements.txt 4 | -------------------------------------------------------------------------------- /burst_examples/your_python_project/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM burstableai/burst_base:ubu2004 2 | 3 | COPY requirements.txt requirements.txt 4 | RUN pip3 install -r ./requirements.txt 5 | 6 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /burst_examples/your_python_project/Template.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "bronze-schedule", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 18, 17 | "id": "communist-telephone", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "x = np.arange(10) \n", 22 | "y = x**2 " 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 19, 28 | "id": "applicable-compilation", 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "" 35 | ] 36 | }, 37 | "execution_count": 19, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | }, 41 | { 42 | "data": { 43 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAARy0lEQVR4nO3db4hd933n8fdnRzKZpLsdOxmENUpWhpgJpiFWOgRnXUJr2StnW+IhhOD0D6IY9CRtk7aosfqkLCwkQaVpHpSCsNsVbEiTuqpssiWqUR2WwuLu2HKr2K6w68aJRrI17WaabHbYyOp3H8wZSxqPozuae+fOb+b9AnPP+d17db8crA9Hv/M755uqQpLUnn8z7AIkSdfHAJekRhngktQoA1ySGmWAS1Kjtq3nj73jHe+o3bt3r+dPSlLznnrqqX+qqvHl4+sa4Lt372ZmZmY9f1KSmpfk5ZXGnUKRpEYZ4JLUKANckhplgEtSowxwSWrUuq5CkaSt5PipWQ6fOMO5+QV2jo1ycN8k03sm+vbnG+CSNADHT81y6NhpFi5eAmB2foFDx04D9C3EnUKRpAE4fOLM6+G9ZOHiJQ6fONO33+gpwJP8epJnk3wzyZeTvCXJLUmeTPJikq8kuaFvVUlS487NL6xq/HpcM8CTTAC/BkxV1U8AI8D9wOeBL1TVu4HvAg/0rSpJatzOsdFVjV+PXqdQtgGjSbYBbwXOA3cBj3TvHwWm+1aVJDXu4L5JRrePXDU2un2Eg/sm+/Yb1wzwqpoFfhf4NovB/S/AU8B8Vb3WfewssOKsfJIDSWaSzMzNzfWnakna4Kb3TPDZj76XibFRAkyMjfLZj753fVehJLkRuA+4BZgH/hS4t9cfqKojwBGAqakpG3BK2jKm90z0NbCX62UK5W7gH6tqrqouAseAO4GxbkoFYBcwO6AaJUkr6CXAvw3ckeStSQLsBZ4DngA+1n1mP/DoYEqUJK2klznwJ1m8WPk0cLr7zhHgM8BvJHkReDvw8ADrlCQt09OdmFX1O8DvLBt+CfhA3yuSJPXEOzElqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElq1DUDPMlkkmeu+O97ST6d5KYkjyd5oXu9cT0KliQt6qUjz5mqur2qbgd+Evi/wJ8DDwInq+pW4GS3L0laJ6udQtkL/ENVvcxip/qj3fhRYLqfhUmSfrTVBvj9wJe77R1Vdb7bfgXYsdIXkhxIMpNkZm5u7jrLlCQt13OAJ7kB+Ajwp8vfq6oCaqXvVdWRqpqqqqnx8fHrLlSSdLXVnIF/GHi6ql7t9l9NcjNA93qh38VJkt7cagL8E1yePgF4DNjfbe8HHu1XUZKka+spwJO8DbgHOHbF8OeAe5K8ANzd7UuS1sm2Xj5UVT8A3r5s7J9ZXJUiSRoC78SUpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjeppGaEkteT4qVkOnzjDufkFdo6NcnDfJNN7JoZdVt8Z4JI2leOnZjl07DQLFy8BMDu/wKFjpwE2XYg7hSJpUzl84szr4b1k4eIlDp84M6SKBscAl7SpnJtfWNV4ywxwSZvKzrHRVY23zACXtKkc3DfJ6PaRq8ZGt49wcN/kkCoaHC9iStpUli5UugpFkho0vWdiUwb2ck6hSFKjem3oMJbkkSR/n+T5JB9MclOSx5O80L3eOOhiJUmX9XoG/kXg61X1HuB9wPPAg8DJqroVONntS5LWyTUDPMmPAx8CHgaoqh9W1TxwH3C0+9hRYHpQRUqS3qiXM/BbgDngj5OcSvJQ1yNzR1Wd7z7zCrBjUEVKkt6olwDfBrwf+MOq2gP8gGXTJVVVQK305SQHkswkmZmbm1trvZKkTi8BfhY4W1VPdvuPsBjorya5GaB7vbDSl6vqSFVNVdXU+Ph4P2qWJNF
DgFfVK8B3kizdxrQXeA54DNjfje0HHh1IhZKkFfV6I8+vAl9KcgPwEvDLLIb/V5M8ALwMfHwwJUqSVtJTgFfVM8DUCm/t7W85kqReeSemJDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRPTV0SPIt4PvAJeC1qppKchPwFWA38C3g41X13cGUKUlabjVn4D9TVbdX1VJnngeBk1V1K3CSZZ3qJUmD1WtPzJXcB/x0t30U+AbwmTXWI6lxx0/NcvjEGc7NL7BzbJSD+yaZ3jMx7LI2pV7PwAv4yyRPJTnQje2oqvPd9ivAjpW+mORAkpkkM3Nzc2ssV9JGdvzULIeOnWZ2foECZucXOHTsNMdPzQ67tE2p1wD/qap6P/Bh4JNJPnTlm1VVLIb8G1TVkaqaqqqp8fHxtVUraUM7fOIMCxcvXTW2cPESh0+cGVJFm1tPAV5Vs93rBeDPgQ8Arya5GaB7vTCoIiW14dz8wqrGtTbXDPAkb0vyb5e2gf8IfBN4DNjffWw/8OigipTUhp1jo6sa19r0cga+A/jrJH8L/A3w36vq68DngHuSvADc3e1L2sIO7ptkdPvIVWOj20c4uG9ySBVtbtdchVJVLwHvW2H8n4G9gyhKUpuWVpu4CmV9rGUZoSS9wfSeCQN7nXgrvSQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqVM8BnmQkyakkX+v2b0nyZJIXk3wlyQ2DK1OStNxqzsA/BTx/xf7ngS9U1buB7wIP9LMwSdKP1lOAJ9kF/CzwULcf4C7gke4jR4HpQRQoSVpZr2fgvw/8FvCv3f7bgfmqeq3bPwus2IIjyYEkM0lm5ubm1lSsJOmyXrrS/xxwoaqeup4fqKojVTVVVVPj4+PX80dIklbQS0/MO4GPJPlPwFuAfwd8ERhLsq07C98FzA6uTEnSctc8A6+qQ1W1q6p2A/cDf1VVvwA8AXys+9h+4NGBVSlJeoO1rAP/DPAbSV5kcU784f6UJEnqRS9TKK+rqm8A3+i2XwI+0P+SJEm98E5MSWrUqs7AJW1cx0/NcvjEGc7NL7BzbJSD+yaZ3rPi6l5tEga4tAkcPzXLoWOnWbh4CYDZ+QUOHTsNYIhvYk6hSJvA4RNnXg/vJQsXL3H4xJkhVaT1YIBLm8C5+YVVjWtzMMClTWDn2OiqxrU5GODSJnBw3ySj20euGhvdPsLBfZNDqkjrwYuY0iawdKHSVShbiwEubRLTeyYM7C3GKRRJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhrVS1PjtyT5myR/m+TZJP+5G78lyZNJXkzylSQ3DL5cSdKSXs7A/x9wV1W9D7gduDfJHcDngS9U1buB7wIPDK5MSdJyvTQ1rqr6P93u9u6/Au4CHunGjwLTA6lQkrSinubAk4wkeQa4ADwO/AMwX1WvdR85C6x4D2+SA0lmkszMzc31o2ZJEj0GeFVdqqrbgV0sNjJ+T68/UFVHqmqqqqbGx8evs0xJ0nKrWoVSVfPAE8AHgbEkSw/D2gXM9rk2SdKP0MsqlPEkY932KHAP8DyLQf6x7mP7gUcHVaQk6Y16eZzszcDRJCMsBv5Xq+prSZ4D/iTJfwFOAQ8PsE5J0jLXDPCq+jtgzwrjL7E4Hy5JGgLvxJSkRhngktQoA1ySGmVPTGmNjp+atZmwhsIAl9bg+KlZDh07zcLFSwDMzi9w6NhpAENcA+cUirQGh0+ceT28lyxcvMThE2eGVJG2EgNcWoNz8wurGpf6yQCX1mDn2OiqxqV+MsClNTi4b5LR7SNXjY1uH+HgvskhVaStxIuY0hosXah0FYqGwQCX1mh6z4SBraFwCkWSGmWAS1KjDHBJapQBLkmN6qUjzzuTPJHkuST
PJvlUN35TkseTvNC93jj4ciVJS3o5A38N+M2qug24A/hkktuAB4GTVXUrcLLblyStk2sGeFWdr6qnu+3vs9gPcwK4DzjafewoMD2oIiVJb7SqOfAku1lsr/YksKOqzndvvQLseJPvHEgyk2Rmbm5uDaVKkq7Uc4An+THgz4BPV9X3rnyvqgqolb5XVUeqaqqqpsbHx9dUrCTpsp4CPMl2FsP7S1V1rBt+NcnN3fs3AxcGU6IkaSW9rEIJ8DDwfFX93hVvPQbs77b3A4/2vzxJ0pvp5VkodwK/BJxO8kw39tvA54CvJnkAeBn4+GBKlCSt5JoBXlV/DeRN3t7b33IkSb3yTkxJapSPk1Wz7Aavrc4AV5PsBi85haJG2Q1eMsDVKLvBSwa4GmU3eMkAV6PsBi95EVONshu8ZICrYXaD11bnFIokNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqVC8def4oyYUk37xi7KYkjyd5oXu9cbBlSpKW6+UM/L8C9y4bexA4WVW3Aie7fUnSOrpmgFfV/wD+97Lh+4Cj3fZRYLrPdUmSruF678TcUVXnu+1XgB1v9sEkB4ADAO9617uu8+e00dhMQRq+NV/ErKoC6ke8f6Sqpqpqanx8fK0/pw1gqZnC7PwCxeVmCsdPzQ67NGlLud4AfzXJzQDd64X+laSNzmYK0sZwvQH+GLC/294PPNqfctQCmylIG0Mvywi/DPxPYDLJ2SQPAJ8D7knyAnB3t68twmYK0sZwzYuYVfWJN3lrb59rUSMO7pu8qqEw2ExBGgafB65Vs5mCtDEY4LouNlOQhs9noUhSowxwSWqUAS5JjTLAJalRXsRsjM8gkbTEAG/I0jNIltZfLz2DBDDEpS3IKZSG+AwSSVcywBviM0gkXckAb4jPIJF0JQO8IQf3TTK6feSqMZ9BIm1dXsRsiM8gkXQlA7xHG2X5ns8gkbTEAO+By/ckbUTOgffA5XuSNqI1nYEnuRf4IjACPFRVfe/MsxGmLly+J2kjuu4z8CQjwB8AHwZuAz6R5LZ+FQYbp/u5y/ckbURrmUL5APBiVb1UVT8E/gS4rz9lLdooUxcu35O0Ea0lwCeA71yxf7Ybu0qSA0lmkszMzc2t6gc2ytTF9J4JPvvR9zIxNkqAibFRPvvR93oBU9JQDXwVSlUdAY4ATE1N1Wq+u3NslNkVwnoYUxcu35O00azlDHwWeOcV+7u6sb5x6kKS3txazsD/F3BrkltYDO77gZ/vS1Ud7zyUpDd33QFeVa8l+RXgBIvLCP+oqp7tW2Udpy4kaWVrmgOvqr8A/qJPtUiSVsE7MSWpUQa4JDXKAJekRhngktSoVK3q3pq1/VgyB7x8nV9/B/BPfSyndR6PyzwWV/N4XLZZjsW/r6rx5YPrGuBrkWSmqqaGXcdG4fG4zGNxNY/HZZv9WDiFIkmNMsAlqVEtBfiRYRewwXg8LvNYXM3jcdmmPhbNzIFLkq7W0hm4JOkKBrgkNaqJAE9yb5IzSV5M8uCw6xmWJO9M8kSS55I8m+RTw65pI0gykuRUkq8Nu5ZhSjKW5JEkf5/k+SQfHHZNw5Tk17u/J99M8uUkbxl2Tf224QN8PZonN+Q14Der6jbgDuCTW/hYXOlTwPPDLmID+CLw9ap6D/A+tvAxSTIB/BowVVU/weIjr+8fblX9t+EDnHVontyKqjpfVU93299n8S/oln5YepJdwM8CDw27lmFK8uPAh4CHAarqh1U1P9yqhm4bMJpkG/BW4NyQ6+m7FgK8p+bJW02S3cAe4MnhVjJ0vw/8FvCvwy5kyG4B5oA/7qaTHkrytmEXNSxVNQv8LvBt4DzwL1X1l8Otqv9aCHAtk+THgD8DPl1V3xt2PcOS5OeAC1X11LBr2QC2Ae8H/rCq9gA/ALby9aIbWfyX+i3ATuBtSX5xuFX1XwsBPvDmyS1Jsp3F8P5SVR0bdj1DdifwkSTfYnFq7a4k/224JQ3NWeBsVS39i+w
RFgN9q7ob+Meqmquqi8Ax4D8Muaa+ayHAX2+enOQGFi9EPDbkmoYiSVic43y+qn5v2PUMW1UdqqpdVbWbxf8v/qqqNt1ZVi+q6hXgO0kmu6G9wHNDLGnYvg3ckeSt3d+bvWzCi7pr6om5HtareXIj7gR+CTid5Jlu7Le73qTSrwJf6k50XgJ+ecj1DE1VPZnkEeBpFldvnWIT3lbvrfSS1KgWplAkSSswwCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1Kj/j+7nYPFK9cIQgAAAABJRU5ErkJggg==\n", 44 | "text/plain": [ 45 | "
" 46 | ] 47 | }, 48 | "metadata": { 49 | "needs_background": "light" 50 | }, 51 | "output_type": "display_data" 52 | } 53 | ], 54 | "source": [ 55 | "plt.scatter(x, y)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "banned-injury", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.6.9" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 5 88 | } 89 | -------------------------------------------------------------------------------- /burst_examples/your_python_project/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.18.1 2 | matplotlib==3.1.2 3 | jupyterlab==3.0.14 4 | -------------------------------------------------------------------------------- /burst_examples/your_python_project/template.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | x = np.arange(10) 5 | y = x**2 6 | 7 | for i in range(len(x)): 8 | print('{} * {} = {}'.format(x[i], x[i], y[i])) 9 | 10 | plt.scatter(x, y) 11 | plt.savefig('example.png') 12 | 13 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 
6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_static/temp: -------------------------------------------------------------------------------- 1 | # nothing to see here 2 | -------------------------------------------------------------------------------- /docs/source/about.rst: -------------------------------------------------------------------------------- 1 | .. _about_page: 2 | 3 | ============ 4 | About Us 5 | ============ 6 | 7 | Burstable AI 8 | ============ 9 | 10 | Burstable AI is the parent company responsible for the burst open source project. 11 | 12 | More soon... 13 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'Burst' 21 | copyright = '2021, Burstable.AI' 22 | author = 'Burstable.AI' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '1.1.1' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = ['sphinx.ext.autodoc' 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = [] 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 49 | # 50 | html_theme = 'alabaster' 51 | 52 | # Add any paths that contain custom static files (such as style sheets) here, 53 | # relative to this directory. They are copied after the builtin static files, 54 | # so a file named "default.css" will overwrite the builtin "default.css". 55 | html_static_path = ['_static'] 56 | -------------------------------------------------------------------------------- /docs/source/getting_started.rst: -------------------------------------------------------------------------------- 1 | .. _getting_started_page: 2 | 3 | =============== 4 | Getting Started 5 | =============== 6 | 7 | * :ref:`installation_sec` 8 | * :ref:`configuration_sec` 9 | * :ref:`usage_sec` 10 | 11 | .. 
_installation_sec: 12 | 13 | Installation 14 | ============ 15 | 16 | *Note: If you want to contribute to the Burst OSS project or just follow bleeding-edge development, install through GitHub as described* `here `_ *instead.* 17 | 18 | SSH keys: 19 | ^^^^^^^^^ 20 | You must have a public/private ssh key pair, stored as ``~/.ssh/id_rsa.pub`` and ``~/.ssh/id_rsa`` by default. 21 | 22 | If you do not already have ssh keys, run ssh-keygen to generate them: 23 | :: 24 | 25 | ssh-keygen -t rsa -b 4096 26 | 27 | [Recommended] Set up a virtual environment: 28 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 29 | 30 | Follow the instructions here: `python-virtual-environments-a-primer. `_ 31 | 32 | Launch the virtual environment and do the remainder of your installation and set-up inside the virtual environment. 33 | 34 | Check versions of Python and Docker at the command line: 35 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 36 | 37 | Make sure you are running the necessary versions of Python and Docker (you need Python 3, Docker >= 19). 38 | :: 39 | 40 | python --version 41 | docker --version 42 | 43 | Install the command-line tool: 44 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 45 | :: 46 | 47 | pip install burstable 48 | 49 | 50 | Check to make sure that burst has installed correctly with 51 | 52 | :: 53 | 54 | burst --version 55 | 56 | .. _configuration_sec: 57 | 58 | Configuration (basic) 59 | ===================== 60 | 61 | The first time you use burst, we recommend running the interactive configuration setup. To do this, you will need credentials for the cloud account you want to use. For AWS, this means you will need: 62 | 63 | * an access key 64 | * a secret key 65 | * a region (e.g., ``us-west-2``) 66 | 67 | Enter the configuration wizard and follow the instructions to set up a new compute service, entering your access key, secret key, and region as prompted.
68 | 69 | :: 70 | 71 | burst configure 72 | 73 | This configuration will, by default, set up your account to use a powerful GPU when you run burst with ``--gpu``, a medium-power CPU for testing when you run burst with ``--no-gpu``, and a default hard disk with 175 GB. 74 | 75 | To set up other hardware configurations, see :ref:`the detailed configuration instructions`. 76 | 77 | 78 | .. _usage_sec: 79 | 80 | 81 | Using burst 82 | =========== 83 | 84 | Burst is built and run from inside a project directory. In order to be "burstable", a project requires a working Dockerfile. You can find examples and templates for such Dockerfiles in the :ref:`Examples`. 85 | 86 | The easiest way to test your burst installation is using the test examples that are available in the burst GitHub repo `here `_. 87 | 88 | Download the repo. The examples can be found in the ``burst_examples`` folder. 89 | 90 | Try running :ref:`Hello Burst!`. 91 | -------------------------------------------------------------------------------- /docs/source/help.rst: -------------------------------------------------------------------------------- 1 | .. _help_page: 2 | 3 | Help 4 | ============== 5 | 6 | Need help? Contact help@burstable.ai with questions. 7 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. _index_page: 2 | 3 | Burst 4 | ====== 5 | 6 | Run your code in the cloud. 7 | --------------------------- 8 | 9 | Burst lets you run your software remotely---on any sized virtual machine---without any change to your existing development process. All you need is a working Dockerfile. 10 | 11 | We support remote computing on Amazon Web Services and will be adding more (Google Cloud Platform support is currently in beta).
12 | 13 | Dependencies 14 | ------------ 15 | * Python3 16 | * Docker version 19 or higher 17 | * A folder/project with a working Dockerfile 18 | * ssh keys 19 | * AWS or Google Cloud Platform account and access keys 20 | 21 | Burst is open source on `github. `_ 22 | 23 | 24 | Guide 25 | ----- 26 | 27 | .. toctree:: 28 | :maxdepth: 2 29 | 30 | getting_started 31 | user_guide 32 | examples 33 | license 34 | help 35 | about 36 | 37 | 38 | .. Indices and tables 39 | .. ================== 40 | .. * :ref:`genindex` 41 | .. * :ref:`modindex` 42 | .. * :ref:`search` 43 | -------------------------------------------------------------------------------- /docs/source/license.rst: -------------------------------------------------------------------------------- 1 | .. _license_page: 2 | 3 | .. Copyright 2021 Burstable.AI 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | 17 | 18 | ============== 19 | License 20 | ============== 21 | 22 | Apache License 23 | ============== 24 | 25 | :Version: 2.0 26 | :Date: January 2004 27 | :URL: http://www.apache.org/licenses/ 28 | 29 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 30 | ------------------------------------------------------------ 31 | 32 | 1. Definitions. 33 | --------------- 34 | 35 | **"License"** shall mean the terms and conditions for use, reproduction, and 36 | distribution as defined by Sections 1 through 9 of this document. 
37 | 38 | **"Licensor"** shall mean the copyright owner or entity authorized by the 39 | copyright owner that is granting the License. 40 | 41 | **"Legal Entity"** shall mean the union of the acting entity and all other 42 | entities that control, are controlled by, or are under common control with that 43 | entity. For the purposes of this definition, "control" means *(i)* the power, 44 | direct or indirect, to cause the direction or management of such entity, 45 | whether by contract or otherwise, or *(ii)* ownership of fifty percent (50%) or 46 | more of the outstanding shares, or *(iii)* beneficial ownership of such entity. 47 | 48 | **"You"** (or **"Your"**) shall mean an individual or Legal Entity exercising 49 | permissions granted by this License. 50 | 51 | **"Source"** form shall mean the preferred form for making modifications, 52 | including but not limited to software source code, documentation source, and 53 | configuration files. 54 | 55 | **"Object"** form shall mean any form resulting from mechanical transformation 56 | or translation of a Source form, including but not limited to compiled object 57 | code, generated documentation, and conversions to other media types. 58 | 59 | **"Work"** shall mean the work of authorship, whether in Source or Object form, 60 | made available under the License, as indicated by a copyright notice that is 61 | included in or attached to the work (an example is provided in the Appendix 62 | below). 63 | 64 | **"Derivative Works"** shall mean any work, whether in Source or Object form, 65 | that is based on (or derived from) the Work and for which the editorial 66 | revisions, annotations, elaborations, or other modifications represent, as a 67 | whole, an original work of authorship. For the purposes of this License, 68 | Derivative Works shall not include works that remain separable from, or merely 69 | link (or bind by name) to the interfaces of, the Work and Derivative Works 70 | thereof. 
71 | 72 | **"Contribution"** shall mean any work of authorship, including the original 73 | version of the Work and any modifications or additions to that Work or 74 | Derivative Works thereof, that is intentionally submitted to Licensor for 75 | inclusion in the Work by the copyright owner or by an individual or Legal 76 | Entity authorized to submit on behalf of the copyright owner. For the purposes 77 | of this definition, "submitted" means any form of electronic, verbal, or 78 | written communication sent to the Licensor or its representatives, including 79 | but not limited to communication on electronic mailing lists, source code 80 | control systems, and issue tracking systems that are managed by, or on behalf 81 | of, the Licensor for the purpose of discussing and improving the Work, but 82 | excluding communication that is conspicuously marked or otherwise designated in 83 | writing by the copyright owner as "Not a Contribution." 84 | 85 | **"Contributor"** shall mean Licensor and any individual or Legal Entity on 86 | behalf of whom a Contribution has been received by Licensor and subsequently 87 | incorporated within the Work. 88 | 89 | 2. Grant of Copyright License. 90 | ------------------------------ 91 | 92 | Subject to the terms and conditions of this License, each Contributor hereby 93 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 94 | irrevocable copyright license to reproduce, prepare Derivative Works of, 95 | publicly display, publicly perform, sublicense, and distribute the Work and 96 | such Derivative Works in Source or Object form. 97 | 98 | 3. Grant of Patent License. 
99 | --------------------------- 100 | 101 | Subject to the terms and conditions of this License, each Contributor hereby 102 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 103 | irrevocable (except as stated in this section) patent license to make, have 104 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 105 | such license applies only to those patent claims licensable by such Contributor 106 | that are necessarily infringed by their Contribution(s) alone or by combination 107 | of their Contribution(s) with the Work to which such Contribution(s) was 108 | submitted. If You institute patent litigation against any entity (including a 109 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 110 | Contribution incorporated within the Work constitutes direct or contributory 111 | patent infringement, then any patent licenses granted to You under this License 112 | for that Work shall terminate as of the date such litigation is filed. 113 | 114 | 4. Redistribution. 
115 | ------------------ 116 | 117 | You may reproduce and distribute copies of the Work or Derivative Works thereof 118 | in any medium, with or without modifications, and in Source or Object form, 119 | provided that You meet the following conditions: 120 | 121 | - You must give any other recipients of the Work or Derivative Works a copy of 122 | this License; and 123 | 124 | - You must cause any modified files to carry prominent notices stating that You 125 | changed the files; and 126 | 127 | - You must retain, in the Source form of any Derivative Works that You 128 | distribute, all copyright, patent, trademark, and attribution notices from 129 | the Source form of the Work, excluding those notices that do not pertain to 130 | any part of the Derivative Works; and 131 | 132 | - If the Work includes a ``"NOTICE"`` text file as part of its distribution, 133 | then any Derivative Works that You distribute must include a readable copy of 134 | the attribution notices contained within such ``NOTICE`` file, excluding 135 | those notices that do not pertain to any part of the Derivative Works, in at 136 | least one of the following places: within a ``NOTICE`` text file distributed 137 | as part of the Derivative Works; within the Source form or documentation, if 138 | provided along with the Derivative Works; or, within a display generated by 139 | the Derivative Works, if and wherever such third-party notices normally 140 | appear. The contents of the ``NOTICE`` file are for informational purposes 141 | only and do not modify the License. You may add Your own attribution notices 142 | within Derivative Works that You distribute, alongside or as an addendum to 143 | the ``NOTICE`` text from the Work, provided that such additional attribution 144 | notices cannot be construed as modifying the License. 
You may add Your own 145 | copyright statement to Your modifications and may provide additional or 146 | different license terms and conditions for use, reproduction, or distribution 147 | of Your modifications, or for any such Derivative Works as a whole, provided 148 | Your use, reproduction, and distribution of the Work otherwise complies with 149 | the conditions stated in this License. 150 | 151 | 5. Submission of Contributions. 152 | ------------------------------- 153 | 154 | Unless You explicitly state otherwise, any Contribution intentionally submitted 155 | for inclusion in the Work by You to the Licensor shall be under the terms and 156 | conditions of this License, without any additional terms or conditions. 157 | Notwithstanding the above, nothing herein shall supersede or modify the terms 158 | of any separate license agreement you may have executed with Licensor regarding 159 | such Contributions. 160 | 161 | 6. Trademarks. 162 | -------------- 163 | 164 | This License does not grant permission to use the trade names, trademarks, 165 | service marks, or product names of the Licensor, except as required for 166 | reasonable and customary use in describing the origin of the Work and 167 | reproducing the content of the ``NOTICE`` file. 168 | 169 | 7. Disclaimer of Warranty. 170 | -------------------------- 171 | 172 | Unless required by applicable law or agreed to in writing, Licensor provides 173 | the Work (and each Contributor provides its Contributions) on an **"AS IS" 174 | BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND**, either express or 175 | implied, including, without limitation, any warranties or conditions of 176 | **TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR 177 | PURPOSE**. You are solely responsible for determining the appropriateness of 178 | using or redistributing the Work and assume any risks associated with Your 179 | exercise of permissions under this License. 180 | 181 | 8. Limitation of Liability. 
182 | --------------------------- 183 | 184 | In no event and under no legal theory, whether in tort (including negligence), 185 | contract, or otherwise, unless required by applicable law (such as deliberate 186 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 187 | liable to You for damages, including any direct, indirect, special, incidental, 188 | or consequential damages of any character arising as a result of this License 189 | or out of the use or inability to use the Work (including but not limited to 190 | damages for loss of goodwill, work stoppage, computer failure or malfunction, 191 | or any and all other commercial damages or losses), even if such Contributor 192 | has been advised of the possibility of such damages. 193 | 194 | 9. Accepting Warranty or Additional Liability. 195 | ---------------------------------------------- 196 | 197 | While redistributing the Work or Derivative Works thereof, You may choose to 198 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 199 | other liability obligations and/or rights consistent with this License. 200 | However, in accepting such obligations, You may act only on Your own behalf and 201 | on Your sole responsibility, not on behalf of any other Contributor, and only 202 | if You agree to indemnify, defend, and hold each Contributor harmless for any 203 | liability incurred by, or claims asserted against, such Contributor by reason 204 | of your accepting any such warranty or additional liability. 
205 | 206 | **END OF TERMS AND CONDITIONS** 207 | 208 | -------------------------------------------------------------------------------- /foo: -------------------------------------------------------------------------------- 1 | zzzzr 2 | -------------------------------------------------------------------------------- /release_notes.txt: -------------------------------------------------------------------------------- 1 | Release notes for burst 1.2.1 2 | 3 | Updated README.md to comport with readthedocs 4 | ======================================================================================================== 5 | Release notes for burst 1.2.0 6 | 7 | Added action 'jupyter' 8 | 9 | stop-server will now stop an instance that has an idle bash shell (in all cases) 10 | or jupyter notebook (if jupyter is the action) 11 | 12 | User guide and docs now available at burstable.readthedocs.io 13 | 14 | Numerous tweaks & bug fixes 15 | ======================================================================================================== 16 | Release notes for burst 1.1.1 17 | 18 | Changes in CLI: 19 | 20 | This release represents a major overhaul of the Command Line Interface (CLI). A number of options were recast as 'actions', or 21 | top-level commands that then support various options. These commands are positional arguments, meaning they are not 22 | preceded by a dash '-' or double-dash '--'. 23 | 24 | 'burst --help' has been expanded to document all the available options. 25 | 26 | The actions and their usage can be seen by typing 'burst actions'. 27 | 28 | For existing users, here is a cheat sheet to support migration to the new syntax: 29 | 30 | old new 31 | ----------------------------------------------------------------------------------------------- 32 | burst --build burst build --gpu | --no-gpu 33 | burst python3 myprog.py burst run python3 myprog.py 34 | burst --shutdown [seconds] burst stop-server | burst --stop-server seconds 35 | burst --verbosity ... 
burst --verbose | -v ... 36 | burst --background ... burst --background | -b ... 37 | burst --gpus [none|all] burst --gpu | --no-gpu 38 | burst --list burst list-servers 39 | burst --status burst status 40 | burst --attach burst attach 41 | 42 | similar options that are now actions: kill, sync, configure, terminate-server 43 | 44 | Note both actions and options can be abbreviated: 45 | 46 | burst --verb 1 list 47 | 48 | options can come before or after the main action, but the task to run must come last: 49 | 50 | burst --verb 1 run --stop 300 python3 myprog.py 51 | 52 | As presently implemented, all options are valid for all actions, although they may not be applicable. 53 | This behavior is not guaranteed to remain forever. 54 | ======================================================================================================== 55 | 56 | Release notes for burst 0.2.18 57 | 58 | (includes previous releases) 59 | 60 | New options: 61 | 62 | --background -b 63 | 64 | Run task in the background. Does usual rsync, docker build/run, then exits 65 | 66 | --status 67 | 68 | Show status of running task. If task is not present, task has exited 69 | 70 | --sync 71 | 72 | Synchronize remote workspace to local 73 | 74 | --attach 75 | 76 | Attach stdio, stdout, stderr to background task. 
ctrl-c detaches (does not kill) 77 | 78 | --kill 79 | 80 | Prompts user to kill running task 81 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from burst.version import version 3 | 4 | setup( 5 | name="burstable", 6 | version=version, 7 | py_modules=['config'], 8 | packages=find_packages(), 9 | python_requires='>=3.6', 10 | scripts=['bin/burst', 'bin/burst-config', 'bin/burst-monitor'], 11 | install_requires=[ 12 | "blessings >=1.7, <2", 13 | "apache-libcloud >=3.2.0, <4", 14 | "cryptography >=3.2, <4", 15 | "easydict >=1.9, <2", 16 | "PyYAML >=5.3.1, <6" 17 | ], 18 | include_package_data=True 19 | ) 20 | -------------------------------------------------------------------------------- /tests/.burstignore: -------------------------------------------------------------------------------- 1 | .* 2 | venv 3 | __pycache__ 4 | fulltest.ports 5 | fulltest.log 6 | fulltest.shut 7 | -------------------------------------------------------------------------------- /tests/.dockerignore: -------------------------------------------------------------------------------- 1 | ** 2 | !/Dockerfile* 3 | !/requirements.txt 4 | -------------------------------------------------------------------------------- /tests/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM burstableai/burst_base:ubu2004 2 | 3 | COPY requirements.txt requirements.txt 4 | RUN pip3 install -r ./requirements.txt 5 | 6 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /tests/buildtest.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | out1 = """Build phase 1 success""" 4 | 5 | out2 = """Build phase 2 success""" 6 | 7 | os.system("rm buildtest.log") 8 | os.system("burst --verbosity 
127 --build 2>&1 | tee buildtest.log") 9 | 10 | f = open("buildtest.log") 11 | s = f.read() 12 | f.close() 13 | 14 | print ("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~TEST COMPLETED~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 15 | if out1 in s and out2 in s: 16 | print ("PASSED") 17 | else: 18 | print ("FAILED") 19 | -------------------------------------------------------------------------------- /tests/fulltest.py: -------------------------------------------------------------------------------- 1 | import os, sys, argparse, time 2 | 3 | parser = argparse.ArgumentParser(description=__doc__) 4 | parser.add_argument("--storage-mount", required=True, help="as passed to burst") 5 | parser.add_argument("--testpath", required=True, help="bucket or root directory for tests") 6 | parser.add_argument("--storage-config") 7 | parser.add_argument("--compute-config") 8 | parser.add_argument("--stop-test", dest="shutdown_test", action="store_true",) 9 | parser.add_argument("--gpus") 10 | parser.add_argument("--verbose", "-v", type=int, default=1) 11 | args = parser.parse_args() 12 | 13 | out1 = "----------------------END-------------------------" 14 | 15 | out2 = "123\n456\n" 16 | 17 | out3 = "" 18 | 19 | os.system("rm fulltest.log fulltest.foo fulltest.ports fulltest.shut") 20 | 21 | os.system("python3 fulltest_ports.py &") 22 | 23 | opts = "-v {0} --storage-mount {1}".format(args.verbose, args.storage_mount) 24 | if args.storage_config: 25 | opts += " --storage-service={0}".format(args.storage_config) 26 | if args.compute_config: 27 | opts += " --compute-service={0}".format(args.compute_config) 28 | 29 | root = args.storage_mount.split(':')[1] 30 | shutopt = "--stop 10" if args.shutdown_test else "" 31 | 32 | args_gpus = "--gpus " + args.gpus if args.gpus else "" 33 | cmd = "burst run {3} {4} -p 6789:80 {0} python3 -u fulltest_command.py --testpath={1}/{2} 2>&1 | tee fulltest.log".format(opts, 34 | root, args.testpath, shutopt, args_gpus) 35 | print (cmd) 36 | sys.stdout.flush() 37 | 
os.system(cmd) 38 | 39 | if args.shutdown_test: 40 | print ("Waiting for VM to stop") 41 | sys.stdout.flush() 42 | time.sleep(16) 43 | 44 | os.system("burst list > fulltest.shut") 45 | 46 | f = open("fulltest.log") 47 | s = f.read() 48 | f.close() 49 | 50 | foo = None 51 | if os.path.exists("fulltest.foo"): 52 | f = open("fulltest.foo") 53 | foo = f.read() 54 | f.close() 55 | 56 | ports = "" 57 | if os.path.exists("fulltest.ports"): 58 | f = open("fulltest.ports") 59 | ports = f.read() 60 | f.close() 61 | 62 | shut = "" 63 | if os.path.exists("fulltest.shut"): 64 | f = open("fulltest.shut") 65 | shut = f.read() 66 | f.close() 67 | 68 | print ("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~TEST COMPLETED~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 69 | 70 | if out1 in s: 71 | print ("PASSED main test") 72 | else: 73 | print ("FAILED main test") 74 | 75 | if out2 == foo: 76 | print ("PASSED storage test") 77 | else: 78 | print ("FAILED storage test") 79 | 80 | if ports.find(out3)==0: 81 | print ("PASSED tunnel test") 82 | else: 83 | print ("FAILED tunnel test") 84 | 85 | if args.shutdown_test: 86 | if shut.find("running")==-1: 87 | print ("PASSED stop test") 88 | else: 89 | print ("FAILED stop test") 90 | 91 | sys.stdout.flush() 92 | -------------------------------------------------------------------------------- /tests/fulltest_command.py: -------------------------------------------------------------------------------- 1 | import os, sys, time, argparse 2 | 3 | parser = argparse.ArgumentParser(description=__doc__) 4 | parser.add_argument("--testpath", required=True) 5 | args = parser.parse_args() 6 | 7 | def do(cmd): 8 | print (cmd) 9 | os.system(cmd) 10 | 11 | print ("fulltest_command:") 12 | 13 | do("service nginx start") 14 | do("rm fulltest.foo") 15 | do("echo 123 > {0}/foo".format(args.testpath)) 16 | do("echo 456 >> {0}/foo".format(args.testpath)) 17 | do("cp {0}/foo fulltest.foo".format(args.testpath)) 18 | do("rm {0}/foo".format(args.testpath)) 19 | 20 | sys.stdout.flush() 21 | 
time.sleep(30) 22 | -------------------------------------------------------------------------------- /tests/fulltest_ports.py: -------------------------------------------------------------------------------- 1 | import os, sys, time 2 | 3 | print ("fulltest_ports") 4 | sys.stdout.flush() 5 | 6 | for i in range(20): 7 | os.system("curl -s localhost:6789 > fulltest.ports") 8 | if os.path.getsize("fulltest.ports") > 0: 9 | break 10 | time.sleep(5) 11 | 12 | if os.path.getsize("fulltest.ports") == 0: 13 | print("fulltest_ports: failed") 14 | else: 15 | print("fulltest_ports: received data") 16 | 17 | sys.stdout.flush() 18 | -------------------------------------------------------------------------------- /tests/hello_burst.py: -------------------------------------------------------------------------------- 1 | import multiprocessing, GPUtil 2 | 3 | 4 | print ("Welcome burstables! You're running a virtual machine with %i cpus" % (multiprocessing.cpu_count())) 5 | 6 | try: 7 | gpus = GPUtil.getGPUs() 8 | if len(gpus): 9 | print ("The following GPU's are available:") 10 | for gpu in gpus: 11 | print (gpu.name) 12 | else: 13 | print ("GPU drivers are installed but no GPU's are available") 14 | except: 15 | print ("No GPU drivers available") 16 | -------------------------------------------------------------------------------- /tests/long_process.py: -------------------------------------------------------------------------------- 1 | import sys, time, os 2 | t0 = time.time() 3 | print ("HOME2\nPublic ip:", end=' ') 4 | sys.stdout.flush() 5 | os.system("curl https://ipv4.wtfismyip.com/text") 6 | print ("This is a long-running process (of sorts)") 7 | print (os.path.abspath('.')) 8 | f = open("./data/test.log", 'w') 9 | for i in range(int(sys.argv[1])): 10 | t = time.time()-t0 11 | print (i, "TIME:", t) 12 | sys.stdout.flush() 13 | print (i, "TIME:", t, file=f) 14 | f.flush() 15 | time.sleep(5) 16 | f.close() 17 | 18 | print ("OK, done with that. 
I should be going to sleep now...") 19 | sys.stdout.flush() 20 | -------------------------------------------------------------------------------- /tests/quicktest.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | out1 = """Welcome burstables! You're running a virtual machine with""" 4 | 5 | out2 = "----------------------END-------------------------" 6 | 7 | os.system("rm quicktest.log") 8 | os.system("burst run --verbose 127 python3 hello_burst.py 2>&1 | tee quicktest.log") 9 | 10 | f = open("quicktest.log") 11 | s = f.read() 12 | f.close() 13 | 14 | print ("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~TEST COMPLETED~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 15 | if out1 in s and out2 in s: 16 | print ("PASSED") 17 | else: 18 | print ("FAILED") 19 | 20 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | # add pypi dependencies here. Be sure to pin the version number, like: 2 | # six==1.15.0 -------------------------------------------------------------------------------- /upload_pip.sh: -------------------------------------------------------------------------------- 1 | rm -fr build dist 2 | python setup.py sdist bdist_wheel 3 | python3 -m twine upload --verbose dist/* 4 | --------------------------------------------------------------------------------