├── tests ├── __init__.py └── test_sdk.py ├── setup.cfg ├── assets ├── demo.gif └── fastmap_logo.png ├── .gitignore ├── requirements.txt ├── requirements_dev.txt ├── Makefile ├── test_sdk.sh ├── CHANGELOG.txt ├── LICENSE ├── .secrets.baseline ├── setup.py ├── fastmap ├── __init__.py └── sdk_lib.py ├── README.md └── scripts ├── fastmapadmin └── fastmap /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmap-io/fastmap/HEAD/assets/demo.gif -------------------------------------------------------------------------------- /assets/fastmap_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmap-io/fastmap/HEAD/assets/fastmap_logo.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | __pycache__ 3 | scratch 4 | htmlcov 5 | .coverage 6 | .coverage.* 7 | *.egg-info 8 | *.eggs 9 | *.pyc 10 | build/ 11 | dist/ 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Client SDK requirements 2 | dill>=0.3.4,<0.4 3 | msgpack>=1.0.0,<1.1.0 4 | requests>=2.24,<2.25 # purposefully downgraded. 
Test carefully 5 | tabulate>=0.8.7,<0.9.0 6 | urllib3<1.26 7 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # Client SDK dev requirements 2 | -r requirements.txt 3 | pytest>=6.0,<6.1 4 | requests-mock>=1.8.0,<2.0.0 5 | coverage>4.5,<4.6 # downgraded on purpose for --concurrency flag 6 | detect-secrets>=0.14,<0.15 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | rm -rf dist 3 | python3 setup.py sdist 4 | 5 | test: 6 | pytest --cov-report=html --cov=fastmap -x 7 | 8 | clean: 9 | rm -rf build 10 | rm -rf dist 11 | rm -f out.* 12 | rm -rf *.egg-info 13 | rm -rf htmlcov 14 | 15 | 16 | -------------------------------------------------------------------------------- /test_sdk.sh: -------------------------------------------------------------------------------- 1 | detect-secrets scan --no-keyword-scan --no-basic-auth-scan > .secrets.baseline 2 | coverage run --concurrency=multiprocessing -m pytest ./tests/test_sdk.py -vx || exit 1 3 | coverage combine 4 | coverage report --include=fastmap/sdk_lib.py,fastmap/__init__.py 5 | coverage html --include=fastmap/sdk_lib.py,fastmap/__init__.py 6 | -------------------------------------------------------------------------------- /CHANGELOG.txt: -------------------------------------------------------------------------------- 1 | # Fastmap CHANGELOG 2 | 3 | ## [0.0.12] - 2021-07-23 4 | Added login option for fastmap to make credentials easier to manage. Bugfixes. 5 | 6 | ## [0.0.11] - 2021-07-22 7 | Lots of cleanup and bug fixes. Add fastmapadmin script 8 | 9 | ## [0.0.10] - 2021-07-12 10 | Several tweaks/fixes over the past few versions for test users 11 | 12 | ## [0.0.7] - 2021-07-01 13 | Major paradigm change.
Fastmap is now oriented around a controller/worker approach. 14 | Map is still in dev. Unit tests have no chance. But this should work for 15 | some basic offload usage. 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020 fastmap.io 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
"""Packaging script for the fastmap client SDK.

Reads the long description from README.md and the package version from the
``CLIENT_VERSION`` constant in ``fastmap/sdk_lib.py``, then hands both to
``setuptools.setup``.
"""
import os
import re
import sys

import setuptools

# Keep this guard in sync with ``python_requires`` below. The previous check
# compared against (3, 6), which let Python 3.6 through even though both the
# error message and ``python_requires`` state 3.7 as the minimum.
if sys.version_info[:2] < (3, 7):
    print("ERROR: this package requires Python 3.7 or later!")
    sys.exit(1)
# Disabled upper-bound guard, kept for reference:
# if sys.version_info[:2] >= (3, 9):
#     # This is because of a pickling issue. Maybe dill needs a PR?
#     print("ERROR: this package cannot run on Python 3.9 or later!")
#     sys.exit(1)


# README.md contains non-ASCII characters (emoji); read it as UTF-8 explicitly
# so the build does not depend on the platform's default encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Single-source the version from the SDK module instead of importing it
# (importing would require the runtime dependencies to be installed already).
with open(os.path.join("fastmap", "sdk_lib.py"), encoding="utf-8") as f:
    version_match = re.search(r"^CLIENT_VERSION \= \"([0-9.]+)\"", f.read(),
                              re.MULTILINE)
if version_match is None:
    # Fail with a clear message rather than an AttributeError on ``None``.
    raise RuntimeError("Could not find CLIENT_VERSION in fastmap/sdk_lib.py")
version = version_match.group(1)

url_base = "https://github.com/fastmap-io/fastmap"
download_url = '%s/archive/fastmap-%s.tar.gz' % (url_base, version)

setuptools.setup(
    name="fastmap",
    version=version,
    author="fastmap.io team",
    author_email="scott@fastmap.io",
    description="Fastmap offloads arbitrary Python code "
                "via the open source fastmap cloud service.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url=url_base,
    download_url=download_url,
    packages=setuptools.find_packages(),
    scripts=[
        "scripts/fastmap",
        "scripts/fastmapadmin",
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.7',
    install_requires=[
        "dill>=0.3.2,<0.4",
        "msgpack>=1.0.0,<1.1.0",
        "requests>=2.24,<3.0",
        "tabulate>=0.8.7,<0.9.0",
    ],
)
MachineType 15 | TaskState = TaskState 16 | TaskOutcome = TaskOutcome 17 | 18 | # Exceptions 19 | FastmapException = FastmapException 20 | 21 | 22 | __version__ = CLIENT_VERSION 23 | _global_config = None 24 | 25 | 26 | @set_docstring(GLOBAL_INIT_DOCSTRING) 27 | def global_init(*args, **kwargs): 28 | global _global_config 29 | _global_config = init(*args, **kwargs) 30 | 31 | 32 | @set_docstring(INIT_DOCSTRING) 33 | def init(*args, **kwargs): 34 | return FastmapConfig.create(*args, **kwargs) 35 | 36 | 37 | def _get_config(): 38 | if not _global_config: 39 | raise FastmapException("Fastmap not initialized globally.") 40 | return _global_config 41 | 42 | 43 | # @set_docstring(MAP_DOCSTRING) 44 | # def map(func, iterable, *args, **kwargs): 45 | # return _get_config().map(func, iterable, *args, **kwargs) 46 | 47 | 48 | @set_docstring(OFFLOAD_DOCSTRING) 49 | def offload(func, *args, **kwargs): 50 | return _get_config().offload(func, *args, **kwargs) 51 | 52 | 53 | @set_docstring(POLL_ALL_DOCSTRING) 54 | def poll_all(): 55 | return _get_config().poll_all() 56 | 57 | 58 | @set_docstring(CLEAR_ALL_DOCSTRING) 59 | def clear_all(): 60 | return _get_config().clear_all() 61 | 62 | 63 | @set_docstring(GET_TASK_DOCSTRING) 64 | def get_task(task_id): 65 | return _get_config().get_task(task_id) 66 | 67 | # @set_docstring(POLL_DOCSTRING) 68 | # def poll(task_id): 69 | # return _get_config().poll(task_id) 70 | 71 | 72 | # @set_docstring(KILL_DOCSTRING) 73 | # def kill(task_id): 74 | # return _get_config().kill(task_id) 75 | 76 | 77 | # @set_docstring(RETURN_VALUE_DOCSTRING) 78 | # def return_value(task_id): 79 | # return _get_config().return_value(task_id) 80 | 81 | 82 | # @set_docstring(TRACEBACK_DOCSTRING) 83 | # def traceback(task_id): 84 | # return _get_config().traceback(task_id) 85 | 86 | 87 | # @set_docstring(WAIT_DOCSTRING) 88 | # def wait(task_id, *args, **kwargs): 89 | # return _get_config().wait(task_id, *args, **kwargs) 90 | 91 | 92 | # @set_docstring(ALL_LOGS_DOCSTRING) 
93 | # def all_logs(task_id, *args, **kwargs): 94 | # return _get_config().all_logs(task_id, *args, **kwargs) 95 | 96 | 97 | # @set_docstring(CLEAR_DOCSTRING) 98 | # def clear(task_id): 99 | # return _get_config().clear(task_id) 100 | 101 | 102 | # @set_docstring(RETRY_DOCSTRING) 103 | # def retry(task_id): 104 | # return _get_config().retry(task_id) 105 | 106 | 107 | def _reset_global_config(): 108 | """ For unit tests. Do not use """ 109 | global _global_config 110 | _global_config = None 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | > Note: Fastmap is currently in beta. 2 | 3 | ![Version 0.0.12](https://img.shields.io/badge/version-0.0.12-red) 4 | 5 |

6 | fastmap logo 7 |

 8 | 9 | Fastmap offloads and parallelizes arbitrary Python functions on the cloud. 10 | 11 | - **🌤 Offload anything** Fastmap is the easiest way to offload Python code. Use fastmap.offload wherever you might have needed an EC2. 12 | - **🔗 Parallelize in one line** With Fastmap, you can parallelize any Python function across as many workers as it takes. Use fastmap.map wherever you might have needed a Lambda. 13 | - **🚀 Deploy in minutes** Fastmap comes with an open-source cloud service. With a Google Cloud Platform account, you can set up and deploy fastmap with one command. 14 | - **🛀 Built for productivity** Fastmap is designed to eliminate infrastructure and speed up your work. Spend your time writing code - not fighting infrastructure. 15 | 16 | Fastmap is in active development. For more information, see https://fastmap.io. 17 | 18 | 19 | 101 | -------------------------------------------------------------------------------- /scripts/fastmapadmin: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import datetime 5 | 6 | import tabulate 7 | import fastmap 8 | 9 | USAGE = """Fastmap CLI for service administration.""" 10 | 11 | DESCRIPTION = """ 12 | Fastmap CLI for service administration.
13 | \n\n 14 | Examples: 15 | $ fastmap accounts 16 | 17 | """ 18 | 19 | EPILOG = """Run `fastmap --help` for help on individual operations.""" 20 | 21 | 22 | def _relative_time(seconds): 23 | if not seconds: 24 | return 'never' 25 | seconds = datetime.datetime.now(datetime.timezone.utc).timestamp() - seconds 26 | if seconds > 60 * 60 * 24 * 2: 27 | return '%d days ago' % (seconds // (60 * 60 * 24)) 28 | elif seconds > 60 * 60 * 2: 29 | return '%d hours ago' % (seconds // (60 * 60)) 30 | elif seconds > 120: 31 | return '%d minutes ago' % (seconds // 60) 32 | elif seconds > 2: 33 | return '%d seconds ago' % seconds 34 | else: 35 | return 'just now' 36 | 37 | 38 | def _prettify(items): 39 | for item in items: 40 | item['created'] = _relative_time(item['created']) 41 | item['updated'] = _relative_time(item['updated']) 42 | 43 | if item.get('idle'): 44 | item['idle'] = _relative_time(item['idle']) 45 | 46 | if item.get('heartbeat_ts'): 47 | item['heartbeat_ts'] = _relative_time(item['heartbeat_ts']) 48 | # task['state'] = task['task_state'] 49 | # task['id'] = task['task_id'] 50 | 51 | # del task['last_heartbeat'] 52 | # del task['task_state'] 53 | # del task['task_id'] 54 | 55 | # task['progress'] = task['progress'] and "%.1f%%" % task['progress'] 56 | 57 | 58 | def list_accounts(config): 59 | resp = fastmap.sdk_lib.post_request( 60 | url=config.cloud_url + '/admin/v1/list_accounts', 61 | data={}, 62 | secret=config.secret, 63 | log=fastmap.sdk_lib.FastmapLogger('QUIET')) 64 | 65 | accounts = resp.obj['accounts'] 66 | accounts.sort(key=lambda x: x['created'], reverse=True) 67 | _prettify(accounts) 68 | print(tabulate.tabulate(accounts, headers='keys')) 69 | 70 | 71 | def list_workers(config): 72 | resp = fastmap.sdk_lib.post_request( 73 | url=config.cloud_url + '/admin/v1/list_workers', 74 | data={}, 75 | secret=config.secret, 76 | log=fastmap.sdk_lib.FastmapLogger('QUIET')) 77 | 78 | workers = resp.obj['workers'] 79 | workers.sort(key=lambda x: x['created'], 
reverse=True) 80 | import pprint; pprint.pprint(workers) 81 | _prettify(workers) 82 | print(tabulate.tabulate(workers, headers='keys')) 83 | 84 | 85 | def add_account(config, email, password): 86 | resp = fastmap.sdk_lib.post_request( 87 | url=config.cloud_url + '/admin/v1/add_account', 88 | data={'email': email, 'password': password}, 89 | secret=config.secret, 90 | log=fastmap.sdk_lib.FastmapLogger('QUIET')) 91 | 92 | account_id = resp.obj['account_id'] 93 | secret_token = resp.obj['secret_token'] 94 | print("New account is %s token=%s" % (account_id, secret_token)) 95 | 96 | 97 | def add_credits(config, account_id, amount): 98 | # TODO check account_id 99 | resp = fastmap.sdk_lib.post_request( 100 | url=config.cloud_url + '/admin/v1/add_credit', 101 | data={'account_id': account_id, 'amount': amount}, 102 | secret=config.secret, 103 | log=fastmap.sdk_lib.FastmapLogger('QUIET')) 104 | 105 | print("Added credits. New balance = %.2f" % resp.obj['balance']) 106 | 107 | 108 | if __name__ == "__main__": 109 | parser = argparse.ArgumentParser( 110 | # usage=DESCRIPTION, 111 | description=DESCRIPTION, 112 | epilog=EPILOG, 113 | formatter_class=argparse.RawDescriptionHelpFormatter) 114 | 115 | parser.add_argument( 116 | "--config", 117 | help="Location of configuration file generated by depoly_gcp.py. " 118 | "If omitted, will attempt to use the default configuration. 
") 119 | 120 | subparsers = parser.add_subparsers( 121 | dest='operation', required=True, 122 | help='sub-command help') 123 | 124 | list_accounts_p = subparsers.add_parser( 125 | 'list_accounts', help="Get account info") 126 | 127 | list_workers_p = subparsers.add_parser( 128 | 'list_workers', help="Get worker info") 129 | 130 | add_credits_p = subparsers.add_parser( 131 | 'add_credits', help="Add credits to users") 132 | add_credits_p.add_argument( 133 | "account_id", 134 | help="Account ID") 135 | add_credits_p.add_argument( 136 | "amount", 137 | help="Amount") 138 | 139 | add_account_p = subparsers.add_parser( 140 | 'add_account', help="Add new account") 141 | add_account_p.add_argument( 142 | "email", 143 | help="Email") 144 | add_account_p.add_argument( 145 | "password", 146 | help="Password") 147 | 148 | args = parser.parse_args() 149 | 150 | config = fastmap.init(config=args.config) 151 | 152 | if config.exec_policy == fastmap.ExecPolicy.LOCAL: 153 | raise AssertionError("The fastmap CLI does not support a LOCAL exec_policy. " 154 | "Check your configuration file.") 155 | 156 | if args.operation == 'list_accounts': 157 | list_accounts(config) 158 | if args.operation == 'list_workers': 159 | list_workers(config) 160 | if args.operation == 'add_credits': 161 | add_credits(config, args.account_id, args.amount) 162 | if args.operation == 'add_account': 163 | add_account(config, args.email, args.password) 164 | -------------------------------------------------------------------------------- /scripts/fastmap: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import getpass 5 | import json 6 | import os 7 | import time 8 | 9 | import fastmap 10 | import requests 11 | import tabulate 12 | 13 | USAGE = """Fastmap CLI. 14 | This CLI only supports the offload workflow and doesn't have any mapping functionality. 15 | Run `fastmap --help` for more details. 
16 | """ 17 | 18 | DESCRIPTION = """ 19 | Fastmap CLI. 20 | \n\n 21 | Examples: 22 | $ fastmap poll 23 | $ fastmap poll 24 | $ fastmap logs 25 | $ fastmap traceback 26 | $ fastmap return_value 27 | $ fastmap kill 28 | $ fastmap clear 29 | $ fastmap clear --force 30 | 31 | """ 32 | 33 | EPILOG = """Run `fastmap --help` for help on individual operations.""" 34 | 35 | DEFAULT_CONFIG_DIR = fastmap.sdk_lib.DEFAULT_CONFIG_DIR 36 | 37 | def _relative_time(seconds): 38 | if not seconds: 39 | return 'never' 40 | if seconds > 60 * 60 * 24 * 2: 41 | return '%d days ago' % (seconds // (60 * 60 * 24)) 42 | elif seconds > 60 * 60 * 2: 43 | return '%d hours ago' % (seconds // (60 * 60)) 44 | elif seconds > 120: 45 | return '%d minutes ago' % (seconds // 60) 46 | elif seconds > 2: 47 | return '%d seconds ago' % seconds 48 | else: 49 | return 'just now' 50 | 51 | 52 | def _prettify_tasks(tasks): 53 | for task in tasks: 54 | task['type'] = task['type'].lower().capitalize() 55 | task['start_time'] = task['start_time'].strftime("%Y-%m-%d %H:%M:%S") 56 | task['runtime'] = task['runtime'] and ("%.1fs" % task['runtime']) 57 | task['last_heartbeat'] = _relative_time(task['last_heartbeat']) 58 | # task['state'] = task['task_state'] 59 | # task['id'] = task['task_id'] 60 | 61 | # del task['last_heartbeat'] 62 | # del task['task_state'] 63 | # del task['task_id'] 64 | 65 | 66 | # task['progress'] = task['progress'] and "%.1f%%" % task['progress'] 67 | 68 | 69 | # def offload(config, path, function_name, label): 70 | # if not os.path.exists(path): 71 | # raise AssertionError("Path %r does not exist." % path) 72 | # mod_path = path.replace('/', '.') 73 | # if mod_path.endswith('.py'): 74 | # mod_path = mod_path[:-3] 75 | # sys.path.append(os.getcwd()) 76 | # try: 77 | # mod = importlib.import_module(mod_path) 78 | # except ImportError: 79 | # raise AssertionError("Could not import module %r" % mod_path) from None 80 | 81 | # # TODO this doesn't actually work... 
def login(cloud_url):
    """Interactively log in to ``cloud_url`` and save the session config.

    Prompts for email and password, posts them to the service's login
    endpoint and writes a configuration containing the returned session
    token to ``DEFAULT_CONFIG_DIR``.

    Raises:
        AssertionError: If the service rejects the credentials (401) or
            answers with any other non-200 status.
    """
    print("Login for %r" % cloud_url)
    email = input("Email: ")
    password = getpass.getpass("Password: ")
    resp = requests.post(cloud_url + '/actions/v1/login',
                         data=json.dumps({'email': email, 'password': password}))
    if resp.status_code == 401:
        raise AssertionError("Could not login with those credentials")
    if resp.status_code != 200:
        raise AssertionError("Could not login unexpectedly %r" % resp.status_code)
    resp_dict = json.loads(resp.content)
    config = dict(fastmap.sdk_lib.DEFAULT_INLINE_CONFIG)
    config['secret'] = resp_dict['session_token']
    config['cloud_url'] = cloud_url
    config_dir = os.path.dirname(DEFAULT_CONFIG_DIR)
    # makedirs also creates missing parents and tolerates an existing
    # directory; skip it when the config path has no directory part.
    if config_dir:
        os.makedirs(config_dir, exist_ok=True)
    with open(DEFAULT_CONFIG_DIR, 'w') as f:
        f.write(json.dumps(config, indent=4))
    print("Login successful. Configuration saved to %s." % DEFAULT_CONFIG_DIR)


def poll(config, task_id):
    """Print a table describing one task, or every task when ``task_id`` is falsy."""
    if task_id:
        tasks = [config.get_task(task_id).poll()]
    else:
        tasks = config.poll_all()

    # Sort on the raw start time before _prettify_tasks turns it into text.
    tasks.sort(key=lambda x: x['start_time'], reverse=True)
    _prettify_tasks(tasks)

    config.log.info("Found %d task(s)" % len(tasks))
    print(tabulate.tabulate(tasks, headers='keys'))


def return_value(config, task_id):
    """Print the return value of the task identified by ``task_id``."""
    print(config.get_task(task_id).return_value())


def traceback(config, task_id):
    """Print the traceback of the task identified by ``task_id``."""
    tb = config.get_task(task_id).traceback()
    config.log.info("Traceback for %s:" % task_id)
    print(tb)


def kill(config, task_id, force=False):
    """Kill one task, or every PENDING/PROCESSING task when ``task_id`` is falsy.

    Args:
        config: Fastmap configuration used to look up and poll tasks.
        task_id: Task to kill. When falsy, all tasks are polled and those in
            a PENDING or PROCESSING state are killed.
        force: Skip the interactive confirmation in the kill-all case.
    """
    if task_id:
        config.get_task(task_id).kill()
        return
    config.log.info("Polling for tasks to kill...")
    tasks = config.poll_all()
    tasks_to_kill = [t for t in tasks if t['task_state'] in ("PENDING", "PROCESSING")]
    if not tasks_to_kill:
        config.log.info("Could not find any tasks to kill")
        return
    if not force:
        if config.log.input("Kill %d tasks? (y/n) " % len(tasks_to_kill)).lower() != 'y':
            config.log.info("Not killing.")
            return
    for task in tasks_to_kill:
        # Use the polled task's own id: ``task_id`` is falsy on this path, so
        # the previous ``config.get_task(task_id)`` never addressed the tasks
        # selected above.
        # NOTE(review): assumes polled task dicts expose their id under
        # 'task_id' (alongside 'task_state') - confirm against sdk_lib.
        config.get_task(task['task_id']).kill()
    config.log.info("Killed %d tasks" % len(tasks_to_kill))


def logs(config, task_id, live):
    """Print a task's logs and, when ``live`` is set, keep printing new ones.

    In live mode new logs are fetched every five seconds until the task
    reports an outcome.
    """
    task = config.get_task(task_id)
    original_logs = task.all_logs()
    config.log.info("Logs for %s:" % task_id)
    print(original_logs)
    if live:
        while True:
            # NOTE(review): this loop only ends if fetching logs refreshes
            # task._outcome - confirm in sdk_lib.
            if task._outcome:
                return
            new_logs = task.new_logs()
            if new_logs:
                print(new_logs)
            time.sleep(5)


def retry(config, task_id):
    """Retry the task identified by ``task_id`` and log the new task."""
    new_task = config.get_task(task_id).retry()
    config.log.info("Retry in process %r" % new_task)


def clear(config, task_id, force=False):
    """Clear one task, or every DONE task when ``task_id`` is falsy.

    Args:
        config: Fastmap configuration used to look up and poll tasks.
        task_id: Task to clear. When falsy, all DONE tasks are cleared and
            printed as a table.
        force: Skip the interactive confirmation in the clear-all case.
    """
    if task_id:
        config.get_task(task_id).clear()
        return

    config.log.info("Polling for tasks to clear...")
    tasks = config.poll_all()
    tasks_to_clear = [t for t in tasks if t['task_state'] == "DONE"]

    if not tasks_to_clear:
        config.log.info("Could not find any tasks to clear")
        return

    if not force:
        if config.log.input("Clear all 'DONE' tasks? There are currently %d. (y/n) " % len(tasks_to_clear)).lower() != 'y':
            config.log.info("Not clearing.")
            return
    cleared_tasks = config.clear_all()
    _prettify_tasks(cleared_tasks)
    print(tabulate.tabulate(cleared_tasks, headers='keys'))
") 206 | parser.add_argument( 207 | '--cloud-url', 208 | help="Which fastmap service to use. Default: Entry in the config or https://app.fastmap.io") 209 | parser.add_argument( 210 | "--verbosity", 211 | choices=("SILENT", "QUIET", "NORMAL", "LOUD"), 212 | help="How loud fastmap is. Default is NORMAL.", 213 | default="NORMAL") 214 | 215 | subparsers = parser.add_subparsers( 216 | dest='operation', required=True, 217 | help='sub-command help') 218 | 219 | # offload_p = subparsers.add_parser( 220 | # 'offload', 221 | # help="Offload a function in a python file.") 222 | # offload_p.add_argument( 223 | # "path", 224 | # help="The python file. E.g. path/script.py") 225 | # offload_p.add_argument( 226 | # "function", 227 | # help="The name of the function in the file. E.g. main_function") 228 | # offload_p.add_argument( 229 | # "label", nargs='?', 230 | # help="Optional label for your use") 231 | 232 | login_p = subparsers.add_parser( 233 | 'login', help="Login to a fastmap service") 234 | 235 | poll_p = subparsers.add_parser( 236 | 'poll', help="Get the metadata of one or all tasks") 237 | poll_p.add_argument( 238 | "task_id", nargs='?', 239 | help="Which task to return specifically. If omitted, return all non-CLEARED tasks") 240 | 241 | logs_p = subparsers.add_parser( 242 | 'logs', 243 | help="Get logs of a task. Task can be in any state except CLEARED. 
") 244 | logs_p.add_argument( 245 | "task_id", 246 | help="Task ID of task to get logs for.") 247 | logs_p.add_argument( 248 | "--live", action="store_true", 249 | help="Get live logs until the task is DONE.") 250 | 251 | return_value_p = subparsers.add_parser( 252 | 'return_value', 253 | help="Get the return_value of a task in a DONE state.") 254 | return_value_p.add_argument( 255 | "task_id", 256 | help="Task ID") 257 | 258 | traceback_p = subparsers.add_parser( 259 | 'traceback', 260 | help="Get the traceback of a task in a DONE state with an ERROR outcome.") 261 | traceback_p.add_argument( 262 | "task_id", 263 | help="Task ID") 264 | 265 | kill_p = subparsers.add_parser( 266 | 'kill', 267 | help="Kill a running task") 268 | kill_p.add_argument( 269 | "task_id", nargs='?', 270 | help="If omitted, kill all tasks") 271 | kill_p.add_argument( 272 | '--force', action='store_true', 273 | help='When task_id is omitted, kill all tasks without confirmation') 274 | 275 | retry_p = subparsers.add_parser( 276 | 'retry', 277 | help='Retry a task in a DONE state') 278 | retry_p.add_argument( 279 | "task_id", 280 | help="Task to retry") 281 | 282 | clear_p = subparsers.add_parser( 283 | 'clear', 284 | help="Clear a completed task") 285 | clear_p.add_argument( 286 | "task_id", nargs='?', 287 | help="If omitted, clear all tasks") 288 | clear_p.add_argument( 289 | '--force', action='store_true', 290 | help='When task_id is omitted, clear all tasks without confirmation') 291 | 292 | args = parser.parse_args() 293 | 294 | if args.config: 295 | try: 296 | with open(args.config) as f: 297 | config_json = json.loads(f.read()) 298 | except: 299 | raise AssertionError("Error loading configuration %r" % args.config) 300 | else: 301 | try: 302 | with open(DEFAULT_CONFIG_DIR) as f: 303 | config_json = json.loads(f.read()) 304 | except: 305 | config_json = {} 306 | 307 | if not config_json and not args.cloud_url: 308 | args.cloud_url = "https://app.fastmap.io" 309 | 310 | 
config_json['exec_policy'] = "CLOUD" 311 | if args.verbosity: 312 | config_json['verbosity'] = args.verbosity 313 | if args.cloud_url: 314 | config_json['cloud_url'] = args.cloud_url 315 | 316 | config = fastmap.init( 317 | config=config_json) 318 | 319 | # if config.exec_policy == fastmap.ExecPolicy.LOCAL: 320 | # raise AssertionError("The fastmap CLI does not support a LOCAL exec_policy. " 321 | # "Check your configuration file.") 322 | 323 | if args.operation == 'login': 324 | login(config_json['cloud_url']) 325 | exit(0) 326 | # if args.operation == 'offload': 327 | # offload(config, args.path, args.function, args.label) 328 | if args.operation == 'poll': 329 | poll(config, args.task_id) 330 | if args.operation == 'return_value': 331 | return_value(config, args.task_id) 332 | if args.operation == 'traceback': 333 | traceback(config, args.task_id) 334 | if args.operation == 'kill': 335 | kill(config, args.task_id, args.force) 336 | if args.operation == 'retry': 337 | retry(config, args.task_id) 338 | if args.operation == 'logs': 339 | logs(config, args.task_id, args.live) 340 | if args.operation == 'clear': 341 | clear(config, args.task_id, args.force) 342 | -------------------------------------------------------------------------------- /tests/test_sdk.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import gzip 3 | import io 4 | import math 5 | import pickle 6 | import random 7 | import re 8 | import os 9 | import sys 10 | import time 11 | import types 12 | 13 | import dill 14 | import msgpack 15 | import pytest 16 | import requests_mock 17 | 18 | sys.path.append(os.getcwd().split('/tests')[0]) 19 | 20 | from fastmap import (init, global_init, fastmap, _reset_global_config, 21 | FastmapException, sdk_lib, ReturnType, 22 | Verbosity, ExecPolicy) 23 | 24 | TEST_SECRET = "abcd" * (64 // 4) 25 | 26 | 27 | def flatten(lst): 28 | # https://stackoverflow.com/questions/952914 29 | return [el for sublst in lst 
def primeFactors(n):
    """Return the prime factors of ``n`` in ascending order, with multiplicity.

    Adapted from
    https://www.geeksforgeeks.org/print-all-prime-factors-of-a-given-number/

    Returns an empty list for 0 and 1. Uses integer division throughout so
    the result stays exact for large inputs.
    """
    if n == 0:
        # 0 is divisible by everything; the loop below would never finish.
        return []
    ret = []
    # Strip out every factor of two first
    while n % 2 == 0:
        ret.append(2)
        n //= 2

    # n must be odd at this point
    # so a skip of 2 ( i = i + 2) can be used
    for i in range(3, int(math.sqrt(n)) + 1, 2):
        # while i divides n, record i and divide n
        while n % i == 0:
            ret.append(i)
            n //= i

    # Whatever is left above 2 is itself a prime factor. The previous code
    # appended the literal 2 here instead of n.
    if n > 2:
        ret.append(n)
    return ret


def calc_pi_basic(seed, two=2.0):
    """Return 1 if a seeded pseudo-random point lies inside the unit circle, else 0.

    The point is drawn from the square ``[-1, two - 1)`` on both axes after
    seeding ``random`` with ``seed``, so the result is deterministic per seed.
    """
    random.seed(seed)
    x = random.random() * two - 1.0
    y = random.random() * two - 1.0
    return 1 if x**2 + y**2 <= 1.0 else 0


def calc_pi_dead_99(seed):
    """Like ``calc_pi_basic`` with ``two=2.0``, but fails for seed 99.

    Raises:
        AssertionError: When ``seed`` is 99, to simulate one failing element.
    """
    assert seed != 99
    random.seed(seed)
    x = random.random() * 2.0 - 1.0
    y = random.random() * 2.0 - 1.0
    return 1 if x**2 + y**2 <= 1.0 else 0


def fake_input_yes(self, msg):
    """Stand-in for an interactive prompt method: echo ``msg`` and answer 'y'."""
    print(msg)
    return 'y'


def fake_input_no(self, msg):
    """Stand-in for an interactive prompt method: echo ``msg`` and answer 'n'."""
    print(msg)
    return 'n'
class Wrapper():
    """Small mutable object holding a single value ``x``."""

    def __init__(self, x):
        self.x = x

    def sqrt(self):
        """Replace ``x`` with its square root in place; returns ``None``."""
        self.x **= .5
162 | def test_objects(): 163 | def proc(x): 164 | x.sqrt() 165 | return x 166 | 167 | seq_1 = [Wrapper(1)] 168 | gen_1 = (Wrapper(x) for x in range(1, 2)) 169 | seq_100 = [Wrapper(x) for x in range(100)] 170 | gen_100 = (Wrapper(x) for x in range(100)) 171 | seq_200000 = [Wrapper(x) for x in range(200000)] 172 | gen_200000 = (Wrapper(x) for x in range(200000)) 173 | 174 | config = init(exec_policy="LOCAL") 175 | res_seq_1 = list(config.fastmap(proc, seq_1)) 176 | assert len(res_seq_1) == 1 177 | assert res_seq_1[0].x == 1 178 | res_gen_1 = list(config.fastmap(proc, gen_1)) 179 | assert len(res_gen_1) == 1 180 | assert res_gen_1[0].x == 1 181 | 182 | res_seq_100 = list(config.fastmap(proc, seq_100)) 183 | assert len(res_seq_100) == 100 184 | assert res_seq_100[99].x == 99 ** .5 185 | res_gen_100 = list(config.fastmap(proc, gen_100)) 186 | assert len(res_gen_100) == 100 187 | assert res_gen_100[99].x == 99 ** .5 188 | 189 | res_seq_200000 = list(config.fastmap(proc, seq_200000)) 190 | assert len(res_seq_200000) == 200000 191 | assert res_seq_200000[99999].x == 99999 ** .5 192 | res_gen_200000 = list(config.fastmap(proc, gen_200000)) 193 | assert len(res_gen_200000) == 200000 194 | assert res_gen_200000[99999].x == 99999 ** .5 195 | 196 | 197 | def test_local_empty(): 198 | config = init(exec_policy="LOCAL") 199 | 200 | gen = config.fastmap(calc_pi_basic, []) 201 | assert isinstance(gen, types.GeneratorType) 202 | assert list(gen) == [] 203 | 204 | gen = config.fastmap(calc_pi_basic, iter([])) 205 | assert isinstance(gen, types.GeneratorType) 206 | assert list(gen) == [] 207 | 208 | 209 | def test_local_no_init(): 210 | _reset_global_config() 211 | range_100 = range(100) 212 | pi = 4.0 * sum(fastmap(calc_pi_basic, range_100)) / len(range_100) 213 | assert pi == 3.12 214 | 215 | 216 | def test_local_global_init(): 217 | global_init(exec_policy="LOCAL") 218 | range_100 = range(100) 219 | pi = 4.0 * sum(fastmap(calc_pi_basic, range_100)) / len(range_100) 220 | assert pi 
== 3.12 221 | 222 | 223 | def test_local_functools(): 224 | config = init(exec_policy="LOCAL") 225 | range_100 = range(100) 226 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, range_100, kwargs={'two': 2.0})) / len(range_100) 227 | assert pi == 3.12 228 | 229 | 230 | def test_max_local_workers(): 231 | config = init(exec_policy="LOCAL", max_local_workers=2) 232 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, range(100))) / len(range(100)) 233 | assert pi == 3.12 234 | 235 | # To get into max_local_workers <= 1 236 | config = init(exec_policy="LOCAL", max_local_workers=1) 237 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, range(100))) / len(range(100)) 238 | assert pi == 3.12 239 | 240 | 241 | def test_exec_policy(): 242 | assert ExecPolicy.LOCAL == "LOCAL" 243 | assert ExecPolicy.CLOUD == "CLOUD" 244 | assert ExecPolicy.ADAPTIVE == "ADAPTIVE" 245 | assert set(ExecPolicy) == set(("LOCAL", "ADAPTIVE", "CLOUD")) 246 | 247 | with pytest.raises(FastmapException): 248 | init(exec_policy="INVALID") 249 | for exec_policy in ("LOCAL", "CLOUD", "ADAPTIVE"): 250 | init(exec_policy=exec_policy) 251 | 252 | 253 | def test_verbosity(capsys): 254 | assert Verbosity.SILENT == "SILENT" 255 | assert Verbosity.QUIET == "QUIET" 256 | assert Verbosity.NORMAL == "NORMAL" 257 | assert Verbosity.LOUD == "LOUD" 258 | assert set(Verbosity) == set(("SILENT", "QUIET", "NORMAL", "LOUD")) 259 | 260 | config = init(exec_policy="LOCAL", verbosity="QUIET") 261 | list(config.fastmap(lambda x: x**x, range(10))) 262 | stdio = capsys.readouterr() 263 | assert stdio.out == "" 264 | config = init(exec_policy="LOCAL", verbosity="SILENT") 265 | list(config.fastmap(lambda x: x**x, range(10))) 266 | stdio = capsys.readouterr() 267 | assert stdio.out == "" 268 | config = init(exec_policy="ADAPTIVE", verbosity="QUIET") 269 | list(config.fastmap(lambda x: x**x, range(10))) 270 | stdio = capsys.readouterr() 271 | assert "fastmap WARNING:" in stdio.out 272 | config = init(exec_policy="LOCAL", verbosity="NORMAL") 
273 | list(config.fastmap(lambda x: x**x, range(10))) 274 | stdio = capsys.readouterr() 275 | assert "fastmap INFO:" in stdio.out 276 | config = init(exec_policy="LOCAL", verbosity="LOUD") 277 | list(config.fastmap(lambda x: x**x, range(10))) 278 | stdio = capsys.readouterr() 279 | assert "fastmap DEBUG:" in stdio.out 280 | assert "fastmap INFO:" in stdio.out 281 | with pytest.raises(FastmapException): 282 | config = init(exec_policy="LOCAL", verbosity="FAKE") 283 | 284 | 285 | def test_lambda(): 286 | config = init(exec_policy="LOCAL") 287 | range_100 = range(100) 288 | with pytest.raises(ZeroDivisionError): 289 | # zero division error raises execution error 290 | sum(config.fastmap(lambda x: 1.0 / x, range_100)) 291 | range_1_100 = range(1, 1000) 292 | the_sum = sum(config.fastmap(lambda x: 1.0 / x if x % 2 == 1 else -1.0 / x, range_1_100)) 293 | assert math.isclose(the_sum, 0.6936474305598223) 294 | 295 | 296 | def test_closure_basic(): 297 | config = init(exec_policy="LOCAL") 298 | range_100 = range(100) 299 | with pytest.raises(ZeroDivisionError): 300 | # zero division error raises execution error 301 | sum(config.fastmap(lambda x: 1.0 / x, range_100)) 302 | range_1_100 = range(1, 1000) 303 | 304 | def cl(x): 305 | if x % 2 == 1: 306 | return 1.0 / x 307 | else: 308 | return -1.0 / x 309 | 310 | the_sum = sum(config.fastmap(cl, range_1_100)) 311 | assert math.isclose(the_sum, 0.6936474305598223) 312 | 313 | 314 | def test_closure_real(): 315 | config = init(exec_policy="LOCAL") 316 | range_100 = range(100) 317 | with pytest.raises(ZeroDivisionError): 318 | # zero division error raises execution error 319 | sum(config.fastmap(lambda x: 1.0 / x, range_100)) 320 | range_1_100 = range(1, 1000) 321 | one = 1.0 322 | 323 | def cl(x): 324 | if x % 2 == 1: 325 | return one / x 326 | else: 327 | return -1 * one / x 328 | 329 | the_sum = sum(config.fastmap(cl, range_1_100)) 330 | assert math.isclose(the_sum, 0.6936474305598223) 331 | 332 | 333 | def 
test_single_threaded(monkeypatch): 334 | # Set initial run duration to make it not process everything on first run 335 | # but don't change proc_overhead so that it decides processes are too much 336 | config = init(exec_policy="LOCAL") 337 | monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 338 | range_100 = range(100) 339 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, list(range_100))) / len(range_100) 340 | assert pi == 3.12 341 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, iter(range_100))) / len(range_100) 342 | assert pi == 3.12 343 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, set(range_100))) / len(range_100) 344 | assert pi == 3.12 345 | 346 | 347 | def test_process_local(monkeypatch): 348 | config = init(exec_policy="LOCAL") 349 | monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 350 | monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 351 | range_100 = range(100) 352 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, list(range_100))) / len(range_100) 353 | assert pi == 3.12 354 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, iter(range_100))) / len(range_100) 355 | assert pi == 3.12 356 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, set(range_100))) / len(range_100) 357 | assert pi == 3.12 358 | 359 | 360 | def test_single_threaded_process(capsys, monkeypatch): 361 | config = init(exec_policy="LOCAL", max_local_workers=1) 362 | monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 363 | monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 364 | range_100 = range(100) 365 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, list(range_100))) / len(range_100) 366 | assert pi == 3.12 367 | 368 | 369 | def test_single_threaded_process_exception(capsys, monkeypatch): 370 | config = init(exec_policy="LOCAL", max_local_workers=1) 371 | monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 372 | monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 373 | with pytest.raises(AssertionError): 374 | list(config.fastmap(calc_pi_dead_99, 
range(100))) 375 | 376 | 377 | def test_process_exception(capsys, monkeypatch): 378 | config = init(exec_policy="LOCAL", max_local_workers=2) 379 | monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 380 | monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 381 | with pytest.raises(FastmapException): 382 | list(config.fastmap(calc_pi_dead_99, range(100))) 383 | 384 | 385 | def test_process_adaptive(capsys, monkeypatch): 386 | # remote will die but this will continue 387 | config = init(exec_policy="ADAPTIVE") 388 | monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 389 | monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 390 | range_100 = range(100) 391 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, list(range_100))) / len(range_100) 392 | assert pi == 3.12 393 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, iter(range_100))) / len(range_100) 394 | assert pi == 3.12 395 | 396 | 397 | def test_slow_generator(): 398 | def slow_gen(iterable): 399 | for el in iterable: 400 | yield el 401 | time.sleep(.01) 402 | config = init(exec_policy="LOCAL") 403 | pi = 4.0 * sum(config.fastmap(calc_pi_basic, slow_gen(range(100)))) / 100 404 | assert pi == 3.12 405 | 406 | # test the do_die in the _FillInbox generators 407 | with pytest.raises(FastmapException): 408 | sum(config.fastmap(lambda x: 1 / (x - 50), slow_gen(range(100)))) 409 | with pytest.raises(FastmapException): 410 | sum(config.fastmap(lambda x: 1 / (x - 50), slow_gen(list(range(100))))) 411 | 412 | 413 | def test_order(monkeypatch): 414 | config = init(exec_policy="LOCAL") 415 | monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 416 | monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 417 | monkeypatch.setattr(sdk_lib._FillInbox, "BATCH_DUR_GOAL", .0001) 418 | order_range = list(config.fastmap(lambda x: int((x**2)**.5), range(10000))) 419 | assert order_range == list(range(10000)) 420 | 421 | 422 | def test_no_secret(monkeypatch, capsys): 423 | config = init(exec_policy="CLOUD") 424 | 
stdio = capsys.readouterr() 425 | assert re.search("fastmap WARNING:.*?LOCAL.\n", stdio.out) 426 | assert config.exec_policy == "LOCAL" 427 | 428 | config = init(exec_policy="ADAPTIVE") 429 | stdio = capsys.readouterr() 430 | assert re.search("fastmap WARNING:.*?LOCAL.\n", stdio.out) 431 | assert config.exec_policy == "LOCAL" 432 | 433 | 434 | def test_remote_no_connection(monkeypatch, capsys): 435 | config = init(exec_policy="CLOUD", verbosity="LOUD", secret=TEST_SECRET, 436 | cloud_url="localhost:9999") 437 | monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 438 | monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 439 | range_100 = range(100) 440 | with pytest.raises(FastmapException): 441 | list(config.fastmap(lambda x: x**.5, range_100)) 442 | stdio = capsys.readouterr() 443 | assert re.search("could not connect", stdio.out) 444 | 445 | 446 | def test_invalid_token(capsys): 447 | init(exec_policy="CLOUD", verbosity="LOUD", secret=None) 448 | for bad_token in (5, "_" * 64, "a" * 63, "a" * 65): 449 | with pytest.raises(FastmapException): 450 | init(exec_policy="CLOUD", verbosity="LOUD", secret=bad_token) 451 | init(exec_policy="CLOUD", verbosity="LOUD", secret="a"*64) 452 | 453 | 454 | def test_confirm_charges_basic(capsys, monkeypatch): 455 | # Basic local should not warn about confirming charges or any issues with 456 | # the secret 457 | # config = init(exec_policy="LOCAL", max_local_workers=2) 458 | # stdio = capsys.readouterr() 459 | # assert not re.search("fastmap WARNING:.*?confirm_charges", stdio.out) 460 | # assert not re.search("fastmap WARNING:.*?secret.*?LOCAL", stdio.out) 461 | # assert isinstance(config, FastmapConfig) 462 | # assert config.exec_policy == "LOCAL" 463 | 464 | # # Basic cloud should warn about an absent secret and set execpolicy to local 465 | # # (and say something about it) 466 | # config = init(exec_policy="CLOUD", max_local_workers=2) 467 | # stdio = capsys.readouterr() 468 | # assert not re.search("fastmap 
WARNING:.*?confirm_charges", stdio.out) 469 | # assert re.search("fastmap WARNING:.*?secret.*?LOCAL", stdio.out) 470 | # assert config.exec_policy == "LOCAL" 471 | 472 | # If a secret is correctly provided for cloud, warn about confirming 473 | # charges and do not set to local config policy 474 | # config = init(exec_policy="CLOUD", secret=TEST_SECRET, max_local_workers=2) 475 | # stdio = capsys.readouterr() 476 | # assert re.search("fastmap WARNING:.*?confirm_charges", stdio.out) 477 | # assert not re.search("fastmap WARNING:.*?secret.*?LOCAL", stdio.out) 478 | # assert config.exec_policy == "CLOUD" 479 | 480 | # If we set confirm charges, assert no warnings are thrown 481 | config = init(exec_policy="CLOUD", secret=TEST_SECRET, cloud_url="https://a.a", 482 | confirm_charges=True, max_local_workers=2) 483 | monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 484 | monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 485 | monkeypatch.setattr(sdk_lib.FastmapLogger, "input", fake_input_no) 486 | stdio = capsys.readouterr() 487 | assert not re.search("fastmap WARNING:.*?confirm_charges", stdio.out) 488 | assert not re.search("fastmap WARNING:.*?secret.*?LOCAL", stdio.out) 489 | assert config.exec_policy == "CLOUD" 490 | assert config.confirm_charges is True 491 | 492 | # Using the same config, ensure that every process dies with a fake url. 
493 | # There should only be 1 process which can die 494 | config.cloud_url = "localhost:9999" 495 | monkeypatch.setattr(sdk_lib.AuthCheck, "was_success", lambda _: True) 496 | with pytest.raises(FastmapException): 497 | list(config.fastmap(lambda x: x**.5, range(100))) 498 | stdio = capsys.readouterr() 499 | assert re.search(r"Continue\?", stdio.out) 500 | with pytest.raises(FastmapException): 501 | list(config.fastmap(lambda x: x**.5, iter(range(100)))) 502 | stdio = capsys.readouterr() 503 | assert re.search(r"Continue anyway\?", stdio.out) 504 | 505 | # Adaptive should log cancelled 506 | config = init(exec_policy="ADAPTIVE", secret=TEST_SECRET, 507 | confirm_charges=True, max_local_workers=2, 508 | cloud_url="localhost:9999/") 509 | list(config.fastmap(lambda x: x**.5, range(100))) 510 | stdio = capsys.readouterr() 511 | assert re.search(r"fastmap INFO:.*?cancelled", stdio.out) 512 | 513 | # Test enter yes 514 | monkeypatch.setattr(sdk_lib.FastmapLogger, "input", fake_input_yes) 515 | config = init(exec_policy="ADAPTIVE", secret=TEST_SECRET, confirm_charges=True, 516 | cloud_url="https://a.a",) 517 | # monkeypatch.setattr('sys.stdin', io.StringIO('y\n')) 518 | data = list(config.fastmap(lambda x: x**.5, iter(range(100)))) 519 | assert data 520 | 521 | def fake_input_try_again(self, msg, now={}): 522 | # clever 💯 523 | print(msg) 524 | if not now.get('done'): 525 | now['done'] = True 526 | return 'will repeat' 527 | return "n" 528 | 529 | # Test unrecognized input 530 | monkeypatch.setattr(sdk_lib.FastmapLogger, "input", fake_input_try_again) 531 | config = init(exec_policy="ADAPTIVE", secret=TEST_SECRET, cloud_url='https://a.a', 532 | confirm_charges=True) 533 | list(config.fastmap(lambda x: x**.5, iter(range(100)))) 534 | stdio = capsys.readouterr() 535 | assert "Unrecognized input" in stdio.out 536 | 537 | 538 | def test_empty_remote(): 539 | config = init(exec_policy="CLOUD") 540 | assert list(config.fastmap(lambda x: x**.5, [])) == [] 541 | assert 
list(config.fastmap(lambda x: x**.5, iter([]))) == [] 542 | config = init(exec_policy="ADAPTIVE") 543 | assert list(config.fastmap(lambda x: x**.5, [])) == [] 544 | assert list(config.fastmap(lambda x: x**.5, iter([]))) == [] 545 | 546 | 547 | def resp_dump(resp_dict): 548 | return base64.b64encode(pickle.dumps(resp_dict)) 549 | 550 | 551 | def resp_headers(): 552 | return { 553 | "X-Container-Id": "FAKE_ID", 554 | "X-Thread-Id": "FAKE_ID", 555 | "X-Process-Seconds": '4', 556 | "X-Total-Seconds": '5', 557 | } 558 | 559 | # def test_remote_200(monkeypatch, requests_mock): 560 | # monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 561 | # monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 562 | 563 | # results = list(map(lambda x: 1/x, range(1, 100))) 564 | 565 | # resp = resp_dump({"status": "OK", 566 | # "results": results[1:], 567 | # "map_seconds": 5}) 568 | # requests_mock.post('localhost:9999/api/v1/map', 569 | # content=resp, 570 | # status_code=200, 571 | # headers=resp_headers()) 572 | # config = init(exec_policy="CLOUD", secret=TEST_SECRET) 573 | # config.cloud_url = "localhost:9999" 574 | # assert math.isclose(sum(config.fastmap(lambda x: 1/x, range(1, 100))), 575 | # sum(results)) 576 | 577 | # def test_remote_401(monkeypatch, requests_mock, capsys): 578 | # resp = resp_dump({"status": "UNAUTHORIZED", 579 | # "reason": "UNAUTHORIZED"}) 580 | # requests_mock.post('localhost:9999/api/v1/map', 581 | # content=resp, 582 | # status_code=401) 583 | # config = init(exec_policy="CLOUD", secret=TEST_SECRET) 584 | # monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 585 | # monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 586 | # with pytest.raises(FastmapException): 587 | # # Unauthorized will kill the cloud thread 588 | # list(config.fastmap(sqrt, range(100))) 589 | # stdio = capsys.readouterr() 590 | # assert re.search("fastmap ERROR:.*?Unauthorized", stdio.out) 591 | 592 | 593 | # def test_remote_402(monkeypatch, requests_mock, 
capsys): 594 | # resp = resp_dump({"status": "NOT_ENOUGH_CREDITS", 595 | # "reason": "You do not have any credits available"}) 596 | # requests_mock.post('localhost:9999/api/v1/map', 597 | # content=resp, 598 | # status_code=402) 599 | # config = init(exec_policy="CLOUD", secret=TEST_SECRET) 600 | # monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 601 | # monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 602 | # with pytest.raises(FastmapException): 603 | # # Unauthorized will kill the cloud thread 604 | # list(config.fastmap(sqrt, range(100))) 605 | # stdio = capsys.readouterr() 606 | # assert re.search("fastmap ERROR:.*?credits", stdio.out) 607 | 608 | # def test_remote_403(monkeypatch, requests_mock, capsys): 609 | # resp = resp_dump({"status": "NOT_ENOUGH_CREDITS", 610 | # "reason": "You do not have any credits available"}) 611 | # requests_mock.post('localhost:9999/api/v1/map', 612 | # content=resp, 613 | # status_code=402) 614 | # config = init(exec_policy="CLOUD", secret=TEST_SECRET) 615 | # monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0) 616 | # monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0) 617 | # with pytest.raises(FastmapException): 618 | # # Unauthorized will kill the cloud thread 619 | # list(config.fastmap(sqrt, range(100))) 620 | # stdio = capsys.readouterr() 621 | # assert re.search("fastmap ERROR:.*?credits", stdio.out) 622 | 623 | def test_post_request(monkeypatch, capsys): 624 | url = "localhost:8888/api/v1/map" 625 | data = msgpack.dumps({"hello": "world"}) 626 | secret = "secret_token" 627 | log = sdk_lib.FastmapLogger('QUIET') 628 | resp_headers = { 629 | "X-Status": "OK" 630 | } 631 | resp_dict = { 632 | "world": "hello" 633 | } 634 | 635 | # Bad content type on API call 636 | resp_headers['Content-Type'] = "text/html" 637 | with requests_mock.Mocker() as m: 638 | m.post(url, content=msgpack.dumps(resp_dict), 639 | headers=resp_headers) 640 | with pytest.raises(sdk_lib.CloudError): 641 | 
sdk_lib.post_request(url, data, secret, log) 642 | 643 | # Server warning and basic msgpack obj extraction 644 | resp_headers['Content-Type'] = "application/msgpack" 645 | resp_headers["X-Server-Warning"] = "abcdefg" 646 | with requests_mock.Mocker() as m: 647 | m.post(url, content=msgpack.dumps(resp_dict), 648 | headers=resp_headers) 649 | resp = sdk_lib.post_request(url, data, secret, log) 650 | stdio = capsys.readouterr() 651 | assert re.search("WARNING:[^\n]+ abcdefg", stdio.out) 652 | assert resp.obj['world'] == 'hello' 653 | del resp_headers["X-Server-Warning"] 654 | 655 | # Cloud error on 500 status code 656 | with requests_mock.Mocker() as m: 657 | m.post(url, content=msgpack.dumps(resp_dict), 658 | headers=resp_headers, status_code=500) 659 | with pytest.raises(sdk_lib.CloudError): 660 | sdk_lib.post_request(url, data, secret, log) 661 | 662 | # No Content signature on octet-stream 663 | resp_headers['Content-Type'] = "application/octet-stream" 664 | pickled_resp = gzip.compress(dill.dumps(resp_dict)) 665 | with requests_mock.Mocker() as m: 666 | m.post(url, content=pickled_resp, 667 | headers=resp_headers) 668 | with pytest.raises(sdk_lib.CloudError): 669 | sdk_lib.post_request(url, data, secret, log) 670 | 671 | # Wrong content signature on octet-stream 672 | resp_headers['X-Content-Signature'] = "fake" 673 | with requests_mock.Mocker() as m: 674 | m.post(url, content=pickled_resp, 675 | headers=resp_headers) 676 | with pytest.raises(sdk_lib.CloudError): 677 | sdk_lib.post_request(url, data, secret, log) 678 | 679 | # Correct content signature. 
Extract works 680 | resp_headers['X-Content-Signature'] = sdk_lib.hmac_digest(secret, pickled_resp) 681 | with requests_mock.Mocker() as m: 682 | m.post(url, content=pickled_resp, 683 | headers=resp_headers) 684 | resp = sdk_lib.post_request(url, data, secret, log) 685 | assert resp.obj['world'] == 'hello' 686 | 687 | # Not gzipped 688 | pickled_resp = dill.dumps(resp_dict) 689 | resp_headers['X-Content-Signature'] = sdk_lib.hmac_digest(secret, pickled_resp) 690 | with requests_mock.Mocker() as m: 691 | m.post(url, content=pickled_resp, 692 | headers=resp_headers) 693 | with pytest.raises(sdk_lib.CloudError): 694 | sdk_lib.post_request(url, data, secret, log) 695 | 696 | # Not msgpacked 697 | pickled_resp = gzip.compress(str(resp_dict).encode()) 698 | resp_headers['X-Content-Signature'] = sdk_lib.hmac_digest(secret, pickled_resp) 699 | with requests_mock.Mocker() as m: 700 | m.post(url, content=pickled_resp, 701 | headers=resp_headers) 702 | with pytest.raises(sdk_lib.CloudError): 703 | sdk_lib.post_request(url, data, secret, log) 704 | 705 | 706 | def test_fmt_bytes(): 707 | assert sdk_lib.fmt_bytes(1023) == "1023B" 708 | assert sdk_lib.fmt_bytes(1024) == "1.0KB" 709 | assert sdk_lib.fmt_bytes(2048) == "2.0KB" 710 | assert sdk_lib.fmt_bytes(1024**2) == "1.0MB" 711 | assert sdk_lib.fmt_bytes(1024**2 * 2) == "2.0MB" 712 | assert sdk_lib.fmt_bytes(1024**3) == "1.0GB" 713 | assert sdk_lib.fmt_bytes(1024**3 * 2) == "2.0GB" 714 | 715 | 716 | def test_fmt_time(): 717 | assert sdk_lib.fmt_time(59) == "59s" 718 | assert sdk_lib.fmt_time(60) == "01:00" 719 | assert sdk_lib.fmt_time(61) == "01:01" 720 | assert sdk_lib.fmt_time(121) == "02:01" 721 | assert sdk_lib.fmt_time(60 * 60) == "01:00:00" 722 | assert sdk_lib.fmt_time(60 * 60 + 1) == "01:00:01" 723 | assert sdk_lib.fmt_time(60 * 60 + 61) == "01:01:01" 724 | 725 | 726 | def test_fmt_dur(): 727 | assert sdk_lib.fmt_dur(.000009) == "0 milliseconds" 728 | assert sdk_lib.fmt_dur(.9) == "900 milliseconds" 729 | assert 
sdk_lib.fmt_dur(1) == "1.00 seconds" 730 | assert sdk_lib.fmt_dur(59) == "59.00 seconds" 731 | assert sdk_lib.fmt_dur(60) == "1.00 minutes" 732 | assert sdk_lib.fmt_dur(61) == "1.02 minutes" 733 | assert sdk_lib.fmt_dur(121) == "2.02 minutes" 734 | assert sdk_lib.fmt_dur(60 * 60) == "1.00 hours" 735 | assert sdk_lib.fmt_dur(60 * 60 + 1) == "1.00 hours" 736 | assert sdk_lib.fmt_dur(60 * 60 + 61) == "1.02 hours" 737 | 738 | 739 | def test_namespace(): 740 | ns = sdk_lib.Namespace("A", B="C") 741 | assert ns.A == "A" 742 | assert ns.B == "C" 743 | assert 'A' in ns 744 | assert 'B' in ns 745 | assert 'C' not in ns 746 | assert set(list(ns)) == set(['A', 'B']) 747 | with pytest.raises(AttributeError): 748 | ns.C 749 | 750 | 751 | def test_short_func(): 752 | SMALL_NUM = 15 753 | 754 | def generator(): 755 | yield SMALL_NUM 756 | 757 | config = init(exec_policy="CLOUD", secret="0" * 64) 758 | list(config.fastmap(primeFactors, (SMALL_NUM,))) 759 | list(config.fastmap(primeFactors, generator())) 760 | 761 | config = init(exec_policy="ADAPTIVE", secret="0" * 64) 762 | list(config.fastmap(primeFactors, (SMALL_NUM,))) 763 | list(config.fastmap(primeFactors, generator())) 764 | 765 | config = init(exec_policy="LOCAL", secret="0" * 64) 766 | list(config.fastmap(primeFactors, (SMALL_NUM,))) 767 | list(config.fastmap(primeFactors, generator())) 768 | 769 | 770 | def test_long_func(monkeypatch): 771 | # regression test for bug when on CLOUD exec_policy with a long initial 772 | # function which nevertheless clears out the iterable 773 | # also test with other exec policies and generator types 774 | # this is still only one number so will never actually call the cloud 775 | 776 | BIG_NUM = 29393395993999 777 | 778 | def generator(): 779 | yield BIG_NUM 780 | 781 | config = init(exec_policy="LOCAL", verbosity="LOUD", secret="0" * 64) 782 | list(config.fastmap(primeFactors, (BIG_NUM,))) 783 | list(config.fastmap(primeFactors, generator())) 784 | 785 | 
monkeypatch.setattr(sdk_lib.AuthCheck, "was_success", lambda _: True) 786 | config = init(exec_policy="CLOUD", verbosity="LOUD", secret="0" * 64) 787 | list(config.fastmap(primeFactors, (BIG_NUM,))) 788 | list(config.fastmap(primeFactors, generator())) 789 | 790 | config = init(exec_policy="ADAPTIVE", verbosity="LOUD", secret="0" * 64) 791 | list(config.fastmap(primeFactors, (BIG_NUM,))) 792 | list(config.fastmap(primeFactors, generator())) 793 | 794 | 795 | def test_log_etcetera(monkeypatch, capsys): 796 | # log functions that can't be captured in normal tests 797 | 798 | logger = sdk_lib.FastmapLogger("LOUD") 799 | logger.debug("Hello") 800 | stdio = capsys.readouterr() 801 | assert "fastmap DEBUG:" in stdio.out 802 | assert 'Hello' in stdio.out 803 | 804 | logger.hush() 805 | logger.error("Hello") 806 | stdio = capsys.readouterr() 807 | assert "" == stdio.out 808 | 809 | logger.restore_verbosity() 810 | logger.error("Hello") 811 | stdio = capsys.readouterr() 812 | assert "Hello" in stdio.out 813 | 814 | monkeypatch.setattr('sys.stdin', io.StringIO('y\n')) 815 | resp = logger.input("Hi") 816 | assert resp == 'y' 817 | stdio = capsys.readouterr() 818 | assert "Hi" in stdio.out 819 | 820 | 821 | if __name__ == '__main__': 822 | pytest.main() 823 | -------------------------------------------------------------------------------- /fastmap/sdk_lib.py: -------------------------------------------------------------------------------- 1 | """ 2 | Primary file for the fastmap SDK. Almost all client-side code is in this file. 3 | Do not instantiate anything here directly. Use the interface __init__.py. 
4 | """ 5 | 6 | import atexit 7 | import collections 8 | import datetime 9 | import distutils.sysconfig 10 | import functools 11 | import glob 12 | import gzip 13 | import hashlib 14 | import hmac 15 | import importlib.metadata 16 | import json 17 | import multiprocessing 18 | import os 19 | import pathlib 20 | import queue 21 | import re 22 | import secrets 23 | import string 24 | import sys 25 | import threading 26 | import traceback 27 | import time 28 | from collections.abc import Iterable, Sequence, Generator 29 | from types import FunctionType, ModuleType 30 | from typing import List, Dict 31 | 32 | import dill 33 | import msgpack 34 | import requests 35 | 36 | SECRET_RE = r'^[PS]\-[0-9a-zA-Z]{64}$' 37 | TASK_RE = r'^[0-9a-zA-Z]{12}$' 38 | SITE_PACKAGES_RE = re.compile(r".*?/python[0-9.]+/(?:site|dist)\-packages/") 39 | REQUIREMENT_RE = re.compile(r'^[\w-]+==[\w-]+(?:\.[\w-]+)*$') 40 | CLIENT_VERSION = "0.0.12" 41 | KB = 1024 42 | MB = 1024 ** 2 43 | GB = 1024 ** 3 44 | 45 | # MAP_DOCSTRING = """ 46 | # Map a function over an iterable and return the results. 47 | # Depending on prior configuration, fastmap will run either locally via 48 | # multiprocessing, in the cloud on the fastmap.io servers, or adaptively on 49 | # both. 50 | 51 | # :param function func: Function to map against. 52 | # :param sequence|generator iterable: Iterable to map over. 53 | # :param dict kwargs: Named parameters to bind to the function. Optional. 54 | # :param str return_type: Either "ELEMENTS" or "BATCHES". Default 55 | # is "ELEMENTS". 56 | # :param str label: Optional label to track this execution. Only meaningful if 57 | # some execution occurs on the cloud. Default is "". 58 | # :rtype: Generator 59 | 60 | # Fastmap is a parallelized/distributed drop-in replacement for 'map'. 61 | # It runs faster than the builtin map function in most circumstances. 62 | 63 | # Notes: 64 | # - The function passed in must be stateless and cannot access the network or 65 | # the filesystem. 
If run locally, these restrictions will not be enforced 66 | # but because fastmap will likely execute out-of-order, running stateful 67 | # functions is not recommended. 68 | # - The iterable can be a sequence (list, tuple, ndarray, dataframe, etc), 69 | # or a generator. 70 | # - Fastmap is a generator so the iterable is processed lazily and fastmap 71 | # will not begin execution unless iterated over or execution is forced 72 | # (e.g. by wrapping it in a list). 73 | 74 | # For more documentation, go to https://fastmap.io/docs 75 | 76 | # """ 77 | 78 | OFFLOAD_DOCSTRING = """ 79 | Offload a function to the cloud and return a FastmapTask. 80 | 81 | :param function func: Function to offload 82 | :param dict kwargs: Named parameters to bind to the function. Optional. 83 | :param function hook: Function to call upon process completion. Optional. 84 | :param str label: Optional label to track this execution. Default is "". 85 | 86 | :rtype: FastmapTask 87 | """ 88 | 89 | GET_TASK_DOCSTRING = """ 90 | Given a task_id, get the associated FastmapTask. 91 | Raises an exeption if the task cannot be found. 92 | 93 | :param str task_id: 94 | :rtype: FastmapTask 95 | """ 96 | 97 | POLL_ALL_DOCSTRING = """ 98 | Poll all non-CLEARED cloud task metadata. 99 | 100 | :rtype: list[dict] 101 | """ 102 | 103 | CLEAR_ALL_DOCSTRING = """ 104 | Clear all done tasks and remove their functions, logs, and results from storage. 105 | """ 106 | 107 | CLEAR_DOCSTRING = """ 108 | Clear the task and remove its function, logs, and result from storage. 109 | 110 | Raises a FastmapException if the task cannot be found or the 111 | task has not completed. 112 | 113 | :rtype: None 114 | """ 115 | 116 | KILL_DOCSTRING = """ 117 | Kill the associated cloud task. 118 | 119 | Raises a FastmapException if the task cannot be found or is already dead. 120 | 121 | :rtype: None 122 | """ 123 | 124 | ALL_LOGS_DOCSTRING = """ 125 | Return the task's stdout and stderr since the task started. 
126 | 127 | Raises a FastmapException if the task cannot be found. 128 | 129 | :rtype: str 130 | """ 131 | 132 | NEW_LOGS_DOCSTRING = """ 133 | Return the task's stdout and stderr since the task started. 134 | 135 | Raises a FastmapException if the task cannot be found. 136 | 137 | :rtype: str 138 | """ 139 | 140 | POLL_DOCSTRING = """ 141 | Poll for cloud task metadata. 142 | 143 | Raises a FastmapException if the task cannot be found. 144 | 145 | :rtype: dict 146 | """ 147 | 148 | RETRY_DOCSTRING = """ 149 | Retry the task. Returns a new FastmapTask. 150 | 151 | :rtype: FastmapTask 152 | """ 153 | 154 | RETURN_VALUE_DOCSTRING = """ 155 | Return the function's return value. 156 | 157 | Raises a FastmapException if the result cannot be found, 158 | if the task has not completed, or if the task was not successful. 159 | 160 | :rtype: Various 161 | """ 162 | 163 | TRACEBACK_DOCSTRING = """ 164 | Return the traceback of an errored task. 165 | 166 | Raises a FastmapException if the result cannot be found, 167 | if the task has not completed, or if the task did not error. 168 | 169 | :rtype: str 170 | """ 171 | 172 | WAIT_DOCSTRING = """ 173 | Block until the task completes. If the task is 174 | ultimately successful, return the function's return value. 175 | 176 | Raises a FastmapException if the result cannot be found, 177 | or if raise_exceptions is true and the task is not successful. 178 | 179 | :param int polling_interval: 180 | :param bool live_logs: 181 | :param bool raise_exceptions: 182 | :rtype: Various 183 | """ 184 | 185 | INIT_PARAMS = """ 186 | :param str|file|dict config: The json file path / dict of the 187 | configuration. Every subsequent argument will alter this configuration. 188 | This is optional and if empty, only subsequent parameters will be used. 189 | :param str secret: The API token generated on fastmap.io. Treat this like a 190 | password. Do not commit it to version control! 
Failure to do so could 191 | result in man-in-the-middle attacks or your credits being used by others 192 | (e.g. cryptocurrency miners). If None, fastmap will run locally 193 | regardless of exec_policy. 194 | :param str verbosity: 'SILENT', 'QUIET', 'NORMAL', or 'LOUD'. 195 | Default is 'NORMAL'. 196 | :param str exec_policy: 'LOCAL' or 'CLOUD'. Default is 'CLOUD'. 197 | :param str machine_type: 'SPARROW_1', 'PEREGRINE_4', or 'HUMMINGBIRD_7'. Only 198 | for the CLOUD exec_policy. Default is 'SPARROW_1'. 199 | :param list requirements: A list of requirements in "package==1.2.3" style. 200 | If omitted, requirement discovery is automatic. 201 | 202 | For more documentation, go to https://fastmap.io/docs 203 | """ 204 | 205 | GLOBAL_INIT_DOCSTRING = """ 206 | Initialize fastmap globally. All subsequent calls to fastmap will use this 207 | global configuration. Also see documentation for 'init'. 208 | %s 209 | :rtype: None 210 | 211 | Example usage: 212 | 213 | import fastmap 214 | 215 | fastmap.global_init(exec_policy="LOCAL", verbosity="LOUD") 216 | results = fastmap.fastmap(func, iterable) 217 | 218 | """ + INIT_PARAMS 219 | 220 | INIT_DOCSTRING = """ 221 | Create and return a FastmapConfig object. The FastmapConfig object has 222 | a method, fastmap, which can then be called. Also see documentation for 223 | 'global_init'. 224 | %s 225 | :rtype: FastmapConfig 226 | 227 | Example usage: 228 | 229 | import fastmap 230 | 231 | fastmap_config = fastmap.init(machine_type="GPU", verbosity="QUIET") 232 | results = fastmap_config.fastmap(func, iterable) 233 | 234 | """ + INIT_PARAMS 235 | 236 | dill.settings['recurse'] = True 237 | 238 | # TODO 239 | # Make dill.dumps deterministic https://github.com/uqfoundation/dill/issues/19 240 | # This allows for hashing of the functions 241 | # dill._dill._typemap = dict(sorted(dill._dill._typemap.items(), 242 | # key=lambda x: x[1])) 243 | 244 | try: 245 | # Windows / mac use spawn. Linux uses fork. 
Set to spawn 246 | # because it has more issues and this provides a steady dev environment 247 | # Do not remove without at least adding more unit tests which operate 248 | # in a spawn environment 249 | multiprocessing.set_start_method("spawn") 250 | except RuntimeError: 251 | pass 252 | 253 | 254 | class Namespace(dict): 255 | """ 256 | Abstract constants class 257 | Constants can be accessed via .attribute or [key] and can be iterated over. 258 | """ 259 | def __init__(self, *args, **kwargs): 260 | d = {k: k for k in args} 261 | d.update(dict(kwargs.items())) 262 | super().__init__(d) 263 | 264 | def __getattr__(self, item): 265 | if item in self: 266 | return self[item] 267 | raise AttributeError 268 | 269 | 270 | Verbosity = Namespace("SILENT", "QUIET", "NORMAL", "LOUD") 271 | ExecPolicy = Namespace("LOCAL", "CLOUD") 272 | AuthStatus = Namespace("AUTHORIZED") 273 | InitStatus = Namespace("UPLOADED", "FOUND", "NOT_FOUND") 274 | MapStatus = Namespace("NOT_FOUND", "BATCH_PROCESSED", "INITALIZING", "INITIALIZATION_ERROR", "PROCESS_ERROR") 275 | DoneStatus = Namespace("DONE", "NOT_FOUND") 276 | TaskState = Namespace("PENDING", "PROCESSING", "KILLING", "FINISHING", "DONE", "CLEARED") 277 | TaskOutcome = Namespace("SUCCESS", "ERROR", "KILLED_BY_REQUEST", "KILLED_ZOMBIE") 278 | MachineType = Namespace("SPARROW_1", "PEREGRINE_4", "HUMMINGBIRD_7") 279 | Color = Namespace( 280 | GREEN="\033[92m", 281 | RED="\033[91m", 282 | YELLOW="\033[93m", 283 | CYAN="\033[36m", 284 | MAGENTA="\u001b[35m", 285 | CANCEL="\033[0m") 286 | 287 | 288 | DEFAULT_INLINE_CONFIG = { 289 | 'secret': None, 290 | 'cloud_url': 'https://app.fastmap.io', 291 | 'verbosity': Verbosity.NORMAL, 292 | 'exec_policy': ExecPolicy.CLOUD, 293 | 'machine_type': MachineType.SPARROW_1, 294 | 'requirements': None, 295 | } 296 | 297 | DEFAULT_CONFIG_DIR = os.path.join(pathlib.Path.home(), '.fastmap', 'default_config.json') 298 | 299 | 300 | def set_docstring(docstr: str, docstr_prefix='') -> FunctionType: 301 | """ 
Add the given doc string to each function """ 302 | def wrap(func): 303 | func.__doc__ = docstr_prefix + docstr 304 | return func 305 | return wrap 306 | 307 | 308 | def nowstamp() -> int: 309 | return datetime.datetime.now(datetime.timezone.utc).timestamp() 310 | 311 | 312 | def fmt_bytes(num_bytes: int) -> str: 313 | """ 314 | Returns the human-readable byte quantity 315 | e.g. 2048 -> 2.0KB 316 | """ 317 | if num_bytes >= GB: 318 | return "%.1fGB" % (num_bytes / GB) 319 | if num_bytes >= MB: 320 | return "%.1fMB" % (num_bytes / MB) 321 | if num_bytes >= KB: 322 | return "%.1fKB" % (num_bytes / KB) 323 | return "%dB" % num_bytes 324 | 325 | 326 | def fmt_time(num_secs: int) -> str: 327 | """ 328 | Returns a human-readable time scalar 329 | e.g. 121 -> 02:01 330 | """ 331 | hours, remainder = divmod(num_secs, 3600) 332 | mins, secs = divmod(remainder, 60) 333 | if hours > 0: 334 | return '{:02}:{:02}:{:02}'.format(int(hours), int(mins), int(secs)) 335 | if mins > 0: 336 | return '{:02}:{:02}'.format(int(mins), int(secs)) 337 | return '{}s'.format(int(secs)) 338 | 339 | 340 | def fmt_dur(num_secs: int) -> str: 341 | """ 342 | Returns a human-readable time scalar 343 | e.g. 121 -> 2.02 minutes 344 | """ 345 | if num_secs >= 3600: 346 | return "%.2f hours" % (num_secs / 3600) 347 | if num_secs >= 60: 348 | return "%.2f minutes" % (num_secs / 60) 349 | if num_secs >= 1: 350 | return "%.2f seconds" % (num_secs) 351 | return "%d milliseconds" % (round(num_secs * 1000)) 352 | 353 | 354 | def get_credits(seconds: float, bytes_egress: float) -> float: 355 | """ 356 | Estimate the number of credits spent. 357 | 100 credits per vcpu hour + 100 credits per byte egress 358 | """ 359 | return 8 * (seconds * 10.0 / 3600.0 + bytes_egress * 10.0 / GB) 360 | 361 | 362 | def get_hash(binary: bytes) -> str: 363 | """ 364 | Get the function hash for a dill pickled function. 
365 | This is used mostly for caching and bucketing 366 | 367 | TODO: 368 | The problem is that dill is not deterministic so if we just take a 369 | hash of the pickled function, we will get different values each time. 370 | For now, this is fine because we are basically using the hash for the 371 | single run. In the future, we will need to make dill deterministic. 372 | 373 | To save time, don't try an approach with inspect.getsourcelines 374 | If upstream functions change, it won't capture the difference. 375 | 376 | Approach will be to find non-deteministic aspects of Python and replace 377 | them one-by-one in dill 378 | """ 379 | return hashlib.sha256(binary).hexdigest()[:16] 380 | 381 | 382 | class FastmapException(Exception): 383 | """ 384 | Thrown when something goes wrong running the user's code on the 385 | cloud or on separate processes. 386 | """ 387 | 388 | 389 | class FastmapUnexpectedException(FastmapException): 390 | """ 391 | Thrown when something in a post request results in a non-200. 392 | Should be caught by anything that calls post_request 393 | 394 | The traceback (tb) can also be used to add more context to cloud errors 395 | """ 396 | def __init__(self, *args, **kwargs): 397 | try: 398 | self.tb = kwargs.pop('tb') 399 | except KeyError: 400 | self.tb = None 401 | super().__init__(*args, **kwargs) 402 | 403 | 404 | def simplified_tb(): 405 | """ 406 | Given a traceback, remove fastmap-specific lines (this file + dependencies) 407 | to make it easier for the end user to read and hide the sausage-making. 408 | To do so, go through a traceback line-by-line in reverse. The moment 409 | we have a fastmap-specific line, break and return 410 | """ 411 | skip_dir_paths = ( 412 | ' File "' + os.path.abspath(__file__), 413 | '/layers/google.python.pip/pip/', 414 | ) 415 | tb_list = [] 416 | tb_lines = traceback.format_exc().split('\n') 417 | preamble = tb_lines.pop() # we want "Traceback (most rec..." 
no matter what 418 | for tb_line in reversed(tb_lines): 419 | if any(tb_line.startswith(path) for path in skip_dir_paths): 420 | # pop prev tb_line b/c each stack layer is 2 lines: file-loc & code 421 | tb_list.pop() 422 | break 423 | tb_list.append(tb_line) 424 | tb_list.append(preamble) 425 | 426 | return '\n'.join(reversed(tb_list)).strip() 427 | 428 | 429 | def get_func_name(func: FunctionType) -> str: 430 | """Robust way to get the name of a random function""" 431 | try: 432 | name = func.__name__ 433 | except AttributeError: 434 | name = repr(func) 435 | name = name[:40] + "..." if len(name) >= 45 else name 436 | return name 437 | 438 | 439 | class FastmapLogger(): 440 | """ 441 | FastmapLogger exists primarily because it is difficult to pass python's 442 | native logger between processes. Doing so was requiring a lot of 443 | weird workarounds. Otherwise, it should behave similarly 444 | """ 445 | def __init__(self, verbosity: str): 446 | self.verbosity = verbosity 447 | self.restore_verbosity() 448 | 449 | def restore_verbosity(self): 450 | self.debug = self._debug 451 | self.info = self._info 452 | self.warning = self._warning 453 | self.error = self._error 454 | if self.verbosity == Verbosity.LOUD: 455 | pass 456 | elif self.verbosity == Verbosity.NORMAL: 457 | self.debug = self._do_nothing 458 | elif self.verbosity == Verbosity.QUIET: 459 | self.debug = self._do_nothing 460 | self.info = self._do_nothing 461 | elif self.verbosity == Verbosity.SILENT: 462 | self.debug = self._do_nothing 463 | self.info = self._do_nothing 464 | self.warning = self._do_nothing 465 | else: 466 | raise FastmapException(f"Unknown verbosity '{self.verbosity}'") 467 | 468 | def hush(self): 469 | self.debug = self._do_nothing # noqa 470 | self.info = self._do_nothing # noqa 471 | self.warning = self._do_nothing # noqa 472 | self.error = self._do_nothing # noqa 473 | 474 | def _do_nothing(self, *args): 475 | # This instead of a lambda because of pickling in multiprocessing 476 | 
pass 477 | 478 | def _debug(self, msg, *args): 479 | if args: 480 | msg = msg % args 481 | print("\033[K" + Color.CYAN + "fastmap DEBUG:" + Color.CANCEL, msg) 482 | 483 | def _info(self, msg, *args): 484 | if args: 485 | msg = msg % args 486 | print("\033[K" + Color.YELLOW + "fastmap INFO:" + Color.CANCEL, msg) 487 | 488 | def _warning(self, msg, *args): 489 | if args: 490 | msg = msg % args 491 | print("\033[K" + Color.RED + "fastmap WARNING:" + Color.CANCEL, msg) 492 | 493 | def _error(self, msg, *args): 494 | if args: 495 | msg = msg % args 496 | print("\033[K" + Color.RED + "fastmap ERROR:" + Color.CANCEL, msg, flush=True) 497 | 498 | def input(self, msg): 499 | # This exists mostly for test mocking 500 | return input(Color.CYAN + "\nfastmap: " + msg + Color.CANCEL) 501 | 502 | 503 | 504 | # def local_worker_func(func: FunctionType, itdm: InterThreadDataManager, 505 | # log: FastmapLogger) -> None: 506 | # """ 507 | # A single persistent local worker. This function will process one 508 | # batch at a time until there are none left. 
#     """
#     func = dill.loads(func)
#     try:
#         batch_tup = itdm.checkout()
#         while batch_tup:
#             batch_idx, batch_iter = batch_tup
#             start = time.perf_counter()
#             ret = list(map(func, batch_iter))
#             total_proc_time = time.perf_counter() - start
#             runtime = total_proc_time / len(ret)
#             log.debug("Batch %d local cnt=%d dur=%.2fs (%.2e/el).",
#                       batch_idx, len(ret), total_proc_time, runtime)
#             itdm.push_outbox(batch_idx, ret, runtime)
#             batch_tup = itdm.checkout()
#     except Exception as e:
#         proc_name = multiprocessing.current_process().name
#         itdm.put_error(proc_name, repr(e), batch_tup)
#         tb = simplified_tb(traceback.format_exc())
#         log.error("In local worker [%s]:\n %s.",
#                   multiprocessing.current_process().name, tb)
#     return


def auth_token(secret: str) -> str:
    """
    The auth token is the first 34 characters of the secret
    (which includes the P/S signifier). It is sent to the server.
    """
    return secret[:34]


def sign_token(secret: str) -> str:
    """
    The sign token is the remainder of the secret after the auth token
    (everything past the first 34 characters). It is used as the HMAC key.
    """
    return secret[34:]


def hmac_digest(secret: str, payload: bytes) -> str:
    """ With our secret, generate a hex SHA-256 HMAC signature for the payload """
    return hmac.new(sign_token(secret).encode(), payload,
                    digestmod=hashlib.sha256).hexdigest()


def basic_headers(secret: str, payload: bytes) -> dict:
    """
    Basic headers needed for most API calls: bearer auth, client/python
    versions, and the HMAC signature of the payload.
    """
    return {
        'Authorization': 'Bearer ' + auth_token(secret),
        'X-Python-Version': sys.version.replace('\n', ''),
        'X-Client-Version': CLIENT_VERSION,
        'X-Content-Signature': hmac_digest(secret, payload),
        # 'X-Request-ID': secrets.token_hex()[:5],  # commented out because not using
    }


def post_request(url: str, data: dict, secret: str,
                 log: FastmapLogger) -> requests.Response:
    """
    Generic cloud post wrapper.
    This does warning/error management, extracts content, and checks signatures
    for every API post

    :param str url: Endpoint to post to.
    :param dict|bytes data: Payload. A dict is msgpack-encoded first; anything
        else is sent as-is.
    :param str secret: API secret; used for the auth header and signatures.
    :param FastmapLogger log:
    :rtype: requests.Response with two extra attributes: ``status`` (the
        X-Status header, or None if absent) and ``obj`` (the decoded msgpack
        body).

    Raises FastmapException for 401/402/403/410 responses and
    FastmapUnexpectedException for connection failures, 400/500 responses,
    unsigned or mis-signed payloads, undecodable bodies, and any other
    status / content type.
    """
    if isinstance(data, dict):
        data = msgpack.dumps(data)

    headers = basic_headers(secret, data)
    start_time = nowstamp()
    log.debug("Posting to %s with %s", url, fmt_bytes(len(data)))
    try:
        # I decided not to put this in a retry loop because it would be
        # engineering for a problem that I don't have yet
        resp = requests.post(url, data=data, headers=headers)
    except requests.exceptions.ConnectionError:
        ping_url = url.split('/api')[0] + '/api/v1/ping'
        raise FastmapUnexpectedException(
            "Fastmap could not connect to %r. "
            "Check your network connection. To check if your "
            "server is running, try: `curl %s`." % (url, ping_url)) from None
    log.debug("Posted %s in %.2fs", url, nowstamp() - start_time)

    if 'X-Server-Warning' in resp.headers:
        # deprecations or anything else
        log.warning(resp.headers['X-Server-Warning'])

    # Error responses are expected to carry an explanation in X-Reason.
    # Use .get so that a missing header cannot mask the real error with a
    # KeyError.
    reason = resp.headers.get('X-Reason', '')

    if resp.status_code == 400:
        # BAD_REQUEST
        raise FastmapUnexpectedException("Bad request: %r" % reason)

    if resp.status_code == 401:
        # UNAUTHORIZED
        raise FastmapException(reason)

    if resp.status_code == 402:
        # NOT_ENOUGH_CREDITS
        raise FastmapException("You have exhausted your credits. "
                               "Contact your admin or purchase more credits.")

    if resp.status_code == 403:
        # INVALID_SIGNATURE
        raise FastmapException("Your signature was invalid. Check your API token.")

    if resp.status_code == 410:
        # DISCONTINUED
        raise FastmapException("Deprecated: %r" % reason)

    if resp.status_code == 500:
        raise FastmapUnexpectedException("Fastmap 500 error: %r" % resp.content)

    if resp.status_code == 200 and \
            resp.headers.get('Content-Type') == 'application/msgpack':
        if 'X-Content-Signature' not in resp.headers:
            raise FastmapUnexpectedException("Cloud payload was not signed (%d). "
                                             "Will not unpickle." % (resp.status_code))
        cloud_hash = hmac_digest(secret, resp.content)
        # Constant-time comparison so signature checking does not leak
        # timing information. Encoded to bytes because compare_digest only
        # accepts ASCII str.
        if not hmac.compare_digest(resp.headers['X-Content-Signature'].encode(),
                                   cloud_hash.encode()):
            raise FastmapUnexpectedException("Cloud checksum did not match. "
                                             "Will not unpickle.")
        resp.status = resp.headers.get('X-Status')
        log.debug("Response %s %s", resp.status_code, resp.status)
        try:
            resp.obj = msgpack.loads(resp.content)
        except Exception:
            # Needs to be plain 'Exception'
            # msgpack-python.readthedocs.io/en/latest/_modules/msgpack/exceptions.html
            raise FastmapUnexpectedException("Error unpacking response") from None
        return resp

    raise FastmapUnexpectedException("Unexpected Status / Content-Type %d (%s): %r" %
                                     (resp.status_code,
                                      resp.headers.get("Content-Type"),
                                      resp.content[:100].strip()))


# def process_cloud_batch(itdm: InterThreadDataManager, batch_tup: tuple,
#                         map_url: str, func_hash: str, label: str,
#                         run_id: str, secret: str, log: FastmapLogger) -> None:
#     """
#     For /api/v1/map, finish preparing the request, send it, and handle the
#     response. Processed batches go back into the itdm. If a
#     processed batch leaves this function, it will end up back with the user.
643 | # """ 644 | 645 | # start_req_time = time.perf_counter() 646 | 647 | # batch_idx, batch = batch_tup 648 | # try: 649 | # pickled_batch = dill.dumps(batch) 650 | # except Exception as ex: 651 | # raise FastmapUnexpectedException("Could not pickle your data. " 652 | # "Fastmap cannot run on the cloud.") from ex 653 | # compressed_batch = gzip.compress(pickled_batch, compresslevel=1) 654 | # payload = { 655 | # 'func_hash': func_hash, 656 | # 'batch': compressed_batch, 657 | # 'label': label, 658 | # 'run_id': run_id, 659 | # } 660 | 661 | # while True: 662 | # log.debug("Making cloud request batchlen=%d size=%s (%s/el)...", 663 | # len(batch), fmt_bytes(len(compressed_batch)), 664 | # fmt_bytes(len(compressed_batch) / len(batch))) 665 | # try TODO 666 | # resp = post_request(map_url, payload, secret, log) 667 | # if resp.status_code == 200: 668 | # if resp.status == MapStatus.INITALIZING: 669 | # log.debug("Cloud worker is initializing. Last msg [%s]." 670 | # " Retrying in 5 seconds..." % 671 | # (resp.obj.get('init_step', ''))) 672 | # time.sleep(5) 673 | # continue 674 | # elif resp.status == MapStatus.INITIALIZATION_ERROR: 675 | # raise FastmapUnexpectedException("Error initializing worker %r %r" % ( 676 | # resp.obj.get('init_error'), resp.obj.get('init_tb'))) 677 | # break 678 | 679 | # mem_used = (float(resp.headers.get('X-Mem-Used', 0.0)) 680 | # / float(resp.headers.get('X-Mem-Total', 1.0))) 681 | # if mem_used > 0.9: 682 | # log.warning("Cloud memory utilization high: %.2f%%. " 683 | # "Consider increasing memory." 
% mem_used * 100) 684 | 685 | # if resp.status == MapStatus.BATCH_PROCESSED: 686 | # service_id = resp.headers['X-Service-Id'] 687 | # total_request = time.perf_counter() - start_req_time 688 | # total_application = float(resp.headers['X-Application-Seconds']) 689 | # total_mapping = float(resp.headers['X-Map-Seconds']) 690 | # credits_used = float(resp.headers['X-Credits']) 691 | # result_len = len(resp.obj['results']) 692 | # req_time_per_el = total_request / result_len 693 | # app_time_per_el = total_application / result_len 694 | # map_time_per_el = total_mapping / result_len 695 | 696 | # log.debug("Batch %d cloud cnt=%d " 697 | # "%.2fs/%.2fs/%.2fs map/app/req (%.2e/%.2e/%.2e per el) " 698 | # "[%s].", 699 | # batch_idx, result_len, 700 | # total_mapping, total_application, total_request, 701 | # map_time_per_el, app_time_per_el, req_time_per_el, 702 | # service_id) 703 | # itdm.push_outbox(batch_idx, 704 | # resp.obj['results'], 705 | # None, 706 | # credits_used=credits_used, 707 | # network_seconds=total_request - total_application) 708 | # return 709 | 710 | # if resp.status == MapStatus.PROCESS_ERROR: 711 | # msg = "Your code could not be processed on the cloud: %s. " % \ 712 | # resp.obj.get('exception') 713 | # bad_modules = resp.obj.get('bad_modules', []) 714 | # if bad_modules: 715 | # msg += "Modules with errors on import: %s." % ' '.join(bad_modules) 716 | # msg += "You might need to explicitly specify a requirements file " \ 717 | # "in your deployment." 718 | # raise FastmapUnexpectedException(msg, tb=resp.obj.get('traceback', '')) 719 | # if resp.status == MapStatus.NOT_FOUND: 720 | # msg = "Your function was not found on the cloud." 721 | # raise FastmapUnexpectedException(msg) 722 | # if resp.status_code == 402: 723 | # # NOT_ENOUGH_CREDITS 724 | # raise FastmapUnexpectedException("Insufficient credits for this request. " 725 | # "Your current balance is $%.4f." 
% 726 | # resp.obj.get('credits_used', 0) / 100) 727 | # if resp.status_code == 403: 728 | # # INVALID_SIGNATURE 729 | # raise FastmapUnexpectedException("Invalid signature. Check your token") 730 | # if resp.status_code == 410: 731 | # # DISCONTINUED (post-deprecated end-of-life) 732 | # raise FastmapUnexpectedException("Fastmap.io API discontinued: %r" % resp.obj.get('reason')) 733 | # if resp.status_code == 413: 734 | # # TOO_LARGE 735 | # payload_len = len(msgpack.dumps(payload)) 736 | # raise FastmapUnexpectedException("Your request was too large (%s). " 737 | # "Find a way to reduce the size of your data or " 738 | # "function and try again." % fmt_bytes(payload_len)) 739 | 740 | # if resp.status_code == 500 and resp.headers['Content-Type'] == 'text/html': 741 | # content = re.sub('<[^<]+?>', '', resp.text) 742 | # raise FastmapUnexpectedException("Unexpected cloud error 500. You might have run out " 743 | # "of memory. %s" % content) 744 | 745 | # # catch all (should just be for 500s of which a few are explicitly defined) 746 | # raise FastmapUnexpectedException("Unexpected cloud response %d %s %r" % 747 | # (resp.status_code, resp.status, resp.obj)) 748 | 749 | 750 | # def cloud_thread(thread_id: str, map_url: str, func_hash: str, label: str, 751 | # run_id: str, itdm: InterThreadDataManager, secret: str, 752 | # log: FastmapLogger): 753 | # """ 754 | # A thread for running cloud requests in a loop. Batches are pulled out of 755 | # the itdm and passed into process_cloud_batch one-by-one until they are 756 | # exhausted. 
#     This also does some basic
#     """
#     batch_tup = itdm.checkout()
#     if batch_tup:
#         log.debug("Starting cloud thread %d []...", thread_id)
#     while batch_tup:
#         try:
#             process_cloud_batch(itdm, batch_tup, map_url, func_hash,
#                                 label, run_id, secret, log)
#         except FastmapUnexpectedException as e:
#             proc_name = multiprocessing.current_process().name
#             thread_id = threading.get_ident()
#             error_loc = "%s: thread:%d" % (proc_name, thread_id)
#             itdm.put_error(error_loc, repr(e), batch_tup)
#             if hasattr(e, 'tb') and e.tb:
#                 tb = e.tb.replace('%0A', '\n')
#                 log.error("In cloud thread [%s]:\n%s.",
#                           threading.current_thread().name, tb)
#             else:
#                 log.error("In cloud thread [%s]: %r.",
#                           threading.current_thread().name, e)
#             log.error("Shutting down cloud thread [%s] due to error...",
#                       threading.current_thread().name)
#             return

#         batch_tup = itdm.checkout()


def get_modules(log: FastmapLogger) -> (Dict[str, str], List[ModuleType]):
    """
    Get in scope modules.
    Returns two things:
    1. a dictionary of all mod_name -> source for local modules
    2. a list of ModuleType for modules found in site packages
    Builtin, underscore-prefixed, file-less, standard-library, and fastmap
    modules are skipped silently. Local modules that are not plain .py
    files (or whose source is empty) are not included in the dictionary.
    """
    # Local import: sysconfig is the stdlib replacement for the deprecated
    # (and, from Python 3.12, removed) distutils.sysconfig.
    import sysconfig

    std_lib_dir = os.path.realpath(sysconfig.get_path('stdlib'))
    local_sources = {}
    installed_mods = []
    # Snapshot sys.modules: it can change size while we iterate (lazy
    # imports), which would raise RuntimeError.
    for mod_name, mod in list(sys.modules.items()):
        if mod_name in sys.builtin_module_names:
            # skip: builtin
            continue
        if mod_name.startswith("_"):
            # skip: hidden
            continue
        if not getattr(mod, '__file__', None):
            # skip: no backing file (also effectively builtin / namespace)
            continue
        mod_path = os.path.realpath(mod.__file__)
        if mod_path.startswith(std_lib_dir) and 'site-packages' not in mod_path:
            # skip: standard library
            continue
        if hasattr(mod, "__package__") and \
                mod.__package__ in ("fastmap", "fastmap.fastmap"):
            # skip: fastmap itself
            continue

        # Through with the silent skips.
        # Looking for local_sources and installed_mods
        if SITE_PACKAGES_RE.match(mod.__file__):
            installed_mods.append(mod)
            continue
        if not mod.__file__.endswith('.py'):
            log.warning("The module %r is a non-Python locally-built module "
                        "which cannot be uploaded.", mod)
            continue
        with open(mod.__file__, encoding='utf-8') as f:
            source = f.read()
        if source:
            local_sources[mod.__name__] = source

    return local_sources, installed_mods


def get_requirements(installed_mods: List[ModuleType],
                     log: FastmapLogger) -> List[str]:
    """
    Work out pip-style requirements for the given installed modules.

    Each module's site-packages directory is scanned for
    ``*.dist-info/top_level.txt`` files to map top-level module names to
    distribution names (read from the neighbouring METADATA file). The
    installed version of each distribution is then looked up.

    :param list installed_mods: Modules whose __file__ matches
        SITE_PACKAGES_RE (as returned by get_modules).
    :param FastmapLogger log:
    :rtype: list[str] sorted "package==version" strings
    Raises FastmapException if a METADATA file has no parsable Name field.
    """
    imported_module_names = set()
    site_packages_dirs = set()
    for mod in installed_mods:
        mod_name = getattr(mod, '__package__', None) or mod.__name__
        imported_module_names.add(mod_name)
        site_packages_dirs.add(SITE_PACKAGES_RE.match(mod.__file__).group(0))

    top_level_files = set()
    for site_packages_dir in site_packages_dirs:
        top_level_path = site_packages_dir + "*.dist-info/top_level.txt"
        top_level_files.update(glob.glob(top_level_path))

    packages_by_module = collections.defaultdict(set)
    for fn in top_level_files:
        with open(fn, encoding='utf-8') as f:
            modules = [line.strip() for line in f if line.strip()]
        # os.path instead of splitting on '/' so this also works with
        # Windows path separators returned by glob.
        metadata_fn = os.path.join(os.path.dirname(fn), 'METADATA')
        pkg_name = None
        with open(metadata_fn, encoding='utf-8') as f:
            for row in f:
                # '_' and '.' are legal in distribution names; without them
                # "typing_extensions" would be read as "typing".
                if match := re.match(r"Name: (?P<pkg_name>[A-Za-z0-9._-]+)", row):
                    pkg_name = match['pkg_name']
                    break
        if not pkg_name:
            raise FastmapException("No package name for %r" % fn)
        for mod_name in modules:
            packages_by_module[mod_name].add(pkg_name)

    requirements = {}
    missed_modules = set()
    for mod_name in imported_module_names:
        pkg_names = packages_by_module.get(mod_name)
        if not pkg_names:
            # log.warning("Could not find version for module %r. Skipping...",
            #             mod_name)
            missed_modules.add(mod_name)
            # requirements[mod_name] = None
            continue
        for pkg_name in pkg_names:
            try:
                requirements[pkg_name] = importlib.metadata.version(pkg_name)
            except importlib.metadata.PackageNotFoundError:
                log.debug("No installed distribution found for %r. Skipping...",
                          pkg_name)

    # one last run-through to make sure we didn't forget anything
    # this fixed the issue with google.cloud.vision import
    for missed_mod in missed_modules:
        missed_mod = missed_mod.replace('.', '-')
        try:
            pkg_version = importlib.metadata.version(missed_mod)
        except Exception:
            # Best effort only. (Was a bare except, which also swallowed
            # KeyboardInterrupt / SystemExit.)
            continue
        requirements[missed_mod] = pkg_version

    return sorted(f'{k}=={v}' for k, v in requirements.items())


def get_dependencies(requirements: list, log: FastmapLogger) -> (dict, list, list):
    """
    Collect everything the cloud needs to recreate the local environment.

    :param list requirements: Explicit "package==1.2.3" requirements. If
        empty or None, requirements are autodetected.
    :param FastmapLogger log:
    :rtype: tuple of
        1. dict of local module name -> source code
        2. list of installed (site-packages) module names
        3. list of requirement strings
    """
    local_sources, installed_mods = get_modules(log)
    log.debug("Found %d installed modules", len(installed_mods))
    log.debug("Found local imports %r", sorted(local_sources.keys()))

    if requirements:
        log.debug("Skipping requirements autodetect.")
    else:
        requirements = get_requirements(installed_mods, log)
        log.info("Autodetected requirements %r.", requirements)

    installed_mods = [im.__name__ for im in installed_mods]
    return local_sources, installed_mods, requirements


def seq_batcher(sequence: Sequence, size: int) -> Generator:
    """
    Yield consecutive slices of `sequence`, each at most `size` long.
    (Previously called len() on the typing.Sequence class instead of the
    argument, which always raised TypeError.)
    """
    seq_len = len(sequence)
    for idx in range(0, seq_len, size):
        yield sequence[idx:min(idx + size, seq_len)]


def pickle_function(func, func_name):
    """
    Pickle `func` with dill (recursing into referenced globals).
    Raises FastmapException, chained to the original error, on failure.
    `func_name` is only used for the error message.
    """
    try:
        return dill.dumps(func, recurse=True)
    except Exception as ex:
        err = "Your function %r could not be pickled." % func_name
        raise FastmapException(err) from ex


class HeartbeatIO():
    """
    Minimal write-only file-like object. Every write is forwarded to
    logs_queue and also pushes a timestamp onto heartbeat_queue, so output
    from the task doubles as a sign of life.
    """
    def __init__(self, logs_queue, heartbeat_queue):
        self.logs_queue = logs_queue
        self.heartbeat_queue = heartbeat_queue
        self.is_open = True

    def write(self, s):
        self.logs_queue.put(s)
        self.heartbeat_queue.put(nowstamp())
        return len(s)

    def flush(self):
        # Nothing is buffered locally.
        pass

    def close(self):
        self.is_open = False


class RedirectStdStreams(object):
    """
    Context manager that points both sys.stdout and sys.stderr at the given
    stream and restores the previous streams on exit.
    """
    def __init__(self, heartbeat_io):
        self.heartbeat_io = heartbeat_io

    def __enter__(self):
        self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
        # Flush pending output so it is not attributed to the new stream.
        self.old_stdout.flush()
        self.old_stderr.flush()
        sys.stdout = self.heartbeat_io
        sys.stderr = self.heartbeat_io

    def __exit__(self, exc_type, exc_value, exc_tb):
        sys.stdout = self.old_stdout
        sys.stderr = self.old_stderr


def heartbeat_loop(heartbeat_queue, kill_queue):
    """
    Thread target: push a timestamp onto heartbeat_queue immediately and
    then whenever more than 60 seconds have passed since the last one.
    Checks kill_queue once per second and exits as soon as anything is put
    on it.
    """
    last_send = None
    while True:
        try:
            kill_queue.get(block=False)
            break
        except queue.Empty:
            pass
        if last_send is None or nowstamp() - last_send > 60:
            heartbeat_queue.put(nowstamp())
            last_send = nowstamp()
        time.sleep(1)


def local_offload_wrapper(func_payload, result_queue, logs_queue, heartbeat_queue):
    """
    Process target for a locally-run offloaded task.

    Unpacks the gzip+msgpack function payload, runs the dill-pickled
    function under 'func' with stdout/stderr redirected to logs_queue, and
    puts ``(dill-pickled result dict, runtime seconds)`` on result_queue.
    The result dict has the keys outcome / return_value / exception /
    traceback. A background thread emits heartbeats while the function runs.
    """
    start_time = nowstamp()
    func_dict = msgpack.loads(gzip.decompress(func_payload))
    heartbeat_io = HeartbeatIO(logs_queue, heartbeat_queue)
    kill_queue = multiprocessing.Queue()
    heartbeat_thread = threading.Thread(target=heartbeat_loop,
                                        args=(heartbeat_queue, kill_queue))
    heartbeat_thread.start()

    try:
        with RedirectStdStreams(heartbeat_io):
            try:
                func = dill.loads(func_dict['func'])
                ret = func()
                result_dict = {
                    'outcome': 'SUCCESS',
                    'return_value': ret,
                    'exception': None,
                    'traceback': None,
                }
                pickled_result = dill.dumps(result_dict)
            except Exception as ex:
                # Also reached when the return value itself cannot be pickled.
                result_dict = {
                    'outcome': 'ERROR',
                    'return_value': None,
                    'exception': repr(ex),
                    'traceback': simplified_tb(),
                }
                pickled_result = dill.dumps(result_dict)
    finally:
        # Always stop the heartbeat thread. It is not a daemon thread, so
        # if the user function raised SystemExit / KeyboardInterrupt the
        # process would otherwise never exit.
        kill_queue.put(True)
    runtime = nowstamp() - start_time
    result_queue.put((pickled_result, runtime))


# def local_map_wrapper(func_payload, pickled_iterable, result_queue, logs_queue):
#     func = dill.loads(pickled_func)
#     iterable = dill.loads(pickled_iterable)
#     logs = HeartbeatIO(logs_queue)
#     start_time = datetime.datetime.now()
#     with contextlib.redirect_stderr(logs):
#         with contextlib.redirect_stdout(logs):
#             try:
#                 ret = list(map(func, iterable))  # TODO multiprocessing.Pool
#                 resp = {
#                     'outcome': 'SUCCESS',
#                     'return_value': ret,
#                     'exception': None,
#                     'tb': None,
#                 }
#             except Exception as ex:
#                 resp = {
#             'outcome': 'ERROR',
#             'return_value': None,
#             'exception': repr(ex),
#             'tb': simplified_tb(),
#         }
#     runtime = (datetime.datetime.now() - start_time).total_seconds()
#     result_queue.put((dill.dumps(resp), runtime))


def task_hook_thread(task, hook):
    """
    Thread target used by FastmapTask.add_hook.

    Blocks on task.wait() and then calls hook with whatever wait() returned
    (wait() returns None when the task did not succeed). If waiting raises,
    the hook is not called.
    """
    try:
        ret = task.wait()
    except Exception:
        # Narrowed from a bare `except:` so that SystemExit and
        # KeyboardInterrupt are no longer swallowed here.
        return
    hook(ret)


OfldStatus = Namespace("NOT_FOUND", "ACKNOWLEDGED", "ERROR")


class FastmapTask:
    """
    Behaviour shared by local and cloud tasks.

    Subclasses are expected to set task_id, _config, _task_state, _outcome,
    _result_dict and _all_logs, and to implement poll(), _fetch_logs() and
    _fetch_result_dict(); every method below relies on them.
    """

    POLLING_INTERVAL = 3  # default seconds slept between checks in wait()

    def __repr__(self):
        return "<%s id=%s state=%s outcome=%s>" % (
            self.__class__.__name__, self.task_id, self._task_state, self._outcome)

    def add_hook(self, hook):
        """Start a thread that waits for this task and then calls hook(result)."""
        t = threading.Thread(target=task_hook_thread, args=(self, hook))
        t.start()

    def wait(self, polling_interval=None, live_logs=False, raise_exceptions=False):
        """
        Block until the task reaches a final outcome or state.

        :param polling_interval: seconds between checks; falls back to
            POLLING_INTERVAL when falsy.
        :param live_logs: if True, write task logs to stdout while waiting
            (fetching logs also refreshes the task state); otherwise poll().
        :param raise_exceptions: if True, raise FastmapException when the task
            errored, was killed, is being killed or was cleared. Otherwise the
            condition is logged and None is returned.
        :return: the task's return value on success, otherwise None.
        """
        def handle_anomaly(msg):
            if raise_exceptions:
                raise FastmapException(msg)
            self._config.log.info(msg)

        def show(logs):
            if logs:
                sys.stdout.write("\033[K" + Color.MAGENTA + logs + Color.CANCEL)

        self._config.log.info("Waiting for task to finish...")
        if live_logs:
            # Replay everything logged so far before streaming new output.
            show(self.all_logs())

        while True:
            if live_logs:
                show(self.new_logs())
            else:
                self.poll()
            if self._outcome == TaskOutcome.SUCCESS:
                return self.return_value()
            if self._outcome == TaskOutcome.ERROR:
                tb = self.traceback()
                handle_anomaly("Task error %r" % self._result_dict['exception'])
                print(tb)
                return
            if self._outcome in (TaskOutcome.KILLED_BY_REQUEST, TaskOutcome.KILLED_ZOMBIE):
                handle_anomaly("Task was killed")
                return
            if self._task_state == TaskState.CLEARED:
                handle_anomaly("Task has been cleared")
                return
            if self._task_state == TaskState.KILLING:
                handle_anomaly("Task is being killed")
                return
            time.sleep(polling_interval or self.POLLING_INTERVAL)

    def traceback(self):
        """
        Return the traceback stored in the result of an errored task.

        :raises FastmapException: if the task is cleared, not done, or did
            not end with an ERROR outcome.
        """
        self._fetch_result_dict()
        if self._task_state == TaskState.CLEARED:
            raise FastmapException("Traceback cannot be retrieved because task is cleared.")
        if self._task_state != TaskState.DONE:
            raise FastmapException("Traceback cannot be retrieved because task is not done.")
        if self._outcome != TaskOutcome.ERROR:
            raise FastmapException("Traceback cannot be retrieved because task did not error.")
        return self._result_dict['traceback']

    def return_value(self):
        """
        Return the value produced by a successful task.

        :raises FastmapException: if the task is cleared, not done, or did
            not end with a SUCCESS outcome.
        """
        self._fetch_result_dict()
        if self._task_state == TaskState.CLEARED:
            raise FastmapException("Return value cannot be retrieved because task is cleared.")
        if self._task_state != TaskState.DONE:
            raise FastmapException("Return value cannot be retrieved because task is not done.")
        if self._outcome != TaskOutcome.SUCCESS:
            raise FastmapException("Return value cannot be retrieved because task did not succeed.")
        return self._result_dict['return_value']

    def new_logs(self):
        """
        Return the log text fetched by this call only.

        :raises FastmapException: if the task is cleared.
        """
        new_logs = self._fetch_logs()
        if self._task_state == TaskState.CLEARED:
            raise FastmapException("Logs cannot be retrieved because task is cleared.")
        return new_logs

    def all_logs(self):
        """
        Fetch any pending logs and return everything accumulated so far.

        :raises FastmapException: if the task is cleared.
        """
        self._fetch_logs()
        if self._task_state == TaskState.CLEARED:
            raise FastmapException("Logs cannot be retrieved because task is cleared.")
        return self._all_logs


def gen_id(chars=12):
    """Return a random identifier of `chars` ASCII letters and digits."""
    # 62 ** 12 = 3*10^21. World generates 3*10^22 bytes per year so this feels ok
    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(chars))


class FastmapLocalTask(FastmapTask):
    """A task executed in a child process on this machine."""

    def __init__(self, config, func_name, task_type, proc=None, func_payload=None,
                 result_queue=None, logs_queue=None, heartbeat_queue=None,
                 hook=None, webhook=None, label=''):
        self.task_id = gen_id()
        self.task_type = task_type
        self._config = config
        self._task_state = None
        self._func_name = func_name
        self._func_payload = func_payload  # kept so retry() can resubmit
        self._label = label
        self._hook = hook
        self._webhook = webhook
        self._outcome = None
        self._result_dict = None
        self._runtime = None
        self._heartbeat_ts = None  # last timestamp read from heartbeat_queue

        self._proc = proc
        self._result_queue = result_queue
        self._logs_queue = logs_queue
        self._heartbeat_queue = heartbeat_queue
        self._all_logs = ""
        self._starttime = nowstamp()

    @staticmethod
    def _start_process(proc):
        """Start the child process, translating RuntimeError into FastmapException."""
        try:
            proc.start()
        except RuntimeError:
            raise FastmapException("Error starting local process. It's likely "
                                   "that you need to wrap your code in an "
                                   "`if __name__ == '__main__'` context.")

    @staticmethod
    def create(config, func_payload, func_name, hook, webhook, label):
        """
        Start local_offload_wrapper in a new process and return its task.

        :raises FastmapException: if the process cannot be started.
        """
        result_queue = multiprocessing.Queue()
        logs_queue = multiprocessing.Queue()
        heartbeat_queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=local_offload_wrapper,
                                       args=(func_payload, result_queue,
                                             logs_queue, heartbeat_queue))
        FastmapLocalTask._start_process(proc)
        fp = FastmapLocalTask(config, func_name, "OFFLOAD", proc=proc,
                              func_payload=func_payload, result_queue=result_queue,
                              logs_queue=logs_queue, heartbeat_queue=heartbeat_queue,
                              label=label, hook=hook, webhook=webhook)
        if hook:
            fp.add_hook(hook)
        return fp

    @staticmethod
    def create_map(config, func_payload, func_name, iterable, hook, webhook, label):
        """
        Start a local map task.

        :raises NotImplementedError: while no `local_map_wrapper` function is
            defined in this module.
        :raises FastmapException: if the process cannot be started.
        """
        # local_map_wrapper only exists as commented-out code at the moment, so
        # referencing the bare name used to fail with NameError. Look it up and
        # fail explicitly instead, like FastmapLocalConfig.map does.
        wrapper = globals().get("local_map_wrapper")
        if wrapper is None:
            raise NotImplementedError("Local map tasks are not implemented yet.")

        pickled_iterable = dill.dumps(iterable)
        result_queue = multiprocessing.Queue()
        logs_queue = multiprocessing.Queue()
        heartbeat_queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=wrapper,
                                       args=(func_payload, pickled_iterable,
                                             result_queue, logs_queue, webhook, heartbeat_queue))  # TODO heartbeat_queue
        FastmapLocalTask._start_process(proc)
        fp = FastmapLocalTask(config, func_name, "MAP", proc=proc,
                              result_queue=result_queue,
                              logs_queue=logs_queue, heartbeat_queue=heartbeat_queue,
                              label=label)
        if hook:
            fp.add_hook(hook)
        return fp

    def poll(self):
        """
        Pick up a finished result and the latest heartbeat, if available,
        and return a dict describing the task.
        """
        try:
            pickled_result, self._runtime = self._result_queue.get(block=False)
        except queue.Empty:
            pass
        else:
            # A cleared task stays cleared even if its result arrives later.
            if self._task_state != TaskState.CLEARED:
                self._task_state = TaskState.DONE
            self._result_dict = dill.loads(pickled_result)
            self._outcome = self._result_dict['outcome']

        # Drain the heartbeat queue; only the most recent timestamp is kept.
        while True:
            try:
                self._heartbeat_ts = self._heartbeat_queue.get(block=False)
            except queue.Empty:
                break

        if self._heartbeat_ts:
            last_heartbeat = nowstamp() - self._heartbeat_ts
        else:
            last_heartbeat = None

        return {
            'type': self.task_type,
            'func_name': self._func_name,
            "task_id": self.task_id,
            "task_state": self._task_state,
            'outcome': self._outcome,
            'start_time': datetime.datetime.fromtimestamp(self._starttime),
            'runtime': self._runtime,

            'label': self._label,
            'last_heartbeat': last_heartbeat,
            'items_uploaded': None,  # TODO for map items
            'items_completed': None,  # TODO for map items
        }

    def kill(self):
        """Kill the child process and mark the task as killed by request."""
        self._config.log.info("Killing task %s...", self.task_id)
        self._task_state = TaskState.KILLING
        self._proc.kill()
        self._outcome = TaskOutcome.KILLED_BY_REQUEST
        self._task_state = TaskState.DONE
        self.poll()

    def retry(self):
        """Start a new local offload task with the same payload, hooks and label."""
        return FastmapLocalTask.create(self._config, self._func_payload,
                                       self._func_name, self._hook, self._webhook,
                                       self._label)

    def _fetch_logs(self):
        """Drain the logs queue, append to the accumulated logs, return the new text."""
        self.poll()
        new_logs = ''
        while True:
            try:
                new_logs += self._logs_queue.get(block=False)
            except queue.Empty:
                break
        self._all_logs += new_logs
        return new_logs

    def clear(self):
        """
        Mark a finished task as cleared.

        :raises FastmapException: if the task is not in the DONE state.
        """
        if self._task_state != TaskState.DONE:
            raise FastmapException("Task not done")
        self._task_state = TaskState.CLEARED
        self._config.log.info("Clearing task %s...", self.task_id)
        self.poll()

    def _fetch_result_dict(self):
        """Poll for the result unless one has already been received."""
        if self._result_dict:
            return
        self.poll()


class FastmapCloudTask(FastmapTask):
    """A task that lives on the cloud service and is driven through its HTTP API."""

    def __init__(self, config, task_id=None):
        self.task_id = task_id
        self._config = config
        self._task_state = None
        self._outcome = None
        self._next_log_idx = 0  # cursor sent with each /api/v1/logs request
        self._all_logs = ''
        self._logs_done = False  # set once logs were fetched for a DONE/CLEARED task
        self._result_dict = None

    def _sync_state(self, task_dict):
        """Copy task_state and outcome from a server task dict onto this object."""
        self._task_state = task_dict['task_state']
        self._outcome = task_dict['outcome']

    @staticmethod
    def create(config, func_name, func_hash, hook, webhook, label):
        """
        Ask the server to start an offload task for an already-uploaded function.

        :raises FastmapException: for an unsupported machine type, a server
            error, or an unexpected response status.
        """
        url = config.cloud_url + "/api/v1/offload"
        payload = {
            "func_name": func_name,
            "func_hash": func_hash,
            "label": label,
            "machine_type": config.machine_type,
            "webhook": webhook,
        }

        # TODO
        if config.machine_type == MachineType.HUMMINGBIRD_7:
            raise FastmapException("Only SPARROW_1 & PEREGRINE_4 machine_type are supported right now")

        config.log.info("Starting new task for function %r..." % func_name)
        resp = post_request(url, payload, config.secret, config.log)
        if resp.status in (OfldStatus.ERROR, OfldStatus.NOT_FOUND):
            raise FastmapException("Internal cloud error. Try again later.")

        if resp.status != OfldStatus.ACKNOWLEDGED:
            raise FastmapException("Got unexpected response from server %r" % resp.status)

        task_id = resp.obj['task']['task_id']
        task = FastmapCloudTask(config, task_id=task_id)
        config.log.info("Created new task %r." % task)
        if hook:
            task.add_hook(hook)
        return task

    @staticmethod
    def create_map(config, func_name, func_hash, iterable, kwargs, hook, webhook, label):
        """
        Upload `iterable` to /api/v1/map in batches and return the resulting task.

        :raises FastmapException: if `iterable` is not a list or is empty, if
            a hook is given (not supported yet), on a server error, or if the
            server changes task id between batches.
        """
        url = config.cloud_url + "/api/v1/map"
        payload = {
            "func_name": func_name,
            "func_hash": func_hash,
            "kwargs": kwargs,
            "label": label,
            "webhook": webhook,
        }
        config.log.debug("Calling /api/v1/map")

        # Explicit checks instead of asserts, which vanish under `python -O`.
        if not isinstance(iterable, list):  # TODO
            raise FastmapException("'iterable' must be a list.")
        if hook:  # TODO
            raise FastmapException("Hooks are not supported for map tasks yet.")

        task_id = None
        page_offset = 0
        for i, batch in enumerate(seq_batcher(iterable, 10)):  # TODO
            config.log.info("Uploading batch %d" % i)  # TODO
            payload['page_idx'] = i
            payload['page_len'] = len(batch)
            payload['page_offset'] = page_offset
            payload['iterable'] = dill.dumps(batch)
            payload['task_id'] = task_id  # None on the first batch
            page_offset += len(batch)
            resp = post_request(url, payload, config.secret, config.log)
            if resp.status in (OfldStatus.ERROR, OfldStatus.NOT_FOUND):
                raise FastmapException("Internal cloud error. Try again later.")
            resp_task_id = resp.obj['task']['task_id']
            if task_id is None:
                task_id = resp_task_id
            elif resp_task_id != task_id:
                raise FastmapException("Server changed task id during upload "
                                       "(%r != %r)" % (resp_task_id, task_id))

        if task_id is None:
            # No batch was sent, so the server never created a task.
            raise FastmapException("Cannot create a map task from an empty iterable.")

        # The constructor takes (config, task_id); the former extra positional
        # "MAP" argument made this call fail with TypeError.
        return FastmapCloudTask(config, task_id=task_id)

    def poll(self):
        """
        Refresh task state and outcome from /api/v1/poll.

        :return: the task dict from the server, with start_time as a datetime.
        :raises FastmapException: if the task is unknown or the status is unexpected.
        """
        self._config.log.debug("Calling /api/v1/poll")
        url = self._config.cloud_url + "/api/v1/poll"
        payload = {"task_id": self.task_id}
        resp = post_request(url, payload, self._config.secret, self._config.log)
        if resp.status == 'NOT_FOUND':
            raise FastmapException("No task found")
        if resp.status != 'FOUND':
            raise FastmapException("Unexpected status from server %r" % resp.status)
        task_dict = resp.obj['task']
        make_dt(task_dict)
        self._sync_state(task_dict)
        return task_dict

    def kill(self):
        """
        Ask the server to kill the task via /api/v1/kill.

        :return: the task dict from the server, with start_time as a datetime.
        :raises FastmapException: if the task is unknown or the status is unexpected.
        """
        self._config.log.debug("Calling /api/v1/kill")
        url = self._config.cloud_url + "/api/v1/kill"
        payload = {"task_id": self.task_id}
        resp = post_request(url, payload, self._config.secret, self._config.log)
        if resp.status == 'NOT_FOUND':
            raise FastmapException("No task found")
        if resp.status != 'FOUND':
            raise FastmapException("Unexpected status from server %r" % resp.status)
        self._config.log.info("Server acknowledged kill order for task %s.", self.task_id)
        task_dict = resp.obj['task']
        make_dt(task_dict)
        self._sync_state(task_dict)
        return task_dict

    def _fetch_logs(self):
        """
        Download log pages from /api/v1/logs until the server reports no more.

        Updates the accumulated logs, the log cursor and the task state, and
        returns only the text received during this call. Returns "" without a
        request once logs were fetched for a DONE or CLEARED task.

        :raises FastmapException: if the task is unknown or the status is unexpected.
        """
        if self._logs_done:
            return ""
        url = self._config.cloud_url + "/api/v1/logs"
        new_logs = ''
        while True:
            self._config.log.debug("Calling /api/v1/logs")
            payload = {"task_id": self.task_id, "next_log_idx": self._next_log_idx}
            resp = post_request(url, payload, self._config.secret, self._config.log)
            if resp.status == 'NOT_FOUND':
                raise FastmapException("No task found")
            if resp.status != 'FOUND':
                raise FastmapException("Unexpected status from server %r" % resp.status)

            self._next_log_idx = resp.obj['next_log_idx']
            page = resp.obj['logs'].decode()
            self._all_logs += page
            new_logs += page
            self._sync_state(resp.obj['task'])
            if self._task_state in (TaskState.DONE, TaskState.CLEARED):
                self._logs_done = True
            if not resp.obj['has_more']:
                return new_logs

    def retry(self):
        """
        Ask the server to retry this task and return the new FastmapCloudTask.

        :raises FastmapException: if the task to retry cannot be found.
        """
        self._config.log.debug("Calling /api/v1/retry")
        url = self._config.cloud_url + '/api/v1/retry'
        payload = {"task_id": self.task_id}
        resp = post_request(url, payload, self._config.secret, self._config.log)
        if resp.status == 'NOT_FOUND':
            # TODO more error handling
            raise FastmapException("Could not find task to retry")
        new_task_dict = resp.obj['task']
        new_task_id = new_task_dict['task_id']
        self._config.log.info("Server is retrying task %s with new task %s" % (self.task_id, new_task_id))
        # TODO hook for new retry
        return FastmapCloudTask(self._config, task_id=new_task_id)

    def clear(self):
        """
        Ask the server to clear the task via /api/v1/clear.

        :return: the task dict from the server, with start_time as a datetime.
        :raises FastmapException: if the task is unknown, not yet done, or the
            status is unexpected.
        """
        self._config.log.debug("Calling /api/v1/clear")
        url = self._config.cloud_url + "/api/v1/clear"
        payload = {"task_id": self.task_id}
        resp = post_request(url, payload, self._config.secret, self._config.log)
        if resp.status == 'NOT_FOUND':
            raise FastmapException("No task found")
        if resp.status == 'NOT_READY':
            raise FastmapException("Task not cleared. Task status is not \"DONE\".")
        if resp.status != 'FOUND':
            raise FastmapException("Unexpected status from server %r" % resp.status)
        self._config.log.info("Server cleared task %s...", self.task_id)
        task_dict = resp.obj['task']
        make_dt(task_dict)
        self._sync_state(task_dict)
        return task_dict

    def _fetch_result_dict(self):
        """
        Download the task result from /api/v1/result, part by part.

        Parts are requested by index and concatenated until the server sends
        the UNPICKLE instruction; the buffer is then gunzipped and unpickled
        into the result dict. If the result is not ready, or the status is
        neither ERROR nor SUCCESS, the result dict is left unset. Task state
        and outcome are refreshed from every response.

        :raises FastmapException: if the task is unknown or the payload cannot
            be unzipped / unpickled.
        """
        if self._result_dict:
            return
        url = self._config.cloud_url + "/api/v1/result"
        payload = {"task_id": self.task_id}

        result_idx = 0
        result_buffer = b''
        while True:
            self._config.log.debug("Calling /api/v1/result part %d" % result_idx)
            payload['result_idx'] = result_idx
            resp = post_request(url, payload, self._config.secret, self._config.log)
            if resp.status == 'NOT_FOUND':
                raise FastmapException("No task found")

            self._sync_state(resp.obj['task'])

            if resp.status == 'NOT_READY':
                break
                # raise FastmapException("Result not ready")
            if resp.status not in ('ERROR', "SUCCESS"):
                break
                # raise FastmapException("Unexpected status from server %r" % resp.status)

            if resp.obj['instruction'] == 'APPEND':
                result_buffer += resp.obj['result_part']
                result_idx += 1
                assert result_idx < resp.obj['result_len']
                continue

            assert resp.obj['instruction'] == 'UNPICKLE'
            result_buffer += resp.obj['result_part']
            try:
                # NOTE(review): this unpickles bytes received from the server;
                # it is only safe if the server response can be trusted.
                self._result_dict = dill.loads(gzip.decompress(result_buffer))
            except dill.UnpicklingError:
                raise FastmapException("Error unpickling response") from None
            except gzip.BadGzipFile:
                raise FastmapException("Error unzipping response") from None

            # if self._task_type == 'MAP':
            #     self._result_dict += _return
            # else:
            #     self._result_dict = _return
            break


def local_exit_handler(config):
    """Exit hook registered for local configs by FastmapConfig.create; currently a no-op."""
    # config.log.info("Received exit signal. Would kill %d local task(s)... " % len(config.local_threads))
    # for thread in config.local_threads:
    #     thread.kill()
    pass


class FastmapConfig:
    """
    The configuration object. Do not instantiate this directly.
    Instead, either:
    - use init to get a new FastmapConfig object
    - use global_init to allow fastmap to run without an init object.

    This object exposes one public method: fastmap.
    """

    __slots__ = [
        "secret",
        "verbosity",
        "log",
        "exec_policy",
        "machine_type",
        "cloud_url",
        "requirements",
        "local_threads",
    ]

    def __init__(self, config):
        """
        Build the configuration from an already-assembled settings dict.

        :param config: dict with the keys exec_policy, verbosity, cloud_url,
            requirements, machine_type and secret.
        :raises FastmapException: for a malformed secret, a missing secret
            when exec_policy is not LOCAL, or malformed requirements.
        """
        # TODO parameter checking is weirdly divided between create and init
        self.exec_policy = config['exec_policy']
        self.log = FastmapLogger(config['verbosity'])
        self.verbosity = config['verbosity']
        self.cloud_url = config['cloud_url']
        self.requirements = config['requirements']
        self.machine_type = config['machine_type']
        self.local_threads = []

        if self.cloud_url:
            # Normalise: make sure there is a scheme and no trailing slash.
            if not self.cloud_url.startswith("http"):
                self.cloud_url = "http://" + self.cloud_url
            if self.cloud_url.endswith("/"):
                self.cloud_url = self.cloud_url[:-1]
        elif self.exec_policy != ExecPolicy.LOCAL:
            self.exec_policy = ExecPolicy.LOCAL
            self.log.warning("No cloud_url provided. "
                             "Setting exec_policy to LOCAL.")

        if multiprocessing.current_process().name != "MainProcess":
            # Fixes issue with multiple loud inits during local multiprocessing
            # in Mac / Windows
            self.log.hush()

        secret = config['secret']
        if secret:
            if not isinstance(secret, str) or not re.match(SECRET_RE, secret):
                raise FastmapException("Invalid secret token format.")
            self.secret = secret
        else:
            if self.exec_policy != ExecPolicy.LOCAL:
                # The previous wording claimed the policy *was* LOCAL here.
                raise FastmapException("No secret provided. A secret is required "
                                       "unless exec_policy is LOCAL.")
            self.secret = None

        if self.requirements:
            if not isinstance(self.requirements, list):
                raise FastmapException("Invalid 'requirements' format. It must be a "
                                       "list in 'package==1.2.3' form.")
            for req in self.requirements:
                # Non-string entries get the same error instead of a TypeError
                # from the regex.
                if not isinstance(req, str) or not REQUIREMENT_RE.match(req):
                    raise FastmapException("Invalid requirement format %r. Requirements "
                                           "must be formatted like 'package==1.2.3'." % (req,))
        self.log.restore_verbosity()  # undo hush

    @staticmethod
    def create(config=None, **kwargs):
        """
        Assemble settings and return a FastmapLocalConfig or FastmapCloudConfig.

        Settings come from, in order: the file at DEFAULT_CONFIG_DIR when
        `config` is falsy and that path exists, DEFAULT_INLINE_CONFIG when
        `config` is falsy, a copy of `config` when it is a dict, or the JSON
        file at `config` when it is a string. Keyword arguments then override
        individual settings.

        :raises FastmapException: if a file cannot be loaded, `config` has an
            unsupported type, a keyword is unknown, a setting is missing, or
            machine_type / exec_policy is not recognised.
        """
        if not config and os.path.exists(DEFAULT_CONFIG_DIR):
            try:
                with open(DEFAULT_CONFIG_DIR) as f:
                    c = json.loads(f.read())
            except Exception as e:
                raise FastmapException(f"Exception loading '{DEFAULT_CONFIG_DIR}'") from e
        elif not config:
            c = dict(DEFAULT_INLINE_CONFIG)
        elif isinstance(config, dict):
            c = dict(config)
        elif isinstance(config, str):
            try:
                with open(config) as f:
                    c = json.loads(f.read())
            except Exception as e:
                raise FastmapException(f"Exception loading '{config}'") from e
        else:
            raise FastmapException(f"Unknown config type {type(config)}")

        for k, v in kwargs.items():
            if k not in DEFAULT_INLINE_CONFIG:
                raise FastmapException(f"Unknown parameter: {k}")
            c[k] = v

        for k in DEFAULT_INLINE_CONFIG:
            if k not in c:
                raise FastmapException(f"Missing configuration parameter: {k}")

        if c['machine_type'] not in MachineType:
            raise FastmapException(f"Unknown machine_type '{c['machine_type']}'.")

        if c['exec_policy'] not in ExecPolicy:
            raise FastmapException(f"Unknown exec_policy '{c['exec_policy']}'.")

        if c['exec_policy'] == ExecPolicy.LOCAL:
            local_config = FastmapLocalConfig(c)
            atexit.register(local_exit_handler, local_config)
            return local_config
        return FastmapCloudConfig(c)

    @set_docstring(OFFLOAD_DOCSTRING)
    def offload(self, func: FunctionType, kwargs=None,
                hook=None, webhook=None, label=""):
        # The docstring is attached by the set_docstring decorator.
        self.log.info("Fastmap offload."
                      "\n verbosity: %s."
                      "\n exec_policy: %s." % (self.verbosity, self.exec_policy))
        if not callable(func):
            raise FastmapException("'func' must be a function")

        func_name = get_func_name(func)  # before applying kwargs, get func_name
        if kwargs:
            if not isinstance(kwargs, dict):
                raise FastmapException("'kwargs' must be a dict.")
            func = functools.partial(func, **kwargs)

        pickled_func = pickle_function(func, func_name)
        func_payload, func_hash = get_payload_and_hash(pickled_func, self)

        if self.exec_policy == ExecPolicy.LOCAL:
            task = FastmapLocalTask.create(self, func_payload, func_name=func_name,
                                           hook=hook, webhook=webhook, label=label)
            self.local_threads.append(task)
            return task

        assert self.exec_policy == ExecPolicy.CLOUD
        # The function must be on the server before the task is created.
        init_remote(self, func_hash, func_payload)
        return FastmapCloudTask.create(self, func_name=func_name, func_hash=func_hash,
                                       hook=hook, webhook=webhook, label=label)


class FastmapLocalConfig(FastmapConfig):
    """Configuration used when exec_policy is LOCAL; task management is not implemented."""
    # TODO we really oughta implement these

    def get_task(self, task_id):
        raise NotImplementedError()

    def poll_all(self):
        raise NotImplementedError()

    def clear_all(self):
        raise NotImplementedError()

    def map(self, *args, **kwargs):
        raise NotImplementedError()


def check_task_id(func) -> FunctionType:
    """
    Decorator for `method(self, task_id)` that validates task_id first.

    The wrapped method raises FastmapException when task_id is empty, is not
    a string, or does not match TASK_RE.
    """
    @functools.wraps(func)
    def inner(self, task_id):
        # The isinstance check keeps re.match from raising TypeError on
        # non-string ids.
        if not task_id or not isinstance(task_id, str) or not re.match(TASK_RE, task_id):
            raise FastmapException("Invalid task_id format %r" % (task_id,))
        return func(self, task_id)
    return inner


def make_dt(task):
    """Replace task['start_time'] (a timestamp) with a datetime, in place."""
    task['start_time'] = datetime.datetime.fromtimestamp(task['start_time'])


class FastmapCloudConfig(FastmapConfig):
    """Configuration used for non-LOCAL exec policies; adds task management calls."""

    @check_task_id
    @set_docstring(GET_TASK_DOCSTRING)
    def get_task(self, task_id):
        return FastmapCloudTask(self, task_id=task_id)

    @set_docstring(POLL_ALL_DOCSTRING)
    def poll_all(self):
        resp = post_request(self.cloud_url + '/api/v1/poll_all', {},
                            self.secret, self.log)
        tasks = resp.obj['tasks']
        for task in tasks:
            make_dt(task)
        return tasks

    @set_docstring(CLEAR_ALL_DOCSTRING)
    def clear_all(self):
        resp = post_request(self.cloud_url + '/api/v1/clear_all', {},
                            self.secret, self.log)
        self.log.info("Cleared %d tasks", resp.obj['count'])
        cleared_tasks = resp.obj['cleared_tasks']
        for task in cleared_tasks:
            make_dt(task)
        return cleared_tasks

    # @set_docstring(MAP_DOCSTRING)
    # def map(self, func: FunctionType, iterable: Iterable, kwargs=None,
    #         hook=None, label=""):
    #     raise AssertionError("This is not ready yet")

    #     if kwargs:
    #         kwargs = kwargs or {}
    #         if not isinstance(kwargs, dict):
    #             raise FastmapException("'kwargs' must be a dict.")
    #         func = functools.partial(func, **kwargs)

    #     pickled_func = pickle_function(func)
    #     func_payload, func_hash = get_payload_and_hash(pickled_func, self)

    #     if self.exec_policy == ExecPolicy.LOCAL:
    #         task = FastmapLocalTask.create_map(self, func_payload, func_name=func_name,
    #                                            iterable=iterable, hook=hook, webhook=webhook, label=label)
    #         self.local_threads.append(task)
    #         return task

    #     assert self.exec_policy == ExecPolicy.CLOUD
    #     init_remote(self, func_hash, func_payload)

    #     return FastmapCloudTask.create_map(self, func_hash=func_hash,
    #                                        iterable=iterable,
    #                                        hook=hook, webhook=webhook, label=label)

    # @check_task_id
    # @set_docstring(POLL_DOCSTRING)
    # def poll(self, task_id):
    #     return FastmapCloudTask(self, task_id=task_id).poll()

    # @check_task_id
    #
@set_docstring(RETRY_DOCSTRING) 1714 | # def retry(self, task_id): 1715 | # return FastmapCloudTask(self, task_id=task_id).retry() 1716 | 1717 | # @check_task_id 1718 | # @set_docstring(KILL_DOCSTRING) 1719 | # def kill(self, task_id): 1720 | # return FastmapCloudTask(self, task_id=task_id).kill() 1721 | 1722 | # @check_task_id 1723 | # @set_docstring(WAIT_DOCSTRING) 1724 | # def wait(self, task_id): 1725 | # return FastmapCloudTask(self, task_id=task_id).wait() 1726 | 1727 | # @check_task_id 1728 | # @set_docstring(RETURN_VALUE_DOCSTRING) 1729 | # def return_value(self, task_id): 1730 | # return FastmapCloudTask(self, task_id=task_id).return_value() 1731 | 1732 | # @check_task_id 1733 | # @set_docstring(TRACEBACK_DOCSTRING) 1734 | # def traceback(self, task_id): 1735 | # return FastmapCloudTask(self, task_id=task_id).traceback() 1736 | 1737 | # @check_task_id 1738 | # @set_docstring(CLEAR_DOCSTRING) 1739 | # def clear(self, task_id): 1740 | # return FastmapCloudTask(self, task_id=task_id).clear() 1741 | 1742 | # @check_task_id 1743 | # @set_docstring(ALL_LOGS_DOCSTRING) 1744 | # def all_logs(self, task_id): 1745 | # return FastmapCloudTask(self, task_id=task_id).all_logs() 1746 | 1747 | # @check_task_id 1748 | # @set_docstring(NEW_LOGS_DOCSTRING) 1749 | # def new_logs(self, task_id): 1750 | # return FastmapCloudTask(self, task_id=task_id).new_logs() 1751 | 1752 | # def _log_final_stats(self, fname: str, mapper: Mapper, proc_cnt: int, 1753 | # total_dur: float): 1754 | # """ After finishing the .fastmap(...) run, log stats for the user """ 1755 | # avg_runtime = mapper.avg_runtime 1756 | # total_credits_used = mapper.total_credits_used 1757 | 1758 | # print() 1759 | # if not avg_runtime: 1760 | # self.log.info("Done processing %r in %.2fms." % (fname, total_dur*1000)) 1761 | # else: 1762 | # time_saved = avg_runtime * proc_cnt - total_dur 1763 | # if time_saved > 0.02: 1764 | # self.log.info("Processed %d elements from %r in %s. 
#                               "
    #                           "You saved ~%s.", proc_cnt, fname,
    #                           fmt_dur(total_dur), fmt_dur(time_saved))
    #         elif abs(time_saved) < 0.02:
    #             self.log.info("Processed %d elements from %r in %s. This "
    #                           "ran at about the same speed as the builtin map.",
    #                           proc_cnt, fname, fmt_dur(total_dur))
    #         elif self.exec_policy == ExecPolicy.LOCAL:
    #             self.log.info("Processed %d elements from %r in %s. This "
    #                           "ran slower than the map builtin by ~%s. "
    #                           "Consider not using fastmap here.",
    #                           proc_cnt, fname, fmt_dur(total_dur),
    #                           fmt_dur(time_saved * -1))
    #         else:
    #             self.log.info("Processed %d elements from %r in %s. "
    #                           "This ran slower than the map builtin by ~%s. "
    #                           "Consider connecting to a faster "
    #                           "internet, reducing your data size, or using "
    #                           "exec_policy LOCAL or ADAPTIVE.",
    #                           proc_cnt, fname, fmt_dur(total_dur),
    #                           fmt_dur(time_saved * -1))

    #     if total_credits_used:
    #         self.log.info("Spent $%.4f.", total_credits_used / 100)
    #     self.log.info("Fastmap done.")


def chunk_bytes(payload: bytes, size: int) -> list:
    """
    Split payload into consecutive slices of at most `size` bytes.

    The last slice may be shorter; an empty payload gives an empty list.

    :raises ValueError: if size is not positive. A negative size used to
        return an empty list (silently dropping the payload) and zero failed
        inside range() with an unrelated message.
    """
    if size <= 0:
        raise ValueError("size must be a positive integer, got %r" % (size,))
    return [payload[i:i + size] for i in range(0, len(payload), size)]


def init_remote(config, func_hash, func_payload):
    """
    Get the function and modules uploaded to the cloud via the
    /api/v1/init endpoint. This must happen BEFORE calling /api/v1/map.
    Because of server-side caching, and the potential for very large
    payloads, check with the function hash before uploading the function.

    :raises FastmapException: if the server answers with a non-200 status
        code or an unexpected init status.
    """

    # Step 1: Try uploading just the function hash. If it exists, we are good.
    req_dict = {'func_hash': func_hash}
    url = config.cloud_url + "/api/v1/init"
    resp = post_request(url, req_dict, config.secret, config.log)

    if resp.status_code != 200:
        raise FastmapException("Cloud initialization failed %r." % resp.obj)
    if resp.status == InitStatus.FOUND:
        config.log.info("Function already on the server.")
        return
    if resp.status != InitStatus.NOT_FOUND:
        raise FastmapException("Unexpected init status %r." % resp.obj)

    # Step 2: If the server can't find the func, we need to upload it
    # We might need to chunk the upload due to cloud run limits
    func_parts = chunk_bytes(func_payload, 5 * MB)  # 5MB is arbitrary but feels right
    part_count = len(func_parts)
    for i, func_part in enumerate(func_parts):
        # req_dict still carries func_hash, so every part identifies its function.
        req_dict['func'] = func_part
        req_dict['part_idx'] = i
        req_dict['part_len'] = part_count
        payload = msgpack.dumps(req_dict)
        payload_bytes = fmt_bytes(len(payload))
        if part_count > 1:
            config.log.info("Uploading code (%s) part %d/%d..." %
                            (payload_bytes, i + 1, part_count))
        else:
            config.log.info("Uploading code (%s)..." % payload_bytes)
        resp = post_request(url, payload, config.secret, config.log)

        if resp.status_code != 200:
            raise FastmapException("Cloud initialization failed %r." % resp.obj)
        if resp.status != InitStatus.UPLOADED:
            raise FastmapException("Cloud initialization failed. Function not uploaded.")
    config.log.info("Done uploading code.")


def get_payload_and_hash(pickled_func, config):
    """
    Bundle the pickled function with its dependencies and hash the result.

    The function, the local sources, the installed modules and the
    requirements returned by get_dependencies are packed with msgpack and
    gzip-compressed (level 1); the hash is computed over the compressed bytes.

    :return: (compressed_payload, func_hash)
    """
    local_sources, installed_mods, requirements = get_dependencies(
        config.requirements, config.log)
    func_payload = msgpack.dumps({
        'func': pickled_func,
        'local_sources': local_sources,
        'installed_mods': installed_mods,
        'requirements': requirements})
    compressed_payload = gzip.compress(func_payload, compresslevel=1)
    func_hash = get_hash(compressed_payload)
    return compressed_payload, func_hash