├── tests
├── __init__.py
└── test_sdk.py
├── setup.cfg
├── assets
├── demo.gif
└── fastmap_logo.png
├── .gitignore
├── requirements.txt
├── requirements_dev.txt
├── Makefile
├── test_sdk.sh
├── CHANGELOG.txt
├── LICENSE
├── .secrets.baseline
├── setup.py
├── fastmap
├── __init__.py
└── sdk_lib.py
├── README.md
└── scripts
├── fastmapadmin
└── fastmap
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 |
--------------------------------------------------------------------------------
/assets/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmap-io/fastmap/HEAD/assets/demo.gif
--------------------------------------------------------------------------------
/assets/fastmap_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmap-io/fastmap/HEAD/assets/fastmap_logo.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | __pycache__
3 | scratch
4 | htmlcov
5 | .coverage
6 | .coverage.*
7 | *.egg-info
8 | *.eggs
9 | *.pyc
10 | build/
11 | dist/
12 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Client SDK requirements
2 | dill>=0.3.4,<0.4
3 | msgpack>=1.0.0,<1.1.0
4 | requests>=2.24,<2.25 # purposefully downgraded. Test carefully
5 | tabulate>=0.8.7,<0.9.0
6 | urllib3<1.26
7 |
--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | # Client SDK dev requirements
2 | -r requirements.txt
3 | pytest>=6.0,<6.1
4 | requests-mock>=1.8.0,<2.0.0
5 | coverage>4.5,<4.6 # downgraded on purpose for --concurrency flag
6 | detect-secrets>=0.14,<0.15
7 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# None of these targets produce a file with the target's name. Declaring them
# phony keeps `make build` working even when a `build/` directory exists.
.PHONY: build test clean

# Create a fresh source distribution in dist/.
build:
	rm -rf dist
	python3 setup.py sdist

# Run the test suite with an HTML coverage report; stop at the first failure.
test:
	pytest --cov-report=html --cov=fastmap -x

# Remove build artifacts and coverage output.
clean:
	rm -rf build
	rm -rf dist
	rm -f out.*
	rm -rf *.egg-info
	rm -rf htmlcov
14 |
15 |
16 |
--------------------------------------------------------------------------------
/test_sdk.sh:
--------------------------------------------------------------------------------
1 | detect-secrets scan --no-keyword-scan --no-basic-auth-scan > .secrets.baseline
2 | coverage run --concurrency=multiprocessing -m pytest ./tests/test_sdk.py -vx || exit 1
3 | coverage combine
4 | coverage report --include=fastmap/sdk_lib.py,fastmap/__init__.py
5 | coverage html --include=fastmap/sdk_lib.py,fastmap/__init__.py
6 |
--------------------------------------------------------------------------------
/CHANGELOG.txt:
--------------------------------------------------------------------------------
1 | # Fastmap CHANGELOG
2 |
3 | ## [0.0.12] - 2021-07-23
4 | Added login option for fastmap to make credentials easier to manage. Bugfixes.
5 |
6 | ## [0.0.11] - 2021-07-22
7 | Lots of cleanup and bug fixes. Add fastmapadmin script
8 |
9 | ## [0.0.10] - 2021-07-12
10 | Several tweaks/fixes over the past few versions for test users
11 |
12 | ## [0.0.7] - 2021-07-01
13 | Major paradigm change. Fastmap is now oriented around a controller/worker approach.
14 | Map is still in dev. Unit tests have no chance. But this should work for
15 | some basic offload usage.
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2020 fastmap.io
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/.secrets.baseline:
--------------------------------------------------------------------------------
1 | {
2 | "custom_plugin_paths": [],
3 | "exclude": {
4 | "files": null,
5 | "lines": null
6 | },
7 | "generated_at": "2021-03-13T04:28:12Z",
8 | "plugins_used": [
9 | {
10 | "name": "AWSKeyDetector"
11 | },
12 | {
13 | "name": "ArtifactoryDetector"
14 | },
15 | {
16 | "base64_limit": 4.5,
17 | "name": "Base64HighEntropyString"
18 | },
19 | {
20 | "name": "CloudantDetector"
21 | },
22 | {
23 | "hex_limit": 3,
24 | "name": "HexHighEntropyString"
25 | },
26 | {
27 | "name": "IbmCloudIamDetector"
28 | },
29 | {
30 | "name": "IbmCosHmacDetector"
31 | },
32 | {
33 | "name": "JwtTokenDetector"
34 | },
35 | {
36 | "name": "MailchimpDetector"
37 | },
38 | {
39 | "name": "PrivateKeyDetector"
40 | },
41 | {
42 | "name": "SlackDetector"
43 | },
44 | {
45 | "name": "SoftlayerDetector"
46 | },
47 | {
48 | "name": "StripeDetector"
49 | },
50 | {
51 | "name": "TwilioKeyDetector"
52 | }
53 | ],
54 | "results": {},
55 | "version": "0.14.3",
56 | "word_list": {
57 | "file": null,
58 | "hash": null
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging script for the fastmap client SDK."""
import os
import re
import setuptools
import sys

# Keep this guard in sync with the message and with ``python_requires`` below.
if sys.version_info[:2] < (3, 7):
    print("ERROR: this package requires Python 3.7 or later!")
    sys.exit(1)
# if sys.version_info[:2] >= (3, 9):
#     # This is because of a pickling issue. Maybe dill needs a PR?
#     print("ERROR: this package cannot run on Python 3.9 or later!")
#     sys.exit(1)


# README.md contains non-ASCII characters, so do not rely on the platform's
# default encoding when reading it.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# The package version is the CLIENT_VERSION constant declared in sdk_lib.py.
with open(os.path.join("fastmap", "sdk_lib.py"), encoding="utf-8") as f:
    version_match = re.search(r'^CLIENT_VERSION = "([0-9.]+)"', f.read(),
                              re.MULTILINE)
if version_match is None:
    sys.exit("ERROR: could not find CLIENT_VERSION in fastmap/sdk_lib.py")
version = version_match.group(1)

url_base = "https://github.com/fastmap-io/fastmap"
download_url = '%s/archive/fastmap-%s.tar.gz' % (url_base, version)

# NOTE(review): these ranges differ from requirements.txt (dill>=0.3.4,
# requests<2.25, urllib3<1.26) -- confirm which set is intended.
setuptools.setup(
    name="fastmap",
    version=version,
    author="fastmap.io team",
    author_email="scott@fastmap.io",
    description="Fastmap offloads arbitrary Python code "
                "via the open source fastmap cloud service.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url=url_base,
    download_url=download_url,
    packages=setuptools.find_packages(),
    scripts=[
        "scripts/fastmap",
        "scripts/fastmapadmin",
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.7',
    install_requires=[
        "dill>=0.3.2,<0.4",
        "msgpack>=1.0.0,<1.1.0",
        "requests>=2.24,<3.0",
        "tabulate>=0.8.7,<0.9.0",
    ],
)
54 |
--------------------------------------------------------------------------------
/fastmap/__init__.py:
--------------------------------------------------------------------------------
1 | from .sdk_lib import (FastmapConfig, set_docstring, ExecPolicy, Verbosity, MachineType,
2 | FastmapException, FastmapTask, TaskState, TaskOutcome,
3 | CLIENT_VERSION, INIT_DOCSTRING, GLOBAL_INIT_DOCSTRING,
4 | OFFLOAD_DOCSTRING, GET_TASK_DOCSTRING,
5 | POLL_ALL_DOCSTRING, CLEAR_ALL_DOCSTRING)
6 |
# NOTE(review): the self-assignments below are no-ops at runtime; presumably
# they mark the imported names as deliberate re-exports (e.g. to quiet
# unused-import warnings) -- TODO confirm.

# Classes
FastmapConfig = FastmapConfig
FastmapTask = FastmapTask

# Namespaces
ExecPolicy = ExecPolicy
Verbosity = Verbosity
MachineType = MachineType
TaskState = TaskState
TaskOutcome = TaskOutcome

# Exceptions
FastmapException = FastmapException


__version__ = CLIENT_VERSION
# Module-wide configuration; None until global_init() stores one.
_global_config = None
24 |
25 |
@set_docstring(GLOBAL_INIT_DOCSTRING)
def global_init(*args, **kwargs):
    # Build the configuration first, then publish it as the module-wide
    # default that the module-level helpers read through _get_config().
    global _global_config
    new_config = init(*args, **kwargs)
    _global_config = new_config
30 |
31 |
@set_docstring(INIT_DOCSTRING)
def init(*args, **kwargs):
    # Pass every argument straight through to the FastmapConfig factory.
    config = FastmapConfig.create(*args, **kwargs)
    return config
35 |
36 |
def _get_config():
    """Return the module-wide config, raising FastmapException if none is set."""
    if _global_config:
        return _global_config
    raise FastmapException("Fastmap not initialized globally.")
41 |
42 |
43 | # @set_docstring(MAP_DOCSTRING)
44 | # def map(func, iterable, *args, **kwargs):
45 | # return _get_config().map(func, iterable, *args, **kwargs)
46 |
47 |
@set_docstring(OFFLOAD_DOCSTRING)
def offload(func, *args, **kwargs):
    # Delegate to the globally initialized configuration.
    config = _get_config()
    return config.offload(func, *args, **kwargs)
51 |
52 |
@set_docstring(POLL_ALL_DOCSTRING)
def poll_all():
    # Delegate to the globally initialized configuration.
    config = _get_config()
    return config.poll_all()
56 |
57 |
@set_docstring(CLEAR_ALL_DOCSTRING)
def clear_all():
    # Delegate to the globally initialized configuration.
    config = _get_config()
    return config.clear_all()
61 |
62 |
@set_docstring(GET_TASK_DOCSTRING)
def get_task(task_id):
    # Delegate to the globally initialized configuration.
    config = _get_config()
    return config.get_task(task_id)
66 |
67 | # @set_docstring(POLL_DOCSTRING)
68 | # def poll(task_id):
69 | # return _get_config().poll(task_id)
70 |
71 |
72 | # @set_docstring(KILL_DOCSTRING)
73 | # def kill(task_id):
74 | # return _get_config().kill(task_id)
75 |
76 |
77 | # @set_docstring(RETURN_VALUE_DOCSTRING)
78 | # def return_value(task_id):
79 | # return _get_config().return_value(task_id)
80 |
81 |
82 | # @set_docstring(TRACEBACK_DOCSTRING)
83 | # def traceback(task_id):
84 | # return _get_config().traceback(task_id)
85 |
86 |
87 | # @set_docstring(WAIT_DOCSTRING)
88 | # def wait(task_id, *args, **kwargs):
89 | # return _get_config().wait(task_id, *args, **kwargs)
90 |
91 |
92 | # @set_docstring(ALL_LOGS_DOCSTRING)
93 | # def all_logs(task_id, *args, **kwargs):
94 | # return _get_config().all_logs(task_id, *args, **kwargs)
95 |
96 |
97 | # @set_docstring(CLEAR_DOCSTRING)
98 | # def clear(task_id):
99 | # return _get_config().clear(task_id)
100 |
101 |
102 | # @set_docstring(RETRY_DOCSTRING)
103 | # def retry(task_id):
104 | # return _get_config().retry(task_id)
105 |
106 |
def _reset_global_config():
    """Forget the module-wide configuration. Unit-test helper; do not use."""
    global _global_config
    _global_config = None
111 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | > Note: Fastmap is currently in beta.
2 |
3 | 
4 |
5 |
6 |
7 |
8 |
9 | Fastmap offloads and parallelizes arbitrary Python functions on the cloud.
10 |
11 | - **🌤 Offload anything** Fastmap is the easiest way to offload Python code. Use fastmap.offload wherever you might have needed an EC2.
12 | - **🔗 Parallelize in one line** With Fastmap, you can parallelize any Python function across as many workers as it takes. Use fastmap.map wherever you might have needed a Lambda.
13 | - **🚀 Deploy in minutes** Fastmap comes with an open-source cloud service. With a Google Cloud Platform account, you can setup and deploy fastmap with one command.
14 | - **🛀 Built for productivity** Fastmap is designed to eliminate infrastructure and speed up your work. Spend your time writing code - not fighting infrastructure.
15 |
16 | Fastmap is in active development. For more information, see https://fastmap.io.
17 |
18 |
19 |
101 |
--------------------------------------------------------------------------------
/scripts/fastmapadmin:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import datetime
5 |
6 | import tabulate
7 | import fastmap
8 |
USAGE = """Fastmap CLI for service administration."""

# Shown verbatim by argparse (RawDescriptionHelpFormatter). The examples use
# the script name and the sub-commands registered in the __main__ block.
DESCRIPTION = """
Fastmap CLI for service administration.
\n\n
Examples:
    $ fastmapadmin list_accounts
    $ fastmapadmin list_workers
    $ fastmapadmin add_account <email> <password>
    $ fastmapadmin add_credits <account_id> <amount>

"""

EPILOG = """Run `fastmapadmin <operation> --help` for help on individual operations."""
20 |
21 |
22 | def _relative_time(seconds):
23 | if not seconds:
24 | return 'never'
25 | seconds = datetime.datetime.now(datetime.timezone.utc).timestamp() - seconds
26 | if seconds > 60 * 60 * 24 * 2:
27 | return '%d days ago' % (seconds // (60 * 60 * 24))
28 | elif seconds > 60 * 60 * 2:
29 | return '%d hours ago' % (seconds // (60 * 60))
30 | elif seconds > 120:
31 | return '%d minutes ago' % (seconds // 60)
32 | elif seconds > 2:
33 | return '%d seconds ago' % seconds
34 | else:
35 | return 'just now'
36 |
37 |
def _prettify(items):
    """Replace timestamp fields of each dict in ``items`` with relative-time text.

    'created' and 'updated' must be present on every item; 'idle' and
    'heartbeat_ts' are converted only when present and truthy. The dicts are
    modified in place and nothing is returned.
    """
    required_fields = ('created', 'updated')
    optional_fields = ('idle', 'heartbeat_ts')
    for row in items:
        for field in required_fields:
            row[field] = _relative_time(row[field])
        for field in optional_fields:
            if row.get(field):
                row[field] = _relative_time(row[field])
56 |
57 |
def list_accounts(config):
    """Fetch all accounts from the admin API and print them newest first."""
    sdk = fastmap.sdk_lib
    response = sdk.post_request(
        url=config.cloud_url + '/admin/v1/list_accounts',
        data={},
        secret=config.secret,
        log=sdk.FastmapLogger('QUIET'))

    rows = response.obj['accounts']
    # Sort on the raw 'created' value before it is replaced by display text.
    rows.sort(key=lambda row: row['created'], reverse=True)
    _prettify(rows)
    table = tabulate.tabulate(rows, headers='keys')
    print(table)
69 |
70 |
def list_workers(config):
    """Fetch all workers from the admin API and print them newest first.

    The workers are sorted on their raw 'created' value, then their timestamp
    fields are converted to relative-time text and the result is printed as a
    table.
    """
    resp = fastmap.sdk_lib.post_request(
        url=config.cloud_url + '/admin/v1/list_workers',
        data={},
        secret=config.secret,
        log=fastmap.sdk_lib.FastmapLogger('QUIET'))

    workers = resp.obj['workers']
    workers.sort(key=lambda x: x['created'], reverse=True)
    # The leftover pprint debug dump was removed; only the table is printed.
    _prettify(workers)
    print(tabulate.tabulate(workers, headers='keys'))
83 |
84 |
def add_account(config, email, password):
    """Create an account through the admin API and print its id and token."""
    sdk = fastmap.sdk_lib
    payload = {'email': email, 'password': password}
    response = sdk.post_request(
        url=config.cloud_url + '/admin/v1/add_account',
        data=payload,
        secret=config.secret,
        log=sdk.FastmapLogger('QUIET'))

    created = response.obj
    summary = (created['account_id'], created['secret_token'])
    print("New account is %s token=%s" % summary)
95 |
96 |
def add_credits(config, account_id, amount):
    """Add ``amount`` credits to ``account_id`` and print the new balance."""
    # TODO check account_id
    sdk = fastmap.sdk_lib
    payload = {'account_id': account_id, 'amount': amount}
    response = sdk.post_request(
        url=config.cloud_url + '/admin/v1/add_credit',
        data=payload,
        secret=config.secret,
        log=sdk.FastmapLogger('QUIET'))

    new_balance = response.obj['balance']
    print("Added credits. New balance = %.2f" % new_balance)
106 |
107 |
if __name__ == "__main__":
    # Top-level parser: an optional --config flag plus one mandatory operation.
    cli = argparse.ArgumentParser(
        # usage=DESCRIPTION,
        description=DESCRIPTION,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument(
        "--config",
        help="Location of configuration file generated by depoly_gcp.py. "
             "If omitted, will attempt to use the default configuration. ")

    operations = cli.add_subparsers(
        dest='operation', required=True,
        help='sub-command help')

    # Operations without arguments.
    operations.add_parser('list_accounts', help="Get account info")
    operations.add_parser('list_workers', help="Get worker info")

    credits_cmd = operations.add_parser('add_credits', help="Add credits to users")
    credits_cmd.add_argument("account_id", help="Account ID")
    credits_cmd.add_argument("amount", help="Amount")

    account_cmd = operations.add_parser('add_account', help="Add new account")
    account_cmd.add_argument("email", help="Email")
    account_cmd.add_argument("password", help="Password")

    args = cli.parse_args()

    config = fastmap.init(config=args.config)

    # The admin endpoints are remote; a LOCAL-only configuration is rejected.
    if config.exec_policy == fastmap.ExecPolicy.LOCAL:
        raise AssertionError("The fastmap CLI does not support a LOCAL exec_policy. "
                             "Check your configuration file.")

    operation = args.operation
    if operation == 'list_accounts':
        list_accounts(config)
    elif operation == 'list_workers':
        list_workers(config)
    elif operation == 'add_credits':
        add_credits(config, args.account_id, args.amount)
    elif operation == 'add_account':
        add_account(config, args.email, args.password)
164 |
--------------------------------------------------------------------------------
/scripts/fastmap:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import getpass
5 | import json
6 | import os
7 | import time
8 |
9 | import fastmap
10 | import requests
11 | import tabulate
12 |
13 | USAGE = """Fastmap CLI.
14 | This CLI only supports the offload workflow and doesn't have any mapping functionality.
15 | Run `fastmap --help` for more details.
16 | """
17 |
18 | DESCRIPTION = """
19 | Fastmap CLI.
20 | \n\n
21 | Examples:
22 | $ fastmap poll
23 | $ fastmap poll
24 | $ fastmap logs
25 | $ fastmap traceback
26 | $ fastmap return_value
27 | $ fastmap kill
28 | $ fastmap clear
29 | $ fastmap clear --force
30 |
31 | """
32 |
33 | EPILOG = """Run `fastmap --help` for help on individual operations."""
34 |
# Despite the name, this value is used as the path of the configuration *file*:
# it is passed to open() for both reading and writing in this script.
DEFAULT_CONFIG_DIR = fastmap.sdk_lib.DEFAULT_CONFIG_DIR
36 |
37 | def _relative_time(seconds):
38 | if not seconds:
39 | return 'never'
40 | if seconds > 60 * 60 * 24 * 2:
41 | return '%d days ago' % (seconds // (60 * 60 * 24))
42 | elif seconds > 60 * 60 * 2:
43 | return '%d hours ago' % (seconds // (60 * 60))
44 | elif seconds > 120:
45 | return '%d minutes ago' % (seconds // 60)
46 | elif seconds > 2:
47 | return '%d seconds ago' % seconds
48 | else:
49 | return 'just now'
50 |
51 |
def _prettify_tasks(tasks):
    """Rewrite each task dict in ``tasks`` in place with display-friendly values.

    'type' is capitalized, 'start_time' is formatted as a date-time string,
    a truthy 'runtime' becomes e.g. "1.5s", and 'last_heartbeat' becomes
    relative-time text.
    """
    for entry in tasks:
        entry['type'] = entry['type'].lower().capitalize()
        entry['start_time'] = entry['start_time'].strftime("%Y-%m-%d %H:%M:%S")
        runtime = entry['runtime']
        if runtime:
            # Falsy runtimes (None, 0) are left exactly as they are.
            entry['runtime'] = "%.1fs" % runtime
        entry['last_heartbeat'] = _relative_time(entry['last_heartbeat'])
67 |
68 |
69 | # def offload(config, path, function_name, label):
70 | # if not os.path.exists(path):
71 | # raise AssertionError("Path %r does not exist." % path)
72 | # mod_path = path.replace('/', '.')
73 | # if mod_path.endswith('.py'):
74 | # mod_path = mod_path[:-3]
75 | # sys.path.append(os.getcwd())
76 | # try:
77 | # mod = importlib.import_module(mod_path)
78 | # except ImportError:
79 | # raise AssertionError("Could not import module %r" % mod_path) from None
80 |
81 | # # TODO this doesn't actually work...
82 | # try:
83 | # func = getattr(mod, function_name)
84 | # except AttributeError:
85 | # raise AssertionError("Could not import function %r from module %r" %
86 | # (func, mod_path)) from None
87 | # fastmap_task = config.offload(func, label=label)
88 | # config.log.info("Started new task: %s" % fastmap_task.task_id)
89 |
def login(cloud_url):
    """Prompt for credentials, log in to ``cloud_url`` and save the session.

    The e-mail and password are read interactively and posted to
    ``/actions/v1/login``. On success, a copy of the SDK's default inline
    configuration, extended with the returned session token and the cloud
    URL, is written as JSON to DEFAULT_CONFIG_DIR.

    Raises:
        AssertionError: if the service answers with any status other than 200.
    """
    print("Login for %r" % cloud_url)
    email = input("Email: ")
    password = getpass.getpass("Password: ")
    resp = requests.post(cloud_url + '/actions/v1/login',
                         data=json.dumps({'email': email, 'password': password}))
    if resp.status_code == 401:
        raise AssertionError("Could not login with those credentials")
    if resp.status_code != 200:
        raise AssertionError("Could not login unexpectedly %r" % resp.status_code)
    resp_dict = json.loads(resp.content)
    config = dict(fastmap.sdk_lib.DEFAULT_INLINE_CONFIG)
    config['secret'] = resp_dict['session_token']
    config['cloud_url'] = cloud_url
    # DEFAULT_CONFIG_DIR is the config file path; its parent is the directory.
    config_dir = os.path.dirname(DEFAULT_CONFIG_DIR)
    if config_dir:
        # makedirs creates missing parents and tolerates an existing directory,
        # unlike the previous exists()/mkdir() pair.
        os.makedirs(config_dir, exist_ok=True)
    with open(DEFAULT_CONFIG_DIR, 'w') as f:
        f.write(json.dumps(config, indent=4))
    print("Login successful. Configuration saved to %s." % DEFAULT_CONFIG_DIR)
110 |
111 |
def poll(config, task_id):
    """Print a table of one task (when ``task_id`` is given) or of all tasks."""
    rows = [config.get_task(task_id).poll()] if task_id else config.poll_all()

    # Newest first; sort before start_time is turned into display text.
    rows.sort(key=lambda row: row['start_time'], reverse=True)
    _prettify_tasks(rows)

    config.log.info("Found %d task(s)" % len(rows))
    table = tabulate.tabulate(rows, headers='keys')
    print(table)
123 |
124 |
def return_value(config, task_id):
    """Print the return value of the task identified by ``task_id``."""
    task = config.get_task(task_id)
    print(task.return_value())
127 |
128 |
def traceback(config, task_id):
    """Fetch the traceback of task ``task_id``, log a header and print it."""
    task = config.get_task(task_id)
    text = task.traceback()
    config.log.info("Traceback for %s:" % task_id)
    print(text)
133 |
134 |
def kill(config, task_id, force=False):
    """Kill one task, or every PENDING/PROCESSING task when no id is given.

    Args:
        config: object providing ``get_task``, ``poll_all`` and ``log``.
        task_id: id of the task to kill; when falsy, all tasks whose
            'task_state' is "PENDING" or "PROCESSING" are selected.
        force: when no ``task_id`` is given, skip the confirmation prompt.
    """
    if task_id:
        config.get_task(task_id).kill()
        return
    config.log.info("Polling for tasks to kill...")
    tasks = config.poll_all()
    tasks_to_kill = [t for t in tasks if t['task_state'] in ("PENDING", "PROCESSING")]
    if not tasks_to_kill:
        config.log.info("Could not find any tasks to kill")
        return
    if not force:
        if config.log.input("Kill %d tasks? (y/n) " % len(tasks_to_kill)).lower() != 'y':
            config.log.info("Not killing.")
            return
    for task in tasks_to_kill:
        # task_id is empty on this path, so look each task up by its own id.
        config.get_task(task['task_id']).kill()
    config.log.info("Killed %d tasks" % len(tasks_to_kill))
152 |
153 |
def logs(config, task_id, live):
    """Print the logs of task ``task_id``; with ``live``, keep printing new ones.

    In live mode the loop stops as soon as the task's ``_outcome`` is truthy;
    until then new logs are fetched and printed (when non-empty) every five
    seconds.
    """
    task = config.get_task(task_id)
    history = task.all_logs()
    config.log.info("Logs for %s:" % task_id)
    print(history)
    if not live:
        return
    while not task._outcome:
        fresh = task.new_logs()
        if fresh:
            print(fresh)
        time.sleep(5)
167 |
def retry(config, task_id):
    """Retry the task ``task_id`` and log the object returned by the retry."""
    original = config.get_task(task_id)
    new_task = original.retry()
    config.log.info("Retry in process %r" % new_task)
171 |
172 |
def clear(config, task_id, force=False):
    """Clear one task, or every DONE task when no id is given.

    Args:
        config: object providing ``get_task``, ``poll_all``, ``clear_all``
            and ``log``.
        task_id: id of the task to clear; when falsy, the tasks are polled
            and, if any is in the "DONE" state, ``config.clear_all()`` is
            called and the tasks it returns are printed as a table.
        force: when no ``task_id`` is given, skip the confirmation prompt.
    """
    if task_id:
        # Single-task mode clears silently; the result is not displayed.
        config.get_task(task_id).clear()
        return

    config.log.info("Polling for tasks to clear...")
    tasks = config.poll_all()
    tasks_to_clear = [t for t in tasks if t['task_state'] == "DONE"]

    if not tasks_to_clear:
        config.log.info("Could not find any tasks to clear")
        return

    if not force:
        prompt = ("Clear all 'DONE' tasks? There are currently %d. (y/n) "
                  % len(tasks_to_clear))
        if config.log.input(prompt).lower() != 'y':
            config.log.info("Not clearing.")
            return
    cleared_tasks = config.clear_all()
    _prettify_tasks(cleared_tasks)
    print(tabulate.tabulate(cleared_tasks, headers='keys'))
193 |
194 |
if __name__ == "__main__":
    # ---- Command-line definition -------------------------------------------
    parser = argparse.ArgumentParser(
        # usage=DESCRIPTION,
        description=DESCRIPTION,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument(
        "--config",
        help="Location of configuration file generated by depoly_gcp.py. "
             "If omitted, will attempt to use the default configuration. ")
    parser.add_argument(
        '--cloud-url',
        help="Which fastmap service to use. Default: Entry in the config or https://app.fastmap.io")
    # No argparse default here: a default value would always override the
    # verbosity stored in the configuration file.
    parser.add_argument(
        "--verbosity",
        choices=("SILENT", "QUIET", "NORMAL", "LOUD"),
        help="How loud fastmap is. Default: Entry in the config or NORMAL.",
        default=None)

    subparsers = parser.add_subparsers(
        dest='operation', required=True,
        help='sub-command help')

    # NOTE: the `offload` sub-command is disabled (its implementation is
    # commented out earlier in this script).

    login_p = subparsers.add_parser(
        'login', help="Login to a fastmap service")

    poll_p = subparsers.add_parser(
        'poll', help="Get the metadata of one or all tasks")
    poll_p.add_argument(
        "task_id", nargs='?',
        help="Which task to return specifically. If omitted, return all non-CLEARED tasks")

    logs_p = subparsers.add_parser(
        'logs',
        help="Get logs of a task. Task can be in any state except CLEARED. ")
    logs_p.add_argument(
        "task_id",
        help="Task ID of task to get logs for.")
    logs_p.add_argument(
        "--live", action="store_true",
        help="Get live logs until the task is DONE.")

    return_value_p = subparsers.add_parser(
        'return_value',
        help="Get the return_value of a task in a DONE state.")
    return_value_p.add_argument(
        "task_id",
        help="Task ID")

    traceback_p = subparsers.add_parser(
        'traceback',
        help="Get the traceback of a task in a DONE state with an ERROR outcome.")
    traceback_p.add_argument(
        "task_id",
        help="Task ID")

    kill_p = subparsers.add_parser(
        'kill',
        help="Kill a running task")
    kill_p.add_argument(
        "task_id", nargs='?',
        help="If omitted, kill all tasks")
    kill_p.add_argument(
        '--force', action='store_true',
        help='When task_id is omitted, kill all tasks without confirmation')

    retry_p = subparsers.add_parser(
        'retry',
        help='Retry a task in a DONE state')
    retry_p.add_argument(
        "task_id",
        help="Task to retry")

    clear_p = subparsers.add_parser(
        'clear',
        help="Clear a completed task")
    clear_p.add_argument(
        "task_id", nargs='?',
        help="If omitted, clear all tasks")
    clear_p.add_argument(
        '--force', action='store_true',
        help='When task_id is omitted, clear all tasks without confirmation')

    args = parser.parse_args()

    # ---- Configuration loading ---------------------------------------------
    # Only I/O and parse errors are handled (json.JSONDecodeError is a
    # ValueError); KeyboardInterrupt and SystemExit propagate untouched.
    if args.config:
        # An explicitly requested file must load.
        try:
            with open(args.config) as f:
                config_json = json.load(f)
        except (OSError, ValueError) as err:
            raise AssertionError("Error loading configuration %r" % args.config) from err
    else:
        # The default file is optional: fall back to an empty configuration.
        try:
            with open(DEFAULT_CONFIG_DIR) as f:
                config_json = json.load(f)
        except (OSError, ValueError):
            config_json = {}

    # This CLI always targets the cloud service.
    config_json['exec_policy'] = "CLOUD"
    # Precedence for both settings: command line, then config file, then default.
    if args.verbosity:
        config_json['verbosity'] = args.verbosity
    else:
        config_json.setdefault('verbosity', "NORMAL")
    if args.cloud_url:
        config_json['cloud_url'] = args.cloud_url
    elif not config_json.get('cloud_url'):
        config_json['cloud_url'] = "https://app.fastmap.io"

    # NOTE(review): init() runs before `login` as well; confirm it does not
    # require a secret, otherwise a first-time login cannot get this far.
    config = fastmap.init(
        config=config_json)

    # ---- Dispatch ----------------------------------------------------------
    if args.operation == 'login':
        login(config_json['cloud_url'])
    elif args.operation == 'poll':
        poll(config, args.task_id)
    elif args.operation == 'return_value':
        return_value(config, args.task_id)
    elif args.operation == 'traceback':
        traceback(config, args.task_id)
    elif args.operation == 'kill':
        kill(config, args.task_id, args.force)
    elif args.operation == 'retry':
        retry(config, args.task_id)
    elif args.operation == 'logs':
        logs(config, args.task_id, args.live)
    elif args.operation == 'clear':
        clear(config, args.task_id, args.force)
342 |
--------------------------------------------------------------------------------
/tests/test_sdk.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import gzip
3 | import io
4 | import math
5 | import pickle
6 | import random
7 | import re
8 | import os
9 | import sys
10 | import time
11 | import types
12 |
13 | import dill
14 | import msgpack
15 | import pytest
16 | import requests_mock
17 |
18 | sys.path.append(os.getcwd().split('/tests')[0])
19 |
20 | from fastmap import (init, global_init, fastmap, _reset_global_config,
21 | FastmapException, sdk_lib, ReturnType,
22 | Verbosity, ExecPolicy)
23 |
# Dummy secret: "abcd" repeated to a total length of 64 characters.
TEST_SECRET = "abcd" * (64 // 4)
25 |
26 |
def flatten(lst):
    """Concatenate the elements of every sub-iterable of ``lst`` into one list."""
    # https://stackoverflow.com/questions/952914
    flat = []
    for sublst in lst:
        flat.extend(sublst)
    return flat
30 |
31 |
def primeFactors(n):
    """Return the prime factors of ``n`` in ascending order, with multiplicity.

    ``primeFactors(12)`` is ``[2, 2, 3]``; 0 and 1 both give ``[]``.
    Integer division keeps ``n`` an int, and the prime left over after trial
    division is appended as itself.
    """
    # adapted from https://www.geeksforgeeks.org/print-all-prime-factors-of-a-given-number/
    if n == 0:
        return []
    ret = []
    # Strip every factor of two first.
    while n % 2 == 0:
        ret.append(2)
        n = n // 2

    # n is odd at this point, so only odd candidates need to be tried.
    for i in range(3, int(math.sqrt(n)) + 1, 2):
        # While i divides n, record i and divide it out.
        while n % i == 0:
            ret.append(i)
            n = n // i

    # Whatever remains above 2 is itself a prime factor.
    if n > 2:
        ret.append(n)
    return ret
55 |
56 |
def calc_pi_basic(seed, two=2.0):
    """Return 1 if the seeded random point lies inside the unit circle, else 0.

    The generator is seeded with ``seed``; x and then y are drawn as
    ``random.random() * two - 1.0``.
    """
    random.seed(seed)
    # The comprehension draws x first and y second, which fixes the result.
    coords = [random.random() * two - 1.0 for _ in range(2)]
    inside = coords[0]**2 + coords[1]**2 <= 1.0
    return int(inside)
62 |
63 |
def calc_pi_dead_99(seed):
    """Like calc_pi_basic with two=2.0, but fails its assertion for seed 99."""
    assert seed != 99
    random.seed(seed)
    # Unpacking the generator draws x first and y second.
    x, y = (random.random() * 2.0 - 1.0 for _ in range(2))
    if x**2 + y**2 <= 1.0:
        return 1
    return 0
70 |
71 |
def fake_input_yes(self, msg):
    """Stand-in for an input method: echo the prompt and answer 'y'."""
    answer = 'y'
    print(msg)
    return answer
75 |
76 |
def fake_input_no(self, msg):
    """Stand-in for an input method: echo the prompt and answer 'n'."""
    answer = 'n'
    print(msg)
    return answer
80 |
81 |
def test_local_basic():
    """Map calc_pi_basic over seeds 0..99 with a LOCAL exec policy.

    The same pi estimate (3.12) is expected whether the input is a range, a
    list, an iterator or a set.
    """
    config = init(exec_policy="LOCAL")
    assert isinstance(config, sdk_lib.FastmapConfig)
    range_100 = range(100)

    # range input
    gen = config.fastmap(calc_pi_basic, range_100)
    assert isinstance(gen, types.GeneratorType)
    pi = 4.0 * sum(gen) / len(range_100)
    assert pi == 3.12

    # list input
    gen = config.fastmap(calc_pi_basic, list(range_100))
    assert isinstance(gen, types.GeneratorType)
    pi = 4.0 * sum(gen) / len(range_100)
    assert pi == 3.12

    # iterator input
    gen = config.fastmap(calc_pi_basic, iter(range_100))
    assert isinstance(gen, types.GeneratorType)
    pi = 4.0 * sum(gen) / len(range_100)
    assert pi == 3.12

    # set input (the generator type is not asserted for this case)
    gen = config.fastmap(calc_pi_basic, set(range_100))
    pi = 4.0 * sum(gen) / len(range_100)
    assert pi == 3.12
105 |
106 |
def test_return_type_seq():
    """Check ReturnType's members and the BATCHES return type of config.fastmap.

    For both the QUIET and NORMAL verbosities: an unknown return type raises
    FastmapException, empty inputs yield nothing, and non-empty inputs yield
    lists whose flattened sum matches the expected sum of square roots.
    """
    assert ReturnType.ELEMENTS == "ELEMENTS"
    assert ReturnType.BATCHES == "BATCHES"
    assert set(ReturnType) == set(("ELEMENTS", "BATCHES"))

    range_0 = range(0)
    range_1 = range(1)
    range_100 = range(100)

    for verbosity in ("QUIET", "NORMAL"):
        config = init(exec_policy="LOCAL", verbosity=verbosity)
        # The error is expected once the generator is consumed by list().
        with pytest.raises(FastmapException):
            list(config.fastmap(lambda x: x**.5, [], return_type="FAKE_RETURN_TYPE"))

        # Empty list input
        seq = config.fastmap(lambda x: x**.5, [], return_type="BATCHES")
        assert isinstance(seq, types.GeneratorType)
        assert list(seq) == []

        # Empty range input
        seq = config.fastmap(lambda x: x**.5, range_0, return_type="BATCHES")
        assert isinstance(seq, types.GeneratorType)
        assert list(seq) == []

        # Single element, list input: exactly one batch, and it is a list
        seq = config.fastmap(lambda x: x**.5, list(range_1), return_type="BATCHES")
        assert isinstance(seq, types.GeneratorType)
        seq = list(seq)
        assert len(seq) == 1
        assert isinstance(seq[0], list)

        # Single element, range input
        seq = config.fastmap(lambda x: x**.5, range_1, return_type="BATCHES")
        assert isinstance(seq, types.GeneratorType)
        seq = list(seq)
        assert len(seq) == 1
        assert isinstance(seq[0], list)

        # 100 elements, range input: the batch count is not asserted, only the sum
        seq = config.fastmap(lambda x: x**.5, range_100, return_type="BATCHES")
        assert isinstance(seq, types.GeneratorType)
        seq = list(seq)
        assert all(isinstance(e, list) for e in seq)
        assert math.isclose(sum(flatten(seq)), 661.4629471031477)

        # 100 elements, list input
        seq = config.fastmap(lambda x: x**.5, list(range_100), return_type="BATCHES")
        assert isinstance(seq, types.GeneratorType)
        seq = list(seq)
        assert all(isinstance(e, list) for e in seq)
        assert math.isclose(sum(flatten(seq)), 661.4629471031477)
152 |
153 |
class Wrapper:
    """Minimal mutable object holding one value ``x``, used to map over non-primitive items."""

    def __init__(self, x):
        self.x = x

    def sqrt(self):
        """Replace ``x`` in place with ``x ** .5``; returns None."""
        root = self.x ** .5
        self.x = root
160 |
161 |
def test_objects():
    """LOCAL fastmap returns mapped Wrapper objects for list and generator inputs of several sizes."""
    def proc(x):
        x.sqrt()
        return x

    # (input, expected result count, index to probe, expected .x at that index)
    # All inputs are built before the config is created, lists and generators alternating.
    cases = [
        ([Wrapper(1)], 1, 0, 1),
        ((Wrapper(x) for x in range(1, 2)), 1, 0, 1),
        ([Wrapper(x) for x in range(100)], 100, 99, 99 ** .5),
        ((Wrapper(x) for x in range(100)), 100, 99, 99 ** .5),
        ([Wrapper(x) for x in range(200000)], 200000, 99999, 99999 ** .5),
        ((Wrapper(x) for x in range(200000)), 200000, 99999, 99999 ** .5),
    ]

    config = init(exec_policy="LOCAL")
    for iterable, expected_len, probe_idx, expected_x in cases:
        results = list(config.fastmap(proc, iterable))
        assert len(results) == expected_len
        assert results[probe_idx].x == expected_x
195 |
196 |
def test_local_empty():
    """An empty list or empty iterator maps to a generator that yields nothing."""
    config = init(exec_policy="LOCAL")

    for empty in ([], iter([])):
        gen = config.fastmap(calc_pi_basic, empty)
        assert isinstance(gen, types.GeneratorType)
        assert list(gen) == []
207 |
208 |
def test_local_no_init():
    """The module-level fastmap works right after the global config is reset."""
    _reset_global_config()
    inputs = range(100)
    hits = sum(fastmap(calc_pi_basic, inputs))
    assert 4.0 * hits / len(inputs) == 3.12
214 |
215 |
def test_local_global_init():
    """The module-level fastmap works after global_init with the LOCAL policy."""
    global_init(exec_policy="LOCAL")
    inputs = range(100)
    hits = sum(fastmap(calc_pi_basic, inputs))
    assert 4.0 * hits / len(inputs) == 3.12
221 |
222 |
def test_local_functools():
    """Keyword arguments given via ``kwargs`` are bound to the mapped function."""
    config = init(exec_policy="LOCAL")
    inputs = range(100)
    bound_kwargs = {'two': 2.0}
    hits = sum(config.fastmap(calc_pi_basic, inputs, kwargs=bound_kwargs))
    assert 4.0 * hits / len(inputs) == 3.12
228 |
229 |
def test_max_local_workers():
    """The pi estimate is unchanged with max_local_workers set to 2 and then to 1."""
    # 2 workers first; 1 worker is there to get into the max_local_workers <= 1 path
    for workers in (2, 1):
        config = init(exec_policy="LOCAL", max_local_workers=workers)
        hits = sum(config.fastmap(calc_pi_basic, range(100)))
        assert 4.0 * hits / len(range(100)) == 3.12
239 |
240 |
def test_exec_policy():
    """ExecPolicy exposes exactly LOCAL/CLOUD/ADAPTIVE and init rejects anything else."""
    policies = ("LOCAL", "CLOUD", "ADAPTIVE")
    for name in policies:
        assert getattr(ExecPolicy, name) == name
    assert set(ExecPolicy) == {"LOCAL", "ADAPTIVE", "CLOUD"}

    with pytest.raises(FastmapException):
        init(exec_policy="INVALID")
    # Every valid policy must be accepted by init
    for exec_policy in policies:
        init(exec_policy=exec_policy)
251 |
252 |
def test_verbosity(capsys):
    """Check Verbosity members and what each verbosity level writes to stdout."""
    for level in ("SILENT", "QUIET", "NORMAL", "LOUD"):
        assert getattr(Verbosity, level) == level
    assert set(Verbosity) == {"SILENT", "QUIET", "NORMAL", "LOUD"}

    def stdout_for(exec_policy, verbosity):
        # Run one small map under the given settings and return the captured stdout
        config = init(exec_policy=exec_policy, verbosity=verbosity)
        list(config.fastmap(lambda x: x**x, range(10)))
        return capsys.readouterr().out

    assert stdout_for("LOCAL", "QUIET") == ""
    assert stdout_for("LOCAL", "SILENT") == ""
    assert "fastmap WARNING:" in stdout_for("ADAPTIVE", "QUIET")
    assert "fastmap INFO:" in stdout_for("LOCAL", "NORMAL")

    loud_out = stdout_for("LOCAL", "LOUD")
    assert "fastmap DEBUG:" in loud_out
    assert "fastmap INFO:" in loud_out

    # An unknown verbosity is rejected at init time
    with pytest.raises(FastmapException):
        init(exec_policy="LOCAL", verbosity="FAKE")
283 |
284 |
def test_lambda():
    """Lambdas can be mapped; an exception raised inside one surfaces as ZeroDivisionError."""
    config = init(exec_policy="LOCAL")
    with pytest.raises(ZeroDivisionError):
        # x == 0 is part of the input, so 1.0 / x fails during execution
        sum(config.fastmap(lambda x: 1.0 / x, range(100)))

    # Alternating harmonic series over 1..999
    terms = config.fastmap(lambda x: 1.0 / x if x % 2 == 1 else -1.0 / x, range(1, 1000))
    assert math.isclose(sum(terms), 0.6936474305598223)
294 |
295 |
def test_closure_basic():
    """A nested function (capturing nothing) can be mapped like a lambda."""
    config = init(exec_policy="LOCAL")
    with pytest.raises(ZeroDivisionError):
        # x == 0 is part of the input, so 1.0 / x fails during execution
        sum(config.fastmap(lambda x: 1.0 / x, range(100)))

    def cl(x):
        return 1.0 / x if x % 2 == 1 else -1.0 / x

    # Alternating harmonic series over 1..999
    the_sum = sum(config.fastmap(cl, range(1, 1000)))
    assert math.isclose(the_sum, 0.6936474305598223)
312 |
313 |
def test_closure_real():
    """A nested function that captures a variable from the enclosing scope can be mapped."""
    config = init(exec_policy="LOCAL")
    with pytest.raises(ZeroDivisionError):
        # x == 0 is part of the input, so 1.0 / x fails during execution
        sum(config.fastmap(lambda x: 1.0 / x, range(100)))
    one = 1.0

    def cl(x):
        # `one` is read from the enclosing test function's scope
        return one / x if x % 2 == 1 else -1 * one / x

    # Alternating harmonic series over 1..999
    the_sum = sum(config.fastmap(cl, range(1, 1000)))
    assert math.isclose(the_sum, 0.6936474305598223)
331 |
332 |
def test_single_threaded(monkeypatch):
    """Pi estimate for list/iterator/set inputs with INITIAL_RUN_DUR forced to 0."""
    # Set initial run duration to make it not process everything on first run
    # but don't change proc_overhead so that it decides processes are too much
    config = init(exec_policy="LOCAL")
    monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
    range_100 = range(100)
    for make_iterable in (list, iter, set):
        hits = sum(config.fastmap(calc_pi_basic, make_iterable(range_100)))
        assert 4.0 * hits / len(range_100) == 3.12
345 |
346 |
def test_process_local(monkeypatch):
    """Pi estimate for list/iterator/set inputs with INITIAL_RUN_DUR and PROC_OVERHEAD forced to 0."""
    config = init(exec_policy="LOCAL")
    monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
    monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
    range_100 = range(100)
    for make_iterable in (list, iter, set):
        hits = sum(config.fastmap(calc_pi_basic, make_iterable(range_100)))
        assert 4.0 * hits / len(range_100) == 3.12
358 |
359 |
def test_single_threaded_process(capsys, monkeypatch):
    """Pi estimate with max_local_workers=1 and both Mapper timing constants forced to 0."""
    config = init(exec_policy="LOCAL", max_local_workers=1)
    monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
    monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
    inputs = range(100)
    hits = sum(config.fastmap(calc_pi_basic, list(inputs)))
    assert 4.0 * hits / len(inputs) == 3.12
367 |
368 |
def test_single_threaded_process_exception(capsys, monkeypatch):
    """With one local worker, the mapped function's AssertionError reaches the caller as-is."""
    config = init(exec_policy="LOCAL", max_local_workers=1)
    monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
    monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
    # calc_pi_dead_99 asserts on seed 99, which is part of range(100)
    failing_map = config.fastmap(calc_pi_dead_99, range(100))
    with pytest.raises(AssertionError):
        list(failing_map)
375 |
376 |
def test_process_exception(capsys, monkeypatch):
    """With two local workers, a failure in the mapped function raises FastmapException."""
    config = init(exec_policy="LOCAL", max_local_workers=2)
    monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
    monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
    # calc_pi_dead_99 asserts on seed 99, which is part of range(100)
    failing_map = config.fastmap(calc_pi_dead_99, range(100))
    with pytest.raises(FastmapException):
        list(failing_map)
383 |
384 |
def test_process_adaptive(capsys, monkeypatch):
    """ADAPTIVE policy still yields the expected pi estimate for list and iterator inputs."""
    # remote will die but this will continue
    config = init(exec_policy="ADAPTIVE")
    monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
    monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
    range_100 = range(100)
    for make_iterable in (list, iter):
        hits = sum(config.fastmap(calc_pi_basic, make_iterable(range_100)))
        assert 4.0 * hits / len(range_100) == 3.12
395 |
396 |
def test_slow_generator():
    """Map over a generator that sleeps between items, for both success and failure."""
    def slow_gen(iterable):
        # Yield each element, pausing 10ms after handing it out
        for el in iterable:
            yield el
            time.sleep(.01)

    config = init(exec_policy="LOCAL")
    hits = sum(config.fastmap(calc_pi_basic, slow_gen(range(100))))
    assert 4.0 * hits / 100 == 3.12

    # test the do_die in the _FillInbox generators
    # (x == 50 makes the lambda divide by zero)
    for source in (range(100), list(range(100))):
        with pytest.raises(FastmapException):
            sum(config.fastmap(lambda x: 1 / (x - 50), slow_gen(source)))
411 |
412 |
def test_order(monkeypatch):
    """Results come back in input order even with a very small BATCH_DUR_GOAL."""
    config = init(exec_policy="LOCAL")
    monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
    monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
    monkeypatch.setattr(sdk_lib._FillInbox, "BATCH_DUR_GOAL", .0001)
    # int((x**2)**.5) is the identity on this range, so output must equal input
    mapped = config.fastmap(lambda x: int((x**2)**.5), range(10000))
    order_range = list(mapped)
    assert order_range == list(range(10000))
420 |
421 |
def test_no_secret(monkeypatch, capsys):
    """Without a secret, CLOUD and ADAPTIVE both warn and fall back to LOCAL."""
    for requested_policy in ("CLOUD", "ADAPTIVE"):
        config = init(exec_policy=requested_policy)
        captured = capsys.readouterr()
        assert re.search("fastmap WARNING:.*?LOCAL.\n", captured.out)
        assert config.exec_policy == "LOCAL"
432 |
433 |
def test_remote_no_connection(monkeypatch, capsys):
    """CLOUD execution against an unreachable URL raises and logs 'could not connect'."""
    config = init(exec_policy="CLOUD", verbosity="LOUD", secret=TEST_SECRET,
                  cloud_url="localhost:9999")
    for mapper_attr in ("INITIAL_RUN_DUR", "PROC_OVERHEAD"):
        monkeypatch.setattr(sdk_lib.Mapper, mapper_attr, 0)
    with pytest.raises(FastmapException):
        list(config.fastmap(lambda x: x**.5, range(100)))
    captured_out = capsys.readouterr().out
    assert re.search("could not connect", captured_out)
444 |
445 |
def test_invalid_token(capsys):
    """init accepts no secret or a 64-character 'a' secret, and rejects malformed ones."""
    cloud_loud = {"exec_policy": "CLOUD", "verbosity": "LOUD"}

    init(secret=None, **cloud_loud)
    # Wrong type, wrong characters, one too short, one too long
    for bad_token in (5, "_" * 64, "a" * 63, "a" * 65):
        with pytest.raises(FastmapException):
            init(secret=bad_token, **cloud_loud)
    init(secret="a" * 64, **cloud_loud)
452 |
453 |
def test_confirm_charges_basic(capsys, monkeypatch):
    """Exercise the confirm_charges prompt with "no", "yes" and unrecognized answers.

    FastmapLogger.input is replaced with fakes so no real stdin is needed, and
    stdout is inspected through capsys after each step. The order of the
    statements matters: each capsys.readouterr() drains whatever was printed
    since the previous call.
    """
    # Basic local should not warn about confirming charges or any issues with
    # the secret
    # config = init(exec_policy="LOCAL", max_local_workers=2)
    # stdio = capsys.readouterr()
    # assert not re.search("fastmap WARNING:.*?confirm_charges", stdio.out)
    # assert not re.search("fastmap WARNING:.*?secret.*?LOCAL", stdio.out)
    # assert isinstance(config, FastmapConfig)
    # assert config.exec_policy == "LOCAL"

    # # Basic cloud should warn about an absent secret and set execpolicy to local
    # # (and say something about it)
    # config = init(exec_policy="CLOUD", max_local_workers=2)
    # stdio = capsys.readouterr()
    # assert not re.search("fastmap WARNING:.*?confirm_charges", stdio.out)
    # assert re.search("fastmap WARNING:.*?secret.*?LOCAL", stdio.out)
    # assert config.exec_policy == "LOCAL"

    # If a secret is correctly provided for cloud, warn about confirming
    # charges and do not set to local config policy
    # config = init(exec_policy="CLOUD", secret=TEST_SECRET, max_local_workers=2)
    # stdio = capsys.readouterr()
    # assert re.search("fastmap WARNING:.*?confirm_charges", stdio.out)
    # assert not re.search("fastmap WARNING:.*?secret.*?LOCAL", stdio.out)
    # assert config.exec_policy == "CLOUD"

    # If we set confirm charges, assert no warnings are thrown
    config = init(exec_policy="CLOUD", secret=TEST_SECRET, cloud_url="https://a.a",
                  confirm_charges=True, max_local_workers=2)
    monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
    monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
    # Every prompt from here on is answered with 'n' (until replaced below)
    monkeypatch.setattr(sdk_lib.FastmapLogger, "input", fake_input_no)
    stdio = capsys.readouterr()
    assert not re.search("fastmap WARNING:.*?confirm_charges", stdio.out)
    assert not re.search("fastmap WARNING:.*?secret.*?LOCAL", stdio.out)
    assert config.exec_policy == "CLOUD"
    assert config.confirm_charges is True

    # Using the same config, ensure that every process dies with a fake url.
    # There should only be 1 process which can die
    config.cloud_url = "localhost:9999"
    # Make AuthCheck.was_success always report True
    # (presumably so execution proceeds to the charge confirmation -- TODO confirm)
    monkeypatch.setattr(sdk_lib.AuthCheck, "was_success", lambda _: True)
    # A sequence input produces a "Continue?" prompt ...
    with pytest.raises(FastmapException):
        list(config.fastmap(lambda x: x**.5, range(100)))
    stdio = capsys.readouterr()
    assert re.search(r"Continue\?", stdio.out)
    # ... while an iterator input produces a "Continue anyway?" prompt
    with pytest.raises(FastmapException):
        list(config.fastmap(lambda x: x**.5, iter(range(100))))
    stdio = capsys.readouterr()
    assert re.search(r"Continue anyway\?", stdio.out)

    # Adaptive should log cancelled
    config = init(exec_policy="ADAPTIVE", secret=TEST_SECRET,
                  confirm_charges=True, max_local_workers=2,
                  cloud_url="localhost:9999/")
    list(config.fastmap(lambda x: x**.5, range(100)))
    stdio = capsys.readouterr()
    assert re.search(r"fastmap INFO:.*?cancelled", stdio.out)

    # Test enter yes
    monkeypatch.setattr(sdk_lib.FastmapLogger, "input", fake_input_yes)
    config = init(exec_policy="ADAPTIVE", secret=TEST_SECRET, confirm_charges=True,
                  cloud_url="https://a.a",)
    # monkeypatch.setattr('sys.stdin', io.StringIO('y\n'))
    data = list(config.fastmap(lambda x: x**.5, iter(range(100))))
    assert data

    def fake_input_try_again(self, msg, now={}):
        # The mutable default `now` is intentional: it persists across calls,
        # so the first call returns an unrecognized answer and every later
        # call returns "n".
        print(msg)
        if not now.get('done'):
            now['done'] = True
            return 'will repeat'
        return "n"

    # Test unrecognized input
    monkeypatch.setattr(sdk_lib.FastmapLogger, "input", fake_input_try_again)
    config = init(exec_policy="ADAPTIVE", secret=TEST_SECRET, cloud_url='https://a.a',
                  confirm_charges=True)
    list(config.fastmap(lambda x: x**.5, iter(range(100))))
    stdio = capsys.readouterr()
    assert "Unrecognized input" in stdio.out
536 |
537 |
def test_empty_remote():
    """Empty inputs map to an empty result under the CLOUD and ADAPTIVE policies."""
    for exec_policy in ("CLOUD", "ADAPTIVE"):
        config = init(exec_policy=exec_policy)
        # Empty list first, then empty iterator
        for empty in ([], iter([])):
            assert list(config.fastmap(lambda x: x**.5, empty)) == []
545 |
546 |
def resp_dump(resp_dict):
    """Pickle ``resp_dict`` and return the pickle bytes base64-encoded.

    :param resp_dict: Object to serialize.
    :rtype: bytes
    """
    pickled = pickle.dumps(resp_dict)
    return base64.b64encode(pickled)
549 |
550 |
def resp_headers():
    """Return a fresh dict of fake response headers (ids and timing values as strings).

    :rtype: dict
    """
    fake_id = "FAKE_ID"
    headers = {}
    headers["X-Container-Id"] = fake_id
    headers["X-Thread-Id"] = fake_id
    headers["X-Process-Seconds"] = '4'
    headers["X-Total-Seconds"] = '5'
    return headers
558 |
559 | # def test_remote_200(monkeypatch, requests_mock):
560 | # monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
561 | # monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
562 |
563 | # results = list(map(lambda x: 1/x, range(1, 100)))
564 |
565 | # resp = resp_dump({"status": "OK",
566 | # "results": results[1:],
567 | # "map_seconds": 5})
568 | # requests_mock.post('localhost:9999/api/v1/map',
569 | # content=resp,
570 | # status_code=200,
571 | # headers=resp_headers())
572 | # config = init(exec_policy="CLOUD", secret=TEST_SECRET)
573 | # config.cloud_url = "localhost:9999"
574 | # assert math.isclose(sum(config.fastmap(lambda x: 1/x, range(1, 100))),
575 | # sum(results))
576 |
577 | # def test_remote_401(monkeypatch, requests_mock, capsys):
578 | # resp = resp_dump({"status": "UNAUTHORIZED",
579 | # "reason": "UNAUTHORIZED"})
580 | # requests_mock.post('localhost:9999/api/v1/map',
581 | # content=resp,
582 | # status_code=401)
583 | # config = init(exec_policy="CLOUD", secret=TEST_SECRET)
584 | # monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
585 | # monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
586 | # with pytest.raises(FastmapException):
587 | # # Unauthorized will kill the cloud thread
588 | # list(config.fastmap(sqrt, range(100)))
589 | # stdio = capsys.readouterr()
590 | # assert re.search("fastmap ERROR:.*?Unauthorized", stdio.out)
591 |
592 |
593 | # def test_remote_402(monkeypatch, requests_mock, capsys):
594 | # resp = resp_dump({"status": "NOT_ENOUGH_CREDITS",
595 | # "reason": "You do not have any credits available"})
596 | # requests_mock.post('localhost:9999/api/v1/map',
597 | # content=resp,
598 | # status_code=402)
599 | # config = init(exec_policy="CLOUD", secret=TEST_SECRET)
600 | # monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
601 | # monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
602 | # with pytest.raises(FastmapException):
603 | # # Unauthorized will kill the cloud thread
604 | # list(config.fastmap(sqrt, range(100)))
605 | # stdio = capsys.readouterr()
606 | # assert re.search("fastmap ERROR:.*?credits", stdio.out)
607 |
608 | # def test_remote_403(monkeypatch, requests_mock, capsys):
609 | # resp = resp_dump({"status": "NOT_ENOUGH_CREDITS",
610 | # "reason": "You do not have any credits available"})
611 | # requests_mock.post('localhost:9999/api/v1/map',
612 | # content=resp,
613 | # status_code=402)
614 | # config = init(exec_policy="CLOUD", secret=TEST_SECRET)
615 | # monkeypatch.setattr(sdk_lib.Mapper, "INITIAL_RUN_DUR", 0)
616 | # monkeypatch.setattr(sdk_lib.Mapper, "PROC_OVERHEAD", 0)
617 | # with pytest.raises(FastmapException):
618 | # # Unauthorized will kill the cloud thread
619 | # list(config.fastmap(sqrt, range(100)))
620 | # stdio = capsys.readouterr()
621 | # assert re.search("fastmap ERROR:.*?credits", stdio.out)
622 |
def test_post_request(monkeypatch, capsys):
    """Drive sdk_lib.post_request against mocked responses with good and bad content."""
    url = "localhost:8888/api/v1/map"
    data = msgpack.dumps({"hello": "world"})
    secret = "secret_token"
    log = sdk_lib.FastmapLogger('QUIET')
    # Mutated between cases; the helper below always serves the current state
    headers = {"X-Status": "OK"}
    payload = {"world": "hello"}

    def post_with_mock(content, **response_kwargs):
        # Serve `content` with the current headers and issue one post_request
        with requests_mock.Mocker() as m:
            m.post(url, content=content, headers=headers, **response_kwargs)
            return sdk_lib.post_request(url, data, secret, log)

    # Bad content type on API call
    headers['Content-Type'] = "text/html"
    with pytest.raises(sdk_lib.CloudError):
        post_with_mock(msgpack.dumps(payload))

    # Server warning and basic msgpack obj extraction
    headers['Content-Type'] = "application/msgpack"
    headers["X-Server-Warning"] = "abcdefg"
    resp = post_with_mock(msgpack.dumps(payload))
    stdio = capsys.readouterr()
    assert re.search("WARNING:[^\n]+ abcdefg", stdio.out)
    assert resp.obj['world'] == 'hello'
    del headers["X-Server-Warning"]

    # Cloud error on 500 status code
    with pytest.raises(sdk_lib.CloudError):
        post_with_mock(msgpack.dumps(payload), status_code=500)

    # No Content signature on octet-stream
    headers['Content-Type'] = "application/octet-stream"
    body = gzip.compress(dill.dumps(payload))
    with pytest.raises(sdk_lib.CloudError):
        post_with_mock(body)

    # Wrong content signature on octet-stream
    headers['X-Content-Signature'] = "fake"
    with pytest.raises(sdk_lib.CloudError):
        post_with_mock(body)

    # Correct content signature. Extract works
    headers['X-Content-Signature'] = sdk_lib.hmac_digest(secret, body)
    resp = post_with_mock(body)
    assert resp.obj['world'] == 'hello'

    # Not gzipped
    body = dill.dumps(payload)
    headers['X-Content-Signature'] = sdk_lib.hmac_digest(secret, body)
    with pytest.raises(sdk_lib.CloudError):
        post_with_mock(body)

    # Gzipped, but the content is the str() of the dict rather than a dill dump
    body = gzip.compress(str(payload).encode())
    headers['X-Content-Signature'] = sdk_lib.hmac_digest(secret, body)
    with pytest.raises(sdk_lib.CloudError):
        post_with_mock(body)
704 |
705 |
def test_fmt_bytes():
    """fmt_bytes switches unit at each power of 1024 and prints one decimal above bytes."""
    expectations = (
        (1023, "1023B"),
        (1024, "1.0KB"),
        (2048, "2.0KB"),
        (1024**2, "1.0MB"),
        (1024**2 * 2, "2.0MB"),
        (1024**3, "1.0GB"),
        (1024**3 * 2, "2.0GB"),
    )
    for num_bytes, expected in expectations:
        assert sdk_lib.fmt_bytes(num_bytes) == expected
714 |
715 |
def test_fmt_time():
    """fmt_time prints seconds below a minute, then MM:SS, then HH:MM:SS."""
    hour = 60 * 60
    expectations = (
        (59, "59s"),
        (60, "01:00"),
        (61, "01:01"),
        (121, "02:01"),
        (hour, "01:00:00"),
        (hour + 1, "01:00:01"),
        (hour + 61, "01:01:01"),
    )
    for seconds, expected in expectations:
        assert sdk_lib.fmt_time(seconds) == expected
724 |
725 |
def test_fmt_dur():
    """fmt_dur picks milliseconds, seconds, minutes or hours by magnitude."""
    hour = 60 * 60
    expectations = (
        (.000009, "0 milliseconds"),
        (.9, "900 milliseconds"),
        (1, "1.00 seconds"),
        (59, "59.00 seconds"),
        (60, "1.00 minutes"),
        (61, "1.02 minutes"),
        (121, "2.02 minutes"),
        (hour, "1.00 hours"),
        (hour + 1, "1.00 hours"),
        (hour + 61, "1.02 hours"),
    )
    for seconds, expected in expectations:
        assert sdk_lib.fmt_dur(seconds) == expected
737 |
738 |
def test_namespace():
    """Namespace supports attribute access, membership tests and iteration over its names."""
    ns = sdk_lib.Namespace("A", B="C")
    # A positional name maps to itself; a keyword maps to its given value
    assert ns.A == "A"
    assert ns.B == "C"
    for present in ('A', 'B'):
        assert present in ns
    assert 'C' not in ns
    assert set(list(ns)) == {'A', 'B'}
    with pytest.raises(AttributeError):
        ns.C
749 |
750 |
def test_short_func():
    """Map a quick function over a one-element tuple and generator under every exec policy."""
    SMALL_NUM = 15

    def generator():
        yield SMALL_NUM

    # Only checks that each call completes; the results are not inspected
    for exec_policy in ("CLOUD", "ADAPTIVE", "LOCAL"):
        config = init(exec_policy=exec_policy, secret="0" * 64)
        list(config.fastmap(primeFactors, (SMALL_NUM,)))
        list(config.fastmap(primeFactors, generator()))
768 |
769 |
def test_long_func(monkeypatch):
    """Map a slow function over a single element under every exec policy."""
    # regression test for bug when on CLOUD exec_policy with a long initial
    # function which nevertheless clears out the iterable
    # also test with other exec policies and generator types
    # this is still only one number so will never actually call the cloud

    BIG_NUM = 29393395993999

    def generator():
        yield BIG_NUM

    def map_tuple_then_generator(exec_policy):
        # Only checks that both calls complete; the results are not inspected
        config = init(exec_policy=exec_policy, verbosity="LOUD", secret="0" * 64)
        list(config.fastmap(primeFactors, (BIG_NUM,)))
        list(config.fastmap(primeFactors, generator()))

    map_tuple_then_generator("LOCAL")

    # From here on AuthCheck.was_success always reports True
    monkeypatch.setattr(sdk_lib.AuthCheck, "was_success", lambda _: True)
    map_tuple_then_generator("CLOUD")
    map_tuple_then_generator("ADAPTIVE")
793 |
794 |
def test_log_etcetera(monkeypatch, capsys):
    """Cover FastmapLogger debug/hush/restore_verbosity/input directly."""
    # log functions that can't be captured in normal tests

    logger = sdk_lib.FastmapLogger("LOUD")
    logger.debug("Hello")
    debug_out = capsys.readouterr().out
    assert "fastmap DEBUG:" in debug_out
    assert 'Hello' in debug_out

    # While hushed, even errors print nothing
    logger.hush()
    logger.error("Hello")
    assert "" == capsys.readouterr().out

    # After restoring verbosity, errors are printed again
    logger.restore_verbosity()
    logger.error("Hello")
    assert "Hello" in capsys.readouterr().out

    # input() echoes the prompt and returns the line read from stdin
    monkeypatch.setattr('sys.stdin', io.StringIO('y\n'))
    resp = logger.input("Hi")
    assert resp == 'y'
    assert "Hi" in capsys.readouterr().out
819 |
820 |
# Allow running this test module directly as a script by handing off to pytest
if __name__ == '__main__':
    pytest.main()
823 |
--------------------------------------------------------------------------------
/fastmap/sdk_lib.py:
--------------------------------------------------------------------------------
1 | """
2 | Primary file for the fastmap SDK. Almost all client-side code is in this file.
3 | Do not instantiate anything here directly. Use the interface __init__.py.
4 | """
5 |
6 | import atexit
7 | import collections
8 | import datetime
9 | import distutils.sysconfig
10 | import functools
11 | import glob
12 | import gzip
13 | import hashlib
14 | import hmac
15 | import importlib.metadata
16 | import json
17 | import multiprocessing
18 | import os
19 | import pathlib
20 | import queue
21 | import re
22 | import secrets
23 | import string
24 | import sys
25 | import threading
26 | import traceback
27 | import time
28 | from collections.abc import Iterable, Sequence, Generator
29 | from types import FunctionType, ModuleType
30 | from typing import List, Dict
31 |
32 | import dill
33 | import msgpack
34 | import requests
35 |
# "P-" or "S-" followed by exactly 64 alphanumeric characters
SECRET_RE = r'^[PS]\-[0-9a-zA-Z]{64}$'
# Exactly 12 alphanumeric characters
TASK_RE = r'^[0-9a-zA-Z]{12}$'
# Path prefix up to and including a pythonX.Y site-packages/dist-packages directory
SITE_PACKAGES_RE = re.compile(r".*?/python[0-9.]+/(?:site|dist)\-packages/")
# A single pinned requirement line of the form name==version
REQUIREMENT_RE = re.compile(r'^[\w-]+==[\w-]+(?:\.[\w-]+)*$')
CLIENT_VERSION = "0.0.12"
# Byte-size units, each 1024 times the previous one
KB = 1024
MB = KB * KB
GB = MB * KB
44 |
45 | # MAP_DOCSTRING = """
46 | # Map a function over an iterable and return the results.
47 | # Depending on prior configuration, fastmap will run either locally via
48 | # multiprocessing, in the cloud on the fastmap.io servers, or adaptively on
49 | # both.
50 |
51 | # :param function func: Function to map against.
52 | # :param sequence|generator iterable: Iterable to map over.
53 | # :param dict kwargs: Named parameters to bind to the function. Optional.
54 | # :param str return_type: Either "ELEMENTS" or "BATCHES". Default
55 | # is "ELEMENTS".
56 | # :param str label: Optional label to track this execution. Only meaningful if
57 | # some execution occurs on the cloud. Default is "".
58 | # :rtype: Generator
59 |
60 | # Fastmap is a parallelized/distributed drop-in replacement for 'map'.
61 | # It runs faster than the builtin map function in most circumstances.
62 |
63 | # Notes:
64 | # - The function passed in must be stateless and cannot access the network or
65 | # the filesystem. If run locally, these restrictions will not be enforced
66 | # but because fastmap will likely execute out-of-order, running stateful
67 | # functions is not recommended.
68 | # - The iterable can be a sequence (list, tuple, ndarray, dataframe, etc),
69 | # or a generator.
70 | # - Fastmap is a generator so the iterable is processed lazily and fastmap
71 | # will not begin execution unless iterated over or execution is forced
72 | # (e.g. by wrapping it in a list).
73 |
74 | # For more documentation, go to https://fastmap.io/docs
75 |
76 | # """
77 |
# Documentation text for the offload call, kept as a module-level constant.
# NOTE(review): presumably attached as __doc__ to the public offload
# function(s) elsewhere -- confirm where it is consumed.
OFFLOAD_DOCSTRING = """
Offload a function to the cloud and return a FastmapTask.

:param function func: Function to offload
:param dict kwargs: Named parameters to bind to the function. Optional.
:param function hook: Function to call upon process completion. Optional.
:param str label: Optional label to track this execution. Default is "".

:rtype: FastmapTask
"""
88 |
# Documentation text for looking up a FastmapTask by its task_id.
GET_TASK_DOCSTRING = """
Given a task_id, get the associated FastmapTask.
Raises an exception if the task cannot be found.

:param str task_id:
:rtype: FastmapTask
"""
96 |
# Documentation text for polling metadata of all tasks that are not CLEARED.
POLL_ALL_DOCSTRING = """
Poll all non-CLEARED cloud task metadata.

:rtype: list[dict]
"""

# Documentation text for clearing every finished task.
CLEAR_ALL_DOCSTRING = """
Clear all done tasks and remove their functions, logs, and results from storage.
"""

# Documentation text for clearing a single task.
CLEAR_DOCSTRING = """
Clear the task and remove its function, logs, and result from storage.

Raises a FastmapException if the task cannot be found or the
task has not completed.

:rtype: None
"""

# Documentation text for killing a single task.
KILL_DOCSTRING = """
Kill the associated cloud task.

Raises a FastmapException if the task cannot be found or is already dead.

:rtype: None
"""

# Documentation text for fetching the complete task logs.
ALL_LOGS_DOCSTRING = """
Return the task's stdout and stderr since the task started.

Raises a FastmapException if the task cannot be found.

:rtype: str
"""

# Documentation text for fetching new task logs.
# NOTE(review): this text is identical to ALL_LOGS_DOCSTRING; presumably it
# should describe only the output produced since the previous fetch -- confirm
# against the implementation before rewording.
NEW_LOGS_DOCSTRING = """
Return the task's stdout and stderr since the task started.

Raises a FastmapException if the task cannot be found.

:rtype: str
"""

# Documentation text for polling a single task's metadata.
POLL_DOCSTRING = """
Poll for cloud task metadata.

Raises a FastmapException if the task cannot be found.

:rtype: dict
"""

# Documentation text for retrying a task.
RETRY_DOCSTRING = """
Retry the task. Returns a new FastmapTask

:rtype: FastmapTask
"""

# Documentation text for reading a finished task's return value.
RETURN_VALUE_DOCSTRING = """
Return the function's return value.

Raises a FastmapException if the result cannot be found,
if task has not completed, or if the task was not successful.

:rtype: Various
"""

# Documentation text for reading an errored task's traceback.
TRACEBACK_DOCSTRING = """
Return the traceback of an errored task.

Raises a FastmapException if the result cannot be found,
if task has not completed, or if the task did not error.

:rtype: str
"""
171 |
172 | WAIT_DOCSTRING = """
173 | Block until the task completes. If the task is
174 | ultimately successful, return the function's return value.
175 |
176 | Raises a FastmapException if the result cannot be found,
177 | or if raise_exceptions is true and the task is not successful.
178 |
179 | :param int polling_interval:
180 | :param bool live_logs:
181 | :param bool raise_exceptions
182 | :rtype: Various
183 | """
184 |
# Parameter documentation shared by init() and global_init().
INIT_PARAMS = """
:param str|file|dict config: The json file path / dict of the
configuration. Every subsequent argument will alter this configuration.
This is optional and if empty, only subsequent parameters will be used.
:param str secret: The API token generated on fastmap.io. Treat this like a
password. Do not commit it to version control! Failure to do so could
result in man-in-the-middle attacks or your credits being used by others
(e.g. cryptocurrency miners). If None, fastmap will run locally
regardless of exec_policy.
:param str verbosity: 'SILENT', 'QUIET', 'NORMAL', or 'LOUD'.
Default is 'NORMAL'.
:param str exec_policy: 'LOCAL' or 'CLOUD'. Default is 'CLOUD'.
:param str machine_type: 'SPARROW_1', 'PEREGRINE_4', or 'HUMMINGBIRD_7'. Only
for the CLOUD exec_policy. Default is 'SPARROW_1'.
:param list requirements: A list of requirements in "package==1.2.3" style.
If omitted, requirement discovery is automatic.

For more documentation, go to https://fastmap.io/docs
"""

# The two templates below carry a single %s placeholder that receives the
# shared parameter documentation, so the parameters appear before :rtype:
# instead of a literal "%s" being left in the rendered docstring.
GLOBAL_INIT_DOCSTRING = """
Initialize fastmap globally. All subsequent calls to fastmap will use this
global configuration. Also see documentation for 'init'.
%s
:rtype: None

Example usage:

import fastmap

fastmap.global_init(exec_policy="LOCAL", verbosity="LOUD")
results = fastmap.fastmap(func, iterable)

""" % INIT_PARAMS

INIT_DOCSTRING = """
Create and return a FastmapConfig object. The FastmapConfig object has
a method, fastmap, which can then be called. Also see documentation for
'global_init'.
%s
:rtype: FastmapConfig

Example usage:

import fastmap

fastmap_config = fastmap.init(machine_type="PEREGRINE_4", verbosity="QUIET")
results = fastmap_config.fastmap(func, iterable)

""" % INIT_PARAMS
235 |
# Module-wide dill setting, applied as an import side effect.
# NOTE(review): presumably makes dill follow objects referenced from a
# function's globals when pickling - confirm against the dill documentation.
dill.settings['recurse'] = True

# TODO
# Make dill.dumps deterministic https://github.com/uqfoundation/dill/issues/19
# This allows for hashing of the functions
# dill._dill._typemap = dict(sorted(dill._dill._typemap.items(),
#                                   key=lambda x: x[1]))

try:
    # Windows / mac use spawn. Linux uses fork. Set to spawn
    # because it has more issues and this provides a steady dev environment
    # Do not remove without at least adding more unit tests which operate
    # in a spawn environment
    multiprocessing.set_start_method("spawn")
except RuntimeError:
    # Deliberately ignored: the start method could not be (re)set, most
    # likely because it was already chosen earlier in this process.
    pass
252 |
253 |
class Namespace(dict):
    """
    Abstract constants class.

    Positional arguments become constants whose value is their own name;
    keyword arguments become constants with an explicit value.
    Constants can be accessed via .attribute or [key] and can be iterated over.
    """
    def __init__(self, *args, **kwargs):
        constants = {name: name for name in args}
        constants.update(kwargs)
        super().__init__(constants)

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails, so real dict
        # attributes and methods are never shadowed by constants.
        try:
            return self[item]
        except KeyError:
            # Name the missing attribute so the error is actionable.
            raise AttributeError(
                "%s has no constant %r" % (type(self).__name__, item)) from None
268 |
269 |
# Closed sets of string constants used across the SDK. Names passed
# positionally map to themselves (e.g. Verbosity.LOUD == "LOUD").
Verbosity = Namespace("SILENT", "QUIET", "NORMAL", "LOUD")
ExecPolicy = Namespace("LOCAL", "CLOUD")
AuthStatus = Namespace("AUTHORIZED")
InitStatus = Namespace("UPLOADED", "FOUND", "NOT_FOUND")
# NOTE(review): "INITALIZING" is misspelled, but it is a runtime status value
# that presumably has to match what the server sends - confirm before renaming.
MapStatus = Namespace("NOT_FOUND", "BATCH_PROCESSED", "INITALIZING", "INITIALIZATION_ERROR", "PROCESS_ERROR")
DoneStatus = Namespace("DONE", "NOT_FOUND")
TaskState = Namespace("PENDING", "PROCESSING", "KILLING", "FINISHING", "DONE", "CLEARED")
TaskOutcome = Namespace("SUCCESS", "ERROR", "KILLED_BY_REQUEST", "KILLED_ZOMBIE")
MachineType = Namespace("SPARROW_1", "PEREGRINE_4", "HUMMINGBIRD_7")
# ANSI escape sequences used to colorize terminal output; CANCEL resets styling.
Color = Namespace(
    GREEN="\033[92m",
    RED="\033[91m",
    YELLOW="\033[93m",
    CYAN="\033[36m",
    MAGENTA="\u001b[35m",
    CANCEL="\033[0m")
286 |
287 |
# Baseline configuration values; the keys mirror the parameters documented
# in INIT_PARAMS (plus the cloud endpoint URL).
DEFAULT_INLINE_CONFIG = {
    'secret': None,
    'cloud_url': 'https://app.fastmap.io',
    'verbosity': Verbosity.NORMAL,
    'exec_policy': ExecPolicy.CLOUD,
    'machine_type': MachineType.SPARROW_1,
    'requirements': None,
}

# Despite the "_DIR" suffix this is the path of a JSON file
# (~/.fastmap/default_config.json), not of a directory.
DEFAULT_CONFIG_DIR = os.path.join(pathlib.Path.home(), '.fastmap', 'default_config.json')
298 |
299 |
def set_docstring(docstr: str, docstr_prefix='') -> FunctionType:
    """
    Return a decorator that replaces the decorated function's ``__doc__``
    with ``docstr_prefix + docstr`` and hands back the same function object.
    """
    def attach_doc(target):
        # Concatenate at decoration time, exactly when the docstring is set.
        setattr(target, '__doc__', docstr_prefix + docstr)
        return target

    return attach_doc
306 |
307 |
def nowstamp() -> float:
    """
    Return the current time as a POSIX timestamp (seconds, with a fractional
    part), taken from a timezone-aware UTC datetime.
    """
    return datetime.datetime.now(datetime.timezone.utc).timestamp()
310 |
311 |
def fmt_bytes(num_bytes: int) -> str:
    """
    Format a byte count for humans using the largest unit that fits,
    with one decimal place (e.g. 2048 -> 2.0KB). Counts below one KB
    are shown as whole bytes.
    """
    # Ordered from the largest unit to the smallest so the first hit wins.
    for unit_size, suffix in ((GB, "GB"), (MB, "MB"), (KB, "KB")):
        if num_bytes >= unit_size:
            return "%.1f%s" % (num_bytes / unit_size, suffix)
    return "%dB" % num_bytes
324 |
325 |
def fmt_time(num_secs: int) -> str:
    """
    Format a number of seconds as a clock-style string.

    Returns HH:MM:SS when at least one hour, MM:SS when at least one
    minute (e.g. 121 -> 02:01), otherwise the whole seconds followed by
    's' (e.g. 5 -> 5s).
    """
    whole_hours, leftover = divmod(num_secs, 3600)
    whole_mins, leftover_secs = divmod(leftover, 60)
    # Truncate to ints once; fractional seconds are dropped, not rounded.
    h, m, s = int(whole_hours), int(whole_mins), int(leftover_secs)
    if h > 0:
        return f'{h:02}:{m:02}:{s:02}'
    if m > 0:
        return f'{m:02}:{s:02}'
    return f'{s}s'
338 |
339 |
def fmt_dur(num_secs: int) -> str:
    """
    Format a duration in seconds as a single scaled quantity with its unit,
    e.g. 121 -> 2.02 minutes. Durations under one second are reported as
    whole milliseconds.
    """
    # (threshold in seconds, template); checked from largest to smallest.
    scales = (
        (3600, "%.2f hours"),
        (60, "%.2f minutes"),
        (1, "%.2f seconds"),
    )
    for threshold, template in scales:
        if num_secs >= threshold:
            return template % (num_secs / threshold)
    return "%d milliseconds" % (round(num_secs * 1000))
352 |
353 |
def get_credits(seconds: float, bytes_egress: float) -> float:
    """
    Estimate the number of credits spent.

    Computed as 8 * (seconds * 10 / 3600 + bytes_egress * 10 / GB), i.e.
    the compute time and the egress volume are each scaled and then summed.
    NOTE(review): the pricing constants are taken as-is from the formula;
    confirm them against the current billing rules.
    """
    compute_part = seconds * 10.0 / 3600.0
    egress_part = bytes_egress * 10.0 / GB
    return 8 * (compute_part + egress_part)
360 |
361 |
def get_hash(binary: bytes) -> str:
    """
    Return a short identifier for a pickled function: the first 16 hex
    characters of the SHA-256 digest of ``binary``.
    This is used mostly for caching and bucketing.

    TODO:
    dill is not deterministic, so hashing the pickled function gives a
    different value each time it is pickled. For now this is fine because
    the hash is basically only used within a single run. In the future,
    dill will need to be made deterministic.

    To save time, don't try an approach with inspect.getsourcelines:
    if upstream functions change, it won't capture the difference.

    The approach will be to find the non-deterministic aspects of Python
    and replace them one-by-one in dill.
    """
    digest = hashlib.sha256(binary)
    return digest.hexdigest()[:16]
380 |
381 |
class FastmapException(Exception):
    """
    Raised when something goes wrong running the user's code on the
    cloud or on separate processes.
    """


class FastmapUnexpectedException(FastmapException):
    """
    Raised when something in a post request results in a non-200.
    Should be caught by anything that calls post_request.

    An optional ``tb`` keyword argument carries a traceback string that can
    add more context to cloud errors; it is stored on ``self.tb`` (None when
    not supplied) and is not forwarded to the base exception.
    """
    def __init__(self, *args, **kwargs):
        self.tb = kwargs.pop('tb', None)
        super().__init__(*args, **kwargs)
402 |
403 |
def simplified_tb():
    """
    Return the traceback of the exception currently being handled with the
    fastmap-specific frames (this file + cloud-installed dependencies)
    removed, to make it easier for the end user to read.

    The formatted traceback is walked line-by-line in reverse (innermost
    frame first). The moment a fastmap-specific "File ..." line is found,
    that frame and everything above it is dropped. The first line of the
    formatted traceback ("Traceback (most recent call last):") is always
    kept as the header.

    :rtype: str
    """
    file_prefix = '  File "'
    skip_prefixes = (
        file_prefix + os.path.abspath(__file__),
        file_prefix + '/layers/google.python.pip/pip/',
    )
    tb_lines = traceback.format_exc().split('\n')
    # The header is the FIRST line; the last element is just the empty string
    # left behind by the trailing newline.
    header = tb_lines.pop(0)
    kept = []
    for tb_line in reversed(tb_lines):
        if tb_line.startswith(skip_prefixes):
            # Drop the source/marker lines already collected for this frame.
            # A frame prints zero or more 4-space-indented lines after its
            # "File" line, so remove all of them rather than exactly one.
            while kept and kept[-1].startswith('    '):
                kept.pop()
            break
        kept.append(tb_line)
    kept.append(header)

    return '\n'.join(reversed(kept)).strip()
427 |
428 |
def get_func_name(func: FunctionType) -> str:
    """
    Return a display name for any callable.

    Uses ``__name__`` when the object has one and falls back to ``repr``
    otherwise. Names of 45 characters or more are cut to their first 40
    characters followed by "...".
    """
    missing = object()
    label = getattr(func, '__name__', missing)
    if label is missing:
        label = repr(func)
    if len(label) >= 45:
        return label[:40] + "..."
    return label
437 |
438 |
class FastmapLogger():
    """
    Minimal print-based logger.

    It exists primarily because it is difficult to pass python's native
    logger between processes; doing so required a lot of weird workarounds.
    Otherwise, it should behave similarly: debug/info/warning/error accept
    a message plus optional %-style arguments.
    """
    def __init__(self, verbosity: str):
        self.verbosity = verbosity
        self.restore_verbosity()

    def restore_verbosity(self):
        """
        (Re)bind the public log methods according to ``self.verbosity``.
        Raises FastmapException for an unknown verbosity.
        """
        # Start from "everything enabled" ...
        for level in ("debug", "info", "warning", "error"):
            setattr(self, level, getattr(self, "_" + level))

        # ... then silence the levels the chosen verbosity does not want.
        silenced_by_verbosity = (
            (Verbosity.LOUD, ()),
            (Verbosity.NORMAL, ("debug",)),
            (Verbosity.QUIET, ("debug", "info")),
            (Verbosity.SILENT, ("debug", "info", "warning")),
        )
        for verbosity, silenced in silenced_by_verbosity:
            if self.verbosity == verbosity:
                for level in silenced:
                    setattr(self, level, self._do_nothing)
                return
        raise FastmapException(f"Unknown verbosity '{self.verbosity}'")

    def hush(self):
        """Silence every level, including errors, until restore_verbosity()."""
        for level in ("debug", "info", "warning", "error"):
            setattr(self, level, self._do_nothing)

    def _do_nothing(self, *args):
        # This instead of a lambda because of pickling in multiprocessing
        pass

    def _emit(self, color, level, msg, args, flush=False):
        # Shared formatter: clear the current terminal line ("\033[K"), print
        # a colored "fastmap LEVEL:" tag, then the %-formatted message.
        if args:
            msg = msg % args
        print("\033[K" + color + "fastmap " + level + ":" + Color.CANCEL,
              msg, flush=flush)

    def _debug(self, msg, *args):
        self._emit(Color.CYAN, "DEBUG", msg, args)

    def _info(self, msg, *args):
        self._emit(Color.YELLOW, "INFO", msg, args)

    def _warning(self, msg, *args):
        self._emit(Color.RED, "WARNING", msg, args)

    def _error(self, msg, *args):
        # Errors are flushed immediately.
        self._emit(Color.RED, "ERROR", msg, args, flush=True)

    def input(self, msg):
        # This exists mostly for test mocking
        prompt = Color.CYAN + "\nfastmap: " + msg + Color.CANCEL
        return input(prompt)
501 |
502 |
503 |
504 | # def local_worker_func(func: FunctionType, itdm: InterThreadDataManager,
505 | # log: FastmapLogger) -> None:
506 | # """
507 | # A single persistent local worker. This function will process one
508 | # batch at a time until there are none left.
509 | # """
510 | # func = dill.loads(func)
511 | # try:
512 | # batch_tup = itdm.checkout()
513 | # while batch_tup:
514 | # batch_idx, batch_iter = batch_tup
515 | # start = time.perf_counter()
516 | # ret = list(map(func, batch_iter))
517 | # total_proc_time = time.perf_counter() - start
518 | # runtime = total_proc_time / len(ret)
519 | # log.debug("Batch %d local cnt=%d dur=%.2fs (%.2e/el).",
520 | # batch_idx, len(ret), total_proc_time, runtime)
521 | # itdm.push_outbox(batch_idx, ret, runtime)
522 | # batch_tup = itdm.checkout()
523 | # except Exception as e:
524 | # proc_name = multiprocessing.current_process().name
525 | # itdm.put_error(proc_name, repr(e), batch_tup)
526 | # tb = simplified_tb(traceback.format_exc())
527 | # log.error("In local worker [%s]:\n %s.",
528 | # multiprocessing.current_process().name, tb)
529 | # return
530 |
531 |
def auth_token(secret: str) -> str:
    """ The auth token is the leading 34 characters of the secret. """
    auth_len = 34
    return secret[:auth_len]
535 |
536 |
def sign_token(secret: str) -> str:
    """ The sign token is the part of the secret after its first 34 characters. """
    auth_len = 34
    return secret[auth_len:]
540 |
541 |
def hmac_digest(secret: str, payload: bytes) -> str:
    """
    Sign ``payload`` with the signing part of ``secret`` and return the
    HMAC-SHA256 signature as a hex string.
    """
    signing_key = sign_token(secret).encode()
    mac = hmac.new(signing_key, payload, digestmod=hashlib.sha256)
    return mac.hexdigest()
546 |
547 |
def basic_headers(secret: str, payload: bytes) -> dict:
    """
    Build the headers needed for most API calls: bearer authorization,
    the Python and client versions, and the payload signature.
    """
    headers = {}
    headers['Authorization'] = 'Bearer ' + auth_token(secret)
    # sys.version can span several lines; header values must be single-line.
    headers['X-Python-Version'] = sys.version.replace('\n', '')
    headers['X-Client-Version'] = CLIENT_VERSION
    headers['X-Content-Signature'] = hmac_digest(secret, payload)
    # 'X-Request-ID': secrets.token_hex()[:5],  # commented out because not using
    return headers
557 |
558 |
def post_request(url: str, data: dict, secret: str,
                 log: FastmapLogger) -> requests.Response:
    """
    Generic cloud post wrapper.
    This does warning/error management, extracts content, and checks signatures
    for every API post.

    :param str url: Endpoint to post to.
    :param dict data: Payload. A dict is serialized with msgpack; anything
        else is sent as-is (it must support len()).
    :param str secret: API secret used for the auth header and signatures.
    :param FastmapLogger log: Logger for debug output and server warnings.
    :returns: The response, with two extra attributes set: ``status`` (the
        X-Status header) and ``obj`` (the unpacked msgpack body).
    :raises FastmapException: for 401, 402, 403 and 410 responses.
    :raises FastmapUnexpectedException: on connection errors, 400/500
        responses, unsigned or wrongly signed payloads, undecodable bodies,
        and any other status / content type.
    """
    if isinstance(data, dict):
        data = msgpack.dumps(data)

    headers = basic_headers(secret, data)
    start_time = nowstamp()
    log.debug("Posting to %s with %s", url, fmt_bytes(len(data)))
    try:
        # I decided not to put this in a retry loop because it would be
        # engineering for a problem that I don't have yet
        resp = requests.post(url, data=data, headers=headers)
    except requests.exceptions.ConnectionError:
        ping_url = url.split('/api')[0] + '/api/v1/ping'
        raise FastmapUnexpectedException(
            "Fastmap could not connect to %r. "
            "Check your network connection. To check if your "
            "server is running, try: `curl %s`." % (url, ping_url)) from None
    log.debug("Posted %s in %.2fs", url, nowstamp() - start_time)

    if 'X-Server-Warning' in resp.headers:
        # deprecations or anything else
        log.warning(resp.headers['X-Server-Warning'])

    # A missing X-Reason header must not hide the real HTTP error behind a
    # KeyError, so fall back to a placeholder.
    reason = resp.headers.get('X-Reason', '(no reason given)')

    if resp.status_code == 400:
        # BAD_REQUEST
        raise FastmapUnexpectedException("Bad request: %r" % reason)

    if resp.status_code == 401:
        # UNAUTHORIZED
        raise FastmapException(reason)

    if resp.status_code == 402:
        # NOT_ENOUGH_CREDITS
        raise FastmapException("You have exhausted your credits. Contact your admin or purchase more credits.")

    if resp.status_code == 403:
        # INVALID_SIGNATURE
        raise FastmapException("Your signature was invalid. Check your API token.")

    if resp.status_code == 410:
        # DISCONTINUED
        raise FastmapException("Deprecated: %r" % reason)

    if resp.status_code == 500:
        raise FastmapUnexpectedException("Fastmap 500 error: %r" % resp.content)

    if resp.status_code == 200 and \
            resp.headers.get('Content-Type') == 'application/msgpack':
        if 'X-Content-Signature' not in resp.headers:
            raise FastmapUnexpectedException("Cloud payload was not signed (%d). "
                                             "Will not unpickle." % (resp.status_code))
        cloud_hash = hmac_digest(secret, resp.content)
        server_hash = resp.headers['X-Content-Signature']
        # Constant-time comparison so the check does not leak how many
        # leading characters of the signature matched.
        if not hmac.compare_digest(server_hash.encode(), cloud_hash.encode()):
            raise FastmapUnexpectedException("Cloud checksum did not match. "
                                             "Will not unpickle.")
        resp.status = resp.headers['X-Status']
        log.debug("Response %s %s", resp.status_code, resp.status)
        try:
            resp.obj = msgpack.loads(resp.content)
        except Exception:
            # Needs to be plain 'Exception'
            # msgpack-python.readthedocs.io/en/latest/_modules/msgpack/exceptions.html
            raise FastmapUnexpectedException("Error unpacking response") from None
        return resp

    raise FastmapUnexpectedException("Unexpected Status / Content-Type %d (%s): %r" %
                                     (resp.status_code,
                                      resp.headers.get("Content-Type"),
                                      resp.content[:100].strip()))
634 |
635 |
636 | # def process_cloud_batch(itdm: InterThreadDataManager, batch_tup: tuple,
637 | # map_url: str, func_hash: str, label: str,
638 | # run_id: str, secret: str, log: FastmapLogger) -> None:
639 | # """
640 | # For /api/v1/map, finish preparing the request, send it, and handle the
641 | # response. Processed batches go back into the itdm. If a
642 | # processed batch leaves this function, it will end up back with the user.
643 | # """
644 |
645 | # start_req_time = time.perf_counter()
646 |
647 | # batch_idx, batch = batch_tup
648 | # try:
649 | # pickled_batch = dill.dumps(batch)
650 | # except Exception as ex:
651 | # raise FastmapUnexpectedException("Could not pickle your data. "
652 | # "Fastmap cannot run on the cloud.") from ex
653 | # compressed_batch = gzip.compress(pickled_batch, compresslevel=1)
654 | # payload = {
655 | # 'func_hash': func_hash,
656 | # 'batch': compressed_batch,
657 | # 'label': label,
658 | # 'run_id': run_id,
659 | # }
660 |
661 | # while True:
662 | # log.debug("Making cloud request batchlen=%d size=%s (%s/el)...",
663 | # len(batch), fmt_bytes(len(compressed_batch)),
664 | # fmt_bytes(len(compressed_batch) / len(batch)))
665 | # try TODO
666 | # resp = post_request(map_url, payload, secret, log)
667 | # if resp.status_code == 200:
668 | # if resp.status == MapStatus.INITALIZING:
669 | # log.debug("Cloud worker is initializing. Last msg [%s]."
670 | # " Retrying in 5 seconds..." %
671 | # (resp.obj.get('init_step', '')))
672 | # time.sleep(5)
673 | # continue
674 | # elif resp.status == MapStatus.INITIALIZATION_ERROR:
675 | # raise FastmapUnexpectedException("Error initializing worker %r %r" % (
676 | # resp.obj.get('init_error'), resp.obj.get('init_tb')))
677 | # break
678 |
679 | # mem_used = (float(resp.headers.get('X-Mem-Used', 0.0))
680 | # / float(resp.headers.get('X-Mem-Total', 1.0)))
681 | # if mem_used > 0.9:
682 | # log.warning("Cloud memory utilization high: %.2f%%. "
683 | # "Consider increasing memory." % mem_used * 100)
684 |
685 | # if resp.status == MapStatus.BATCH_PROCESSED:
686 | # service_id = resp.headers['X-Service-Id']
687 | # total_request = time.perf_counter() - start_req_time
688 | # total_application = float(resp.headers['X-Application-Seconds'])
689 | # total_mapping = float(resp.headers['X-Map-Seconds'])
690 | # credits_used = float(resp.headers['X-Credits'])
691 | # result_len = len(resp.obj['results'])
692 | # req_time_per_el = total_request / result_len
693 | # app_time_per_el = total_application / result_len
694 | # map_time_per_el = total_mapping / result_len
695 |
696 | # log.debug("Batch %d cloud cnt=%d "
697 | # "%.2fs/%.2fs/%.2fs map/app/req (%.2e/%.2e/%.2e per el) "
698 | # "[%s].",
699 | # batch_idx, result_len,
700 | # total_mapping, total_application, total_request,
701 | # map_time_per_el, app_time_per_el, req_time_per_el,
702 | # service_id)
703 | # itdm.push_outbox(batch_idx,
704 | # resp.obj['results'],
705 | # None,
706 | # credits_used=credits_used,
707 | # network_seconds=total_request - total_application)
708 | # return
709 |
710 | # if resp.status == MapStatus.PROCESS_ERROR:
711 | # msg = "Your code could not be processed on the cloud: %s. " % \
712 | # resp.obj.get('exception')
713 | # bad_modules = resp.obj.get('bad_modules', [])
714 | # if bad_modules:
715 | # msg += "Modules with errors on import: %s." % ' '.join(bad_modules)
716 | # msg += "You might need to explicitly specify a requirements file " \
717 | # "in your deployment."
718 | # raise FastmapUnexpectedException(msg, tb=resp.obj.get('traceback', ''))
719 | # if resp.status == MapStatus.NOT_FOUND:
720 | # msg = "Your function was not found on the cloud."
721 | # raise FastmapUnexpectedException(msg)
722 | # if resp.status_code == 402:
723 | # # NOT_ENOUGH_CREDITS
724 | # raise FastmapUnexpectedException("Insufficient credits for this request. "
725 | # "Your current balance is $%.4f." %
726 | # resp.obj.get('credits_used', 0) / 100)
727 | # if resp.status_code == 403:
728 | # # INVALID_SIGNATURE
729 | # raise FastmapUnexpectedException("Invalid signature. Check your token")
730 | # if resp.status_code == 410:
731 | # # DISCONTINUED (post-deprecated end-of-life)
732 | # raise FastmapUnexpectedException("Fastmap.io API discontinued: %r" % resp.obj.get('reason'))
733 | # if resp.status_code == 413:
734 | # # TOO_LARGE
735 | # payload_len = len(msgpack.dumps(payload))
736 | # raise FastmapUnexpectedException("Your request was too large (%s). "
737 | # "Find a way to reduce the size of your data or "
738 | # "function and try again." % fmt_bytes(payload_len))
739 |
740 | # if resp.status_code == 500 and resp.headers['Content-Type'] == 'text/html':
741 | # content = re.sub('<[^<]+?>', '', resp.text)
742 | # raise FastmapUnexpectedException("Unexpected cloud error 500. You might have run out "
743 | # "of memory. %s" % content)
744 |
745 | # # catch all (should just be for 500s of which a few are explicitly defined)
746 | # raise FastmapUnexpectedException("Unexpected cloud response %d %s %r" %
747 | # (resp.status_code, resp.status, resp.obj))
748 |
749 |
750 | # def cloud_thread(thread_id: str, map_url: str, func_hash: str, label: str,
751 | # run_id: str, itdm: InterThreadDataManager, secret: str,
752 | # log: FastmapLogger):
753 | # """
754 | # A thread for running cloud requests in a loop. Batches are pulled out of
755 | # the itdm and passed into process_cloud_batch one-by-one until they are
756 | # exhausted. This also does some basic
757 | # """
758 | # batch_tup = itdm.checkout()
759 | # if batch_tup:
760 | # log.debug("Starting cloud thread %d []...", thread_id)
761 | # while batch_tup:
762 | # try:
763 | # process_cloud_batch(itdm, batch_tup, map_url, func_hash,
764 | # label, run_id, secret, log)
765 | # except FastmapUnexpectedException as e:
766 | # proc_name = multiprocessing.current_process().name
767 | # thread_id = threading.get_ident()
768 | # error_loc = "%s: thread:%d" % (proc_name, thread_id)
769 | # itdm.put_error(error_loc, repr(e), batch_tup)
770 | # if hasattr(e, 'tb') and e.tb:
771 | # tb = e.tb.replace('%0A', '\n')
772 | # log.error("In cloud thread [%s]:\n%s.",
773 | # threading.current_thread().name, tb)
774 | # else:
775 | # log.error("In cloud thread [%s]: %r.",
776 | # threading.current_thread().name, e)
777 | # log.error("Shutting down cloud thread [%s] due to error...",
778 | # threading.current_thread().name)
779 | # return
780 |
781 | # batch_tup = itdm.checkout()
782 |
783 |
def get_modules(log: FastmapLogger) -> (Dict[str, str], List[ModuleType]):
    """
    Get in scope modules.
    Returns two things:
    1. a dictionary of module name -> source code, for local modules
    2. a list of ModuleType for modules found in site packages

    Builtin, underscore-prefixed, file-less, standard-library and fastmap
    modules are skipped silently. Local modules that are not plain ``.py``
    files cannot be uploaded and are reported with a warning. Local modules
    with an empty source file are left out.
    """
    std_lib_dir = os.path.realpath(distutils.sysconfig.get_python_lib(standard_lib=True))
    local_sources = {}
    installed_mods = []
    # Iterate over a snapshot: sys.modules can change size while we loop
    # (imports triggered as a side effect or by other threads), which would
    # raise "dictionary changed size during iteration".
    for mod_name, mod in list(sys.modules.items()):
        if mod_name in sys.builtin_module_names:
            # builtin: skip
            continue
        if mod_name.startswith("_"):
            # hidden: skip
            continue
        if not getattr(mod, '__file__', None):
            # no backing file: nothing to upload or resolve
            continue
        mod_path = os.path.realpath(mod.__file__)
        if mod_path.startswith(std_lib_dir) and 'site-packages' not in mod_path:
            # standard library: skip
            continue
        if getattr(mod, "__package__", None) in ("fastmap", "fastmap.fastmap"):
            # fastmap itself: skip
            continue

        # Through with the silent skips.
        # Looking for local_sources and installed_mods
        if SITE_PACKAGES_RE.match(mod.__file__):
            installed_mods.append(mod)
            continue
        if not mod.__file__.endswith('.py'):
            log.warning("The module %r is a non-Python locally-built module "
                        "which cannot be uploaded.", mod)
            continue
        with open(mod.__file__) as f:
            source = f.read()
        if source:
            local_sources[mod.__name__] = source

    return local_sources, installed_mods
830 |
831 |
def get_requirements(installed_mods: List[ModuleType],
                     log: FastmapLogger) -> List[str]:
    """
    Work out pip-style requirement strings for the given installed modules.

    For every site-packages directory the modules live in, the
    ``*.dist-info/top_level.txt`` files are read to map top-level module
    names to distribution names (taken from the ``Name:`` field of the
    sibling METADATA file). Each imported module is then resolved to its
    distribution(s) and the installed version is looked up.

    :param list installed_mods: Modules found in site-packages.
    :param FastmapLogger log: Currently unused.
    :returns: Sorted list of "package==version" strings.
    :raises FastmapException: if a METADATA file has no parsable name.
    """
    imported_module_names = set()
    site_packages_dirs = set()
    for mod in installed_mods:
        mod_name = getattr(mod, '__package__', None) or mod.__name__
        imported_module_names.add(mod_name)
        site_packages_dirs.add(SITE_PACKAGES_RE.match(mod.__file__).group(0))

    top_level_files = set()
    for site_packages_dir in site_packages_dirs:
        top_level_path = site_packages_dir + "*.dist-info/top_level.txt"
        top_level_files.update(glob.glob(top_level_path))

    packages_by_module = collections.defaultdict(set)
    for fn in top_level_files:
        with open(fn, encoding="utf-8", errors="replace") as f:
            # Skip blank lines (the file normally ends with a newline).
            modules = [line.strip() for line in f.read().splitlines()
                       if line.strip()]
        # Build the sibling path portably instead of splitting on '/'.
        metadata_fn = os.path.join(os.path.dirname(fn), 'METADATA')
        pkg_name = None
        with open(metadata_fn, encoding="utf-8", errors="replace") as f:
            for row in f:
                # Distribution names may contain '.', '_' and '-'.
                if match := re.match(r"Name: (?P<pkg_name>[A-Za-z0-9._-]+)", row):
                    pkg_name = match['pkg_name']
                    break
        if not pkg_name:
            raise FastmapException("No package name for %r" % fn)
        for mod_name in modules:
            packages_by_module[mod_name].add(pkg_name)

    requirements = {}
    missed_modules = set()
    for mod_name in imported_module_names:
        pkg_names = packages_by_module.get(mod_name)
        if not pkg_names:
            missed_modules.add(mod_name)
            continue
        for pkg_name in pkg_names:
            requirements[pkg_name] = importlib.metadata.version(pkg_name)

    # one last run-through to make sure we didn't forget anything
    # this fixed the issue with google.cloud.vision import
    for missed_mod in missed_modules:
        missed_mod = missed_mod.replace('.', '-')
        try:
            pkg_version = importlib.metadata.version(missed_mod)
        except Exception:
            # Best effort: the guessed name is not an installed distribution
            # (or its metadata is unusable). Unlike a bare except this lets
            # KeyboardInterrupt / SystemExit through.
            continue
        requirements[missed_mod] = pkg_version

    return sorted(f'{k}=={v}' for k, v in requirements.items())
895 |
896 |
def get_dependencies(requirements: list, log: FastmapLogger) -> (dict, list, list):
    """
    Collect everything needed to reproduce the current import environment.

    :param list requirements: Explicit requirements. When empty or None,
        requirements are autodetected from the installed modules.
    :param FastmapLogger log: Logger for progress messages.
    :returns: A 3-tuple of
        1. dict of local module name -> source code,
        2. list of the names of the modules found in site-packages,
        3. the requirements (given or autodetected).
    """
    local_sources, installed_mods = get_modules(log)
    log.debug("Found %d installed modules", len(installed_mods))
    log.debug("Found local imports %r", sorted(local_sources))

    if requirements:
        log.debug("Skipping requirements autodetect.")
    else:
        requirements = get_requirements(installed_mods, log)
        log.info("Autodetected requirements %r.", requirements)

    installed_mod_names = [im.__name__ for im in installed_mods]
    return local_sources, installed_mod_names, requirements
915 |
916 |
def seq_batcher(sequence: Sequence, size: int) -> Generator:
    """
    Yield consecutive slices of ``sequence`` holding at most ``size``
    elements each; the last slice may be shorter. An empty sequence yields
    nothing.

    :param Sequence sequence: Anything supporting len() and slicing.
    :param int size: Maximum batch length; must be a positive integer.
    """
    # Measure the argument itself (not the typing.Sequence class).
    seq_len = len(sequence)
    for idx in range(0, seq_len, size):
        yield sequence[idx:min(idx + size, seq_len)]
921 |
922 |
def pickle_function(func, func_name):
    """
    Serialize ``func`` with dill (recursive mode) and return the bytes.

    Any failure is re-raised as a FastmapException that names the function
    via ``func_name`` and chains the original error.
    """
    try:
        pickled = dill.dumps(func, recurse=True)
    except Exception as cause:
        raise FastmapException(
            "Your function %r could not be pickled." % func_name) from cause
    return pickled
929 |
930 |
class HeartbeatIO():
    """
    Write-only, file-like object that forwards everything written to it to
    a logs queue and, for every write, also puts the current timestamp on
    a heartbeat queue.
    """
    def __init__(self, logs_queue, heartbeat_queue):
        self.is_open = True
        self.logs_queue = logs_queue
        self.heartbeat_queue = heartbeat_queue

    def write(self, s):
        """Queue the text, record a heartbeat, and return the length written."""
        self.logs_queue.put(s)
        self.heartbeat_queue.put(nowstamp())
        return len(s)

    def flush(self):
        """Nothing is buffered locally, so there is nothing to flush."""
        return None

    def close(self):
        """Mark the stream as closed; the queues themselves are left alone."""
        self.is_open = False
947 |
948 |
class RedirectStdStreams(object):
    """
    Context manager that points both sys.stdout and sys.stderr at the given
    stream for the duration of the block and puts the previous streams back
    on exit. Exceptions raised inside the block are not suppressed.
    """
    def __init__(self, heartbeat_io):
        self.heartbeat_io = heartbeat_io

    def __enter__(self):
        # Remember the current streams and flush them before switching over.
        self.old_stdout = sys.stdout
        self.old_stderr = sys.stderr
        self.old_stdout.flush()
        self.old_stderr.flush()
        sys.stdout = sys.stderr = self.heartbeat_io

    def __exit__(self, exc_type, exc_value, traceback):
        sys.stdout, sys.stderr = self.old_stdout, self.old_stderr
962 |
963 |
def heartbeat_loop(heartbeat_queue, kill_queue):
    """
    Put a timestamp on ``heartbeat_queue`` immediately and then whenever more
    than 60 seconds have passed since the previous one, until anything
    arrives on ``kill_queue``. The kill queue is checked once per second.
    """
    last_send = None
    while True:
        try:
            kill_queue.get(block=False)
        except queue.Empty:
            pass
        else:
            # Something was put on the kill queue: stop beating.
            break
        if not last_send or nowstamp() - last_send > 60:
            heartbeat_queue.put(nowstamp())
            last_send = nowstamp()
        time.sleep(1)
976 |
977 |
def local_offload_wrapper(func_payload, result_queue, logs_queue, heartbeat_queue):
    """
    Run an offloaded function locally and report its result.

    :param bytes func_payload: gzip-compressed msgpack dict whose 'func'
        entry is the dill-pickled callable to run (called with no arguments).
    :param result_queue: Receives a ``(pickled_result, runtime)`` tuple where
        pickled_result is a dill-pickled dict with the keys 'outcome',
        'return_value', 'exception' and 'traceback'.
    :param logs_queue: Receives everything the function writes to
        stdout / stderr.
    :param heartbeat_queue: Receives timestamps while the function runs.
    """
    start_time = nowstamp()
    func_dict = msgpack.loads(gzip.decompress(func_payload))
    heartbeat_io = HeartbeatIO(logs_queue, heartbeat_queue)
    kill_queue = multiprocessing.Queue()
    heartbeat_thread = threading.Thread(target=heartbeat_loop, args=(heartbeat_queue, kill_queue))
    heartbeat_thread.start()

    try:
        with RedirectStdStreams(heartbeat_io):
            try:
                func = dill.loads(func_dict['func'])
                ret = func()
                result_dict = {
                    'outcome': 'SUCCESS',
                    'return_value': ret,
                    'exception': None,
                    'traceback': None,
                }
                # Pickle inside the try so an unpicklable return value is
                # reported as an ERROR outcome instead of crashing.
                pickled_result = dill.dumps(result_dict)
            except Exception as ex:
                result_dict = {
                    'outcome': 'ERROR',
                    'return_value': None,
                    'exception': repr(ex),
                    'traceback': simplified_tb(),
                }
                pickled_result = dill.dumps(result_dict)
    finally:
        # Always stop the heartbeat thread, even when something that is not
        # an Exception (e.g. SystemExit, KeyboardInterrupt) escapes;
        # otherwise the non-daemon thread would keep the process alive.
        kill_queue.put(True)
    runtime = nowstamp() - start_time
    result_queue.put((pickled_result, runtime))
1008 |
1009 |
1010 | # def local_map_wrapper(func_payload, pickled_iterable, result_queue, logs_queue):
1011 | # func = dill.loads(pickled_func)
1012 | # iterable = dill.loads(pickled_iterable)
1013 | # logs = HeartbeatIO(logs_queue)
1014 | # start_time = datetime.datetime.now()
1015 | # with contextlib.redirect_stderr(logs):
1016 | # with contextlib.redirect_stdout(logs):
1017 | # try:
1018 | # ret = list(map(func, iterable)) # TODO multiprocessing.Pool
1019 | # resp = {
1020 | # 'outcome': 'SUCCESS',
1021 | # 'return_value': ret,
1022 | # 'exception': None,
1023 | # 'tb': None,
1024 | # }
1025 | # except Exception as ex:
1026 | # resp = {
1027 | # 'outcome': 'ERROR',
1028 | # 'return_value': None,
1029 | # 'exception': repr(ex),
1030 | # 'tb': simplified_tb(),
1031 | # }
1032 | # runtime = (datetime.datetime.now() - start_time).total_seconds()
1033 | # result_queue.put((dill.dumps(resp), runtime))
1034 |
1035 |
def task_hook_thread(task, hook):
    """Thread target: wait for ``task`` to finish, then call ``hook`` with the result.

    ``hook`` receives whatever ``task.wait()`` returns. If waiting raises an
    ``Exception`` the hook is skipped and the function returns quietly.
    """
    try:
        ret = task.wait()
    except Exception:
        # Best effort: a failed wait only means the hook is not invoked.
        # Deliberately not a bare `except:` so SystemExit / KeyboardInterrupt
        # are never swallowed here.
        return
    hook(ret)
1042 |
1043 |
# Response statuses that FastmapCloudTask.create and
# FastmapCloudTask.create_map compare against resp.status.
OfldStatus = Namespace("NOT_FOUND", "ACKNOWLEDGED", "ERROR")
1045 |
1046 |
class FastmapTask():
    """Behaviour shared by FastmapLocalTask and FastmapCloudTask.

    Concrete subclasses supply the ``task_id``, ``_config``, ``_task_state``,
    ``_outcome``, ``_result_dict`` and ``_all_logs`` attributes as well as
    ``poll()``, ``_fetch_logs()`` and ``_fetch_result_dict()``.
    """
    POLLING_INTERVAL = 3  # default time.sleep() argument between polls in wait()

    def __repr__(self):
        fields = (type(self).__name__, self.task_id, self._task_state, self._outcome)
        return "<%s id=%s state=%s outcome=%s>" % fields

    def add_hook(self, hook):
        """Start a thread that waits for this task and then passes the result to ``hook``."""
        watcher = threading.Thread(target=task_hook_thread, args=(self, hook))
        watcher.start()

    def wait(self, polling_interval=None, live_logs=False, raise_exceptions=False):
        """Block until the task reaches an end state.

        Returns the task's return value on success and None otherwise. With
        ``raise_exceptions`` set, an error / killed / cleared / killing task
        raises FastmapException instead of only being logged. With
        ``live_logs`` set, log text is written to stdout as it is fetched.
        ``polling_interval`` overrides POLLING_INTERVAL when truthy.
        """
        def report(problem):
            if not raise_exceptions:
                self._config.log.info(problem)
                return
            raise FastmapException(problem)

        def show(text):
            if text:
                sys.stdout.write("\033[K" + Color.MAGENTA + text + Color.CANCEL)

        self._config.log.info("Waiting for task to finish...")
        if live_logs:
            show(self.all_logs())

        while True:
            # Fetching logs refreshes the task state too, so only poll
            # explicitly when logs are not being streamed.
            if live_logs:
                show(self.new_logs())
            else:
                self.poll()

            if self._outcome == TaskOutcome.SUCCESS:
                return self.return_value()
            if self._outcome == TaskOutcome.ERROR:
                tb = self.traceback()
                report("Task error %r" % self._result_dict['exception'])
                print(tb)
                return None

            if self._outcome in (TaskOutcome.KILLED_BY_REQUEST, TaskOutcome.KILLED_ZOMBIE):
                stop_reason = "Task was killed"
            elif self._task_state == TaskState.CLEARED:
                stop_reason = "Task has been cleared"
            elif self._task_state == TaskState.KILLING:
                stop_reason = "Task is being killed"
            else:
                stop_reason = None

            if stop_reason is not None:
                report(stop_reason)
                return None
            time.sleep(polling_interval or self.POLLING_INTERVAL)

    def traceback(self):
        """Return the stored traceback of a task that finished with an error.

        Raises FastmapException if the task is cleared, not done, or did not
        end with the ERROR outcome.
        """
        self._fetch_result_dict()
        if self._task_state == TaskState.CLEARED:
            raise FastmapException("Traceback cannot be retrieved because task is cleared.")
        elif self._task_state != TaskState.DONE:
            raise FastmapException("Traceback cannot be retrieved because task is not done.")
        elif self._outcome != TaskOutcome.ERROR:
            raise FastmapException("Traceback cannot be retrieved because task did not error.")
        result = self._result_dict
        return result['traceback']

    def return_value(self):
        """Return the value produced by a task that finished successfully.

        Raises FastmapException if the task is cleared, not done, or did not
        end with the SUCCESS outcome.
        """
        self._fetch_result_dict()
        if self._task_state == TaskState.CLEARED:
            raise FastmapException("Return value cannot be retrieved because task is cleared.")
        elif self._task_state != TaskState.DONE:
            raise FastmapException("Return value cannot be retrieved because task is not done.")
        elif self._outcome != TaskOutcome.SUCCESS:
            raise FastmapException("Return value cannot be retrieved because task did not succeed.")
        result = self._result_dict
        return result['return_value']

    def new_logs(self):
        """Return the log text produced since the previous fetch."""
        fresh = self._fetch_logs()
        if self._task_state != TaskState.CLEARED:
            return fresh
        raise FastmapException("Logs cannot be retrieved because task is cleared.")

    def all_logs(self):
        """Return all log text accumulated so far, after fetching anything new."""
        self._fetch_logs()
        if self._task_state != TaskState.CLEARED:
            return self._all_logs
        raise FastmapException("Logs cannot be retrieved because task is cleared.")
1126 |
1127 |
def gen_id(chars=12):
    """Return a random id of ``chars`` ASCII letters and digits picked with `secrets`."""
    # 62 ** 12 = 3*10^21. World generates 3*10^22 bytes per year so this feels ok
    symbols = string.ascii_letters + string.digits
    picked = []
    for _ in range(chars):
        picked.append(secrets.choice(symbols))
    return ''.join(picked)
1132 |
1133 |
class FastmapLocalTask(FastmapTask):
    """A task that runs in a child process on this machine.

    The child process reports back through three multiprocessing queues:
    one for the final result, one for captured log text and one for
    heartbeat timestamps. Use ``create`` to start a task; the constructor
    only records state.
    """

    def __init__(self, config, func_name, task_type, proc=None, func_payload=None,
                 result_queue=None, logs_queue=None, heartbeat_queue=None,
                 hook=None, webhook=None, label=''):
        self.task_id = gen_id()
        self.task_type = task_type
        self._config = config
        self._task_state = None
        self._func_name = func_name
        self._func_payload = func_payload  # kept so retry() can resubmit
        self._label = label
        self._hook = hook
        self._webhook = webhook
        self._outcome = None
        self._result_dict = None
        self._runtime = None
        self._heartbeat_ts = None

        self._proc = proc
        self._result_queue = result_queue
        self._logs_queue = logs_queue
        self._heartbeat_queue = heartbeat_queue
        self._all_logs = ""
        self._starttime = nowstamp()

    @staticmethod
    def _start_process(proc):
        """Start ``proc``, turning a RuntimeError from start() into FastmapException."""
        try:
            proc.start()
        except RuntimeError as err:
            # Chain the cause so the original start-up error stays visible
            raise FastmapException("Error starting local process. It's likely "
                                   "that you need to wrap your code in an "
                                   "`if __name__ == '__main__'` context.") from err

    @staticmethod
    def create(config, func_payload, func_name, hook, webhook, label):
        """Start ``func_payload`` in a child process and return its OFFLOAD task.

        If ``hook`` is truthy it is registered through add_hook. Raises
        FastmapException when the process cannot be started.
        """
        result_queue = multiprocessing.Queue()
        logs_queue = multiprocessing.Queue()
        heartbeat_queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=local_offload_wrapper,
                                       args=(func_payload, result_queue,
                                             logs_queue, heartbeat_queue))
        FastmapLocalTask._start_process(proc)
        fp = FastmapLocalTask(config, func_name, "OFFLOAD", proc=proc,
                              func_payload=func_payload, result_queue=result_queue,
                              logs_queue=logs_queue, heartbeat_queue=heartbeat_queue,
                              label=label, hook=hook, webhook=webhook)
        if hook:
            fp.add_hook(hook)
        return fp

    @staticmethod
    def create_map(config, func_payload, func_name, iterable, hook, webhook, label):
        """Start a MAP task over ``iterable`` in a child process.

        NOTE(review): the target ``local_map_wrapper`` only exists as
        commented-out code in this file, with a different parameter list,
        so this path looks unfinished. Confirm before relying on it.
        """
        pickled_iterable = dill.dumps(iterable)
        result_queue = multiprocessing.Queue()
        logs_queue = multiprocessing.Queue()
        heartbeat_queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=local_map_wrapper,
                                       args=(func_payload, pickled_iterable,
                                             result_queue, logs_queue, webhook, heartbeat_queue))  # TODO heartbeat_queue
        FastmapLocalTask._start_process(proc)
        # Record payload, hook and webhook exactly like create() does, so the
        # task carries the same state (retry() reads all three).
        fp = FastmapLocalTask(config, func_name, "MAP", proc=proc,
                              func_payload=func_payload, result_queue=result_queue,
                              logs_queue=logs_queue, heartbeat_queue=heartbeat_queue,
                              label=label, hook=hook, webhook=webhook)
        if hook:
            fp.add_hook(hook)
        return fp

    def poll(self):
        """Drain the result and heartbeat queues and return a status dict.

        A result found on the queue marks the task DONE (unless it is
        already CLEARED) and stores the unpickled result and its outcome.
        Only the most recent heartbeat timestamp is kept.
        """
        try:
            pickled_result, self._runtime = self._result_queue.get(block=False)
            if not self._task_state == TaskState.CLEARED:
                self._task_state = TaskState.DONE
            self._result_dict = dill.loads(pickled_result)
            self._outcome = self._result_dict['outcome']
        except queue.Empty:
            pass

        while True:
            try:
                self._heartbeat_ts = self._heartbeat_queue.get(block=False)
            except queue.Empty:
                break

        return {
            'type': self.task_type,
            'func_name': self._func_name,
            "task_id": self.task_id,
            "task_state": self._task_state,
            'outcome': self._outcome,
            'start_time': datetime.datetime.fromtimestamp(self._starttime),
            'runtime': self._runtime,

            'label': self._label,
            # Time elapsed since the newest heartbeat, or None if none arrived
            'last_heartbeat': nowstamp() - self._heartbeat_ts if self._heartbeat_ts else None,
            'items_uploaded': None,  # TODO for map items
            'items_completed': None,  # TODO for map items
        }

    def kill(self):
        """Kill the child process and mark the task DONE / KILLED_BY_REQUEST.

        poll() runs last, so a result that was already waiting on the result
        queue replaces the KILLED_BY_REQUEST outcome.
        """
        self._config.log.info("Killing task %s...", self.task_id)
        self._task_state = TaskState.KILLING
        self._proc.kill()
        self._outcome = TaskOutcome.KILLED_BY_REQUEST
        self._task_state = TaskState.DONE
        self.poll()

    def retry(self):
        """Start and return a new OFFLOAD task with this task's payload, hook, webhook and label."""
        return FastmapLocalTask.create(self._config, self._func_payload,
                                       self._func_name, self._hook, self._webhook,
                                       self._label)

    def _fetch_logs(self):
        """Poll, then drain the logs queue; return the new text and append it to _all_logs."""
        self.poll()
        pieces = []
        while True:
            try:
                pieces.append(self._logs_queue.get(block=False))
            except queue.Empty:
                break
        new_logs = ''.join(pieces)
        self._all_logs += new_logs
        return new_logs

    def clear(self):
        """Mark a DONE task as CLEARED; raises FastmapException if it is not DONE."""
        if not self._task_state == TaskState.DONE:
            raise FastmapException("Task not done")
        self._task_state = TaskState.CLEARED
        self._config.log.info("Clearing task %s...", self.task_id)
        self.poll()

    def _fetch_result_dict(self):
        """Poll for the result unless one has already been stored."""
        if self._result_dict:
            return
        self.poll()
1270 |
1271 |
class FastmapCloudTask(FastmapTask):
    """Client-side handle for a task that runs on the cloud service.

    Every method sends a request to an ``/api/v1/...`` endpoint under
    ``config.cloud_url`` via post_request and copies the task_state /
    outcome from the response onto this object.
    """

    def __init__(self, config, task_id=None):
        self.task_id = task_id
        self._config = config
        self._task_state = None
        self._outcome = None
        self._next_log_idx = 0  # sent to /api/v1/logs with each request
        self._all_logs = ''
        self._logs_done = False  # set once logs were fetched for a DONE/CLEARED task
        self._result_dict = None

    @staticmethod
    def create(config, func_name, func_hash, hook, webhook, label):
        """Ask the server to start an offload task and return its handle.

        Raises FastmapException for an unsupported machine type, a server
        ERROR / NOT_FOUND status, or any status other than ACKNOWLEDGED.
        """
        url = config.cloud_url + "/api/v1/offload"
        payload = {
            "func_name": func_name,
            "func_hash": func_hash,
            "label": label,
            "machine_type": config.machine_type,
            "webhook": webhook,
        }

        # TODO
        if config.machine_type == MachineType.HUMMINGBIRD_7:
            raise FastmapException("Only SPARROW_1 & PEREGRINE_4 machine_type are supported right now")

        config.log.info("Starting new task for function %r..." % func_name)
        resp = post_request(url, payload, config.secret, config.log)
        if resp.status in (OfldStatus.ERROR, OfldStatus.NOT_FOUND):
            raise FastmapException("Internal cloud error. Try again later.")

        if resp.status == OfldStatus.ACKNOWLEDGED:
            task_id = resp.obj['task']['task_id']
            task = FastmapCloudTask(config, task_id=task_id)
            config.log.info("Created new task %r." % task)
            if hook:
                task.add_hook(hook)
            return task
        raise FastmapException("Got unexpected response from server %r" % resp.status)

    @staticmethod
    def create_map(config, func_name, func_hash, iterable, kwargs, hook, webhook, label):
        """Upload ``iterable`` to /api/v1/map in batches and return the task handle.

        The first response supplies the task id; every later batch is sent
        with that id and must be answered with the same one.
        """
        url = config.cloud_url + "/api/v1/map"
        payload = {
            "func_name": func_name,
            "func_hash": func_hash,
            "kwargs": kwargs,
            "label": label,
            "webhook": webhook,
        }
        config.log.debug("Calling /api/v1/map")

        assert isinstance(iterable, list)  # TODO
        assert not hook  # TODO
        task_id = None
        page_offset = 0
        for i, batch in enumerate(seq_batcher(iterable, 10)):  # TODO
            config.log.info("Uploading batch %d" % i)  # TODO
            payload['page_idx'] = i
            payload['page_len'] = len(batch)
            payload['page_offset'] = page_offset
            payload['iterable'] = dill.dumps(batch)
            payload['task_id'] = task_id
            page_offset += len(batch)
            resp = post_request(url, payload, config.secret, config.log)
            if resp.status in (OfldStatus.ERROR, OfldStatus.NOT_FOUND):
                raise FastmapException("Internal cloud error. Try again later.")
            if task_id:
                assert resp.obj['task']['task_id'] == task_id
            else:
                task_id = resp.obj['task']['task_id']

        # __init__ takes (config, task_id). The extra positional "MAP" that
        # used to be passed here collided with the task_id keyword and
        # raised TypeError.
        return FastmapCloudTask(config, task_id=task_id)

    def poll(self):
        """Fetch the task's current description from /api/v1/poll and return it.

        Raises FastmapException if the task is not found or the server
        answers with an unexpected status.
        """
        self._config.log.debug("Calling /api/v1/poll")
        url = self._config.cloud_url + "/api/v1/poll"
        payload = {"task_id": self.task_id}
        resp = post_request(url, payload, self._config.secret, self._config.log)
        if resp.status == 'NOT_FOUND':
            raise FastmapException("No task found")
        if resp.status == 'FOUND':
            task_dict = resp.obj['task']
            make_dt(task_dict)
            self._task_state = task_dict['task_state']
            self._outcome = task_dict['outcome']
            return task_dict
        raise FastmapException("Unexpected status from server %r" % resp.status)

    def kill(self):
        """Send a kill order to /api/v1/kill and return the updated task dict."""
        self._config.log.debug("Calling /api/v1/kill")
        url = self._config.cloud_url + "/api/v1/kill"
        payload = {"task_id": self.task_id}
        resp = post_request(url, payload, self._config.secret, self._config.log)
        if resp.status == 'NOT_FOUND':
            raise FastmapException("No task found")
        if resp.status == 'FOUND':
            self._config.log.info("Server acknowledged kill order for task %s.", self.task_id)
            task_dict = resp.obj['task']
            make_dt(task_dict)
            self._task_state = task_dict['task_state']
            self._outcome = task_dict['outcome']
            return task_dict
        raise FastmapException("Unexpected status from server %r" % resp.status)

    def _fetch_logs(self):
        """Download log text not yet seen and return it.

        Keeps requesting /api/v1/logs while the server reports ``has_more``.
        Returns "" without contacting the server once logs were fetched for
        a task that was DONE or CLEARED. Raises FastmapException if the task
        is not found or the status is unexpected.
        """
        if self._logs_done:
            return ""
        new_logs = ''
        url = self._config.cloud_url + "/api/v1/logs"
        while True:
            self._config.log.debug("Calling /api/v1/logs")
            payload = {"task_id": self.task_id, "next_log_idx": self._next_log_idx}
            resp = post_request(url, payload, self._config.secret, self._config.log)
            if resp.status == 'NOT_FOUND':
                raise FastmapException("No task found")
            if resp.status != 'FOUND':
                # Fail fast: neither loop forever on an unknown status nor
                # treat a "has_more" page as an error.
                raise FastmapException("Unexpected status from server %r" % resp.status)

            self._next_log_idx = resp.obj['next_log_idx']
            chunk = resp.obj['logs'].decode()
            self._all_logs += chunk
            new_logs += chunk
            task_dict = resp.obj['task']
            self._task_state = task_dict['task_state']
            self._outcome = task_dict['outcome']
            if self._task_state in (TaskState.DONE, TaskState.CLEARED):
                self._logs_done = True
            if not resp.obj['has_more']:
                return new_logs

    def retry(self):
        """Ask the server to retry this task and return a handle for the new task."""
        self._config.log.debug("Calling /api/v1/retry")
        url = self._config.cloud_url + '/api/v1/retry'
        payload = {"task_id": self.task_id}
        resp = post_request(url, payload, self._config.secret, self._config.log)
        if resp.status == 'NOT_FOUND':
            # TODO more error handling
            raise FastmapException("Could not find task to retry")
        new_task_dict = resp.obj['task']
        new_task_id = new_task_dict['task_id']
        self._config.log.info("Server is retrying task %s with new task %s" % (self.task_id, new_task_id))
        # TODO hook for new retry
        return FastmapCloudTask(self._config, task_id=new_task_id)

    def clear(self):
        """Clear the task through /api/v1/clear and return the updated task dict.

        Raises FastmapException if the task is not found, is not ready to be
        cleared, or the status is unexpected.
        """
        self._config.log.debug("Calling /api/v1/clear")
        url = self._config.cloud_url + "/api/v1/clear"
        payload = {"task_id": self.task_id}
        resp = post_request(url, payload, self._config.secret, self._config.log)
        if resp.status == 'NOT_FOUND':
            raise FastmapException("No task found")
        if resp.status == 'NOT_READY':
            raise FastmapException("Task not cleared. Task status is not \"DONE\".")
        if resp.status == 'FOUND':
            self._config.log.info("Server cleared task %s...", self.task_id)
            task_dict = resp.obj['task']
            make_dt(task_dict)
            self._task_state = task_dict['task_state']
            self._outcome = task_dict['outcome']
            return task_dict
        raise FastmapException("Unexpected status from server %r" % resp.status)

    def _fetch_result_dict(self):
        """Download and unpickle the task result unless it is already stored.

        The result arrives in parts from /api/v1/result: 'APPEND' responses
        are accumulated and the 'UNPICKLE' response completes the buffer,
        which is gunzipped and unpickled into ``_result_dict``. If the
        result is not ready (or the status is neither 'ERROR' nor 'SUCCESS')
        the method returns with ``_result_dict`` unchanged.
        """
        if self._result_dict:
            return
        url = self._config.cloud_url + "/api/v1/result"
        payload = {"task_id": self.task_id}

        result_idx = 0
        result_buffer = b''
        while True:
            self._config.log.debug("Calling /api/v1/result part %d" % result_idx)
            payload['result_idx'] = result_idx
            resp = post_request(url, payload, self._config.secret, self._config.log)
            if resp.status == 'NOT_FOUND':
                raise FastmapException("No task found")

            task_dict = resp.obj['task']
            self._task_state = task_dict['task_state']
            self._outcome = task_dict['outcome']

            if resp.status == 'NOT_READY':
                break
                # raise FastmapException("Result not ready")
            if resp.status not in ('ERROR', "SUCCESS"):
                break
                # raise FastmapException("Unexpected status from server %r" % resp.status)

            if resp.obj['instruction'] == 'APPEND':
                result_buffer += resp.obj['result_part']
                result_idx += 1
                assert result_idx < resp.obj['result_len']
                continue

            assert resp.obj['instruction'] == 'UNPICKLE'
            result_buffer += resp.obj['result_part']
            try:
                self._result_dict = dill.loads(gzip.decompress(result_buffer))
            except dill.UnpicklingError:
                raise FastmapException("Error unpickling response") from None
            except gzip.BadGzipFile:
                raise FastmapException("Error unzipping response") from None

            # if self._task_type == 'MAP':
            #     self._result_dict += _return
            # else:
            #     self._result_dict = _return
            break
1480 |
1481 |
def local_exit_handler(config):
    """Exit callback registered with atexit for local configs; currently a no-op."""
    # Disabled for now:
    # config.log.info("Received exit signal. Would kill %d local task(s)... " % len(config.local_threads))
    # for thread in config.local_threads:
    #     thread.kill()
    return None
1487 |
1488 |
class FastmapConfig():
    """
    The configuration object. Do not instantiate this directly.
    Instead, either:
    - use init to get a new FastmapConfig object
    - use global_init to allow fastmap to run without an init object.

    Besides the ``create`` factory, this class exposes one public method:
    offload.
    """

    __slots__ = [
        "secret",
        "verbosity",
        "log",
        "exec_policy",
        "machine_type",
        "cloud_url",
        "requirements",
        "local_threads",
    ]

    def __init__(self, config):
        """Validate and store the settings in the ``config`` dict.

        Raises FastmapException for a malformed secret, a missing secret
        when exec_policy is not LOCAL, or malformed requirements.
        """
        # TODO parameter checking is weirdly divided between create and init
        self.exec_policy = config['exec_policy']
        self.log = FastmapLogger(config['verbosity'])
        self.verbosity = config['verbosity']
        self.cloud_url = config['cloud_url']
        self.requirements = config['requirements']
        self.machine_type = config['machine_type']
        self.local_threads = []  # local tasks started through offload()

        if self.cloud_url:
            # Normalize: make sure there is a scheme and no trailing slash
            if not self.cloud_url.startswith("http"):
                self.cloud_url = "http://" + self.cloud_url
            if self.cloud_url.endswith("/"):
                self.cloud_url = self.cloud_url[:-1]
        elif self.exec_policy != ExecPolicy.LOCAL:
            self.exec_policy = ExecPolicy.LOCAL
            self.log.warning("No cloud_url provided. "
                             "Setting exec_policy to LOCAL.")

        if multiprocessing.current_process().name != "MainProcess":
            # Fixes issue with multiple loud inits during local multiprocessing
            # in Mac / Windows
            self.log.hush()

        if config['secret']:
            if not isinstance(config['secret'], str) or not re.match(SECRET_RE, config['secret']):
                raise FastmapException("Invalid secret token format.")
            self.secret = config['secret']
        else:
            if self.exec_policy != ExecPolicy.LOCAL:
                # This branch runs when the policy is NOT local; the message
                # used to claim the opposite.
                raise FastmapException("No secret provided. A secret is required "
                                       "unless exec_policy is LOCAL.")
            self.secret = None

        if self.requirements:
            if not isinstance(self.requirements, list):
                raise FastmapException("Invalid 'requirements' format. It must be a "
                                       "list in 'package==1.2.3' form.")
            for req in self.requirements:
                if not REQUIREMENT_RE.match(req):
                    raise FastmapException("Invalid requirement format %r. Requirements "
                                           "must be formatted like 'package==1.2.3'." % req)
        self.log.restore_verbosity()  # undo hush

    @staticmethod
    def create(config=None, **kwargs):
        """Build a FastmapLocalConfig or FastmapCloudConfig.

        ``config`` may be falsy (the file at DEFAULT_CONFIG_DIR is used if it
        exists, otherwise DEFAULT_INLINE_CONFIG), a dict, or a path to a JSON
        file. Keyword arguments override individual settings and must be
        keys of DEFAULT_INLINE_CONFIG. Raises FastmapException for
        unreadable files, unknown or missing parameters, and unknown
        machine_type / exec_policy values.
        """
        if not config and os.path.exists(DEFAULT_CONFIG_DIR):
            try:
                with open(DEFAULT_CONFIG_DIR) as f:
                    c = json.load(f)
            except Exception as e:
                raise FastmapException(f"Exception loading '{DEFAULT_CONFIG_DIR}'") from e
        elif not config:
            c = dict(DEFAULT_INLINE_CONFIG)
        elif isinstance(config, dict):
            c = dict(config)  # copy so the caller's dict is not modified
        elif isinstance(config, str):
            try:
                with open(config) as f:
                    c = json.load(f)
            except Exception as e:
                raise FastmapException(f"Exception loading '{config}'") from e
        else:
            raise FastmapException(f"Unknown config type {type(config)}")

        for k, v in kwargs.items():
            if k not in DEFAULT_INLINE_CONFIG:
                raise FastmapException(f"Unknown parameter: {k}")
            c[k] = v

        for k in DEFAULT_INLINE_CONFIG:
            if k not in c:
                raise FastmapException(f"Missing configuration parameter: {k}")

        if c['machine_type'] not in MachineType:
            raise FastmapException(f"Unknown machine_type '{c['machine_type']}'.")

        if c['exec_policy'] not in ExecPolicy:
            raise FastmapException(f"Unknown exec_policy '{c['exec_policy']}'.")

        if c['exec_policy'] == ExecPolicy.LOCAL:
            local_config = FastmapLocalConfig(c)
            atexit.register(local_exit_handler, local_config)
            return local_config
        return FastmapCloudConfig(c)

    @set_docstring(OFFLOAD_DOCSTRING)
    def offload(self, func: FunctionType, kwargs=None,
                hook=None, webhook=None, label=""):
        self.log.info("Fastmap offload." \
                      "\n  verbosity: %s." \
                      "\n  exec_policy: %s." % (self.verbosity, self.exec_policy))
        if not callable(func):
            raise FastmapException("'func' must be a function")

        func_name = get_func_name(func)  # before applying kwargs, get func_name
        if kwargs:
            if not isinstance(kwargs, dict):
                raise FastmapException("'kwargs' must be a dict.")
            func = functools.partial(func, **kwargs)

        pickled_func = pickle_function(func, func_name)
        func_payload, func_hash = get_payload_and_hash(pickled_func, self)

        if self.exec_policy == ExecPolicy.LOCAL:
            task = FastmapLocalTask.create(self, func_payload, func_name=func_name,
                                           hook=hook, webhook=webhook, label=label)
            self.local_threads.append(task)
            return task

        assert self.exec_policy == ExecPolicy.CLOUD
        # The function has to be on the server before the task is created
        init_remote(self, func_hash, func_payload)
        return FastmapCloudTask.create(self, func_name=func_name, func_hash=func_hash,
                                       hook=hook, webhook=webhook, label=label)
1625 |
1626 |
class FastmapLocalConfig(FastmapConfig):
    """Configuration returned by FastmapConfig.create when exec_policy is LOCAL.

    The task-management helpers below are placeholders: each one raises
    NotImplementedError.
    """
    # TODO we really oughta implement these

    def get_task(self, task_id):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError

    def poll_all(self):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError

    def clear_all(self):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError

    def map(self, *args, **kwargs):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError
1641 |
1642 |
def check_task_id(func) -> FunctionType:
    """Decorator for ``method(self, task_id)`` that validates ``task_id`` first.

    The wrapper raises FastmapException when ``task_id`` is falsy or does
    not match TASK_RE; otherwise it forwards the call to ``func``.
    """
    @functools.wraps(func)
    def inner(self, task_id):
        looks_valid = bool(task_id) and re.match(TASK_RE, task_id)
        if looks_valid:
            return func(self, task_id)
        raise FastmapException("Invalid task_id format %r" % task_id)
    return inner
1650 |
1651 |
def make_dt(task):
    """Replace ``task['start_time']`` in place with datetime.datetime.fromtimestamp() of its value."""
    started = datetime.datetime.fromtimestamp(task['start_time'])
    task['start_time'] = started
1654 |
1655 |
class FastmapCloudConfig(FastmapConfig):
    """Configuration returned by FastmapConfig.create when exec_policy is not LOCAL."""

    # Docstrings for the methods below are attached by set_docstring.

    @check_task_id
    @set_docstring(GET_TASK_DOCSTRING)
    def get_task(self, task_id):
        return FastmapCloudTask(self, task_id=task_id)

    @set_docstring(POLL_ALL_DOCSTRING)
    def poll_all(self):
        url = self.cloud_url + '/api/v1/poll_all'
        resp = post_request(url, {}, self.secret, self.log)
        tasks = resp.obj['tasks']
        # Convert every start_time in place
        for task in tasks:
            make_dt(task)
        return tasks

    @set_docstring(CLEAR_ALL_DOCSTRING)
    def clear_all(self):
        url = self.cloud_url + '/api/v1/clear_all'
        resp = post_request(url, {}, self.secret, self.log)
        self.log.info("Cleared %d tasks", resp.obj['count'])
        cleared_tasks = resp.obj['cleared_tasks']
        # Convert every start_time in place
        for task in cleared_tasks:
            make_dt(task)
        return cleared_tasks
1679 |
1680 | # @set_docstring(MAP_DOCSTRING)
1681 | # def map(self, func: FunctionType, iterable: Iterable, kwargs=None,
1682 | # hook=None, label=""):
1683 | # raise AssertionError("This is not ready yet")
1684 |
1685 | # if kwargs:
1686 | # kwargs = kwargs or {}
1687 | # if not isinstance(kwargs, dict):
1688 | # raise FastmapException("'kwargs' must be a dict.")
1689 | # func = functools.partial(func, **kwargs)
1690 |
1691 | # pickled_func = pickle_function(func)
1692 | # func_payload, func_hash = get_payload_and_hash(pickled_func, self)
1693 |
1694 | # if self.exec_policy == ExecPolicy.LOCAL:
1695 | # task = FastmapLocalTask.create_map(self, func_payload, func_name=func_name,
1696 | # iterable=iterable, hook=hook, webhook=webhook, label=label)
1697 | # self.local_threads.append(task)
1698 | # return task
1699 |
1700 | # assert self.exec_policy == ExecPolicy.CLOUD
1701 | # init_remote(self, func_hash, func_payload)
1702 |
1703 | # return FastmapCloudTask.create_map(self, func_hash=func_hash,
1704 | # iterable=iterable,
1705 | # hook=hook, webhook=webhook, label=label)
1706 |
1707 | # @check_task_id
1708 | # @set_docstring(POLL_DOCSTRING)
1709 | # def poll(self, task_id):
1710 | # return FastmapCloudTask(self, task_id=task_id).poll()
1711 |
1712 | # @check_task_id
1713 | # @set_docstring(RETRY_DOCSTRING)
1714 | # def retry(self, task_id):
1715 | # return FastmapCloudTask(self, task_id=task_id).retry()
1716 |
1717 | # @check_task_id
1718 | # @set_docstring(KILL_DOCSTRING)
1719 | # def kill(self, task_id):
1720 | # return FastmapCloudTask(self, task_id=task_id).kill()
1721 |
1722 | # @check_task_id
1723 | # @set_docstring(WAIT_DOCSTRING)
1724 | # def wait(self, task_id):
1725 | # return FastmapCloudTask(self, task_id=task_id).wait()
1726 |
1727 | # @check_task_id
1728 | # @set_docstring(RETURN_VALUE_DOCSTRING)
1729 | # def return_value(self, task_id):
1730 | # return FastmapCloudTask(self, task_id=task_id).return_value()
1731 |
1732 | # @check_task_id
1733 | # @set_docstring(TRACEBACK_DOCSTRING)
1734 | # def traceback(self, task_id):
1735 | # return FastmapCloudTask(self, task_id=task_id).traceback()
1736 |
1737 | # @check_task_id
1738 | # @set_docstring(CLEAR_DOCSTRING)
1739 | # def clear(self, task_id):
1740 | # return FastmapCloudTask(self, task_id=task_id).clear()
1741 |
1742 | # @check_task_id
1743 | # @set_docstring(ALL_LOGS_DOCSTRING)
1744 | # def all_logs(self, task_id):
1745 | # return FastmapCloudTask(self, task_id=task_id).all_logs()
1746 |
1747 | # @check_task_id
1748 | # @set_docstring(NEW_LOGS_DOCSTRING)
1749 | # def new_logs(self, task_id):
1750 | # return FastmapCloudTask(self, task_id=task_id).new_logs()
1751 |
1752 | # def _log_final_stats(self, fname: str, mapper: Mapper, proc_cnt: int,
1753 | # total_dur: float):
1754 | # """ After finishing the .fastmap(...) run, log stats for the user """
1755 | # avg_runtime = mapper.avg_runtime
1756 | # total_credits_used = mapper.total_credits_used
1757 |
1758 | # print()
1759 | # if not avg_runtime:
1760 | # self.log.info("Done processing %r in %.2fms." % (fname, total_dur*1000))
1761 | # else:
1762 | # time_saved = avg_runtime * proc_cnt - total_dur
1763 | # if time_saved > 0.02:
1764 | # self.log.info("Processed %d elements from %r in %s. "
1765 | # "You saved ~%s.", proc_cnt, fname,
1766 | # fmt_dur(total_dur), fmt_dur(time_saved))
1767 | # elif abs(time_saved) < 0.02:
1768 | # self.log.info("Processed %d elements from %r in %s. This "
1769 | # "ran at about the same speed as the builtin map.",
1770 | # proc_cnt, fname, fmt_dur(total_dur))
1771 | # elif self.exec_policy == ExecPolicy.LOCAL:
1772 | # self.log.info("Processed %d elements from %r in %s. This "
1773 | # "ran slower than the map builtin by ~%s. "
1774 | # "Consider not using fastmap here.",
1775 | # proc_cnt, fname, fmt_dur(total_dur),
1776 | # fmt_dur(time_saved * -1))
1777 | # else:
1778 | # self.log.info("Processed %d elements from %r in %s. "
1779 | # "This ran slower than the map builtin by ~%s. "
1780 | # "Consider connecting to a faster "
1781 | # "internet, reducing your data size, or using "
1782 | # "exec_policy LOCAL or ADAPTIVE.",
1783 | # proc_cnt, fname, fmt_dur(total_dur),
1784 | # fmt_dur(time_saved * -1))
1785 |
1786 | # if total_credits_used:
1787 | # self.log.info("Spent $%.4f.", total_credits_used / 100)
1788 | # self.log.info("Fastmap done.")
1789 |
1790 |
def chunk_bytes(payload: bytes, size: int) -> list:
    """Split ``payload`` into consecutive slices of at most ``size`` items.

    The last slice may be shorter; an empty payload gives an empty list.
    """
    pieces = []
    for start in range(0, len(payload), size):
        stop = start + size
        pieces.append(payload[start:stop])
    return pieces
1793 |
1794 |
def init_remote(config, func_hash, func_payload):
    """
    Make sure the function and its modules are on the server by calling the
    /api/v1/init endpoint. offload() does this before it creates the cloud
    task. Because of server-side caching, and the potential for very large
    payloads, the function hash is sent first and the function itself is
    only uploaded when the server does not know the hash.

    Raises FastmapException on a non-200 response, an unexpected init
    status, or when a part is not acknowledged as uploaded.
    """
    url = config.cloud_url + "/api/v1/init"
    req_dict = {'func_hash': func_hash}

    # Step 1: Try just uploading the function hash. If it exists, we are good.
    resp = post_request(url, req_dict, config.secret, config.log)
    if resp.status_code != 200:
        raise FastmapException("Cloud initialization failed %r." % resp.obj)
    if resp.status == InitStatus.FOUND:
        config.log.info("Function already on the server.")
        return
    if resp.status != InitStatus.NOT_FOUND:
        raise FastmapException("Unexpected init status %r." % resp.obj)

    # Step 2: If the server can't find the func, we need to upload it
    # We might need to chunk the upload due to cloud run limits
    func_parts = chunk_bytes(func_payload, 5 * MB)  # 5MB is arbitrary but feels right
    part_count = len(func_parts)
    for part_idx, func_part in enumerate(func_parts):
        req_dict['func'] = func_part
        req_dict['part_idx'] = part_idx
        req_dict['part_len'] = part_count
        # NOTE(review): step 1 hands post_request the dict itself, while
        # this step hands it msgpack bytes - confirm post_request accepts both.
        payload = msgpack.dumps(req_dict)
        payload_bytes = fmt_bytes(len(payload))
        if part_count > 1:
            config.log.info("Uploading code (%s) part %d/%d..." %
                            (payload_bytes, part_idx + 1, part_count))
        else:
            config.log.info("Uploading code (%s)..." % payload_bytes)
        resp = post_request(url, payload, config.secret, config.log)

        if resp.status_code != 200:
            raise FastmapException("Cloud initialization failed %r." % resp.obj)
        if resp.status != InitStatus.UPLOADED:
            raise FastmapException("Cloud initialization failed. Function not uploaded.")
    config.log.info("Done uploading code.")
1840 |
1841 |
def get_payload_and_hash(pickled_func, config):
    """Bundle the pickled function with its dependencies and hash the result.

    The bundle (function, local sources, installed modules, requirements) is
    msgpack-encoded and gzip-compressed at level 1. Returns the tuple
    ``(compressed_payload, func_hash)`` where the hash is get_hash() of the
    compressed bytes.
    """
    deps = get_dependencies(config.requirements, config.log)
    local_sources, installed_mods, requirements = deps
    bundle = {
        'func': pickled_func,
        'local_sources': local_sources,
        'installed_mods': installed_mods,
        'requirements': requirements,
    }
    compressed_payload = gzip.compress(msgpack.dumps(bundle), compresslevel=1)
    return compressed_payload, get_hash(compressed_payload)
1853 |
--------------------------------------------------------------------------------