├── deployment ├── requirements.txt ├── mfbot-cfn-template.yml └── mfbot-trop.py ├── metaflowbot ├── message_templates │ ├── __init__.py │ └── templates.py ├── version.py ├── __init__.py ├── actions │ ├── __init__.py │ ├── runs │ │ ├── __init__.py │ │ ├── rules.yml │ │ ├── commands.py │ │ └── run_resolver.py │ └── basic │ │ ├── __init__.py │ │ ├── rules.yml │ │ └── commands.py ├── exceptions.py ├── __main__.py ├── action_loader.py ├── rules.py ├── state.py ├── cli.py ├── slack_client.py └── server.py ├── requirements.txt ├── docs ├── images │ ├── app-token.png │ ├── app_install.png │ ├── bot-token.png │ ├── cfn-deploy.png │ ├── slacksetup.png │ ├── aws-iam-role-exec.png │ ├── Secret-manager-setup.png │ └── slackbot-event-lifecycle.jpg ├── README.md ├── architecture.md ├── creating-custom-actions.md ├── slack-scopes.md └── deployment.md ├── MANIFEST.in ├── Dockerfile ├── .github └── workflows │ ├── pypi_publish.yml │ └── docker_publish.yml ├── manifest.yml ├── setup.py ├── extras └── heroku │ └── .github │ └── workflows │ └── deploy.yml ├── README.md └── LICENSE /deployment/requirements.txt: -------------------------------------------------------------------------------- 1 | troposphere -------------------------------------------------------------------------------- /metaflowbot/message_templates/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /metaflowbot/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.3" 2 | -------------------------------------------------------------------------------- /metaflowbot/__init__.py: -------------------------------------------------------------------------------- 1 | __path__ = __import__("pkgutil").extend_path(__path__, __name__) 2 | -------------------------------------------------------------------------------- /metaflowbot/actions/__init__.py: -------------------------------------------------------------------------------- 1 | __path__ = __import__("pkgutil").extend_path(__path__, __name__) 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | metaflow>=2.4.0 2 | timeago==1.0.15 3 | pyyaml==5.4.1 4 | slack_sdk==3.7.0 5 | requests 6 | -------------------------------------------------------------------------------- /docs/images/app-token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outerbounds/metaflowbot/HEAD/docs/images/app-token.png -------------------------------------------------------------------------------- /docs/images/app_install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outerbounds/metaflowbot/HEAD/docs/images/app_install.png -------------------------------------------------------------------------------- /docs/images/bot-token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outerbounds/metaflowbot/HEAD/docs/images/bot-token.png -------------------------------------------------------------------------------- /docs/images/cfn-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outerbounds/metaflowbot/HEAD/docs/images/cfn-deploy.png -------------------------------------------------------------------------------- /docs/images/slacksetup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outerbounds/metaflowbot/HEAD/docs/images/slacksetup.png -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include Docs/** 3 | include metaflowbot/actions/**/*.yml 4 | include manifest.yml 5 | -------------------------------------------------------------------------------- /docs/images/aws-iam-role-exec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outerbounds/metaflowbot/HEAD/docs/images/aws-iam-role-exec.png -------------------------------------------------------------------------------- /docs/images/Secret-manager-setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outerbounds/metaflowbot/HEAD/docs/images/Secret-manager-setup.png -------------------------------------------------------------------------------- /docs/images/slackbot-event-lifecycle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/outerbounds/metaflowbot/HEAD/docs/images/slackbot-event-lifecycle.jpg -------------------------------------------------------------------------------- /metaflowbot/actions/runs/__init__.py: -------------------------------------------------------------------------------- 1 | import pkgutil 2 | 3 | from metaflowbot.rules import MFBRules 4 | 5 | from . import commands 6 | 7 | data = pkgutil.get_data(__name__, "rules.yml") 8 | RULES = MFBRules.make_subpackage_rules(data) 9 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Metaflow bot docker file 2 | FROM python:3.7.2 3 | ADD . /metaflowbot 4 | RUN pip3 install /metaflowbot/. 5 | RUN pip3 install metaflowbot-actions-jokes 6 | CMD python3 -m metaflowbot --slack-bot-token $(echo $SLACK_BOT_TOKEN) server --admin $(echo $ADMIN_USER_ADDRESS) 7 | -------------------------------------------------------------------------------- /metaflowbot/exceptions.py: -------------------------------------------------------------------------------- 1 | class MFBException(Exception): 2 | headline = "MetaflowBot error" 3 | traceback = False 4 | 5 | def __init__(self, msg): 6 | self.msg = msg 7 | 8 | 9 | class MFBRulesParseException(MFBException): 10 | headline = "MetaflowBot rules file error" 11 | traceback = False 12 | -------------------------------------------------------------------------------- /metaflowbot/actions/basic/__init__.py: -------------------------------------------------------------------------------- 1 | import pkgutil 2 | 3 | # This is the template for creating Commands and Rules 4 | from metaflowbot.rules import MFBRules 5 | 6 | from . import commands 7 | 8 | data = pkgutil.get_data(__name__, "rules.yml") 9 | RULES = MFBRules.make_subpackage_rules(data) 10 | # In order for click to register commands we need 11 | # the package should have a relative import in the __init__.py file 12 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 👋 2 | 3 | If you are interested in deploying the Metaflowbot (either locally or on AWS), take a look at our [deployment guide](./deployment.md). 4 | 5 | If you are curious to add more functionality to the bot by creating new commands and actions, you can get started [here](./creating-custom-actions.md). 6 | 7 | If you would like to understand how the bot works, [this article](./architecture.md) is a good starting point. For the security conscious, here are the [Slack scopes](./slack-scopes.md) that Metaflowbot requires to function. 8 | 9 | If you would like any assistance, please reach out to as at http://slack.outerbounds.co 10 | 11 | 12 | -------------------------------------------------------------------------------- /.github/workflows/pypi_publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish PyPi 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | publish: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | - name: Set up Python 3.7.x 11 | uses: actions/setup-python@v1 12 | with: 13 | python-version: '3.7.x' 14 | - name: Install Python 3.7.x dependencies 15 | run: | 16 | python3 -m pip install --upgrade pip 17 | pip3 install setuptools wheel twine 18 | - name: Build package 19 | run: | 20 | python3 setup.py sdist bdist_wheel --universal 21 | - name: Publish package 22 | uses: pypa/gh-action-pypi-publish@release/v1 23 | with: 24 | user: __token__ 25 | password: ${{ secrets.PYPI_PASSWORD }} 26 | verbose: true 27 | 28 | -------------------------------------------------------------------------------- /manifest.yml: -------------------------------------------------------------------------------- 1 | _metadata: 2 | major_version: 1 3 | minor_version: 1 4 | display_information: 5 | name: flowey 6 | features: 7 | app_home: 8 | home_tab_enabled: false 9 | messages_tab_enabled: true 10 | messages_tab_read_only_enabled: false 11 | bot_user: 12 | display_name: flowey 13 | always_online: true 14 | oauth_config: 15 | scopes: 16 | bot: 17 | - app_mentions:read 18 | - channels:manage 19 | - channels:read 20 | - chat:write 21 | - im:history 22 | - im:read 23 | - im:write 24 | - users:read.email 25 | - users:read 26 | settings: 27 | event_subscriptions: 28 | bot_events: 29 | - message.im 30 | - app_mention 31 | interactivity: 32 | is_enabled: true 33 | org_deploy_enabled: false 34 | socket_mode_enabled: true 35 | -------------------------------------------------------------------------------- /metaflowbot/actions/basic/rules.yml: -------------------------------------------------------------------------------- 1 | - name: First message of the thread, requesting help 2 | event_type: new_thread 3 | message: help(.*)|how to(.*) 4 | action: 5 | op: new-thread 6 | create-thread: true 7 | 8 | - name: Generic help fallback 9 | event_type: user_message 10 | message: help(.*)|how to(.*) 11 | action: 12 | op: new-thread 13 | create-thread: false 14 | 15 | 16 | - name: find version 17 | event_type: new_thread 18 | message: (version|--version) 19 | action: 20 | op: version 21 | create-thread: true 22 | 23 | - name: find version 24 | event_type: user_message 25 | message: (version|--version) 26 | action: 27 | op: version 28 | create-thread: false 29 | 30 | # This should be last fallback rule for new thread. 31 | - name: First message of the thread 32 | event_type: new_thread 33 | action: 34 | op: new-thread 35 | create-thread: true 36 | -------------------------------------------------------------------------------- /metaflowbot/actions/runs/rules.yml: -------------------------------------------------------------------------------- 1 | - name: Define a run to be inspected 2 | event_type: user_message 3 | message: inspect (.+) 4 | action: 5 | op: inspect-run 6 | runspec: '{event.msg}' 7 | 8 | 9 | 10 | - name: Inspect run without an argument but a run is set, basic inspect 11 | event_type: user_message 12 | message: inspect 13 | action: 14 | op: inspect-run 15 | runspec: 'inspect {context[inspect.run_id]}' 16 | 17 | - name: Inspect but no run set 18 | event_type: user_message 19 | message: inspect 20 | context: 21 | inspect.run_id: null 22 | action: 23 | op: inspect-run 24 | howto: true 25 | 26 | 27 | - name: How to inspect (fallback) 28 | event_type: user_message 29 | message: how to inspect(.*) 30 | action: 31 | op: inspect 32 | howto: true 33 | 34 | - name: How to inspect (fallback) 35 | event_type: new_thread 36 | message: (how to inspect(.*)) 37 | action: 38 | op: inspect 39 | howto: true 40 | create-thread: true 41 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | import setuptools 5 | 6 | 7 | def get_long_description() -> str: 8 | with open("README.md") as fh: 9 | return fh.read() 10 | 11 | 12 | def get_required() -> List[str]: 13 | with open("requirements.txt") as fh: 14 | return fh.read().splitlines() 15 | 16 | 17 | def get_version(): 18 | with open(os.path.join("metaflowbot", "version.py")) as fh: 19 | for line in fh: 20 | if line.startswith("__version__ = "): 21 | return line.split()[-1].strip().strip("'").replace("\"","") 22 | 23 | 24 | setuptools.setup( 25 | name="metaflowbot", 26 | packages=setuptools.find_packages(), 27 | version=get_version(), 28 | license="Apache License 2.0", 29 | author="Outerbounds", 30 | include_package_data=True, 31 | url="https://github.com/outerbounds/metaflowbot", 32 | long_description=get_long_description(), 33 | long_description_content_type="text/markdown", 34 | install_requires=get_required(), 35 | python_requires=">=3.6", 36 | entry_points={ 37 | "console_scripts": ["metaflowbot=metaflowbot.__main__:main"], 38 | }, 39 | ) 40 | -------------------------------------------------------------------------------- /metaflowbot/__main__.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | 3 | import click 4 | 5 | # `action_loader` needs to be loaded before `cli` because 6 | # it will load all the actions that may have been custom installed. 7 | # Once `action_loader` is loaded, `cli` can be safely loaded; 8 | # We do this because `cli` loads of SUPPORTED_ACTIONS object which needs 9 | # `action_loader` to be loaded first 10 | from . import action_loader, cli 11 | from .exceptions import MFBException 12 | import os 13 | 14 | class CliState(object): 15 | def __init__(self): 16 | self.token = None 17 | self.publish_state = None 18 | self.reply = None 19 | self.thread = None 20 | self.sc = None 21 | 22 | def main(): 23 | if os.environ.get('USERNAME',None) is None: 24 | os.environ['USERNAME'] = 'metaflowbot' 25 | try: 26 | cli.cli(auto_envvar_prefix="MFB", obj=CliState()) 27 | except cli.MFBException as ex: 28 | click.secho(ex.headline, fg="white", bold=True) 29 | if ex.traceback: 30 | traceback.print_exc() 31 | else: 32 | click.secho(ex.msg, fg="red", bold=True) 33 | 34 | 35 | if __name__ == "__main__": 36 | main() 37 | -------------------------------------------------------------------------------- /.github/workflows/docker_publish.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | on: 3 | release: 4 | types : [published] 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v2 11 | - name: Docker meta 12 | id: meta 13 | uses: docker/metadata-action@v3 14 | with: 15 | images: | 16 | outerbounds/metaflowbot 17 | tags: | 18 | type=ref,event=branch 19 | type=ref,event=pr 20 | type=semver,pattern={{version}} 21 | type=semver,pattern={{major}}.{{minor}} 22 | type=sha 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v1 25 | - name: Login to DockerHub 26 | if: github.event_name != 'pull_request' 27 | uses: docker/login-action@v1 28 | with: 29 | username: ${{ secrets.DOCKER_USERNAME }} 30 | password: ${{ secrets.DOCKER_ACCESS_TOKEN }} 31 | - name: Build and push 32 | uses: docker/build-push-action@v2 33 | with: 34 | context: . 35 | push: ${{ github.event_name != 'pull_request' }} 36 | tags: ${{ steps.meta.outputs.tags }} 37 | labels: ${{ steps.meta.outputs.labels }} 38 | -------------------------------------------------------------------------------- /extras/heroku/.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: deploy 2 | on: 3 | push: 4 | tags: releases/[1-9]+.[0-9]+.[0-9]+ 5 | branches: main 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: akhileshns/heroku-deploy@v3.12.12 # This is the action 12 | with: 13 | # How to create : https://help.heroku.com/PBGP6IDE/how-should-i-generate-an-api-key-that-allows-me-to-use-the-heroku-platform-api 14 | heroku_api_key: ${{secrets.HEROKU_API_KEY}} 15 | heroku_app_name: "metaflow-bot" #Must be unique in Heroku 16 | # User address under which the heroku app is configured 17 | heroku_email: ${{secrets.ADMIN_USER_ADDRESS}} 18 | checkstring: "ok" 19 | usedocker: true 20 | rollbackonhealthcheckfailed: false 21 | docker_heroku_process_type: "worker" 22 | env: 23 | # these environment variables are set with HD_ prefix because 24 | # artifacts with HD_ prefix are shared like secrets on heroku 25 | HD_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_TOKEN }} 26 | HD_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_TOKEN }} 27 | HD_SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} 28 | HD_SLACK_APP_TOKEN: ${{ secrets.SLACK_APP_TOKEN }} 29 | HD_USERNAME: ${{ secrets.USERNAME }} 30 | HD_METAFLOW_SERVICE_AUTH_KEY: ${{secrets.METAFLOW_SERVICE_AUTH_KEY}} 31 | HD_METAFLOW_SERVICE_URL: ${{ secrets.METAFLOW_SERVICE_URL }} 32 | HD_ADMIN_USER_ADDRESS: ${{secrets.ADMIN_USER_ADDRESS}} 33 | HD_METAFLOW_DATASTORE_SYSROOT_S3: ${{secrets.METAFLOW_DATASTORE_SYSROOT_S3}} 34 | -------------------------------------------------------------------------------- /metaflowbot/action_loader.py: -------------------------------------------------------------------------------- 1 | # we import actions just to execute the action modules, 2 | # so they will get a chance to add themselves to cli.actions 3 | import importlib 4 | import pkgutil 5 | 6 | from . import actions 7 | 8 | 9 | # Code from https://packaging.python.org/guides/creating-and-discovering-plugins/#using-namespace-packages 10 | def iter_namespace(ns_pkg): 11 | # Specifying the second argument (prefix) to iter_modules makes the 12 | # returned name an absolute name instead of a relative one. This allows 13 | # import_module to work without having to do additional modification to 14 | # the name. 15 | return pkgutil.iter_modules(ns_pkg.__path__, ns_pkg.__name__ + ".") 16 | 17 | 18 | SUPPORTED_ACTIONS = {} 19 | SUPPORTED_RULES = [] 20 | # This module is loaded at Last because the rules 21 | # in basic have Greeting actions wired to opening new-thread 22 | # For this reason we need to ensure that metaflowbot.actions.basic loads last 23 | LAST_LOAD_MODULE = "metaflowbot.actions.basic" 24 | 25 | for finder, name, ispkg in iter_namespace(actions): 26 | if name == LAST_LOAD_MODULE: 27 | continue 28 | action_package = importlib.import_module(name) 29 | action = name 30 | try: 31 | # Register the rules here 32 | assert action_package.RULES is not None 33 | SUPPORTED_RULES.extend(action_package.RULES) 34 | SUPPORTED_ACTIONS[action] = action_package 35 | except AttributeError as e: 36 | print( 37 | f"Ignoring import of action {action} since it lacks any associated rules." 38 | ) 39 | 40 | 41 | SUPPORTED_ACTIONS[LAST_LOAD_MODULE] = importlib.import_module(LAST_LOAD_MODULE) 42 | SUPPORTED_RULES.extend(SUPPORTED_ACTIONS[LAST_LOAD_MODULE].RULES) 43 | -------------------------------------------------------------------------------- /metaflowbot/actions/basic/commands.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | 3 | import click 4 | 5 | from metaflowbot.cli import action 6 | from metaflowbot.message_templates.templates import (DEFAULT_ERROR_MESSAGE, 7 | BotVersion, IntroMessage, 8 | error_message) 9 | from metaflowbot.state import MFBState 10 | 11 | 12 | @action.command(help="new_thread") 13 | @click.option("--create-thread/--no-create-thread", help="Will create a new thread") 14 | @click.pass_obj 15 | def new_thread(obj, create_thread=False): 16 | try: 17 | if create_thread: 18 | obj.publish_state(MFBState.message_new_thread(obj.thread)) 19 | greeting = IntroMessage() 20 | dm_token = "<@%s>" % obj.sc.bot_user_id() 21 | intromsg, blocks = greeting.get_slack_message(dm_token) 22 | obj.reply(intromsg, blocks=blocks) 23 | except: 24 | traceback.print_exc() 25 | my_traceback = traceback.format_exc() 26 | obj.reply(DEFAULT_ERROR_MESSAGE, **error_message(my_traceback)) 27 | 28 | 29 | @action.command(help="reply") 30 | @click.option("--message", required=True, help="Reply this message") 31 | @click.pass_obj 32 | def reply(obj, message=None): 33 | obj.reply(message) 34 | 35 | 36 | @action.command(help="version") 37 | @click.option("--create-thread/--no-create-thread", help="Will create a new thread") 38 | @click.pass_obj 39 | def version(obj, create_thread=False): 40 | try: 41 | if create_thread: 42 | obj.publish_state(MFBState.message_new_thread(obj.thread)) 43 | message = BotVersion().get_slack_message() 44 | obj.reply(message) 45 | except: 46 | traceback.print_exc() 47 | my_traceback = traceback.format_exc() 48 | obj.reply(DEFAULT_ERROR_MESSAGE, **error_message(my_traceback)) 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Metaflowbot - Slack Bot for your Metaflow flows! 2 | 3 | Metaflowbot makes it fun and easy to monitor your Metaflow runs, past and present. Imagine starting a training run that lasts for hours - you can now monitor it anywhere using Slack on your mobile device! 4 | 5 | ![2021-08-09 15 57 31](https://user-images.githubusercontent.com/763451/128784858-d9e37401-05de-4d02-82c5-29444ab4e1b3.gif) 6 | 7 | The bot is [easy to deploy](./docs/deployment.md): It is just a Python process with few external dependencies - no databases needed. Its [security footprint is small](./docs/slack-scopes.md) as it uses only a tightly scoped set of Slack calls. During development you can run the bot on any workstation, so it is quick to [iterate on custom actions](./docs/creating-custom-actions.md) and extend it to suit your needs. For production deployments the bot ships with a [CloudFormation template](./deployment/mfbot-cfn-template.yml) for automating your deployments to AWS. 8 | 9 | ## Communicating with the bot 10 | 11 | There are two ways interact with the Metaflow bot. You can invite the bot on a `channel` or directly speak to it via `direct message`. 12 | 13 | - `@flowey help` : Help 14 | 15 | - `@flowey tell me a joke` 16 | 17 | - `@flowey how to inspect` : How to inspect 18 | 19 | - `@flowey inspect HelloFlow` : Inspect `Run`s of a particular `Flow` 20 | 21 | - `@flowey inspect savin's HelloFlow`: Inspect `Run`s of a particular `Flow` 22 | 23 | - `@flowey inspect savin's HelloFlow tagged some_tag` : Inspect `Run`s of a particular `Flow` 24 | 25 | - `@flowey inspect HelloFlow/12` : Inspect an individual `Run` instance 26 | 27 | - `@flowey inspect the latest run of HelloFlow` : Inspect an individual `Run` instance 28 | 29 | - `@flowey inspect savin's latest run of HelloFlow` : Inspect an individual `Run` instance 30 | 31 | 32 | If you require some customization for your deployment or need additional help, please feel free to reach out to us at http://slack.outerbounds.co. We are very happy to help! 33 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | 2 | # Metaflowbot Event Lifecycle 3 | ![](images/slackbot-event-lifecycle.jpg) 4 | 5 | `MFBServer` listens to events from Slack via `MFBSlackClientV2`and converts then to internal event representation. These events are matched with rules to and based on the matched rules an action is invoked. 6 | 7 | In the above event lifecycle, the `metaflowbot` uses Slack as a stateful store for storing context about threads. When the Bot detects a `state_change` event, its stores its contents in the `MFBState` [Object](../metaflowbot/state.py). 8 | 9 | ## Core Objects 10 | 11 | - `MFBServer` is present in [metaflowbot.server](../metaflowbot/server.py). This class is the server that parses messages from slack events, applies rules and invokes actions based on the matched rules. 12 | 13 | - `MFBSlackClientV2` is present in [metaflowbot.slack_client](../metaflowbot/slack_client.py). This class provides a wrapper over slack APIs. 14 | 15 | - `MFBRules` is present in [metaflowbot.rules](../metaflowbot/rules.py). This class helps match rules with events. 16 | 17 | - `MFBState` is present in [metaflowbot.state](../metaflowbot/rules.py). This class helps hold state about the open threads and conversations with users. It is used by `MFBServer`. 18 | 19 | - [metaflowbot.actions](../metaflowbot/actions) is the subpackage that holds all the actions of the bot. Ways to create new subpackages can be found [here](./Creating-Your-Action.md). 20 | 21 | ## Event Lifecycle Psuedo Code 22 | ```python 23 | 24 | while server runs forever: # metaflowbot.server.MFBServer.loop_forever() 25 | foreach event from slack: # metaflowbot.slack_client.MFBSlackClientV2.rtm_events() 26 | convert events to MFBot Compatible events: # metaflowbot.server._make_events(event) 27 | if event matches rules: # metaflowbot.rules.MFBRules.match(event,rule) 28 | apply action matches by rule # metaflowbot.server.actions 29 | # Calls MFBServer._take_action 30 | # Every action is invoked as a seperate python process. 31 | ``` 32 | ## Rule 33 | 34 | - A basic rule looks like the following: 35 | 36 | ```yml 37 | 38 | - name: Generic help fallback # Name of the rule 39 | event_type: user_message # Type of event the rule needs to filter 40 | message: help(.*)|how to(.*) # pattern to use when filtering the message 41 | action: 42 | op: new-thread # Operation in metaflowbot.actions. 43 | create-thread: false # Arguement for the action to be invoked. 44 | 45 | ``` 46 | 47 | ## Events 48 | ### `new_thread` 49 | 50 | > When a completely new thread is created 51 | ### `user_message` 52 | 53 | > When a user messages on a thread 54 | -------------------------------------------------------------------------------- /metaflowbot/rules.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import yaml 4 | 5 | from .action_loader import SUPPORTED_RULES 6 | from .exceptions import MFBRulesParseException 7 | 8 | 9 | class MFBRules(object): 10 | """MFBRules 11 | Object that runs the rule framework on the Bot. 12 | 13 | Currently Handled Events : 14 | # Internal Events 15 | - lost_process 16 | # User Facing events: 17 | - new_thread 18 | - Publishes state 19 | - user_message 20 | - Some messsage Publishes state 21 | # Possible User Facing events in Future 22 | - slash_message : Possible in future 23 | """ 24 | 25 | def __init__(self): 26 | # Changed the way rules are loaded here from Initial version. 27 | # Now every subpackage botaction needs toe expose a `RULES` object which 28 | # gets registered at init to make the bot actions more customizable. 29 | self.rules = SUPPORTED_RULES 30 | 31 | def __len__(self): 32 | return len(self.rules) 33 | 34 | @staticmethod 35 | def make_subpackage_rules(data): 36 | try: 37 | rules = yaml.load(data, Loader=yaml.SafeLoader) 38 | except Exception as ex: 39 | raise MFBRulesParseException(str(ex)) 40 | 41 | for i, rule in enumerate(rules): 42 | if not all(k in rule for k in ("name", "event_type", "action")): 43 | raise MFBRulesParseException( 44 | "Rule #%d does not have name, " 45 | "event_type, and action " 46 | "specified." % (i + 1) 47 | ) 48 | msg = rule.get("message") 49 | if msg: 50 | rule["message"] = re.compile(msg, flags=re.IGNORECASE) 51 | return rules 52 | 53 | def match(self, event, state): 54 | for rule in self.rules: 55 | event_type = rule.get("event_type") 56 | # If event type of rule and event type of message don't match continue 57 | if event_type and event_type != event.type: 58 | continue 59 | 60 | message = rule.get("message") 61 | re_match = None 62 | if message: 63 | re_match = message.match(event.msg.strip()) 64 | # if Message didn't match the rule theen continue 65 | if not re_match: 66 | continue 67 | 68 | if event.type == "state_change" and not state.is_event_match( 69 | event, rule.get("state_change", {}) 70 | ): 71 | continue 72 | context = rule.get("context") 73 | if context and not state.is_state_match(context, event): 74 | continue 75 | return ( 76 | rule["name"], 77 | rule["action"], 78 | re_match.groups() if re_match else [], 79 | rule.get("ephemeral_context_update"), 80 | ) 81 | -------------------------------------------------------------------------------- /docs/creating-custom-actions.md: -------------------------------------------------------------------------------- 1 | # Create Your Own Bot Action 2 | The initial version of the bot ships with actions that allow you to monitor the status of Metaflow runs, past and present. Imagine starting a training run that lasts for hours - you can now monitor it anywhere using Slack on your mobile device! You can converse with the bot over direct messages or or invite the bot to a channel. 3 | 4 | With the diversity of machine learning and data science use cases, we have wanted to make it easy to extend the bot with new actions. As an example, we ship a [simple jokes command](https://github.com/outerbounds/metaflowbot-jokes-action) :clown_face:. You can use it as a template to create custom actions which can be enabled just by doing a `pip install`. We would love to see people contributing actions of all kinds - please share with us (http://slack.outerbounds.co) if you have any ideas or prototypes! 5 | 6 | 7 | ### How To Create Your Own Bot Action 8 | Create your own custom action by creating a Python package with the following folder structure - 9 | 10 | ``` 11 | your_bot_action/ # the name of this dir doesn't matter 12 | ├ setup.py 13 | ├ metaflowbot/ # namespace package name 14 | │ └ __init__.py # special pkgutil namespace __init__.py 15 | │ └ action/ # namespace sub package name 16 | │ ├__init__.py # special pkgutil namespace __init__.py 17 | │ └ your-special-acton/ # dir name must match the package name in `setup.py` 18 | │ └ __init__.py # Contains a prespecified code block given below 19 | │ └ rules.yml. # This mandatory to create rules 20 | │ └ commands.py. # This create main commands from click 21 | . 22 | ``` 23 | 24 | Every module must contain a `rules.yml`, a `__init__.py`, and a module that contains click commands imported from `metaflowbot.cli.actions`. Every rule in the `rules.yml` should follow [this](./architecture.md##Rule) structure 25 | 26 | The `__init__.py` inside an installable action must contain the following code to integrate with `metaflowbot`'s actions 27 | ```python 28 | import pkgutil 29 | 30 | from metaflowbot.rules import MFBRules 31 | 32 | data = pkgutil.get_data(__name__, "rules.yml") 33 | RULES = MFBRules.make_subpackage_rules(data) 34 | from . import commands 35 | ``` 36 | 37 | #### Locally Development 38 | 39 | Export the slack tokens as environment variables : 40 | 41 | ```sh 42 | export SLACK_APP_TOKEN=xapp-1-AAAAAAAAAAA-2222222222222-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 43 | export SLACK_BOT_TOKEN=xoxb-2222222222222-2222222222222-AAAAAAAAAAAAAAAAAAAAAAAA 44 | ``` 45 | 46 | - Install `metaflowbot` repository and install your custom development repo; ensure you have set the `PYTHONPATH` correctly so that `metaflowbot` can be resolved. 47 | 48 | ```sh 49 | pip install metaflowbot 50 | pip install -e ./ 51 | ``` 52 | 53 | - If you are running the bot locally with a local metadata provider, then run the above command inside the directory where the `.metaflow` folder is present. 54 | -------------------------------------------------------------------------------- /metaflowbot/state.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import traceback 4 | 5 | PREFIX = "[MFB] " 6 | 7 | 8 | class MFBState(object): 9 | def __init__(self): 10 | self._thread_state = {} 11 | self._channel_name = {} 12 | self._user_info = {} 13 | self._monitors = {} 14 | 15 | @classmethod 16 | def _make_message(cls, **kwargs): 17 | return "%s`%s`" % (PREFIX, json.dumps(kwargs)) 18 | 19 | def _parse_message(self, msg): 20 | try: 21 | return json.loads(msg[len(PREFIX) + 1 : -1]) 22 | except: 23 | return {} 24 | 25 | def channel_name(self, chan): 26 | return self._channel_name.get(chan) 27 | 28 | def user_name(self, user): 29 | return self._user_info.get(user) 30 | 31 | def get_thread(self, event): 32 | if event.type == "state_change": 33 | return self._parse_message(event.msg).get("thread") 34 | else: 35 | return "%s:%s" % (event.chan, event.thread_ts) 36 | 37 | def get_thread_state(self, thread): 38 | return self._thread_state.get(thread, {}) 39 | 40 | def is_state_match(self, context, event): 41 | # note that if the event is a state change event, the state 42 | # is matched against the originating thread, not against the 43 | # admin thread, which would be pointless 44 | thread = self._thread_state.get(self.get_thread(event), {}) 45 | return all(thread.get(k) == v for k, v in context.items()) 46 | 47 | def is_event_match(self, event, rule): 48 | msg = self._parse_message(event.msg) 49 | if msg.get("type") == rule.get("type") == "set": 50 | # 'set' sets multiple k-v pairs in the thread state. 51 | # The rule matches a specified key in the event. It 52 | # can optionally match the value too. 53 | attrs = msg.get("attributes", {}) 54 | key = rule.get("key") 55 | return key in attrs and ("value" not in rule or rule["value"] == attrs[key]) 56 | else: 57 | return all(msg.get(k) == v for k, v in rule.items()) 58 | 59 | def is_known_thread(self, chan, thread_ts): 60 | key = "%s:%s" % (chan, thread_ts) 61 | return key in self._thread_state 62 | 63 | def is_admin_thread_parent(self, msg): 64 | return ( 65 | self.is_state_message(msg) 66 | and self._parse_message(msg).get("type") == "admin_thread" 67 | ) 68 | 69 | def is_state_message(self, msg): 70 | return msg and msg.startswith(PREFIX) 71 | 72 | def update(self, event): 73 | try: 74 | msg = self._parse_message(event.msg) 75 | msg_type = msg["type"] 76 | if msg_type in ("admin_thread", "noop"): 77 | pass 78 | elif msg_type == "new_thread": 79 | self._thread_state[msg["thread"]] = {} 80 | elif msg_type == "set": 81 | self.update_thread(msg["thread"], msg["attributes"]) 82 | else: 83 | return False 84 | return True 85 | except: 86 | traceback.print_exc() 87 | return False 88 | 89 | def update_thread(self, thread, attributes): 90 | if thread in self._thread_state: 91 | thread_state = self._thread_state[thread] 92 | for k, v in attributes.items(): 93 | if isinstance(v, dict): 94 | prev_dict = thread_state.get(k) 95 | if prev_dict is None: 96 | thread_state[k] = dict(v) 97 | else: 98 | prev_dict.update(v) 99 | else: 100 | thread_state[k] = v 101 | 102 | @classmethod 103 | def message_noop(cls): 104 | return cls._make_message(type="noop") 105 | 106 | @classmethod 107 | def message_new_thread(cls, thread): 108 | return cls._make_message(type="new_thread", thread=thread) 109 | 110 | @classmethod 111 | def message_new_admin_thread(cls): 112 | return cls._make_message(type="admin_thread") 113 | 114 | @classmethod 115 | def message_set_attributes(cls, thread, attributes): 116 | return cls._make_message(type="set", attributes=attributes, thread=thread) 117 | -------------------------------------------------------------------------------- /metaflowbot/cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from datetime import datetime 4 | 5 | import click 6 | 7 | from .action_loader import SUPPORTED_ACTIONS 8 | from .exceptions import MFBException 9 | from .rules import MFBRules 10 | from .server import MFBServer, StateNotFound 11 | from .slack_client import MFBSlackClientV2 12 | 13 | LOGGER_TIMESTAMP = "magenta" 14 | LOGGER_COLOR = "green" 15 | LOGGER_BAD_COLOR = "red" 16 | 17 | 18 | def logger(body="", system_msg=False, head="", bad=False, timestamp=True): 19 | if timestamp: 20 | tstamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] 21 | click.secho(tstamp + " ", fg=LOGGER_TIMESTAMP, nl=False) 22 | if head: 23 | click.secho(head, fg=LOGGER_COLOR, nl=False) 24 | click.secho(body, bold=system_msg, fg=LOGGER_BAD_COLOR if bad else None) 25 | 26 | 27 | @click.group() 28 | @click.option( 29 | "--debug", 30 | is_flag=True, 31 | default=False, 32 | help="Debug mode: Print to stdout instead of sending to Slack", 33 | ) 34 | @click.option( 35 | "--slack-bot-token", 36 | envvar="SLACK_BOT_TOKEN", 37 | help="Bot token to make web API calls to Slack.", 38 | ) 39 | @click.option( 40 | "--slack-app-token", 41 | envvar="SLACK_APP_TOKEN", 42 | help="App token to make a socket connection to Slack.", 43 | ) 44 | @click.option("--admin-thread", help="Admin thread for actions (do not set manually)") 45 | @click.option("--reply-thread", help="Reply thread for actions (do not set manually)") 46 | @click.pass_obj 47 | def cli( 48 | obj, 49 | debug=False, 50 | slack_bot_token=None, 51 | slack_app_token=None, 52 | admin_thread=None, 53 | reply_thread=None, 54 | ): 55 | obj.sc = MFBSlackClientV2(slack_bot_token, slack_app_token=slack_app_token) 56 | if debug: 57 | obj.publish_state = lambda msg: logger(msg, head="[debug state] ") 58 | obj.reply = lambda msg: logger(msg, head="[debug reply] ") 59 | else: 60 | if admin_thread: 61 | obj.publish_state = lambda msg: obj.sc.post_message( 62 | msg, *admin_thread.split(":") 63 | ) 64 | 65 | if reply_thread: 66 | obj.thread = reply_thread 67 | channel, thread_ts = reply_thread.split(":") 68 | obj.reply = lambda msg, attachments=None, blocks=None: obj.sc.post_message( 69 | msg, channel, thread_ts, attachments=attachments, blocks=blocks 70 | ) 71 | 72 | 73 | @cli.command(help="Start the Metaflow bot server.") 74 | @click.option( 75 | "--admin", 76 | required=True, 77 | help="Email of the admin user (used to idenify the admin " "Slack account).", 78 | ) 79 | @click.option( 80 | "--new-admin-thread", 81 | is_flag=True, 82 | default=False, 83 | help="Initialize a new admin thread in a DM between " 84 | "metaflowbot and the admin user.", 85 | ) 86 | @click.option( 87 | "--load-state/--no-load-state", 88 | default=True, 89 | show_default=True, 90 | help="Reconstruct state based on the admin channel.", 91 | ) 92 | @click.option( 93 | "--action-user", 94 | default="nobody", 95 | show_default=True, 96 | help="If the server is run as root, sudo to this user " 97 | "for all actions, to prevent system-wide side-effects.", 98 | ) 99 | @click.pass_obj 100 | def server(obj, admin=None, new_admin_thread=False, load_state=None, action_user=None): 101 | def log(msg="", **kwargs): 102 | logger(msg, system_msg=True, **kwargs) 103 | 104 | if os.getuid() != 0: 105 | action_user = None 106 | spaces = "\n\t\t\t" 107 | modules = spaces.join(SUPPORTED_ACTIONS.keys()) 108 | modules_message = f"Discovered the following actions :{spaces}{modules}" 109 | log(modules_message) 110 | rules_obj = MFBRules() 111 | log("Loaded %d rules" % (len(rules_obj))) 112 | 113 | server = MFBServer(obj.sc, admin, rules_obj, logger, action_user) 114 | if new_admin_thread: 115 | server.new_admin_thread() 116 | log("Started a new admin thread.") 117 | 118 | if load_state: 119 | log("Starting to load previous state..") 120 | try: 121 | server.reconstruct_state() 122 | except StateNotFound as e: 123 | log("Previous state was not found. " 124 | "Making new admin thread") 125 | server.new_admin_thread() 126 | time.sleep(2) 127 | server.reconstruct_state() 128 | except: 129 | raise 130 | log("State reconstructed.") 131 | log(head="Activating the bot..") 132 | server.loop_forever() 133 | 134 | 135 | @cli.group(help="Bot actions") 136 | @click.pass_obj 137 | def action(ctx): 138 | pass 139 | -------------------------------------------------------------------------------- /metaflowbot/message_templates/templates.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import namedtuple 3 | from datetime import datetime 4 | from enum import Enum 5 | 6 | import timeago 7 | from metaflow.metaflow_version import get_version 8 | 9 | from ..version import __version__ 10 | 11 | DEFAULT_ERROR_MESSAGE = ":skull_and_crossbones: Oops something went wrong" 12 | SLACK_MAX_BLOCKS = 50 13 | 14 | 15 | class HEADINGS(Enum): 16 | NO_FLOWS = "No flows found :expressionless:" 17 | NO_RUNS = "No runs matched :expressionless:" 18 | NO_S3_DATASTORE = "Seems like the flow was run on local. "\ 19 | "Metaflow bot only currently supports S3 as a datastore :expressionless:" 20 | 21 | 22 | class RESPONSES(Enum): 23 | NO_FLOW_IN_NAMESPACE = ( 24 | f"I couldn't find any flows on a Global namespace." 25 | "If you are new to Metaflow please have a look at the docs" 26 | ": https://docs.metaflow.org/" 27 | ) 28 | WAITING = f"Alright, Just a minute I am resolving your query." 29 | 30 | USER_NOT_PRESENT = ( 31 | ":meow_thinkingcool: It seems the User you may have been " 32 | "looking for is not present in the Metadata service. Maybe try a different " 33 | "user ?" 34 | ) 35 | 36 | 37 | class Template: 38 | def _make_null_response( 39 | self, 40 | response_line=RESPONSES.NO_FLOW_IN_NAMESPACE.value, 41 | headline=HEADINGS.NO_FLOWS.value, 42 | ): 43 | 44 | message = [ 45 | { 46 | "type": "header", 47 | "text": {"type": "plain_text", "text": headline}, 48 | }, 49 | {"type": "section", "text": {"type": "mrkdwn", "text": f"{response_line}"}}, 50 | ] 51 | message.extend(self.make_context_block()) 52 | return message 53 | 54 | def make_context_block(self): 55 | return [ 56 | {"type": "divider"}, 57 | { 58 | "type": "context", 59 | "elements": [ 60 | { 61 | "type": "mrkdwn", 62 | "text": ":question: Get help at any time by messaging help on " 63 | "this thread or type *help* in a DM with me\n", 64 | } 65 | ], 66 | }, 67 | ] 68 | 69 | 70 | class BotVersion(Template): 71 | def _get_metadata_endpoint(self): 72 | from metaflow import get_metadata 73 | 74 | mds = get_metadata() 75 | _, service_url = mds.split("@") 76 | return service_url 77 | 78 | def get_slack_message(self): 79 | message = ( 80 | f"Running Metaflowbot version `{__version__}` with Metaflow version " 81 | f"`{get_version(pep440=True)}` and " 82 | "configured with metaflow service endpoint " 83 | f"- `{self._get_metadata_endpoint()}`" 84 | ) 85 | 86 | return message 87 | 88 | 89 | # Block Messages on slack : https://api.slack.com/reference/block-kit/blocks 90 | 91 | 92 | class IntroMessage(Template): 93 | def get_slack_message(self, bot_name): 94 | intro_message = ( 95 | f"Hey, I am {bot_name}, the Metaflowbot :robot_face:! " 96 | "I can help you to inspect results of past runs. " 97 | f"If you want to inspect results, type the following commands for more information: " 98 | f"{bot_name} how to inspect run or {bot_name} how to inspect. " 99 | ) 100 | 101 | INTO_MESSAGE_BLOCKS = [ 102 | {"type": "section", "text": {"type": "mrkdwn", "text": intro_message}}, 103 | {"type": "divider"}, 104 | { 105 | "type": "context", 106 | "elements": [ 107 | { 108 | "type": "mrkdwn", 109 | "text": "Note that all discussions with me should happen in a thread. " 110 | f"You can open a new thread with me e.g. by saying `{bot_name} hey` on any channel " 111 | f"or by direct messaging {bot_name} with `hey` or `hello`. " 112 | "You can open multiple threads with me if you want to. " 113 | "Each thread is an independent discussion.", 114 | } 115 | ], 116 | }, 117 | ] 118 | # INTO_MESSAGE_BLOCKS.extend(self.make_context_block()) 119 | return intro_message, INTO_MESSAGE_BLOCKS 120 | 121 | 122 | def error_message(traceback_err, message=DEFAULT_ERROR_MESSAGE): 123 | return { 124 | "blocks": [ 125 | {"type": "section", "text": {"type": "mrkdwn", "text": f"{message}"}}, 126 | { 127 | "type": "section", 128 | "text": {"type": "mrkdwn", "text": f"\n```{traceback_err}```"}, 129 | }, 130 | ] 131 | } 132 | -------------------------------------------------------------------------------- /docs/slack-scopes.md: -------------------------------------------------------------------------------- 1 | # Slack Permission Scopes 2 | Current Permission Scopes in use by Metaflowbot. 3 | 4 | | Oauth Permissions | Scope meaning | Why Needed | Needed | Link | 5 | | ----------------- | ----------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------- | 6 | | app_mentions:read | View messages that directly mention @your\_slack\_app in conversations that the app is in | Main event that for channel related triggers | Yes | [https://api.slack.com/scopes/app\_mentions:read](https://api.slack.com/scopes/app_mentions:read) | 7 | | channels:manage | Manage public channels that your slack app has been added to and create new ones | For `conversations.open` method for chat responses | Yes | [https://api.slack.com/scopes/channels:manage](https://api.slack.com/scopes/channels:manage) | 8 | | channels:read | View basic information about public channels in a workspace | Needed for `conversations.replies` | Yes | [https://api.slack.com/scopes/channels:read](https://api.slack.com/scopes/channels:read) | 9 | | chat:write | Post messages in approved channels & conversations | Needed For [Post Message Function](https://api.slack.com/methods/chat.postMessage) | Yes | [https://api.slack.com/scopes/chat:write](https://api.slack.com/scopes/chat:write) | 10 | | im:read | View basic information about direct messages that your slack app has been added to | To Read IMs being sent | Yes | [https://api.slack.com/scopes/im:read](https://api.slack.com/scopes/im:read) | 11 | | im:write | Start direct messages with people | To write to Im channels for admin and others | Yes | [https://api.slack.com/scopes/im:write](https://api.slack.com/scopes/im:write) | 12 | | im:history | View messages and other content in direct messages that your slack app has been added to | For state retieval | Yes | [https://api.slack.com/scopes/im:history](https://api.slack.com/scopes/im:history) | 13 | | users:read.email | View email addresses of people in a workspace | For Reading admin user's email for DM's | Yes | [https://api.slack.com/scopes/users:read.email](https://api.slack.com/scopes/users:read.email) | 14 | | users:read | View people in a workspace | Needed with `users.reademail` scope | Yes | [https://api.slack.com/scopes/users:read](https://api.slack.com/scopes/users:read) | 15 | 16 | # Slack Event Subsciptions 17 | 18 | 1. [`app_metion`](https://api.slack.com/events/app_mention) : To get events when `@` mentioned on channel. 19 | 2. [`message.im`](https://api.slack.com/events/message.im) : To get direct message events. 20 | 21 | This constraint ensures that the bot won't listen to *all* messages on a channel; only the ones where the bot is specifically tagged. 22 | 23 | # Slack API Rate Limits 24 | 25 | 26 | 27 | | Where is it Needed | API Call | Links | Rate Limt Tier | 28 | |:--------------------------------------------------------------------------- |:--------------------- |:--------------------------------------------------- |:---------------------------- | 29 | | To send messages via Slack client | chat_postMessage | https://api.slack.com/methods/chat.postMessage | 1 message/per channel/second | 30 | | To read messages from Slack admin channel (Figure sec around this) | conversations_history | https://api.slack.com/methods/conversations.history | 50 Req/min | 31 | | To read the Slack admin channel thread for the bot (Figure sec around this) | conversations_replies | https://api.slack.com/methods/conversations.replies | 50 Req/min | 32 | | To create threads with the bot | conversations_open | https://api.slack.com/methods/conversations.open | 50 Req/min | 33 | -------------------------------------------------------------------------------- /metaflowbot/actions/runs/commands.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | from datetime import datetime 3 | 4 | import click 5 | import timeago 6 | from metaflow import Run, namespace 7 | from metaflow.client.filecache import FileCacheException 8 | from metaflow.exception import MetaflowNotFound 9 | 10 | from metaflowbot.cli import action 11 | from metaflowbot.message_templates.templates import (DEFAULT_ERROR_MESSAGE, 12 | HEADINGS, 13 | SLACK_MAX_BLOCKS, 14 | error_message) 15 | from metaflowbot.state import MFBState 16 | 17 | from .run_resolver import (ResolvedRun, RunNotFound, RunResolver, 18 | RunResolverException, datetime_response_parsing, 19 | find_user, step_runtime) 20 | 21 | 22 | @action.command(help="Set a new run to be inspected") 23 | @click.option("--runspec", help="A query to find a run to inspect") 24 | @click.option("--howto/--no-howto", help="Only show help text") 25 | @click.pass_context 26 | def inspect_run(ctx, runspec=None, howto=False): 27 | obj = ctx.obj 28 | resolver = RunResolver("inspect") 29 | if howto: 30 | obj.reply(howto_inspect_run(resolver)) 31 | else: 32 | try: 33 | obj.reply("Searching runs. Just a minute...") 34 | runs = resolver.resolve(runspec) 35 | if len(runs) == 1: 36 | attrs = {"inspect.run_id": runs[0].id} 37 | state = MFBState.message_set_attributes(obj.thread, attrs) 38 | obj.publish_state(state) 39 | obj.reply("Ok, inspecting *%s*." % (runs[0].id)) 40 | ctx.invoke(inspect, run_id=runs[0].id) 41 | else: 42 | reply = resolver.format_runs(runs, lambda _: None) 43 | obj.reply(reply) 44 | except RunResolverException as ex: 45 | obj.reply(str(ex)) 46 | except RunNotFound as ex: 47 | obj.reply(str(ex)) 48 | except Exception as e: 49 | traceback.print_exc() 50 | my_traceback = traceback.format_exc() 51 | obj.reply(DEFAULT_ERROR_MESSAGE, **error_message(my_traceback)) 52 | 53 | 54 | @action.command(help="Inspect the current run or show help text") 55 | @click.option("--run-id", help="Run ID to inspect") 56 | @click.option("--create-thread/--no-create-thread", help="Will create a new thread") 57 | @click.option("--howto/--no-howto", help="Only show help text") 58 | @click.pass_obj 59 | def inspect(obj, run_id=None, create_thread=False, howto=False): 60 | resolver = RunResolver("inspect") 61 | if create_thread: 62 | obj.publish_state(MFBState.message_new_thread(obj.thread)) 63 | if howto: 64 | obj.reply(howto_inspect_run(resolver)) 65 | else: 66 | try: 67 | reply_inspect(obj, run_id) 68 | except MetaflowNotFound as e: 69 | obj.reply(HEADINGS.NO_RUNS.value) 70 | except FileCacheException as e: 71 | obj.reply(HEADINGS.NO_S3_DATASTORE.value) 72 | except: 73 | traceback.print_exc() 74 | my_traceback = traceback.format_exc() 75 | obj.reply(DEFAULT_ERROR_MESSAGE, **error_message(my_traceback)) 76 | 77 | 78 | def run_status(run): 79 | try: 80 | if run.finished: 81 | if run.successful: 82 | parsed_time_string = datetime_response_parsing( 83 | ( 84 | run.finished_at - run.created_at 85 | ).total_seconds() 86 | ) 87 | return "It ran for %s and finished successfully." % parsed_time_string 88 | else: 89 | return "It did not finish successfully." 90 | else: 91 | return "It has not finished." 92 | except FileCacheException as e: 93 | # in case of local ds and service MD 94 | return "" 95 | 96 | def are_tasks_success(tasks): 97 | if all(task.successful for task in tasks): 98 | return True 99 | else: 100 | return False 101 | 102 | 103 | def reply_inspect(obj, run_id): 104 | 105 | # max_steps is added because slack has a limit on how large a payload 106 | # can be sent to slack. 107 | def step_resolver(steps, max_steps=SLACK_MAX_BLOCKS): 108 | sects = [] 109 | discovered_datastore = False 110 | local_ds = False 111 | 112 | for idx, step in enumerate(reversed(steps)): 113 | if idx > max_steps: 114 | break 115 | tasks = list(step) 116 | 117 | if not discovered_datastore: 118 | discovered_datastore = True 119 | # to derive datastore location we use name as a heuristic 120 | if 'name' in tasks[0]: 121 | object_storage_loc = tasks[0]['name']._object['location'] 122 | if 's3://' not in object_storage_loc: 123 | # As task.successful is linked to S3 124 | # We check : https://github.com/Netflix/metaflow/blob/9f832e62b3d4288acae8de483dc5709d660dc347/metaflow/client/core.py#L712 125 | local_ds = True 126 | else: 127 | # if name is not present right now just do local_ds = True 128 | local_ds = True 129 | 130 | 131 | if not local_ds: 132 | task_success_flag = are_tasks_success(tasks) 133 | 134 | if local_ds: 135 | color = "warning" 136 | status = "Unable to get task status." 137 | elif task_success_flag: 138 | color = "good" 139 | status = "All tasks finished successfully." 140 | else: 141 | color = "warning" 142 | status = "Some tasks failed or are still running." 143 | 144 | fields = [ 145 | {"title": "Status", "value": status, "short": False}, 146 | {"title": "Runtime", "value": step_runtime(tasks), "short": True}, 147 | {"title": "Tasks Started", "value": len(tasks), "short": True}, 148 | ] 149 | sects.append( 150 | { 151 | "fallback": "step %s" % step.id, 152 | "title": "Step: " + step.id, 153 | "fields": fields, 154 | "color": color, 155 | } 156 | ) 157 | return sects,local_ds 158 | 159 | def make_resolved_run(run: Run, total_steps=0, max_steps=SLACK_MAX_BLOCKS,local_ds = False): 160 | resolved_run = ResolvedRun( 161 | id=run.pathspec, 162 | who=find_user(run), 163 | flow=run.pathspec.split("/")[0], 164 | when=run.created_at, 165 | ) 166 | ago = timeago.format(resolved_run.when, now=datetime.utcnow()) 167 | run_stat = run_status(run) 168 | tnc = "_Some information (duration/status) couldn't be "\ 169 | "determined since the flow ran " \ 170 | "with datastore configured to local filesystem._" 171 | head = [ 172 | "Run *%s* was started %s by _%s_." 173 | % (resolved_run.id, ago, resolved_run.who), 174 | run_stat, 175 | "Tags: %s" % ", ".join("`%s`" % tag for tag in run.tags), 176 | '' if not local_ds else tnc, 177 | "Steps:" 178 | if total_steps <= max_steps 179 | else f"Showing {max_steps}/{total_steps} Steps:", 180 | ] 181 | 182 | return "\n".join(head) 183 | namespace(None) 184 | run = Run(run_id) 185 | steps = list(run) 186 | attachments,local_ds = step_resolver(steps) 187 | resolved_run_info = make_resolved_run(run, total_steps=len(steps),local_ds=local_ds) 188 | obj.reply(resolved_run_info,attachments=attachments) 189 | 190 | 191 | def howto_inspect_run(resolver): 192 | return ( 193 | "Use `inspect` to specify the run to inspect. The run " 194 | "can be running currently (it does not have to be finished) or " 195 | "it can be any historical run. %s" % resolver.howto() 196 | ) 197 | -------------------------------------------------------------------------------- /metaflowbot/actions/runs/run_resolver.py: -------------------------------------------------------------------------------- 1 | import math 2 | import re 3 | from collections import namedtuple 4 | from datetime import datetime 5 | 6 | import timeago 7 | from metaflow import Flow, namespace 8 | from metaflow.exception import MetaflowNotFound 9 | 10 | 11 | ResolvedRun = namedtuple("ResolvedRun", ["id", "flow", "who", "when"]) 12 | 13 | ResolvedStep = namedtuple( 14 | "ResolvedStep", 15 | [ 16 | "num_tasks", 17 | "name", 18 | "started_on", 19 | "finished_at", 20 | "step_runtime", 21 | ], 22 | ) 23 | 24 | 25 | class RunResolverException(Exception): 26 | def __init__(self, flow): 27 | self.flow = flow 28 | 29 | def __str__(self): 30 | return "Couldn't find the run. :expressionless: " 31 | 32 | 33 | class RunNotFound(RunResolverException): 34 | def __init__(self, flow): 35 | super().__init__(flow) 36 | 37 | 38 | class RunSyntaxError(RunResolverException): 39 | def __init__(self, command): 40 | self.command = command 41 | 42 | def __str__(self): 43 | return ( 44 | "Hmm, I am not sure what you mean. Type " 45 | "`how to %s` for help." % self.command 46 | ) 47 | 48 | 49 | class FlowNotFound(RunResolverException): 50 | def __str__(self): 51 | return ( 52 | "Flow `%s` not found. Note that flow names are " 53 | "case-sensitive." % self.flow 54 | ) 55 | 56 | 57 | class NoRuns(RunResolverException): 58 | def __init__(self, flow, command): 59 | self.flow = flow 60 | self.command = command 61 | 62 | def __str__(self): 63 | return ( 64 | "I couldn't find any runs with the given spec. " 65 | "You can see a list of runs if you specify just the " 66 | "flow name, `%s %s`." % (self.command, self.flow) 67 | ) 68 | 69 | 70 | STYLES = [ # [Run/ID] 71 | "(?P[a-z0-9_]+)/(?P[a-z0-9_\-]+)", 72 | # (someone's / the) (latest run of) [flow], (tagged tag) 73 | "((?P[a-z0-9_\-]+)'s?|the)? ?(?Platest run of )?" 74 | "(?P[a-z0-9_]+),?( tagged (?P.+))?", 75 | ] 76 | 77 | PARSER = [re.compile(x, re.IGNORECASE) for x in STYLES] 78 | 79 | 80 | 81 | def running_time(run): 82 | try: 83 | if run.finished: 84 | if run.successful: 85 | mins = ( 86 | run.finished_at - run.created_at 87 | ).total_seconds() / 60 88 | return mins 89 | except: 90 | pass 91 | return None 92 | 93 | 94 | def datetime_response_parsing(secs): 95 | if secs < 60: 96 | return "%d %s" % (secs,'second' if secs == 1 else 'seconds') 97 | elif secs < (60 * 60): # If less than one hour 98 | return "%d %s" % (secs / 60, 'minute' if int(secs / 60) == 1 else 'minutes') 99 | elif secs < (24 * 60 * 60): # If less than one day 100 | num_hours = math.floor(secs / (60 * 60)) 101 | num_mins = (secs % (60 * 60)) / 60 102 | hr_str = 'hours' if num_hours > 1 else 'hour' 103 | min_str = 'minutes' if num_mins > 1 else 'minute' 104 | return "%d %s and %d %s" % (num_hours, hr_str, num_mins, min_str) 105 | else: # More than a day 106 | num_days = math.floor(secs / (24 * 60 * 60)) 107 | num_hours = (secs % (24 * 60 * 60)) / (60 * 60) 108 | hr_str = 'hours' if num_hours > 1 else 'hour' 109 | day_str = 'days' if num_days > 1 else 'day' 110 | return "%d %s and %d %s" % (num_days, day_str, num_hours, hr_str) 111 | 112 | 113 | def step_runtime(tasks): 114 | # This is works with even local datastore. 115 | if tasks: 116 | try: 117 | end = [ 118 | t.finished_at for t in tasks if t.finished_at is not None 119 | ] 120 | if all(end) and len(end) > 0: 121 | secs = (max(end) - tasks[-1].created_at).total_seconds() 122 | return datetime_response_parsing(secs) 123 | except: 124 | pass 125 | return "?" 126 | 127 | 128 | class RunResolver(object): 129 | def __init__(self, command): 130 | self.command = command 131 | 132 | def resolve(self, msg, max_runs=5): 133 | match = None 134 | if msg.startswith(self.command): 135 | msg = msg[len(self.command) :].strip() 136 | match = list(filter(None, [p.match(msg) for p in PARSER])) 137 | if match: 138 | query = match[0].groupdict() 139 | runs = list(self._query(query, max_runs)) 140 | if runs: 141 | return runs 142 | else: 143 | raise NoRuns(query["flow"], self.command) 144 | else: 145 | raise RunSyntaxError(self.command) 146 | 147 | def format_runs(self, runs, run_filter): 148 | msg = ["I found these runs:"] 149 | example = None 150 | for run in runs: 151 | exclude = run_filter(run) 152 | if not exclude and not example: 153 | example = run.id 154 | msg.append( 155 | " - {x}`{run.id}`{x} _by {run.who}, {when}_ {reason}".format( 156 | run=run, 157 | when=timeago.format(run.when, now=datetime.utcnow()), 158 | x="~" if exclude else "", 159 | reason="(%s)" % exclude if exclude else "", 160 | ) 161 | ) 162 | if example: 163 | msg.append( 164 | "Choose one of the run IDs above by writing e.g. " 165 | "`%s %s`" % (self.command, example) 166 | ) 167 | else: 168 | msg.append( 169 | "It seems none of these runs were eligible. Try " 170 | "another query (try `how to %s` for ideas)" % self.command 171 | ) 172 | return "\n".join(msg) 173 | 174 | def _query(self, query, max_runs): 175 | def _resolved_run(run): 176 | return ResolvedRun( 177 | id=run.pathspec, 178 | who=find_user(run), 179 | flow=run.pathspec.split("/")[0], 180 | when=run.created_at, 181 | ) 182 | 183 | try: 184 | namespace(None) 185 | flow = Flow(query["flow"]) 186 | except MetaflowNotFound: 187 | raise FlowNotFound(query["flow"]) 188 | 189 | runid = query.get("runid") 190 | if runid: 191 | try: 192 | runs = [flow[runid]] 193 | except KeyError: 194 | raise RunNotFound(flow) 195 | else: 196 | tags = [] 197 | if query.get("tag"): 198 | tags.append(query["tag"]) 199 | if query.get("user"): 200 | tags.append("user:" + query["user"]) 201 | runs = list(flow.runs(*tags)) 202 | if query.get("latest"): 203 | runs = runs[:1] 204 | return map(_resolved_run, runs[:max_runs]) 205 | 206 | def howto(self): 207 | return ( 208 | "There are a number of ways to refer to an existing run:\n" 209 | " - Use an existing run ID: `{cmd} HelloFlow/12`.\n" 210 | " - Use a flow name: `{cmd} HelloFlow`.\n" 211 | " - Use a flow name with a user: `{cmd} dberg's HelloFlow`.\n" 212 | " - Use the latest run of a user: `{cmd} dberg's latest run of HelloFlow`.\n" 213 | " - Use the latest run by anyone: `{cmd} the latest run of HelloFlow`.\n" 214 | "You can filter by a tag by appending `tagged some_tag` in any of the " 215 | "expressions above except the first one. If there are multiple " 216 | "eligible runs, I will show you a list of run IDs to choose from.".format( 217 | cmd=self.command 218 | ) 219 | ) 220 | 221 | 222 | def find_user(run): 223 | usrlst = [tag for tag in run.tags if tag.startswith("user:")] 224 | if usrlst: 225 | return usrlst[0][5:] 226 | else: 227 | return "unknown" 228 | 229 | 230 | if __name__ == "__main__": 231 | import sys 232 | 233 | print("\n".join(map(str, RunResolver("use code from").resolve(sys.argv[1])))) 234 | -------------------------------------------------------------------------------- /deployment/mfbot-cfn-template.yml: -------------------------------------------------------------------------------- 1 | Description: Cloudformation Stack for Deploying Metaflowbot 2 | Parameters: 3 | AdminEmailAddress: 4 | Description: Email address of the admin user in the slack workspace 5 | Type: String 6 | MetadataServiceAuthParameterKey: 7 | Default: METADATASERVICE_AUTH_KEY 8 | Description: Key for Metadata service auth parameter in Secrets Manager. 9 | Type: String 10 | MetadataServiceUrl: 11 | Description: URL of the metadata service 12 | Type: String 13 | MetaflowDatastoreSysrootS3: 14 | Description: 'Amazon S3 URL for Metaflow DataStore ' 15 | Type: String 16 | MetaflowbotSecretsManagerARN: 17 | Description: ARN of the secret holding Metaflowbot credentials in Secrets Manager 18 | Type: String 19 | SlackAppTokenParameterKey: 20 | Default: SLACK_APP_TOKEN_KEY 21 | Description: Key for SLACK_APP_TOKEN parameter in Secrets Manager. 22 | Type: String 23 | SlackBotTokenParameterKey: 24 | Default: SLACK_BOT_TOKEN_KEY 25 | Description: Key for SLACK_BOT_TOKEN parameter in Secrets Manager. 26 | Type: String 27 | Resources: 28 | EcsClusterRole: 29 | Properties: 30 | AssumeRolePolicyDocument: 31 | Statement: 32 | - Action: sts:AssumeRole 33 | Effect: Allow 34 | Principal: 35 | Service: ecs-tasks.amazonaws.com 36 | Version: '2012-10-17' 37 | ManagedPolicyArns: 38 | - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM 39 | Path: / 40 | Type: AWS::IAM::Role 41 | EcsTaskRole: 42 | Properties: 43 | AssumeRolePolicyDocument: 44 | Statement: 45 | - Action: sts:AssumeRole 46 | Effect: Allow 47 | Principal: 48 | Service: ecs-tasks.amazonaws.com 49 | Version: '2012-10-17' 50 | Path: / 51 | Type: AWS::IAM::Role 52 | InternetGatewayAttachment: 53 | Properties: 54 | InternetGatewayId: !Ref 'MetaflowbotInternetGateway' 55 | VpcId: !Ref 'MetaflowbotPublicVpc' 56 | Type: AWS::EC2::VPCGatewayAttachment 57 | MetaflowbotCluster: 58 | Type: AWS::ECS::Cluster 59 | MetaflowbotDeployment: 60 | Properties: 61 | Cluster: !Ref 'MetaflowbotCluster' 62 | DesiredCount: 1 63 | LaunchType: FARGATE 64 | NetworkConfiguration: 65 | AwsvpcConfiguration: 66 | AssignPublicIp: ENABLED 67 | SecurityGroups: 68 | - !Ref 'MetaflowbotSecurityGroup' 69 | Subnets: 70 | - !Ref 'MetaflowbotDeploymentSubnet' 71 | TaskDefinition: !Ref 'MetaflowbotTaskDefinition' 72 | Type: AWS::ECS::Service 73 | MetaflowbotDeploymentSubnet: 74 | Properties: 75 | AvailabilityZone: !Select 76 | - 0 77 | - !GetAZs 78 | Ref: AWS::Region 79 | CidrBlock: 10.0.0.0/24 80 | MapPublicIpOnLaunch: true 81 | VpcId: !Ref 'MetaflowbotPublicVpc' 82 | Type: AWS::EC2::Subnet 83 | MetaflowbotInternetGateway: 84 | Type: AWS::EC2::InternetGateway 85 | MetaflowbotLogGroup: 86 | Properties: 87 | LogGroupName: !Join 88 | - '' 89 | - - /ecs/ 90 | - !Ref 'AWS::StackName' 91 | - -metaflowbot 92 | Type: AWS::Logs::LogGroup 93 | MetaflowbotPublicVpc: 94 | Properties: 95 | CidrBlock: 10.0.0.0/16 96 | Type: AWS::EC2::VPC 97 | MetaflowbotSecretAccess: 98 | Properties: 99 | PolicyDocument: 100 | Statement: 101 | - Action: 102 | - secretsmanager:GetSecretValue 103 | Effect: Allow 104 | Resource: 105 | - !Ref 'MetaflowbotSecretsManagerARN' 106 | Sid: S3GetObject 107 | Version: '2012-10-17' 108 | PolicyName: Metaflowbot 109 | Roles: 110 | - !Ref 'EcsClusterRole' 111 | Type: AWS::IAM::Policy 112 | MetaflowbotSecurityGroup: 113 | Properties: 114 | GroupDescription: Allow All In and outbound traffic 115 | SecurityGroupEgress: 116 | - CidrIp: '0.0.0.0/0' 117 | FromPort: 0 118 | IpProtocol: tcp 119 | ToPort: 65534 120 | VpcId: !Ref 'MetaflowbotPublicVpc' 121 | Type: AWS::EC2::SecurityGroup 122 | MetaflowbotTaskDefinition: 123 | Properties: 124 | ContainerDefinitions: 125 | - Environment: 126 | - Name: ADMIN_USER_ADDRESS 127 | Value: !Ref 'AdminEmailAddress' 128 | - Name: USERNAME 129 | Value: slackbot 130 | - Name: METAFLOW_SERVICE_URL 131 | Value: !Ref 'MetadataServiceUrl' 132 | - Name: METAFLOW_DATASTORE_SYSROOT_S3 133 | Value: !Ref 'MetaflowDatastoreSysrootS3' 134 | - Name: METAFLOW_DEFAULT_DATASTORE 135 | Value: s3 136 | - Name: METAFLOW_DEFAULT_METADATA 137 | Value: service 138 | Essential: true 139 | Image: outerbounds/metaflowbot 140 | LogConfiguration: 141 | LogDriver: awslogs 142 | Options: 143 | awslogs-group: !Join 144 | - '' 145 | - - /ecs/ 146 | - !Ref 'AWS::StackName' 147 | - -metaflowbot 148 | awslogs-region: !Ref 'AWS::Region' 149 | awslogs-stream-prefix: ecs 150 | Name: metaflowbot 151 | Secrets: 152 | - Name: METAFLOW_SERVICE_AUTH_KEY 153 | ValueFrom: !Join 154 | - '' 155 | - - !Ref 'MetaflowbotSecretsManagerARN' 156 | - ':' 157 | - !Ref 'MetadataServiceAuthParameterKey' 158 | - '::' 159 | - Name: SLACK_APP_TOKEN 160 | ValueFrom: !Join 161 | - '' 162 | - - !Ref 'MetaflowbotSecretsManagerARN' 163 | - ':' 164 | - !Ref 'SlackAppTokenParameterKey' 165 | - '::' 166 | - Name: SLACK_BOT_TOKEN 167 | ValueFrom: !Join 168 | - '' 169 | - - !Ref 'MetaflowbotSecretsManagerARN' 170 | - ':' 171 | - !Ref 'SlackBotTokenParameterKey' 172 | - '::' 173 | Cpu: '4096' 174 | ExecutionRoleArn: !GetAtt 'EcsClusterRole.Arn' 175 | Memory: '8192' 176 | NetworkMode: awsvpc 177 | RequiresCompatibilities: 178 | - FARGATE 179 | TaskRoleArn: !GetAtt 'EcsTaskRole.Arn' 180 | Type: AWS::ECS::TaskDefinition 181 | PolicyEcr: 182 | Properties: 183 | PolicyDocument: 184 | Statement: 185 | - Action: 186 | - ecr:GetAuthorizationToken 187 | Effect: Allow 188 | Resource: 189 | - '*' 190 | - Action: 191 | - ecr:GetDownloadUrlForLayer 192 | - ecr:BatchGetImage 193 | - ecr:BatchCheckLayerAvailability 194 | - logs:CreateLogStream 195 | - logs:PutLogEvents 196 | Effect: Allow 197 | Resource: 198 | - '*' 199 | Sid: AllowPull 200 | Version: '2012-10-17' 201 | PolicyName: MetaflowbotEcrPolicy 202 | Roles: 203 | - !Ref 'EcsClusterRole' 204 | Type: AWS::IAM::Policy 205 | PublicDefaultRoute: 206 | Properties: 207 | DestinationCidrBlock: '0.0.0.0/0' 208 | GatewayId: !Ref 'MetaflowbotInternetGateway' 209 | RouteTableId: !Ref 'PublicRouteTable' 210 | Type: AWS::EC2::Route 211 | PublicRouteAssociation: 212 | Properties: 213 | RouteTableId: !Ref 'PublicRouteTable' 214 | SubnetId: !Ref 'MetaflowbotDeploymentSubnet' 215 | Type: AWS::EC2::SubnetRouteTableAssociation 216 | PublicRouteTable: 217 | Properties: 218 | VpcId: !Ref 'MetaflowbotPublicVpc' 219 | Type: AWS::EC2::RouteTable 220 | S3AccessPolicy: 221 | Properties: 222 | PolicyDocument: 223 | Statement: 224 | - Action: 225 | - s3:GetObject 226 | - s3:ListBucket 227 | Effect: Allow 228 | Resource: 229 | - !Join 230 | - '' 231 | - - !Join 232 | - '' 233 | - - 'arn:aws:s3:::' 234 | - !Select 235 | - 1 236 | - !Split 237 | - s3:// 238 | - !Ref 'MetaflowDatastoreSysrootS3' 239 | - /* 240 | Sid: S3GetObject 241 | Version: '2012-10-17' 242 | PolicyName: MetaflowbotS3AccessPolicy 243 | Roles: 244 | - !Ref 'EcsTaskRole' 245 | Type: AWS::IAM::Policy 246 | 247 | -------------------------------------------------------------------------------- /metaflowbot/slack_client.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import partial 3 | from itertools import islice 4 | from queue import Empty, Queue 5 | from threading import Event, Thread 6 | from typing import List 7 | 8 | from slack_sdk import WebClient 9 | from slack_sdk.socket_mode import SocketModeClient 10 | from slack_sdk.socket_mode.request import SocketModeRequest 11 | from slack_sdk.socket_mode.response import SocketModeResponse 12 | 13 | from .exceptions import MFBException 14 | 15 | NUM_RETRIES = 3 16 | MIN_RTM_EVENTS_INTERVAL = 1 17 | 18 | 19 | class MFBInvalidPermalink(MFBException): 20 | headlink = "Invalid Slack permalink" 21 | 22 | def __init__(self, url): 23 | super(MFBInvalidPermalink, self).__init__("Invalid permalink: %s" % url) 24 | 25 | 26 | class MFBUserNotFound(MFBException): 27 | headline = "User not found" 28 | 29 | def __init__(self, user): 30 | super(MFBUserNotFound, self).__init__("User not found: %s" % user) 31 | 32 | 33 | class MFBChannelNotFound(MFBException): 34 | headline = "Channel not found" 35 | 36 | def __init__(self, chan): 37 | super(MFBChannelNotFound, self).__init__("Channel not found: %s" % chan) 38 | 39 | 40 | class MFBClientException(MFBException): 41 | headline = "Slack client failed" 42 | traceback = True 43 | 44 | def __init__(self, method, args, resp=None): 45 | lst = ", ".join("%s=%s" % x for x in args.items()) 46 | msg = "Request '%s' with args %s failed" % (method, lst) 47 | if resp: 48 | msg += ". Unknown response: %s" % resp 49 | self.resp = resp 50 | super(MFBClientException, self).__init__(msg) 51 | 52 | def __str__(self): 53 | return self.msg 54 | 55 | 56 | class MFBRequestFailed(MFBClientException): 57 | pass 58 | 59 | 60 | class MFBRateLimitException(MFBClientException): 61 | pass 62 | 63 | 64 | class SlackMessageQueue(Queue): 65 | """SlackMessageQueue 66 | Message Queue to hold all Slack messages from `SlackSocketSubscriber`. 67 | """ 68 | 69 | def __init__(self, maxsize: int = 0) -> None: 70 | super().__init__(maxsize=maxsize) 71 | 72 | def injest(self, messages: List[dict]) -> None: 73 | """injest 74 | Push Multiple messsage to Queue. \ 75 | Queue.put is by default Blocking to avoid Loosing messages and has Mutex's internally. 76 | """ 77 | for m in messages: 78 | self.put(m) 79 | 80 | def flush(self) -> List[dict]: 81 | """flush 82 | Take all messages from the Queue and return to the caller. 83 | Essentially Empty Everything. 84 | https://stackoverflow.com/questions/8196254/how-to-iterate-queue-queue-items-in-python 85 | """ 86 | try: 87 | queue_items = [] 88 | while True: 89 | queue_items.append(self.get_nowait()) 90 | except Empty: 91 | pass 92 | return queue_items 93 | 94 | 95 | def process( 96 | message_event_queue: SlackMessageQueue, 97 | client: SocketModeClient, 98 | req: SocketModeRequest, 99 | ): 100 | if req.type == "events_api": 101 | # Slash commands will have a different req.type 102 | """ 103 | # Permisssions for : 104 | - https://api.slack.com/events/message.im 105 | - https://api.slack.com/events/app_mention 106 | Payload from different events can be found : https://api.slack.com/events/ 107 | Current `req.payload['event'].type == app_mention | message` 108 | """ 109 | # ! acknowledgement is needed to ensure messages are not Double sent 110 | # Acknowledge the request anyway 111 | response = SocketModeResponse(envelope_id=req.envelope_id) 112 | client.send_socket_mode_response(response) 113 | message_event_queue.injest([req.payload["event"]]) 114 | 115 | 116 | class SlackSocketSubscriber(Thread): 117 | """SlackSocketSubscriber 118 | This will be a daemon thread that will connect to slack and subscribe to the message feed via the `SocketModeClient` 119 | This will use a message queue to keep sending messages to the main thread. 120 | 121 | This is a daemon thread because it should die when the program shuts and we don't care much about it once it starts 122 | """ 123 | 124 | def __init__( 125 | self, 126 | app_token, 127 | message_event_queue: Queue, 128 | ) -> None: 129 | assert app_token is not None 130 | super().__init__(daemon=True) 131 | self.message_event_queue = message_event_queue 132 | self._app_token = app_token 133 | 134 | def run(self) -> None: 135 | """run 136 | wire up the socket feed and the function to filter flush items to queue 137 | """ 138 | self.sc = SocketModeClient( 139 | # This app-level token will be used only for establishing a connection 140 | app_token=self._app_token, # xapp-A111-222-xyz 141 | ) 142 | # Bind the queue to ensure Message Paassig. 143 | subscriber_func = partial(process, self.message_event_queue) 144 | self.sc.socket_mode_request_listeners.append(subscriber_func) 145 | # Establish a WebSocket connection to the Socket Mode servers 146 | self.sc.connect() 147 | Event().wait() 148 | 149 | 150 | def create_slack_subscriber(app_token): 151 | message_event_queue = SlackMessageQueue() 152 | socket_thread = SlackSocketSubscriber(app_token, message_event_queue) 153 | socket_thread.start() 154 | return message_event_queue, socket_thread 155 | 156 | 157 | class MFBSlackClientV2(object): 158 | """MFBSlackClientV2 159 | Replaces the `slack_client` with `slack_sdk` 160 | - Leverages the `WebClient` and the `SocketModeClient` (With RTM Message subscriber) 161 | 162 | `slack_sdk` Socket Management: 163 | https://slack.dev/python-slack-sdk/socket-mode/index.html#socketmodeclient 164 | 165 | `slack_sdk` WebClient : 166 | https://slack.dev/python-slack-sdk/web/index.html 167 | 168 | `SocketModeClient` leverages a Message queue which gets the RTM events. 169 | 170 | """ 171 | 172 | def __init__(self, slack_token, slack_app_token=None) -> None: 173 | assert slack_token is not None 174 | self.sc = WebClient(token=slack_token) # xoxb-111-222-xyz 175 | self._app_token = slack_app_token 176 | self.rtm_connected = False 177 | self._slack_token = slack_token 178 | self._last_rtm_events = 0 179 | self._rmt_feed_queue = None 180 | self._socket_tread = None 181 | 182 | @property 183 | def token(self): 184 | return self._slack_token 185 | 186 | def bot_name(self): 187 | return self.sc.auth_test()["user"] 188 | 189 | def bot_user_id(self): 190 | # permission : auth.test 191 | return self.sc.auth_test()["user_id"] 192 | 193 | def post_message(self, msg, channel, thread=None, attachments=None, blocks=None): 194 | # This function is important because the CLI wrapper with click and the 195 | # MFBServer will use this to put messages in admin thread and actual user threads. 196 | args = {"channel": channel} 197 | if msg is not None: 198 | args["text"] = msg 199 | if attachments: 200 | args["attachments"] = json.dumps(attachments) 201 | if thread: 202 | args["thread_ts"] = thread 203 | if blocks is not None: 204 | args["blocks"] = blocks 205 | return self.sc.chat_postMessage(**args)["ts"] 206 | 207 | def _connect(self): 208 | # Instantiate event reader over here. 209 | if not self.rtm_connected: 210 | self._rmt_feed_queue, self._socket_tread = create_slack_subscriber( 211 | self._app_token 212 | ) 213 | self.rtm_connected = True 214 | return self.rtm_connected 215 | 216 | def rtm_events(self): 217 | """rtm_events 218 | this method is only called by the server to get the get the realtime 219 | events from slack. 220 | 221 | Version 1 used SlackClient.rtm_read() to retrieve the messages from slack with a 222 | timeout. 223 | 224 | Verison 2 is using SocketMode and in that light is it better to create a thread 225 | along with a message queue to keep flushing information socket consumer and the 226 | main server thread. 227 | 228 | This version also tries to connect to many 229 | """ 230 | self._connect() 231 | return self._rmt_feed_queue.flush() 232 | 233 | def im_channel(self, user): 234 | # Older API : im.open --> Deprecated 235 | # New API : conversations.open 236 | # SCOPE: mpim:write 237 | # SCOPE: im:write 238 | # SCOPE: groups:write 239 | # SCOPE: channels:manage 240 | try: 241 | return self.sc.conversations_open(users=user)["channel"]["id"] 242 | except MFBRequestFailed as ex: 243 | if ex.resp["error"] == "user_not_found": 244 | raise MFBUserNotFound(user) 245 | else: 246 | raise 247 | 248 | def user_by_email(self, email): 249 | # permission : users.lookupByEmail 250 | try: 251 | return self.sc.users_lookupByEmail(email=email)["user"]["id"] 252 | except MFBRequestFailed as ex: 253 | if ex.resp["error"] == "users_not_found": 254 | raise MFBUserNotFound(email) 255 | else: 256 | raise 257 | 258 | def past_events(self, channel, **opts): 259 | # API : conversations.history 260 | # SCOPE : channels:history 261 | # SCOPE : groups:history 262 | # SCOPE : im:history 263 | # SCOPE : mpim:history 264 | events = self._page_iter( 265 | self.sc.conversations_history, "messages", channel=channel 266 | ) 267 | return self._format_history(events, **opts) 268 | 269 | def past_replies(self, channel, thread, **opts): 270 | # API: conversations.replies 271 | # SCOPE : channels:history 272 | # SCOPE : groups:history 273 | # SCOPE : im:history 274 | # SCOPE : mpim:history 275 | events = self._page_iter( 276 | self.sc.conversations_replies, "messages", channel=channel, ts=thread 277 | ) 278 | return ( 279 | event 280 | for event in self._format_history(events, **opts) 281 | if "reply_count" not in event 282 | ) 283 | 284 | def _format_history(self, events, max_number=None, sort_key="ts"): 285 | if max_number is not None: 286 | events = islice(events, max_number) 287 | if sort_key: 288 | events = sorted(events, key=lambda x: x[sort_key]) 289 | return events 290 | 291 | def _page_iter(self, method, it_field, **args): 292 | # Iteartor for getting paginated data 293 | args["limit"] = 200 294 | while True: 295 | resp = method(**args) 296 | for item in resp[it_field]: 297 | yield item 298 | cursor = None 299 | if "response_metadata" in resp: 300 | cursor = resp["response_metadata"].get("next_cursor") 301 | if cursor: 302 | args["cursor"] = cursor 303 | else: 304 | break 305 | -------------------------------------------------------------------------------- /metaflowbot/server.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import subprocess 4 | import sys 5 | import time 6 | import traceback 7 | from collections import namedtuple 8 | 9 | from .exceptions import MFBException 10 | from .slack_client import MFBSlackClientV2 11 | from .state import MFBState 12 | 13 | # When a monitored process disappears, wait this many 14 | # seconds before announcing it as a failed, lost process. 15 | # In the normal case the monitored process sends a status 16 | # update before LOST_PROCESS_LIMIT seconds have passed. 17 | LOST_PROCESS_LIMIT = 30 18 | 19 | Event = namedtuple( 20 | "Event", 21 | [ 22 | "type", 23 | "msg", 24 | "user", 25 | "user_name", 26 | "chan", 27 | "chan_name", 28 | "ts", 29 | "thread_ts", 30 | "is_im", 31 | "is_mention", 32 | "is_direct", 33 | ], 34 | ) 35 | 36 | class StateNotFound(MFBException): 37 | pass 38 | 39 | class FormatFriendlyDict(object): 40 | def __init__(self, data): 41 | self.data = data 42 | 43 | def __getitem__(self, k): 44 | v = self.data.get(k) 45 | if v is None: 46 | return "" 47 | elif isinstance(v, dict): 48 | return json.dumps(v) 49 | else: 50 | return v 51 | 52 | 53 | class MFBServer(object): 54 | def __init__( 55 | self, slack_client: MFBSlackClientV2, admin_email, rules, logger, action_user 56 | ): 57 | self.sc = slack_client 58 | self.state = MFBState() 59 | self.dm_token = "<@%s>" % self.sc.bot_user_id() 60 | self.admin = self.sc.user_by_email(admin_email) 61 | self.admin_chan = self.sc.im_channel(self.admin) 62 | self.admin_thread = None 63 | self.rules = rules 64 | self.logger = logger 65 | self.action_user = action_user 66 | self._lost_processes = {} 67 | 68 | def new_admin_thread(self): 69 | ts = self.sc.post_message( 70 | self.state.message_new_admin_thread(), self.admin_chan 71 | ) 72 | # sc.past_events considers only replies, so we need a reply in 73 | # the thread. Let's send a noop reply. 74 | self.sc.post_message(self.state.message_noop(), self.admin_chan, thread=ts) 75 | 76 | def reconstruct_state(self): 77 | """reconstruct_state 78 | On restart, State is reconstructed using a slack channel that has 79 | the dump of all messages. 80 | 81 | :raises StateNotFound: [description] 82 | """ 83 | for event in self._state_event_log(): 84 | self._update_state(event) 85 | self.admin_thread = event.thread_ts 86 | if self.admin_thread is None: 87 | raise StateNotFound( 88 | "Could not find a state thread. " "Restart with --new-admin-thread." 89 | ) 90 | 91 | def _state_event_log(self): 92 | for top_event in self.sc.past_events(self.admin_chan): 93 | if self.state.is_admin_thread_parent(top_event["text"]): 94 | thread_ts = top_event["ts"] 95 | event_iter = self.sc.past_replies(self.admin_chan, thread_ts) 96 | yield from self._make_events(event_iter, admin_thread=thread_ts) 97 | 98 | def _log_event(self, event): 99 | head = "{0.type}" 100 | if event.type != "state_change": 101 | chan_field = "{0.chan}" 102 | if event.chan_name is not None: 103 | chan_field += " (#{0.chan_name})" 104 | elif event.chan[0] == "D": 105 | chan_field += " (direct)" 106 | user_field = "{0.user}" 107 | if event.user_name is not None: 108 | user_field += " (@{0.user_name})" 109 | head += " %s %s {0.thread_ts}" % (chan_field, user_field) 110 | head += " > " 111 | 112 | self.logger( 113 | event.msg, head=head.format(event), system_msg=(event.type == "state") 114 | ) 115 | 116 | def loop_forever(self): 117 | while True: 118 | for event in self._make_events(self.sc.rtm_events()): 119 | self._log_event(event) 120 | if event.type == "state_change": 121 | self._update_state(event) 122 | self._apply_rule(event) 123 | time.sleep(1) 124 | 125 | 126 | def _make_events(self, event_iter, admin_thread=None): 127 | """_make_events [summary] 128 | Makes custom Event object from slack events. 129 | """ 130 | if admin_thread is None: 131 | admin_thread = self.admin_thread 132 | for ev in event_iter: 133 | try: 134 | if ev["type"] == "message" or ev["type"] == "app_mention": 135 | thread_ts = ev.get("thread_ts") 136 | chan = ev.get("channel") 137 | msg = ev.get("text", "") 138 | user = ev.get("user") 139 | ts = ev.get("ts") 140 | is_im = chan and chan[0] == "D" 141 | is_mention = msg and self.dm_token in msg 142 | mfb_type = None 143 | 144 | if is_mention: 145 | # get rid of @metaflow 146 | msg = msg.replace(self.dm_token, "") 147 | 148 | # Type 1: State messages in the state thread. 149 | # Unfortunately Slack doesn't give us the channel ID, 150 | # so we must ensure that this is a state message 151 | # otherwise. 152 | if thread_ts == admin_thread and self.state.is_state_message(msg): 153 | mfb_type = "state_change" 154 | 155 | # To identify messsagae by bots we can now just check for bot_id 156 | elif "bot_id" not in ev: 157 | # Type 2: User messages in an existing thread. We 158 | # ignore MFB's replies in existing threads. 159 | if self.state.is_known_thread(chan, thread_ts): 160 | mfb_type = "user_message" 161 | 162 | # Type 3: Messages mentioning @metaflow that are not 163 | # in existing threads. They activate new threads. 164 | elif is_mention or is_im: 165 | mfb_type = "new_thread" 166 | if not thread_ts: 167 | # this is not an existing thread, so the thread_ts 168 | # is the parent event ts. 169 | thread_ts = ts 170 | if mfb_type: 171 | yield Event( 172 | type=mfb_type, 173 | msg=msg.strip(), 174 | is_mention=is_mention, 175 | user=user, 176 | user_name=self.state.user_name(user), 177 | chan=chan, 178 | is_im=is_im, 179 | is_direct=is_im or is_mention, 180 | chan_name=self.state.channel_name(chan), 181 | ts=ts, 182 | thread_ts=thread_ts, 183 | ) 184 | except GeneratorExit: 185 | pass 186 | except: 187 | traceback.print_exc() 188 | self.logger( 189 | str(ev), head="Ignored a bad message: ", system_msg=True, bad=True 190 | ) 191 | 192 | def _update_state(self, event): 193 | if not self.state.update(event): 194 | self.logger( 195 | str(event), 196 | head="Ignored a bad state message: ", 197 | system_msg=True, 198 | bad=True, 199 | ) 200 | 201 | def _apply_rule(self, event): 202 | match = self.rules.match(event, self.state) 203 | if match: 204 | rule_name, action, msg_groups, context_update = match 205 | self.logger(rule_name, head=" -> Invoking rule: ") 206 | # if the rule matched a state change event, we want to 207 | # send replies to the originating thread, not to the 208 | # admin thread of the event 209 | reply_thread = self.state.get_thread(event) 210 | self._take_action(event, reply_thread, msg_groups, **action) 211 | if context_update: 212 | # ephemeral context update. Normally thread state (context) 213 | # gets updated via events in the admin thread but there are 214 | # a few cases where we need to update the state in the same 215 | # "transaction" with rule evaluation, e.g. to prevent the 216 | # same rule being invoked multiple times. This code path 217 | # serves this purpose. 218 | self.state.update_thread(reply_thread, context_update) 219 | 220 | def _take_action( 221 | self, event, reply_thread="", msg_groups="", op=None, **action_spec 222 | ): 223 | 224 | # click demands that local is set to UTF-8 225 | env = {"LANG": "C.UTF-8", "LC_ALL": "C.UTF-8"} 226 | 227 | # use a custom cache location, to make sure our 228 | # action user has permissions to use it 229 | env["METAFLOW_CLIENT_CACHE_PATH"] = "metaflow_client_cache" 230 | 231 | if "PATH" in os.environ: 232 | # sys.executable does not work in subprocesses 233 | # if PATH is not set 234 | env["PATH"] = os.environ["PATH"] 235 | 236 | if "PYTHONPATH" in os.environ: 237 | env["PYTHONPATH"] = os.environ["PYTHONPATH"] 238 | 239 | # Use the Parent server thread's environment variables. 240 | env.update({k: os.environ[k] for k in os.environ if k not in env}) 241 | cmd = [] 242 | # We don't need app-token because only calling 243 | # server needs an app token. 244 | cmd += [ 245 | sys.executable, 246 | "-m", 247 | "metaflowbot", 248 | "--slack-bot-token", 249 | self.sc.token, 250 | "--admin-thread", 251 | "%s:%s" % (self.admin_chan, self.admin_thread), 252 | "--reply-thread", 253 | reply_thread, 254 | ] 255 | 256 | cmd.extend(("action", op)) 257 | context = FormatFriendlyDict(self.state.get_thread_state(reply_thread)) 258 | try: 259 | for k, v in action_spec.items(): 260 | if isinstance(v, bool): 261 | cmd.append("--%s%s" % ("" if v else "no-", k)) 262 | else: 263 | tmpl = str(v).format( 264 | event=event, context=context, message_group=msg_groups 265 | ) 266 | # note: empty strings are currently not supported as valid 267 | # values, since we need them to denote missing values 268 | if tmpl: 269 | cmd.extend(("--%s" % k, tmpl)) 270 | subprocess.Popen( 271 | cmd, 272 | env=env, 273 | # don't inherit signals (esp SIGINT) in children 274 | preexec_fn=os.setpgrp, 275 | ) 276 | except: 277 | traceback.print_exc() 278 | self.logger( 279 | str(cmd), 280 | head="Taking action (%s) failed: " % op, 281 | system_msg=True, 282 | bad=True, 283 | ) 284 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2021 Netflix, Inc., Step Computing, Inc 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /docs/deployment.md: -------------------------------------------------------------------------------- 1 | # Deploying the Metaflowbot 2 | 3 | Deploying the Metaflowbot involves - 4 | 1. [Setting up the Metaflowbot on Slack](#setting-up-the-metaflowbot-on-slack), and 5 | 2. [Running the Metaflowbot server](#running-the-metaflowbot-server) 6 | - locally 7 | - [with pip](#locally-with-pip) 8 | - [via a docker image](#locally-via-a-docker-image) 9 | - on AWS: 10 | - [with AWS CloudFormation](#on-aws-with-aws-cloudformation) 11 | - [manually](#on-aws-manually) 12 | 13 | ## Setting up the Metaflowbot on Slack 14 | 15 | 1. [Create an App on Slack UI](https://api.slack.com/apps) using the provided [manifest](../manifest.yml). The default name of the Metaflowbot is `@flowey`. To customize the name of the Metaflowbot, change `display_information.name` and `bot_user.display_name` in the [manifest](../manifest.yml). 16 | 17 | ![](images/slacksetup.png) 18 | 19 | 2. Install the App 20 | ![](images/app_install.png) 21 | 22 | 3. Generate an App token (`SLACK_APP_TOKEN`): This token allows the Metaflowbot to make a socket connection to Slack and will be used later to configure the bot. 23 | ![](images/app-token.png) 24 | 25 | 4. Generate Bot token (`SLACK_BOT_TOKEN`) : This token allows the Metaflowbot to make web API calls and will be used later to configure the bot. 26 | ![](images/bot-token.png) 27 | 28 | 5. Here is a walkthrough 29 | ![ezgif-6-e492971fc733](https://user-images.githubusercontent.com/763451/128785552-42fd0e17-ea91-40b4-9183-ee98c87a0ed5.gif) 30 | 31 | 32 | ## Running the Metaflowbot server 33 | 34 | ### locally with pip 35 | 36 | The Metaflowbot server is available as a [pip package from PyPI](https://pypi.org/project/metaflowbot/) and can be directly invoked. 37 | 38 | 1. Install `metaflowbot` Python package from PyPI 39 | 40 | ```sh 41 | pip install metaflowbot 42 | pip install metaflowbot-actions-jokes # Optional dependency 43 | ``` 44 | 45 | 2. Launch the Metaflowbot server by providing `--admin` argument with the email address of your slack account; Metaflowbot will open a message thread with you to maintain it's state (as a poor man's database). Replace `SLACK_APP_TOKEN` & `SLACK_BOT_TOKEN` with the values obtained while [setting up the Metaflowbot on Slack](#setting-up-the-metaflowbot-on-slack). 46 | 47 | ```sh 48 | SLACK_APP_TOKEN=xapp-foo SLACK_BOT_TOKEN=xoxb-bar python -m metaflowbot server --admin me@server.com 49 | ``` 50 | 51 | ### locally via a docker image 52 | 53 | The Metaflowbot server is also available as a docker image from [Docker Hub](https://hub.docker.com/repository/docker/outerbounds/metaflowbot). There are multiple ways to configure the image; just ensure that `ADMIN_USER_ADDRESS` environment variable points to your email address in the Slack workspace - 54 | 55 | - through environment variables 56 | ```sh 57 | docker run -i -t --rm \ 58 | -e SLACK_BOT_TOKEN=$(echo $SLACK_BOT_TOKEN) \ 59 | -e ADMIN_USER_ADDRESS=admin@server.com \ 60 | -e SLACK_APP_TOKEN=$(echo $SLACK_APP_TOKEN) \ 61 | -e AWS_SECRET_ACCESS_KEY=$(echo $AWS_SECRET_ACCESS_KEY) \ 62 | -e AWS_ACCESS_KEY_ID=$(echo $AWS_ACCESS_KEY_ID) \ 63 | -e USERNAME=metaflowbot \ 64 | -e METAFLOW_SERVICE_AUTH_KEY=$(echo $METAFLOW_SERVICE_AUTH_KEY) \ 65 | -e METAFLOW_SERVICE_URL=$(echo $METAFLOW_SERVICE_URL) \ 66 | -e METAFLOW_DATASTORE_SYSROOT_S3=$(echo $METAFLOW_DATASTORE_SYSROOT_S3) \ 67 | -e METAFLOW_DEFAULT_DATASTORE=s3 \ 68 | -e METAFLOW_DEFAULT_METADATA=service \ 69 | outerbounds/metaflowbot 70 | ``` 71 | 72 | - through `~/.metaflowconfig`. 73 | ```sh 74 | docker run -it \ 75 | -v ~/.metaflowconfig:/metaflowconfig --rm \ 76 | -e SLACK_BOT_TOKEN=$(echo $SLACK_BOT_TOKEN) \ 77 | -e ADMIN_USER_ADDRESS=admin@server.com \ 78 | -e SLACK_APP_TOKEN=$(echo $SLACK_APP_TOKEN) \ 79 | -e AWS_SECRET_ACCESS_KEY=$(echo $AWS_SECRET_ACCESS_KEY) \ 80 | -e AWS_ACCESS_KEY_ID=$(echo $AWS_ACCESS_KEY_ID) \ 81 | -e USERNAME=metaflowbot \ 82 | -e METAFLOW_HOME=/.metaflowconfig \ 83 | outerbounds/metaflowbot 84 | ``` 85 | 86 | ### on AWS with AWS CloudFormation 87 | Metaflow bot ships with an [AWS CloudFormation template](../deployment/mfbot-cfn-template.yml) that automates the deployment of all the necessary AWS resources. The template is provided in the [deployment](../deployment) folder. 88 | 89 | The major components of the template are: 90 | 1. AWS Identity and Access Management - Set policies for accessing cloud resources and secrets needed for deployment. 91 | 2. AWS VPC Networking - A VPC with public subnet and internet gateway to deploy Metaflowbot. 92 | 3. AWS VPC Security Groups - Outbound traffic access for Metaflowbot's container. 93 | 4. AWS ECS - Deploying the Metaflowbot's container as a [Fargate](https://aws.amazon.com/fargate/) task. 94 | 5. AWS SecretsManager - Access to secrets holding authentication information about Slack and Metadata service. 95 | 96 | Deploying the template requires a few auth tokens (for Slack and Metaflow Service); these need to be created in [AWS Secrets Manager](https://console.aws.amazon.com/secretsmanager) which are referenced in the CloudFormation template. 97 | 98 | 1. In your AWS Console for AWS Secrets Manager, create a secret with the auth tokens for Slack (`SLACK_APP_TOKEN`, `SLACK_BOT_TOKEN`, ) and Metaflow Service (`METAFLOW_SERVICE_AUTH_KEY`). Copy the ARN of the secret 99 | ![](./images/Secret-manager-setup.png) 100 | 101 | 2. Paste the ARN of the secret along with other metadata + s3 related deployment details. 102 | ![](./images/cfn-deploy.png) 103 | 104 | ### on AWS manually 105 | 106 | If you cannot use the [AWS CloudFormation template](../deployment/mfbot-cfn-template.yml), follow these steps for a manual deployment of Metaflowbot on AWS. 107 | 108 | Please note that Metaflow bot can re-use existing AWS resources - for example, your existing ECS cluster for container deployment. The instructions listed here will create these resources from scratch. If you have a strong background in administering AWS resources, you will notice that many of the security policies are fairly permissive and are intended to serve as a starting point for more complex deployments. Please reach out to us if you would like to discuss more involved deployments. 109 | 110 | 111 | #### VPC 112 | 113 | 1. Open the [Amazon VPC console](https://console.aws.amazon.com/vpc/) and in the left navigation pane, choose VPC Dashboard. 114 | 2. Choose _Launch VPC Wizard_, 115 | 3. Choose _VPC with a Single Public Subnet_ and press _Select_. 116 | 4. For _VPC name_, give your VPC a unique name. 117 | 5. Choose _Create VPC_. 118 | 6. When the wizard is finished, choose _OK_. 119 | 120 | #### ECS Execution IAM Role (Optional) 121 | 122 | 1. Open the [IAM console](https://console.aws.amazon.com/iam/) and in the navigation pane, choose Roles, _Create role_. 123 | 2. For _Select type of trusted entity section_, choose _AWS service_. 124 | 3. For _Choose the service that will use this role_, choose _Elastic Container Service_. 125 | 4. For _Select your use case_, choose _Elastic Container Service Task_ and choose _Next: Permissions_. 126 | 5. Choose _AmazonECSTaskExecutionRolePolicy_. 127 | 5. Choose _Next:tags_. 128 | 6. For _Add tags (optional)_, enter any metadata tags you want to associate with the IAM role, and then choose _Next: Review_. 129 | 6. For _Role name_, enter a name for your role and then choose _Create role_ to finish. Note the ARN of the IAM role you just created. 130 | 131 | #### ECS Task IAM Role 132 | 133 | 1. Open the [IAM console](https://console.aws.amazon.com/iam/) and in the navigation pane, choose _Roles, Create role_. 134 | 2. For Select _type of trusted entity section_, choose _AWS service._ 135 | 3. For Choose the service that will use this role, choose Elastic Container Service. 136 | 4. For Select your use case, choose _Elastic Container Service Task_ and choose _Next: Permissions._ 137 | 5. Next, we will create a [policy](https://console.aws.amazon.com/iamv2/home#/policies) for Amazon S3 and attach it to this role: 138 | 1. Amazon S3 for data storage 139 | 1. Choose _Create Policy_ to open a new window. 140 | 2. Use the visual service editor to create the policy 141 | 1. For _Service_, choose _S3_. 142 | 2. For _Actions_, add _GetObject_ and _ListBucket_ as allowed actions 143 | 3. For _resources_, the bucket name should be the same as metaflow datastore S3 bucket. For object choose _any_ for object name. Choose _Save changes_. 144 | 4. Choose _Review policy_. On the Review policy page, for _Name_ type your own unique name and choose _Create_ policy to finish. 145 | 6. Click the refresh button in the original pane (in Step 4.) and choose the policy that you just created (in Step 5.). Choose _Next:tags_. 146 | 7. For _Add tags_ (optional), enter any metadata tags you want to associate with the IAM role, and then choose _Next: Review_. 147 | 8. For _Role name_, enter a name for your role and then choose _Create role_ to finish. 148 | 149 | ### ECS Cluster + Fargate Task 150 | 151 | 1. Open the [ECS console](https://console.aws.amazon.com/ecs) and from the navigation bar, select the region to use. 152 | 2. Choose _Create Cluster_ under _Clusters_. 153 | 3. Choose _Networking only_, _Next step_. 154 | 4. Pick a name for _Cluster name_. Don't enable Create VPC. We will use the VPC [we have created previously](#vpc). You can choose to check _Enable Container Insights_. Choose _Create_. 155 | 5. Choose _View Cluster_ and choose _Task Definitions_ on the left side pane. 156 | 6. Choose _Create new Task Definition_, _Fargate_ and Next step. 157 | 1. Under _Configure task_ and _container definitions_, 158 | 1. Choose a _Task Definition Name_. 159 | 2. Choose the _Task Role_ as the one you [just created above](#ecs-task-iam-role). 160 | 2. Under _Task execution IAM role_, set the _Task execution role_ to _ecsTaskExecutionRole_ or set it to the IAM role created for [ECS execution](#ecs-execution-iam-role). Leave it empty otherwise. 161 | 3. Under _Task size_, 162 | 1. Choose 8 GB for _Task memory (GB)_ 163 | 2. Choose 4 vCPU for _Task CPU (vCPU)_. 164 | 4. Under _Container Definitions_, choose Add container 165 | 1. Set _metaflowbot_ as the _Container name_. 166 | 2. Set _outerbounds/metaflowbot_ as the _Image_. 167 | 3. Leave other options as is. 168 | 4. Under _Advanced container configuration_, in _Environment variables_ add the following values 169 | 1. Set _Key_ as ADMIN_USER_ADDRESS and the _Value_ as the email address of the user in the slack workspace with whom the bot will open a message thread to store state related information. 170 | 2. Set _Key_ as METAFLOW_SERVICE_URL and the _Value_ as the URL to the metadata service. 171 | 3. Set _Key_ as METAFLOW_DATASTORE_SYSROOT_S3 and the _Value_ as S3 bucket URL for metaflow datastore. 172 | 4. Set _Key_ as METAFLOW_DEFAULT_DATASTORE and _Value_ as _s3_. 173 | 5. Set _Key_ as METAFLOW_DEFAULT_METADATA and _Value_ as _service_. 174 | 6. Set _Key_ as USERNAME and _Value_ as _slackbot_. 175 | 7. Set _Key_ as SLACK_APP_TOKEN and _Value_ as the SLACK_APP_TOKEN retrieved from [Slack]. 176 | 8. Set _Key_ as SLACK_BOT_TOKEN and _Value_ as the SLACK_BOT_TOKEN retrieved from [Slack]. 177 | 9. If your metadata service has an authentication key to it then Set _Key_ as METAFLOW_SERVICE_AUTH_KEY and value as the authentication token of the metadata service. 178 | 5. Choose _Add_. 179 | 5. Choose _Create_. 180 | 7. _Choose_ _Clusters_ in the left side pane and select the cluster you created in Step 4. 181 | 8. _Choose_ _Create_ under _Services_, 182 | 1. Choose _Fargate_ as _Lauch type_. 183 | 2. Choose the task definition that you created in Step 6. for _Task Definition_. Pick the latest for _Revision_. 184 | 3. For _Platform version_ choose _Latest_. 185 | 4. Leave the _Cluster_ as is (pointing to the cluster that you are configuring). 186 | 5. Pick a name for _Service name_. 187 | 6. *Set 1* for _Number of tasks_. 188 | 7. Choose _Rolling update_ for _Deployment type_. 189 | 9. Choose _Next step_. 190 | 10. For _Configure network_, 191 | 1. For _Cluster VPC_, choose the VPC that you have created [previously](#vpc). 192 | 2. Choose the only public subnet. 193 | 11. For _Load balancing_, choose None as Load balancer type. 194 | 12. For _Auto-assign public IP_ keep it as _ENABLED_. 195 | 13. Choose _Next step_. 196 | 14. Leave options in _Set Auto Scaling (optional)_ to the default : _Do not adjust the service’s desired count_ 197 | 15. Choose _Next step_ and _Create Service_. 198 | 16. Choose _View Service_ and wait for the task to get to the running state. 199 | 17. Once the task is running, check if the slack bot is responding to messages in DM's or in a channel it is invited to. 200 | -------------------------------------------------------------------------------- /deployment/mfbot-trop.py: -------------------------------------------------------------------------------- 1 | 2 | import troposphere 3 | import troposphere.ec2 as ec2 4 | import troposphere.elasticloadbalancing as elb 5 | from troposphere import ( 6 | GetAZs, 7 | Parameter, 8 | Ref, 9 | Region, 10 | Select, 11 | Split, 12 | StackName, 13 | Template, 14 | Join, 15 | GetAtt, 16 | ) 17 | from troposphere.logs import ( 18 | LogGroup 19 | ) 20 | from troposphere.iam import ( 21 | Role, 22 | PolicyType 23 | 24 | ) 25 | from troposphere.ecs import ( 26 | AwsvpcConfiguration, 27 | Cluster, 28 | ContainerDefinition, 29 | Environment, 30 | LogConfiguration, 31 | Secret, 32 | Service, 33 | TaskDefinition, 34 | NetworkConfiguration 35 | ) 36 | 37 | 38 | BotDeploymentTemplate = Template() 39 | 40 | BotDeploymentTemplate.set_description( 41 | """Cloudformation Stack for Deploying Metaflowbot""" 42 | ) 43 | 44 | METADATA_SERVICE_URL = BotDeploymentTemplate.add_parameter(Parameter("MetadataServiceUrl",Type='String',\ 45 | Description="URL of the metadata service")) 46 | 47 | ADMIN_USER_ADDRESS = BotDeploymentTemplate.add_parameter(Parameter(\ 48 | "AdminEmailAddress", 49 | Type='String',\ 50 | Description="Email address of the admin user in the slack workspace")) 51 | 52 | # Generate ARN from the s3 url and remove ARN parameter. 53 | MFS3ROOTPATH = BotDeploymentTemplate.add_parameter(Parameter("MetaflowDatastoreSysrootS3", 54 | Type='String',\ 55 | Description="Amazon S3 URL for Metaflow DataStore ")) 56 | 57 | MFS3ARN = Join('',['arn:aws:s3:::',Select(1,Split("s3://",Ref(MFS3ROOTPATH))),]) 58 | Metaflowbot_SECRETS_ARN = BotDeploymentTemplate.add_parameter(Parameter("MetaflowbotSecretsManagerARN", 59 | Type='String',\ 60 | Description="ARN of the secret holding Metaflowbot credentials in Secrets Manager")) 61 | 62 | # These are Parameter Store Secure secret names. 63 | METADATA_AUTH = BotDeploymentTemplate.add_parameter(Parameter("MetadataServiceAuthParameterKey", 64 | Type='String',\ 65 | Default="METADATASERVICE_AUTH_KEY",\ 66 | Description="Key for Metadata service auth parameter in Secrets Manager.")) 67 | SLACK_APP_TOKEN = BotDeploymentTemplate.add_parameter(Parameter("SlackAppTokenParameterKey", 68 | Type='String',\ 69 | Default="SLACK_APP_TOKEN_KEY",\ 70 | Description="Key for SLACK_APP_TOKEN parameter in Secrets Manager.")) 71 | SLACK_BOT_TOKEN = BotDeploymentTemplate.add_parameter(Parameter("SlackBotTokenParameterKey", 72 | Type='String',\ 73 | Default="SLACK_BOT_TOKEN_KEY",\ 74 | Description="Key for SLACK_BOT_TOKEN parameter in Secrets Manager.")) 75 | 76 | cluster = BotDeploymentTemplate.add_resource(Cluster("MetaflowbotCluster")) 77 | 78 | 79 | ENV_DICT = { 80 | "ADMIN_USER_ADDRESS":Ref(ADMIN_USER_ADDRESS), 81 | "USERNAME":"slackbot", 82 | "METAFLOW_SERVICE_URL":Ref(METADATA_SERVICE_URL), 83 | "METAFLOW_DATASTORE_SYSROOT_S3":Ref(MFS3ROOTPATH), 84 | "METAFLOW_DEFAULT_DATASTORE":"s3", 85 | "METAFLOW_DEFAULT_METADATA":"service" 86 | } 87 | 88 | # best practices from : https://docs.aws.amazon.com/AmazonECS/latest/developerguide/specifying-sensitive-data-secrets.html 89 | SECRETS = [ 90 | Secret( 91 | Name='METAFLOW_SERVICE_AUTH_KEY', 92 | ValueFrom=Join("",[Ref(Metaflowbot_SECRETS_ARN),":",Ref(METADATA_AUTH),"::"]) 93 | ), 94 | Secret( 95 | Name='SLACK_APP_TOKEN', 96 | ValueFrom=Join("",[Ref(Metaflowbot_SECRETS_ARN),":",Ref(SLACK_APP_TOKEN),"::"]) 97 | ),Secret( 98 | Name='SLACK_BOT_TOKEN', 99 | ValueFrom=Join("",[Ref(Metaflowbot_SECRETS_ARN),":",Ref(SLACK_BOT_TOKEN),"::"]) 100 | ) 101 | ] 102 | 103 | # task role vs execution role : 104 | # https://selfoverflow.com/questions/48999472/difference-between-aws-elastic-container-services-ecs-executionrole-and-taskr/49947471 105 | # ECS execution role is capabilities of ECS agent 106 | # ECS task role is specific capabilities within the task itself : s3_access_iam_role (capabilities of task) 107 | 108 | 109 | EcsClusterRole = BotDeploymentTemplate.add_resource( 110 | Role( 111 | "EcsClusterRole", 112 | Path="/", 113 | ManagedPolicyArns=["arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM"], 114 | AssumeRolePolicyDocument={ 115 | "Version": "2012-10-17", 116 | "Statement": [ 117 | { 118 | "Action": "sts:AssumeRole", 119 | "Principal": {"Service": "ecs-tasks.amazonaws.com"}, 120 | "Effect": "Allow", 121 | }, 122 | ], 123 | }, 124 | ) 125 | ) 126 | 127 | EcsTaskRole = BotDeploymentTemplate.add_resource( 128 | Role( 129 | "EcsTaskRole", 130 | Path="/", 131 | AssumeRolePolicyDocument={ 132 | "Version": "2012-10-17", 133 | "Statement": [ 134 | { 135 | "Action": "sts:AssumeRole", 136 | "Principal": {"Service": "ecs-tasks.amazonaws.com"}, 137 | "Effect": "Allow", 138 | } 139 | ], 140 | }, 141 | ) 142 | ) 143 | 144 | PolicyEcr = BotDeploymentTemplate.add_resource( 145 | PolicyType( 146 | "PolicyEcr", 147 | PolicyName="MetaflowbotEcrPolicy", 148 | PolicyDocument={ 149 | "Version": "2012-10-17", 150 | "Statement": [ 151 | { 152 | "Action": ["ecr:GetAuthorizationToken"], 153 | "Resource": ["*"], 154 | "Effect": "Allow", 155 | }, 156 | { 157 | "Action": [ 158 | "ecr:GetDownloadUrlForLayer", 159 | "ecr:BatchGetImage", 160 | "ecr:BatchCheckLayerAvailability", 161 | "logs:CreateLogStream", 162 | "logs:PutLogEvents" 163 | ], 164 | "Resource": ["*"], 165 | "Effect": "Allow", 166 | "Sid": "AllowPull", 167 | }, 168 | ], 169 | }, 170 | Roles=[Ref(EcsClusterRole)], 171 | ) 172 | ) 173 | 174 | secrets_access_policy = BotDeploymentTemplate.add_resource( 175 | PolicyType( 176 | "MetaflowbotSecretAccess", 177 | # 178 | PolicyName='Metaflowbot', 179 | PolicyDocument= { 180 | "Version": "2012-10-17", 181 | "Statement": [ 182 | { 183 | "Action": [ 184 | "secretsmanager:GetSecretValue", 185 | ], 186 | "Resource": [ 187 | Ref(Metaflowbot_SECRETS_ARN) 188 | ], 189 | "Effect": "Allow", 190 | "Sid": "S3GetObject", 191 | }, 192 | ] 193 | }, 194 | Roles=[Ref(EcsClusterRole)], 195 | ) 196 | ) 197 | 198 | S3AccessPolicy = BotDeploymentTemplate.add_resource( 199 | PolicyType( 200 | "S3AccessPolicy", 201 | PolicyName="MetaflowbotS3AccessPolicy", 202 | PolicyDocument={ 203 | "Version": "2012-10-17", 204 | "Statement": [ 205 | { 206 | "Action": [ 207 | "s3:GetObject", 208 | # s3:ListBucket for https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html 209 | "s3:ListBucket", 210 | ], 211 | "Resource": [ 212 | Join('',[MFS3ARN,'/*']) 213 | ], 214 | "Effect": "Allow", 215 | "Sid": "S3GetObject", 216 | }, 217 | ], 218 | }, 219 | Roles=[Ref(EcsTaskRole)], 220 | ) 221 | ) 222 | ### Routing and VPC Settings. 223 | # Create a VPC with Subnet. The VPC should have an IG attached 224 | # and rule created in it's route table; 225 | 226 | vpc = BotDeploymentTemplate.add_resource(ec2.VPC('MetaflowbotPublicVpc',CidrBlock="10.0.0.0/16",)) 227 | 228 | subnet = BotDeploymentTemplate.add_resource(ec2.Subnet( 229 | "MetaflowbotDeploymentSubnet", 230 | AvailabilityZone=Select( 231 | 0,GetAZs(region=Region) 232 | ), 233 | CidrBlock="10.0.0.0/24", 234 | VpcId=Ref(vpc), 235 | MapPublicIpOnLaunch=True, 236 | )) 237 | 238 | internetgateway = BotDeploymentTemplate.add_resource(ec2.InternetGateway("MetaflowbotInternetGateway")) 239 | 240 | net_gw_vpc_attachment = BotDeploymentTemplate.add_resource( 241 | ec2.VPCGatewayAttachment( 242 | "InternetGatewayAttachment", 243 | VpcId=Ref(vpc), 244 | InternetGatewayId=Ref(internetgateway), 245 | ) 246 | ) 247 | 248 | 249 | public_route_table = BotDeploymentTemplate.add_resource( 250 | ec2.RouteTable( 251 | "PublicRouteTable", 252 | VpcId=Ref(vpc), 253 | ) 254 | ) 255 | 256 | public_route_association = BotDeploymentTemplate.add_resource( 257 | ec2.SubnetRouteTableAssociation( 258 | "PublicRouteAssociation", 259 | SubnetId=Ref(subnet), 260 | RouteTableId=Ref(public_route_table), 261 | ) 262 | ) 263 | 264 | default_public_route = BotDeploymentTemplate.add_resource( 265 | ec2.Route( 266 | "PublicDefaultRoute", 267 | RouteTableId=Ref(public_route_table), 268 | DestinationCidrBlock="0.0.0.0/0", 269 | GatewayId=Ref(internetgateway), 270 | ) 271 | ) 272 | LOG_GROUP_STRING = Join("",[ 273 | '/ecs/', 274 | StackName, 275 | "-metaflowbot", 276 | ]) 277 | loggroup= BotDeploymentTemplate.add_resource( 278 | LogGroup( 279 | "MetaflowbotLogGroup", 280 | LogGroupName=LOG_GROUP_STRING 281 | ) 282 | ) 283 | 284 | task_definition = BotDeploymentTemplate.add_resource( 285 | TaskDefinition( 286 | "MetaflowbotTaskDefinition", 287 | RequiresCompatibilities=["FARGATE"], 288 | Cpu="4096", 289 | ExecutionRoleArn=GetAtt(EcsClusterRole,"Arn"), 290 | TaskRoleArn=GetAtt(EcsTaskRole,"Arn"), 291 | Memory="8192", 292 | NetworkMode="awsvpc", 293 | ContainerDefinitions=[ 294 | ContainerDefinition( 295 | Name="metaflowbot", 296 | Image="outerbounds/metaflowbot", 297 | Essential=True, 298 | LogConfiguration=LogConfiguration( 299 | LogDriver = "awslogs", 300 | Options= { 301 | "awslogs-group": LOG_GROUP_STRING, 302 | "awslogs-region":Region, 303 | "awslogs-stream-prefix": 'ecs' 304 | }, 305 | ), 306 | Environment = [ 307 | Environment(**dict(Name=k,Value=v)) for k,v in ENV_DICT.items() 308 | ], 309 | Secrets=SECRETS 310 | ) 311 | ], 312 | ) 313 | ) 314 | 315 | 316 | efs_security_group = BotDeploymentTemplate.add_resource(ec2.SecurityGroup( 317 | "MetaflowbotSecurityGroup", 318 | # Outbound rules 319 | # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-ec2-security-group-rule.html 320 | SecurityGroupEgress = [ 321 | ec2.SecurityGroupRule( 322 | "MetaflowbotOutboundRules", 323 | ToPort=65534, 324 | FromPort=0, 325 | IpProtocol="tcp", 326 | CidrIp="0.0.0.0/0", 327 | ) 328 | ], 329 | VpcId=Ref(vpc), 330 | GroupDescription="Allow All In and outbound traffic", 331 | )) 332 | 333 | service = BotDeploymentTemplate.add_resource( 334 | Service( 335 | "MetaflowbotDeployment", 336 | Cluster=Ref(cluster), 337 | DesiredCount=1, 338 | TaskDefinition=Ref(task_definition), 339 | LaunchType="FARGATE", 340 | NetworkConfiguration=NetworkConfiguration( 341 | AwsvpcConfiguration=AwsvpcConfiguration( 342 | Subnets=[Ref(subnet)], 343 | AssignPublicIp='ENABLED', 344 | SecurityGroups=[Ref(efs_security_group)] 345 | ) 346 | ), 347 | ) 348 | ) 349 | print(BotDeploymentTemplate.to_yaml()) 350 | 351 | 352 | --------------------------------------------------------------------------------