├── airflow ├── flow_toolz │ ├── templates │ │ ├── __init__.py │ │ └── dag_template.py │ ├── __init__.py │ └── utils.py ├── invoke.yaml ├── Dockerfile ├── .env ├── setup.py ├── dags │ └── example_dag.py └── tasks.py ├── pyproject.toml ├── reverse-proxy ├── Dockerfile └── traefik.toml ├── grafana ├── Dockerfile ├── datasource.yaml ├── dashboard.yaml └── dashboard.json ├── docker-compose.prod.yaml ├── invoke.yaml ├── .gitignore ├── docker-compose.yaml ├── readme.md ├── license.txt └── tasks.py /airflow/flow_toolz/templates/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 79 3 | py36 = true -------------------------------------------------------------------------------- /reverse-proxy/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM traefik 2 | 3 | ADD traefik.toml /etc/traefik/traefik.toml -------------------------------------------------------------------------------- /airflow/flow_toolz/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is our utility library, where cross-module code will be written. 3 | """ -------------------------------------------------------------------------------- /airflow/invoke.yaml: -------------------------------------------------------------------------------- 1 | run: 2 | echo: true 3 | 4 | auth: 5 | username: 6 | password: 7 | email: 8 | 9 | airflow: 10 | variables: 11 | code_bucket: 12 | source_data_bucket: 13 | platform_root_dir: -------------------------------------------------------------------------------- /grafana/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM grafana/grafana 2 | 3 | ADD datasource.yaml /etc/grafana/provisioning/datasources/ 4 | 5 | ADD dashboard.yaml /etc/grafana/provisioning/dashboards/ 6 | 7 | ADD dashboard.json /var/lib/grafana/dashboards/ 8 | -------------------------------------------------------------------------------- /grafana/datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: influx 5 | type: influxdb 6 | access: proxy 7 | database: cadvisor 8 | url: http://influxdb:8086 9 | isDefault: true 10 | editable: true -------------------------------------------------------------------------------- /grafana/dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'default' 5 | orgId: 1 6 | folder: '' 7 | type: file 8 | disableDeletion: false 9 | updateIntervalSeconds: 10 10 | options: 11 | path: /var/lib/grafana/dashboards -------------------------------------------------------------------------------- /docker-compose.prod.yaml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | volumes: 4 | dags: 5 | flow-toolz: 6 | 7 | services: 8 | webserver: 9 | environment: 10 | AIRFLOW__WEBSERVER__AUTHENTICATE: "true" 11 | 12 | reverse-proxy: 13 | command: 14 | - "--api" 15 | - "--docker" 16 | - "--docker.swarmMode" 17 | - "--docker.watch" 18 | -------------------------------------------------------------------------------- /invoke.yaml: 
-------------------------------------------------------------------------------- 1 | run: 2 | echo: true 3 | 4 | stack_name: airflow 5 | 6 | gcp: 7 | zone: us-central1-a 8 | machine_type: n1-standard-2 9 | managers: 3 10 | workers: 0 11 | kms: 12 | key: secrets 13 | keyring: airflow 14 | location: us-central1 15 | 16 | encrypt: 17 | files: 18 | - default-service-account.json 19 | - aws-credentials.ini 20 | - airflow/.secrets.env 21 | - reverse-proxy/certificate.crt 22 | - reverse-proxy/key.key 23 | -------------------------------------------------------------------------------- /airflow/flow_toolz/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | General utilities. 3 | """ 4 | from contextlib import contextmanager 5 | 6 | import requests 7 | 8 | 9 | @contextmanager 10 | def get_session(headers: dict = None, auth: tuple = None) -> requests.Session: 11 | """ 12 | Yield a requests.Session context with the given headers and auth. 13 | 14 | Args: 15 | headers (dict): request headers 16 | auth (tuple): (username, password) http basic auth 17 | 18 | Yields: requests.Session 19 | 20 | """ 21 | with requests.Session() as session: 22 | session.auth = auth 23 | session.headers = headers 24 | yield session 25 | -------------------------------------------------------------------------------- /airflow/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python 3.7 produces errors since async becomes a keyword; use 3.6 instead 2 | 3 | FROM python:3.6-slim 4 | 5 | RUN apt-get update && \ 6 | apt-get install -y \ 7 | build-essential \ 8 | curl \ 9 | less 10 | 11 | # SLUGIFY_USES_TEXT_UNIDECODE ensures airflow doesn't install an unnecessary GNU-licensed lib 12 | 13 | ENV SLUGIFY_USES_TEXT_UNIDECODE yes 14 | 15 | ADD flow_toolz flow_toolz 16 | 17 | ADD setup.py . 18 | 19 | RUN pip install -U pip setuptools wheel && \ 20 | pip install -e '.[all]' 21 | 22 | ADD dags/* /dags/ 23 | 24 | ADD tasks.py . 25 | ADD invoke.yaml . 
26 | 27 | ENTRYPOINT [ "invoke" ] 28 | 29 | CMD [ "-l" ] -------------------------------------------------------------------------------- /airflow/.env: -------------------------------------------------------------------------------- 1 | C_FORCE_ROOT=true 2 | 3 | AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT_CONN=/run/secrets/service-account-json 4 | AIRFLOW_CONN_BIGQUERY_DEFAULT_CONN=/run/secrets/service-account-json 5 | 6 | AIRFLOW__WEBSERVER__WEB_SERVER_PORT=80 7 | 8 | AIRFLOW__CORE__AIRFLOW_HOME=/airflow 9 | AIRFLOW__CORE__DAGS_FOLDER=/dags 10 | 11 | AIRFLOW__CORE__BASE_LOG_FOLDER=$AIRFLOW__CORE__AIRFLOW_HOME/logs 12 | AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@postgres:5432/$POSTGRES_DB 13 | AIRFLOW__CORE__EXECUTOR=CeleryExecutor 14 | AIRFLOW__CORE__LOAD_EXAMPLES=false 15 | AIRFLOW__CORE__DAG_CONCURRENCY=4 16 | 17 | AIRFLOW__CELERY__BROKER_URL=amqp://rabbitmq 18 | AIRFLOW__CELERY__RESULT_BACKEND=db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@postgres:5432/$POSTGRES_DB 19 | 20 | AIRFLOW__WEBSERVER__AUTHENTICATE=true 21 | AIRFLOW__WEBSERVER__AUTH_BACKEND=airflow.contrib.auth.backends.password_auth 22 | 23 | # the following mirrors what's in .secrets.env for the sake of documentation 24 | # since it will not be in the repo unencrypted 25 | 26 | # the same env vars in .secrets.env will override these values 27 | 28 | POSTGRES_USER=airflow 29 | POSTGRES_PASSWORD=airflow 30 | POSTGRES_DB=airflow 31 | 32 | INVOKE_AUTH_USERNAME=admin 33 | INVOKE_AUTH_PASSWORD=admin 34 | INVOKE_AUTH_EMAIL=hello@world.com 35 | 36 | GCP_PROJECT_ID= 37 | -------------------------------------------------------------------------------- /airflow/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | from setuptools import find_packages, setup 6 | 7 | # Package meta-data. 8 | NAME = "flow_toolz" 9 | DESCRIPTION = "Shared airflow libs." 
10 | URL = "https://github.com/" 11 | EMAIL = "your@email.com" 12 | AUTHOR = "your name" 13 | REQUIRES_PYTHON = ">=3.6.0" 14 | VERSION = "alpha" 15 | 16 | REQUIRED = [ 17 | "requests", 18 | "boto3", 19 | "invoke", 20 | "dataclasses", 21 | "importlib_resources", 22 | ] 23 | 24 | airflow_requirements = [ 25 | "apache-airflow[gcp_api,password,google_auth,s3,postgres,celery]" 26 | ] 27 | test_requirements = ["pytest"] 28 | data_science_requirements = [ 29 | "jupyterlab", 30 | "fastparquet", 31 | "pandas", 32 | "pandas-gbq", 33 | ] 34 | development_requirements = ["jinja2", "klaxon"] 35 | 36 | EXTRAS = { 37 | "airflow": airflow_requirements, 38 | "test": test_requirements, 39 | "data-science": data_science_requirements, 40 | "dev": development_requirements, 41 | "all": airflow_requirements 42 | + test_requirements 43 | + data_science_requirements 44 | + development_requirements, 45 | } 46 | 47 | 48 | setup( 49 | name=NAME, 50 | version=VERSION, 51 | description=DESCRIPTION, 52 | author=AUTHOR, 53 | author_email=EMAIL, 54 | python_requires=REQUIRES_PYTHON, 55 | url=URL, 56 | packages=find_packages(exclude=("tests",)), 57 | install_requires=REQUIRED, 58 | extras_require=EXTRAS, 59 | include_package_data=True, 60 | classifiers=[ 61 | # Trove classifiers 62 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 63 | "License :: OSI Approved :: MIT License", 64 | "Programming Language :: Python", 65 | "Programming Language :: Python :: 3", 66 | "Programming Language :: Python :: 3.6", 67 | "Programming Language :: Python :: Implementation :: CPython", 68 | "Programming Language :: Python :: Implementation :: PyPy", 69 | ], 70 | ) 71 | -------------------------------------------------------------------------------- /airflow/dags/example_dag.py: -------------------------------------------------------------------------------- 1 | """ 2 | Do things. 3 | """ 4 | from functools import partial 5 | from pprint import pprint 6 | import datetime as dt 7 | import typing as T 8 | 9 | from dataclasses import dataclass 10 | 11 | from airflow.models import DAG 12 | from airflow.operators.dummy_operator import DummyOperator 13 | from airflow.operators.python_operator import PythonOperator 14 | 15 | 16 | dag = DAG( 17 | dag_id="example_dag_v1_p3", 18 | default_args={ 19 | "owner": "Stephan Fitzpatrick", 20 | "email": "knowsuchagency@gmail.com", 21 | "start_date": dt.datetime(*map(int, "2019-02-12".split("-"))), 22 | }, 23 | schedule_interval="0 7 * * *", 24 | ) 25 | 26 | 27 | @dataclass 28 | class ExampleResult: 29 | string: T.Optional[str] 30 | 31 | 32 | def hello_airflow(execution_date: dt.datetime, argument=None, **kwargs): 33 | """ 34 | Print the execution date (and other variables passed from airflow). 
35 | 36 | Args: 37 | execution_date (dt.datetime): the time of the dag's execution (passed by airflow) 38 | argument: an example argument 39 | **kwargs: other variables passed from airflow 40 | 41 | """ 42 | print(f"argument passed was: {argument}") 43 | print(f"execution date is: {execution_date}") 44 | print("variables (besides execution_date) passed from airflow:") 45 | pprint(kwargs) 46 | 47 | return ExampleResult(string="aloha, airflow") 48 | 49 | 50 | def validate(task_instance, **kwargs): 51 | """ABV always be validating.""" 52 | example_result = task_instance.xcom_pull(task_ids="hello_airflow") 53 | 54 | assert example_result.string == "hello airflow", "failed, as expected" 55 | 56 | 57 | with dag: 58 | 59 | start = DummyOperator(task_id="start") 60 | 61 | start >> PythonOperator( 62 | task_id="hello_airflow", 63 | python_callable=partial(hello_airflow, argument="I'm a teapot"), 64 | provide_context=True, 65 | ) >> PythonOperator( 66 | task_id="validate_hello_airflow", python_callable=validate, provide_context=True 67 | ) 68 | -------------------------------------------------------------------------------- /airflow/flow_toolz/templates/dag_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | Do things. 3 | """ 4 | from functools import partial 5 | from pprint import pprint 6 | import datetime as dt 7 | import typing as T 8 | 9 | from dataclasses import dataclass 10 | 11 | from airflow.models import DAG 12 | from airflow.operators.dummy_operator import DummyOperator 13 | from airflow.operators.python_operator import PythonOperator 14 | 15 | 16 | dag = DAG( 17 | dag_id="{{ dag_id }}", 18 | default_args={ 19 | "owner": "{{ owner }}", 20 | "email": "{{ email }}", 21 | "start_date": dt.datetime(*map(int, "{{ start_date }}".split("-"))), 22 | }, 23 | schedule_interval="{{ schedule_interval }}", 24 | ) 25 | 26 | 27 | @dataclass 28 | class ExampleResult: 29 | string: T.Optional[str] 30 | 31 | 32 | def hello_airflow(execution_date: dt.datetime, argument=None, **kwargs): 33 | """ 34 | Print the execution date (and other variables passed from airflow). 
35 | 36 | Args: 37 | execution_date (dt.datetime): the time of the dag's execution (passed by airflow) 38 | argument: an example argument 39 | **kwargs: other variables passed from airflow 40 | 41 | """ 42 | print(f"argument passed was: {argument}") 43 | print(f"execution date is: {execution_date}") 44 | print("variables (besides execution_date) passed from airflow:") 45 | pprint(kwargs) 46 | 47 | return ExampleResult(string="aloha, airflow") 48 | 49 | 50 | def validate(task_instance, **kwargs): 51 | """ABV always be validating.""" 52 | example_result = task_instance.xcom_pull(task_ids="hello_airflow") 53 | 54 | assert example_result.string == "hello airflow", "failed, as expected" 55 | 56 | 57 | with dag: 58 | 59 | start = DummyOperator(task_id="start") 60 | 61 | start >> PythonOperator( 62 | task_id="hello_airflow", 63 | python_callable=partial(hello_airflow, argument="I'm a teapot"), 64 | provide_context=True, 65 | ) >> PythonOperator( 66 | task_id="validate_hello_airflow", python_callable=validate, provide_context=True 67 | ) 68 | -------------------------------------------------------------------------------- /airflow/tasks.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import logging 3 | import time 4 | 5 | from invoke import task 6 | 7 | 8 | @task 9 | def wait(c, seconds=10): 10 | logging.info(f"waiting {seconds} seconds") 11 | time.sleep(seconds) 12 | 13 | 14 | @task 15 | def initdb(c): 16 | c.run("airflow initdb", warn=True) 17 | 18 | 19 | @task 20 | def add_user(c): 21 | from airflow import models, settings 22 | from airflow.contrib.auth.backends.password_auth import PasswordUser 23 | 24 | import sqlalchemy 25 | 26 | user = PasswordUser(models.User()) 27 | user.username = c.config.auth.username 28 | user.email = c.config.auth.email 29 | user.password = c.config.auth.password 30 | 31 | session = settings.Session() 32 | session.add(user) 33 | 34 | try: 35 | session.commit() 36 | except sqlalchemy.exc.IntegrityError as e: 37 | logging.exception(e) 38 | finally: 39 | session.close() 40 | 41 | 42 | @task 43 | def set_airflow_variables(c): 44 | """Configure airflow variables from configuration.""" 45 | for key, value in c.config.airflow.variables.items(): 46 | c.run(f"airflow variables --set {key} {value}") 47 | 48 | 49 | @task 50 | def configure_aws(c): 51 | """Copy aws secrets file to default aws credentials location.""" 52 | secret_path = Path("/run/secrets/aws-credentials") 53 | 54 | credentials_root = Path(Path.home(), ".aws") 55 | credentials_path = Path(credentials_root, "credentials") 56 | 57 | if secret_path.exists() and not credentials_path.exists(): 58 | logging.info("aws secrets file found; writing to default path") 59 | credentials_root.mkdir(exist_ok=True) 60 | credentials_path.write_text(secret_path.read_text()) 61 | 62 | logging.info("aws secrets file not found. 
skipping configuration") 63 | 64 | 65 | @task(initdb, add_user, set_airflow_variables, configure_aws) 66 | def initialize(c): 67 | """Initialize db and anything else necessary prior to webserver, scheduler, workers etc.""" 68 | 69 | 70 | @task(initialize) 71 | def webserver(c): 72 | c.run("airflow webserver") 73 | 74 | 75 | @task(wait) 76 | def scheduler(c): 77 | c.run("airflow scheduler") 78 | 79 | 80 | @task(wait) 81 | def worker(c): 82 | c.run("airflow worker") 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### macOS template 3 | # General 4 | .DS_Store 5 | .AppleDouble 6 | .LSOverride 7 | 8 | # Icon must end with two \r 9 | Icon 10 | 11 | # Thumbnails 12 | ._* 13 | 14 | # Files that might appear in the root of a volume 15 | .DocumentRevisions-V100 16 | .fseventsd 17 | .Spotlight-V100 18 | .TemporaryItems 19 | .Trashes 20 | .VolumeIcon.icns 21 | .com.apple.timemachine.donotpresent 22 | 23 | # Directories potentially created on remote AFP share 24 | .AppleDB 25 | .AppleDesktop 26 | Network Trash Folder 27 | Temporary Items 28 | .apdisk 29 | ### JetBrains template 30 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 31 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 32 | 33 | # just to avoid all the pain 34 | .idea/ 35 | 36 | # User-specific stuff 37 | .idea/**/workspace.xml 38 | .idea/**/tasks.xml 39 | .idea/**/dictionaries 40 | .idea/**/shelf 41 | 42 | # Sensitive or high-churn files 43 | .idea/**/dataSources/ 44 | .idea/**/dataSources.ids 45 | .idea/**/dataSources.local.xml 46 | .idea/**/sqlDataSources.xml 47 | .idea/**/dynamic.xml 48 | .idea/**/uiDesigner.xml 49 | .idea/**/dbnavigator.xml 50 | 51 | # Gradle 52 | .idea/**/gradle.xml 53 | .idea/**/libraries 54 | 55 | # CMake 56 | cmake-build-debug/ 57 | cmake-build-release/ 58 | 59 | # Mongo Explorer plugin 60 | .idea/**/mongoSettings.xml 61 | 62 | # File-based project format 63 | *.iws 64 | 65 | # IntelliJ 66 | out/ 67 | 68 | # mpeltonen/sbt-idea plugin 69 | .idea_modules/ 70 | 71 | # JIRA plugin 72 | atlassian-ide-plugin.xml 73 | 74 | # Cursive Clojure plugin 75 | .idea/replstate.xml 76 | 77 | # Crashlytics plugin (for Android Studio and IntelliJ) 78 | com_crashlytics_export_strings.xml 79 | crashlytics.properties 80 | crashlytics-build.properties 81 | fabric.properties 82 | 83 | # Editor-based Rest Client 84 | .idea/httpRequests 85 | ### Python template 86 | # Byte-compiled / optimized / DLL files 87 | __pycache__/ 88 | *.py[cod] 89 | *$py.class 90 | 91 | # C extensions 92 | *.so 93 | 94 | # Distribution / packaging 95 | .Python 96 | build/ 97 | develop-eggs/ 98 | dist/ 99 | downloads/ 100 | eggs/ 101 | .eggs/ 102 | lib/ 103 | lib64/ 104 | parts/ 105 | sdist/ 106 | var/ 107 | wheels/ 108 | *.egg-info/ 109 | .installed.cfg 110 | *.egg 111 | MANIFEST 112 | 113 | # PyInstaller 114 | # Usually these files are written by a python script from a template 115 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
116 | *.manifest 117 | *.spec 118 | 119 | # Installer logs 120 | pip-log.txt 121 | pip-delete-this-directory.txt 122 | 123 | # Unit test / coverage reports 124 | htmlcov/ 125 | .tox/ 126 | .coverage 127 | .coverage.* 128 | .cache 129 | nosetests.xml 130 | coverage.xml 131 | *.cover 132 | .hypothesis/ 133 | .pytest_cache/ 134 | 135 | # Translations 136 | *.mo 137 | *.pot 138 | 139 | # Django stuff: 140 | *.log 141 | local_settings.py 142 | db.sqlite3 143 | 144 | # Flask stuff: 145 | instance/ 146 | .webassets-cache 147 | 148 | # Scrapy stuff: 149 | .scrapy 150 | 151 | # Sphinx documentation 152 | docs/_build/ 153 | 154 | # PyBuilder 155 | target/ 156 | 157 | # Jupyter Notebook 158 | .ipynb_checkpoints 159 | 160 | # pyenv 161 | .python-version 162 | 163 | # celery beat schedule file 164 | celerybeat-schedule 165 | 166 | # SageMath parsed files 167 | *.sage.py 168 | 169 | # Environments 170 | .secrets.env 171 | .venv 172 | .windows-venv 173 | env/ 174 | venv/ 175 | ENV/ 176 | env.bak/ 177 | venv.bak/ 178 | 179 | # Spyder project settings 180 | .spyderproject 181 | .spyproject 182 | 183 | # Rope project settings 184 | .ropeproject 185 | 186 | # mkdocs documentation 187 | /site 188 | 189 | # mypy 190 | .mypy_cache/ 191 | 192 | # secrets 193 | aws-credentials.ini 194 | default-service-account.json 195 | reverse-proxy/certificate.crt 196 | reverse-proxy/key.key -------------------------------------------------------------------------------- /reverse-proxy/traefik.toml: -------------------------------------------------------------------------------- 1 | ################################################################ 2 | # Global configuration 3 | ################################################################ 4 | 5 | # Enable debug mode 6 | # 7 | # Optional 8 | # Default: false 9 | # 10 | debug = false 11 | 12 | # Log level 13 | # 14 | # Optional 15 | # Default: "ERROR" 16 | # 17 | logLevel = "DEBUG" 18 | 19 | # Entrypoints to be used by frontends that do not specify any entrypoint. 20 | # Each frontend can specify its own entrypoints. 21 | # 22 | # Optional 23 | # Default: ["http"] 24 | # 25 | defaultEntryPoints = ["http", "https"] 26 | 27 | ################################################################ 28 | # Entrypoints configuration 29 | ################################################################ 30 | 31 | # Entrypoints definition 32 | # 33 | # Optional 34 | # Default: 35 | [entryPoints] 36 | [entryPoints.http] 37 | address = ":80" 38 | [entryPoints.http.redirect] 39 | entryPoint = "https" 40 | [entryPoints.https] 41 | address = ":443" 42 | [entryPoints.https.tls] 43 | [[entryPoints.https.tls.certificates]] 44 | certFile = "/run/secrets/certificate.crt" 45 | keyFile = "/run/secrets/key.key" 46 | 47 | ################################################################ 48 | # Traefik logs configuration 49 | ################################################################ 50 | 51 | # Traefik logs 52 | # Enabled by default and log to stdout 53 | # 54 | # Optional 55 | # 56 | # [traefikLog] 57 | 58 | # Sets the filepath for the traefik log. If not specified, stdout will be used. 59 | # Intermediate directories are created if necessary. 60 | # 61 | # Optional 62 | # Default: os.Stdout 63 | # 64 | # filePath = "log/traefik.log" 65 | 66 | # Format is either "json" or "common". 
67 | # 68 | # Optional 69 | # Default: "common" 70 | # 71 | # format = "common" 72 | 73 | ################################################################ 74 | # Access logs configuration 75 | ################################################################ 76 | 77 | # Enable access logs 78 | # By default it will write to stdout and produce logs in the textual 79 | # Common Log Format (CLF), extended with additional fields. 80 | # 81 | # Optional 82 | # 83 | # [accessLog] 84 | 85 | # Sets the file path for the access log. If not specified, stdout will be used. 86 | # Intermediate directories are created if necessary. 87 | # 88 | # Optional 89 | # Default: os.Stdout 90 | # 91 | # filePath = "/path/to/log/log.txt" 92 | 93 | # Format is either "json" or "common". 94 | # 95 | # Optional 96 | # Default: "common" 97 | # 98 | # format = "common" 99 | 100 | ################################################################ 101 | # API and dashboard configuration 102 | ################################################################ 103 | 104 | # Enable API and dashboard 105 | [api] 106 | 107 | # Name of the related entry point 108 | # 109 | # Optional 110 | # Default: "traefik" 111 | # 112 | # entryPoint = "traefik" 113 | 114 | # Enabled Dashboard 115 | # 116 | # Optional 117 | # Default: true 118 | # 119 | # dashboard = false 120 | 121 | ################################################################ 122 | # Ping configuration 123 | ################################################################ 124 | 125 | # Enable ping 126 | [ping] 127 | 128 | # Name of the related entry point 129 | # 130 | # Optional 131 | # Default: "traefik" 132 | # 133 | # entryPoint = "traefik" 134 | 135 | ################################################################ 136 | # Docker configuration backend 137 | ################################################################ 138 | 139 | # Enable Docker configuration backend 140 | [docker] 141 | 142 | # Docker server endpoint. Can be a tcp or a unix socket endpoint. 143 | # 144 | # Required 145 | # Default: "unix:///var/run/docker.sock" 146 | # 147 | # endpoint = "tcp://10.10.10.10:2375" 148 | 149 | # Default domain used. 150 | # Can be overridden by setting the "traefik.domain" label on a container. 
151 | # 152 | # Optional 153 | # Default: "" 154 | # 155 | # domain = "docker.localhost" 156 | 157 | # Expose containers by default in traefik 158 | # 159 | # Optional 160 | # Default: true 161 | # 162 | # exposedByDefault = true 163 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | services: 4 | 5 | reverse-proxy: 6 | build: ./reverse-proxy/ 7 | image: knowsuchagency/airflow-reverse-proxy 8 | command: --api --docker 9 | ports: 10 | - "443:443" 11 | - "80:80" 12 | - "8080:8080" # The Web UI (enabled by --api) 13 | volumes: 14 | - /var/run/docker.sock:/var/run/docker.sock 15 | secrets: 16 | - source: tls-certificate 17 | target: certificate.crt 18 | - source: tls-key 19 | target: key.key 20 | 21 | 22 | # time-series db used to store machine metrics 23 | influxdb: 24 | image: influxdb 25 | volumes: 26 | - influx:/var/lib/influxdb 27 | expose: 28 | - 8086 29 | deploy: 30 | placement: 31 | constraints: 32 | - node.role == manager 33 | 34 | # this "service" exists only to seed the time-series db 35 | init-influx: 36 | depends_on: 37 | - influxdb 38 | image: influxdb 39 | volumes: 40 | - influx:/var/lib/influxdb 41 | deploy: 42 | replicas: 0 43 | command: influx -host influxdb -execute 'CREATE DATABASE cadvisor' 44 | 45 | # metrics front-end 46 | grafana: 47 | build: ./grafana/ 48 | image: knowsuchagency/airflow-grafana 49 | ports: 50 | - "3000:3000" 51 | volumes: 52 | - grafana:/var/lib/grafana 53 | depends_on: 54 | - influxdb 55 | deploy: 56 | placement: 57 | constraints: 58 | - node.role == manager 59 | 60 | # fetches system and container metrics from cluster nodes 61 | cadvisor: 62 | image: google/cadvisor 63 | hostname: '{{.Node.Hostname}}' 64 | command: -logtostderr -docker_only -storage_driver=influxdb -storage_driver_db=cadvisor -storage_driver_host=influxdb:8086 65 | volumes: 66 | - /:/rootfs:ro 67 | - /var/run:/var/run:rw 68 | - /sys:/sys:ro 69 | - /var/lib/docker/:/var/lib/docker:ro 70 | depends_on: 71 | - influxdb 72 | deploy: 73 | mode: global 74 | 75 | # should be eventually swapped out for hosted pg instance 76 | postgres: 77 | image: postgres 78 | env_file: 79 | ./airflow/.env 80 | ports: 81 | - "5432:5432" 82 | expose: 83 | - 5672 84 | deploy: 85 | placement: 86 | constraints: 87 | - node.role == manager 88 | 89 | # celery backend 90 | rabbitmq: 91 | image: rabbitmq:management 92 | restart: always 93 | deploy: 94 | placement: 95 | constraints: 96 | - node.role == manager 97 | 98 | # airflow front-end 99 | webserver: 100 | build: ./airflow 101 | image: knowsuchagency/airflow-base 102 | expose: 103 | - 80 104 | ports: 105 | - "8888:80" 106 | depends_on: 107 | - postgres 108 | - reverse-proxy 109 | command: webserver 110 | env_file: 111 | - ./airflow/.env 112 | - ./airflow/.secrets.env 113 | volumes: 114 | - airflow-home:/airflow 115 | - dags:/dags 116 | - flow-toolz:/flow_toolz 117 | - aws-credentials:/root/.aws 118 | secrets: 119 | - service-account-json 120 | - aws-credentials 121 | deploy: 122 | placement: 123 | constraints: 124 | - node.role == manager 125 | # traefik labels are under deploy for swarm mode 126 | labels: 127 | - "traefik.frontend.rule=Host:your.host.com" 128 | - "traefik.port=80" 129 | - "traefik.enable=true" 130 | labels: 131 | - "traefik.frontend.rule=Host:localhost" 132 | environment: 133 | AIRFLOW__WEBSERVER__AUTHENTICATE: "false" 134 | 135 | # airflow job scheduler 136 | scheduler: 137 | image: 
knowsuchagency/airflow-base 138 | depends_on: 139 | - rabbitmq 140 | - webserver 141 | env_file: 142 | - ./airflow/.env 143 | - ./airflow/.secrets.env 144 | command: scheduler 145 | volumes: 146 | - airflow-home:/airflow 147 | - dags:/dags 148 | - flow-toolz:/flow_toolz 149 | - aws-credentials:/root/.aws 150 | secrets: 151 | - service-account-json 152 | - aws-credentials 153 | deploy: 154 | placement: 155 | constraints: 156 | - node.role == manager 157 | 158 | # executes airflow DAGs 159 | worker: 160 | image: knowsuchagency/airflow-base 161 | depends_on: 162 | - scheduler 163 | env_file: 164 | - ./airflow/.env 165 | - ./airflow/.secrets.env 166 | command: worker 167 | volumes: 168 | - airflow-home:/airflow 169 | - dags:/dags 170 | - flow-toolz:/flow_toolz 171 | - aws-credentials:/root/.aws 172 | secrets: 173 | - service-account-json 174 | - aws-credentials 175 | deploy: 176 | replicas: 3 177 | placement: 178 | preferences: 179 | - spread: node.role == worker 180 | 181 | volumes: 182 | airflow-home: 183 | influx: 184 | grafana: 185 | aws-credentials: 186 | dags: 187 | driver: local 188 | driver_opts: 189 | type: none 190 | device: "${PWD}/airflow/dags" 191 | o: bind 192 | flow-toolz: 193 | driver: local 194 | driver_opts: 195 | type: none 196 | device: "${PWD}/airflow/flow_toolz" 197 | o: bind 198 | 199 | secrets: 200 | service-account-json: 201 | file: ./default-service-account.json 202 | aws-credentials: 203 | file: ./aws-credentials.ini 204 | tls-certificate: 205 | file: ./reverse-proxy/certificate.crt 206 | tls-key: 207 | file: ./reverse-proxy/key.key 208 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Airflow 2 | 3 | This is meant as a template for getting up-and running with 4 | apache airflow quickly using docker compose for local development 5 | and docker swarm on Google Cloud for deployment. 6 | 7 | What this is meant to do is help you establish a baseline deployment/development 8 | environment with sane defaults. 9 | 10 | There are many things that could be improved, but this should get you 11 | up-and-running quickly with some good patterns. 12 | 13 | ## Some of the features 14 | * [invoke] for orchestration and configuration 15 | * [traefik] as edge proxy 16 | * [grafana] as a metrics front-end for your cluster 17 | * `pip install`able `flow_toolz` package for library code 18 | * a recipe for creating new dags that can easily be extended `inv new-dag` 19 | 20 | 21 | # Requirements 22 | 23 | * docker `brew cask install docker-edge` 24 | * python3 `brew install python` 25 | 26 | ## Quickstart 27 | ```bash 28 | # create a virtual environment 29 | python3 -m venv venv 30 | # activate virtual environment 31 | . venv/bin/activate 32 | # install the flow_toolz package 33 | pip install 'airflow/[dev]' 34 | # generate self-signed tls cert and other filestubs 35 | inv bootstrap 36 | # bring up the server for local development 37 | docker-compose up 38 | ``` 39 | 40 | # Authentication 41 | 42 | 43 | You'll need to create two files at the project root for the purposes of authentication. 44 | **They can be empty at first**, just to get the server running, 45 | since docker-compose will expect them to exist. 
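If you only need the empty stubs so the stack can start, the `inv bootstrap` task from the Quickstart will touch them for you; doing it by hand amounts to (a minimal sketch, run from the project root):
```bash
touch aws-credentials.ini default-service-account.json
```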
46 | 
47 | * `aws-credentials.ini`
48 | * `default-service-account.json`
49 | 
50 | ## AWS
51 | 
52 | `aws-credentials.ini`
53 | ```ini
54 | [default]
55 | aws_access_key_id =
56 | aws_secret_access_key =
57 | ```
58 | 
59 | ## GCP
60 | `default-service-account.json`
61 | 
62 | The `default-service-account.json` [service account key] at the project root will be used
63 | to authenticate with Google Cloud by default.
64 | 
65 | ---
66 | 
67 | ## TLS
68 | 
69 | In the [reverse-proxy](./reverse-proxy) folder, you will need
70 | a `certificate.crt` and `key.key` file that you can generate
71 | with the `inv create-certificate` command.
72 | 
73 | This is really just here to get you started; you'll want to configure
74 | traefik to use [letsencrypt] or other means to establish HTTPS on your
75 | production deployment.
76 | 
77 | ## Other
78 | 
79 | For other string-based secrets, you'll need a [`.secrets.env`](./airflow/.secrets.env) file, e.g.:
80 | ```
81 | AIRFLOW_CONN_POSTGRES_MASTER={{password}}
82 | ```
83 | 
84 | In general:
85 | 
86 | Authentication **strings** should go in [the secrets file](./airflow/.secrets.env)
87 | 
88 | Authentication **files** should be set as a docker secret in the [docker compose file](docker-compose.yaml)
89 | 
90 | Secrets SHOULD NOT be checked into version control.
91 | 
92 | # Local Development
93 | 
94 | Initialize the development server (once you have the authentication files described earlier):
95 | 
96 | ``docker-compose up``
97 | 
98 | Note: it may take some time for the docker images to build at first.
99 | 
100 | The airflow UI will now be served at [localhost](http://localhost)
101 | 
102 | The reverse proxy admin panel will be at [localhost:8080](http://localhost:8080)
103 | 
104 | The Grafana dashboard will be at [localhost:3000](http://localhost:3000)
105 | ```
106 | user: admin
107 | pw: admin
108 | ```
109 | 
110 | DAGs and libraries in the [airflow](airflow) folder will automatically
111 | be mounted onto the services in your local deployment and updated on
112 | the running containers in real-time.
113 | 
114 | ## Writing a new DAG
115 | 
116 | There is a [handy dag template](airflow/flow_toolz/templates/dag_template.py)
117 | for new dags.
118 | 
119 | You can use this template to quickly write new dags by using the [task runner](tasks.py):
120 | ```bash
121 | # invoke the new-dag task
122 | # you will be prompted to provide parameters
123 | # such as `dag_id` and `owner`
124 | inv new-dag
125 | ```
126 | 
127 | # Library Code
128 | 
129 | In the [airflow](./airflow) folder, there is a [flow_toolz](airflow/flow_toolz) directory.
130 | That directory is a Python package, meaning it can be `pip install`ed.
131 | 
132 | Code that is shared between dags, or that you want to use outside of airflow (for testing/development purposes),
133 | should be put there.
134 | 
135 | ```
136 | . venv/bin/activate
137 | 
138 | pip install -e './airflow'
139 | 
140 | # in python, I can now
141 | 
142 | import flow_toolz
143 | ...
144 | 
145 | ```
146 | 
147 | # Configuration
148 | 
149 | The infrastructure -- the services and how they'll communicate --
150 | is all described in `docker-compose.yaml`.
151 | 
152 | Cross-service configuration -- environment variables that will exist across different services/machines --
153 | will be in either a `.env` file or `.secrets.env` -- the latter for sensitive information that
154 | should not exist in version control.
155 | 
156 | You'll notice some of these environment variables follow the pattern `AIRFLOW__{foo}__{bar}`.
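For example, the `AIRFLOW__CORE__DAG_CONCURRENCY=4` entry in [the env file](./airflow/.env) is the environment-variable form of the following `airflow.cfg` stanza (shown here only to illustrate the `AIRFLOW__{section}__{key}` mapping):
```ini
[core]
dag_concurrency = 4
```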
157 | 158 | That tells airflow to configure itself with those variables as opposed to their analog in its default config file. 159 | More information on how Airflow reads configuration can be found [at this link](https://airflow.apache.org/howto/set-config.html) 160 | 161 | For configuration related to automated cli tasks [executed via invoke](http://www.pyinvoke.org/), 162 | those are in `invoke.yaml` files and can be overridden by environment variables as well. 163 | For more information on how `invoke` configuration works, [follow this link](http://docs.pyinvoke.org/en/0.11.1/concepts/configuration.html). 164 | 165 | # Deployment 166 | 167 | ### Create the swarm (spins up machines on GCP) 168 | ```bash 169 | inv swarm-up 170 | ``` 171 | 172 | ### Deploy to our swarm 173 | ```bash 174 | inv deploy --prod 175 | ``` 176 | 177 | ## Notes 178 | 179 | * you'll want to change the names of the images in the docker-compose file for your own deployment 180 | * invoke tasks that make use of google cloud i.e. `inv deploy` will expect 181 | a `project` element in the configuration. I have this set in my `/etc/invoke.yaml` 182 | 183 | Here's an example: 184 | ```yaml 185 | gcp: 186 | project: myproject 187 | ``` 188 | 189 | You'll likely also want to change your default host bind **ip** in Docker for Mac 190 | 191 | ![Imgur](https://i.imgur.com/Vu8vgqA.png) 192 | 193 | [invoke]:http://www.pyinvoke.org/ 194 | [traefik]:https://traefik.io/ 195 | [grafana]:https://grafana.com/ 196 | [letsencrypt]:https://letsencrypt.org/ 197 | [service account key]:https://cloud.google.com/iam/docs/creating-managing-service-account-keys 198 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 Stephan Fitzpatrick 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
-------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from concurrent import futures 2 | from functools import partial 3 | from pprint import pprint 4 | from pathlib import Path 5 | import subprocess as sp 6 | import itertools as it 7 | import datetime as dt 8 | import atexit 9 | import os 10 | import re 11 | 12 | from importlib_resources import read_text 13 | 14 | from invoke import task 15 | 16 | from jinja2 import Template 17 | 18 | 19 | shell = partial(sp.run, stdin=sp.PIPE, stdout=sp.PIPE, universal_newlines=True) 20 | 21 | 22 | @task 23 | def swarm_up(c): 24 | """ 25 | Create a docker swarm on google cloud. 26 | """ 27 | 28 | project = c.config.gcp.project 29 | zone = c.config.gcp.zone 30 | machine_type = c.config.gcp.machine_type 31 | managers = c.config.gcp.managers 32 | workers = c.config.gcp.workers 33 | 34 | machines_desired = tuple( 35 | it.chain( 36 | it.product(("manager",), range(managers)), 37 | it.product(("worker",), range(workers)), 38 | ) 39 | ) 40 | 41 | with futures.ThreadPoolExecutor() as ex: 42 | 43 | for role, number in machines_desired: 44 | 45 | ex.submit( 46 | c.run, 47 | f""" 48 | docker-machine create \ 49 | --driver google \ 50 | --google-project {project} \ 51 | --google-zone {zone} \ 52 | --google-machine-type {machine_type} \ 53 | --google-tags docker \ 54 | swarm-{role}-{number} 55 | """, 56 | warn=True, 57 | ) 58 | 59 | for role, number in machines_desired: 60 | 61 | machine_name = f"swarm-{role}-{number}" 62 | 63 | if role == "manager" and number == 0: 64 | 65 | manager_name = machine_name 66 | 67 | manager_ip = c.run( 68 | f""" 69 | gcloud compute instances describe \ 70 | --project {project} \ 71 | --zone {zone} \ 72 | --format 'value(networkInterfaces[0].networkIP)' \ 73 | {machine_name} 74 | """ 75 | ).stdout.strip() 76 | 77 | c.run( 78 | f""" 79 | docker-machine ssh {machine_name} sudo docker swarm init \ 80 | --advertise-addr {manager_ip} 81 | """, 82 | warn=True, 83 | ) 84 | elif role == "manager": 85 | manager_token = c.run( 86 | f"docker-machine ssh {manager_name} sudo docker swarm join-token manager | grep token |" 87 | + " awk '{ print $5 }'" 88 | ).stdout.strip() 89 | 90 | c.run( 91 | f""" 92 | docker-machine ssh {machine_name} sudo docker swarm join \ 93 | --token {manager_token} \ 94 | {manager_ip}:2377 95 | """, 96 | warn=True, 97 | ) 98 | else: 99 | worker_token = c.run( 100 | f"docker-machine ssh {manager_name}" 101 | + " sudo docker swarm join-token worker | grep token | awk '{ print $5 }'" 102 | ).stdout.strip() 103 | 104 | c.run( 105 | f""" 106 | docker-machine ssh {machine_name} sudo docker swarm join \ 107 | --token {worker_token} \ 108 | {manager_ip}:2377 109 | """, 110 | warn=True, 111 | ) 112 | 113 | 114 | @task 115 | def swarm_down(c): 116 | """Take the swarm workers down.""" 117 | configure_prod_or_local(c, prod=False) 118 | with futures.ThreadPoolExecutor() as ex: 119 | for line in c.run("docker-machine ls", hide=True).stdout.splitlines()[ 120 | 1: 121 | ]: 122 | if not any( 123 | line.startswith(n) for n in ("swarm-manager", "swarm-worker") 124 | ): 125 | continue 126 | name, *_ = line.split() 127 | ex.submit(c.run, f"docker-machine rm -f {name}", warn=True) 128 | 129 | 130 | @task 131 | def rebuild(c): 132 | """Rebuild and push to remote repository.""" 133 | c.run("docker-compose build") 134 | c.run("docker-compose push", pty=True) 135 | 136 | 137 | @task(aliases=["up"]) 138 | def deploy(c, 
rebuild_=False, stack=False, prod=False, ngrok=False):
139 |     """
140 |     Deploy the airflow instance.
141 | 
142 |     Args:
143 |         c: invoke context
144 |         rebuild_: rebuild the images prior to deployment
145 |         stack: use docker swarm mode
146 |         prod: deploy to production
147 |         ngrok: deploy locally, but expose to the internet via ngrok
148 | 
149 |     """
150 |     configure_prod_or_local(c, prod)
151 |     if ngrok:
152 |         if rebuild_:
153 |             rebuild(c)
154 |         atexit.register(c.run, "docker-compose down")
155 |         c.run("docker-compose up -d")
156 |         c.run("ngrok http 8080", pty=True)
157 |     elif prod or stack:
158 |         if prod or rebuild_:
159 |             rebuild(c)
160 |         c.run(
161 |             f"docker stack deploy -c docker-compose.yaml -c docker-compose.prod.yaml {c.config.stack_name}"
162 |         )
163 |     else:
164 |         if rebuild_:
165 |             rebuild(c)
166 |         c.run("docker-compose up")
167 | 
168 | 
169 | @task(aliases=["down"])
170 | def undeploy(c, prod=False):
171 |     """Tear down the code that's deployed."""
172 |     configure_prod_or_local(c, prod)
173 |     c.run(f"docker stack remove {c.config.stack_name}")
174 | 
175 | 
176 | @task
177 | def status(c, prod=False):
178 |     """View the status of the deployed services."""
179 |     configure_prod_or_local(c, prod)
180 |     c.run("docker service ls")
181 | 
182 | 
183 | @task(aliases=["add-ingress"])
184 | def expose_thyself(
185 |     c, name=None, rules="tcp:80,tcp:443", service="reverse-proxy"
186 | ):
187 |     """Expose our app to the outside world."""
188 | 
189 |     name = c.config.stack_name + "-ingress" if name is None else name
190 | 
191 |     c.run(
192 |         f"""
193 |         gcloud compute firewall-rules create '{name}' \
194 |         --project={c.config.gcp.project} \
195 |         --description='Allow ingress into our docker swarm for {service}' \
196 |         --direction=INGRESS \
197 |         --priority=1000 \
198 |         --network=default \
199 |         --action=ALLOW \
200 |         --rules={rules} \
201 |         --target-tags=docker-machine
202 |         """
203 |     )
204 | 
205 | 
206 | @task(aliases=["rm-ingress"])
207 | def unexpose_thyself(c, name=None):
208 |     """Remove the firewall rule exposing our app to the outside world."""
209 | 
210 |     name = c.config.stack_name + "-ingress" if name is None else name
211 | 
212 |     c.run(f"gcloud compute firewall-rules delete {name}")
213 | 
214 | 
215 | @task
216 | def get_swarm_machine_env(c, machine="swarm-manager-0"):
217 |     """Return a swarm machine's docker env vars as a dict."""
218 |     result = {}
219 |     output = shell(["docker-machine", "env", machine]).stdout
220 |     for line in output.splitlines():
221 |         if "=" in line:
222 |             key, value = line.split("=")
223 |             *_, key = key.split()
224 |             value = value.strip('"')
225 |             result[key] = value
226 |     pprint(result)
227 |     return result
228 | 
229 | 
230 | def configure_prod_or_local(c, prod=False):
231 |     """Configure the execution environment based on whether we're deploying locally or to prod."""
232 |     env_vars = (
233 |         "DOCKER_TLS_VERIFY",
234 |         "DOCKER_HOST",
235 |         "DOCKER_CERT_PATH",
236 |         "DOCKER_MACHINE_NAME",
237 |     )
238 | 
239 |     if prod:
240 |         c.config.run.update({"env": get_swarm_machine_env(c)})
241 |     else:
242 |         for var in (v for v in env_vars if v in os.environ):
243 |             os.environ.pop(var)
244 | 
245 | 
246 | @task
247 | def encrypt(
248 |     c, source, key=None, destination=None, location=None, keyring=None
249 | ):
250 |     """
251 |     Encrypt a file using google cloud kms.
252 | 
253 |     Args:
254 |         source: The source file to be encrypted
255 |         key: The name of the key
256 |         destination: The file that will be created by encryption
257 |         location: gcp zone
258 |         keyring: gcp kms keyring
259 |     """
260 |     destination = destination if destination is not None else source + ".enc"
261 |     location = location if location is not None else c.config.gcp.kms.location
262 |     keyring = keyring if keyring is not None else c.config.gcp.kms.keyring
263 |     key = key if key is not None else c.config.gcp.kms.key
264 | 
265 |     c.run(
266 |         f"gcloud kms encrypt --key={key} --location={location} --keyring={keyring} "
267 |         f"--plaintext-file={source} --ciphertext-file={destination}"
268 |     )
269 | 
270 | 
271 | @task
272 | def encrypt_files(c):
273 |     """
274 |     Encrypt the files listed under encrypt.files in invoke.yaml
275 |     """
276 | 
277 |     for f in c.config.encrypt.files:
278 |         encrypt(c, Path(f).__fspath__())
279 | 
280 | 
281 | @task(aliases=["gen-cert"])
282 | def create_certificate(c, days=365):
283 |     """
284 |     Generate a TLS certificate and key pair.
285 | 
286 |     Args:
287 |         c: invoke context
288 |         days: the number of days till your certificate expires
289 | 
290 |     """
291 |     c.run(
292 |         f"openssl req -x509 -nodes -days {days} -newkey rsa:2048 -keyout "
293 |         f"reverse-proxy/key.key -out reverse-proxy/certificate.crt"
294 |     )
295 | 
296 | 
297 | @task
298 | def new_dag(
299 |     c,
300 |     dag_id=None,
301 |     owner=None,
302 |     email=None,
303 |     start_date=None,
304 |     schedule_interval=None,
305 |     force=False,
306 | ):
307 |     """
308 |     Render a new dag and put it in the dags folder.
309 | 
310 |     Args:
311 |         c: invoke context
312 |         dag_id: e.g. my_dag_v1_p3 (dag_name, version, priority[1-high, 2-med, 3-low])
313 |         owner: you
314 |         email: your email
315 |         start_date: date in iso format
316 |         schedule_interval: cron expression
317 |         force: overwrite dag module if it exists
318 | 
319 |     """
320 | 
321 |     yesterday = dt.date.today() - dt.timedelta(days=1)
322 |     # v - version
323 |     # p - priority (1, 2, 3) == (high, medium, low)
324 |     defaults = {
325 |         "dag_id": "example_dag_v1_p3",
326 |         "owner": "Stephan Fitzpatrick",
327 |         "email": "knowsuchagency@gmail.com",
328 |         "start_date": yesterday.isoformat(),
329 |         "schedule_interval": "0 7 * * *",
330 |     }
331 | 
332 |     template_text = read_text("flow_toolz.templates", "dag_template.py")
333 | 
334 |     template = Template(template_text)
335 | 
336 |     args = {}
337 | 
338 |     locals_ = locals()
339 | 
340 |     print(
341 |         "rendering your new dag. please enter the following values:",
342 |         end=os.linesep * 2,
343 |     )
344 | 
345 |     for key, default_value in defaults.items():
346 | 
347 |         explicit_value = locals_[key]
348 | 
349 |         if explicit_value:
350 |             args[key] = explicit_value
351 |         else:
352 |             value = input(f"{key} (default: {default_value}) -> ").strip()
353 | 
354 |             args[key] = value or defaults[key]
355 | 
356 |     rendered_text = template.render(**args)
357 | 
358 |     print()
359 | 
360 |     filename = re.sub(r"_v[^.]+", "", args["dag_id"], flags=re.IGNORECASE) + ".py"
361 | 
362 |     dag_path = Path("airflow", "dags", filename)
363 | 
364 |     if dag_path.exists() and not force:
365 |         raise SystemExit(f"{filename} already exists.
aborting") 366 | 367 | print(f"writing dag to: {dag_path}") 368 | 369 | dag_path.write_text(rendered_text + os.linesep) 370 | 371 | 372 | @task 373 | def connect(c): 374 | """Connect to airflow deployment.""" 375 | manager_ip = c.run("docker-machine ip swarm-manager-0").stdout 376 | c.run(f"open http://{manager_ip}") 377 | 378 | 379 | @task(aliases=["bootstrap"]) 380 | def generate_auth_files(c): 381 | """Generate ssl keys and other file stubs for authentication purposes.""" 382 | create_certificate(c) 383 | 384 | for filename in c.config.encrypt.files: 385 | filepath = Path(filename) 386 | if not filepath.exists(): 387 | filepath.touch() 388 | -------------------------------------------------------------------------------- /grafana/dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_INFLUX", 5 | "label": "influx", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "influxdb", 9 | "pluginName": "InfluxDB" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "4.2.0" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "influxdb", 28 | "name": "InfluxDB", 29 | "version": "1.0.0" 30 | } 31 | ], 32 | "annotations": { 33 | "list": [] 34 | }, 35 | "editable": true, 36 | "gnetId": null, 37 | "graphTooltip": 0, 38 | "hideControls": false, 39 | "id": 2, 40 | "links": [], 41 | "refresh": "5s", 42 | "rows": [ 43 | { 44 | "collapse": false, 45 | "height": 250, 46 | "panels": [ 47 | { 48 | "aliasColors": {}, 49 | "bars": false, 50 | "dashLength": 10, 51 | "dashes": false, 52 | "datasource": "influx", 53 | "fill": 1, 54 | "id": 1, 55 | "interval": "", 56 | "legend": { 57 | "alignAsTable": true, 58 | "avg": true, 59 | "current": true, 60 | "max": true, 61 | "min": true, 62 | "rightSide": false, 63 | "show": true, 64 | "total": true, 65 | "values": true 66 | }, 67 | "lines": true, 68 | "linewidth": 1, 69 | "links": [], 70 | "nullPointMode": "connected", 71 | "percentage": false, 72 | "pointradius": 5, 73 | "points": false, 74 | "renderer": "flot", 75 | "seriesOverrides": [], 76 | "spaceLength": 10, 77 | "span": 6, 78 | "stack": false, 79 | "steppedLine": false, 80 | "targets": [ 81 | { 82 | "alias": "Memory {host: $tag_machine, container: $tag_container_name}", 83 | "dsType": "influxdb", 84 | "groupBy": [ 85 | { 86 | "params": [ 87 | "$__interval" 88 | ], 89 | "type": "time" 90 | }, 91 | { 92 | "params": [ 93 | "machine" 94 | ], 95 | "type": "tag" 96 | }, 97 | { 98 | "params": [ 99 | "container_name" 100 | ], 101 | "type": "tag" 102 | } 103 | ], 104 | "measurement": "memory_usage", 105 | "orderByTime": "ASC", 106 | "policy": "default", 107 | "query": "SELECT \"value\" FROM \"memory_usage\" WHERE \"container_name\" =~ /^$container$/ AND \"machine\" =~ /^$host$/ AND $timeFilter", 108 | "rawQuery": false, 109 | "refId": "A", 110 | "resultFormat": "time_series", 111 | "select": [ 112 | [ 113 | { 114 | "params": [ 115 | "value" 116 | ], 117 | "type": "field" 118 | }, 119 | { 120 | "params": [], 121 | "type": "mean" 122 | } 123 | ] 124 | ], 125 | "tags": [ 126 | { 127 | "key": "container_name", 128 | "operator": "=~", 129 | "value": "/^$container$*/" 130 | }, 131 | { 132 | "condition": "AND", 133 | "key": "machine", 134 | "operator": "=~", 135 | "value": "/^$host$/" 136 | } 137 | ] 138 | } 139 | ], 140 | "thresholds": [], 141 | 
"timeFrom": null, 142 | "timeShift": null, 143 | "title": "Memory", 144 | "tooltip": { 145 | "shared": true, 146 | "sort": 0, 147 | "value_type": "individual" 148 | }, 149 | "type": "graph", 150 | "xaxis": { 151 | "buckets": null, 152 | "mode": "time", 153 | "name": null, 154 | "show": true, 155 | "values": [] 156 | }, 157 | "yaxes": [ 158 | { 159 | "format": "decbytes", 160 | "label": null, 161 | "logBase": 1, 162 | "max": null, 163 | "min": null, 164 | "show": true 165 | }, 166 | { 167 | "format": "short", 168 | "label": null, 169 | "logBase": 1, 170 | "max": null, 171 | "min": null, 172 | "show": true 173 | } 174 | ] 175 | }, 176 | { 177 | "aliasColors": {}, 178 | "bars": false, 179 | "dashLength": 10, 180 | "dashes": false, 181 | "datasource": "influx", 182 | "fill": 1, 183 | "id": 2, 184 | "legend": { 185 | "alignAsTable": true, 186 | "avg": true, 187 | "current": true, 188 | "max": true, 189 | "min": true, 190 | "show": true, 191 | "total": true, 192 | "values": true 193 | }, 194 | "lines": true, 195 | "linewidth": 1, 196 | "links": [], 197 | "nullPointMode": "connected", 198 | "percentage": false, 199 | "pointradius": 5, 200 | "points": false, 201 | "renderer": "flot", 202 | "seriesOverrides": [], 203 | "spaceLength": 10, 204 | "span": 6, 205 | "stack": false, 206 | "steppedLine": false, 207 | "targets": [ 208 | { 209 | "alias": "CPU {host: $tag_machine, container: $tag_container_name}", 210 | "dsType": "influxdb", 211 | "groupBy": [ 212 | { 213 | "params": [ 214 | "$interval" 215 | ], 216 | "type": "time" 217 | }, 218 | { 219 | "params": [ 220 | "machine" 221 | ], 222 | "type": "tag" 223 | }, 224 | { 225 | "params": [ 226 | "container_name" 227 | ], 228 | "type": "tag" 229 | } 230 | ], 231 | "measurement": "cpu_usage_total", 232 | "orderByTime": "ASC", 233 | "policy": "default", 234 | "refId": "A", 235 | "resultFormat": "time_series", 236 | "select": [ 237 | [ 238 | { 239 | "params": [ 240 | "value" 241 | ], 242 | "type": "field" 243 | }, 244 | { 245 | "params": [], 246 | "type": "mean" 247 | }, 248 | { 249 | "params": [ 250 | "10s" 251 | ], 252 | "type": "derivative" 253 | } 254 | ] 255 | ], 256 | "tags": [ 257 | { 258 | "key": "container_name", 259 | "operator": "=~", 260 | "value": "/^$container$*/" 261 | }, 262 | { 263 | "condition": "AND", 264 | "key": "machine", 265 | "operator": "=~", 266 | "value": "/^$host$/" 267 | } 268 | ] 269 | } 270 | ], 271 | "thresholds": [], 272 | "timeFrom": null, 273 | "timeShift": null, 274 | "title": "CPU", 275 | "tooltip": { 276 | "shared": true, 277 | "sort": 0, 278 | "value_type": "individual" 279 | }, 280 | "type": "graph", 281 | "xaxis": { 282 | "buckets": null, 283 | "mode": "time", 284 | "name": null, 285 | "show": true, 286 | "values": [] 287 | }, 288 | "yaxes": [ 289 | { 290 | "format": "hertz", 291 | "label": null, 292 | "logBase": 1, 293 | "max": null, 294 | "min": null, 295 | "show": true 296 | }, 297 | { 298 | "format": "short", 299 | "label": null, 300 | "logBase": 1, 301 | "max": null, 302 | "min": null, 303 | "show": true 304 | } 305 | ] 306 | } 307 | ], 308 | "repeat": null, 309 | "repeatIteration": null, 310 | "repeatRowId": null, 311 | "showTitle": false, 312 | "title": "Dashboard Row", 313 | "titleSize": "h6" 314 | }, 315 | { 316 | "collapse": false, 317 | "height": 250, 318 | "panels": [ 319 | { 320 | "aliasColors": {}, 321 | "bars": false, 322 | "dashLength": 10, 323 | "dashes": false, 324 | "datasource": "influx", 325 | "fill": 1, 326 | "id": 3, 327 | "legend": { 328 | "alignAsTable": true, 329 | "avg": true, 330 | 
"current": true, 331 | "max": true, 332 | "min": true, 333 | "show": true, 334 | "total": true, 335 | "values": true 336 | }, 337 | "lines": true, 338 | "linewidth": 1, 339 | "links": [], 340 | "nullPointMode": "connected", 341 | "percentage": false, 342 | "pointradius": 5, 343 | "points": false, 344 | "renderer": "flot", 345 | "seriesOverrides": [], 346 | "spaceLength": 10, 347 | "span": 6, 348 | "stack": false, 349 | "steppedLine": false, 350 | "targets": [ 351 | { 352 | "alias": "Usage {host: $tag_machine, container: $tag_container_name}", 353 | "dsType": "influxdb", 354 | "groupBy": [ 355 | { 356 | "params": [ 357 | "$interval" 358 | ], 359 | "type": "time" 360 | }, 361 | { 362 | "params": [ 363 | "container_name" 364 | ], 365 | "type": "tag" 366 | }, 367 | { 368 | "params": [ 369 | "machine" 370 | ], 371 | "type": "tag" 372 | } 373 | ], 374 | "measurement": "fs_usage", 375 | "orderByTime": "ASC", 376 | "policy": "default", 377 | "refId": "A", 378 | "resultFormat": "time_series", 379 | "select": [ 380 | [ 381 | { 382 | "params": [ 383 | "value" 384 | ], 385 | "type": "field" 386 | }, 387 | { 388 | "params": [], 389 | "type": "mean" 390 | } 391 | ] 392 | ], 393 | "tags": [ 394 | { 395 | "key": "machine", 396 | "operator": "=~", 397 | "value": "/^$host$/" 398 | }, 399 | { 400 | "condition": "AND", 401 | "key": "container_name", 402 | "operator": "=~", 403 | "value": "/^$container$*/" 404 | } 405 | ] 406 | }, 407 | { 408 | "alias": "Limit {host: $tag_machine, container: $tag_container_name}", 409 | "dsType": "influxdb", 410 | "groupBy": [ 411 | { 412 | "params": [ 413 | "$interval" 414 | ], 415 | "type": "time" 416 | }, 417 | { 418 | "params": [ 419 | "container_name" 420 | ], 421 | "type": "tag" 422 | }, 423 | { 424 | "params": [ 425 | "machine" 426 | ], 427 | "type": "tag" 428 | } 429 | ], 430 | "measurement": "fs_limit", 431 | "orderByTime": "ASC", 432 | "policy": "default", 433 | "refId": "B", 434 | "resultFormat": "time_series", 435 | "select": [ 436 | [ 437 | { 438 | "params": [ 439 | "value" 440 | ], 441 | "type": "field" 442 | }, 443 | { 444 | "params": [], 445 | "type": "mean" 446 | } 447 | ] 448 | ], 449 | "tags": [ 450 | { 451 | "key": "machine", 452 | "operator": "=~", 453 | "value": "/^$host$/" 454 | }, 455 | { 456 | "condition": "AND", 457 | "key": "container_name", 458 | "operator": "=~", 459 | "value": "/^$container$*/" 460 | } 461 | ] 462 | } 463 | ], 464 | "thresholds": [], 465 | "timeFrom": null, 466 | "timeShift": null, 467 | "title": "File System", 468 | "tooltip": { 469 | "shared": true, 470 | "sort": 0, 471 | "value_type": "individual" 472 | }, 473 | "type": "graph", 474 | "xaxis": { 475 | "buckets": null, 476 | "mode": "time", 477 | "name": null, 478 | "show": true, 479 | "values": [] 480 | }, 481 | "yaxes": [ 482 | { 483 | "format": "decbytes", 484 | "label": null, 485 | "logBase": 1, 486 | "max": null, 487 | "min": null, 488 | "show": true 489 | }, 490 | { 491 | "format": "short", 492 | "label": null, 493 | "logBase": 1, 494 | "max": null, 495 | "min": null, 496 | "show": true 497 | } 498 | ] 499 | }, 500 | { 501 | "aliasColors": {}, 502 | "bars": false, 503 | "dashLength": 10, 504 | "dashes": false, 505 | "datasource": "influx", 506 | "fill": 1, 507 | "id": 4, 508 | "legend": { 509 | "alignAsTable": true, 510 | "avg": true, 511 | "current": true, 512 | "max": true, 513 | "min": true, 514 | "show": true, 515 | "total": true, 516 | "values": true 517 | }, 518 | "lines": true, 519 | "linewidth": 1, 520 | "links": [], 521 | "nullPointMode": "connected", 522 | 
"percentage": false, 523 | "pointradius": 5, 524 | "points": false, 525 | "renderer": "flot", 526 | "seriesOverrides": [], 527 | "spaceLength": 10, 528 | "span": 6, 529 | "stack": false, 530 | "steppedLine": false, 531 | "targets": [ 532 | { 533 | "alias": "RX {host: $tag_machine, container: $tag_container_name}", 534 | "dsType": "influxdb", 535 | "groupBy": [ 536 | { 537 | "params": [ 538 | "$interval" 539 | ], 540 | "type": "time" 541 | }, 542 | { 543 | "params": [ 544 | "container_name" 545 | ], 546 | "type": "tag" 547 | }, 548 | { 549 | "params": [ 550 | "machine" 551 | ], 552 | "type": "tag" 553 | } 554 | ], 555 | "measurement": "rx_bytes", 556 | "orderByTime": "ASC", 557 | "policy": "default", 558 | "refId": "A", 559 | "resultFormat": "time_series", 560 | "select": [ 561 | [ 562 | { 563 | "params": [ 564 | "value" 565 | ], 566 | "type": "field" 567 | }, 568 | { 569 | "params": [], 570 | "type": "mean" 571 | }, 572 | { 573 | "params": [ 574 | "10s" 575 | ], 576 | "type": "derivative" 577 | } 578 | ] 579 | ], 580 | "tags": [ 581 | { 582 | "key": "machine", 583 | "operator": "=~", 584 | "value": "/^$host$/" 585 | }, 586 | { 587 | "condition": "AND", 588 | "key": "container_name", 589 | "operator": "=~", 590 | "value": "/^$container$*/" 591 | } 592 | ] 593 | }, 594 | { 595 | "alias": "TX {host: $tag_machine, container: $tag_container_name}", 596 | "dsType": "influxdb", 597 | "groupBy": [ 598 | { 599 | "params": [ 600 | "$interval" 601 | ], 602 | "type": "time" 603 | }, 604 | { 605 | "params": [ 606 | "container_name" 607 | ], 608 | "type": "tag" 609 | }, 610 | { 611 | "params": [ 612 | "machine" 613 | ], 614 | "type": "tag" 615 | } 616 | ], 617 | "measurement": "tx_bytes", 618 | "orderByTime": "ASC", 619 | "policy": "default", 620 | "refId": "B", 621 | "resultFormat": "time_series", 622 | "select": [ 623 | [ 624 | { 625 | "params": [ 626 | "value" 627 | ], 628 | "type": "field" 629 | }, 630 | { 631 | "params": [], 632 | "type": "mean" 633 | }, 634 | { 635 | "params": [ 636 | "10s" 637 | ], 638 | "type": "derivative" 639 | } 640 | ] 641 | ], 642 | "tags": [ 643 | { 644 | "key": "machine", 645 | "operator": "=~", 646 | "value": "/^$host$/" 647 | }, 648 | { 649 | "condition": "AND", 650 | "key": "container_name", 651 | "operator": "=~", 652 | "value": "/^$container$*/" 653 | } 654 | ] 655 | } 656 | ], 657 | "thresholds": [], 658 | "timeFrom": null, 659 | "timeShift": null, 660 | "title": "Network", 661 | "tooltip": { 662 | "shared": true, 663 | "sort": 0, 664 | "value_type": "individual" 665 | }, 666 | "type": "graph", 667 | "xaxis": { 668 | "buckets": null, 669 | "mode": "time", 670 | "name": null, 671 | "show": true, 672 | "values": [] 673 | }, 674 | "yaxes": [ 675 | { 676 | "format": "Bps", 677 | "label": null, 678 | "logBase": 1, 679 | "max": null, 680 | "min": null, 681 | "show": true 682 | }, 683 | { 684 | "format": "short", 685 | "label": null, 686 | "logBase": 1, 687 | "max": null, 688 | "min": null, 689 | "show": true 690 | } 691 | ] 692 | } 693 | ], 694 | "repeat": null, 695 | "repeatIteration": null, 696 | "repeatRowId": null, 697 | "showTitle": false, 698 | "title": "Dashboard Row", 699 | "titleSize": "h6" 700 | } 701 | ], 702 | "schemaVersion": 14, 703 | "style": "dark", 704 | "tags": [], 705 | "templating": { 706 | "list": [ 707 | { 708 | "allValue": "", 709 | "current": { 710 | "text": "backbox.corde.org", 711 | "value": "backbox.corde.org" 712 | }, 713 | "datasource": "influx", 714 | "hide": 0, 715 | "includeAll": true, 716 | "label": "Host", 717 | "multi": false, 718 | 
"name": "host", 719 | "options": [], 720 | "query": "show tag values with key = \"machine\"", 721 | "refresh": 1, 722 | "regex": "", 723 | "sort": 0, 724 | "tagValuesQuery": "", 725 | "tags": [], 726 | "tagsQuery": "", 727 | "type": "query", 728 | "useTags": false 729 | }, 730 | { 731 | "allValue": null, 732 | "current": { 733 | "text": "/", 734 | "value": "/" 735 | }, 736 | "datasource": "influx", 737 | "hide": 0, 738 | "includeAll": false, 739 | "label": "Container", 740 | "multi": false, 741 | "name": "container", 742 | "options": [], 743 | "query": "show tag values with key = \"container_name\" WHERE machine =~ /^$host$/", 744 | "refresh": 1, 745 | "regex": "/([^.]+)/", 746 | "sort": 0, 747 | "tagValuesQuery": "", 748 | "tags": [], 749 | "tagsQuery": "", 750 | "type": "query", 751 | "useTags": false 752 | } 753 | ] 754 | }, 755 | "time": { 756 | "from": "now-6h", 757 | "to": "now" 758 | }, 759 | "timepicker": { 760 | "refresh_intervals": [ 761 | "5s", 762 | "10s", 763 | "30s", 764 | "1m", 765 | "5m", 766 | "15m", 767 | "30m", 768 | "1h", 769 | "2h", 770 | "1d" 771 | ], 772 | "time_options": [ 773 | "5m", 774 | "15m", 775 | "1h", 776 | "6h", 777 | "12h", 778 | "24h", 779 | "2d", 780 | "7d", 781 | "30d" 782 | ] 783 | }, 784 | "timezone": "browser", 785 | "title": "cAdvisor", 786 | "version": 2 787 | } --------------------------------------------------------------------------------