├── .gitignore ├── version ├── docs ├── .gitignore ├── mkdocs.yml └── docs │ ├── index.md │ ├── terminator.md │ ├── frontend.md │ ├── tracing.md │ ├── statuses.md │ ├── timeouts.md │ ├── scaling.md │ ├── overview.md │ ├── quickstart.md │ └── deployment.md ├── fresh.conf ├── frontend ├── src │ ├── components │ │ ├── ActivationFormatter │ │ │ ├── ActivationFormatter.scss │ │ │ └── ActivationFormatter.jsx │ │ ├── Menu │ │ │ ├── Menu.scss │ │ │ └── Menu.jsx │ │ ├── QueueSelector │ │ │ ├── QueueSelector.scss │ │ │ └── QueueSelector.jsx │ │ ├── StatusSelector │ │ │ ├── StatusSelector.scss │ │ │ └── StatusSelector.jsx │ │ ├── Search │ │ │ ├── Search.scss │ │ │ └── Search.jsx │ │ ├── SearchBox │ │ │ ├── SearchBox.scss │ │ │ └── SearchBox.jsx │ │ ├── CommandLineFormatter │ │ │ ├── CommandLineFormatter.scss │ │ │ └── CommandLineFormatter.jsx │ │ ├── ImageFormatter │ │ │ ├── ImageFormatter.scss │ │ │ └── ImageFormatter.jsx │ │ ├── StatusFormatter │ │ │ ├── StatusFormatter.scss │ │ │ └── StatusFormatter.jsx │ │ ├── SectionLoader │ │ │ ├── SectionLoader.jsx │ │ │ └── SectionLoader.scss │ │ ├── JobQueueRowRenderer │ │ │ └── JobQueueRowRenderer.jsx │ │ ├── RowRenderer │ │ │ └── RowRenderer.jsx │ │ ├── DateTimeFormatter │ │ │ └── DateTimeFormatter.jsx │ │ ├── Terminal │ │ │ ├── Terminal.scss │ │ │ └── Terminal.jsx │ │ ├── JobLinkFormatter │ │ │ └── JobLinkFormatter.jsx │ │ ├── DurationFormatter │ │ │ └── DurationFormatter.jsx │ │ └── NameFormatter │ │ │ └── NameFormatter.jsx │ ├── containers │ │ └── LayoutContainer │ │ │ ├── LayoutContainer.scss │ │ │ └── LayoutContainer.jsx │ ├── utils │ │ ├── actionReducer.js │ │ ├── debounce.js │ │ └── getChartColor.js │ ├── pages │ │ ├── JobQueuesPage │ │ │ ├── JobQueuesPage.scss │ │ │ └── JobQueuesPage.jsx │ │ ├── JobPage │ │ │ └── JobPage.scss │ │ ├── JobsPage │ │ │ ├── JobsPage.scss │ │ │ └── JobsPage.jsx │ │ └── StatsPage │ │ │ └── StatsPage.scss │ ├── stores │ │ ├── layout.js │ │ ├── index.js │ │ ├── jobqueue.js │ │ └── status.js │ ├── index.jsx │ ├── index.scss │ └── api │ │ ├── api.js │ │ └── jobs.json ├── public │ ├── favicon.ico │ ├── manifest.json │ └── index.html ├── README.md ├── deploy.sh ├── Dockerfile ├── .gitignore ├── .babelrc ├── .editorconfig ├── package.json ├── .eslintrc └── webpack.config.js ├── screenshot.png ├── migrations ├── 00010_add_job_status_index.sql ├── 00005_add_exitcode.sql ├── 00006_add_log_stream_name.sql ├── 00003_add_status_reason.sql ├── 00004_add_run_started_at_column.sql ├── 00020_bigger_job_id.sql ├── 00017_add_job_queue_timestamp_index_to_jobs.sql ├── 00023_add_array_properties_column.sql ├── 00012_add_activated_job_queues.sql ├── 00011_add_kill_requested_column_to_jobs.sql ├── 00019_add_forced_scaling_column.sql ├── 00009_add_indexes_to_event_logs.sql ├── 00022_reduced_trigram_index.sql ├── 00007_add_compute_environment_event_log.sql ├── 00008_add_job_summary_event_log.sql ├── 00013_add_task_arn_instance_id_table.sql ├── 00021_single_trigram_index.sql ├── 00001_jobs.sql ├── 00015_pg_trgm_gin_indexes.sql ├── 00014_add_instance_id_activity.sql ├── 00018_job_status_events.sql ├── 00016_revert_jobs_full_text_search.sql └── 00002_jobs_full_text_search.sql ├── test.toml ├── jobs ├── jobs_test.go ├── timeout_killer.go ├── killer_handler.go ├── compute_environment_monitor.go ├── scaler.go ├── jobs.go └── monitor_ecs_clusters.go ├── logentries.go ├── batchiepatchie-dockercompose-config.toml ├── CONTRIBUTORS.md ├── Dockerfile ├── README.md ├── LICENSE ├── envsubstituter ├── envsubstituter_test.go └── envsubstituter.go ├── 
docker_run.sh ├── docker-compose.yml ├── fetcher └── fetcher.go ├── awsclients └── awsclients.go ├── go.mod ├── handlers ├── job_status_subscriptions.go └── job_status_notification.go ├── config └── config.go └── batchiepatchie.go /.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | -------------------------------------------------------------------------------- /version: -------------------------------------------------------------------------------- 1 | noversion 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | site/ 2 | -------------------------------------------------------------------------------- /fresh.conf: -------------------------------------------------------------------------------- 1 | ignored: frontend 2 | -------------------------------------------------------------------------------- /frontend/src/components/ActivationFormatter/ActivationFormatter.scss: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdRoll/batchiepatchie/HEAD/screenshot.png -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdRoll/batchiepatchie/HEAD/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/src/components/Menu/Menu.scss: -------------------------------------------------------------------------------- 1 | .menu { 2 | .nav-tabs { 3 | margin-bottom: 12px; 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /frontend/src/components/QueueSelector/QueueSelector.scss: -------------------------------------------------------------------------------- 1 | .queue-selector { 2 | float: left; 3 | margin-right: 12px; 4 | width: 400px; 5 | } 6 | -------------------------------------------------------------------------------- /frontend/src/components/StatusSelector/StatusSelector.scss: -------------------------------------------------------------------------------- 1 | .status-selector { 2 | float: left; 3 | margin-right: 12px; 4 | width: 250px; 5 | } 6 | -------------------------------------------------------------------------------- /frontend/src/containers/LayoutContainer/LayoutContainer.scss: -------------------------------------------------------------------------------- 1 | .layout-container { 2 | .search { 3 | margin-top: 12px; 4 | margin-bottom: -12px; 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # Batchiepatchie Frontend 2 | 3 | * `npm install` 4 | * `npm run dev` -- for dev server 5 | * `npm run build` -- for unminified build 6 | * `npm run build:dist` -- for minified build 7 | 8 | The frontend will also work with `yarn`: 9 | 10 | * `yarn dev` 11 | -------------------------------------------------------------------------------- /frontend/src/components/Search/Search.scss: 
-------------------------------------------------------------------------------- 1 | .search { 2 | .lds-spinner { 3 | float: right; 4 | margin-right: -24px; 5 | margin-top: 6px; 6 | } 7 | } 8 | 9 | .search-info { 10 | font-size: 16px; 11 | margin-left: 6px; 12 | align-self: center; 13 | } 14 | -------------------------------------------------------------------------------- /frontend/src/utils/actionReducer.js: -------------------------------------------------------------------------------- 1 | export default function actionReducer(ACTION_HANDLERS, initialState) { 2 | return function reducer(state = initialState, action) { 3 | const handler = ACTION_HANDLERS[action && action.type]; 4 | return handler ? handler(state, action) : state; 5 | }; 6 | } 7 | -------------------------------------------------------------------------------- /migrations/00010_add_job_status_index.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | CREATE INDEX jobs_status ON jobs (status); 4 | 5 | -- +goose Down 6 | -- SQL in this section is executed when the migration is rolled back. 7 | DROP INDEX jobs_status; 8 | 9 | -------------------------------------------------------------------------------- /migrations/00005_add_exitcode.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | ALTER TABLE jobs ADD COLUMN exitcode integer; 4 | 5 | -- +goose Down 6 | -- SQL in this section is executed when the migration is rolled back. 7 | ALTER TABLE jobs DROP COLUMN exitcode; 8 | 9 | -------------------------------------------------------------------------------- /test.toml: -------------------------------------------------------------------------------- 1 | host = "0.0.0.0" 2 | port = 5454 3 | region = "us-west-2" 4 | database_host = "127.0.0.1" 5 | database_port = 5432 6 | database_username = "postgres" 7 | database_name = "postgres" 8 | database_password = "123456" 9 | frontend_assets = "local" 10 | frontend_assets_local_prefix = "frontend/dist" 11 | -------------------------------------------------------------------------------- /frontend/src/components/SearchBox/SearchBox.scss: -------------------------------------------------------------------------------- 1 | .search-box { 2 | display: inline-block; 3 | margin-left: 12px; 4 | 5 | input { 6 | margin-left: 6px; 7 | margin-right: 6px; 8 | width: 200px; 9 | } 10 | 11 | .search-not-found { 12 | margin-left: 6px; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /migrations/00006_add_log_stream_name.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | ALTER TABLE jobs ADD COLUMN log_stream_name TEXT; 4 | 5 | -- +goose Down 6 | -- SQL in this section is executed when the migration is rolled back. 7 | ALTER TABLE jobs DROP COLUMN log_stream_name; 8 | 9 | -------------------------------------------------------------------------------- /frontend/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euxo pipefail 3 | 4 | # Set FRONTEND_S3_PREFIX before running this script. It determines where in S3 5 | # you want to put your static files. 
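#
# A sketch of an invocation (the bucket name and prefix below are
# illustrative, not part of this repository):
#
#   FRONTEND_S3_PREFIX=s3://my-bucket/batchiepatchie-frontend \
#   VERSION=$(cat ../version) \
#   ./deploy.sh
#
# Note that $VERSION must also be set; docker_run.sh exports it from the
# repository's version file.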
6 | 7 | cd "$(dirname "$0")" 8 | npm run build:dist 9 | aws s3 sync --acl public-read /opt/frontend/dist ${FRONTEND_S3_PREFIX}/$VERSION 10 | -------------------------------------------------------------------------------- /frontend/src/components/CommandLineFormatter/CommandLineFormatter.scss: -------------------------------------------------------------------------------- 1 | .command-line-formatter { 2 | pre { 3 | margin: 0; 4 | padding: 0; 5 | background: none; 6 | border: none; 7 | overflow: hidden; 8 | white-space: nowrap; 9 | text-overflow: ellipsis; 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /jobs/jobs_test.go: -------------------------------------------------------------------------------- 1 | package jobs_test 2 | 3 | import ( 4 | "encoding/json" 5 | "testing" 6 | 7 | "github.com/AdRoll/batchiepatchie/jobs" 8 | ) 9 | 10 | func TestJobStruct(t *testing.T) { 11 | j := jobs.Job{ 12 | Id: "myId", 13 | } 14 | 15 | _, err := json.Marshal(j) 16 | if err != nil { 17 | t.Error(err) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /migrations/00003_add_status_reason.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | 4 | ALTER TABLE jobs ADD COLUMN status_reason TEXT; 5 | 6 | -- +goose Down 7 | -- SQL in this section is executed when the migration is rolled back. 8 | 9 | ALTER TABLE jobs DROP COLUMN status_reason; 10 | 11 | -------------------------------------------------------------------------------- /migrations/00004_add_run_started_at_column.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | ALTER TABLE jobs ADD COLUMN run_started_at timestamp with time zone; 4 | 5 | -- +goose Down 6 | -- SQL in this section is executed when the migration is rolled back. 7 | ALTER TABLE jobs DROP COLUMN run_started_at; 8 | 9 | -------------------------------------------------------------------------------- /migrations/00020_bigger_job_id.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | 4 | ALTER TABLE jobs ALTER COLUMN job_id TYPE VARCHAR(44); 5 | 6 | -- +goose Down 7 | -- SQL in this section is executed when the migration is rolled back. 8 | 9 | ALTER TABLE jobs ALTER COLUMN job_id TYPE VARCHAR(36); 10 | -------------------------------------------------------------------------------- /migrations/00017_add_job_queue_timestamp_index_to_jobs.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | CREATE INDEX job_queue_timestamp_jobs ON jobs (job_queue, last_updated); 4 | 5 | -- +goose Down 6 | -- SQL in this section is executed when the migration is rolled back. 7 | DROP INDEX job_queue_timestamp_jobs; 8 | -------------------------------------------------------------------------------- /migrations/00023_add_array_properties_column.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 
3 | 4 | ALTER TABLE jobs ADD COLUMN array_properties JSONB; 5 | 6 | -- +goose Down 7 | -- SQL in this section is executed when the migration is rolled back. 8 | 9 | ALTER TABLE jobs DROP COLUMN array_properties; 10 | -------------------------------------------------------------------------------- /migrations/00012_add_activated_job_queues.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | CREATE TABLE activated_job_queues ( 4 | job_queue TEXT NOT NULL PRIMARY KEY 5 | ); 6 | 7 | -- +goose Down 8 | -- SQL in this section is executed when the migration is rolled back. 9 | DROP TABLE activated_job_queues; 10 | 11 | -------------------------------------------------------------------------------- /migrations/00011_add_kill_requested_column_to_jobs.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | ALTER TABLE jobs ADD COLUMN termination_requested BOOLEAN NOT NULL DEFAULT 'f'; 4 | -- +goose Down 5 | -- SQL in this section is executed when the migration is rolled back. 6 | ALTER TABLE jobs DROP COLUMN termination_requested; 7 | 8 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:10.16.3-buster 2 | 3 | # AWS cli tools 4 | RUN apt-get update && apt-get install -y \ 5 | build-essential \ 6 | python \ 7 | python-dev \ 8 | python-pip 9 | RUN pip install PyYAML==5.3.1 10 | RUN pip install awscli 11 | 12 | # Copy and install frontend requirements 13 | COPY . /opt/frontend 14 | WORKDIR /opt/frontend 15 | RUN yarn 16 | -------------------------------------------------------------------------------- /frontend/src/components/ImageFormatter/ImageFormatter.scss: -------------------------------------------------------------------------------- 1 | .image-formatter { 2 | .ecr-label { 3 | position: absolute; 4 | top: 0px; 5 | left: calc(100% - 15px); 6 | font-weight: bold; 7 | font-size: 0.5em; 8 | margin: 0em; 9 | margin-top: -2px; 10 | padding: 0em; 11 | color: #aaa; 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /frontend/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "192x192", 8 | "type": "image/png" 9 | } 10 | ], 11 | "start_url": "./index.html", 12 | "display": "standalone", 13 | "theme_color": "#000000", 14 | "background_color": "#ffffff" 15 | } 16 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | 6 | # testing 7 | /coverage 8 | 9 | # production 10 | /build 11 | 12 | # misc 13 | .DS_Store 14 | .env.local 15 | .env.development.local 16 | .env.test.local 17 | .env.production.local 18 | 19 | npm-debug.log* 20 | yarn-debug.log* 21 | yarn-error.log* 22 | 23 | dist/ 24 | -------------------------------------------------------------------------------- /logentries.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | // forked version to fix go mod issue 5 | "github.com/jcftang/logentriesrus" 6 | log "github.com/sirupsen/logrus" 7 | ) 8 | 9 | func setUpLogEntriesHooks(host string, key string) { 10 | le, err := logentriesrus.NewLogentriesrusHook(host, key) 11 | if err != nil { 12 | log.Fatal("Cannot connect to logentries: ", err) 13 | } 14 | 15 | log.AddHook(le) 16 | } 17 | -------------------------------------------------------------------------------- /migrations/00019_add_forced_scaling_column.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | 4 | ALTER TABLE activated_job_queues 5 | ADD COLUMN forced_scaling BOOLEAN NOT NULL DEFAULT 'f'; 6 | 7 | 8 | -- +goose Down 9 | -- SQL in this section is executed when the migration is rolled back. 10 | 11 | ALTER TABLE activated_job_queues 12 | DROP COLUMN forced_scaling; 13 | -------------------------------------------------------------------------------- /batchiepatchie-dockercompose-config.toml: -------------------------------------------------------------------------------- 1 | host = "0.0.0.0" 2 | port = 5454 3 | region = "us-west-2" 4 | database_host = "postgres" 5 | database_port = 5432 6 | database_username = "postgres" 7 | database_name = "postgres" 8 | database_password = "123456" 9 | frontend_assets = "local" 10 | frontend_assets_local_prefix = "frontend/dist" 11 | use_auto_scaler = false 12 | use_cleaner = true 13 | clean_period = 30 # seconds 14 | -------------------------------------------------------------------------------- /frontend/src/components/StatusFormatter/StatusFormatter.scss: -------------------------------------------------------------------------------- 1 | .status-formatter { 2 | .alert { 3 | padding: 3px 3px; 4 | padding-left: 8px; 5 | padding-right: 8px; 6 | margin-bottom: 0; 7 | } 8 | 9 | .alert-gone { 10 | color: #fff; 11 | background: #4d4d4d; 12 | } 13 | 14 | .alert-terminated { 15 | color: #fff; 16 | background: #bbaaee; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /frontend/src/components/SectionLoader/SectionLoader.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import './SectionLoader.scss'; 3 | 4 | export default function SectionLoader() { 5 | return ( 6 |
7 | <div className='section-loader'>
8 | <div className='lds-spinner' />
9 | </div> ); 10 | } 11 | }
-------------------------------------------------------------------------------- /frontend/src/components/JobQueueRowRenderer/JobQueueRowRenderer.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import ReactDataGrid from 'react-data-grid'; 3 | 4 | const { Row } = ReactDataGrid; 5 | 6 | export default class JobQueueRowRenderer extends React.Component { 7 | static propTypes = { 8 | idx: PropTypes.number.isRequired 9 | }; 10 | 11 | render() { 12 | return (
<div><Row ref={ node => this.row = node } {...this.props}/></div>
); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /frontend/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | "react", 4 | ["es2015", { "modules": false } ] 5 | ], 6 | "plugins": [ 7 | "lodash", 8 | "recharts", 9 | "transform-object-rest-spread", 10 | "transform-decorators-legacy", 11 | "transform-class-properties", 12 | "transform-async-to-generator" 13 | ], 14 | "env": { 15 | "production": { 16 | "plugins": ["transform-react-remove-prop-types"] 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /frontend/src/pages/JobQueuesPage/JobQueuesPage.scss: -------------------------------------------------------------------------------- 1 | .job-queues-page { 2 | .job-queues-listings { 3 | h2 { 4 | text-align: center; 5 | margin-bottom: 10px; 6 | } 7 | } 8 | .job-queues-grid { 9 | padding: 10px; 10 | .activation-formatter-btn { 11 | text-align: center; 12 | width: 100%; 13 | } 14 | } 15 | .btn { 16 | padding: 3px; 17 | font-size: 0.7em; 18 | font-weight: bold; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | The following individuals have contributed their time and effort to improving Batchiepatchie 2 | -------------------------------------------------------------------------------------------- 3 | 4 | * Abimael Martinez Carrete 5 | * Alex Holyoke 6 | * Chris Evans 7 | * Joey Robert 8 | * Knut Nesheim 9 | * Lorenzo Hernandez 10 | * Luis Fernando Barrera 11 | * Matheus Alves 12 | * Mikko Juola 13 | * Oleg Avdeev 14 | * Ronald Paloschi 15 | * Roozbeh Zabihollahi 16 | 17 | -------------------------------------------------------------------------------- /migrations/00009_add_indexes_to_event_logs.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | CREATE INDEX compute_environment_event_log_compute_environment ON compute_environment_event_log (compute_environment, timestamp); 4 | CREATE INDEX job_summary_event_log_job_queue ON job_summary_event_log (job_queue, timestamp); 5 | 6 | -- +goose Down 7 | -- SQL in this section is executed when the migration is rolled back. 
8 | DROP INDEX job_summary_event_log_job_queue; 9 | DROP INDEX compute_environment_event_log_compute_environment; 10 | 11 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Batchiepatchie 2 | pages: 3 | - Home: index.md 4 | - Overview: overview.md 5 | - Quick start: quickstart.md 6 | - Deployment: deployment.md 7 | - Frontend: frontend.md 8 | - Job statuses: statuses.md 9 | - Timeouts: timeouts.md 10 | - Scaling hack: scaling.md 11 | - Terminator: terminator.md 12 | - Tracing: tracing.md 13 | theme: 14 | name: readthedocs 15 | use_directory_urls: false 16 | -------------------------------------------------------------------------------- /frontend/src/components/CommandLineFormatter/CommandLineFormatter.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import './CommandLineFormatter.scss'; 3 | 4 | export default class CommandLineFormatter extends React.Component { 5 | static propTypes = { 6 | value: PropTypes.string 7 | }; 8 | 9 | render() { 10 | if (!this.props.value) { 11 | return ''; 12 | } 13 | 14 | return ( 15 |
16 | <div className='command-line-formatter'><pre>{ this.props.value }</pre>
17 | </div>
18 | ); 19 | } 20 | }; 21 | -------------------------------------------------------------------------------- /frontend/src/components/RowRenderer/RowRenderer.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import ReactDataGrid from 'react-data-grid'; 3 | 4 | const { Row } = ReactDataGrid; 5 | 6 | export default class RowRenderer extends React.Component { 7 | static propTypes = { 8 | idx: PropTypes.number.isRequired 9 | }; 10 | 11 | getRowStyle = () => { 12 | return { 13 | color: this.props.row.termination_requested ? '#FF0000' : '#000000' 14 | }; 15 | }; 16 | 17 | render() { 18 | return (
<div style={ this.getRowStyle() }><Row ref={ node => this.row = node } { ...this.props }/></div>
); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /frontend/src/components/DateTimeFormatter/DateTimeFormatter.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import moment from 'moment'; 3 | 4 | export default class DateTimeFormatter extends React.Component { 5 | static propTypes = { 6 | value: PropTypes.string 7 | }; 8 | 9 | render() { 10 | const dt = moment.utc(this.props.value); 11 | const dtStr = dt.isValid() ? `${dt.format('YYYY-MM-DD h:mm:ss a z')} (${dt.fromNow()})` : ''; 12 | 13 | return ( 14 |
15 | <div> { dtStr } </div>
17 | ); 18 | } 19 | }; 20 | -------------------------------------------------------------------------------- /docs/docs/index.md: -------------------------------------------------------------------------------- 1 | Batchiepatchie - Project documentation 2 | ====================================== 3 | 4 | This is documentation for Batchiepatchie, a monitoring tool for AWS Batch. 5 | 6 | If you are new here, we suggest you read the high-level overview first before 7 | reading on other topics. 8 | 9 | Index 10 | ----- 11 | 12 | - [High-level overview](overview.md) 13 | - [Quick start](quickstart.md) 14 | - [Deployment](deployment.md) 15 | - [Frontend](frontend.md) 16 | - [Job statuses](statuses.md) 17 | - [Timeouts](timeouts.md) 18 | - [Scaling hack](scaling.md) 19 | - [Terminator](terminator.md) 20 | - [Tracing](tracing.md) 21 | 22 | -------------------------------------------------------------------------------- /migrations/00022_reduced_trigram_index.sql: -------------------------------------------------------------------------------- 1 | -- +goose NO TRANSACTION 2 | -- +goose Up 3 | -- SQL in this section is executed when the migration is applied. 4 | DROP INDEX CONCURRENTLY trgm_idx_jobs; 5 | 6 | CREATE INDEX CONCURRENTLY trgm_idx_jobs ON jobs USING gin ( 7 | (job_id || job_name || job_queue || image) gin_trgm_ops 8 | ); 9 | 10 | -- +goose Down 11 | -- SQL in this section is executed when the migration is rolled back. 12 | DROP INDEX CONCURRENTLY trgm_idx_jobs; 13 | 14 | CREATE INDEX CONCURRENTLY trgm_idx_jobs ON jobs USING gin ( 15 | (job_id || job_name || job_queue || image || command_line || job_definition) gin_trgm_ops 16 | ); 17 | -------------------------------------------------------------------------------- /frontend/src/components/Terminal/Terminal.scss: -------------------------------------------------------------------------------- 1 | .terminal { 2 | overflow-x: auto; 3 | overflow-y: hidden; 4 | 5 | pre { 6 | padding: 0; 7 | font-size: 12px; 8 | margin-bottom: 0; 9 | border: 0; 10 | overflow: hidden; 11 | margin-left: 6px; 12 | margin-right: 6px; 13 | color: #333; 14 | } 15 | 16 | // Color it orange if it is the current search result. 17 | .current-search-result { 18 | background-color: orange; 19 | } 20 | 21 | // If it is a search result on a different line, color it yellow. 22 | mark, .mark { 23 | background-color: yellow; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /frontend/src/utils/debounce.js: -------------------------------------------------------------------------------- 1 | // Taken from https://davidwalsh.name/javascript-debounce-function 2 | export default function debounce(func, wait, immediate) { 3 | var timeout; 4 | return function() { 5 | var context = this; 6 | var args = arguments; 7 | var later = function() { 8 | timeout = null; 9 | if (!immediate) { 10 | func.apply(context, args); 11 | } 12 | }; 13 | var callNow = immediate && !timeout; 14 | clearTimeout(timeout); 15 | timeout = setTimeout(later, wait); 16 | if (callNow) { 17 | func.apply(context, args); 18 | } 19 | }; 20 | }; 21 | -------------------------------------------------------------------------------- /docs/docs/terminator.md: -------------------------------------------------------------------------------- 1 | Batchiepatchie - Terminator 2 | --------------------------- 3 | 4 | Batchiepatchie can terminate EC2 instances that look like they've got stuck. 
5 | 
6 | At this time, Batchiepatchie will terminate EC2 instances that have jobs on
7 | them that have been in `STARTING` state for more than 10 minutes. Jobs getting
8 | stuck this way is the result of a bug that occasionally happens in AWS Batch.
9 | 
10 | This feature is turned off by default but can be enabled by specifying
11 | `kill_stuck_jobs = true` in the Batchiepatchie configuration file. The behavior
12 | will be exercised on all jobs Batchiepatchie knows about.
13 | 
14 | Batchiepatchie requires `ec2:TerminateInstances` to be able to invoke
15 | termination on instances.
-------------------------------------------------------------------------------- /frontend/src/components/ImageFormatter/ImageFormatter.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import './ImageFormatter.scss'; 3 | 4 | const ECR_REGEX = /^[0-9]+\.dkr\.ecr\.[^.]+\.amazonaws\.com\/(.+)$/; 5 | 6 | export default class ImageFormatter extends React.Component { 7 | static propTypes = { 8 | value: PropTypes.string.isRequired 9 | }; 10 | 11 | render() { 12 | let value = this.props.value; 13 | const re_match = value.match(ECR_REGEX); 14 | if ( re_match && re_match.length > 1 ) { 15 | value = re_match[1]; 16 | } 17 | 18 | return ( 19 | <div className='image-formatter'>
20 | { value }
21 | </div>
22 | ); 23 | } 24 | }; 25 | 26 | -------------------------------------------------------------------------------- /frontend/src/stores/layout.js: -------------------------------------------------------------------------------- 1 | // import { call, put, takeLatest } from 'redux-saga/effects'; 2 | import actionReducer from 'utils/actionReducer'; 3 | 4 | // Action names 5 | export const SET_PAGE_DIMENSIONS = 'SET_PAGE_DIMENSIONS'; 6 | 7 | // Initial state 8 | const initialState = { 9 | height: 800 10 | }; 11 | 12 | const actions = {}; 13 | 14 | // Reducers 15 | actions[SET_PAGE_DIMENSIONS] = (state, { payload }) => { 16 | return { 17 | ...state, 18 | ...payload 19 | }; 20 | }; 21 | 22 | 23 | // Action Creators 24 | export function setPageDimensions(dimensions) { 25 | return { 26 | type: SET_PAGE_DIMENSIONS, 27 | payload: dimensions 28 | }; 29 | }; 30 | 31 | // Root reducer 32 | export default actionReducer(actions, initialState); 33 | -------------------------------------------------------------------------------- /migrations/00007_add_compute_environment_event_log.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | CREATE TABLE compute_environment_event_log ( 4 | timestamp timestamp with time zone NOT NULL, 5 | compute_environment TEXT NOT NULL, 6 | desired_vcpus INTEGER, 7 | max_vcpus INTEGER, 8 | min_vcpus INTEGER, 9 | state TEXT, 10 | service_role TEXT 11 | ); 12 | 13 | CREATE INDEX compute_environment_event_log_timestamp ON compute_environment_event_log (timestamp); 14 | 15 | -- +goose Down 16 | -- SQL in this section is executed when the migration is rolled back. 17 | DROP INDEX compute_environment_event_log_timestamp; 18 | DROP TABLE compute_environment_event_log; 19 | 20 | -------------------------------------------------------------------------------- /migrations/00008_add_job_summary_event_log.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | CREATE TABLE job_summary_event_log ( 4 | timestamp timestamp with time zone NOT NULL, 5 | job_queue TEXT NOT NULL, 6 | submitted INTEGER NOT NULL, 7 | pending INTEGER NOT NULL, 8 | runnable INTEGER NOT NULL, 9 | starting INTEGER NOT NULL, 10 | running INTEGER NOT NULL 11 | ); 12 | 13 | CREATE INDEX job_summary_event_log_timestamp ON job_summary_event_log (timestamp); 14 | 15 | -- +goose Down 16 | -- SQL in this section is executed when the migration is rolled back. 
17 | DROP INDEX job_summary_event_log_timestamp;
18 | DROP TABLE job_summary_event_log;
19 | 
20 | 
-------------------------------------------------------------------------------- /jobs/timeout_killer.go: -------------------------------------------------------------------------------- 1 | package jobs
2 | 
3 | import (
4 | "github.com/opentracing/opentracing-go"
5 | log "github.com/sirupsen/logrus"
6 | )
7 | 
8 | func KillTimedOutJobs(finder FinderStorer) error {
9 | span := opentracing.StartSpan("KillTimedOutJobs")
10 | defer span.Finish()
11 | 
12 | timed_out_jobs, err := finder.FindTimedoutJobs()
13 | if err != nil {
14 | return err
15 | }
16 | killer, err := NewKillerHandler()
17 | if err != nil {
18 | return err
19 | }
20 | 
21 | log.Info("There are ", len(timed_out_jobs), " jobs that need killing.")
22 | 
23 | for _, job_id := range timed_out_jobs {
24 | err = killer.KillOne(job_id, "timeout", finder)
25 | if err != nil {
26 | log.Error("Failed to request termination for ", job_id, ": ", err)
27 | } else {
28 | log.Info("Requested termination for ", job_id)
29 | }
30 | }
31 | log.Info("Timed out killer round complete.")
32 | return nil
33 | }
-------------------------------------------------------------------------------- /migrations/00013_add_task_arn_instance_id_table.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 | 
4 | CREATE TABLE task_arns_to_instance_info (
5 | task_arn TEXT NOT NULL,
6 | instance_id TEXT NOT NULL,
7 | public_ip TEXT NOT NULL,
8 | private_ip TEXT NOT NULL,
9 | PRIMARY KEY(task_arn, instance_id)
10 | );
11 | 
12 | CREATE INDEX task_arns_task_arns ON task_arns_to_instance_info (task_arn);
13 | CREATE INDEX task_arns_instance_id ON task_arns_to_instance_info (instance_id);
14 | 
15 | ALTER TABLE jobs ADD COLUMN task_arn TEXT;
16 | 
17 | -- +goose Down
18 | -- SQL in this section is executed when the migration is rolled back.
19 | ALTER TABLE jobs DROP COLUMN task_arn;
20 | 
21 | DROP INDEX task_arns_task_arns;
22 | DROP INDEX task_arns_instance_id;
23 | DROP TABLE task_arns_to_instance_info;
24 | 
25 | 
-------------------------------------------------------------------------------- /frontend/src/pages/JobPage/JobPage.scss: -------------------------------------------------------------------------------- 1 | .job-page {
2 | .job-menu .nav-tabs {
3 | margin-bottom: 12px;
4 | }
5 | 
6 | h2 {
7 | float: left;
8 | }
9 | 
10 | .section-loader {
11 | float: right;
12 | }
13 | button {
14 | margin: 2px;
15 | }
16 | 
17 | .array-job-icon {
18 | font-size: 30px;
19 | margin-right: 6px;
20 | cursor: default;
21 | }
22 | 
23 | .status-formatter {
24 | display: inline-block;
25 | margin-right: 6px;
26 | }
27 | 
28 | .child-array-job-statuses {
29 | .status-formatter {
30 | margin-top: 3px;
31 | }
32 | }
33 | 
34 | .auto-scroll-checkbox {
35 | margin-top: 12px;
36 | margin-left: 12px;
37 | 
38 | input {
39 | margin-right: 3px;
40 | }
41 | }
42 | }
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.21
2 | 
3 | RUN mkdir -p /go/src/github.com/AdRoll/batchiepatchie
4 | WORKDIR /go/src/github.com/AdRoll/batchiepatchie
5 | COPY . 
/go/src/github.com/AdRoll/batchiepatchie 6 | 7 | RUN go mod download -x 8 | 9 | EXPOSE 5454 10 | EXPOSE 9999 11 | 12 | RUN go install github.com/pilu/fresh@latest 13 | RUN go install github.com/go-delve/delve/cmd/dlv@latest 14 | RUN wget https://github.com/pressly/goose/releases/download/v2.6.0/goose-linux64 -O /usr/bin/goose 15 | # RUN go get -u github.com/pressly/goose/cmd/goose 16 | RUN set -eux; \ 17 | apt-get update; \ 18 | apt-get install -y gosu; \ 19 | rm -rf /var/lib/apt/lists/*; \ 20 | # verify that the binary works 21 | gosu nobody true 22 | 23 | 24 | RUN chmod +x /usr/bin/goose 25 | RUN chmod +x /go/src/github.com/AdRoll/batchiepatchie/docker_run.sh 26 | CMD ["/go/src/github.com/AdRoll/batchiepatchie/docker_run.sh"] 27 | -------------------------------------------------------------------------------- /docs/docs/frontend.md: -------------------------------------------------------------------------------- 1 | Batchiepatchie - Frontend 2 | ------------------------- 3 | 4 | To build the frontend static files and JavaScript, you will need `node`, `npm` 5 | and `yarn`. 6 | 7 | Operation 8 | --------- 9 | 10 | The official way is to use [yarn](https://yarnpkg.com/lang/en/) to install dependencies. 11 | 12 | ```bash 13 | $ cd frontend 14 | $ yarn 15 | $ npm run build # This creates unminified build 16 | $ npm run build:dist # This creates minified build 17 | ``` 18 | 19 | The static files are placed in `frontend/dist` in Batchiepatchie repository. 20 | The `test.toml` file that comes with Batchiepatchie is pointed to this 21 | directory from root of batchiepatchie repository. 22 | 23 | For development, if you do not want to use the `docker-compose` mechanism described in our [quickstart page](quickstart.md), you can instead do: 24 | 25 | ```bash 26 | $ npm run dev 27 | ``` 28 | -------------------------------------------------------------------------------- /frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | <% for (key in htmlWebpackPlugin.files.css) { %> 5 | 6 | <% } %> 7 | 8 | 9 | 10 | 11 | 12 | <%= htmlWebpackPlugin.options.title %> 13 | 14 | 15 |
16 | <div id="root"></div>
17 | <% for (key in htmlWebpackPlugin.files.chunks) { %>
18 | <script src="<%= htmlWebpackPlugin.files.chunks[key].entry %>"></script>
19 | <% } %>
20 | </body>
21 | </html>
22 | 
-------------------------------------------------------------------------------- /docs/docs/tracing.md: -------------------------------------------------------------------------------- 1 | Batchiepatchie - Tracing
2 | ------------------------
3 | 
4 | Batchiepatchie supports tracing of many synchronization and API calls. This can
5 | produce a profile where the durations of different operations are plotted on a
6 | histogram, giving insight into which parts of Batchiepatchie take large amounts
7 | of time. This is useful for debugging Batchiepatchie itself.
8 | 
9 | The implementation right now only supports DataDog. The feature can be enabled
10 | by adding `use_datadog_tracing = true` in the configuration file.
11 | 
12 | Even though DataDog is the only supported tracing target right now, most of the
13 | tracing code has been implemented in terms of the [Go opentracing library](https://github.com/opentracing/opentracing-go).
14 | If you wish to use an alternative, you can modify the `batchiepatchie.go` file
15 | in the repository to instantiate the opentracing handle in some other way.
-------------------------------------------------------------------------------- /frontend/src/components/ActivationFormatter/ActivationFormatter.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import './ActivationFormatter.scss'; 3 | 4 | export default class ActivationFormatter extends React.Component { 5 | static propTypes = { 6 | value: PropTypes.object 7 | }; 8 | 9 | render() { 10 | const value = this.props.value.action; 11 | 12 | let classes = 'btn btn-xs btn-success'; 13 | if ( value === 'DEACTIVATE' ) { 14 | classes = 'btn btn-xs btn-danger'; 15 | } 16 | 17 | if ( value !== '' ) { 18 | return ( 19 | <div className='activation-formatter-btn'>
20 | <button className={ classes } onClick={ this.props.value.onClick }> {/* handler name assumed */}
21 | { value }
22 | </button>
23 | </div>
24 | ); 25 | } else { 26 | return (<div />); 27 | } 28 | } 29 | };
-------------------------------------------------------------------------------- /frontend/.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org
2 | 
3 | # A special property that should be specified at the top of the file outside of
4 | # any sections. Set to true to stop .editor config file search on current file
5 | root = true
6 | 
7 | [Makefile]
8 | indent_style = tab
9 | 
10 | [*]
11 | # Indentation style
12 | # Possible values - tab, space
13 | indent_style = space
14 | 
15 | # Indentation size in single-spaced characters
16 | # Possible values - an integer, tab
17 | indent_size = 4
18 | 
19 | # Line ending file format
20 | # Possible values - lf, crlf, cr
21 | end_of_line = lf
22 | 
23 | # File character encoding
24 | # Possible values - latin1, utf-8, utf-16be, utf-16le
25 | charset = utf-8
26 | 
27 | # Denotes whether to trim whitespace at the end of lines
28 | # Possible values - true, false
29 | trim_trailing_whitespace = true
30 | 
31 | # Denotes whether file should end with a newline
32 | # Possible values - true, false
33 | insert_final_newline = true
-------------------------------------------------------------------------------- /frontend/src/pages/JobsPage/JobsPage.scss: -------------------------------------------------------------------------------- 1 | .jobs-page {
2 | h2 {
3 | float: left;
4 | }
5 | 
6 | .actions {
7 | float: right;
8 | margin-bottom: 12px;
9 | 
10 | select {
11 | float: right;
12 | margin-right: 12px;
13 | height: 42px;
14 | line-height: 42px;
15 | width: auto;
16 | }
17 | 
18 | .btn {
19 | float: right;
20 | margin-right: 12px;
21 | }
22 | }
23 | 
24 | nav {
25 | text-align: center;
26 | 
27 | .pagination {
28 | margin-bottom: 0;
29 | }
30 | }
31 | 
32 | .auto-refresh {
33 | float: left;
34 | margin-top: 6px;
35 | margin-right: 12px;
36 | label {
37 | margin-left: 10px;
38 | }
39 | }
40 | 
41 | .array-job-icon {
42 | font-size: 20px;
43 | margin-right: 6px;
44 | cursor: default;
45 | }
46 | }
-------------------------------------------------------------------------------- /frontend/src/components/JobLinkFormatter/JobLinkFormatter.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import { Link } from 'react-router'; 3 | 4 | export default class JobLinkFormatter extends React.Component { 5 | static propTypes = { 6 | value: PropTypes.oneOfType([ 7 | PropTypes.string, 8 | PropTypes.number, 9 | ]).isRequired 10 | }; 11 | 12 | render() { 13 | const value = this.props.value; 14 | 15 | /* 16 | * Don't display the entire ID (it's kind of long). 17 | * 18 | * All JobIDs have a predictable format so we'll take just the first 8 characters. 19 | * 20 | * 35c55019-c25d-4de6-9338-27c678495df -> 35c55019 21 | */ 22 | 23 | const value_prefix = value.substr(0, 8); 24 | 25 | return ( 26 | <Link to={ process.env.BASE_URL + '/job/' + value }> {/* route path assumed */}
27 | { value_prefix }
28 | </Link>
29 | ); 30 | } 31 | };
-------------------------------------------------------------------------------- /docs/docs/statuses.md: -------------------------------------------------------------------------------- 1 | Batchiepatchie - Job statuses
2 | -----------------------------
3 | 
4 | Batchiepatchie can show 8 different statuses for a job.
5 | 
6 | * Submitted
7 | * Pending
8 | * Runnable
9 | * Running
10 | * Succeeded
11 | * Failed
12 | * Gone
13 | * Terminated
14 | 
15 | Of these, the first six correspond to [AWS Batch job
16 | states](https://docs.aws.amazon.com/batch/latest/userguide/job_states.html).
17 | 
18 | The last two, `GONE` and `TERMINATED`, are Batchiepatchie-specific.
19 | 
20 | * `GONE`: This means Batchiepatchie lost track of a job. There is no information on whether the job
21 | succeeded or failed. A large number of jobs with `GONE` status can indicate problems
22 | with the Batchiepatchie or AWS Batch setup, but by itself the status is harmless.
23 | 
24 | * `TERMINATED`: This is the same as `FAILED`, except that when the job exit code
25 | indicates a `SIGKILL` type of exit, we display the text `TERMINATED` instead of
26 | `FAILED`. This often means the job was killed by the "Terminate job" button, a
27 | timeout, or running out of memory.
28 | 
-------------------------------------------------------------------------------- /frontend/src/stores/index.js: -------------------------------------------------------------------------------- 1 | import {
2 | createStore,
3 | combineReducers,
4 | applyMiddleware,
5 | compose
6 | } from 'redux';
7 | import thunk from 'redux-thunk';
8 | import { browserHistory } from 'react-router';
9 | import { routerReducer, routerMiddleware } from 'react-router-redux';
10 | 
11 | // Reducers
12 | import jobReducer from './job';
13 | import layoutReducer from './layout';
14 | import statusReducer from './status';
15 | import jobQueueReducer from './jobqueue';
16 | 
17 | const rootReducer = combineReducers({
18 | job: jobReducer,
19 | jobqueue: jobQueueReducer,
20 | layout: layoutReducer,
21 | routing: routerReducer,
22 | status: statusReducer
23 | });
24 | 
25 | const finalCreateStore = compose(
26 | applyMiddleware(routerMiddleware(browserHistory), thunk),
27 | window.devToolsExtension && process.env.NODE_ENV === 'development' ? window.devToolsExtension() : f => f
28 | )(createStore);
29 | 
30 | export default function configureStore(initialState) {
31 | return finalCreateStore(rootReducer, initialState);
32 | };
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Batchiepatchie
2 | ---------------
3 | 
4 | Batchiepatchie is a service built on top of AWS Batch that collects information
5 | on all the jobs that are running and makes them easily searchable through a
6 | beautiful user interface. Internally, Batchiepatchie mirrors the state of AWS
7 | Batch in a PostgreSQL database. It can scale to millions of jobs and, for many
8 | use cases, is a substantial improvement over the AWS Management Console for AWS
9 | Batch.
10 | 
11 | ![Screenshot of Batchiepatchie](screenshot.png?raw=true)
12 | 
13 | There is [detailed documentation](docs/docs/index.md) inside our docs directory that describes the features of Batchiepatchie and gives
14 | deployment instructions.
15 | 
16 | How to contribute
17 | -----------------
18 | 
19 | Simply open issues or pull requests on this GitHub repository.
Contributors 20 | need to sign a CLA; we have an automatic CLA assistant to make this process as 21 | seamless as possible and it should appear when you open your pull request. 22 | 23 | License 24 | ------- 25 | 26 | Batchiepatchie is licensed under MIT license. 27 | -------------------------------------------------------------------------------- /frontend/src/utils/getChartColor.js: -------------------------------------------------------------------------------- 1 | const CHART_COLORS = [ 2 | '#FF0000', 3 | '#7F0000', 4 | '#FFA280', 5 | '#806C60', 6 | '#FF8800', 7 | '#FFE1BF', 8 | '#996600', 9 | '#FFCC00', 10 | '#66644D', 11 | '#4C4700', 12 | '#EEFF00', 13 | '#66FF00', 14 | '#7DB359', 15 | '#8FBFA3', 16 | '#005930', 17 | '#00FFAA', 18 | '#00EEFF', 19 | '#003C40', 20 | '#00AAFF', 21 | '#738C99', 22 | '#004480', 23 | '#0066FF', 24 | '#0000FF', 25 | '#0000BF', 26 | '#1A1966', 27 | '#C8BFFF', 28 | '#9559B3', 29 | '#CC00FF', 30 | '#590047', 31 | '#FF00AA', 32 | '#FFBFEA', 33 | '#A65369', 34 | '#FF4059', 35 | '#400009', 36 | ]; 37 | 38 | // Persist colors for consistency 39 | const savedColors = {}; 40 | let index = 0; 41 | 42 | export default function getChartColor(value) { 43 | if (savedColors[value]) { 44 | return savedColors[value]; 45 | } 46 | const color = CHART_COLORS[index % CHART_COLORS.length]; 47 | savedColors[value] = color; 48 | index++; 49 | return color; 50 | } 51 | -------------------------------------------------------------------------------- /migrations/00021_single_trigram_index.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | DROP INDEX trgm_idx_jobs_job_id; 4 | DROP INDEX trgm_idx_jobs_job_name; 5 | DROP INDEX trgm_idx_jobs_job_queue; 6 | DROP INDEX trgm_idx_jobs_image; 7 | DROP INDEX trgm_idx_jobs_command_line; 8 | DROP INDEX trgm_idx_jobs_job_definition; 9 | 10 | CREATE INDEX trgm_idx_jobs ON jobs USING gin ( 11 | (job_id || job_name || job_queue || image || command_line || job_definition) gin_trgm_ops 12 | ); 13 | 14 | -- +goose Down 15 | -- SQL in this section is executed when the migration is rolled back. 
16 | DROP INDEX trgm_idx_jobs; 17 | 18 | CREATE INDEX trgm_idx_jobs_job_id ON jobs USING gin (job_id gin_trgm_ops); 19 | CREATE INDEX trgm_idx_jobs_job_name ON jobs USING gin (job_name gin_trgm_ops); 20 | CREATE INDEX trgm_idx_jobs_job_queue ON jobs USING gin (job_queue gin_trgm_ops); 21 | CREATE INDEX trgm_idx_jobs_image ON jobs USING gin (image gin_trgm_ops); 22 | CREATE INDEX trgm_idx_jobs_command_line ON jobs USING gin (command_line gin_trgm_ops); 23 | CREATE INDEX trgm_idx_jobs_job_definition ON jobs USING gin (job_definition gin_trgm_ops); 24 | -------------------------------------------------------------------------------- /frontend/src/pages/StatsPage/StatsPage.scss: -------------------------------------------------------------------------------- 1 | .stats-page { 2 | label { 3 | margin-left: 12px; 4 | } 5 | 6 | .actions { 7 | float: right; 8 | margin-bottom: 12px; 9 | 10 | select { 11 | float: right; 12 | margin-right: 12px; 13 | height: 42px; 14 | line-height: 42px; 15 | width: auto; 16 | } 17 | 18 | // react-datetime 19 | .rdt { 20 | display: inline-block; 21 | margin-left: 6px; 22 | 23 | .rdtPicker { 24 | margin-left: -78px; 25 | } 26 | } 27 | } 28 | 29 | .color-block { 30 | height: 9px; 31 | width: 9px; 32 | display: inline-block; 33 | margin-right: 6px; 34 | } 35 | 36 | td { 37 | vertical-align: middle; 38 | 39 | 40 | .status-column { 41 | width: 105px; 42 | } 43 | } 44 | 45 | tr:last-of-type { 46 | font-weight: bold; 47 | } 48 | 49 | select.metric-picker { 50 | margin-top: -7px; 51 | margin-right: 0; 52 | margin-left: 6px; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 AdRoll, Inc. and Batchiepatchie contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is furnished to do 10 | so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /migrations/00001_jobs.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 
3 | 4 | CREATE TABLE jobs ( 5 | job_id CHAR(36) NOT NULL PRIMARY KEY, 6 | job_name TEXT NOT NULL, 7 | job_definition TEXT NOT NULL, 8 | job_queue TEXT NOT NULL, 9 | image TEXT NOT NULL, 10 | status VARCHAR(9) NOT NULL, 11 | created_at timestamp with time zone NOT NULL, 12 | stopped_at timestamp with time zone, 13 | vcpus INTEGER NOT NULL, 14 | memory INTEGER NOT NULL, 15 | timeout INTEGER, 16 | command_line TEXT NOT NULL, 17 | last_updated timestamp with time zone NOT NULL 18 | ); 19 | 20 | CREATE INDEX jobs_created_at_timestamp ON jobs (created_at); 21 | CREATE INDEX jobs_stopped_at_timestamp ON jobs (stopped_at); 22 | CREATE INDEX jobs_last_updated_timestamp ON jobs (last_updated); 23 | 24 | -- +goose Down 25 | -- SQL in this section is executed when the migration is rolled back. 26 | 27 | DROP INDEX jobs_created_at_timestamp; 28 | DROP INDEX jobs_stopped_at_timestamp; 29 | DROP INDEX jobs_last_updated_timestamp; 30 | DROP TABLE jobs; 31 | 32 | -------------------------------------------------------------------------------- /frontend/src/components/StatusFormatter/StatusFormatter.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import { STATUSES, STATUS_LABELS } from 'stores/job'; 3 | import './StatusFormatter.scss'; 4 | 5 | export const STATUS_CLASSES = { 6 | [STATUSES.SUBMITTED]: 'alert alert-info', 7 | [STATUSES.PENDING]: 'alert alert-info', 8 | [STATUSES.RUNNABLE]: 'alert alert-info', 9 | [STATUSES.STARTING]: 'alert alert-warning', 10 | [STATUSES.RUNNING]: 'alert alert-warning', 11 | [STATUSES.FAILED]: 'alert alert-danger', 12 | [STATUSES.SUCCEEDED]: 'alert alert-success', 13 | [STATUSES.GONE]: 'alert alert-gone', 14 | [STATUSES.TERMINATED]: 'alert alert-terminated' 15 | }; 16 | 17 | export default class StatusFormatter extends React.Component { 18 | static propTypes = { 19 | count: PropTypes.number, 20 | value: PropTypes.string.isRequired 21 | }; 22 | 23 | render() { 24 | const value = this.props.value; 25 | const count = this.props.count; 26 | 27 | return ( 28 |
29 | <div className='status-formatter'><div className={ STATUS_CLASSES[value] }>
30 | { count } 31 | { count && ' ' } 32 | { STATUS_LABELS[value] } 33 |
34 |
35 | ); 36 | } 37 | }; 38 | -------------------------------------------------------------------------------- /envsubstituter/envsubstituter_test.go: -------------------------------------------------------------------------------- 1 | package envsubstituter 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestEnvironmentSubstitute(t *testing.T) { 9 | if str, _ := EnvironmentSubstitute("hello"); str != "hello" { 10 | t.Errorf("'hello' was not substituted to 'hello': '%s'", str) 11 | } 12 | 13 | if str, _ := EnvironmentSubstitute(""); str != "" { 14 | t.Errorf("Empty string was not substituted to another empty string") 15 | } 16 | 17 | if _, err := EnvironmentSubstitute("argblarg $(malfie) org"); err != nil { 18 | t.Errorf("Malformed substitution was not ignored as expected") 19 | } 20 | 21 | if _, err := EnvironmentSubstitute("argblarg ${hunter2kajsdmalfie} org"); err == nil { 22 | t.Errorf("Missing environment variable didn't throw an error") 23 | } 24 | 25 | os.Setenv("hunter2kajsdmalfie", "blah") 26 | unset := func() { 27 | os.Unsetenv("hunter2kajsdmalfie") 28 | } 29 | defer unset() 30 | 31 | if str, err := EnvironmentSubstitute("argblarg ${hunter2kajsdmalfie} org"); str != "argblarg blah org" { 32 | t.Errorf("Environment variable was not substituted correctly: '%s', %s", str, err) 33 | } 34 | 35 | if str, err := EnvironmentSubstitute("argblarg ${hunter2kajsdmalfie} ${hunter2kajsdmalfie} org"); str != "argblarg blah blah org" { 36 | t.Errorf("Environment variable was not substituted correctly: '%s', %s", str, err) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /migrations/00015_pg_trgm_gin_indexes.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | 4 | -- Have to change to VARCHAR so gin index works 5 | ALTER TABLE jobs DROP CONSTRAINT jobs_pkey; 6 | ALTER TABLE jobs ALTER COLUMN job_id TYPE VARCHAR(36); 7 | ALTER TABLE jobs ADD PRIMARY KEY (job_id); 8 | 9 | CREATE EXTENSION pg_trgm; 10 | 11 | CREATE INDEX trgm_idx_jobs_job_id ON jobs USING gin (job_id gin_trgm_ops); 12 | CREATE INDEX trgm_idx_jobs_job_name ON jobs USING gin (job_name gin_trgm_ops); 13 | CREATE INDEX trgm_idx_jobs_job_queue ON jobs USING gin (job_queue gin_trgm_ops); 14 | CREATE INDEX trgm_idx_jobs_image ON jobs USING gin (image gin_trgm_ops); 15 | CREATE INDEX trgm_idx_jobs_command_line ON jobs USING gin (command_line gin_trgm_ops); 16 | CREATE INDEX trgm_idx_jobs_job_definition ON jobs USING gin (job_definition gin_trgm_ops); 17 | 18 | -- +goose Down 19 | -- SQL in this section is executed when the migration is rolled back. 
20 | DROP INDEX trgm_idx_jobs_job_id; 21 | DROP INDEX trgm_idx_jobs_job_name; 22 | DROP INDEX trgm_idx_jobs_job_queue; 23 | DROP INDEX trgm_idx_jobs_image; 24 | DROP INDEX trgm_idx_jobs_command_line; 25 | DROP INDEX trgm_idx_jobs_job_definition; 26 | 27 | DROP EXTENSION pg_trgm; 28 | 29 | ALTER TABLE jobs DROP CONSTRAINT jobs_pkey; 30 | ALTER TABLE jobs ALTER COLUMN job_id TYPE CHAR(36); 31 | ALTER TABLE jobs ADD PRIMARY KEY (job_id); -------------------------------------------------------------------------------- /frontend/src/components/Menu/Menu.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import { Link } from 'react-router'; 3 | import { connect } from 'react-redux'; 4 | import classNames from 'classnames'; 5 | import './Menu.scss'; 6 | 7 | const pages = [ 8 | { 9 | name: 'Jobs', 10 | path: process.env.BASE_URL + '/' 11 | }, 12 | { 13 | name: 'Job queues', 14 | path: process.env.BASE_URL + '/job_queues' 15 | }, 16 | { 17 | name: 'Stats', 18 | path: process.env.BASE_URL + '/stats' 19 | }, 20 | ]; 21 | 22 | function Menu({ path }) { 23 | return ( 24 |
<div className='menu'>
25 |             <ul className='nav nav-tabs'>
26 |                 { pages.map(page =>
27 |                     <li key={ page.path } className={ classNames({ active: path === page.path }) }>
28 |                         <Link to={ page.path }>
29 |                             { page.name }
30 |                         </Link>
31 |                     </li>
32 |                 ) }
33 |             </ul>
34 |         </div>
35 |
36 |     );
37 | }
38 |
39 | Menu.propTypes = {
40 |     path: PropTypes.string.isRequired
41 | };
42 |
43 | const mapStateToProps = state => ({
44 |     path: state.routing.locationBeforeTransitions.pathname,
45 | });
46 |
47 | const actions = {};
48 |
49 | export default connect(mapStateToProps, actions)(Menu);
50 |
-------------------------------------------------------------------------------- /frontend/src/index.jsx: --------------------------------------------------------------------------------
1 | import 'whatwg-fetch';
2 | import './index.scss';
3 | import 'react-virtualized/styles.css';
4 | import React from 'react';
5 | import { render } from 'react-dom';
6 | import { Provider } from 'react-redux';
7 | import { Router, IndexRoute, Route, browserHistory } from 'react-router';
8 | import { syncHistoryWithStore } from 'react-router-redux';
9 | import configureStore from './stores';
10 |
11 | import LayoutContainer from './containers/LayoutContainer/LayoutContainer';
12 |
13 | // Pages
14 | import JobsPage from './pages/JobsPage/JobsPage';
15 | import JobQueuesPage from './pages/JobQueuesPage/JobQueuesPage';
16 | import JobPage from './pages/JobPage/JobPage';
17 | import StatsPage from './pages/StatsPage/StatsPage';
18 |
19 | // Store and router
20 | const store = configureStore();
21 | const history = syncHistoryWithStore(browserHistory, store);
22 |
23 | render(
24 |     <Provider store={ store }>
25 |         <Router history={ history }>
26 |             <Route path={ process.env.BASE_URL + '/' } component={ LayoutContainer }>
27 |                 <IndexRoute component={ JobsPage } />
28 |                 <Route path={ process.env.BASE_URL + '/job/:id' } component={ JobPage } />
29 |                 <Route path={ process.env.BASE_URL + '/job_queues' } component={ JobQueuesPage } />
30 |                 <Route path={ process.env.BASE_URL + '/stats' } component={ StatsPage } />
31 |             </Route>
32 |         </Router>
33 |     </Provider>,
34 |     document.getElementById('root')
35 | );
36 |
-------------------------------------------------------------------------------- /frontend/src/components/QueueSelector/QueueSelector.jsx: --------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import { connect } from 'react-redux';
3 | import { setSelectedQueue } from 'stores/job';
4 | import Select from 'react-select';
5 | import './QueueSelector.scss';
6 |
7 | class QueueSelector extends React.Component {
8 |     static propTypes = {
9 |         queues: PropTypes.array.isRequired,
10 |         selectedQueue: PropTypes.string.isRequired,
11 |         setSelectedQueue: PropTypes.func.isRequired
12 |     };
13 |
14 |     render() {
15 |         return (
<div className='queue-selector'>
17 |                 <Select
18 |                     multi={ false }
19 |                     options={ this.props.queues.map(queue => ({ value: queue, label: queue })) }
20 |                     value={ this.props.selectedQueue }
21 |                     onChange={ selected => this.props.setSelectedQueue(selected && selected.value) }
22 |                     placeholder='Select a job queue'
23 |                 />
24 |             </div>
35 |         );
36 |     }
37 | };
38 |
39 |
40 | const mapStateToProps = state => ({
41 |     selectedQueue: state.job.selectedQueue
42 | });
43 |
44 | const actions = {
45 |     setSelectedQueue
46 | };
47 |
48 | export default connect(mapStateToProps, actions)(QueueSelector);
49 |
-------------------------------------------------------------------------------- /docker_run.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -euxo pipefail
4 |
5 | # This script is the entry point for the Batchiepatchie application when
6 | # running inside the docker container.
7 |
8 | # https://denibertovic.com/posts/handling-permissions-with-docker-volumes/
9 | # This dance lets the Docker image create directories when run locally with
10 | # docker-compose.
11 | USER_ID=${LOCAL_USER_ID:-501}
12 | useradd --shell /bin/bash -u $USER_ID -o -c "" -m user || true
13 | export HOME=/home/user
14 |
15 | OWNER=`ls -ld . | awk '{print $3}'`
16 | ME=`whoami`
17 |
18 | CHANGE_TO=user
19 | # Don't change our identity if the current files are owned by us already.
20 | if [ "${OWNER}" = "${ME}" ]; then
21 |     echo "I will not change my user because my files are already owned by me."
22 |     CHANGE_TO="${ME}"
23 | fi;
24 |
25 | exec gosu ${CHANGE_TO} bash <<"EOF"
26 | set -euxo pipefail
27 | export VERSION=`cat version`
28 |
29 | # Get local IP address; or just assume it is 127.0.0.1
30 | BATCHIEPATCHIE_IP=$(curl http://instance-data/latest/meta-data/local-ipv4) || BATCHIEPATCHIE_IP=127.0.0.1
31 | export BATCHIEPATCHIE_IP
32 |
33 | BUILD_ENV_ENV=${BUILD_ENV:-}
34 |
35 | if [ "${BUILD_ENV_ENV}" = "DEBUG" ]; then
36 |     # Runs the Delve debugger in headless mode.
37 |     dlv debug --headless=true --listen=:9999 --accept-multiclient=true
38 | fi;
39 |
40 | if [ "${BUILD_ENV_ENV}" = "PRODUCTION" ]; then
41 |     sleep 5
42 |     go build -buildvcs=false
43 |     ./batchiepatchie
44 | else
45 |     sleep 5
46 |     # Runs the application through Fresh for code reloading.
47 |     fresh -c fresh.conf
48 | fi;
49 | EOF
50 |
-------------------------------------------------------------------------------- /envsubstituter/envsubstituter.go: --------------------------------------------------------------------------------
1 | package envsubstituter
2 |
3 | // This module implements a fairly simple string substitution functionality using
4 | // environment variables.
5 |
6 | // The key function is EnvironmentSubstitute(string) (string, error). This looks
7 | // for "${BLAH}" strings and replaces them with environment variables. If
8 | // environment variables are not defined, it returns an error.
9 |
10 | import (
11 |     "bytes"
12 |     "fmt"
13 |     "os"
14 | )
15 |
16 | func EnvironmentSubstitute(subject string) (string, error) {
17 |     // This thing is extremely unoptimized but right now it doesn't really
18 |     // need to be fast. We iterate through the string and look for "${",
19 |     // then take everything until the next "}".
20 |
21 |     var result bytes.Buffer
22 |
23 |     for i := 0; i < len(subject); i++ {
24 |         if i < len(subject)-1 && subject[i] == '$' && subject[i+1] == '{' {
25 |             var env_name bytes.Buffer
26 |             j := i + 2
27 |             for ; j < len(subject); j++ {
28 |                 if subject[j] == '}' {
29 |                     env_value, present := os.LookupEnv(env_name.String())
30 |                     if !present {
31 |                         return "", fmt.Errorf("Environment variable '%v' is not defined.
Cannot perform substitution on '%s'", env_name.String(), subject)
32 |                     }
33 |                     result.WriteString(env_value)
34 |                     break
35 |                 } else {
36 |                     env_name.WriteByte(subject[j])
37 |                 }
38 |             }
39 |             if j >= len(subject) {
40 |                 return "", fmt.Errorf("No matching } found in '%s'", subject)
41 |             }
42 |             i = j
43 |             continue
44 |         } else {
45 |             result.WriteByte(subject[i])
46 |         }
47 |     }
48 |
49 |     return result.String(), nil
50 | }
51 |
-------------------------------------------------------------------------------- /frontend/src/components/NameFormatter/NameFormatter.jsx: --------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import ReactTooltip from 'react-tooltip';
3 |
4 | export default class NameFormatter extends React.Component {
5 |     static propTypes = {
6 |         value: PropTypes.string.isRequired,
7 |         // dependentValues contains the row. It is typically set using getRowMetaData.
8 |         dependentValues: PropTypes.object,
9 |         id: PropTypes.string
10 |     };
11 |
12 |     render() {
13 |         const name = this.props.value;
14 |         const adaptedNameSplit = name.split('-');
15 |         let adaptedName = adaptedNameSplit.slice(2, adaptedNameSplit.length).join('-');
16 |         const id = this.props.id;
17 |
18 |         /* drop pybatch prefix if it's there; it's just noise */
19 |         if (!name.startsWith("pybatch-")) {
20 |             adaptedName = name;
21 |         }
22 |         const job = this.props.dependentValues;
23 |         return (
24 |             <span>
25 |                 { job && job.array_properties &&
26 |                     <span>
27 |                         <span data-tip data-for={ 'array-job-' + id }>
28 |                             ◱
29 |                         </span>
30 |                         <ReactTooltip id={ 'array-job-' + id } effect='solid'>
31 |                             Parent Array Job
32 |                         </ReactTooltip>
33 |                     </span>
34 |                 }
35 |                 { adaptedName }
36 |                 { id && <small> ({ id })</small> }
37 |             </span>
38 |         );
39 |     }
40 | };
41 |
-------------------------------------------------------------------------------- /migrations/00014_add_instance_id_activity.sql: --------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 |
4 | CREATE TABLE instances (
5 |     appeared_at timestamp with time zone NOT NULL,
6 |     disappeared_at timestamp with time zone,
7 |     launched_at timestamp with time zone,
8 |     ami TEXT NOT NULL,
9 |     instance_id TEXT NOT NULL PRIMARY KEY,
10 |     instance_type TEXT NOT NULL,
11 |     compute_environment_arn TEXT NOT NULL,
12 |     ecs_cluster_arn TEXT NOT NULL,
13 |     availability_zone TEXT NOT NULL,
14 |     spot_instance_request_id TEXT,
15 |     private_ip_address TEXT,
16 |     public_ip_address TEXT
17 | );
18 |
19 | CREATE TABLE instance_event_log (
20 |     timestamp timestamp with time zone NOT NULL,
21 |     instance_id TEXT NOT NULL,
22 |     active_jobs JSONB NOT NULL,
23 |     PRIMARY KEY(timestamp, instance_id)
24 | );
25 |
26 | CREATE INDEX instances_disappeared_at ON instances (disappeared_at);
27 | CREATE INDEX instances_launched_at ON instances (launched_at);
28 | CREATE INDEX instances_appeared_at ON instances (appeared_at);
29 | CREATE INDEX instance_event_log_instance_id ON instance_event_log (instance_id);
30 |
31 | -- +goose Down
32 | -- SQL in this section is executed when the migration is rolled back.
33 | DROP INDEX instances_appeared_at;
34 | DROP INDEX instance_event_log_instance_id;
35 | DROP INDEX instances_disappeared_at;
36 | DROP INDEX instances_launched_at;
37 | DROP TABLE instance_event_log;
38 | DROP TABLE instances;
39 |
-------------------------------------------------------------------------------- /docs/docs/scaling.md: --------------------------------------------------------------------------------
1 | Batchiepatchie - Scaling hack
2 | -----------------------------
3 |
4 | Batchiepatchie has a crude hack that can force the scaling up of AWS Batch
5 | compute environments based on the number of jobs in a job queue.
6 |
7 | It works by periodically adjusting the minimum vCPU count on a compute
8 | environment. This forces AWS Batch to instantly scale up instances, up to the
9 | amount requested.
10 |
11 | This feature has no exposed UI component, so if you want to make use of it, you
12 | must set it up manually:
13 |
14 | 1. Log in to the Batchiepatchie PostgreSQL database.
15 | 2. Modify the `activated_job_queues` table; you need to set `forced_scaling` to true for any job queues you want to use with the scaling hack.
16 |
17 | The following line executed in `psql` would set this behavior for all job queues:
18 |
19 | ```psql
20 | UPDATE activated_job_queues SET forced_scaling = 't';
21 | ```
22 |
23 | #### Caveats
24 |
25 | * If someone deactivates and then re-activates a job queue in the UI, the setting
26 |   is reset and no scaling will occur.
27 |
28 | * The scaling is done on compute environments, yet the setting is set on job queues.
29 |   If two job queues are attached to some compute environment but only one of them has
30 |   `forced_scaling=t`, then the scaling will only take into account the jobs on one of the
31 |   job queues.
32 |
33 | * Scaling is not supported for job queues that are attached to multiple compute environments.
34 |
35 | * The scaling only works on managed AWS Batch compute environments. It does nothing if
36 |   the attached compute environment is unmanaged.
37 |
38 | Due to the fragile nature of this feature, it is, by default, disabled and out
39 | of sight. In the future, we may remove this functionality.
40 |
-------------------------------------------------------------------------------- /migrations/00018_job_status_events.sql: --------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 | CREATE TABLE job_status_events ( 4 | job_id CHAR(36) NOT NULL PRIMARY KEY, 5 | updated timestamp with time zone NOT NULL 6 | ); 7 | 8 | -- +goose StatementBegin 9 | CREATE FUNCTION job_status_update_update() RETURNS trigger AS 10 | $body$ 11 | BEGIN 12 | IF NEW.status <> OLD.status THEN 13 | INSERT INTO job_status_events ( job_id, updated ) VALUES ( NEW.job_id, now() ) ON CONFLICT ( job_id ) DO UPDATE SET updated = now(); 14 | END IF; 15 | RETURN NEW; 16 | END; 17 | $body$ LANGUAGE plpgsql; 18 | -- +goose StatementEnd 19 | 20 | -- +goose StatementBegin 21 | CREATE FUNCTION job_status_update_insert() RETURNS trigger AS 22 | $body$ 23 | BEGIN 24 | INSERT INTO job_status_events ( job_id, updated ) VALUES ( NEW.job_id, now() ) ON CONFLICT ( job_id ) DO UPDATE SET updated = now(); 25 | RETURN NEW; 26 | END; 27 | $body$ LANGUAGE plpgsql; 28 | -- +goose StatementEnd 29 | 30 | -- +goose StatementBegin 31 | CREATE TRIGGER job_status_update_trigger_insert 32 | AFTER 33 | INSERT 34 | ON jobs 35 | FOR EACH ROW 36 | EXECUTE PROCEDURE job_status_update_insert(); 37 | -- +goose StatementEnd 38 | 39 | -- +goose StatementBegin 40 | CREATE TRIGGER job_status_update_trigger_update 41 | AFTER 42 | UPDATE 43 | ON jobs 44 | FOR EACH ROW 45 | EXECUTE PROCEDURE job_status_update_update(); 46 | -- +goose StatementEnd 47 | 48 | -- +goose Down 49 | -- SQL in this section is executed when the migration is rolled back. 50 | DROP TRIGGER job_status_update_trigger_insert ON jobs; 51 | DROP TRIGGER job_status_update_trigger_update ON jobs; 52 | DROP FUNCTION job_status_update_update(); 53 | DROP FUNCTION job_status_update_insert(); 54 | DROP TABLE job_status_events; 55 | 56 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | api: 3 | build: . 4 | environment: 5 | - BUILD_ENV=${BUILD_ENV} 6 | - BATCHIEPATCHIE_CONFIG=batchiepatchie-dockercompose-config.toml 7 | - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 8 | - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 9 | volumes: 10 | - .:/go/src/github.com/AdRoll/batchiepatchie 11 | ports: 12 | - "9999:9999" 13 | - "5454:5454" 14 | privileged: true # Privileges are dropped by docker_run.sh, privileges needed for user setup in local development 15 | ulimits: 16 | nproc: 65535 17 | nofile: 18 | soft: 90000 19 | hard: 90000 20 | depends_on: 21 | postgres: 22 | condition: service_healthy 23 | migrations: 24 | condition: service_completed_successfully 25 | 26 | migrations: 27 | build: . 
28 | volumes: 29 | - .:/go/src/github.com/AdRoll/batchiepatchie 30 | command: sh -c 'cd migrations && goose postgres "user=postgres dbname=postgres sslmode=disable host=postgres password=123456" up' 31 | depends_on: 32 | postgres: 33 | condition: service_healthy 34 | 35 | 36 | postgres: 37 | image: postgres:9.6.2-alpine 38 | ports: 39 | - 5432:5432 40 | environment: 41 | POSTGRES_PASSWORD: 123456 42 | healthcheck: 43 | test: ["CMD", "pg_isready", "-U", "postgres"] 44 | interval: 5s 45 | timeout: 5s 46 | retries: 5 47 | 48 | frontend: 49 | build: 50 | context: ./frontend 51 | dockerfile: Dockerfile 52 | volumes: 53 | - ./frontend/src:/opt/frontend/src 54 | - ./frontend/public:/opt/frontend/public 55 | ports: 56 | - "8080:8080" 57 | command: npm run dev 58 | 59 | networks: 60 | default: 61 | ipam: 62 | config: 63 | - subnet: "172.29.0.0/16" 64 | 65 | -------------------------------------------------------------------------------- /migrations/00016_revert_jobs_full_text_search.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | 4 | DROP INDEX jobs_weighted_sv_idx; 5 | DROP TRIGGER jobs_update_tsvector on jobs; 6 | ALTER TABLE jobs DROP COLUMN weighted_search_vector; 7 | 8 | -- +goose Down 9 | -- SQL in this section is executed when the migration is rolled back. 10 | 11 | -- add a column to store the searchable info for jobs. 12 | ALTER TABLE jobs ADD COLUMN weighted_search_vector tsvector; 13 | 14 | -- updates all job entries with the searchable information; 15 | UPDATE jobs SET 16 | weighted_search_vector = x.weighted_tsv 17 | FROM ( 18 | SELECT job_id, 19 | to_tsvector(jobs.job_id) || 20 | to_tsvector(jobs.job_name) || 21 | to_tsvector(jobs.job_definition) || 22 | to_tsvector(jobs.job_queue) || 23 | to_tsvector(jobs.image) || 24 | to_tsvector(jobs.command_line) AS weighted_tsv 25 | FROM jobs 26 | ) AS x 27 | WHERE x.job_id = jobs.job_id; 28 | 29 | -- a trigger to generate searchable information for each new entry. 30 | -- +goose StatementBegin 31 | CREATE FUNCTION jobs_weighted_search_vector_trigger() RETURNS trigger AS $$ 32 | begin 33 | new.weighted_search_vector := 34 | to_tsvector(new.job_id) || 35 | to_tsvector(new.job_name) || 36 | to_tsvector(new.job_definition) || 37 | to_tsvector(new.job_queue) || 38 | to_tsvector(new.image) || 39 | to_tsvector(new.command_line); 40 | return new; 41 | end; 42 | $$ 43 | LANGUAGE plpgsql; 44 | -- +goose StatementEnd 45 | 46 | -- use the function as a trigger. 47 | CREATE TRIGGER jobs_update_tsvector BEFORE INSERT OR UPDATE 48 | ON jobs 49 | FOR EACH ROW EXECUTE PROCEDURE jobs_weighted_search_vector_trigger(); 50 | 51 | -- create an index for the jobs search info. 
52 | CREATE INDEX jobs_weighted_sv_idx ON jobs USING GIST(weighted_search_vector); 53 | -------------------------------------------------------------------------------- /frontend/src/components/SectionLoader/SectionLoader.scss: -------------------------------------------------------------------------------- 1 | // Based on https://loading.io/css/ 2 | .lds-spinner { 3 | color: official; 4 | display: inline-block; 5 | position: relative; 6 | width: 20px; 7 | height: 20px; 8 | } 9 | .lds-spinner div { 10 | transform-origin: 10px 10px; 11 | animation: lds-spinner 1.2s linear infinite; 12 | } 13 | .lds-spinner div:after { 14 | content: " "; 15 | display: block; 16 | position: absolute; 17 | top: 0.75px; 18 | left: 9.25px; 19 | width: 1.5px; 20 | height: 4.5px; 21 | border-radius: 20%; 22 | background: black; 23 | } 24 | .lds-spinner div:nth-child(1) { 25 | transform: rotate(0deg); 26 | animation-delay: -1.1s; 27 | } 28 | .lds-spinner div:nth-child(2) { 29 | transform: rotate(30deg); 30 | animation-delay: -1s; 31 | } 32 | .lds-spinner div:nth-child(3) { 33 | transform: rotate(60deg); 34 | animation-delay: -0.9s; 35 | } 36 | .lds-spinner div:nth-child(4) { 37 | transform: rotate(90deg); 38 | animation-delay: -0.8s; 39 | } 40 | .lds-spinner div:nth-child(5) { 41 | transform: rotate(120deg); 42 | animation-delay: -0.7s; 43 | } 44 | .lds-spinner div:nth-child(6) { 45 | transform: rotate(150deg); 46 | animation-delay: -0.6s; 47 | } 48 | .lds-spinner div:nth-child(7) { 49 | transform: rotate(180deg); 50 | animation-delay: -0.5s; 51 | } 52 | .lds-spinner div:nth-child(8) { 53 | transform: rotate(210deg); 54 | animation-delay: -0.4s; 55 | } 56 | .lds-spinner div:nth-child(9) { 57 | transform: rotate(240deg); 58 | animation-delay: -0.3s; 59 | } 60 | .lds-spinner div:nth-child(10) { 61 | transform: rotate(270deg); 62 | animation-delay: -0.2s; 63 | } 64 | .lds-spinner div:nth-child(11) { 65 | transform: rotate(300deg); 66 | animation-delay: -0.1s; 67 | } 68 | .lds-spinner div:nth-child(12) { 69 | transform: rotate(330deg); 70 | animation-delay: 0s; 71 | } 72 | @keyframes lds-spinner { 73 | 0% { 74 | opacity: 1; 75 | } 76 | 100% { 77 | opacity: 0; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /migrations/00002_jobs_full_text_search.sql: -------------------------------------------------------------------------------- 1 | -- +goose Up 2 | -- SQL in this section is executed when the migration is applied. 3 | 4 | -- add a column to store the searchable info for jobs. 5 | ALTER TABLE jobs ADD COLUMN weighted_search_vector tsvector; 6 | 7 | -- updates all job entries with the searchable information; 8 | UPDATE jobs SET 9 | weighted_search_vector = x.weighted_tsv 10 | FROM ( 11 | SELECT job_id, 12 | to_tsvector(jobs.job_id) || 13 | to_tsvector(jobs.job_name) || 14 | to_tsvector(jobs.job_definition) || 15 | to_tsvector(jobs.job_queue) || 16 | to_tsvector(jobs.image) || 17 | to_tsvector(jobs.command_line) AS weighted_tsv 18 | FROM jobs 19 | ) AS x 20 | WHERE x.job_id = jobs.job_id; 21 | 22 | -- a trigger to generate searchable information for each new entry. 
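-- (Once populated, the column can serve full-text lookups; a sketch of the
--  intended usage, not a query shipped with this migration, would be:
--  SELECT job_id FROM jobs WHERE weighted_search_vector @@ plainto_tsquery('some job');)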
23 | -- +goose StatementBegin 24 | CREATE FUNCTION jobs_weighted_search_vector_trigger() RETURNS trigger AS $$ 25 | begin 26 | new.weighted_search_vector := 27 | to_tsvector(new.job_id) || 28 | to_tsvector(new.job_name) || 29 | to_tsvector(new.job_definition) || 30 | to_tsvector(new.job_queue) || 31 | to_tsvector(new.image) || 32 | to_tsvector(new.command_line); 33 | return new; 34 | end; 35 | $$ 36 | LANGUAGE plpgsql; 37 | -- +goose StatementEnd 38 | 39 | -- use the function as a trigger. 40 | -- +goose StatementBegin 41 | CREATE TRIGGER jobs_update_tsvector BEFORE INSERT OR UPDATE 42 | ON jobs 43 | FOR EACH ROW EXECUTE PROCEDURE jobs_weighted_search_vector_trigger(); 44 | -- +goose StatementEnd 45 | 46 | -- create an index for the jobs search info. 47 | CREATE INDEX jobs_weighted_sv_idx ON jobs USING GIST(weighted_search_vector); 48 | 49 | 50 | -- +goose Down 51 | -- SQL in this section is executed when the migration is rolled back. 52 | 53 | DROP INDEX jobs_weighted_sv_idx; 54 | DROP TRIGGER jobs_update_tsvector on jobs; 55 | DROP FUNCTION jobs_weighted_search_vector_trigger CASCADE; 56 | ALTER TABLE jobs DROP COLUMN weighted_search_vector; 57 | -------------------------------------------------------------------------------- /fetcher/fetcher.go: -------------------------------------------------------------------------------- 1 | package fetcher 2 | 3 | // This module is just a wrapper that can either fetch files out of S3 or 4 | // locally. 5 | 6 | import ( 7 | "io" 8 | "os" 9 | "regexp" 10 | 11 | "github.com/AdRoll/batchiepatchie/awsclients" 12 | "github.com/aws/aws-sdk-go/aws" 13 | "github.com/aws/aws-sdk-go/aws/session" 14 | "github.com/aws/aws-sdk-go/service/s3" 15 | "github.com/aws/aws-sdk-go/service/s3/s3manager" 16 | ) 17 | 18 | var s3Regex = regexp.MustCompile("^s3://([^/]+)/(.+)$") 19 | 20 | func ReadAllNoSessions(location string) ([]byte, error) { 21 | s3match := s3Regex.FindStringSubmatch(location) 22 | if s3match == nil { 23 | return readAllLocalFile(location) 24 | } 25 | 26 | // This function is like ReadAll but does not rely on awsclients package having been set up yet. 
27 | ses := session.Must(session.NewSession(&aws.Config{Region: aws.String("us-east-1"), MaxRetries: aws.Int(10)})) 28 | region_loc, err := s3manager.GetBucketRegion(aws.BackgroundContext(), ses, s3match[1], "us-east-1") 29 | if err != nil { 30 | return nil, err 31 | } 32 | session := session.Must(session.NewSession(&aws.Config{Region: aws.String(region_loc)})) 33 | s3s := s3.New(session) 34 | 35 | result, err := s3s.GetObject(&s3.GetObjectInput{ 36 | Bucket: aws.String(s3match[1]), 37 | Key: aws.String(s3match[2]), 38 | }) 39 | if err != nil { 40 | return nil, err 41 | } 42 | defer result.Body.Close() 43 | return io.ReadAll(result.Body) 44 | } 45 | 46 | func ReadAll(location string) ([]byte, error) { 47 | s3match := s3Regex.FindStringSubmatch(location) 48 | if s3match == nil { 49 | return readAllLocalFile(location) 50 | } 51 | 52 | bucket := s3match[1] 53 | key := s3match[2] 54 | 55 | s3client, err := awsclients.GetS3ClientForBucket(bucket) 56 | if err != nil { 57 | return nil, err 58 | } 59 | 60 | result, err := s3client.GetObject(&s3.GetObjectInput{ 61 | Bucket: aws.String(bucket), 62 | Key: aws.String(key), 63 | }) 64 | if err != nil { 65 | return nil, err 66 | } 67 | 68 | defer result.Body.Close() 69 | return io.ReadAll(result.Body) 70 | } 71 | 72 | func readAllLocalFile(location string) ([]byte, error) { 73 | return os.ReadFile(location) 74 | } 75 | -------------------------------------------------------------------------------- /awsclients/awsclients.go: -------------------------------------------------------------------------------- 1 | package awsclients 2 | 3 | // This module just consolidates all Client objects in one place so we don't 4 | // hammer metadata services or anything. 5 | 6 | import ( 7 | "sync" 8 | 9 | "github.com/aws/aws-sdk-go/aws" 10 | "github.com/aws/aws-sdk-go/aws/session" 11 | "github.com/aws/aws-sdk-go/service/batch" 12 | "github.com/aws/aws-sdk-go/service/cloudwatchlogs" 13 | "github.com/aws/aws-sdk-go/service/ec2" 14 | "github.com/aws/aws-sdk-go/service/ecs" 15 | "github.com/aws/aws-sdk-go/service/s3" 16 | "github.com/aws/aws-sdk-go/service/s3/s3manager" 17 | ) 18 | 19 | var Session *session.Session 20 | var s3B map[string]*s3.S3 21 | var s3R map[string]*s3.S3 22 | var Batch *batch.Batch 23 | var ECS *ecs.ECS 24 | var EC2 *ec2.EC2 25 | var CloudWatchLogs *cloudwatchlogs.CloudWatchLogs 26 | var S3General *s3.S3 27 | 28 | var s3Lock = &sync.Mutex{} 29 | 30 | func GetS3ClientForBucket(bucket string) (*s3.S3, error) { 31 | s3Lock.Lock() 32 | 33 | region, ok := s3B[bucket] 34 | if !ok { 35 | // Unlock the mutex for the duration of getting bucket location. 
36 | s3Lock.Unlock() 37 | region_loc, err := s3manager.GetBucketRegion(aws.BackgroundContext(), Session, bucket, "us-east-1") 38 | if err != nil { 39 | return nil, err 40 | } 41 | s3Lock.Lock() 42 | 43 | region_svc, ok := s3R[region_loc] 44 | if !ok { 45 | s3Lock.Unlock() 46 | session := session.Must( 47 | session.NewSession(&aws.Config{Region: aws.String(region_loc)})) 48 | region_svc_loc := s3.New(session) 49 | s3Lock.Lock() 50 | s3R[region_loc] = region_svc_loc 51 | region_svc = region_svc_loc 52 | } 53 | s3B[bucket] = region_svc 54 | region = region_svc 55 | } 56 | 57 | s3Lock.Unlock() 58 | return region, nil 59 | } 60 | 61 | func OpenSessions(region string) error { 62 | conf := &aws.Config{ 63 | Region: aws.String(region), 64 | MaxRetries: aws.Int(10), 65 | } 66 | Session = session.Must(session.NewSession(conf)) 67 | Batch = batch.New(Session) 68 | S3General = s3.New(Session) 69 | ECS = ecs.New(Session) 70 | EC2 = ec2.New(Session) 71 | s3B = make(map[string]*s3.S3) 72 | s3R = make(map[string]*s3.S3) 73 | CloudWatchLogs = cloudwatchlogs.New(Session) 74 | 75 | return nil 76 | } 77 | -------------------------------------------------------------------------------- /frontend/src/containers/LayoutContainer/LayoutContainer.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import { connect } from 'react-redux'; 3 | import Menu from 'components/Menu/Menu'; 4 | import Search from 'components/Search/Search'; 5 | import { setPageDimensions } from 'stores/layout'; 6 | import './LayoutContainer.scss'; 7 | 8 | class LayoutContainer extends React.Component { 9 | static propTypes = { 10 | children: PropTypes.element.isRequired, 11 | path: PropTypes.string.isRequired, 12 | setPageDimensions: PropTypes.func.isRequired 13 | }; 14 | 15 | componentDidMount() { 16 | this.onResize(); 17 | window.addEventListener('resize', this.onResize); 18 | } 19 | 20 | componentWillUnmount() { 21 | window.removeEventListener('resize', this.onResize); 22 | } 23 | 24 | render() { 25 | const onJobsPage = this.props.path === process.env.BASE_URL + '/'; 26 | return ( 27 |
<div className='layout-container'>
28 |                 <h1>
29 |                     Batchiepatchie
30 |                 </h1>
31 |                 <div className='search'>
32 |                     { onJobsPage && <Search /> }
33 |                 </div>
34 |                 <Menu />
49 | ); 50 | } 51 | 52 | onResize = () => { 53 | this.props.setPageDimensions({ height: window.innerHeight, width: window.innerWidth }); 54 | } 55 | } 56 | 57 | const mapStateToProps = state => ({ 58 | path: state.routing.locationBeforeTransitions.pathname 59 | }); 60 | 61 | const actions = { 62 | setPageDimensions 63 | }; 64 | 65 | export default connect(mapStateToProps, actions)(LayoutContainer); 66 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/AdRoll/batchiepatchie 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/BurntSushi/toml v1.2.0 7 | github.com/aws/aws-sdk-go v1.44.62 8 | github.com/bakatz/echo-logrusmiddleware v1.1.1 9 | github.com/gorilla/websocket v1.5.0 10 | github.com/jcftang/logentriesrus v0.0.0-20220725204439-b4dedce84d23 11 | github.com/labstack/echo v3.3.10+incompatible 12 | github.com/labstack/gommon v0.3.1 13 | github.com/lib/pq v1.10.6 14 | github.com/opentracing/opentracing-go v1.2.0 15 | github.com/sirupsen/logrus v1.9.0 16 | gopkg.in/DataDog/dd-trace-go.v1 v1.40.1 17 | ) 18 | 19 | require ( 20 | github.com/DataDog/datadog-agent/pkg/obfuscate v0.0.0-20211129110424-6491aa3bf583 // indirect 21 | github.com/DataDog/datadog-go v4.8.2+incompatible // indirect 22 | github.com/DataDog/datadog-go/v5 v5.0.2 // indirect 23 | github.com/DataDog/sketches-go v1.2.1 // indirect 24 | github.com/Microsoft/go-winio v0.5.1 // indirect 25 | github.com/SpalkLtd/le_go v0.0.0-20220711045526-8feb6e635941 // indirect 26 | github.com/cespare/xxhash/v2 v2.1.2 // indirect 27 | github.com/dgraph-io/ristretto v0.1.0 // indirect 28 | github.com/dustin/go-humanize v1.0.0 // indirect 29 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect 30 | github.com/google/uuid v1.3.0 // indirect 31 | github.com/jmespath/go-jmespath v0.4.0 // indirect 32 | github.com/josharian/intern v1.0.0 // indirect 33 | github.com/mailru/easyjson v0.7.7 // indirect 34 | github.com/mattn/go-colorable v0.1.11 // indirect 35 | github.com/mattn/go-isatty v0.0.14 // indirect 36 | github.com/philhofer/fwd v1.1.1 // indirect 37 | github.com/pkg/errors v0.9.1 // indirect 38 | github.com/tinylib/msgp v1.1.2 // indirect 39 | github.com/valyala/bytebufferpool v1.0.0 // indirect 40 | github.com/valyala/fasttemplate v1.2.1 // indirect 41 | golang.org/x/crypto v0.0.0-20220214200702-86341886e292 // indirect 42 | golang.org/x/net v0.0.0-20220225172249-27dd8689420f // indirect 43 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect 44 | golang.org/x/text v0.3.7 // indirect 45 | golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11 // indirect 46 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect 47 | google.golang.org/protobuf v1.27.1 // indirect 48 | ) 49 | -------------------------------------------------------------------------------- /docs/docs/overview.md: -------------------------------------------------------------------------------- 1 | Batchiepatchie - Overview 2 | ========================= 3 | 4 | Batchiepatchie is a monitoring tool for AWS Batch. It is written in the Go 5 | language. 6 | 7 | AWS Batch is a service, provided by Amazon Web Services, that runs docker 8 | containers on EC2 instances. Typically, these EC2 instances are brought up when 9 | batch jobs are submitted and scaled down when there are no jobs to run. 
At a high
10 | level, you tell AWS Batch "Please run my docker container located at URL X,
11 | with N cpus and M gigabytes of memory" and AWS Batch will figure it out.
12 | Detailed documentation on AWS Batch can be found on [their
13 | website](https://aws.amazon.com/documentation/batch/).
14 |
15 | Batchiepatchie exists because the user interface on Amazon's own dashboard leaves
16 | certain things to be desired. In particular, Batchiepatchie strives to make the following
17 | use cases easier:
18 |
19 | * Find currently running and historical jobs very quickly among thousands of other jobs.
20 |
21 | * Find and read the logs of any job without having to navigate through a complicated UI.
22 |
23 | * Work around some quirks in AWS Batch itself.
24 |
25 | * Implement timeouts for AWS Batch jobs.
26 |
27 | * Collect historical information about jobs.
28 |
29 | * Make it easy to cancel jobs en masse.
30 |
31 | Batchiepatchie has a search box that is designed to work fast with free-form
32 | text. Batchiepatchie will also remember jobs forever, so you should be able to
33 | find jobs even from months in the past in seconds.
34 |
35 | AWS Batch jobs place standard output and error from jobs into CloudWatch logs.
36 | Batchiepatchie knows how to find these logs and display them directly in its web
37 | interface, saving valuable time when you need to read the logs of a batch job.
38 |
39 | Batchiepatchie has some features to cancel many jobs at once. This is useful
40 | when someone submits a large distributed job by mistake and it needs to be
41 | killed.
42 |
43 | Batchiepatchie collects data about instances and ECS clusters used by batch
44 | jobs in a PostgreSQL database. The data can later be used to analyze the costs
45 | and behaviour of batch jobs.
46 |
47 | One major feature of AWS Batch that is not currently properly supported in
48 | Batchiepatchie is array jobs. The parent job will show up but child jobs will
49 | not display properly.
50 | -------------------------------------------------------------------------------- /frontend/src/index.scss: -------------------------------------------------------------------------------- 1 | $icon-font-path: "~bootstrap-sass/assets/fonts/bootstrap/"; 2 | @import "~bootswatch/dist/simplex/variables"; 3 | @import "~bootstrap/scss/bootstrap"; 4 | @import "~bootswatch/dist/simplex/bootswatch"; 5 | 6 | body { 7 | } 8 | 9 | .alert-warning { 10 | background: #ffb; 11 | border: solid 1px #efef77; 12 | } 13 | 14 | .alert-danger { 15 | background: #faa; 16 | border: solid 1px #eaa; 17 | } 18 | 19 | .nav { 20 | font-weight: bold; 21 | li { 22 | margin: 10px; 23 | } 24 | .active a { 25 | text-decoration: underline; 26 | color: #000; 27 | } 28 | } 29 | 30 | .pagination { 31 | display: inline-block; 32 | margin-top: 10px; 33 | text-align: center; 34 | font-size: 1.2em; 35 | .disabled { 36 | color: #aaa; 37 | } 38 | li { 39 | padding: 10px; 40 | border-top: 1px solid #aaa; 41 | border-bottom: 1px solid #aaa; 42 | float: left; 43 | } 44 | li :hover { 45 | cursor: pointer; 46 | } 47 | } 48 | 49 | pre { 50 | padding: 8px; 51 | background: #f0f0f0; 52 | border: 1px solid #aaa; 53 | } 54 | 55 | .terminal { 56 | background: #f0f0f0; 57 | } 58 | 59 | .col-xs-5ths, .col-xs-2-5ths, .col-xs-3-5ths { 60 | position: relative; 61 | min-height: 1px; 62 | padding-right: 15px; 63 | padding-left: 15px; 64 | } 65 | 66 | .col-xs-5ths { 67 | width: 20%; 68 | float: left; 69 | } 70 | 71 | .col-xs-2-5ths { 72 | width: 40%; 73 | float: left; 74 | } 75 | 76 | .col-xs-3-5ths { 77 | width: 60%; 78 | float: left; 79 | } 80 | 81 | .clear { 82 | clear: both; 83 | } 84 | 85 | .Select-menu-outer { 86 | max-height: 400px; 87 | } 88 | .Select-menu { 89 | max-height: 400px; 90 | } 91 | 92 | // override stickiness of checkbox-select frozen header row: 93 | // https://github.com/adazzle/react-data-grid/issues/1386#issuecomment-510532495 94 | // the full select column isn't frozen so alignment gets off during horizontal scrolling 95 | .react-grid-Row .react-grid-Cell--frozen { 96 | transform: translate3d(0px, 0px, 0px) !important; 97 | } 98 | 99 | .react-grid-HeaderRow .react-grid-HeaderCell--frozen { 100 | transform: translate3d(0px, 0px, 0px) !important; 101 | } 102 | -------------------------------------------------------------------------------- /jobs/killer_handler.go: -------------------------------------------------------------------------------- 1 | package jobs 2 | 3 | import ( 4 | "github.com/AdRoll/batchiepatchie/awsclients" 5 | "github.com/aws/aws-sdk-go/aws" 6 | "github.com/aws/aws-sdk-go/service/batch" 7 | "github.com/aws/aws-sdk-go/service/ec2" 8 | "github.com/opentracing/opentracing-go" 9 | log "github.com/sirupsen/logrus" 10 | ) 11 | 12 | type KillerHandler struct { 13 | } 14 | 15 | func (th *KillerHandler) KillOne(jobID string, reason string, store Storer) error { 16 | span := opentracing.StartSpan("KillOne") 17 | defer span.Finish() 18 | 19 | input := &batch.TerminateJobInput{ 20 | JobId: aws.String(jobID), 21 | Reason: aws.String("Cancelled job from batchiepatchie: " + reason), 22 | } 23 | 24 | log.Info("Killing Job ", jobID, "...") 25 | _, err := awsclients.Batch.TerminateJob(input) 26 | if err != nil { 27 | log.Warning("Killing job failed: ", err) 28 | return err 29 | } 30 | 31 | return store.UpdateJobLogTerminationRequested(jobID) 32 | } 33 | 34 | func (th *KillerHandler) KillInstances(instances []string) error { 35 | span := opentracing.StartSpan("KillInstances") 36 | defer span.Finish() 37 | 38 | // Exit early 
if there are no instances to kill.
39 |     if len(instances) == 0 {
40 |         return nil
41 |     }
42 |     /* While the terminate instances call accepts batches, we deliberately call
43 |      * it one instance at a time. The API call won't terminate anything if
44 |      * even one of the instance IDs is wrong but we still do want to
45 |      * terminate the others.
46 |
47 |     This shouldn't be too inefficient since most of the time there's only
48 |     one or two instances to terminate this way anyway. */
49 |
50 |     var final_ret error
51 |
52 |     for _, instance_id := range instances {
53 |         instances_ptr := make([]*string, 1)
54 |         instances_ptr[0] = &instance_id
55 |         terminate_instances := &ec2.TerminateInstancesInput{
56 |             InstanceIds: instances_ptr,
57 |         }
58 |         _, err := awsclients.EC2.TerminateInstances(terminate_instances)
59 |         if err != nil {
60 |             log.Warning("Cannot terminate instance ", instance_id, ": ", err)
61 |             // Don't return early but record the error and skip the success log.
62 |             final_ret = err
63 |             continue
64 |         }
65 |         log.Info("Terminated instance ", instance_id, " because it has a job at STARTING state stuck.")
66 |     }
67 |
68 |     return final_ret
69 | }
70 |
71 | func NewKillerHandler() (Killer, error) {
72 |     var ret Killer = new(KillerHandler)
73 |     return ret, nil
74 | }
75 |
-------------------------------------------------------------------------------- /frontend/src/stores/jobqueue.js: --------------------------------------------------------------------------------
1 | import actionReducer from 'utils/actionReducer';
2 | import JobsApi from 'api/api';
3 | import { fetchDataMultiple, JOB_QUEUES_ALL, JOB_QUEUES_ACTIVATED } from './status';
4 |
5 | export const SET_JOB_QUEUE_ACTIVATED_QUEUES = 'SET_JOB_QUEUE_ACTIVATED_QUEUES';
6 | export const SET_JOB_QUEUE_ALL_QUEUES = 'SET_JOB_QUEUE_ALL_QUEUES';
7 |
8 | const initialState = {
9 |     allJobQueues: [],
10 |     activatedJobQueues: []
11 | };
12 |
13 | const actions = {};
14 |
15 | actions[SET_JOB_QUEUE_ACTIVATED_QUEUES] = (state, { payload }) => {
16 |     return {
17 |         ...state,
18 |         activatedJobQueues: payload
19 |     };
20 | };
21 |
22 | actions[SET_JOB_QUEUE_ALL_QUEUES] = (state, { payload }) => {
23 |     return {
24 |         ...state,
25 |         allJobQueues: payload
26 |     };
27 | };
28 |
29 | export function setJobQueues(job_queues) {
30 |     return {
31 |         type: SET_JOB_QUEUE_ACTIVATED_QUEUES,
32 |         payload: job_queues
33 |     };
34 | };
35 |
36 | export function setAllJobQueues(job_queues) {
37 |     return {
38 |         type: SET_JOB_QUEUE_ALL_QUEUES,
39 |         payload: job_queues
40 |     };
41 | };
42 |
43 | export function fetchJobQueues() {
44 |     return fetchDataMultiple([
45 |         {
46 |             status: JOB_QUEUES_ACTIVATED,
47 |             fetch: fetchJobQueuesInner,
48 |             result: setJobQueues
49 |         }
50 |     ]);
51 | }
52 |
53 | export function fetchAllJobQueues() {
54 |     return fetchDataMultiple([
55 |         {
56 |             status: JOB_QUEUES_ALL,
57 |             fetch: fetchAllJobQueuesInner,
58 |             result: setAllJobQueues
59 |         }
60 |     ]);
61 | }
62 |
63 | export function activateJobQueue(job_queue_name) {
64 |     return (dispatch, getState) => {
65 |         return JobsApi.activateJobQueue(job_queue_name);
66 |     };
67 | }
68 |
69 | export function deactivateJobQueue(job_queue_name) {
70 |     return (dispatch, getState) => {
71 |         return JobsApi.deactivateJobQueue(job_queue_name);
72 |     };
73 | }
74 |
75 | function fetchJobQueuesInner() {
76 |     return (dispatch, getState) => {
77 |         const state = getState();
78 |         return JobsApi.getJobQueues();
79 |     };
80 | };
81 |
82 | function fetchAllJobQueuesInner() {
83 |     return (dispatch, getState) => {
84 |         const state = getState();
85 |         return JobsApi.getAllJobQueues();
}; 87 | }; 88 | 89 | // Root reducer 90 | export default actionReducer(actions, initialState); 91 | -------------------------------------------------------------------------------- /jobs/compute_environment_monitor.go: -------------------------------------------------------------------------------- 1 | package jobs 2 | 3 | import ( 4 | "github.com/AdRoll/batchiepatchie/awsclients" 5 | "github.com/aws/aws-sdk-go/service/batch" 6 | "github.com/opentracing/opentracing-go" 7 | log "github.com/sirupsen/logrus" 8 | ) 9 | 10 | func GetComputeEnvironments(parentSpan opentracing.Span) ([]ComputeEnvironment, error) { 11 | span := opentracing.StartSpan("GetComputeEnvironments", opentracing.ChildOf(parentSpan.Context())) 12 | defer span.Finish() 13 | 14 | var nextToken *string 15 | var hundred int64 16 | 17 | compute_environments := make([]*batch.ComputeEnvironmentDetail, 0) 18 | 19 | for { 20 | hundred = 100 21 | out, err := awsclients.Batch.DescribeComputeEnvironments(&batch.DescribeComputeEnvironmentsInput{ 22 | MaxResults: &hundred, 23 | NextToken: nextToken, 24 | }) 25 | if err != nil { 26 | log.Warning("Failed to fetch compute environments: ", err) 27 | return nil, err 28 | } 29 | nextToken = out.NextToken 30 | 31 | compute_environments = append(compute_environments, out.ComputeEnvironments...) 32 | 33 | if nextToken == nil { 34 | break 35 | } 36 | } 37 | 38 | /* Transform into our internal format, which is a bit nicer */ 39 | ce_lst := make([]ComputeEnvironment, 0) 40 | for _, ce_aws := range compute_environments { 41 | if ce_aws.ComputeEnvironmentName != nil && 42 | ce_aws.ComputeResources != nil && 43 | ce_aws.ServiceRole != nil && 44 | ce_aws.State != nil && 45 | ce_aws.ComputeResources.MaxvCpus != nil && 46 | ce_aws.ComputeResources.MinvCpus != nil && 47 | ce_aws.ComputeResources.DesiredvCpus != nil { 48 | ce := ComputeEnvironment{ 49 | Name: *ce_aws.ComputeEnvironmentName, 50 | WantedvCpus: *ce_aws.ComputeResources.DesiredvCpus, 51 | MinvCpus: *ce_aws.ComputeResources.MinvCpus, 52 | MaxvCpus: *ce_aws.ComputeResources.MaxvCpus, 53 | State: *ce_aws.State, 54 | ServiceRole: *ce_aws.ServiceRole} 55 | ce_lst = append(ce_lst, ce) 56 | } 57 | } 58 | 59 | return ce_lst, nil 60 | } 61 | 62 | func MonitorComputeEnvironments(fs Storer, queues []string) { 63 | span := opentracing.StartSpan("MonitorComputeEnvironments") 64 | defer span.Finish() 65 | 66 | if len(queues) == 0 { 67 | return 68 | } 69 | 70 | compute_environments, err := GetComputeEnvironments(span) 71 | if err != nil { 72 | log.Warning("Failed to get compute environments: ", err) 73 | return 74 | } 75 | 76 | err = fs.UpdateComputeEnvironmentsLog(compute_environments) 77 | if err != nil { 78 | log.Warning("Failed to update compute environments log: ", err) 79 | return 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /frontend/src/stores/status.js: -------------------------------------------------------------------------------- 1 | import actionReducer from 'utils/actionReducer'; 2 | 3 | // Action names 4 | export const SET_ERROR_STATE = 'SET_ERROR_STATE'; 5 | export const SET_LOADING_STATE = 'SET_LOADING_STATE'; 6 | 7 | // Constants 8 | export const JOB = 'JOB'; 9 | export const JOBS = 'JOBS'; 10 | export const LOGS = 'LOGS'; 11 | export const STATS = 'STATS'; 12 | export const JOB_QUEUES_ALL = 'JOB_QUEUES_ALL'; 13 | export const JOB_QUEUES_ACTIVATED = 'JOB_QUEUES_ACTIVATED'; 14 | export const STATUSES = [ 15 | JOB, 16 | JOBS, 17 | LOGS, 18 | STATS, 19 | JOB_QUEUES_ALL, 20 | 
JOB_QUEUES_ACTIVATED 21 | ]; 22 | 23 | export function setErrorState(namespace, error) { 24 | return { 25 | type: SET_ERROR_STATE, 26 | payload: { 27 | namespace, 28 | error 29 | } 30 | }; 31 | }; 32 | 33 | export function setLoadingState(namespace, loading) { 34 | return { 35 | type: SET_LOADING_STATE, 36 | payload: { 37 | namespace, 38 | loading 39 | } 40 | }; 41 | }; 42 | 43 | const initialState = STATUSES.reduce((state, status) => { 44 | state[status] = { 45 | loading: true, 46 | error: false 47 | }; 48 | return state; 49 | }, {}); 50 | 51 | const actions = {}; 52 | 53 | actions[SET_ERROR_STATE] = (state, { payload: { namespace, error } }) => { 54 | return { 55 | ...state, 56 | [namespace]: { 57 | ...state[namespace], 58 | error 59 | } 60 | }; 61 | }; 62 | 63 | actions[SET_LOADING_STATE] = (state, { payload: { namespace, loading } }) => { 64 | return { 65 | ...state, 66 | [namespace]: { 67 | ...state[namespace], 68 | loading 69 | } 70 | }; 71 | }; 72 | 73 | 74 | function fetchData(dispatch, { status, fetch, result, options }) { 75 | const setLoadingError = (loading, error) => { 76 | dispatch(setErrorState(status, error)); 77 | dispatch(setLoadingState(status, loading)); 78 | }; 79 | setLoadingError(true, false); 80 | 81 | return dispatch(fetch(options)) 82 | .then(data => { 83 | dispatch(result(data)); 84 | setLoadingError(false, false); 85 | }) 86 | .catch((e) => { 87 | console.error(e); 88 | setLoadingError(false, true); 89 | }); 90 | }; 91 | 92 | export function fetchDataMultiple(fetchDataArguments) { 93 | return (dispatch, getState) => { 94 | const state = getState(); 95 | const promises = fetchDataArguments.map(fetchDataArgument => fetchData(dispatch, fetchDataArgument)); 96 | const promise = Promise.all(promises); 97 | return promise; 98 | }; 99 | } 100 | 101 | export default actionReducer(actions, initialState); 102 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "batchiepatchie-frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "bootstrap": "4.3.1", 7 | "bootstrap-sass": "3.4.1", 8 | "bootswatch": "4.1.1", 9 | "classnames": "2.2.5", 10 | "font-awesome": "4.7.0", 11 | "humanize-duration": "3.18.0", 12 | "mixin-deep": "1.3.2", 13 | "moment": "^2.18.1", 14 | "moment-timezone": "^0.5.13", 15 | "numeral": "2.0.6", 16 | "promise-polyfill": "6.0.2", 17 | "query-string": "4.3.4", 18 | "react": "15.4.2", 19 | "react-addons-pure-render-mixin": "15.4.2", 20 | "react-addons-shallow-compare": "15.4.2", 21 | "react-bootstrap": "0.31.0", 22 | "react-data-grid": "^5.0.0", 23 | "react-datetime": "2.16.3", 24 | "react-dom": "15.4.2", 25 | "react-highlight-words": "0.11.0", 26 | "react-input-autosize": "2.2.1", 27 | "react-redux": "5.0.5", 28 | "react-router": "3.0.5", 29 | "react-router-redux": "4.0.8", 30 | "react-select": "1.2.1", 31 | "react-virtualized": "9.8.0", 32 | "recharts": "1.6.2", 33 | "redux": "3.7.0", 34 | "redux-thunk": "^2.2.0" 35 | }, 36 | "devDependencies": { 37 | "babel-core": "6.25.0", 38 | "babel-eslint": "7.2.3", 39 | "babel-loader": "7.1.0", 40 | "babel-plugin-lodash": "^3.3.4", 41 | "babel-plugin-recharts": "1.2.1", 42 | "babel-plugin-transform-async-to-generator": "6.24.1", 43 | "babel-plugin-transform-class-properties": "6.24.1", 44 | "babel-plugin-transform-decorators": "6.24.1", 45 | "babel-plugin-transform-decorators-legacy": "1.3.4", 46 | 
"babel-plugin-transform-object-rest-spread": "6.23.0", 47 | "babel-plugin-transform-react-remove-prop-types": "0.4.6", 48 | "babel-polyfill": "6.23.0", 49 | "babel-preset-es2015": "6.18.0", 50 | "babel-preset-react": "6.16.0", 51 | "babel-preset-react-app": "3.0.0", 52 | "babel-preset-stage-2": "6.24.1", 53 | "babel-runtime": "6.23.0", 54 | "chalk": "1.1.3", 55 | "css-loader": "0.28.4", 56 | "eslint": "4.18.2", 57 | "eslint-config-react-app": "1.0.4", 58 | "eslint-loader": "1.8.0", 59 | "eslint-plugin-flowtype": "2.34.0", 60 | "eslint-plugin-import": "2.6.0", 61 | "eslint-plugin-jsx-a11y": "5.0.3", 62 | "eslint-plugin-react": "7.1.0", 63 | "extract-text-webpack-plugin": "2.1.2", 64 | "file-loader": "0.11.2", 65 | "fs-extra": "3.0.1", 66 | "html-webpack-plugin": "2.29.0", 67 | "node-sass": "4.14.1", 68 | "react-tooltip": "4.2.21", 69 | "react-dev-utils": "3.1.2", 70 | "react-error-overlay": "1.0.7", 71 | "sass-loader": "6.0.7", 72 | "source-map-loader": "0.2.1", 73 | "style-loader": "0.18.2", 74 | "url-loader": "0.5.9", 75 | "webpack": "3.12.0", 76 | "webpack-cli": "2.1.5", 77 | "webpack-dev-server": "2.11.5", 78 | "webpack-manifest-plugin": "1.1.0", 79 | "whatwg-fetch": "2.0.3" 80 | }, 81 | "scripts": { 82 | "dev": "webpack-dev-server --host 0.0.0.0", 83 | "build": "webpack", 84 | "build:dist": "webpack -p" 85 | }, 86 | "babel": { 87 | "presets": [ 88 | "react-app" 89 | ] 90 | }, 91 | "eslintConfig": { 92 | "extends": "react-app" 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /frontend/src/api/api.js: -------------------------------------------------------------------------------- 1 | import jobs from './jobs.json'; 2 | 3 | class API { 4 | static baseURL = process.env.API_BASE_URL; 5 | 6 | getJob(id) { 7 | return this.get(this.joinUrls(`jobs/${id}`)); 8 | } 9 | 10 | getJobs(params) { 11 | return this.get(this.joinUrls('jobs', params)); 12 | } 13 | 14 | getLogs(id) { 15 | // This is some machinery to turn text response into a list of { 16 | // 'Message': line } objects. 
17 | function to_text(response) { 18 | return response.text(); 19 | } 20 | function parse_text(text) { 21 | let parsed = []; 22 | const lines = text.split(/\n/); 23 | for (let line in lines) { 24 | parsed.push({ 'Message': lines[line] }); 25 | } 26 | return new Promise((resolve, reject) => resolve(parsed)); 27 | } 28 | return window.fetch(this.joinUrls(`jobs/${id}/logs?format=text`), { 'method': 'GET' }).then(this.checkStatus).then(to_text).then(parse_text); 29 | } 30 | 31 | getStats(params) { 32 | return this.get(this.joinUrls('jobs/stats', params)); 33 | } 34 | 35 | getJobQueues() { 36 | return this.get(this.joinUrls('job_queues/active')); 37 | } 38 | 39 | getAllJobQueues() { 40 | return this.get(this.joinUrls('job_queues/all')); 41 | } 42 | 43 | activateJobQueue(job_queue_name) { 44 | return this.post(this.joinUrls(`job_queues/${job_queue_name}/activate`), []); 45 | } 46 | 47 | deactivateJobQueue(job_queue_name) { 48 | return this.post(this.joinUrls(`job_queues/${job_queue_name}/deactivate`), []); 49 | } 50 | 51 | killJobs(ids) { 52 | return this.post(this.joinUrls('jobs/kill'), { ids }); 53 | } 54 | 55 | get(url) { 56 | return this.fetch('get', url); 57 | } 58 | 59 | post(url, body) { 60 | return this.fetch('post', url, JSON.stringify(body)); 61 | } 62 | 63 | put(url, body) { 64 | return this.fetch('put', url, JSON.stringify(body)); 65 | } 66 | 67 | delete(url) { 68 | return this.fetch('delete', url); 69 | } 70 | 71 | fetch(method, url, body) { 72 | return window.fetch(url, { method, body }) 73 | .then(this.checkStatus) 74 | .then(this.parseJSON); 75 | } 76 | 77 | checkStatus(response) { 78 | // Request is good 79 | if (response.ok) { 80 | return response; 81 | } 82 | 83 | // Request failed 84 | const error = new Error(response.statusText); 85 | error.response = response; 86 | throw error; 87 | } 88 | 89 | parseJSON(response) { 90 | return response.json(); 91 | }; 92 | 93 | joinUrls(endpoint, params) { 94 | const formattedParams = params ? 95 | '?' + this.formatQueryParams(params) : 96 | ''; 97 | 98 | return `${API.baseURL}/${endpoint}${formattedParams}`; 99 | } 100 | 101 | formatQueryParams(params) { 102 | return Object.keys(params) 103 | .filter(k => !!params[k]) 104 | .map(k => encodeURIComponent(k) + '=' + encodeURIComponent(params[k])) 105 | .join('&'); 106 | } 107 | } 108 | 109 | export default new API(); 110 | -------------------------------------------------------------------------------- /handlers/job_status_subscriptions.go: -------------------------------------------------------------------------------- 1 | package handlers 2 | 3 | import ( 4 | "encoding/json" 5 | "time" 6 | 7 | "github.com/AdRoll/batchiepatchie/jobs" 8 | "github.com/gorilla/websocket" 9 | "github.com/labstack/echo" 10 | log "github.com/sirupsen/logrus" 11 | ) 12 | 13 | var ( 14 | upgrader = websocket.Upgrader{} 15 | ) 16 | 17 | func (s *Server) SubscribeToJobEvent(c echo.Context) error { 18 | job_id := c.Param("id") 19 | 20 | ws, err := upgrader.Upgrade(c.Response(), c.Request(), nil) 21 | if err != nil { 22 | log.Warning("Invalid WebSocket attempt: ", err) 23 | return err 24 | } 25 | defer ws.Close() 26 | 27 | ws.SetReadLimit(1000) // We are not expecting to read anything so set low limit for reads 28 | 29 | events, unsubscribe := s.Storage.SubscribeToJobStatus(job_id) 30 | defer unsubscribe() 31 | 32 | // Launch a reader. We need it to detect if the connection closes 33 | // suddenly. 34 | go func() { 35 | _, _, _ = ws.ReadMessage() 36 | ws.Close() // Close is safe to run concurrently. 
37 | log.Info("Stopped reading from websocket.") 38 | }() 39 | 40 | var previous_status *jobs.Job 41 | // Immediately send status update on the job. If there is such as job. 42 | job, err := s.Storage.FindOne(job_id) 43 | previous_status = job 44 | if err == nil && job != nil { 45 | marshalled, err := json.Marshal(*job) 46 | if err != nil { 47 | log.Warning("Cannot marshal job status to be sent to WebSocket: ", err) 48 | return err 49 | } 50 | now := time.Now() 51 | err = ws.SetWriteDeadline(now.Add(time.Second * 5)) 52 | if err != nil { 53 | log.Warning("Cannot set write deadline: ", err) 54 | return err 55 | } 56 | err = ws.WriteMessage(websocket.TextMessage, marshalled) 57 | if err != nil { 58 | log.Warning("Cannot send job status to WebSocket: ", err) 59 | return err 60 | } 61 | } 62 | 63 | for { 64 | var job_status *jobs.Job 65 | select { 66 | case stat := <-events: 67 | job_status = &stat 68 | case <-time.After(time.Second * 5): 69 | job_status = nil 70 | } 71 | 72 | if job_status != nil { 73 | previous_status = job_status 74 | marshalled, err := json.Marshal(*job_status) 75 | if err != nil { 76 | log.Warning("Cannot marshal job status to be sent to WebSocket: ", err) 77 | return err 78 | } 79 | 80 | now := time.Now() 81 | err = ws.SetWriteDeadline(now.Add(time.Second * 5)) 82 | if err != nil { 83 | log.Warning("Cannot set write deadline: ", err) 84 | return err 85 | } 86 | err = ws.WriteMessage(websocket.TextMessage, marshalled) 87 | if err != nil { 88 | log.Warning("Cannot send job status to WebSocket: ", err) 89 | return err 90 | } 91 | } else { 92 | marshalled := []byte("") 93 | if previous_status != nil { 94 | marshalled, err = json.Marshal(*previous_status) 95 | if err != nil { 96 | log.Warning("Cannot marshal job status to be set to WebSocket: ", err) 97 | return err 98 | } 99 | } 100 | now := time.Now() 101 | err = ws.SetWriteDeadline(now.Add(time.Second * 5)) 102 | if err != nil { 103 | log.Warning("Cannot set write deadline: ", err) 104 | return err 105 | } 106 | 107 | err = ws.WriteMessage(websocket.TextMessage, marshalled) 108 | if err != nil { 109 | log.Warning("Cannot write to websocket: ", err) 110 | return err 111 | } 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /frontend/src/components/Terminal/Terminal.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import AutoSizer from 'react-virtualized/dist/commonjs/AutoSizer'; 3 | import List from 'react-virtualized/dist/commonjs/List'; 4 | import Highlighter from "react-highlight-words"; 5 | import './Terminal.scss'; 6 | 7 | const LOG_ROW_HEIGHT = 18; 8 | const CHAR_WIDTH = 8; 9 | 10 | export default class Terminal extends React.Component { 11 | static propTypes = { 12 | height: PropTypes.number.isRequired, 13 | autoScrollToBottom: PropTypes.bool.isRequired, 14 | // Search text to highlight. 15 | searchText: PropTypes.string.isRequired, 16 | // Index of the row with the current search result, or -1 if not found. 17 | currentSearchRow: PropTypes.number.isRequired, 18 | log: PropTypes.array.isRequired 19 | }; 20 | 21 | constructor(props) { 22 | super(props); 23 | this.state = { 24 | // key for the List component. This is incremented to force-refresh the List component. 
25 | listKey: 0, 26 | }; 27 | } 28 | 29 | componentDidUpdate(prevProps) { 30 | if (prevProps.searchText !== this.props.searchText || prevProps.currentSearchRow !== this.props.currentSearchRow) { 31 | // If the search text or current search row changes, force-update the List so that the 32 | // Highlighter will re-render. The List is pretty aggressive about not rendering 33 | // when it doesn't have to. 34 | const { listKey } = this.state; 35 | this.setState({listKey: listKey + 1}) 36 | } 37 | } 38 | 39 | render() { 40 | const { log, height, autoScrollToBottom, currentSearchRow } = this.props; 41 | const { listKey } = this.state; 42 | const maxLength = log.reduce((memo, item) => Math.max(memo, item.length), 0); 43 | let listProps = {}; 44 | if (currentSearchRow > -1) { 45 | listProps = { scrollToIndex: currentSearchRow }; 46 | } 47 | if (autoScrollToBottom) { 48 | listProps = { scrollToIndex: log.length-1 }; 49 | } 50 | return ( 51 |
52 | 53 | { ({ width }) => ( 54 | 65 | ) } 66 | 67 |
68 | ); 69 | } 70 | 71 | rowRenderer = ({ index, key, style }) => { 72 | const { searchText, currentSearchRow } = this.props; 73 | const searchWords = searchText ? [searchText] : []; 74 | return ( 75 |
76 |                 
80 |             
) 81 | } 82 | 83 | noRowsRenderer = () => { 84 | return
No logs, possibly loading them...
; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /frontend/.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "babel-eslint", 3 | "plugins": [ 4 | "react" 5 | ], 6 | "env": { 7 | "browser": true, 8 | "node": true, 9 | "es6": true, 10 | "mocha": true 11 | }, 12 | "globals": { 13 | "heap": false, 14 | "internalTrackingEnabled": false 15 | }, 16 | "rules": { 17 | "block-scoped-var": 2, 18 | "brace-style": [1, "1tbs", { "allowSingleLine": true }], 19 | "comma-dangle": [2, "only-multiline"], 20 | "comma-spacing": [1, { "before": false, "after": true }], 21 | "comma-style": [1, "last"], 22 | "consistent-return": 2, 23 | "consistent-this": [1, "that"], 24 | "curly": [2, "multi-line"], 25 | "default-case": 2, 26 | "dot-notation": 1, 27 | "eol-last": 1, 28 | "eqeqeq": 2, 29 | "no-lonely-if": 2, 30 | "new-parens": 2, 31 | "no-catch-shadow": 1, 32 | "no-delete-var": 2, 33 | "no-dupe-args": 2, 34 | "no-duplicate-case": 2, 35 | "no-else-return": 1, 36 | "no-extra-bind": 1, 37 | "no-empty": 2, 38 | "no-extra-boolean-cast": 2, 39 | "no-fallthrough": 2, 40 | "no-inner-declarations": 2, 41 | "no-irregular-whitespace": 1, 42 | "no-func-assign": 1, 43 | "no-lone-blocks": 2, 44 | "no-mixed-spaces-and-tabs": 1, 45 | "no-multi-str": 1, 46 | "no-multiple-empty-lines": [1, { max: 2 }], 47 | "no-native-reassign": 2, 48 | "no-redeclare": 2, 49 | "no-return-assign": 2, 50 | "no-shadow": 2, 51 | "no-shadow-restricted-names": 2, 52 | "no-self-compare": 2, 53 | "no-sequences": 1, 54 | "no-spaced-func": 2, 55 | "no-throw-literal": 2, 56 | "no-trailing-spaces": 1, 57 | "no-undef-init": 1, 58 | "no-undefined": 2, 59 | "no-undef": 2, 60 | "no-unreachable": 2, 61 | "no-unused-expressions": 2, 62 | "no-unused-vars": 1, 63 | "no-use-before-define": [2, "nofunc"], 64 | "no-var": 1, 65 | "no-void": 2, 66 | "no-cond-assign": 2, 67 | "operator-linebreak": [1, "after"], 68 | "padded-blocks": [1, "never"], 69 | "prefer-const": [2, { 70 | "destructuring": "all" 71 | }], 72 | "quote-props": [2, "as-needed", { keywords: true, unnecessary: false }], 73 | "quotes": [1, "single", { "allowTemplateLiterals": true }], 74 | "jsx-quotes": [1, "prefer-single"], 75 | "react/jsx-no-undef": 1, 76 | "react/jsx-uses-react": 1, 77 | "react/jsx-uses-vars": 1, 78 | "react/no-did-mount-set-state": 1, 79 | "react/no-did-update-set-state": 1, 80 | "react/no-multi-comp": 1, 81 | "react/no-unknown-property": 1, 82 | "react/prop-types": 1, 83 | "react/react-in-jsx-scope": 1, 84 | "react/sort-comp": [1, { 85 | order: [ 86 | 'lifecycle', 87 | 'render', 88 | '/^render.+$/', 89 | 'everything-else' 90 | ] 91 | }], 92 | "react/self-closing-comp": 1, 93 | "react/jsx-curly-spacing": [1, "always"], 94 | "react/jsx-equals-spacing": [1, "never"], 95 | "space-before-blocks": [1, "always"], 96 | "react/no-direct-mutation-state": 1, 97 | "react/jsx-closing-bracket-location": 0, 98 | "react/jsx-indent": 1, 99 | "react/jsx-indent-props": 1, 100 | "react/jsx-key": 1, 101 | "react/jsx-no-duplicate-props": 1, 102 | "react/jsx-wrap-multilines": 1, 103 | "react/jsx-no-bind": 1, 104 | "semi": [1, "always"], 105 | "no-debugger": 1, 106 | "no-unexpected-multiline": 1, 107 | "max-len": [2, 120, 4], 108 | "spaced-comment": [1, "always", { "exceptions": ["-", "*"] }], 109 | "space-infix-ops": 1, 110 | "valid-typeof": 2, 111 | "vars-on-top": 2, 112 | "wrap-iife": 2, 113 | "yoda": 1 114 | } 115 | } 116 | 
-------------------------------------------------------------------------------- /docs/docs/quickstart.md: --------------------------------------------------------------------------------
1 | Batchiepatchie - Quick start
2 | ============================
3 |
4 | This page describes how to quickly get Batchiepatchie running.
5 |
6 | The process here is based on the `docker-compose` tool, which brings up the necessary
7 | infrastructure locally. This is useful for development purposes but also for
8 | evaluating and testing Batchiepatchie itself. For actual production deployment
9 | instructions, see the [documentation on the deployment page](deployment.md).
10 |
11 | Prerequisites
12 | --------------
13 |
14 | You will need to set up some AWS Batch infrastructure or Batchiepatchie will
15 | not show anything. For this, we suggest you follow the ["Getting Started" guide
16 | on AWS Batch in the AWS
17 | documentation](https://docs.aws.amazon.com/batch/latest/userguide/Batch_GetStarted.html).
18 |
19 | Aside from that, all you need is a working Docker installation and the `docker-compose` tool.
20 | Docker Compose is usually installed with `docker` on most systems. Follow the
21 | instructions for your operating system to install these tools.
22 |
23 | Setting up
24 | ----------
25 |
26 | The machine you are running Batchiepatchie on needs to have AWS credentials
27 | available in some way. If you are running Docker on an EC2 instance, you
28 | are likely already good to go, as Batchiepatchie can use the instance metadata service to
29 | obtain IAM credentials. Otherwise, you need to pass credentials to Docker
30 | Compose. Our `docker-compose.yml` file passes the variables
31 | `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` as environment variables to the
32 | Batchiepatchie container, so if you have these variables set up on your host
33 | system, the credentials should be passed correctly. Be aware that this is
34 | something of [a security
35 | issue](https://diogomonica.com/2017/03/27/why-you-shouldnt-use-env-variables-for-secret-data/),
36 | so we recommend that you do not use `docker-compose.yml` for actual deploys.
37 |
38 | Assuming that you have `docker` and `docker-compose` installed and usable,
39 | along with some AWS credentials, you can start Batchiepatchie:
40 |
41 | $ docker-compose up
42 |
43 | This will take a few minutes for the first run. Docker Compose will run 4 containers in total:
44 |
45 | * A frontend container, designed for frontend development. This will listen on http://127.0.0.1:8080/
46 |
47 | * An API container, which runs the Batchiepatchie backend. This will listen on http://127.0.0.1:5454/ but you should use the 8080 endpoint instead.
48 |
49 | * A migration container. This only runs once at the beginning of the Docker Compose run to set up the database schema for the PostgreSQL database used by Batchiepatchie.
50 |
51 | * A PostgreSQL container that runs a database used by Batchiepatchie.
52 |
53 | If everything went without errors, you should be able to access the Batchiepatchie
54 | frontend at http://127.0.0.1:8080/. This setup is also designed to be used for
55 | development, so modifying any code should automatically rebuild and reload
56 | Batchiepatchie. Docker Compose will mount the current directory from the host
57 | inside the containers, so the containers use the files from the host.
58 |
59 | Adding job queues
60 | -----------------
61 |
62 | When you first start Batchiepatchie, there are no jobs to be listed.
If you have
63 | followed the prerequisites section on this page, you should have some AWS Batch
64 | infrastructure set up.
65 |
66 | You will need to manually add job queues to the system. This is easy; navigate
67 | to the "Job queues" tab in the Batchiepatchie UI and click "ACTIVATE" on some of the
68 | job queues (you need to set up some job queues with AWS Batch first before they
69 | appear in Batchiepatchie).
70 |
71 | Another way to do this is to manually log into the PostgreSQL database and add
72 | your queue:
73 |
74 | $ docker exec -it batchiepatchie_postgres_1 sh -c 'psql --user postgres --dbname postgres'
75 | postgres=# INSERT INTO activated_job_queues VALUES ( 'name-of-your-job-queue' );
76 | INSERT 0 1
77 | postgres=# SELECT * FROM activated_job_queues;
78 | job_queue
79 | -----------
80 | name-of-your-job-queue
81 | (1 row)
82 |
83 | postgres=#
84 |
85 | Once your job queue is inserted, Batchiepatchie will periodically poll AWS
86 | Batch to update its understanding of the current state of batch jobs.
87 |
-------------------------------------------------------------------------------- /frontend/src/components/SearchBox/SearchBox.jsx: --------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import debounce from 'utils/debounce';
3 | import './SearchBox.scss';
4 |
5 | /**
6 | * A search field with next and previous buttons
7 | */
8 | export default class SearchBox extends React.Component {
9 | static propTypes = {
10 | // The lines of text to search.
11 | rows: PropTypes.array.isRequired,
12 | // Callback with the new searchText and currentSearchRow.
13 | onSearchChanged: PropTypes.func.isRequired,
14 | };
15 |
16 | constructor(props) {
17 | super(props);
18 | this.state = {
19 | // Search text to highlight.
20 | searchText: '',
21 | // Index of the row with the current search result, or -1 if not found.
22 | currentSearchRow: -1,
23 | // Whether to display the "Not found" message.
24 | notFound: false,
25 | };
26 | this.onSearchTextChangedDebounced = debounce(this.onSearchTextChangedDebounced, 1000);
27 | }
28 |
29 | render() {
30 | const { searchText, notFound } = this.state;
31 | return (
32 |
33 | Search: 34 | 35 | 36 | 37 | { notFound && Not found } 38 |
39 | ); 40 | } 41 | 42 | /** 43 | * Non-debounced text change handler. 44 | */ 45 | onSearchTextChanged = (event) => { 46 | this.setState({searchText: event.target.value, notFound: false}); 47 | this.onSearchTextChangedDebounced(event.target.value); 48 | } 49 | 50 | /** 51 | * Debounced text change handler. 52 | */ 53 | onSearchTextChangedDebounced = (searchText) => { 54 | const { onSearchChanged } = this.props; 55 | const newSearchRow = searchText === '' ? -1 : this.find(searchText, -1, 1); 56 | onSearchChanged(searchText, newSearchRow); 57 | if (newSearchRow === -1) { 58 | this.setState({notFound: searchText !== ''}); 59 | } 60 | this.setState({currentSearchRow: newSearchRow}); 61 | } 62 | 63 | /** 64 | * The Next button was clicked. 65 | */ 66 | onClickNext = () => { 67 | const { onSearchChanged } = this.props; 68 | const { currentSearchRow, searchText } = this.state; 69 | if (searchText === '') { 70 | return; 71 | } 72 | const newSearchRow = this.find(searchText, currentSearchRow, 1); 73 | if (newSearchRow === -1) { 74 | this.setState({notFound: true}); 75 | // Don't set currentSearchRow to -1 if the user tries to go past the last occurrence. 76 | // Just leave them at the last occurrence. 77 | } else { 78 | this.setState({notFound: false, currentSearchRow: newSearchRow}); 79 | onSearchChanged(searchText, newSearchRow); 80 | } 81 | } 82 | 83 | /** 84 | * The Prev button was clicked. 85 | */ 86 | onClickPrev = () => { 87 | const { onSearchChanged } = this.props; 88 | const { currentSearchRow, searchText } = this.state; 89 | if (searchText === '') { 90 | return; 91 | } 92 | const newSearchRow = this.find(searchText, currentSearchRow, -1); 93 | if (newSearchRow === -1) { 94 | this.setState({notFound: true}); 95 | // Don't set currentSearchRow to -1 if the user tries to go past the first occurrence. 96 | // Just leave them at the first occurrence. 97 | } else { 98 | this.setState({notFound: false, currentSearchRow: newSearchRow}); 99 | onSearchChanged(searchText, newSearchRow); 100 | } 101 | } 102 | 103 | /** 104 | * Looks in the rows for the search text and returns the index of the next matching row, 105 | * or -1 if not found. 106 | * 107 | * delta is +1 for Next and -1 for Prev. 108 | */ 109 | find = (searchText, currentSearchRow, delta) => { 110 | const { rows } = this.props; 111 | let i = currentSearchRow; 112 | i += delta; 113 | while (0 <= i && i <= rows.length - 1) { 114 | if (rows[i].toLowerCase().indexOf(searchText.toLowerCase()) > -1) { 115 | return i; 116 | } 117 | i += delta; 118 | } 119 | return -1; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /handlers/job_status_notification.go: -------------------------------------------------------------------------------- 1 | package handlers 2 | 3 | import ( 4 | "encoding/json" 5 | "io" 6 | "regexp" 7 | "strconv" 8 | "time" 9 | 10 | "github.com/AdRoll/batchiepatchie/jobs" 11 | "github.com/labstack/echo" 12 | "github.com/labstack/gommon/log" 13 | "github.com/opentracing/opentracing-go" 14 | ) 15 | 16 | // This structure and the ones below it match the CloudWatch event JSON we get from AWS Lambda function. 17 | // It doesn't match all the fields but matches most of the useful ones we track. 
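For orientation, here is a minimal sketch, not from this repository, of how a payload of this shape decodes into the structs declared just below. Every field value is invented, and the snippet assumes the `JobStatusNotification` types below are in scope:

```go
// Sketch: decode an invented AWS Batch "Job State Change"-style payload into
// the JobStatusNotification structs declared below (assumed to be in scope).
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	payload := []byte(`{
		"time": "2018-01-01T00:00:00Z",
		"detail": {
			"jobName": "example-job",
			"jobId": "00000000-0000-0000-0000-000000000000",
			"jobQueue": "arn:aws:batch:us-west-2:123456789012:job-queue/example-queue",
			"status": "RUNNING",
			"createdAt": 1514764800000,
			"container": {
				"image": "example:latest",
				"vcpus": 4,
				"memory": 1024,
				"command": ["echo", "hello"],
				"environment": [{"name": "PYBATCH_TIMEOUT", "value": "3600"}]
			}
		}
	}`)

	var notification JobStatusNotification
	if err := json.Unmarshal(payload, &notification); err != nil {
		panic(err)
	}
	// Prints: example-job RUNNING
	fmt.Println(notification.Detail.JobName, notification.Detail.Status)
}
```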
18 | type JobStatusNotification struct {
19 | Time string `json:"time"`
20 | Detail JobStatusNotificationDetail `json:"detail"`
21 | }
22 |
23 | type JobStatusNotificationDetail struct {
24 | JobName string `json:"jobName"`
25 | JobId string `json:"jobId"`
26 | JobQueue string `json:"jobQueue"`
27 | Status string `json:"status"`
28 | CreatedAt int64 `json:"createdAt"`
29 | StartedAt *int64 `json:"startedAt"`
30 | Container JobStatusNotificationContainer `json:"container"`
31 | JobDefinition string `json:"jobDefinition"`
32 | }
33 |
34 | type env struct {
35 | Key string `json:"name"`
36 | Value string `json:"value"`
37 | }
38 |
39 | type JobStatusNotificationContainer struct {
40 | Image string `json:"image"`
41 | Vcpus int64 `json:"vcpus"`
42 | Memory int64 `json:"memory"`
43 | Command []string `json:"command"`
44 | Environment []env `json:"environment"`
45 | TaskArn *string `json:"taskArn"`
46 | }
47 |
48 | var arnRegex = regexp.MustCompile("^arn.*/(.+?)$")
49 |
50 | func stripArn(arnied_name string) string {
51 | match := arnRegex.FindStringSubmatch(arnied_name)
52 | if match == nil {
53 | return arnied_name
54 | }
55 | return match[1]
56 | }
57 |
58 | func (s *Server) JobStatusNotification(c echo.Context) error {
59 | span := opentracing.StartSpan("API.JobStatusNotification")
60 | defer span.Finish()
61 |
62 | // This function can be called from outside to update job status.
63 | // It's meant to be used from an AWS Lambda function that is triggered on AWS Batch events.
64 | body, err := io.ReadAll(io.LimitReader(c.Request().Body, 100000))
65 | if err != nil {
66 | log.Warn("Failed reading job status notification posted on our API: ", err)
67 | return err
68 | }
69 |
70 | var job_status_notification JobStatusNotification
71 |
72 | if err = json.Unmarshal(body, &job_status_notification); err != nil {
73 | log.Warn("Cannot unmarshal JSON for job status notification posted on our API: ", err)
74 | return err
75 | }
76 |
77 | now := time.Now()
78 |
79 | // Sometimes we get these jobs that have barely any details in them.
80 | // The UI and the database can't deal with them so we skip them if it happens.
81 | if job_status_notification.Detail.JobName == "" {
82 | return nil
83 | }
84 |
85 | // Convert jobStatusNotification into jobs.Job definition that our
86 | // PostgreSQL storer understands.
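// Note: AWS Batch event timestamps such as createdAt and startedAt are epoch
// milliseconds, which is why the conversions below divide by 1000 before
// calling time.Unix.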
87 | job := jobs.Job{} 88 | job.Id = job_status_notification.Detail.JobId 89 | job.Name = job_status_notification.Detail.JobName 90 | job.Status = job_status_notification.Detail.Status 91 | job.Description = job_status_notification.Detail.JobDefinition 92 | job.LastUpdated = now 93 | job.JobQueue = stripArn(job_status_notification.Detail.JobQueue) 94 | job.Image = job_status_notification.Detail.Container.Image 95 | job.CreatedAt = time.Unix(job_status_notification.Detail.CreatedAt/1000, 0) 96 | if job_status_notification.Detail.StartedAt != nil { 97 | time := time.Unix(*job_status_notification.Detail.StartedAt/1000, 0) 98 | job.RunStartTime = &time 99 | } else { 100 | job.RunStartTime = nil 101 | } 102 | job.VCpus = job_status_notification.Detail.Container.Vcpus 103 | job.Memory = job_status_notification.Detail.Container.Memory 104 | cmd, _ := json.Marshal(job_status_notification.Detail.Container.Command) 105 | job.CommandLine = string(cmd) 106 | 107 | timeout := -1 108 | for _, value := range job_status_notification.Detail.Container.Environment { 109 | if value.Key == "PYBATCH_TIMEOUT" { 110 | timeout, err = strconv.Atoi(value.Value) 111 | if err != nil { 112 | timeout = -1 113 | log.Warn("Cannot make sense of PYBATCH_TIMEOUT in job status notification: ", value.Value, " : ", err) 114 | } 115 | break 116 | } 117 | } 118 | job.Timeout = timeout 119 | 120 | jobs := make([]*jobs.Job, 1) 121 | jobs[0] = &job 122 | 123 | err = s.Storage.Store(jobs) 124 | if err != nil { 125 | log.Warn("Failed to store job status notification: ", err) 126 | return err 127 | } 128 | log.Info("Got job status notification for job: ", job_status_notification.Detail.JobId) 129 | return nil 130 | } 131 | -------------------------------------------------------------------------------- /frontend/src/components/Search/Search.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import { connect } from 'react-redux'; 3 | import ReactTooltip from 'react-tooltip'; 4 | import debounce from 'utils/debounce'; 5 | import { 6 | setParams, 7 | setLocationToSearch 8 | } from 'stores/job'; 9 | import SectionLoader from 'components/SectionLoader/SectionLoader'; 10 | import { 11 | JOB, 12 | JOBS, 13 | STATS 14 | } from 'stores/status'; 15 | import './Search.scss'; 16 | 17 | // Fuels top of the page loading spinner 18 | function getStatusKey(path) { 19 | if (path.startsWith('/job')) { 20 | return JOB; 21 | } else if (path.startsWith('/stats')) { 22 | return STATS; 23 | } 24 | return JOBS; 25 | } 26 | 27 | class Search extends React.Component { 28 | static propTypes = { 29 | loading: PropTypes.bool.isRequired, 30 | qTemp: PropTypes.string.isRequired, 31 | dateRange: PropTypes.string.isRequired, 32 | setParams: PropTypes.func.isRequired, 33 | statusKey: PropTypes.string.isRequired, 34 | }; 35 | 36 | render() { 37 | const { 38 | loading, 39 | qTemp, 40 | dateRange 41 | } = this.props; 42 | 43 | return ( 44 |
45 |
46 |
47 | { loading && } 48 |
49 |
50 | 63 |
64 |
65 |
66 | 67 | 68 | 69 | 77 |
78 |
79 | ℹ️ 80 |
81 | 82 | 83 | Search is case-insensitive, partial-word, AND search on individual words.
84 | The following fields are searched: ID, Name, Image, and Queue. 85 |
86 |
87 |
88 |
89 |
90 |
91 | ); 92 | } 93 | 94 | onChange = (e) => { 95 | this.props.setParams({qTemp: e.target.value}); 96 | this.search(e.target.value); 97 | } 98 | 99 | onDateRangeChanged = (e) => { 100 | this.props.setParams({dateRange: e.target.value}); 101 | } 102 | 103 | search = debounce((q) => { 104 | this.props.setParams({q}); 105 | }, 500) 106 | 107 | onKeyPress = (e) => { 108 | if (e.key === 'Enter' && this.props.statusKey !== JOBS) { 109 | this.props.setLocationToSearch(); 110 | } 111 | } 112 | }; 113 | 114 | const mapStateToProps = state => { 115 | const statusKey = getStatusKey(state.routing.locationBeforeTransitions.pathname); 116 | return { 117 | statusKey, 118 | qTemp: state.job.qTemp, 119 | dateRange: state.job.dateRange, 120 | loading: state.status[statusKey].loading 121 | }; 122 | }; 123 | 124 | const actions = { 125 | setParams, 126 | setLocationToSearch 127 | }; 128 | 129 | export default connect(mapStateToProps, actions)(Search); 130 | -------------------------------------------------------------------------------- /frontend/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | const url = require('url'); 3 | const webpack = require('webpack'); 4 | const ExtractTextPlugin = require('extract-text-webpack-plugin'); 5 | const HtmlWebpackPlugin = require('html-webpack-plugin'); 6 | 7 | // 8 | // Environment 9 | // 10 | // To clarify: 11 | // - NODE_ENV controls the build type. It's unset for development builds, it's set to 'production' 12 | // for release builds. 13 | // - ENVIRONMENT controls the deployment environment (development, staging, production). 14 | // 15 | 16 | process.env.ENVIRONMENT = process.env.ENVIRONMENT || 'development'; 17 | process.env.API_BASE_URL = process.env.API_BASE_URL || '/api/v1'; 18 | process.env.BASE_URL = process.env.BASE_URL || ''; 19 | 20 | var assetsHref = '/'; 21 | 22 | switch (process.env.ENVIRONMENT) { 23 | case 'development': 24 | process.env.NODE_ENV = 'development'; 25 | break; 26 | case 'production': 27 | case 'staging': 28 | if (!process.env.LOCAL_ASSETS) { 29 | assetsHref = process.env.ASSETS_BASE_URL + process.env.VERSION + '/'; 30 | } 31 | 32 | process.env.NODE_ENV = 'production'; 33 | break; 34 | } 35 | 36 | // 37 | // Debug output 38 | // 39 | 40 | console.log('Assets:', assetsHref || 'local'); 41 | console.log('Base API URL:', process.env.API_BASE_URL); 42 | 43 | console.log('Environment variables:'); 44 | ['ENVIRONMENT', 'NODE_ENV'].map(function (value) { 45 | console.log(' ' + value + '=' + process.env[value]); 46 | }); 47 | 48 | // 49 | // Chunk names 50 | // 51 | 52 | function getChunkNamePattern(extension) { 53 | return '[name].' + extension; 54 | } 55 | 56 | // 57 | // Source maps 58 | // 59 | 60 | var devtoolConfig = 'cheap-module-source-map'; 61 | if (process.env.NODE_ENV === 'production') { 62 | devtoolConfig = undefined; 63 | } 64 | 65 | // 66 | // Webpack 67 | // 68 | 69 | module.exports = { 70 | devtool: devtoolConfig, 71 | entry: ['babel-polyfill', './src/index.jsx'], 72 | 73 | devServer: { 74 | historyApiFallback: true, 75 | index: 'index.html', 76 | proxy: { 77 | '/api': { 78 | target: 'http://api:5454/', 79 | secure: false 80 | } 81 | } 82 | }, 83 | 84 | module: { 85 | rules: [ 86 | { 87 | enforce: 'pre', 88 | loader: 'source-map-loader', 89 | test: /\.js$/, 90 | }, 91 | { 92 | test: /\.(js|jsx)$/, 93 | loader: require.resolve('babel-loader'), 94 | options: { 95 | 96 | // This is a feature of `babel-loader` for webpack (not Babel itself). 
97 | // It enables caching results in ./node_modules/.cache/babel-loader/ 98 | // directory for faster rebuilds. 99 | cacheDirectory: true, 100 | }, 101 | }, 102 | { 103 | test: /\.css$/, use: ExtractTextPlugin.extract({ 104 | use: 'css-loader', 105 | }), 106 | }, 107 | { 108 | test: /\.scss$/, use: ExtractTextPlugin.extract({ 109 | use: [{ 110 | loader: 'css-loader', 111 | options: { 112 | alias: { 113 | '../fonts/bootstrap': path.resolve('node_modules/bootstrap-sass/assets/fonts/bootstrap'), 114 | }, 115 | import: false, 116 | }, 117 | }, { 118 | loader: 'sass-loader', 119 | options: { 120 | includePaths: [path.resolve('node_modules')], 121 | }, 122 | }] 123 | }), 124 | }, 125 | { 126 | test: /\.(eot|jpeg|jpg|png|svg|ttf|woff|woff2)/, 127 | use: 'file-loader', 128 | } 129 | ], 130 | }, 131 | 132 | output: { 133 | filename: getChunkNamePattern('js'), 134 | path: path.resolve('dist'), 135 | }, 136 | 137 | plugins: [ 138 | new ExtractTextPlugin(getChunkNamePattern('css')), 139 | new HtmlWebpackPlugin({ 140 | favicon: 'public/favicon.ico', 141 | assetsHref: assetsHref, 142 | inject: false, 143 | template: './public/index.html', 144 | title: 'Batchiepatchie', 145 | }), 146 | new webpack.EnvironmentPlugin(['ENVIRONMENT', 'NODE_ENV', 'API_BASE_URL', 'BASE_URL']) 147 | ], 148 | 149 | resolve: { 150 | extensions: [ 151 | '.js', 152 | '.jsx', 153 | ], 154 | modules: [ 155 | 'node_modules', 156 | 'src' 157 | ] 158 | }, 159 | }; 160 | -------------------------------------------------------------------------------- /jobs/scaler.go: -------------------------------------------------------------------------------- 1 | package jobs 2 | 3 | import ( 4 | "github.com/AdRoll/batchiepatchie/awsclients" 5 | "github.com/aws/aws-sdk-go/service/batch" 6 | "github.com/opentracing/opentracing-go" 7 | log "github.com/sirupsen/logrus" 8 | ) 9 | 10 | func ScaleComputeEnvironments(storer Storer, queues []string) { 11 | span := opentracing.StartSpan("ScaleComputeEnvironments") 12 | defer span.Finish() 13 | 14 | // Don't bother going to database if we have no queues. 15 | if len(queues) == 0 { 16 | return 17 | } 18 | 19 | running_loads, err := storer.EstimateRunningLoadByJobQueue(queues) 20 | if err != nil { 21 | log.Warning("Aborting compute environment scaling due to errors with store.") 22 | return 23 | } 24 | 25 | /* What happens here is that we look at what the current "desired" 26 | * vcpus and memory are and if they are lower than our estimate, we 27 | * manually bump it up. AWS Batch isn't very aggressive about the 28 | * scaling. 29 | * 30 | * We can assume AWS Batch will manually scale that stuff down later if needed. 
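* To make this concrete with illustrative numbers: if the jobs on a queue add
* up to an estimated 96 wanted vcpus, and that queue's first compute
* environment currently has MinvCpus=0 and MaxvCpus=64, the code below clamps
* the estimate to 64 and calls UpdateComputeEnvironment to raise MinvCpus to
* 64, which nudges AWS Batch into provisioning capacity sooner.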
31 | */ 32 | 33 | job_queue_names := make([]*string, 0) 34 | for job_queue := range running_loads { 35 | jq := job_queue 36 | job_queue_names = append(job_queue_names, &jq) 37 | } 38 | 39 | job_queues := &batch.DescribeJobQueuesInput{ 40 | JobQueues: job_queue_names, 41 | } 42 | 43 | job_queue_descs, err := awsclients.Batch.DescribeJobQueues(job_queues) 44 | if err != nil { 45 | log.Warning("Failed to describe job queues: ", err) 46 | return 47 | } 48 | 49 | job_queue_descs_map := make(map[string]*batch.JobQueueDetail) 50 | for _, job_queue_desc := range job_queue_descs.JobQueues { 51 | job_queue_descs_map[*job_queue_desc.JobQueueName] = job_queue_desc 52 | } 53 | 54 | wanted_vcpus_by_ce := make(map[string]int64) 55 | for job_queue, load := range running_loads { 56 | desc, ok := job_queue_descs_map[job_queue] 57 | if !ok { 58 | log.Info("Cannot find information for job queue ", job_queue, " so won't do any scaling for it.") 59 | continue 60 | } 61 | 62 | // job_queue must be in one of our active job queues 63 | ok = false 64 | for _, job_queue_allowed := range queues { 65 | if job_queue == job_queue_allowed { 66 | ok = true 67 | break 68 | } 69 | } 70 | if !ok { 71 | continue 72 | } 73 | 74 | if len(desc.ComputeEnvironmentOrder) < 1 { 75 | log.Warning("Job queue ", job_queue, " has no compute environment order set.") 76 | continue 77 | } 78 | 79 | // TODO: somehow distribute load to more than one compute environment instead of using the first one. 80 | ce := desc.ComputeEnvironmentOrder[0] 81 | ce_name := ce.ComputeEnvironment 82 | old_vcpus, ok := wanted_vcpus_by_ce[*ce_name] 83 | if ok { 84 | wanted_vcpus_by_ce[*ce_name] = old_vcpus + load.WantedVCpus 85 | } else { 86 | wanted_vcpus_by_ce[*ce_name] = load.WantedVCpus 87 | } 88 | } 89 | 90 | for ce, wanted := range wanted_vcpus_by_ce { 91 | log.Info("Wanted vcpus in compute environment ", ce, ": ", wanted) 92 | 93 | ces := make([]*string, 1) 94 | ces[0] = &ce 95 | out, err := awsclients.Batch.DescribeComputeEnvironments(&batch.DescribeComputeEnvironmentsInput{ 96 | ComputeEnvironments: ces, 97 | }) 98 | if err != nil { 99 | log.Warning("DescribeComputeEnvironments failed on compute environment ", ce, ": ", err) 100 | continue 101 | } 102 | 103 | if len(out.ComputeEnvironments) != 1 { 104 | log.Warning("Skipping compute environment ", ce, ", no information from DescribeComputeEnvironments") 105 | continue 106 | } 107 | 108 | detail := out.ComputeEnvironments[0] 109 | if detail.Status == nil || detail.State == nil || *detail.Status != "VALID" || *detail.State != "ENABLED" || detail.ComputeResources == nil { 110 | log.Warning("Not scaling ", ce, " because it's not both VALID and ENABLED.") 111 | continue 112 | } 113 | 114 | if detail.ComputeResources.DesiredvCpus == nil { 115 | // I'm not sure if AWS Batch would actually return nil here ever but it's allowed by types :shruggie: 116 | // Let's not crash if it's nil for whatever reason 117 | log.Warning("Not scaling ", ce, " because it has no desired vcpus set.") 118 | continue 119 | } 120 | if detail.ComputeResources.MaxvCpus == nil { 121 | log.Warning("Not scaling ", ce, " because it has no maximum vcpus set.") 122 | continue 123 | } 124 | 125 | batch_min_vcpus := *detail.ComputeResources.MinvCpus 126 | batch_max_vcpus := *detail.ComputeResources.MaxvCpus 127 | 128 | wanted := wanted 129 | if wanted > batch_max_vcpus { 130 | wanted = batch_max_vcpus 131 | } 132 | 133 | update_resources := batch.ComputeResourceUpdate{ 134 | MinvCpus: &wanted, 135 | } 136 | 137 | // Now for the meat...if the 
desired vcpus is lower than we would like, we scale up. 138 | if wanted != batch_min_vcpus { 139 | _, err := awsclients.Batch.UpdateComputeEnvironment(&batch.UpdateComputeEnvironmentInput{ 140 | ComputeEnvironment: &ce, 141 | ComputeResources: &update_resources, 142 | }) 143 | if err != nil { 144 | log.Error("Tried to scale ", ce, " but it failed: ", err) 145 | continue 146 | } 147 | log.Info("Updated job queue min vcpus in ", ce, " from ", batch_min_vcpus, " to ", wanted) 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /frontend/src/api/jobs.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "1", 4 | "name": "pybatch-5564b6b2-coredb-baker-merge", 5 | "queue": "staging", 6 | "last_updated": "2017-06-15", 7 | "definition": "arn:aws:batch:us-west-2:129225457201:job-definition/pybatch-container-85c0146380a7:1", 8 | "containerImage": "example.docker.repository.example.com:5000/muncher/baker:staging", 9 | "cpus": "16", 10 | "memory": "110000 MiB", 11 | "timeout": "7200", 12 | "command": "python business/mk-simplified-trails", 13 | "container": "example.docker.repository.example.com:5000/muncher/business:staging", 14 | "instance": "r3.2xlarge", 15 | "status": "SUBMITTED", 16 | "created_at": "2017-06-15", 17 | "stopped_at": "2017-04-04" 18 | }, 19 | { 20 | "id": "2", 21 | "name": "pybatch-5564b6b2-coredb-baker-merge", 22 | "queue": "staging", 23 | "last_updated": "2017-06-16", 24 | "definition": "arn:aws:batch:us-west-2:129225457201:job-definition/pybatch-container-85c0146380a7:1", 25 | "containerImage": "example.docker.repository.example.com:5000/muncher/baker:staging", 26 | "cpus": "16", 27 | "memory": "110000 MiB", 28 | "timeout": "7200", 29 | "command": "python business/mk-simplified-trails", 30 | "container": "example.docker.repository.example.com:5000/muncher/business:staging", 31 | "instance": "r3.2xlarge", 32 | "status": "PENDING", 33 | "created_at": "2017-06-16", 34 | "stopped_at": "2017-04-04" 35 | }, 36 | { 37 | "id": "3", 38 | "name": "pybatch-5564b6b2-coredb-baker-merge", 39 | "queue": "staging", 40 | "last_updated": "2017-06-17", 41 | "definition": "arn:aws:batch:us-west-2:129225457201:job-definition/pybatch-container-85c0146380a7:1", 42 | "containerImage": "example.docker.repository.example.com:5000/muncher/baker:staging", 43 | "cpus": "16", 44 | "memory": "110000 MiB", 45 | "timeout": "7200", 46 | "command": "python business/mk-simplified-trails", 47 | "container": "example.docker.repository.example.com:5000/muncher/business:staging", 48 | "instance": "r3.2xlarge", 49 | "status": "RUNNABLE", 50 | "created_at": "2017-06-17", 51 | "stopped_at": "2017-04-04" 52 | }, 53 | { 54 | "id": "4", 55 | "name": "pybatch-5564b6b2-coredb-baker-merge", 56 | "queue": "staging", 57 | "last_updated": "2017-06-18", 58 | "definition": "arn:aws:batch:us-west-2:129225457201:job-definition/pybatch-container-85c0146380a7:1", 59 | "containerImage": "example.docker.repository.example.com:5000/muncher/baker:staging", 60 | "cpus": "16", 61 | "memory": "110000 MiB", 62 | "timeout": "7200", 63 | "command": "python business/mk-simplified-trails", 64 | "container": "example.docker.repository.example.com:5000/muncher/business:staging", 65 | "instance": "r3.2xlarge", 66 | "status": "STARTING", 67 | "created_at": "2017-06-18", 68 | "stopped_at": "2017-04-04" 69 | }, 70 | { 71 | "id": "5", 72 | "name": "pybatch-5564b6b2-coredb-baker-merge", 73 | "queue": "staging", 74 | "last_updated": 
"2017-06-19", 75 | "definition": "arn:aws:batch:us-west-2:129225457201:job-definition/pybatch-container-85c0146380a7:1", 76 | "containerImage": "example.docker.repository.example.com:5000/muncher/baker:staging", 77 | "cpus": "16", 78 | "memory": "110000 MiB", 79 | "timeout": "7200", 80 | "command": "python business/mk-simplified-trails", 81 | "container": "example.docker.repository.example.com:5000/muncher/business:staging", 82 | "instance": "r3.2xlarge", 83 | "status": "RUNNING", 84 | "created_at": "2017-06-19", 85 | "stopped_at": "2017-04-04" 86 | }, 87 | { 88 | "id": "6", 89 | "name": "pybatch-5564b6b2-coredb-baker-merge", 90 | "queue": "staging", 91 | "last_updated": "2017-06-20", 92 | "definition": "arn:aws:batch:us-west-2:129225457201:job-definition/pybatch-container-85c0146380a7:1", 93 | "containerImage": "example.docker.repository.example.com:5000/muncher/baker:staging", 94 | "cpus": "16", 95 | "memory": "110000 MiB", 96 | "timeout": "7200", 97 | "command": "python business/mk-simplified-trails", 98 | "container": "example.docker.repository.example.com:5000/muncher/business:staging", 99 | "instance": "r3.2xlarge", 100 | "status": "FAILED", 101 | "created_at": "2017-06-20", 102 | "stopped_at": "2017-04-04" 103 | }, 104 | { 105 | "id": "7", 106 | "name": "pybatch-5564b6b2-coredb-baker-merge", 107 | "queue": "staging", 108 | "last_updated": "2017-06-21", 109 | "definition": "arn:aws:batch:us-west-2:129225457201:job-definition/pybatch-container-85c0146380a7:1", 110 | "containerImage": "example.docker.repository.example.com:5000/muncher/baker:staging", 111 | "cpus": "16", 112 | "memory": "110000 MiB", 113 | "timeout": "7200", 114 | "command": "python business/mk-simplified-trails", 115 | "container": "example.docker.repository.example.com:5000/muncher/business:staging", 116 | "instance": "r3.2xlarge", 117 | "status": "SUCCEEDED", 118 | "created_at": "2017-06-21", 119 | "stopped_at": "2017-04-04" 120 | } 121 | ] 122 | -------------------------------------------------------------------------------- /frontend/src/pages/JobQueuesPage/JobQueuesPage.jsx: -------------------------------------------------------------------------------- 1 | import React, { PropTypes } from 'react'; 2 | import ReactDataGrid from 'react-data-grid'; 3 | import NameFormatter from 'components/NameFormatter/NameFormatter'; 4 | import ActivationFormatter from 'components/ActivationFormatter/ActivationFormatter'; 5 | import JobQueueRowRenderer from 'components/JobQueueRowRenderer/JobQueueRowRenderer'; 6 | import { JOB_QUEUES_ALL, JOB_QUEUES_ACTIVATED } from 'stores/status'; 7 | import { connect } from 'react-redux'; 8 | import { 9 | fetchJobQueues, 10 | fetchAllJobQueues, 11 | activateJobQueue, 12 | deactivateJobQueue 13 | } from 'stores/jobqueue'; 14 | import './JobQueuesPage.scss'; 15 | 16 | const AUTO_REFRESH_TIMEOUT = 5000; // ms 17 | 18 | const COLUMNS = [ 19 | { 20 | key: 'name', 21 | name: 'Name', 22 | resizable: false, 23 | sortable: false, 24 | width: '100%', 25 | formatter: NameFormatter 26 | }, 27 | { 28 | key: 'activation', 29 | name: 'Activation', 30 | resizable: false, 31 | sortable: false, 32 | width: 100, 33 | formatter: ActivationFormatter 34 | } 35 | ]; 36 | 37 | class JobQueuesPage extends React.Component { 38 | componentDidMount() { 39 | this.fetchAll(); 40 | } 41 | 42 | fetchAll() { 43 | this.props.fetchJobQueues(); 44 | this.props.fetchAllJobQueues(); 45 | } 46 | 47 | setJobQueue(activation, job_queue) { 48 | switch(activation) 49 | { 50 | case 'ACTIVATE': 51 | 
this.props.activateJobQueue(job_queue).then(() => this.fetchAll()).catch(() => {}); 52 | break; 53 | case 'DEACTIVATE': 54 | this.props.deactivateJobQueue(job_queue).then(() => this.fetchAll()).catch(() => {}); 55 | break; 56 | } 57 | } 58 | 59 | render() { 60 | const status_all = this.props.status_all_job_queues; 61 | const status_activated = this.props.status_activated_job_queues; 62 | 63 | if ( (!status_all.loading && status_all.error) || 64 | (!status_activated.loading && status_activated.error) ) { 65 | return ( 66 |
67 |
68 | Could not load API responses for job queues. 69 |
70 |
71 | );
72 | }
73 |
74 | let queues_activated = [];
75 | for ( let key in this.props.activatedJobQueues ) {
76 | const queue = this.props.activatedJobQueues[key];
77 | queues_activated.push(queue);
78 | }
79 |
80 | let queues_all = [];
81 | for ( let key in this.props.allJobQueues ) {
82 | const queue = this.props.allJobQueues[key];
83 |
84 | // TODO: this is a quadratic check for whether the queue is already in the activated list.
85 | // With some small effort, we could make it faster.
86 | let ok_to_add = true;
87 | for ( let key2 in queues_activated ) {
88 | if ( queue === queues_activated[key2] ) {
89 | ok_to_add = false;
90 | break;
91 | }
92 | }
93 | if ( ok_to_add ) {
94 | queues_all.push(queue);
95 | }
96 | }
97 |
98 | queues_all.sort();
99 | queues_activated.sort();
100 |
101 | const make_row_getter = (lst, act) => (i) => {
102 | if ( i < lst.length ) {
103 | return { name: lst[i], activation: { action: act, onClick: () => { this.setJobQueue(act, lst[i]); } } };
104 | } else {
105 | return { name: '', activation: { action: '', onClick: () => {} } };
106 | }
107 | };
108 |
109 | const row_getter_all = make_row_getter(queues_all, 'ACTIVATE');
110 | const row_getter_activated = make_row_getter(queues_activated, 'DEACTIVATE');
111 |
112 | const height = 35+35*Math.max(queues_all.length, queues_activated.length);
113 |
114 | return (
115 |
116 |
117 |
118 |
119 |

Batchiepatchie registered job queues

120 |
121 | 129 |
130 |
131 |
132 |

All job queues

133 |
134 | 142 |
143 |
144 |
145 |
146 |
147 | ); 148 | } 149 | } 150 | 151 | const mapStateToProps = state => ({ 152 | allJobQueues: state.jobqueue.allJobQueues, 153 | activatedJobQueues: state.jobqueue.activatedJobQueues, 154 | status_all_job_queues: state.status[JOB_QUEUES_ALL], 155 | status_activated_job_queues: state.status[JOB_QUEUES_ACTIVATED] 156 | }); 157 | 158 | const actions = { 159 | fetchAllJobQueues, 160 | fetchJobQueues, 161 | activateJobQueue, 162 | deactivateJobQueue 163 | }; 164 | 165 | export default connect(mapStateToProps, actions)(JobQueuesPage); 166 | -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | /* 4 | This module reads and does some basic validation on the TOML file used for 5 | Batchiepatchie configuration. It also fetches things from S3 (database 6 | password) if it's configured so. 7 | 8 | An exported structure, Config, is then exported to rest of Batchie Patchie. 9 | */ 10 | 11 | import ( 12 | "fmt" 13 | "reflect" 14 | 15 | "github.com/AdRoll/batchiepatchie/awsclients" 16 | "github.com/AdRoll/batchiepatchie/envsubstituter" 17 | "github.com/AdRoll/batchiepatchie/fetcher" 18 | "github.com/BurntSushi/toml" 19 | log "github.com/sirupsen/logrus" 20 | ) 21 | 22 | type Config struct { 23 | Port int `toml:"port"` 24 | Host string `toml:"host"` 25 | DatabaseHost string `toml:"database_host"` 26 | DatabasePort int `toml:"database_port"` 27 | DatabaseUsername string `toml:"database_username"` 28 | DatabaseName string `toml:"database_name"` 29 | DatabasePassword string `toml:"database_password"` 30 | DatabaseRootCertificate string `toml:"database_root_certificate"` 31 | 32 | LogEntriesHost string `toml:"logentries_host"` 33 | LogEntriesKey string `toml:"logentries_token"` 34 | 35 | Region string `toml:"region"` 36 | 37 | PasswordBucket string `toml:"password_bucket"` 38 | PasswordKey string `toml:"password_key"` 39 | 40 | FrontendAssets string `toml:"frontend_assets"` 41 | FrontendAssetsLocalPrefix string `toml:"frontend_assets_local_prefix"` 42 | FrontendAssetsBucket string `toml:"frontend_assets_bucket"` 43 | FrontendAssetsKey string `toml:"frontend_assets_key"` 44 | 45 | SyncPeriod int64 `toml:"sync_period"` 46 | ScalePeriod int64 `toml:"scale_period"` 47 | CleanPeriod int64 `toml:"clean_period"` 48 | 49 | KillStuckJobs bool `toml:"kill_stuck_jobs"` 50 | 51 | UseDatadogTracing bool `toml:"use_datadog_tracing"` 52 | 53 | UseAutoScaler bool `toml:"use_auto_scaler"` 54 | UseCleaner bool `toml:"use_cleaner"` 55 | } 56 | 57 | // Store config in a global variable 58 | var Conf Config 59 | 60 | func readPasswordConfiguration(contents string) (*string, error) { 61 | var pw_conf Config 62 | if _, err := toml.Decode(contents, &pw_conf); err != nil { 63 | return nil, err 64 | } 65 | 66 | if pw_conf.DatabasePassword == "" { 67 | return nil, fmt.Errorf("No passwords specified in password file.") 68 | } 69 | 70 | return &pw_conf.DatabasePassword, nil 71 | } 72 | 73 | func ReadConfiguration(filename string) error { 74 | tomlData, err := fetcher.ReadAllNoSessions(filename) 75 | if err != nil { 76 | return err 77 | } 78 | 79 | Conf = Config{ 80 | // Default values here 81 | SyncPeriod: 30, 82 | ScalePeriod: 30, 83 | CleanPeriod: 30 * 60, // 30 minutes in seconds 84 | KillStuckJobs: false, 85 | UseAutoScaler: true, 86 | UseCleaner: false, 87 | } 88 | if _, err := toml.Decode(string(tomlData), &Conf); err != nil { 89 | return err 90 | } 91 | 92 | // Substitute 
environment variables into every string field of the configuration (using reflection).
93 | // See the envsubstituter module; it performs the substitution.
94 | rconf := reflect.ValueOf(&Conf)
95 | for i := 0; i < rconf.Elem().NumField(); i++ {
96 | struct_elem_v := rconf.Elem().Field(i)
97 | if struct_elem_v.Kind().String() == reflect.ValueOf("str").Kind().String() {
98 | ptr := struct_elem_v.Addr().Interface().(*string)
99 | sub, err := envsubstituter.EnvironmentSubstitute(*ptr)
100 | if err != nil {
101 | return err
102 | }
103 | *ptr = sub
104 | }
105 | }
106 |
107 | if Conf.Region == "" {
108 | log.Fatal("AWS region must be supplied.")
109 | }
110 |
111 | /* Sanity check configuration (Port == 0 if not supplied) */
112 | if Conf.Port < 1 || Conf.Port > 65535 {
113 | log.Fatal("Port is invalid; expecting port between 1 and 65535")
114 | }
115 |
116 | // Note: not checking password; it can be legitimately empty
117 | if Conf.DatabaseHost == "" || Conf.DatabaseUsername == "" || Conf.DatabaseName == "" {
118 | log.Fatal("Incomplete database configuration. database_host, database_port, database_username and database_name must be supplied in the .toml configuration or you must use S3 configuration.")
119 | }
120 |
121 | if Conf.DatabasePort < 1 || Conf.DatabasePort > 65535 {
122 | log.Fatal("Database port is invalid; expecting port between 1 and 65535.")
123 | }
124 |
125 | // Where are my frontend assets? Check that the configuration makes sense
126 | if Conf.FrontendAssets != "local" && Conf.FrontendAssets != "s3" {
127 | log.Fatal("frontend_assets must be either 'local' or 's3'.")
128 | }
129 |
130 | err = awsclients.OpenSessions(Conf.Region)
131 | if err != nil {
132 | log.Fatal("Cannot open AWS sessions: ", err)
133 | }
134 |
135 | if Conf.FrontendAssets == "local" {
136 | if Conf.FrontendAssetsBucket != "" || Conf.FrontendAssetsKey != "" {
137 | log.Fatal("When using frontend_assets=\"local\" then neither frontend_assets_bucket nor frontend_assets_key should be specified.")
138 | }
139 | } else if Conf.FrontendAssets == "s3" {
140 | if Conf.FrontendAssetsLocalPrefix != "" {
141 | log.Fatal("When using frontend_assets=\"s3\" then frontend_assets_local_prefix should not be specified.")
142 | }
143 | if Conf.FrontendAssetsBucket == "" {
144 | log.Fatal("frontend_assets_bucket is empty. You need to set it.")
145 | }
146 | if Conf.FrontendAssetsKey == "" {
147 | log.Fatal("frontend_assets_key is empty. You need to set it.")
148 | }
149 | }
150 |
151 | if Conf.PasswordKey != "" {
152 | // Using S3 for passwords? Fetch the password file from the S3 bucket.
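// For illustration (the value is a placeholder): the fetched S3 object is
// itself a small TOML document, parsed by readPasswordConfiguration above,
// whose only required line looks like:
//
//     database_password = "examplepassword"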
153 | // Check that we are not using both database + KMS conf
154 | if Conf.DatabasePassword != "" {
155 | log.Fatal("Both KMS and non-KMS password supplied; can't decide which one to use.")
156 | }
157 | secret_key := Conf.PasswordKey
158 |
159 | s3path := "s3://" + Conf.PasswordBucket + "/" + secret_key
160 |
161 | log.Info("Fetching secret key from ", s3path)
162 | out, err := fetcher.ReadAll(s3path)
163 | if err != nil {
164 | log.Fatal("Cannot get secret key file: ", err)
165 | }
166 |
167 | pw, err := readPasswordConfiguration(string(out))
168 | if err != nil {
169 | log.Fatal("Cannot parse password file: ", err)
170 | }
171 |
172 | Conf.DatabasePassword = *pw
173 | }
174 |
175 | return nil
176 | }
177 |
-------------------------------------------------------------------------------- /batchiepatchie.go: --------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "net/http"
5 | "os"
6 | "path"
7 | "strconv"
8 |
9 | "github.com/AdRoll/batchiepatchie/config"
10 | "github.com/AdRoll/batchiepatchie/fetcher"
11 | "github.com/AdRoll/batchiepatchie/handlers"
12 | "github.com/AdRoll/batchiepatchie/jobs"
13 | "github.com/AdRoll/batchiepatchie/syncer"
14 | "github.com/bakatz/echo-logrusmiddleware"
15 | "github.com/labstack/echo"
16 | "github.com/opentracing/opentracing-go"
17 | log "github.com/sirupsen/logrus"
18 | "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/opentracer"
19 | "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer"
20 | )
21 |
22 | // fetchIndex fetches index.html from S3 or from the local filesystem.
23 | func fetchIndex() ([]byte, error) {
24 | if config.Conf.FrontendAssets == "local" {
25 | dir := path.Join(config.Conf.FrontendAssetsLocalPrefix, "index.html")
26 | log.Info("Getting index.html from local file:", dir)
27 | return fetcher.ReadAll(dir)
28 | }
29 | s3path := "s3://" + config.Conf.FrontendAssetsBucket + "/" + config.Conf.FrontendAssetsKey
30 | log.Info("Downloading index.html from ", s3path)
31 | return fetcher.ReadAll(s3path)
32 | }
33 |
34 | func pingHandler(c echo.Context) error {
35 | return c.String(http.StatusOK, "pong")
36 | }
37 |
38 | func main() {
39 | configurationFile := ""
40 | if len(os.Args) > 2 {
41 | log.Fatal("batchiepatchie expects exactly one argument: filename of the .toml configuration.")
42 | } else if len(os.Args) == 2 {
43 | configurationFile = os.Args[1]
44 | } else {
45 | /* Fallback to using environment variables */
46 | configurationFile = os.Getenv("BATCHIEPATCHIE_CONFIG")
47 | if configurationFile == "" {
48 | log.Fatal("No configuration file passed through either command line argument or BATCHIEPATCHIE_CONFIG environment variable.")
49 | }
50 | }
51 |
52 | log.SetFormatter(&log.JSONFormatter{})
53 | log.SetOutput(os.Stderr)
54 |
55 | // Sets the global config.Conf
56 | err := config.ReadConfiguration(configurationFile)
57 | if err != nil {
58 | log.Fatal("Reading configuration failed, ", err)
59 | }
60 |
61 | if config.Conf.LogEntriesKey != "" {
62 | log.Info("logentries_token supplied, will connect to LogEntries.")
63 | logentries_host := "data.logentries.com:443"
64 | if config.Conf.LogEntriesHost != "" {
65 | logentries_host = config.Conf.LogEntriesHost
66 | }
67 | setUpLogEntriesHooks(logentries_host, config.Conf.LogEntriesKey)
68 | }
69 |
70 | var trace opentracing.Tracer
71 | if config.Conf.UseDatadogTracing {
72 | ip := os.Getenv("BATCHIEPATCHIE_IP")
73 | if ip != "" {
74 | // If we have been passed an IP explicitly, attempt to
75 | // use it to connect to the DataDog tracer. When we run
76 | // batchiepatchie inside a Docker container and ddtracer
77 | // on the host, this lets us connect to the agent
78 | // running on the host.
79 | agentAddr := ip + ":8126"
80 | log.Info("Will attempt to ddtrace into ", agentAddr)
81 | trace = opentracer.New(tracer.WithServiceName("batchiepatchie"), tracer.WithAgentAddr(agentAddr))
82 | } else {
83 | trace = opentracer.New(tracer.WithServiceName("batchiepatchie"))
84 | }
85 | } else {
86 | trace = opentracing.NoopTracer{}
87 | }
88 | opentracing.SetGlobalTracer(trace)
89 |
90 | storage, err := jobs.NewPostgreSQLStore(config.Conf.DatabaseHost, config.Conf.DatabasePort, config.Conf.DatabaseUsername, config.Conf.DatabaseName, config.Conf.DatabasePassword, config.Conf.DatabaseRootCertificate)
91 | if err != nil {
92 | log.Fatal("Creating postgresql store failed, ", err)
93 | }
94 | log.Info("Successfully connected to PostgreSQL database.")
95 |
96 | killer, err := jobs.NewKillerHandler()
97 | if err != nil {
98 | log.Fatal("Creating killer handler failed, ", err)
99 | }
100 | log.Info("killer handler started.")
101 |
102 | index, err := fetchIndex()
103 | if err != nil {
104 | log.Error("Falling back to basic index.html: ", err)
105 | version := os.Getenv("VERSION")
106 | if version == "" {
107 | index = []byte("Cannot find index.html. VERSION environment variable is not set. Check that frontend has been deployed correctly and then restart backend.")
108 | } else {
109 | index = []byte("Cannot find index.html. (VERSION environment variable has been set but no file could be fetched). Check that frontend has been deployed correctly and then restart backend.
") 110 | } 111 | } 112 | 113 | // Launch the periodic synchronizer 114 | syncer.RunPeriodicSynchronizer(storage, killer) 115 | // Launch the periodic scaler 116 | if config.Conf.UseAutoScaler { 117 | log.Info("Auto-scaler enabled.") 118 | syncer.RunPeriodicScaler(storage) 119 | } else { 120 | log.Info("Auto-scaler disabled.") 121 | } 122 | // Launch the periodic cleaner 123 | if config.Conf.UseCleaner { 124 | syncer.RunPeriodicCleaner(storage) 125 | } else { 126 | log.Info("Cleaner disabled.") 127 | } 128 | 129 | // handle.Server is a structure to save context shared between requests 130 | s := &handlers.Server{ 131 | Storage: storage, 132 | Killer: killer, 133 | Index: index, 134 | } 135 | 136 | e := echo.New() 137 | 138 | // Logging middleware for API requests 139 | e.Logger = logrusmiddleware.Logger{Logger: log.StandardLogger()} 140 | e.Use(logrusmiddleware.Hook()) 141 | 142 | // Jobs API 143 | api := e.Group("/api/v1") 144 | { 145 | api.GET("/jobs/:id", s.FindOne) 146 | api.GET("/jobs", s.Find) 147 | api.POST("/jobs/kill", s.KillMany) 148 | api.GET("/jobs/:id/logs", s.FetchLogs) 149 | api.GET("/job_queues/active", s.ListActiveJobQueues) 150 | api.GET("/job_queues/all", s.ListAllJobQueues) 151 | api.POST("/job_queues/:name/activate", s.ActivateJobQueue) 152 | api.POST("/job_queues/:name/deactivate", s.DeactivateJobQueue) 153 | api.GET("/jobs/:id/status", s.GetStatus) 154 | api.POST("/jobs/notify", s.JobStatusNotification) 155 | api.GET("/jobs/:id/status_websocket", s.SubscribeToJobEvent) 156 | api.GET("/jobs/stats", s.JobStats) 157 | } 158 | 159 | e.GET("/ping", pingHandler) 160 | e.GET("/", s.IndexHandler) 161 | e.GET("/stats", s.IndexHandler) 162 | e.GET("/index.html", s.IndexHandler) 163 | 164 | // These are pseudo-URLs, the frontend will handle displaying the correct page 165 | e.GET("/job/:id", s.IndexHandler) 166 | e.GET("/job_queues", s.IndexHandler) 167 | 168 | if config.Conf.FrontendAssets == "local" { 169 | e.Static("/*", config.Conf.FrontendAssetsLocalPrefix) 170 | } 171 | 172 | // Launch web server 173 | e.Logger.Fatal(e.Start(config.Conf.Host + ":" + strconv.Itoa(config.Conf.Port))) 174 | } 175 | -------------------------------------------------------------------------------- /docs/docs/deployment.md: -------------------------------------------------------------------------------- 1 | Batchiepatchie - Deployment 2 | =========================== 3 | 4 | This page describes how to deploy Batchiepatchie in production environment. At 5 | the same time, it describes how Batchiepatchie is designed to be run. 6 | 7 | Operation 8 | --------- 9 | 10 | Batchiepatchie works by mirroring the state of AWS Batch in a PostgreSQL 11 | database. Unlike AWS Batch, Batchiepatchie will not forget about historical 12 | jobs (unless we manually delete old jobs from the database). 13 | 14 | There are two mechanisms Batchiepatchie can use to mirror its internal state: 15 | 16 | * Batchiepatchie polls periodically for all state from AWS Batch. 17 | 18 | * Batchiepatchie can be called by AWS Lambda function to instantly update state of some job. 19 | 20 | Out-of-box, the polling mechanism is enabled and will keep the jobs up to date 21 | in Batchiepatchie's eyes. The AWS Lambda setup is more complicated and is 22 | currently undocumented; we will fix this in the future. 23 | 24 | Building 25 | -------- 26 | 27 | Batchiepatchie is a Go project and if you have Go set up correctly, `go get` 28 | (to get dependencies) and `go build` should be sufficient inside 29 | Batchiepatchie's source code directory. 
30 |
31 | $ go get
32 | $ go build
33 |
34 | You should end up with a `batchiepatchie` executable file in the current
35 | directory.
36 |
37 | Configuration file
38 | ------------------
39 |
40 | Batchiepatchie is driven by a configuration file. An example is provided in the
41 | Batchiepatchie repository, called `test.toml`. Its contents are reproduced below:
42 |
43 | ```toml
44 | host = "0.0.0.0"
45 | port = 5454
46 | region = "us-west-2"
47 | database_host = "postgres"
48 | database_port = 5432
49 | database_username = "postgres"
50 | database_name = "postgres"
51 | database_password = "123456"
52 | frontend_assets = "local"
53 | frontend_assets_local_prefix = "frontend/dist"
54 | ```
55 |
56 | We will go through the possible settings one by one.
57 |
58 | * `host` and `port`: These define which host and port Batchiepatchie should listen on.
59 | * `region`: This specifies which AWS region Batchiepatchie should operate in.
60 | * `database_host`: This describes the hostname to use for the PostgreSQL store.
61 | * `database_port`: This describes the port to connect to for the PostgreSQL store.
62 | * `database_username`: This specifies the username to use for the PostgreSQL store.
63 | * `database_name`: This specifies the database name to use for the PostgreSQL store.
64 | * `database_password`: This specifies the password to use to connect to the PostgreSQL store. Mutually exclusive with the `password_bucket` and `password_key` settings.
65 | * `password_bucket` and `password_key`: These specify an S3 bucket and key for an S3 object that contains the password. This way you can store your passwords encrypted in S3. The S3 object should contain a line: `database_password = "yourpassword"`. These settings are mutually exclusive with the plain `database_password` setting.
66 | * `frontend_assets`: This must be either `local` or `s3`. Batchiepatchie needs static files to show its UI and these static files can be stored locally or in S3.
67 | * `frontend_assets_local_prefix`: When `frontend_assets` is `local`, this must point to the directory where `index.html` is located. Note that Batchiepatchie does not come with pre-built assets; you will need to build them in the `frontend/` directory in the Batchiepatchie repository first. Refer to the [frontend build instructions](frontend.md) for more information.
68 | * `frontend_assets_bucket`: When `frontend_assets` is `s3`, this must point to the S3 bucket name where static assets are located.
69 | * `frontend_assets_key`: When `frontend_assets` is `s3`, this must point to the key name that contains `index.html` for Batchiepatchie. Batchiepatchie will load this file from S3 at start up. Note that other static files are not loaded through S3.
70 | * `sync_period`: This specifies the number of seconds between polls of AWS Batch. By default, it is 30 seconds.
71 | * `scale_period`: This specifies the number of seconds between scaling hack polls. See more information about the scaling hack on [this page](scaling.md). By default, this setting is 30 seconds.
72 |
73 | The configuration file is passed when invoking Batchiepatchie.
74 |
75 | $ ./batchiepatchie configuration.toml
76 |
77 | The configuration file can also be placed in S3:
78 |
79 | $ ./batchiepatchie s3://my-bucket/configuration.toml
80 |
81 | Settings about which job queues to ingest into the Batchiepatchie database are not
82 | in the configuration file. These are set in the database instead.
83 |
84 | Database
85 | --------
86 |
87 | Batchiepatchie requires a PostgreSQL database to store persistent data.
We have
88 | tested Batchiepatchie with PostgreSQL 9.6, so we know the 9.6 family works. The most
89 | exotic feature Batchiepatchie makes use of is [trigram
90 | indexes](https://www.postgresql.org/docs/9.6/static/pgtrgm.html) and these seem
91 | to have been available since PostgreSQL 9.1. It is possible Batchiepatchie will
92 | work with older PostgreSQL versions, such as 9.1, but we have not tested this.
93 |
94 | The database must be initialized with a schema. The Batchiepatchie project uses
95 | [goose](https://github.com/pressly/goose) for migrations, and the migrations
96 | are located in the `migrations/` directory in the Batchiepatchie repository.
97 |
98 | If you have credentials to a PostgreSQL database, you can run the migrations
99 | with goose as in the example below:
100 |
101 | $ go get -u github.com/pressly/goose/cmd/goose # Install goose
102 | $ cd migrations
103 | $ goose postgres "user=batchiepatchie dbname=batchiepatchie password=blahblah" up
104 |
105 | Once the database has been initialized with the proper schema, Batchiepatchie
106 | can be started.
107 |
108 | IAM policies
109 | ------------
110 |
111 | During its operation, Batchiepatchie makes various AWS calls and thus requires
112 | permissions to do these operations. Below is a list of the permissions
113 | Batchiepatchie needs:
114 |
115 | ### Essential permissions:
116 |
117 | batch:DescribeJobs
118 | batch:DescribeJobQueues
119 | batch:DescribeComputeEnvironments
120 | batch:ListJobs
121 | batch:TerminateJob
122 | ec2:DescribeInstances
123 | ecs:DescribeContainerInstances
124 | ecs:DescribeTasks
125 | ecs:ListContainerInstances
126 | ecs:ListTasks
127 | logs:DescribeLogStreams
128 | logs:GetLogEvents
129 |
130 | Aside from `batch:TerminateJob`, the essential permissions are all about
131 | fetching information from AWS.
132 |
133 | ### Optional permissions:
134 |
135 | batch:UpdateComputeEnvironment
136 | ec2:TerminateInstances
137 | s3:GetObject
138 |
139 | S3 permissions are required if you place any configuration in S3; Batchiepatchie needs to be able to fetch it.
140 |
141 | If you want to use the [scaling hack feature](scaling.md) of Batchiepatchie, you will need
142 | to let it modify compute environments with `batch:UpdateComputeEnvironment`.
143 |
144 | If you want to use the [terminate instance hack feature](terminator.md) of
145 | Batchiepatchie, you will need to give it permission to terminate instances.
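To make the optional permissions concrete, below is a minimal sketch, not part of Batchiepatchie itself, of the kinds of calls they unlock, written against the same `aws-sdk-go` library the backend uses. The region, job ID and instance ID are placeholders:

```go
package main

import (
	"log"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/batch"
	"github.com/aws/aws-sdk-go/service/ec2"
)

func main() {
	sess := session.Must(session.NewSession(&aws.Config{Region: aws.String("us-west-2")}))

	// batch:TerminateJob is what job killing ultimately requires.
	if _, err := batch.New(sess).TerminateJob(&batch.TerminateJobInput{
		JobId:  aws.String("00000000-0000-0000-0000-000000000000"),
		Reason: aws.String("Killed via Batchiepatchie"),
	}); err != nil {
		log.Fatal(err)
	}

	// ec2:TerminateInstances is only exercised by the terminate instance hack.
	if _, err := ec2.New(sess).TerminateInstances(&ec2.TerminateInstancesInput{
		InstanceIds: []*string{aws.String("i-0123456789abcdef0")},
	}); err != nil {
		log.Fatal(err)
	}
}
```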
--------------------------------------------------------------------------------
/jobs/jobs.go:
--------------------------------------------------------------------------------
/*
Package jobs implements the basic Job structure and related functionality
*/
package jobs

import (
    "database/sql/driver"
    "encoding/json"
    "errors"
    "time"
)

// Job status constants
const (
    StatusFailed    = "FAILED"
    StatusPending   = "PENDING"
    StatusRunnable  = "RUNNABLE"
    StatusRunning   = "RUNNING"
    StatusStarting  = "STARTING"
    StatusSubmitted = "SUBMITTED"
    StatusSucceeded = "SUCCEEDED"
)

// StatusList is a list of all possible job statuses
var StatusList = [...]string{
    StatusFailed,
    StatusPending,
    StatusRunnable,
    StatusRunning,
    StatusStarting,
    StatusSubmitted,
    StatusSucceeded,
}

// JobStatus pairs a job ID with its current status.
type JobStatus struct {
    Id     string `json:"id"`
    Status string `json:"status"`
}

// Job is the central record Batchiepatchie stores about one AWS Batch job.
type Job struct {
    Id                   string           `json:"id"`
    Name                 string           `json:"name"`
    Status               string           `json:"status"`
    Description          string           `json:"desc"`
    LastUpdated          time.Time        `json:"last_updated"`
    JobQueue             string           `json:"job_queue"`
    Image                string           `json:"image"`
    CreatedAt            time.Time        `json:"created_at"`
    StoppedAt            *time.Time       `json:"stopped_at"`
    VCpus                int64            `json:"vcpus"`
    Memory               int64            `json:"memory"`
    Timeout              int              `json:"timeout"`
    CommandLine          string           `json:"command_line"`
    StatusReason         *string          `json:"status_reason"`
    RunStartTime         *time.Time       `json:"run_start_time"`
    ExitCode             *int64           `json:"exitcode"`
    LogStreamName        *string          `json:"log_stream_name"`
    TerminationRequested bool             `json:"termination_requested"`
    TaskARN              *string          `json:"task_arn"`
    InstanceID           *string          `json:"instance_id"`
    PublicIP             *string          `json:"public_ip"`
    PrivateIP            *string          `json:"private_ip"`
    ArrayProperties      *ArrayProperties `json:"array_properties,omitempty"`
}

// ArrayProperties are properties of a parent array job.
type ArrayProperties struct {
    Size          int64         `json:"size"`
    StatusSummary StatusSummary `json:"status_summary"`
}

// Value implements the driver.Valuer interface. This method
// is needed for JSONB serialization to the database.
func (a ArrayProperties) Value() (driver.Value, error) {
    return json.Marshal(a)
}

// Scan implements the sql.Scanner interface. This method
// is needed for JSONB deserialization from the database.
func (a *ArrayProperties) Scan(value interface{}) error {
    b, ok := value.([]byte)
    if !ok {
        return errors.New("type assertion to []byte failed")
    }

    return json.Unmarshal(b, &a)
}
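
// Illustrative sketch (not part of the original file): ArrayProperties
// round-trips through the database as JSONB via the Value/Scan pair above:
//
//	ap := ArrayProperties{Size: 10}
//	v, _ := ap.Value() // []byte holding JSON such as {"size":10,...}
//	var out ArrayProperties
//	_ = out.Scan(v)    // out now equals ap
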
// StatusSummary is counts of statuses of child array jobs
type StatusSummary struct {
    Starting  int64 `json:"starting"`
    Failed    int64 `json:"failed"`
    Running   int64 `json:"running"`
    Succeeded int64 `json:"succeeded"`
    Runnable  int64 `json:"runnable"`
    Submitted int64 `json:"submitted"`
    Pending   int64 `json:"pending"`
}

// Options is the query options for the Find method to use
type Options struct {
    Search    string
    DateRange string
    Limit     int
    Offset    int
    Queues    []string
    SortBy    string
    SortAsc   bool
    Status    []string
}

// JobStatsOptions is the query options for the JobStats method to use
type JobStatsOptions struct {
    Queues   []string
    Status   []string
    Interval int64
    Start    int64
    End      int64
}

// JobStats is one aggregated data point of resource usage for a job queue
// and status over one interval.
type JobStats struct {
    JobQueue        string  `json:"job_queue"`
    Status          string  `json:"status"`
    Timestamp       float64 `json:"timestamp"`
    VCPUSeconds     float64 `json:"vcpu_seconds"`
    MemorySeconds   float64 `json:"memory_seconds"`
    InstanceSeconds float64 `json:"instance_seconds"`
    JobCount        int     `json:"job_count"`
    Interval        int64   `json:"interval"`
}

// KillTaskID is a struct to handle a JSON request to kill a task
type KillTaskID struct {
    ID string `json:"id" form:"id" query:"id"`
}

// FinderStorer is an interface that can both save and retrieve jobs
type FinderStorer interface {
    Finder
    Storer

    // Methods to get information about job queues
    ListActiveJobQueues() ([]string, error)
    ListForcedScalingJobQueues() ([]string, error)

    ActivateJobQueue(string) error
    DeactivateJobQueue(string) error
}

// Finder is an interface to find jobs in a database/store
type Finder interface {
    // Find finds jobs matching the query
    Find(opts *Options) ([]*Job, error)

    // FindOne finds a job matching the query
    FindOne(query string) (*Job, error)

    // FindTimedoutJobs finds all job IDs that should have timed out by now
    FindTimedoutJobs() ([]string, error)

    // GetStatus is a simple endpoint that returns the status of one job.
    GetStatus(jobid string) (*JobStatus, error)

    // JobStats returns aggregated job statistics.
    JobStats(opts *JobStatsOptions) ([]*JobStats, error)
}
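
// Illustrative sketch (not part of the original file): fetching the first
// page of currently running jobs on one queue through the Finder interface.
// The store variable stands for any FinderStorer implementation, and the
// SortBy column name is an assumption for illustration:
//
//	jobs, err := store.Find(&Options{
//		Queues:  []string{"my-job-queue"},
//		Status:  []string{StatusRunning},
//		Limit:   100,
//		Offset:  0,
//		SortBy:  "last_updated",
//		SortAsc: false,
//	})
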
// Storer is an interface to save jobs in a database/store
type Storer interface {
    // Store saves the given jobs
    Store(job []*Job) error

    // StaleOldJobs gives the store a chance to mark as stale any jobs we no
    // longer know about. The argument is a set (the value is ignored) of all
    // job IDs currently known to AWS Batch.
    StaleOldJobs(map[string]bool) error

    // EstimateRunningLoadByJobQueue finds the estimated load per job queue
    EstimateRunningLoadByJobQueue([]string) (map[string]RunningLoad, error)

    // UpdateComputeEnvironmentsLog updates the compute environment logs
    UpdateComputeEnvironmentsLog([]ComputeEnvironment) error

    // UpdateJobSummaryLog updates the job summaries
    UpdateJobSummaryLog([]JobSummary) error

    // UpdateJobLogTerminationRequested marks on a job that we requested it
    // to be terminated
    UpdateJobLogTerminationRequested(string) error

    // UpdateTaskArnsInstanceIDs updates information on task ARNs and EC2
    // metadata
    UpdateTaskArnsInstanceIDs(map[string]Ec2Info, map[string]string) error

    // UpdateECSInstances updates information on EC2 instances running on ECS
    UpdateECSInstances(map[string]Ec2Info, map[string][]string) error

    // GetAliveEC2Instances gets alive EC2 instances (according to the database)
    GetAliveEC2Instances() ([]string, error)

    // GetStartingStateStuckEC2Instances gets all instance IDs that have jobs
    // stuck in "STARTING" status
    GetStartingStateStuckEC2Instances() ([]string, error)

    // SubscribeToJobStatus subscribes to updates about a job's status. (See
    // more info on this function in postgres_store.go.)
    SubscribeToJobStatus(jobID string) (<-chan Job, func())
}

// Cleaner allows you to clean the database
type Cleaner interface {
    // CleanOldJobs cleans old jobs from the database
    CleanOldJobs() error

    // CleanOldInstanceEventLogs cleans old instance event logs from the database
    CleanOldInstanceEventLogs() error
}

// Killer is an interface to kill jobs in the queue
type Killer interface {
    // KillOne kills a job matching the query
    KillOne(jobID string, reason string, store Storer) error

    // KillInstances kills jobs and instances that are stuck in "STARTING" status
    KillInstances(instances []string) error
}

// RunningLoad describes how many vCPUs and how much memory the currently
// queued jobs require
type RunningLoad struct {
    WantedVCpus  int64
    WantedMemory int64
}

// ComputeEnvironment is a snapshot of an AWS Batch compute environment's
// scaling parameters.
type ComputeEnvironment struct {
    Name        string
    WantedvCpus int64
    MinvCpus    int64
    MaxvCpus    int64
    State       string
    ServiceRole string
}

// JobSummary counts jobs per status for one job queue.
type JobSummary struct {
    JobQueue  string
    Submitted int64
    Pending   int64
    Runnable  int64
    Starting  int64
    Running   int64
}

// Ec2Info is metadata about one EC2 instance backing an ECS cluster.
type Ec2Info struct {
    PrivateIP             *string
    PublicIP              *string
    AMI                   string
    ComputeEnvironmentARN string
    ECSClusterARN         string
    AvailabilityZone      string
    SpotInstanceRequestID *string
    InstanceType          string
    LaunchedAt            *time.Time
}
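
// Illustrative sketch (not part of the original file): a concrete store such
// as the PostgreSQL-backed one referenced above would typically declare its
// conformance to these interfaces with compile-time assertions; the
// PostgresStore name here is hypothetical:
//
//	var _ FinderStorer = (*PostgresStore)(nil)
//	var _ Cleaner = (*PostgresStore)(nil)
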
"github.com/opentracing/opentracing-go" 9 | log "github.com/sirupsen/logrus" 10 | ) 11 | 12 | type arnInfo struct { 13 | ecsClusterARN string 14 | computeEnvironmentARN string 15 | } 16 | 17 | func MonitorECSClusters(fs Storer, queues []string) error { 18 | span := opentracing.StartSpan("MonitorECSClusters") 19 | defer span.Finish() 20 | 21 | /* TODO: handle pagination in all these API calls. */ 22 | 23 | /* First we collect all compute environments references by any queues 24 | * */ 25 | job_queue_names := make([]*string, 0) 26 | for _, job_queue := range queues { 27 | jq := job_queue 28 | job_queue_names = append(job_queue_names, &jq) 29 | } 30 | 31 | job_queues := &batch.DescribeJobQueuesInput{ 32 | JobQueues: job_queue_names, 33 | } 34 | 35 | job_queue_descs, err := awsclients.Batch.DescribeJobQueues(job_queues) 36 | if err != nil { 37 | log.Warning("Failed to describe job queues: ", err) 38 | return err 39 | } 40 | 41 | compute_environments := make(map[string]bool) 42 | for _, job_queue_desc := range job_queue_descs.JobQueues { 43 | for _, compute_env_order := range job_queue_desc.ComputeEnvironmentOrder { 44 | compute_environments[*compute_env_order.ComputeEnvironment] = true 45 | } 46 | } 47 | 48 | /* Now that we got compute environments (in the map above), we can get 49 | * their description and the ECS cluster names they point to. */ 50 | compute_environments_lst := make([]*string, len(compute_environments)) 51 | i := 0 52 | for name := range compute_environments { 53 | n := name 54 | compute_environments_lst[i] = &n 55 | i++ 56 | } 57 | 58 | compute_environments_input := &batch.DescribeComputeEnvironmentsInput{ 59 | ComputeEnvironments: compute_environments_lst, 60 | } 61 | 62 | compute_environment_descs, err := awsclients.Batch.DescribeComputeEnvironments(compute_environments_input) 63 | if err != nil { 64 | log.Warning("Failed to describe compute environments: ", err) 65 | return err 66 | } 67 | 68 | ecs_clusters := make(map[string]string) 69 | for _, compute_environment_desc := range compute_environment_descs.ComputeEnvironments { 70 | if compute_environment_desc.EcsClusterArn != nil { 71 | ecs_clusters[*compute_environment_desc.EcsClusterArn] = *compute_environment_desc.ComputeEnvironmentArn 72 | } 73 | } 74 | 75 | ecs_clusters_lst := make([]*string, len(ecs_clusters)) 76 | i = 0 77 | 78 | task_ec2_mapping := make(map[string]string) 79 | ec2instances_set := make(map[string]arnInfo) 80 | tasks_per_ec2instance := make(map[string][]string) 81 | 82 | for name := range ecs_clusters { 83 | n := name 84 | ecs_clusters_lst[i] = &n 85 | i++ 86 | 87 | task_mapping := make(map[string]string) 88 | var next_token *string 89 | for { 90 | var tasks_input *ecs.ListTasksInput 91 | if next_token == nil { 92 | tasks_input = &ecs.ListTasksInput{ 93 | Cluster: &n, 94 | } 95 | } else { 96 | tasks_input = &ecs.ListTasksInput{ 97 | Cluster: &n, 98 | NextToken: next_token, 99 | } 100 | } 101 | task_listing, err := awsclients.ECS.ListTasks(tasks_input) 102 | if err != nil { 103 | log.Warning("Failed to list tasks: ", err) 104 | return err 105 | } 106 | 107 | task_arns := make([]*string, 0) 108 | for _, task := range task_listing.TaskArns { 109 | n := *task 110 | task_arns = append(task_arns, &n) 111 | } 112 | 113 | if len(task_arns) > 0 { 114 | describe_tasks := &ecs.DescribeTasksInput{ 115 | Cluster: &n, 116 | Tasks: task_arns, 117 | } 118 | 119 | task_descs, err := awsclients.ECS.DescribeTasks(describe_tasks) 120 | if err != nil { 121 | log.Warning("Failed to describe tasks: ", err) 122 | return err 
    for name := range ecs_clusters {
        n := name
        ecs_clusters_lst[i] = &n
        i++

        task_mapping := make(map[string]string)
        var next_token *string
        for {
            var tasks_input *ecs.ListTasksInput
            if next_token == nil {
                tasks_input = &ecs.ListTasksInput{
                    Cluster: &n,
                }
            } else {
                tasks_input = &ecs.ListTasksInput{
                    Cluster:   &n,
                    NextToken: next_token,
                }
            }
            task_listing, err := awsclients.ECS.ListTasks(tasks_input)
            if err != nil {
                log.Warning("Failed to list tasks: ", err)
                return err
            }

            task_arns := make([]*string, 0)
            for _, task := range task_listing.TaskArns {
                n := *task
                task_arns = append(task_arns, &n)
            }

            if len(task_arns) > 0 {
                describe_tasks := &ecs.DescribeTasksInput{
                    Cluster: &n,
                    Tasks:   task_arns,
                }

                task_descs, err := awsclients.ECS.DescribeTasks(describe_tasks)
                if err != nil {
                    log.Warning("Failed to describe tasks: ", err)
                    return err
                }

                for _, task_desc := range task_descs.Tasks {
                    task_mapping[*task_desc.TaskArn] = *task_desc.ContainerInstanceArn
                }
            }

            next_token = task_listing.NextToken
            if next_token == nil {
                break
            }
        }
        /* task_mapping should now contain a mapping from task ARNs to
           container instance ARNs. Now, figure out the actual instance IDs
           for those container instance ARNs.

           We first get all container ARNs by API call and then complement
           them with the ones we got from tasks. */

        next_token = nil
        container_arn_set := make(map[string]bool, 0)
        for {
            var describe_container_instances *ecs.ListContainerInstancesInput
            if next_token == nil {
                describe_container_instances = &ecs.ListContainerInstancesInput{
                    Cluster: &n,
                }
            } else {
                describe_container_instances = &ecs.ListContainerInstancesInput{
                    Cluster:   &n,
                    NextToken: next_token,
                }
            }

            container_arns, err := awsclients.ECS.ListContainerInstances(describe_container_instances)
            if err != nil {
                log.Warning("Failed to list container instances: ", err)
                return err
            }

            for _, arn_ref := range container_arns.ContainerInstanceArns {
                if arn_ref != nil {
                    arn := *arn_ref
                    container_arn_set[arn] = true
                }
            }

            next_token = container_arns.NextToken
            if next_token == nil {
                break
            }
        }

        for _, container_arn := range task_mapping {
            container_arn_set[container_arn] = true
        }
        container_arn_lst := make([]*string, len(container_arn_set))
        j := 0
        for container_arn := range container_arn_set {
            n := container_arn
            container_arn_lst[j] = &n
            j++
        }

        /* Now, describe the container ARNs, 50 per API call. */
        cursor := 0
        for {
            if cursor >= len(container_arn_lst) {
                break
            }
            cursor_end := cursor + 50
            if cursor_end > len(container_arn_lst) {
                cursor_end = len(container_arn_lst)
            }

            lst := make([]*string, cursor_end-cursor)
            for i, v := range container_arn_lst[cursor:cursor_end] {
                n := *v
                lst[i] = &n
            }
            container_input := &ecs.DescribeContainerInstancesInput{
                Cluster:            &n,
                ContainerInstances: lst,
            }
            cursor += 50
            container_descs, err := awsclients.ECS.DescribeContainerInstances(container_input)
            if err != nil {
                log.Warning("Cannot describe container instances: ", err)
                return err
            }
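
            /* A non-quadratic alternative (a sketch, not what the code below
             * does): invert task_mapping once into a map from container
             * instance ARN to its task ARNs before this loop, then look up
             * each described container instance directly instead of
             * rescanning task_mapping every iteration. */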
            for _, container_desc := range container_descs.ContainerInstances {
                /* TODO: this is quadratic. Fix it at some point. */
                for task_arn, container_arn := range task_mapping {
                    if container_arn == *container_desc.ContainerInstanceArn {
                        task_ec2_mapping[task_arn] = *container_desc.Ec2InstanceId
                        lst, ok := tasks_per_ec2instance[*container_desc.Ec2InstanceId]
                        if ok {
                            tasks_per_ec2instance[*container_desc.Ec2InstanceId] = append(lst, task_arn)
                        } else {
                            new_lst := make([]string, 1)
                            new_lst[0] = task_arn
                            tasks_per_ec2instance[*container_desc.Ec2InstanceId] = new_lst
                        }
                    }
                }
                if container_desc.Ec2InstanceId != nil {
                    ec2instances_set[*container_desc.Ec2InstanceId] = arnInfo{
                        ecsClusterARN:         n,
                        computeEnvironmentARN: ecs_clusters[n],
                    }
                    /* Make sure there is an empty job listing when there are
                       no tasks on the instance. */
                    _, ok := tasks_per_ec2instance[*container_desc.Ec2InstanceId]
                    if !ok {
                        new_lst := make([]string, 0)
                        tasks_per_ec2instance[*container_desc.Ec2InstanceId] = new_lst
                    }
                }
            }
        }
    }

    ec2instances_lst := make([]*string, 0)
    for ec2instance := range ec2instances_set {
        n := ec2instance
        ec2instances_lst = append(ec2instances_lst, &n)
    }

    ec2instances_info := make(map[string]Ec2Info)

    cursor := 0
    for {
        cursor_end := cursor + 50
        if cursor >= len(ec2instances_lst) {
            break
        }
        if cursor_end > len(ec2instances_lst) {
            cursor_end = len(ec2instances_lst)
        }

        lst := make([]*string, cursor_end-cursor)
        for i, v := range ec2instances_lst[cursor:cursor_end] {
            n := *v
            lst[i] = &n
        }
        cursor += 50

        instances_input := &ec2.DescribeInstancesInput{
            InstanceIds: lst,
        }
        instances_descs, err := awsclients.EC2.DescribeInstances(instances_input)
        if err != nil {
            log.Warning("Cannot describe instances: ", err)
            return err
        }
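
        /* The guards below default each possibly-nil SDK pointer field to an
         * empty value. (The aws-sdk-go helpers aws.StringValue and
         * aws.TimeValue from the github.com/aws/aws-sdk-go/aws package are
         * the stock answer to the `fromMaybe` question below.) */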
        for _, reservation := range instances_descs.Reservations {
            for _, instance := range reservation.Instances {
                // What is `fromMaybe` of Go language?
                public_ip := instance.PublicIpAddress
                private_ip := instance.PrivateIpAddress
                ami := ""
                if instance.ImageId != nil {
                    ami = *instance.ImageId
                }
                instance_id := ""
                if instance.InstanceId != nil {
                    instance_id = *instance.InstanceId
                }
                compute_environment_arn := ""
                ecs_cluster_arn := ""
                info, ok := ec2instances_set[instance_id]
                if ok {
                    compute_environment_arn = info.computeEnvironmentARN
                    ecs_cluster_arn = info.ecsClusterARN
                }
                az := ""
                if instance.Placement != nil && instance.Placement.AvailabilityZone != nil {
                    az = *instance.Placement.AvailabilityZone
                }
                sir := instance.SpotInstanceRequestId
                instance_type := ""
                if instance.InstanceType != nil {
                    instance_type = *instance.InstanceType
                }
                launched_at := instance.LaunchTime
                // instance_id has already been nil-guarded above, so it is
                // safe to use as the map key here.
                ec2instances_info[instance_id] = Ec2Info{
                    PublicIP:              public_ip,
                    PrivateIP:             private_ip,
                    AMI:                   ami,
                    ComputeEnvironmentARN: compute_environment_arn,
                    ECSClusterARN:         ecs_cluster_arn,
                    AvailabilityZone:      az,
                    SpotInstanceRequestID: sir,
                    InstanceType:          instance_type,
                    LaunchedAt:            launched_at,
                }
            }
        }
    }

    err1 := fs.UpdateTaskArnsInstanceIDs(ec2instances_info, task_ec2_mapping)
    err2 := fs.UpdateECSInstances(ec2instances_info, tasks_per_ec2instance)

    if err1 != nil {
        return err1
    }
    return err2
}
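
// Illustrative sketch (not part of the original file): MonitorECSClusters is
// the sort of call a poller would run every sync_period seconds, for example:
//
//	for range time.Tick(30 * time.Second) {
//		if err := jobs.MonitorECSClusters(store, queues); err != nil {
//			log.Warning("MonitorECSClusters failed: ", err)
//		}
//	}
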
--------------------------------------------------------------------------------
/frontend/src/pages/JobsPage/JobsPage.jsx:
--------------------------------------------------------------------------------
import React, { PropTypes } from 'react';
import { connect } from 'react-redux';
import classNames from 'classnames';
import ReactDataGrid from 'react-data-grid';
import {
    fetchJobsPage,
    killJobs,
    setSelectedIds,
    setParams,
    syncJobQueues,
    updateJobsQueryParams,
    QUERY_PARAM_DEFAULTS
} from 'stores/job';
import { JOBS } from 'stores/status';
import CommandLineFormatter from 'components/CommandLineFormatter/CommandLineFormatter';
import DateTimeFormatter from 'components/DateTimeFormatter/DateTimeFormatter';
import StatusFormatter from 'components/StatusFormatter/StatusFormatter';
import JobLinkFormatter from 'components/JobLinkFormatter/JobLinkFormatter';
import NameFormatter from 'components/NameFormatter/NameFormatter';
import ImageFormatter from 'components/ImageFormatter/ImageFormatter';
import DurationFormatter from 'components/DurationFormatter/DurationFormatter';
import RowRenderer from 'components/RowRenderer/RowRenderer';
import QueueSelector from 'components/QueueSelector/QueueSelector';
import StatusSelector from 'components/StatusSelector/StatusSelector';
import './JobsPage.scss';
import 'react-select/dist/react-select.css';

const AUTO_REFRESH_TIMEOUT = 5000; // ms

const COLUMNS = [
    {
        key: 'id',
        name: 'ID',
        resizable: false,
        sortable: true,
        width: 95,
        formatter: JobLinkFormatter
    },
    {
        key: 'status',
        name: 'Status',
        resizable: false,
        sortable: true,
        width: 120,
        formatter: StatusFormatter
    },
    {
        key: 'name',
        name: 'Name',
        resizable: true,
        sortable: true,
        width: 310,
        getRowMetaData: (job) => job,
        formatter: NameFormatter
    },
    {
        key: 'image',
        name: 'Image',
        resizable: true,
        width: 270,
        formatter: ImageFormatter
    },
    {
        key: 'runtime',
        name: 'Runtime',
        resizable: true,
        width: 140,
        formatter: DurationFormatter
    },
    {
        key: 'total_elapsed_time',
        name: 'Total elapsed',
        resizable: true,
        width: 140,
        formatter: DurationFormatter
    },
    {
        key: 'stopped_at',
        name: 'Stopped At',
        resizable: true,
        sortable: true,
        width: 280,
        formatter: DateTimeFormatter
    },
    {
        key: 'job_queue',
        name: 'Queue',
        resizable: true,
        width: 270
    },
    {
        key: 'last_updated',
        name: 'Last Updated',
        resizable: true,
        sortable: true,
        width: 280,
        formatter: DateTimeFormatter
    },
    {
        key: 'vcpus',
        name: 'CPUs',
        width: 80
    },
    {
        key: 'memory',
        name: 'Memory',
        width: 80
    },
    {
        key: 'command_line',
        name: 'Command Line',
        width: 800,
        resizable: true,
        formatter: CommandLineFormatter
    }
];

// Sum of all column widths; used as the grid's minimum width.
const MIN_WIDTH = COLUMNS.reduce((memo, column) => memo + column.width, 0);

const PAGE_SIZE = 100;

class JobsPage extends React.Component {
    static propTypes = {
        fetchJobsPage: PropTypes.func.isRequired,
        height: PropTypes.number.isRequired,
        jobs: PropTypes.array.isRequired,
        killJobs: PropTypes.func.isRequired,
        q: PropTypes.string,
        dateRange: PropTypes.string,
        routing: PropTypes.object.isRequired,
        selectedIds: PropTypes.array.isRequired,
        setParams: PropTypes.func.isRequired,
        setSelectedIds: PropTypes.func.isRequired,
        sortColumn: PropTypes.string,
        sortDirection: PropTypes.string,
        status: PropTypes.object.isRequired,
        syncJobQueues: PropTypes.func.isRequired,
        updateJobsQueryParams: PropTypes.func.isRequired,
    };

    constructor(props) {
        super(props);
        // Using state for autoRefresh so it resets to false on navigation
        this.state = {
            autoRefresh: false
        };
    }

    componentDidMount() {
        this.loadStateFromQueryParams();
        this.props.syncJobQueues();
        this.props.fetchJobsPage();
    }

    componentDidUpdate(prevProps) {
        if (this.props.q !== prevProps.q ||
            this.props.dateRange !== prevProps.dateRange ||
            this.props.sortColumn !== prevProps.sortColumn ||
            this.props.sortDirection !== prevProps.sortDirection ||
            this.props.page !== prevProps.page ||
            this.props.selectedStatus !== prevProps.selectedStatus ||
            this.props.selectedQueue !== prevProps.selectedQueue) {
            this.props.updateJobsQueryParams();
            this.props.fetchJobsPage();
        }
    }

    componentWillUnmount() {
        // Stop the auto-refresh loop; setState cannot be called on an
        // unmounting component, so the flag is cleared directly.
        this.state.autoRefresh = false;
    }

    render() {
        const {
            jobs,
            height,
            queues,
            status
        } = this.props;

        if (!status.loading && status.error) {
            return (
                /* The JSX markup of this branch was lost in extraction; only
                   its text content was recoverable. */
                <div>
                    Could not load API response for jobs.
                </div>
            );
        }

        const listHeight = height - 240;

        return (
            <div>
                {/* The JSX markup of the main branch was lost in extraction.
                    Recoverable structure: a "Jobs" heading; QueueSelector
                    (queues, handleQueueChange) and StatusSelector
                    (handleStatusChange) controls; search, auto-refresh
                    (setAutoRefresh) and kill-selected (killJobs) controls;
                    previousPage / nextPage pagination; and a ReactDataGrid
                    over `jobs` wired to COLUMNS, rowGetter, RowRenderer,
                    listHeight, MIN_WIDTH, onGridSort, onRowsSelected and
                    onRowsDeselected. */}
            </div>
        );
    }

    rowGetter = (i) => {
        return this.props.jobs[i];
    }

    onRowsSelected = (rows) => {
        this.props.setSelectedIds(this.props.selectedIds.concat(rows.map(r => r.row.id)));
        this.props.updateJobsQueryParams();
    }

    onRowsDeselected = (rows) => {
        const rowIds = rows.map(r => r.row.id);
        this.props.setSelectedIds(this.props.selectedIds.filter(i => rowIds.indexOf(i) === -1));
        this.props.updateJobsQueryParams();
    }

    onGridSort = (sortColumn, sortDirection) => {
        this.props.setParams({ sortColumn, sortDirection });
    }

    killJobs = () => {
        this.props.killJobs(this.props.selectedIds)
            .then(() => this.props.fetchJobsPage())
            .catch(() => {});
    }

    previousPage = () => {
        if (this.props.page > 0) {
            this.props.setParams({ page: this.props.page - 1 });
        }
    }

    nextPage = () => {
        // Only advance when the current page is full; a short page means we
        // are already on the last page of results.
        if (this.props.jobs.length === PAGE_SIZE) {
            this.props.setParams({ page: this.props.page + 1 });
        }
    }

    handleStatusChange = (newStatus) => {
        this.props.setParams({ selectedStatus: newStatus });
    }

    handleQueueChange = (newQueue) => {
        this.props.setParams({ selectedQueue: newQueue });
    }

    // Load query params into the store, resetting any missing values to defaults
    loadStateFromQueryParams = () => {
        const query = this.props.routing.locationBeforeTransitions.query;
        const queryParamsWithDefaults = {
            ...QUERY_PARAM_DEFAULTS,
            ...query,
            qTemp: query.q || '',
            dateRange: query.dateRange || '1d',
            page: query.page ? parseInt(query.page, 10) : 0,
            selectedIds: query.selectedIds ? query.selectedIds.split(',') : [],
            selectedQueue: !query.selectedQueue ? 'all' : query.selectedQueue,
            selectedStatus: !query.selectedStatus ? 'all' : query.selectedStatus,
        };
        this.props.setParams(queryParamsWithDefaults);
    }

    setAutoRefresh = (e) => {
        const autoRefresh = e.target.checked;
        // Kick off the refresh loop only once the new flag is visible in
        // state, since autoRefresh() reads this.state.autoRefresh.
        this.setState({ autoRefresh }, () => this.autoRefresh());
    }

    autoRefresh = () => {
        if (this.state.autoRefresh) {
            this.props.fetchJobsPage().then(() => {
                setTimeout(() => {
                    this.autoRefresh();
                }, AUTO_REFRESH_TIMEOUT);
            });
        }
    }
}

const mapStateToProps = state => ({
    q: state.job.q,
    dateRange: state.job.dateRange,
    jobs: state.job.jobs,
    page: state.job.page,
    height: state.layout.height,
    queues: state.job.queues,
    routing: state.routing,
    selectedIds: state.job.selectedIds,
    selectedQueue: state.job.selectedQueue,
    selectedStatus: state.job.selectedStatus,
    status: state.status[JOBS],
    sortColumn: state.job.sortColumn,
    sortDirection: state.job.sortDirection,
});

const actions = {
    fetchJobsPage,
    killJobs,
    setSelectedIds,
    setParams,
    syncJobQueues,
    updateJobsQueryParams
};

export default connect(mapStateToProps, actions)(JobsPage);
--------------------------------------------------------------------------------