17 | );
18 | }
19 | };
20 |
--------------------------------------------------------------------------------
/docs/docs/index.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Project documentation
2 | ======================================
3 |
4 | This is documentation for Batchiepatchie, a monitoring tool for AWS Batch.
5 |
6 | If you are new here, we suggest you read the high-level overview first before
7 | reading about the other topics.
8 |
9 | Index
10 | -----
11 |
12 | - [High-level overview](overview.md)
13 | - [Quick start](quickstart.md)
14 | - [Deployment](deployment.md)
15 | - [Frontend](frontend.md)
16 | - [Job statuses](statuses.md)
17 | - [Timeouts](timeouts.md)
18 | - [Scaling hack](scaling.md)
19 | - [Terminator](terminator.md)
20 | - [Tracing](tracing.md)
21 |
22 |
--------------------------------------------------------------------------------
/migrations/00022_reduced_trigram_index.sql:
--------------------------------------------------------------------------------
1 | -- +goose NO TRANSACTION
2 | -- +goose Up
3 | -- SQL in this section is executed when the migration is applied.
4 | DROP INDEX CONCURRENTLY trgm_idx_jobs;
5 |
6 | CREATE INDEX CONCURRENTLY trgm_idx_jobs ON jobs USING gin (
7 | (job_id || job_name || job_queue || image) gin_trgm_ops
8 | );
9 |
10 | -- +goose Down
11 | -- SQL in this section is executed when the migration is rolled back.
12 | DROP INDEX CONCURRENTLY trgm_idx_jobs;
13 |
14 | CREATE INDEX CONCURRENTLY trgm_idx_jobs ON jobs USING gin (
15 | (job_id || job_name || job_queue || image || command_line || job_definition) gin_trgm_ops
16 | );
17 |
--------------------------------------------------------------------------------
/frontend/src/components/Terminal/Terminal.scss:
--------------------------------------------------------------------------------
1 | .terminal {
2 | overflow-x: auto;
3 | overflow-y: hidden;
4 |
5 | pre {
6 | padding: 0;
7 | font-size: 12px;
8 | margin-bottom: 0;
9 | border: 0;
10 | overflow: hidden;
11 | margin-left: 6px;
12 | margin-right: 6px;
13 | color: #333;
14 | }
15 |
16 | // Color it orange if it is the current search result.
17 | .current-search-result {
18 | background-color: orange;
19 | }
20 |
21 | // If it is a search result on a different line, color it yellow.
22 | mark, .mark {
23 | background-color: yellow;
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/frontend/src/utils/debounce.js:
--------------------------------------------------------------------------------
1 | // Taken from https://davidwalsh.name/javascript-debounce-function
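// Returns a debounced version of `func` that postpones its execution until
// `wait` milliseconds have elapsed since the last invocation; if `immediate`
// is true, `func` fires on the leading edge of the wait period instead.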
2 | export default function debounce(func, wait, immediate) {
3 | var timeout;
4 | return function() {
5 | var context = this;
6 | var args = arguments;
7 | var later = function() {
8 | timeout = null;
9 | if (!immediate) {
10 | func.apply(context, args);
11 | }
12 | };
13 | var callNow = immediate && !timeout;
14 | clearTimeout(timeout);
15 | timeout = setTimeout(later, wait);
16 | if (callNow) {
17 | func.apply(context, args);
18 | }
19 | };
20 | };
21 |
--------------------------------------------------------------------------------
/docs/docs/terminator.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Terminator
2 | ---------------------------
3 |
4 | Batchiepatchie can terminate EC2 instances that appear to be stuck.
5 | 
6 | At this time, Batchiepatchie will terminate EC2 instances that have jobs on
7 | them that have been in the `STARTING` state for more than 10 minutes. Jobs
8 | getting stuck like this is a bug that occasionally happens with AWS Batch.
9 | 
10 | This feature is turned off by default but can be enabled by specifying
11 | `kill_stuck_jobs = true` in the Batchiepatchie configuration file. The behavior
12 | will be exercised on all jobs Batchiepatchie knows about.
13 | 
14 | Batchiepatchie requires the `ec2:TerminateInstances` permission to be able to
15 | invoke termination on instances.
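
For illustration, the termination itself boils down to an `ec2:TerminateInstances`
call; here is a minimal sketch using the aws-sdk-go EC2 client (the instance ID
is a placeholder):

```go
package main

import (
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/ec2"
)

func main() {
	svc := ec2.New(session.Must(session.NewSession()))

	// Terminate an instance that looks stuck; the instance ID is a placeholder.
	_, err := svc.TerminateInstances(&ec2.TerminateInstancesInput{
		InstanceIds: []*string{aws.String("i-0123456789abcdef0")},
	})
	if err != nil {
		panic(err)
	}
}
```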
16 |
--------------------------------------------------------------------------------
/frontend/src/components/ImageFormatter/ImageFormatter.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import './ImageFormatter.scss';
3 |
4 | const ECR_REGEX = /^[0-9]+\.dkr\.ecr\.[^.]+\.amazonaws\.com\/(.+)$/;
5 |
6 | export default class ImageFormatter extends React.Component {
7 | static propTypes = {
8 | value: PropTypes.string.isRequired
9 | };
10 |
11 | render() {
12 | let value = this.props.value;
13 | const re_match = value.match(ECR_REGEX);
14 | if ( re_match && re_match.length > 1 ) {
15 | value = re_match[1];
16 | }
17 |
18 |         return (
19 |             <span className='image-formatter'>
20 |                 { value }
21 |             </span>
22 |         );
23 | }
24 | };
25 |
26 |
--------------------------------------------------------------------------------
/frontend/src/stores/layout.js:
--------------------------------------------------------------------------------
1 | // import { call, put, takeLatest } from 'redux-saga/effects';
2 | import actionReducer from 'utils/actionReducer';
3 |
4 | // Action names
5 | export const SET_PAGE_DIMENSIONS = 'SET_PAGE_DIMENSIONS';
6 |
7 | // Initial state
8 | const initialState = {
9 | height: 800
10 | };
11 |
12 | const actions = {};
13 |
14 | // Reducers
15 | actions[SET_PAGE_DIMENSIONS] = (state, { payload }) => {
16 | return {
17 | ...state,
18 | ...payload
19 | };
20 | };
21 |
22 |
23 | // Action Creators
24 | export function setPageDimensions(dimensions) {
25 | return {
26 | type: SET_PAGE_DIMENSIONS,
27 | payload: dimensions
28 | };
29 | };
30 |
31 | // Root reducer
32 | export default actionReducer(actions, initialState);
33 |
--------------------------------------------------------------------------------
/migrations/00007_add_compute_environment_event_log.sql:
--------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 | CREATE TABLE compute_environment_event_log (
4 | timestamp timestamp with time zone NOT NULL,
5 | compute_environment TEXT NOT NULL,
6 | desired_vcpus INTEGER,
7 | max_vcpus INTEGER,
8 | min_vcpus INTEGER,
9 | state TEXT,
10 | service_role TEXT
11 | );
12 |
13 | CREATE INDEX compute_environment_event_log_timestamp ON compute_environment_event_log (timestamp);
14 |
15 | -- +goose Down
16 | -- SQL in this section is executed when the migration is rolled back.
17 | DROP INDEX compute_environment_event_log_timestamp;
18 | DROP TABLE compute_environment_event_log;
19 |
20 |
--------------------------------------------------------------------------------
/migrations/00008_add_job_summary_event_log.sql:
--------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 | CREATE TABLE job_summary_event_log (
4 | timestamp timestamp with time zone NOT NULL,
5 | job_queue TEXT NOT NULL,
6 | submitted INTEGER NOT NULL,
7 | pending INTEGER NOT NULL,
8 | runnable INTEGER NOT NULL,
9 | starting INTEGER NOT NULL,
10 | running INTEGER NOT NULL
11 | );
12 |
13 | CREATE INDEX job_summary_event_log_timestamp ON job_summary_event_log (timestamp);
14 |
15 | -- +goose Down
16 | -- SQL in this section is executed when the migration is rolled back.
17 | DROP INDEX job_summary_event_log_timestamp;
18 | DROP TABLE job_summary_event_log;
19 |
20 |
--------------------------------------------------------------------------------
/jobs/timeout_killer.go:
--------------------------------------------------------------------------------
1 | package jobs
2 |
3 | import (
4 | "github.com/opentracing/opentracing-go"
5 | log "github.com/sirupsen/logrus"
6 | )
7 |
8 | func KillTimedOutJobs(finder FinderStorer) error {
9 | span := opentracing.StartSpan("KillTimedOutJobs")
10 | defer span.Finish()
11 |
12 | timed_out_jobs, err := finder.FindTimedoutJobs()
13 | if err != nil {
14 | return err
15 | }
16 | killer, err := NewKillerHandler()
17 | if err != nil {
18 | return err
19 | }
20 |
21 | 	log.Info("There are ", len(timed_out_jobs), " jobs that need killing.")
22 | 
23 | 	for _, job_id := range timed_out_jobs {
24 | 		err = killer.KillOne(job_id, "timeout", finder)
25 | 		if err != nil {
26 | 			log.Error("Failed to request termination for ", job_id, ": ", err)
27 | 		} else {
28 | 			log.Info("Requested termination for ", job_id)
29 | 		}
30 | 	}
31 | 	log.Info("Timed out killer round complete.")
32 | 	return nil
33 | }
34 | 
--------------------------------------------------------------------------------
/migrations/00013_add_task_arn_instance_id_table.sql:
--------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 |
4 | CREATE TABLE task_arns_to_instance_info (
5 | task_arn TEXT NOT NULL,
6 | instance_id TEXT NOT NULL,
7 | public_ip TEXT NOT NULL,
8 | private_ip TEXT NOT NULL,
9 | PRIMARY KEY(task_arn, instance_id)
10 | );
11 |
12 | CREATE INDEX task_arns_task_arns ON task_arns_to_instance_info (task_arn);
13 | CREATE INDEX task_arns_instance_id ON task_arns_to_instance_info (instance_id);
14 |
15 | ALTER TABLE jobs ADD COLUMN task_arn TEXT;
16 |
17 | -- +goose Down
18 | -- SQL in this section is executed when the migration is rolled back.
19 | ALTER TABLE jobs DROP COLUMN task_arn;
20 |
21 | DROP INDEX task_arns_task_arns;
22 | DROP INDEX task_arns_instance_id;
23 | DROP TABLE task_arns_to_instance_info;
24 |
25 |
--------------------------------------------------------------------------------
/frontend/src/pages/JobPage/JobPage.scss:
--------------------------------------------------------------------------------
1 | .job-page {
2 | .job-menu .nav-tabs {
3 | margin-bottom: 12px;
4 | }
5 |
6 | h2 {
7 | float: left;
8 | }
9 |
10 | .section-loader {
11 | float: right;
12 | }
13 | button {
14 | margin: 2px;
15 | }
16 |
17 | .array-job-icon {
18 | font-size: 30px;
19 | margin-right: 6px;
20 | cursor: default;
21 | }
22 |
23 | .status-formatter {
24 | display: inline-block;
25 | margin-right: 6px;
26 | }
27 |
28 | .child-array-job-statuses {
29 | .status-formatter {
30 | margin-top: 3px;
31 | }
32 | }
33 |
34 | .auto-scroll-checkbox {
35 | margin-top: 12px;
36 | margin-left: 12px;
37 |
38 | input {
39 | margin-right: 3px;
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.21
2 |
3 | RUN mkdir -p /go/src/github.com/AdRoll/batchiepatchie
4 | WORKDIR /go/src/github.com/AdRoll/batchiepatchie
5 | COPY . /go/src/github.com/AdRoll/batchiepatchie
6 |
7 | RUN go mod download -x
8 |
9 | EXPOSE 5454
10 | EXPOSE 9999
11 |
12 | RUN go install github.com/pilu/fresh@latest
13 | RUN go install github.com/go-delve/delve/cmd/dlv@latest
14 | RUN wget https://github.com/pressly/goose/releases/download/v2.6.0/goose-linux64 -O /usr/bin/goose
15 | # RUN go get -u github.com/pressly/goose/cmd/goose
16 | RUN set -eux; \
17 | apt-get update; \
18 | apt-get install -y gosu; \
19 | rm -rf /var/lib/apt/lists/*; \
20 | # verify that the binary works
21 | gosu nobody true
22 |
23 |
24 | RUN chmod +x /usr/bin/goose
25 | RUN chmod +x /go/src/github.com/AdRoll/batchiepatchie/docker_run.sh
26 | CMD ["/go/src/github.com/AdRoll/batchiepatchie/docker_run.sh"]
27 |
--------------------------------------------------------------------------------
/docs/docs/frontend.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Frontend
2 | -------------------------
3 |
4 | To build the frontend static files and JavaScript, you will need `node`, `npm`
5 | and `yarn`.
6 |
7 | Operation
8 | ---------
9 |
10 | The official way is to use [yarn](https://yarnpkg.com/lang/en/) to install dependencies.
11 |
12 | ```bash
13 | $ cd frontend
14 | $ yarn
15 | $ npm run build # This creates an unminified build
16 | $ npm run build:dist # This creates a minified build
17 | ```
18 |
19 | The static files are placed in `frontend/dist` in the Batchiepatchie repository.
20 | The `test.toml` file that comes with Batchiepatchie points to this
21 | directory from the root of the Batchiepatchie repository.
22 |
23 | For development, if you do not want to use the `docker-compose` mechanism described in our [quickstart page](quickstart.md), you can instead do:
24 |
25 | ```bash
26 | $ npm run dev
27 | ```
28 |
--------------------------------------------------------------------------------
/frontend/public/index.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html>
3 | <head>
4 | <% for (key in htmlWebpackPlugin.files.css) { %>
5 |     <link rel="stylesheet" href="<%= htmlWebpackPlugin.files.css[key] %>">
6 | <% } %>
7 | 
8 |     <meta charset="utf-8">
9 |     <meta name="viewport" content="width=device-width, initial-scale=1">
10 | 
11 |     <title>
12 |         <%= htmlWebpackPlugin.options.title %>
13 |     </title>
14 | </head>
15 | <body>
16 |     <div id="app"></div>
17 | <% for (key in htmlWebpackPlugin.files.chunks) { %>
18 |     <script src="<%= htmlWebpackPlugin.files.chunks[key].entry %>"></script>
19 | <% } %>
20 | </body>
21 | </html>
22 | 
--------------------------------------------------------------------------------
/docs/docs/tracing.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Tracing
2 | ------------------------
3 |
4 | Batchiepatchie supports tracing of many synchronization and API calls. Tracing
5 | can build a profile where the durations of different operations are put on a
6 | histogram, giving insight into which parts of Batchiepatchie take large
7 | amounts of time. This is useful when debugging Batchiepatchie itself.
8 |
9 | The implementation right now only supports DataDog. The feature can be enabled
10 | by adding `use_datadog_tracing = true` in the configuration file.
11 |
12 | Even though DataDog is the only supported tracing target right now, most of the
13 | tracing code has been implemented in terms of the [Go opentracing library](https://github.com/opentracing/opentracing-go).
14 | If you wish to use an alternative, you can modify the `batchiepatchie.go` file in the
15 | repository to instantiate the opentracing handle in some other way.
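
As a rough sketch of what the traced code looks like (the operation name here is
illustrative; `KillTimedOutJobs` in `jobs/timeout_killer.go` follows the same
pattern):

```go
package main

import "github.com/opentracing/opentracing-go"

// pollJobs is a hypothetical traced operation.
func pollJobs() {
	// Start a span named after the operation and finish it when the work is done.
	span := opentracing.StartSpan("PollJobs")
	defer span.Finish()

	// ... the actual work happens here ...
}
```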
16 |
--------------------------------------------------------------------------------
/frontend/src/components/ActivationFormatter/ActivationFormatter.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import './ActivationFormatter.scss';
3 |
4 | export default class ActivationFormatter extends React.Component {
5 | static propTypes = {
6 | value: PropTypes.object
7 | };
8 |
9 | render() {
10 | const value = this.props.value.action;
11 |
12 | let classes = 'btn btn-xs btn-success';
13 | if ( value === 'DEACTIVATE' ) {
14 | classes = 'btn btn-xs btn-danger';
15 | }
16 |
17 | if ( value !== '' ) {
18 |             return (
19 |                 <div>
20 |                     <button className={ classes }>
21 |                         { value }
22 |                     </button>
23 |                 </div>
24 |             );
25 |         } else {
26 |             return (<span />);
27 | }
28 | }
29 | };
30 |
--------------------------------------------------------------------------------
/frontend/.editorconfig:
--------------------------------------------------------------------------------
1 | # http://editorconfig.org
2 |
3 | # A special property that should be specified at the top of the file outside of
4 | # any sections. Set to true to stop the .editorconfig file search at the current file.
5 | root = true
6 |
7 | [Makefile]
8 | indent_style = tab
9 |
10 | [*]
11 | # Indentation style
12 | # Possible values - tab, space
13 | indent_style = space
14 |
15 | # Indentation size in single-spaced characters
16 | # Possible values - an integer, tab
17 | indent_size = 4
18 |
19 | # Line ending file format
20 | # Possible values - lf, crlf, cr
21 | end_of_line = lf
22 |
23 | # File character encoding
24 | # Possible values - latin1, utf-8, utf-16be, utf-16le
25 | charset = utf-8
26 |
27 | # Denotes whether to trim whitespace at the end of lines
28 | # Possible values - true, false
29 | trim_trailing_whitespace = true
30 |
31 | # Denotes whether file should end with a newline
32 | # Possible values - true, false
33 | insert_final_newline = true
34 |
--------------------------------------------------------------------------------
/frontend/src/pages/JobsPage/JobsPage.scss:
--------------------------------------------------------------------------------
1 | .jobs-page {
2 | h2 {
3 | float: left;
4 | }
5 |
6 | .actions {
7 | float: right;
8 | margin-bottom: 12px;
9 |
10 | select {
11 | float: right;
12 | margin-right: 12px;
13 | height: 42px;
14 | line-height: 42px;
15 | width: auto;
16 | }
17 |
18 | .btn {
19 | float: right;
20 | margin-right: 12px;
21 | }
22 | }
23 |
24 | nav {
25 | text-align: center;
26 |
27 | .pagination {
28 | margin-bottom: 0;
29 | }
30 | }
31 |
32 | .auto-refresh {
33 | float: left;
34 | margin-top: 6px;
35 | margin-right: 12px;
36 | label {
37 | margin-left: 10px;
38 | }
39 | }
40 |
41 | .array-job-icon {
42 | font-size: 20px;
43 | margin-right: 6px;
44 | cursor: default;
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/frontend/src/components/JobLinkFormatter/JobLinkFormatter.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import { Link } from 'react-router';
3 |
4 | export default class JobLinkFormatter extends React.Component {
5 | static propTypes = {
6 | value: PropTypes.oneOfType([
7 | PropTypes.string,
8 | PropTypes.number,
9 | ]).isRequired
10 | };
11 |
12 | render() {
13 | const value = this.props.value;
14 |
15 | /*
16 | * Don't display the entire ID (it's kind of long).
17 | *
18 | * All JobIDs have predictable format so we'll take just first 8 characters.
19 | *
20 | * 35c55019-c25d-4de6-9338-27c678495df -> 35c55019
21 | */
22 |
23 | const value_prefix = value.substr(0, 8);
24 |
25 |         return (
26 |             <Link to={ '/job/' + value }>
27 |                 { value_prefix }
28 |             </Link>
29 |         );
30 | }
31 | };
32 |
--------------------------------------------------------------------------------
/docs/docs/statuses.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Job statuses
2 | -----------------------------
3 |
4 | Batchiepatchie can show 9 different statuses for a job.
5 | 
6 | * Submitted
7 | * Pending
8 | * Runnable
9 | * Starting
10 | * Running
11 | * Succeeded
12 | * Failed
13 | * Gone
14 | * Terminated
15 | 
16 | Of these, the first 7 correspond to [AWS Batch job
17 | states](https://docs.aws.amazon.com/batch/latest/userguide/job_states.html).
18 | 
19 | The last two, `GONE` and `TERMINATED`, are Batchiepatchie-specific.
20 | 
21 | * `GONE`: This means Batchiepatchie lost track of a job. There is no information on whether the job
22 | succeeded or failed. A large number of jobs with the `GONE` status can indicate problems
23 | with the Batchiepatchie or AWS Batch setup, but by itself it is harmless.
24 | 
25 | * `TERMINATED`: This is the same as `FAILED`, but if the job exit code
26 | indicates a `SIGKILL` type of exit, then instead of `FAILED`, we display the
27 | text `TERMINATED`. This often means the job was killed by the "Terminate job"
28 | button, a timeout, or running out of memory.
29 | 
30 | 
--------------------------------------------------------------------------------
/frontend/src/stores/index.js:
--------------------------------------------------------------------------------
1 | import {
2 | createStore,
3 | combineReducers,
4 | applyMiddleware,
5 | compose
6 | } from 'redux';
7 | import thunk from 'redux-thunk';
8 | import { browserHistory } from 'react-router';
9 | import { routerReducer, routerMiddleware } from 'react-router-redux';
10 |
11 | // Reducers
12 | import jobReducer from './job';
13 | import layoutReducer from './layout';
14 | import statusReducer from './status';
15 | import jobQueueReducer from './jobqueue';
16 |
17 | const rootReducer = combineReducers({
18 | job: jobReducer,
19 | jobqueue: jobQueueReducer,
20 | layout: layoutReducer,
21 | routing: routerReducer,
22 | status: statusReducer
23 | });
24 |
25 | const finalCreateStore = compose(
26 | applyMiddleware(routerMiddleware(browserHistory), thunk),
27 | window.devToolsExtension && process.env.NODE_ENV === 'development' ? window.devToolsExtension() : f => f
28 | )(createStore);
29 |
30 | export default function configureStore(initialState) {
31 | return finalCreateStore(rootReducer, initialState);
32 | };
33 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie
2 | ---------------
3 |
4 | Batchiepatchie is a service built on top of AWS Batch that collects information
5 | on all the jobs that are running and makes them easily searchable through a
6 | beautiful user interface. Internally, Batchiepatchie mirrors the state of AWS
7 | Batch in a PostgreSQL database. It can scale to millions of jobs and, for many
8 | use cases, is a substantial improvement over the AWS Management Console for AWS
9 | Batch.
10 |
11 | 
12 |
13 | There is [detailed documentation](docs/docs/index.md) located inside our docs directory that describes the features of Batchiepatchie and
14 | gives deployment instructions.
15 |
16 | How to contribute
17 | -----------------
18 |
19 | Simply open issues or pull requests on this GitHub repository. Contributors
20 | need to sign a CLA; we have an automatic CLA assistant to make this process as
21 | seamless as possible and it should appear when you open your pull request.
22 |
23 | License
24 | -------
25 |
26 | Batchiepatchie is licensed under the MIT license.
27 |
--------------------------------------------------------------------------------
/frontend/src/utils/getChartColor.js:
--------------------------------------------------------------------------------
1 | const CHART_COLORS = [
2 | '#FF0000',
3 | '#7F0000',
4 | '#FFA280',
5 | '#806C60',
6 | '#FF8800',
7 | '#FFE1BF',
8 | '#996600',
9 | '#FFCC00',
10 | '#66644D',
11 | '#4C4700',
12 | '#EEFF00',
13 | '#66FF00',
14 | '#7DB359',
15 | '#8FBFA3',
16 | '#005930',
17 | '#00FFAA',
18 | '#00EEFF',
19 | '#003C40',
20 | '#00AAFF',
21 | '#738C99',
22 | '#004480',
23 | '#0066FF',
24 | '#0000FF',
25 | '#0000BF',
26 | '#1A1966',
27 | '#C8BFFF',
28 | '#9559B3',
29 | '#CC00FF',
30 | '#590047',
31 | '#FF00AA',
32 | '#FFBFEA',
33 | '#A65369',
34 | '#FF4059',
35 | '#400009',
36 | ];
37 |
38 | // Persist colors for consistency
39 | const savedColors = {};
40 | let index = 0;
41 |
42 | export default function getChartColor(value) {
43 | if (savedColors[value]) {
44 | return savedColors[value];
45 | }
46 | const color = CHART_COLORS[index % CHART_COLORS.length];
47 | savedColors[value] = color;
48 | index++;
49 | return color;
50 | }
51 |
--------------------------------------------------------------------------------
/migrations/00021_single_trigram_index.sql:
--------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 | DROP INDEX trgm_idx_jobs_job_id;
4 | DROP INDEX trgm_idx_jobs_job_name;
5 | DROP INDEX trgm_idx_jobs_job_queue;
6 | DROP INDEX trgm_idx_jobs_image;
7 | DROP INDEX trgm_idx_jobs_command_line;
8 | DROP INDEX trgm_idx_jobs_job_definition;
9 |
10 | CREATE INDEX trgm_idx_jobs ON jobs USING gin (
11 | (job_id || job_name || job_queue || image || command_line || job_definition) gin_trgm_ops
12 | );
13 |
14 | -- +goose Down
15 | -- SQL in this section is executed when the migration is rolled back.
16 | DROP INDEX trgm_idx_jobs;
17 |
18 | CREATE INDEX trgm_idx_jobs_job_id ON jobs USING gin (job_id gin_trgm_ops);
19 | CREATE INDEX trgm_idx_jobs_job_name ON jobs USING gin (job_name gin_trgm_ops);
20 | CREATE INDEX trgm_idx_jobs_job_queue ON jobs USING gin (job_queue gin_trgm_ops);
21 | CREATE INDEX trgm_idx_jobs_image ON jobs USING gin (image gin_trgm_ops);
22 | CREATE INDEX trgm_idx_jobs_command_line ON jobs USING gin (command_line gin_trgm_ops);
23 | CREATE INDEX trgm_idx_jobs_job_definition ON jobs USING gin (job_definition gin_trgm_ops);
24 |
--------------------------------------------------------------------------------
/frontend/src/pages/StatsPage/StatsPage.scss:
--------------------------------------------------------------------------------
1 | .stats-page {
2 | label {
3 | margin-left: 12px;
4 | }
5 |
6 | .actions {
7 | float: right;
8 | margin-bottom: 12px;
9 |
10 | select {
11 | float: right;
12 | margin-right: 12px;
13 | height: 42px;
14 | line-height: 42px;
15 | width: auto;
16 | }
17 |
18 | // react-datetime
19 | .rdt {
20 | display: inline-block;
21 | margin-left: 6px;
22 |
23 | .rdtPicker {
24 | margin-left: -78px;
25 | }
26 | }
27 | }
28 |
29 | .color-block {
30 | height: 9px;
31 | width: 9px;
32 | display: inline-block;
33 | margin-right: 6px;
34 | }
35 |
36 | td {
37 | vertical-align: middle;
38 |
39 |
40 | .status-column {
41 | width: 105px;
42 | }
43 | }
44 |
45 | tr:last-of-type {
46 | font-weight: bold;
47 | }
48 |
49 | select.metric-picker {
50 | margin-top: -7px;
51 | margin-right: 0;
52 | margin-left: 6px;
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 AdRoll, Inc. and Batchiepatchie contributors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | this software and associated documentation files (the "Software"), to deal in
7 | the Software without restriction, including without limitation the rights to
8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9 | of the Software, and to permit persons to whom the Software is furnished to do
10 | so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/migrations/00001_jobs.sql:
--------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 |
4 | CREATE TABLE jobs (
5 | job_id CHAR(36) NOT NULL PRIMARY KEY,
6 | job_name TEXT NOT NULL,
7 | job_definition TEXT NOT NULL,
8 | job_queue TEXT NOT NULL,
9 | image TEXT NOT NULL,
10 | status VARCHAR(9) NOT NULL,
11 | created_at timestamp with time zone NOT NULL,
12 | stopped_at timestamp with time zone,
13 | vcpus INTEGER NOT NULL,
14 | memory INTEGER NOT NULL,
15 | timeout INTEGER,
16 | command_line TEXT NOT NULL,
17 | last_updated timestamp with time zone NOT NULL
18 | );
19 |
20 | CREATE INDEX jobs_created_at_timestamp ON jobs (created_at);
21 | CREATE INDEX jobs_stopped_at_timestamp ON jobs (stopped_at);
22 | CREATE INDEX jobs_last_updated_timestamp ON jobs (last_updated);
23 |
24 | -- +goose Down
25 | -- SQL in this section is executed when the migration is rolled back.
26 |
27 | DROP INDEX jobs_created_at_timestamp;
28 | DROP INDEX jobs_stopped_at_timestamp;
29 | DROP INDEX jobs_last_updated_timestamp;
30 | DROP TABLE jobs;
31 |
32 |
--------------------------------------------------------------------------------
/frontend/src/components/StatusFormatter/StatusFormatter.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import { STATUSES, STATUS_LABELS } from 'stores/job';
3 | import './StatusFormatter.scss';
4 |
5 | export const STATUS_CLASSES = {
6 | [STATUSES.SUBMITTED]: 'alert alert-info',
7 | [STATUSES.PENDING]: 'alert alert-info',
8 | [STATUSES.RUNNABLE]: 'alert alert-info',
9 | [STATUSES.STARTING]: 'alert alert-warning',
10 | [STATUSES.RUNNING]: 'alert alert-warning',
11 | [STATUSES.FAILED]: 'alert alert-danger',
12 | [STATUSES.SUCCEEDED]: 'alert alert-success',
13 | [STATUSES.GONE]: 'alert alert-gone',
14 | [STATUSES.TERMINATED]: 'alert alert-terminated'
15 | };
16 |
17 | export default class StatusFormatter extends React.Component {
18 | static propTypes = {
19 | count: PropTypes.number,
20 | value: PropTypes.string.isRequired
21 | };
22 |
23 | render() {
24 | const value = this.props.value;
25 | const count = this.props.count;
26 |
27 |         return (
28 |             <span className={ 'status-formatter ' + STATUS_CLASSES[value] }>
29 |                 { STATUS_LABELS[value] }
30 |                 { count !== undefined &&
31 |                     <span className='count'>
32 |                         ({ count })
33 |                     </span>
34 |                 }
35 |             </span>
36 |         );
37 | }
38 | };
39 |
40 |
--------------------------------------------------------------------------------
/docs/docs/timeouts.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Timeouts
2 | -------------------------
3 |
4 | Batchiepatchie can automatically terminate jobs that have timed out. This is
5 | useful since timeouts are not supported out of the box by AWS Batch itself at
6 | this time.
7 | 
8 | The timeout support is always turned on, but it is only exercised on jobs
9 | that have set a timeout on themselves.
10 |
11 | To have your jobs be automatically terminated by Batchiepatchie if they take
12 | too long, you need to set environment variable `PYBATCH_TIMEOUT` on them.
13 |
14 | For example, to specify a 1 hour (i.e. 3600 second) timeout on a job, you can set:
15 | 
16 |     PYBATCH_TIMEOUT=3600
17 | 
18 | in the job definition or job submission for AWS Batch.
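
As an illustration, here is a sketch of setting this variable at job submission
time with the aws-sdk-go Batch client (the job name, queue and definition are
placeholder values):

```go
package main

import (
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/batch"
)

func main() {
	svc := batch.New(session.Must(session.NewSession()))

	// Submit a job with a 1 hour timeout; Batchiepatchie reads the
	// PYBATCH_TIMEOUT environment variable when polling for jobs.
	_, err := svc.SubmitJob(&batch.SubmitJobInput{
		JobName:       aws.String("example-job"),        // placeholder
		JobQueue:      aws.String("example-queue"),      // placeholder
		JobDefinition: aws.String("example-definition"), // placeholder
		ContainerOverrides: &batch.ContainerOverrides{
			Environment: []*batch.KeyValuePair{
				{
					Name:  aws.String("PYBATCH_TIMEOUT"),
					Value: aws.String("3600"),
				},
			},
		},
	})
	if err != nil {
		panic(err)
	}
}
```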
19 |
20 | When Batchiepatchie is polling for jobs, if it sees any jobs that were
21 | _submitted_ to AWS Batch more than `PYBATCH_TIMEOUT` seconds ago, it will
22 | invoke `batch:TerminateJobs` on them.
23 |
24 | Be aware that in some cases, `batch:TerminateJobs` is not sufficient to
25 | actually kill a job. However, it is the best Batchiepatchie can do. Jobs that
26 | have had `batch:TerminateJobs` called on them will appear in red in the job
27 | listing. When the jobs get killed, they'll appear as either `FAILED` or
28 | `TERMINATED`.
29 |
30 | Historical note
31 | ---------------
32 |
33 | The name "PYBATCH" comes from an internal library used at AdRoll, where
34 | `pybatch` is the name of a Python library that submits jobs to AWS Batch. This
35 | library had its own concept of timeouts, which was later carried over to
36 | Batchiepatchie.
37 |
--------------------------------------------------------------------------------
/frontend/src/components/StatusSelector/StatusSelector.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import { connect } from 'react-redux';
3 | import {
4 | setSelectedStatus,
5 | STATUS_ORDER
6 | } from 'stores/job';
7 | import Select from 'react-select';
8 | import './StatusSelector.scss';
9 |
10 | class StatusSelector extends React.Component {
11 | static propTypes = {
12 | selectedStatus: PropTypes.string.isRequired,
13 | setSelectedStatus: PropTypes.func.isRequired,
14 | statusOrder: PropTypes.array,
15 | };
16 |
17 | render() {
18 | const statusOrder = this.props.statusOrder || STATUS_ORDER;
19 | const statusOptions = statusOrder.map(s => ({ label: s, value: s }));
20 |
21 |         return (
22 |             <Select
23 |                 className='status-selector'
24 |                 name='status-selector'
25 |                 options={ statusOptions }
26 |                 value={ this.props.selectedStatus }
27 |                 clearable={ false }
28 |                 searchable={ false }
29 |                 onChange={ option => {
30 |                     if (option) {
31 |                         this.props.setSelectedStatus(option.value);
32 |                     }
33 |                 } }
34 |             />
35 |         );
36 | }
37 | };
38 |
39 |
40 | const mapStateToProps = state => ({
41 | selectedStatus: state.job.selectedStatus
42 | });
43 |
44 | const actions = {
45 | setSelectedStatus
46 | };
47 |
48 | export default connect(mapStateToProps, actions)(StatusSelector);
49 |
--------------------------------------------------------------------------------
/docker_run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -euxo pipefail
4 |
5 | # This script is the entry point for the Batchiepatchie application when
6 | # running inside the docker container.
7 |
8 | # https://denibertovic.com/posts/handling-permissions-with-docker-volumes/
9 | # This dance lets the Docker image create directories when run locally with
10 | # docker-compose.
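# For example, running the container with LOCAL_USER_ID=$(id -u) makes the
# container user match the host user that owns the mounted files.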
11 | USER_ID=${LOCAL_USER_ID:-501}
12 | useradd --shell /bin/bash -u $USER_ID -o -c "" -m user || true
13 | export HOME=/home/user
14 |
15 | OWNER=`ls -ld . | awk '{print $3}'`
16 | ME=`whoami`
17 |
18 | CHANGE_TO=user
19 | # Don't change our identity if the current files are owned by us already.
20 | if [ "${OWNER}" = "${ME}" ]; then
21 | echo "I will not change my user because my files are already owned by me."
22 | CHANGE_TO="${ME}"
23 | fi;
24 |
25 | exec gosu ${CHANGE_TO} bash <<"EOF"
26 | set -euxo pipefail
27 | export VERSION=`cat version`
28 |
29 | # Get local IP address; or just assume it is 127.0.0.1
30 | BATCHIEPATCHIE_IP=$(curl http://instance-data/latest/meta-data/local-ipv4) || BATCHIEPATCHIE_IP=127.0.0.1
31 | export BATCHIEPATCHIE_IP
32 |
33 | BUILD_ENV_ENV=${BUILD_ENV:-}
34 |
35 | if [ "${BUILD_ENV_ENV}" = "DEBUG" ]; then
36 | # Runs the Delve debugger in headless mode.
37 | dlv debug --headless=true --listen=:9999 --accept-multiclient=true
38 | fi;
39 |
40 | if [ "${BUILD_ENV_ENV}" = "PRODUCTION" ]; then
41 | sleep 5
42 | go build -buildvcs=false
43 | ./batchiepatchie
44 | else
45 | sleep 5
46 | # Runs the application through Fresh for code reloading.
47 | fresh -c fresh.conf
48 | fi;
49 | EOF
50 |
--------------------------------------------------------------------------------
/envsubstituter/envsubstituter.go:
--------------------------------------------------------------------------------
1 | package envsubstituter
2 |
3 | // This module implements a fairly simple string substitution functionality using
4 | // environment variables.
5 | 
6 | // The key function is EnvironmentSubstitute(string) (string, error). It looks
7 | // for "${BLAH}" strings and replaces them with environment variables. If an
8 | // environment variable is not defined, it returns an error.
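//
// Example (the variable name and value are hypothetical):
//
//	os.Setenv("FOO", "bar")
//	s, _ := EnvironmentSubstitute("prefix-${FOO}")
//	// s == "prefix-bar"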
9 |
10 | import (
11 | "bytes"
12 | "fmt"
13 | "os"
14 | )
15 |
16 | func EnvironmentSubstitute(subject string) (string, error) {
17 | // This thing is extremely unoptimized but right now it doesn't really
18 | // need to be fast. We iterate through the string and look for "${",
19 | // then take everything until next "}".
20 |
21 | var result bytes.Buffer
22 |
23 | for i := 0; i < len(subject); i++ {
24 | if i < len(subject)-1 && subject[i] == '$' && subject[i+1] == '{' {
25 | var env_name bytes.Buffer
26 | j := i + 2
27 | for ; j < len(subject); j++ {
28 | if subject[j] == '}' {
29 | env_value, present := os.LookupEnv(env_name.String())
30 | if !present {
31 | 							return "", fmt.Errorf("Environment variable '%v' is not defined. Cannot perform substitution on '%s'", env_name.String(), subject)
32 | }
33 | result.WriteString(env_value)
34 | break
35 | } else {
36 | env_name.WriteByte(subject[j])
37 | }
38 | }
39 | if j >= len(subject) {
40 | return "", fmt.Errorf("No matching } found in '%s'", subject)
41 | }
42 | i = j
43 | continue
44 | } else {
45 | result.WriteByte(subject[i])
46 | }
47 | }
48 |
49 | return result.String(), nil
50 | }
51 |
--------------------------------------------------------------------------------
/frontend/src/components/NameFormatter/NameFormatter.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import ReactTooltip from 'react-tooltip';
3 |
4 | export default class NameFormatter extends React.Component {
5 | static propTypes = {
6 | value: PropTypes.string.isRequired,
7 | // dependentValues contains the row. It is typically set using getRowMetaData.
8 | dependentValues: PropTypes.object,
9 | id: PropTypes.string
10 | };
11 |
12 | render() {
13 | const name = this.props.value;
14 | const adaptedNameSplit = name.split('-');
15 | let adaptedName = adaptedNameSplit.slice(2, adaptedNameSplit.length).join('-')
16 | const id = this.props.id;
17 |
18 | /* drop pybatch prefix if it's there; it's just noise */
19 | if (!name.startsWith("pybatch-")) {
20 | adaptedName = name;
21 | }
22 | const job = this.props.dependentValues;
23 |         return (
24 |             <span>
25 |                 { job && job.array_properties &&
26 |                     <span>
27 |                         <span className='array-job-icon' data-tip data-for={ 'array-job-' + id }>
28 |                             ◱
29 |                         </span>
30 |                         <ReactTooltip id={ 'array-job-' + id }>
31 |                             Parent Array Job
32 |                         </ReactTooltip>
33 |                     </span>
34 |                 }
35 |                 { adaptedName }
36 |                 { id && (<span> ({ id })</span>) }
37 |             </span>
38 |         );
39 | }
40 | };
41 |
--------------------------------------------------------------------------------
/migrations/00014_add_instance_id_activity.sql:
--------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 |
4 | CREATE TABLE instances (
5 | appeared_at timestamp with time zone NOT NULL,
6 | disappeared_at timestamp with time zone,
7 | launched_at timestamp with time zone,
8 | ami TEXT NOT NULL,
9 | instance_id TEXT NOT NULL PRIMARY KEY,
10 | instance_type TEXT NOT NULL,
11 | compute_environment_arn TEXT NOT NULL,
12 | ecs_cluster_arn TEXT NOT NULL,
13 | availability_zone TEXT NOT NULL,
14 | spot_instance_request_id TEXT,
15 | private_ip_address TEXT,
16 | public_ip_address TEXT
17 | );
18 |
19 | CREATE TABLE instance_event_log (
20 | timestamp timestamp with time zone NOT NULL,
21 | instance_id TEXT NOT NULL,
22 | active_jobs JSONB NOT NULL,
23 | PRIMARY KEY(timestamp, instance_id)
24 | );
25 |
26 | CREATE INDEX instances_disappeared_at ON instances (disappeared_at);
27 | CREATE INDEX instances_launched_at ON instances (launched_at);
28 | CREATE INDEX instances_appeared_at ON instances (appeared_at);
29 | CREATE INDEX instance_event_log_instance_id ON instance_event_log (instance_id);
30 |
31 | -- +goose Down
32 | -- SQL in this section is executed when the migration is rolled back.
33 | DROP INDEX instances_appeared_at;
34 | DROP INDEX instance_event_log_instance_id;
35 | DROP INDEX instances_disappeared_at;
36 | DROP INDEX instances_launched_at;
37 | DROP TABLE instance_event_log;
38 | DROP TABLE instances;
39 |
40 |
--------------------------------------------------------------------------------
/docs/docs/scaling.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Scaling hack
2 | -----------------------------
3 |
4 | Batchiepatchie has a crude hack that can force AWS Batch compute environments
5 | to scale up based on the number of jobs in a job queue.
6 | 
7 | It works by periodically adjusting the minimum vCPU count on a compute
8 | environment. This forces AWS Batch to instantly scale up instances, up to the
9 | amount requested.
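
Internally, this amounts to an `UpdateComputeEnvironment` call; a minimal
sketch with the aws-sdk-go Batch client (the compute environment name and vCPU
count are placeholders):

```go
package main

import (
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/batch"
)

func main() {
	svc := batch.New(session.Must(session.NewSession()))

	// Raise the minimum vCPU count so AWS Batch launches instances right away.
	// The compute environment name and count are placeholder values.
	_, err := svc.UpdateComputeEnvironment(&batch.UpdateComputeEnvironmentInput{
		ComputeEnvironment: aws.String("example-compute-environment"),
		ComputeResources: &batch.ComputeResourceUpdate{
			MinvCpus: aws.Int64(256),
		},
	})
	if err != nil {
		panic(err)
	}
}
```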
10 |
11 | This feature has no exposed UI component, so if you want to make use of it, you
12 | must set it up manually.
13 |
14 | 1. Log in to Batchiepatchie PostgreSQL database
15 | 2. Modify `activated_job_queues` table; you need to set `forced_scaling` to true for any job queues you want to use for scaling hack.
16 |
17 | The following statement, executed in `psql`, would enable this behavior for all job queues:
18 | 
19 | ```sql
20 | UPDATE activated_job_queues SET forced_scaling = 't';
21 | ```
22 |
23 | #### Caveats
24 |
25 | * If someone deactivates and then re-activates a job queue in the UI, the setting
26 | is reset and no scaling will occur.
27 |
28 | * The scaling is done on compute environments, yet the setting is set on job queues.
29 | If two job queues are attached to the same compute environment but only one of them has
30 | `forced_scaling=t`, then the scaling will only take into account the jobs on the job
31 | queue that has the setting.
32 |
33 | * Scaling is not supported for job queues that are attached to multiple compute environments.
34 |
35 | * The scaling only works on managed AWS Batch compute environments. It does nothing if
36 | the attached compute environment is unmanaged.
37 |
38 | Due to the fragile nature of this feature, it is, by default, disabled and out
39 | of sight. In the future, we may remove this functionality.
40 |
--------------------------------------------------------------------------------
/migrations/00018_job_status_events.sql:
--------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 | CREATE TABLE job_status_events (
4 | job_id CHAR(36) NOT NULL PRIMARY KEY,
5 | updated timestamp with time zone NOT NULL
6 | );
7 |
8 | -- +goose StatementBegin
9 | CREATE FUNCTION job_status_update_update() RETURNS trigger AS
10 | $body$
11 | BEGIN
12 | IF NEW.status <> OLD.status THEN
13 | INSERT INTO job_status_events ( job_id, updated ) VALUES ( NEW.job_id, now() ) ON CONFLICT ( job_id ) DO UPDATE SET updated = now();
14 | END IF;
15 | RETURN NEW;
16 | END;
17 | $body$ LANGUAGE plpgsql;
18 | -- +goose StatementEnd
19 |
20 | -- +goose StatementBegin
21 | CREATE FUNCTION job_status_update_insert() RETURNS trigger AS
22 | $body$
23 | BEGIN
24 | INSERT INTO job_status_events ( job_id, updated ) VALUES ( NEW.job_id, now() ) ON CONFLICT ( job_id ) DO UPDATE SET updated = now();
25 | RETURN NEW;
26 | END;
27 | $body$ LANGUAGE plpgsql;
28 | -- +goose StatementEnd
29 |
30 | -- +goose StatementBegin
31 | CREATE TRIGGER job_status_update_trigger_insert
32 | AFTER
33 | INSERT
34 | ON jobs
35 | FOR EACH ROW
36 | EXECUTE PROCEDURE job_status_update_insert();
37 | -- +goose StatementEnd
38 |
39 | -- +goose StatementBegin
40 | CREATE TRIGGER job_status_update_trigger_update
41 | AFTER
42 | UPDATE
43 | ON jobs
44 | FOR EACH ROW
45 | EXECUTE PROCEDURE job_status_update_update();
46 | -- +goose StatementEnd
47 |
48 | -- +goose Down
49 | -- SQL in this section is executed when the migration is rolled back.
50 | DROP TRIGGER job_status_update_trigger_insert ON jobs;
51 | DROP TRIGGER job_status_update_trigger_update ON jobs;
52 | DROP FUNCTION job_status_update_update();
53 | DROP FUNCTION job_status_update_insert();
54 | DROP TABLE job_status_events;
55 |
56 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | api:
3 | build: .
4 | environment:
5 | - BUILD_ENV=${BUILD_ENV}
6 | - BATCHIEPATCHIE_CONFIG=batchiepatchie-dockercompose-config.toml
7 | - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
8 | - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
9 | volumes:
10 | - .:/go/src/github.com/AdRoll/batchiepatchie
11 | ports:
12 | - "9999:9999"
13 | - "5454:5454"
14 | privileged: true # Privileges are dropped by docker_run.sh, privileges needed for user setup in local development
15 | ulimits:
16 | nproc: 65535
17 | nofile:
18 | soft: 90000
19 | hard: 90000
20 | depends_on:
21 | postgres:
22 | condition: service_healthy
23 | migrations:
24 | condition: service_completed_successfully
25 |
26 | migrations:
27 | build: .
28 | volumes:
29 | - .:/go/src/github.com/AdRoll/batchiepatchie
30 | command: sh -c 'cd migrations && goose postgres "user=postgres dbname=postgres sslmode=disable host=postgres password=123456" up'
31 | depends_on:
32 | postgres:
33 | condition: service_healthy
34 |
35 |
36 | postgres:
37 | image: postgres:9.6.2-alpine
38 | ports:
39 | - 5432:5432
40 | environment:
41 | POSTGRES_PASSWORD: 123456
42 | healthcheck:
43 | test: ["CMD", "pg_isready", "-U", "postgres"]
44 | interval: 5s
45 | timeout: 5s
46 | retries: 5
47 |
48 | frontend:
49 | build:
50 | context: ./frontend
51 | dockerfile: Dockerfile
52 | volumes:
53 | - ./frontend/src:/opt/frontend/src
54 | - ./frontend/public:/opt/frontend/public
55 | ports:
56 | - "8080:8080"
57 | command: npm run dev
58 |
59 | networks:
60 | default:
61 | ipam:
62 | config:
63 | - subnet: "172.29.0.0/16"
64 |
65 |
--------------------------------------------------------------------------------
/migrations/00016_revert_jobs_full_text_search.sql:
--------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 |
4 | DROP INDEX jobs_weighted_sv_idx;
5 | DROP TRIGGER jobs_update_tsvector on jobs;
6 | ALTER TABLE jobs DROP COLUMN weighted_search_vector;
7 |
8 | -- +goose Down
9 | -- SQL in this section is executed when the migration is rolled back.
10 |
11 | -- add a column to store the searchable info for jobs.
12 | ALTER TABLE jobs ADD COLUMN weighted_search_vector tsvector;
13 |
14 | -- updates all job entries with the searchable information;
15 | UPDATE jobs SET
16 | weighted_search_vector = x.weighted_tsv
17 | FROM (
18 | SELECT job_id,
19 | to_tsvector(jobs.job_id) ||
20 | to_tsvector(jobs.job_name) ||
21 | to_tsvector(jobs.job_definition) ||
22 | to_tsvector(jobs.job_queue) ||
23 | to_tsvector(jobs.image) ||
24 | to_tsvector(jobs.command_line) AS weighted_tsv
25 | FROM jobs
26 | ) AS x
27 | WHERE x.job_id = jobs.job_id;
28 |
29 | -- a trigger to generate searchable information for each new entry.
30 | -- +goose StatementBegin
31 | CREATE FUNCTION jobs_weighted_search_vector_trigger() RETURNS trigger AS $$
32 | begin
33 | new.weighted_search_vector :=
34 | to_tsvector(new.job_id) ||
35 | to_tsvector(new.job_name) ||
36 | to_tsvector(new.job_definition) ||
37 | to_tsvector(new.job_queue) ||
38 | to_tsvector(new.image) ||
39 | to_tsvector(new.command_line);
40 | return new;
41 | end;
42 | $$
43 | LANGUAGE plpgsql;
44 | -- +goose StatementEnd
45 |
46 | -- use the function as a trigger.
47 | CREATE TRIGGER jobs_update_tsvector BEFORE INSERT OR UPDATE
48 | ON jobs
49 | FOR EACH ROW EXECUTE PROCEDURE jobs_weighted_search_vector_trigger();
50 |
51 | -- create an index for the jobs search info.
52 | CREATE INDEX jobs_weighted_sv_idx ON jobs USING GIST(weighted_search_vector);
53 |
--------------------------------------------------------------------------------
/frontend/src/components/SectionLoader/SectionLoader.scss:
--------------------------------------------------------------------------------
1 | // Based on https://loading.io/css/
2 | .lds-spinner {
3 | color: official;
4 | display: inline-block;
5 | position: relative;
6 | width: 20px;
7 | height: 20px;
8 | }
9 | .lds-spinner div {
10 | transform-origin: 10px 10px;
11 | animation: lds-spinner 1.2s linear infinite;
12 | }
13 | .lds-spinner div:after {
14 | content: " ";
15 | display: block;
16 | position: absolute;
17 | top: 0.75px;
18 | left: 9.25px;
19 | width: 1.5px;
20 | height: 4.5px;
21 | border-radius: 20%;
22 | background: black;
23 | }
24 | .lds-spinner div:nth-child(1) {
25 | transform: rotate(0deg);
26 | animation-delay: -1.1s;
27 | }
28 | .lds-spinner div:nth-child(2) {
29 | transform: rotate(30deg);
30 | animation-delay: -1s;
31 | }
32 | .lds-spinner div:nth-child(3) {
33 | transform: rotate(60deg);
34 | animation-delay: -0.9s;
35 | }
36 | .lds-spinner div:nth-child(4) {
37 | transform: rotate(90deg);
38 | animation-delay: -0.8s;
39 | }
40 | .lds-spinner div:nth-child(5) {
41 | transform: rotate(120deg);
42 | animation-delay: -0.7s;
43 | }
44 | .lds-spinner div:nth-child(6) {
45 | transform: rotate(150deg);
46 | animation-delay: -0.6s;
47 | }
48 | .lds-spinner div:nth-child(7) {
49 | transform: rotate(180deg);
50 | animation-delay: -0.5s;
51 | }
52 | .lds-spinner div:nth-child(8) {
53 | transform: rotate(210deg);
54 | animation-delay: -0.4s;
55 | }
56 | .lds-spinner div:nth-child(9) {
57 | transform: rotate(240deg);
58 | animation-delay: -0.3s;
59 | }
60 | .lds-spinner div:nth-child(10) {
61 | transform: rotate(270deg);
62 | animation-delay: -0.2s;
63 | }
64 | .lds-spinner div:nth-child(11) {
65 | transform: rotate(300deg);
66 | animation-delay: -0.1s;
67 | }
68 | .lds-spinner div:nth-child(12) {
69 | transform: rotate(330deg);
70 | animation-delay: 0s;
71 | }
72 | @keyframes lds-spinner {
73 | 0% {
74 | opacity: 1;
75 | }
76 | 100% {
77 | opacity: 0;
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/migrations/00002_jobs_full_text_search.sql:
--------------------------------------------------------------------------------
1 | -- +goose Up
2 | -- SQL in this section is executed when the migration is applied.
3 |
4 | -- add a column to store the searchable info for jobs.
5 | ALTER TABLE jobs ADD COLUMN weighted_search_vector tsvector;
6 |
7 | -- updates all job entries with the searchable information;
8 | UPDATE jobs SET
9 | weighted_search_vector = x.weighted_tsv
10 | FROM (
11 | SELECT job_id,
12 | to_tsvector(jobs.job_id) ||
13 | to_tsvector(jobs.job_name) ||
14 | to_tsvector(jobs.job_definition) ||
15 | to_tsvector(jobs.job_queue) ||
16 | to_tsvector(jobs.image) ||
17 | to_tsvector(jobs.command_line) AS weighted_tsv
18 | FROM jobs
19 | ) AS x
20 | WHERE x.job_id = jobs.job_id;
21 |
22 | -- a trigger to generate searchable information for each new entry.
23 | -- +goose StatementBegin
24 | CREATE FUNCTION jobs_weighted_search_vector_trigger() RETURNS trigger AS $$
25 | begin
26 | new.weighted_search_vector :=
27 | to_tsvector(new.job_id) ||
28 | to_tsvector(new.job_name) ||
29 | to_tsvector(new.job_definition) ||
30 | to_tsvector(new.job_queue) ||
31 | to_tsvector(new.image) ||
32 | to_tsvector(new.command_line);
33 | return new;
34 | end;
35 | $$
36 | LANGUAGE plpgsql;
37 | -- +goose StatementEnd
38 |
39 | -- use the function as a trigger.
40 | -- +goose StatementBegin
41 | CREATE TRIGGER jobs_update_tsvector BEFORE INSERT OR UPDATE
42 | ON jobs
43 | FOR EACH ROW EXECUTE PROCEDURE jobs_weighted_search_vector_trigger();
44 | -- +goose StatementEnd
45 |
46 | -- create an index for the jobs search info.
47 | CREATE INDEX jobs_weighted_sv_idx ON jobs USING GIST(weighted_search_vector);
48 |
49 |
50 | -- +goose Down
51 | -- SQL in this section is executed when the migration is rolled back.
52 |
53 | DROP INDEX jobs_weighted_sv_idx;
54 | DROP TRIGGER jobs_update_tsvector on jobs;
55 | DROP FUNCTION jobs_weighted_search_vector_trigger() CASCADE;
56 | ALTER TABLE jobs DROP COLUMN weighted_search_vector;
57 |
--------------------------------------------------------------------------------
/fetcher/fetcher.go:
--------------------------------------------------------------------------------
1 | package fetcher
2 |
3 | // This module is just a wrapper that can either fetch files out of S3 or
4 | // locally.
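//
// Example (the S3 URL is hypothetical):
//
//	data, err := ReadAll("s3://some-bucket/some/key.toml")
//	// A plain path such as "/etc/batchiepatchie.toml" reads a local file instead.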
5 |
6 | import (
7 | "io"
8 | "os"
9 | "regexp"
10 |
11 | "github.com/AdRoll/batchiepatchie/awsclients"
12 | "github.com/aws/aws-sdk-go/aws"
13 | "github.com/aws/aws-sdk-go/aws/session"
14 | "github.com/aws/aws-sdk-go/service/s3"
15 | "github.com/aws/aws-sdk-go/service/s3/s3manager"
16 | )
17 |
18 | var s3Regex = regexp.MustCompile("^s3://([^/]+)/(.+)$")
19 |
20 | // ReadAllNoSessions is like ReadAll but does not rely on the awsclients package having been set up yet.
21 | func ReadAllNoSessions(location string) ([]byte, error) {
22 | 	s3match := s3Regex.FindStringSubmatch(location)
23 | 	if s3match == nil {
24 | 		return readAllLocalFile(location)
25 | 	}
26 | 
27 | 	ses := session.Must(session.NewSession(&aws.Config{Region: aws.String("us-east-1"), MaxRetries: aws.Int(10)}))
28 | region_loc, err := s3manager.GetBucketRegion(aws.BackgroundContext(), ses, s3match[1], "us-east-1")
29 | if err != nil {
30 | return nil, err
31 | }
32 | session := session.Must(session.NewSession(&aws.Config{Region: aws.String(region_loc)}))
33 | s3s := s3.New(session)
34 |
35 | result, err := s3s.GetObject(&s3.GetObjectInput{
36 | Bucket: aws.String(s3match[1]),
37 | Key: aws.String(s3match[2]),
38 | })
39 | if err != nil {
40 | return nil, err
41 | }
42 | defer result.Body.Close()
43 | return io.ReadAll(result.Body)
44 | }
45 |
46 | func ReadAll(location string) ([]byte, error) {
47 | s3match := s3Regex.FindStringSubmatch(location)
48 | if s3match == nil {
49 | return readAllLocalFile(location)
50 | }
51 |
52 | bucket := s3match[1]
53 | key := s3match[2]
54 |
55 | s3client, err := awsclients.GetS3ClientForBucket(bucket)
56 | if err != nil {
57 | return nil, err
58 | }
59 |
60 | result, err := s3client.GetObject(&s3.GetObjectInput{
61 | Bucket: aws.String(bucket),
62 | Key: aws.String(key),
63 | })
64 | if err != nil {
65 | return nil, err
66 | }
67 |
68 | defer result.Body.Close()
69 | return io.ReadAll(result.Body)
70 | }
71 |
72 | func readAllLocalFile(location string) ([]byte, error) {
73 | return os.ReadFile(location)
74 | }
75 |
--------------------------------------------------------------------------------
/awsclients/awsclients.go:
--------------------------------------------------------------------------------
1 | package awsclients
2 |
3 | // This module just consolidates all Client objects in one place so we don't
4 | // hammer metadata services or anything.
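//
// Example (the bucket name is hypothetical; OpenSessions must be called first):
//
//	client, err := GetS3ClientForBucket("some-bucket")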
5 |
6 | import (
7 | "sync"
8 |
9 | "github.com/aws/aws-sdk-go/aws"
10 | "github.com/aws/aws-sdk-go/aws/session"
11 | "github.com/aws/aws-sdk-go/service/batch"
12 | "github.com/aws/aws-sdk-go/service/cloudwatchlogs"
13 | "github.com/aws/aws-sdk-go/service/ec2"
14 | "github.com/aws/aws-sdk-go/service/ecs"
15 | "github.com/aws/aws-sdk-go/service/s3"
16 | "github.com/aws/aws-sdk-go/service/s3/s3manager"
17 | )
18 |
19 | var Session *session.Session
20 | var s3B map[string]*s3.S3
21 | var s3R map[string]*s3.S3
22 | var Batch *batch.Batch
23 | var ECS *ecs.ECS
24 | var EC2 *ec2.EC2
25 | var CloudWatchLogs *cloudwatchlogs.CloudWatchLogs
26 | var S3General *s3.S3
27 |
28 | var s3Lock = &sync.Mutex{}
29 |
30 | func GetS3ClientForBucket(bucket string) (*s3.S3, error) {
31 | s3Lock.Lock()
32 |
33 | region, ok := s3B[bucket]
34 | if !ok {
35 | // Unlock the mutex for the duration of getting bucket location.
36 | s3Lock.Unlock()
37 | region_loc, err := s3manager.GetBucketRegion(aws.BackgroundContext(), Session, bucket, "us-east-1")
38 | if err != nil {
39 | return nil, err
40 | }
41 | s3Lock.Lock()
42 |
43 | region_svc, ok := s3R[region_loc]
44 | if !ok {
45 | s3Lock.Unlock()
46 | session := session.Must(
47 | session.NewSession(&aws.Config{Region: aws.String(region_loc)}))
48 | region_svc_loc := s3.New(session)
49 | s3Lock.Lock()
50 | s3R[region_loc] = region_svc_loc
51 | region_svc = region_svc_loc
52 | }
53 | s3B[bucket] = region_svc
54 | region = region_svc
55 | }
56 |
57 | s3Lock.Unlock()
58 | return region, nil
59 | }
60 |
61 | func OpenSessions(region string) error {
62 | conf := &aws.Config{
63 | Region: aws.String(region),
64 | MaxRetries: aws.Int(10),
65 | }
66 | Session = session.Must(session.NewSession(conf))
67 | Batch = batch.New(Session)
68 | S3General = s3.New(Session)
69 | ECS = ecs.New(Session)
70 | EC2 = ec2.New(Session)
71 | s3B = make(map[string]*s3.S3)
72 | s3R = make(map[string]*s3.S3)
73 | CloudWatchLogs = cloudwatchlogs.New(Session)
74 |
75 | return nil
76 | }
77 |
--------------------------------------------------------------------------------
/frontend/src/containers/LayoutContainer/LayoutContainer.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import { connect } from 'react-redux';
3 | import Menu from 'components/Menu/Menu';
4 | import Search from 'components/Search/Search';
5 | import { setPageDimensions } from 'stores/layout';
6 | import './LayoutContainer.scss';
7 |
8 | class LayoutContainer extends React.Component {
9 | static propTypes = {
10 | children: PropTypes.element.isRequired,
11 | path: PropTypes.string.isRequired,
12 | setPageDimensions: PropTypes.func.isRequired
13 | };
14 |
15 | componentDidMount() {
16 | this.onResize();
17 | window.addEventListener('resize', this.onResize);
18 | }
19 |
20 | componentWillUnmount() {
21 | window.removeEventListener('resize', this.onResize);
22 | }
23 |
24 |     render() {
25 |         const onJobsPage = this.props.path === process.env.BASE_URL + '/';
26 |         return (
27 |             <div className='layout-container'>
28 |                 <div className='header'>
29 |                     <div className='brand'>
30 |                         Batchiepatchie
31 |                     </div>
32 |                     <Menu />
33 |                     {onJobsPage && <Search />}
34 |                 </div>
35 |                 <div className='content'>
36 |                     { this.props.children }
37 |                 </div>
38 |             </div>
39 |         );
40 |     }
41 | 
42 |     onResize = () => {
43 |         this.props.setPageDimensions({ height: window.innerHeight, width: window.innerWidth });
44 |     }
45 | }
46 | 
47 | const mapStateToProps = state => ({
48 |     path: state.routing.locationBeforeTransitions.pathname
49 | });
50 | 
51 | const actions = {
52 |     setPageDimensions
53 | };
54 | 
55 | export default connect(mapStateToProps, actions)(LayoutContainer);
56 | 
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/AdRoll/batchiepatchie
2 |
3 | go 1.18
4 |
5 | require (
6 | github.com/BurntSushi/toml v1.2.0
7 | github.com/aws/aws-sdk-go v1.44.62
8 | github.com/bakatz/echo-logrusmiddleware v1.1.1
9 | github.com/gorilla/websocket v1.5.0
10 | github.com/jcftang/logentriesrus v0.0.0-20220725204439-b4dedce84d23
11 | github.com/labstack/echo v3.3.10+incompatible
12 | github.com/labstack/gommon v0.3.1
13 | github.com/lib/pq v1.10.6
14 | github.com/opentracing/opentracing-go v1.2.0
15 | github.com/sirupsen/logrus v1.9.0
16 | gopkg.in/DataDog/dd-trace-go.v1 v1.40.1
17 | )
18 |
19 | require (
20 | github.com/DataDog/datadog-agent/pkg/obfuscate v0.0.0-20211129110424-6491aa3bf583 // indirect
21 | github.com/DataDog/datadog-go v4.8.2+incompatible // indirect
22 | github.com/DataDog/datadog-go/v5 v5.0.2 // indirect
23 | github.com/DataDog/sketches-go v1.2.1 // indirect
24 | github.com/Microsoft/go-winio v0.5.1 // indirect
25 | github.com/SpalkLtd/le_go v0.0.0-20220711045526-8feb6e635941 // indirect
26 | github.com/cespare/xxhash/v2 v2.1.2 // indirect
27 | github.com/dgraph-io/ristretto v0.1.0 // indirect
28 | github.com/dustin/go-humanize v1.0.0 // indirect
29 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect
30 | github.com/google/uuid v1.3.0 // indirect
31 | github.com/jmespath/go-jmespath v0.4.0 // indirect
32 | github.com/josharian/intern v1.0.0 // indirect
33 | github.com/mailru/easyjson v0.7.7 // indirect
34 | github.com/mattn/go-colorable v0.1.11 // indirect
35 | github.com/mattn/go-isatty v0.0.14 // indirect
36 | github.com/philhofer/fwd v1.1.1 // indirect
37 | github.com/pkg/errors v0.9.1 // indirect
38 | github.com/tinylib/msgp v1.1.2 // indirect
39 | github.com/valyala/bytebufferpool v1.0.0 // indirect
40 | github.com/valyala/fasttemplate v1.2.1 // indirect
41 | golang.org/x/crypto v0.0.0-20220214200702-86341886e292 // indirect
42 | golang.org/x/net v0.0.0-20220225172249-27dd8689420f // indirect
43 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect
44 | golang.org/x/text v0.3.7 // indirect
45 | golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11 // indirect
46 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
47 | google.golang.org/protobuf v1.27.1 // indirect
48 | )
49 |
--------------------------------------------------------------------------------
/docs/docs/overview.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Overview
2 | =========================
3 |
4 | Batchiepatchie is a monitoring tool for AWS Batch. It is written in the Go
5 | language.
6 |
7 | AWS Batch is a service, provided by Amazon Web Services, that runs Docker
8 | containers on EC2 instances. Typically, these EC2 instances are brought up when
9 | batch jobs are submitted and scaled down when there are no jobs to run. At a
10 | high level, you tell AWS Batch "Please run my Docker container located at URL
11 | X, with N CPUs and M gigabytes of memory" and AWS Batch figures out the rest.
12 | Detailed documentation on AWS Batch can be found on [their
13 | website](https://aws.amazon.com/documentation/batch/).
14 |
15 | Batchiepatchie exists because the user interface of Amazon's own dashboard
16 | leaves certain things to be desired. In particular, Batchiepatchie strives to
17 | make the following use cases easier:
18 |
19 | * Find currently running and historical jobs very quickly among thousands of other jobs.
20 |
21 | * Find and read the logs of any job without having to navigate through a complicated UI.
22 |
23 | * Work around some quirks in AWS Batch itself.
24 |
25 | * Implement timeouts for AWS Batch jobs.
26 |
27 | * Collect historical information about jobs.
28 |
29 | * Make it easy to cancel jobs en masse.
30 |
31 | Batchiepatchie has a search box that is designed to work fast with free-form
32 | text. Batchiepatchie will also remember jobs forever, so you should be able to
33 | find even jobs from months in the past within seconds.
34 |
35 | AWS Batch jobs place standard output and error from jobs into CloudWatch logs.
36 | Batchiepatchie knows how to find these logs and display them directly in its web
37 | interface, saving valuable time when you need to read the logs of a batch job.
38 |
39 | Batchiepatchie has some features to cancel many jobs at once. This is useful
40 | when someone submits a large distributed job by mistake and it needs to be
41 | killed.
42 |
43 | Batchiepatchie collects data about instances and ECS clusters used by batch
44 | jobs in a PostgreSQL database. The data can later be used to analyze the costs
45 | and behaviour of batch jobs.
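
As a sketch of the kind of analysis this enables (a hypothetical query; it
assumes the `jobs` table carries `job_queue` and `status` columns, as the
schema in this repository suggests), you could count jobs per queue and status
directly in PostgreSQL:

    SELECT job_queue, status, COUNT(*) AS job_count
    FROM jobs
    GROUP BY job_queue, status
    ORDER BY job_count DESC;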
46 |
47 | One major feature of AWS Batch that is not currently properly supported in
48 | Batchiepatchie is array jobs. The parent job will show up but child jobs will
49 | not display properly.
50 |
--------------------------------------------------------------------------------
/frontend/src/index.scss:
--------------------------------------------------------------------------------
1 | $icon-font-path: "~bootstrap-sass/assets/fonts/bootstrap/";
2 | @import "~bootswatch/dist/simplex/variables";
3 | @import "~bootstrap/scss/bootstrap";
4 | @import "~bootswatch/dist/simplex/bootswatch";
5 |
6 | body {
7 | }
8 |
9 | .alert-warning {
10 | background: #ffb;
11 | border: solid 1px #efef77;
12 | }
13 |
14 | .alert-danger {
15 | background: #faa;
16 | border: solid 1px #eaa;
17 | }
18 |
19 | .nav {
20 | font-weight: bold;
21 | li {
22 | margin: 10px;
23 | }
24 | .active a {
25 | text-decoration: underline;
26 | color: #000;
27 | }
28 | }
29 |
30 | .pagination {
31 | display: inline-block;
32 | margin-top: 10px;
33 | text-align: center;
34 | font-size: 1.2em;
35 | .disabled {
36 | color: #aaa;
37 | }
38 | li {
39 | padding: 10px;
40 | border-top: 1px solid #aaa;
41 | border-bottom: 1px solid #aaa;
42 | float: left;
43 | }
44 |     li:hover {
45 | cursor: pointer;
46 | }
47 | }
48 |
49 | pre {
50 | padding: 8px;
51 | background: #f0f0f0;
52 | border: 1px solid #aaa;
53 | }
54 |
55 | .terminal {
56 | background: #f0f0f0;
57 | }
58 |
59 | .col-xs-5ths, .col-xs-2-5ths, .col-xs-3-5ths {
60 | position: relative;
61 | min-height: 1px;
62 | padding-right: 15px;
63 | padding-left: 15px;
64 | }
65 |
66 | .col-xs-5ths {
67 | width: 20%;
68 | float: left;
69 | }
70 |
71 | .col-xs-2-5ths {
72 | width: 40%;
73 | float: left;
74 | }
75 |
76 | .col-xs-3-5ths {
77 | width: 60%;
78 | float: left;
79 | }
80 |
81 | .clear {
82 | clear: both;
83 | }
84 |
85 | .Select-menu-outer {
86 | max-height: 400px;
87 | }
88 | .Select-menu {
89 | max-height: 400px;
90 | }
91 |
92 | // override stickiness of checkbox-select frozen header row:
93 | // https://github.com/adazzle/react-data-grid/issues/1386#issuecomment-510532495
94 | // the full select column isn't frozen so alignment gets off during horizontal scrolling
95 | .react-grid-Row .react-grid-Cell--frozen {
96 | transform: translate3d(0px, 0px, 0px) !important;
97 | }
98 |
99 | .react-grid-HeaderRow .react-grid-HeaderCell--frozen {
100 | transform: translate3d(0px, 0px, 0px) !important;
101 | }
102 |
--------------------------------------------------------------------------------
/jobs/killer_handler.go:
--------------------------------------------------------------------------------
1 | package jobs
2 |
3 | import (
4 | "github.com/AdRoll/batchiepatchie/awsclients"
5 | "github.com/aws/aws-sdk-go/aws"
6 | "github.com/aws/aws-sdk-go/service/batch"
7 | "github.com/aws/aws-sdk-go/service/ec2"
8 | "github.com/opentracing/opentracing-go"
9 | log "github.com/sirupsen/logrus"
10 | )
11 |
12 | type KillerHandler struct {
13 | }
14 |
15 | func (th *KillerHandler) KillOne(jobID string, reason string, store Storer) error {
16 | span := opentracing.StartSpan("KillOne")
17 | defer span.Finish()
18 |
19 | input := &batch.TerminateJobInput{
20 | JobId: aws.String(jobID),
21 | Reason: aws.String("Cancelled job from batchiepatchie: " + reason),
22 | }
23 |
24 | log.Info("Killing Job ", jobID, "...")
25 | _, err := awsclients.Batch.TerminateJob(input)
26 | if err != nil {
27 | log.Warning("Killing job failed: ", err)
28 | return err
29 | }
30 |
31 | return store.UpdateJobLogTerminationRequested(jobID)
32 | }
33 |
34 | func (th *KillerHandler) KillInstances(instances []string) error {
35 | span := opentracing.StartSpan("KillInstances")
36 | defer span.Finish()
37 |
38 | // Exit early if there are no instances to kill
39 | if len(instances) == 0 {
40 | return nil
41 | }
42 | /* While the terminate instances accepts batches, we deliberately call
43 | * it one instance at a time. The API call won't terminate anything if
44 | * even one of the instance IDs is wrong but we still do want to
45 | * terminate the others.
46 |
47 | This shouldn't be too inefficient since most of the time there's only
48 | one or two instances to terminate this way anyway. */
49 |
50 | var final_ret error
51 |
52 | for _, instance_id := range instances {
53 | instances_ptr := make([]*string, 1)
54 | instances_ptr[0] = &instance_id
55 | terminate_instances := &ec2.TerminateInstancesInput{
56 | InstanceIds: instances_ptr,
57 | }
58 | _, err := awsclients.EC2.TerminateInstances(terminate_instances)
59 |         if err != nil {
60 |             log.Warning("Cannot terminate instance ", instance_id, ": ", err)
61 |             final_ret = err // Record the error but don't return early.
62 |         } else {
63 |             log.Info("Terminated instance ", instance_id, " because it had a job stuck in STARTING state.")
64 |         }
65 | }
66 |
67 | return final_ret
68 | }
69 |
70 | func NewKillerHandler() (Killer, error) {
71 | var ret Killer = new(KillerHandler)
72 | return ret, nil
73 | }
74 |
--------------------------------------------------------------------------------
/frontend/src/stores/jobqueue.js:
--------------------------------------------------------------------------------
1 | import actionReducer from 'utils/actionReducer';
2 | import JobsApi from 'api/api';
3 | import { fetchDataMultiple, JOB_QUEUES_ALL, JOB_QUEUES_ACTIVATED } from './status';
4 |
5 | export const SET_JOB_QUEUE_ACTIVATED_QUEUES = 'SET_JOB_QUEUE_ACTIVATED_QUEUES';
6 | export const SET_JOB_QUEUE_ALL_QUEUES = 'SET_JOB_QUEUE_ALL_QUEUES';
7 |
8 | const initialState = {
9 | allJobQueues: [],
10 | activatedJobQueues: []
11 | };
12 |
13 | const actions = {};
14 |
15 | actions[SET_JOB_QUEUE_ACTIVATED_QUEUES] = (state, { payload }) => {
16 | return {
17 | ...state,
18 | activatedJobQueues: payload
19 | };
20 | };
21 |
22 | actions[SET_JOB_QUEUE_ALL_QUEUES] = (state, { payload }) => {
23 | return {
24 | ...state,
25 | allJobQueues: payload
26 | };
27 | };
28 |
29 | export function setJobQueues(job_queues) {
30 | return {
31 | type: SET_JOB_QUEUE_ACTIVATED_QUEUES,
32 | payload: job_queues
33 | };
34 | };
35 |
36 | export function setAllJobQueues(job_queues) {
37 | return {
38 | type: SET_JOB_QUEUE_ALL_QUEUES,
39 | payload: job_queues
40 | };
41 | };
42 |
43 | export function fetchJobQueues() {
44 | return fetchDataMultiple([
45 | {
46 | status: JOB_QUEUES_ACTIVATED,
47 | fetch: fetchJobQueuesInner,
48 | result: setJobQueues
49 | }
50 | ]);
51 | }
52 |
53 | export function fetchAllJobQueues() {
54 | return fetchDataMultiple([
55 | {
56 | status: JOB_QUEUES_ALL,
57 | fetch: fetchAllJobQueuesInner,
58 | result: setAllJobQueues
59 | }
60 | ]);
61 | }
62 |
63 | export function activateJobQueue(job_queue_name) {
64 | return (dispatch, getState) => {
65 | return JobsApi.activateJobQueue(job_queue_name);
66 | };
67 | }
68 |
69 | export function deactivateJobQueue(job_queue_name) {
70 | return (dispatch, getState) => {
71 | return JobsApi.deactivateJobQueue(job_queue_name);
72 | };
73 | }
74 |
75 | function fetchJobQueuesInner() {
76 | return (dispatch, getState) => {
77 | const state = getState();
78 | return JobsApi.getJobQueues();
79 | };
80 | };
81 |
82 | function fetchAllJobQueuesInner() {
83 | return (dispatch, getState) => {
84 | const state = getState();
85 | return JobsApi.getAllJobQueues();
86 | };
87 | };
88 |
89 | // Root reducer
90 | export default actionReducer(actions, initialState);
91 |
--------------------------------------------------------------------------------
/jobs/compute_environment_monitor.go:
--------------------------------------------------------------------------------
1 | package jobs
2 |
3 | import (
4 | "github.com/AdRoll/batchiepatchie/awsclients"
5 | "github.com/aws/aws-sdk-go/service/batch"
6 | "github.com/opentracing/opentracing-go"
7 | log "github.com/sirupsen/logrus"
8 | )
9 |
10 | func GetComputeEnvironments(parentSpan opentracing.Span) ([]ComputeEnvironment, error) {
11 | span := opentracing.StartSpan("GetComputeEnvironments", opentracing.ChildOf(parentSpan.Context()))
12 | defer span.Finish()
13 |
14 | var nextToken *string
15 | var hundred int64
16 |
17 | compute_environments := make([]*batch.ComputeEnvironmentDetail, 0)
18 |
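    // Fetch compute environments in pages of 100, following NextToken
    // until AWS returns nil to signal the last page.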
19 | for {
20 | hundred = 100
21 | out, err := awsclients.Batch.DescribeComputeEnvironments(&batch.DescribeComputeEnvironmentsInput{
22 | MaxResults: &hundred,
23 | NextToken: nextToken,
24 | })
25 | if err != nil {
26 | log.Warning("Failed to fetch compute environments: ", err)
27 | return nil, err
28 | }
29 | nextToken = out.NextToken
30 |
31 | compute_environments = append(compute_environments, out.ComputeEnvironments...)
32 |
33 | if nextToken == nil {
34 | break
35 | }
36 | }
37 |
38 | /* Transform into our internal format, which is a bit nicer */
39 | ce_lst := make([]ComputeEnvironment, 0)
40 | for _, ce_aws := range compute_environments {
41 | if ce_aws.ComputeEnvironmentName != nil &&
42 | ce_aws.ComputeResources != nil &&
43 | ce_aws.ServiceRole != nil &&
44 | ce_aws.State != nil &&
45 | ce_aws.ComputeResources.MaxvCpus != nil &&
46 | ce_aws.ComputeResources.MinvCpus != nil &&
47 | ce_aws.ComputeResources.DesiredvCpus != nil {
48 | ce := ComputeEnvironment{
49 | Name: *ce_aws.ComputeEnvironmentName,
50 | WantedvCpus: *ce_aws.ComputeResources.DesiredvCpus,
51 | MinvCpus: *ce_aws.ComputeResources.MinvCpus,
52 | MaxvCpus: *ce_aws.ComputeResources.MaxvCpus,
53 | State: *ce_aws.State,
54 | ServiceRole: *ce_aws.ServiceRole}
55 | ce_lst = append(ce_lst, ce)
56 | }
57 | }
58 |
59 | return ce_lst, nil
60 | }
61 |
62 | func MonitorComputeEnvironments(fs Storer, queues []string) {
63 | span := opentracing.StartSpan("MonitorComputeEnvironments")
64 | defer span.Finish()
65 |
66 | if len(queues) == 0 {
67 | return
68 | }
69 |
70 | compute_environments, err := GetComputeEnvironments(span)
71 | if err != nil {
72 | log.Warning("Failed to get compute environments: ", err)
73 | return
74 | }
75 |
76 | err = fs.UpdateComputeEnvironmentsLog(compute_environments)
77 | if err != nil {
78 | log.Warning("Failed to update compute environments log: ", err)
79 | return
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/frontend/src/stores/status.js:
--------------------------------------------------------------------------------
1 | import actionReducer from 'utils/actionReducer';
2 |
3 | // Action names
4 | export const SET_ERROR_STATE = 'SET_ERROR_STATE';
5 | export const SET_LOADING_STATE = 'SET_LOADING_STATE';
6 |
7 | // Constants
8 | export const JOB = 'JOB';
9 | export const JOBS = 'JOBS';
10 | export const LOGS = 'LOGS';
11 | export const STATS = 'STATS';
12 | export const JOB_QUEUES_ALL = 'JOB_QUEUES_ALL';
13 | export const JOB_QUEUES_ACTIVATED = 'JOB_QUEUES_ACTIVATED';
14 | export const STATUSES = [
15 | JOB,
16 | JOBS,
17 | LOGS,
18 | STATS,
19 | JOB_QUEUES_ALL,
20 | JOB_QUEUES_ACTIVATED
21 | ];
22 |
23 | export function setErrorState(namespace, error) {
24 | return {
25 | type: SET_ERROR_STATE,
26 | payload: {
27 | namespace,
28 | error
29 | }
30 | };
31 | };
32 |
33 | export function setLoadingState(namespace, loading) {
34 | return {
35 | type: SET_LOADING_STATE,
36 | payload: {
37 | namespace,
38 | loading
39 | }
40 | };
41 | };
42 |
43 | const initialState = STATUSES.reduce((state, status) => {
44 | state[status] = {
45 | loading: true,
46 | error: false
47 | };
48 | return state;
49 | }, {});
50 |
51 | const actions = {};
52 |
53 | actions[SET_ERROR_STATE] = (state, { payload: { namespace, error } }) => {
54 | return {
55 | ...state,
56 | [namespace]: {
57 | ...state[namespace],
58 | error
59 | }
60 | };
61 | };
62 |
63 | actions[SET_LOADING_STATE] = (state, { payload: { namespace, loading } }) => {
64 | return {
65 | ...state,
66 | [namespace]: {
67 | ...state[namespace],
68 | loading
69 | }
70 | };
71 | };
72 |
73 |
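// Runs one fetch lifecycle for a status namespace: mark it as loading,
// dispatch the fetch thunk, store the result via the result action creator,
// and record an error state if the promise rejects.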
74 | function fetchData(dispatch, { status, fetch, result, options }) {
75 | const setLoadingError = (loading, error) => {
76 | dispatch(setErrorState(status, error));
77 | dispatch(setLoadingState(status, loading));
78 | };
79 | setLoadingError(true, false);
80 |
81 | return dispatch(fetch(options))
82 | .then(data => {
83 | dispatch(result(data));
84 | setLoadingError(false, false);
85 | })
86 | .catch((e) => {
87 | console.error(e);
88 | setLoadingError(false, true);
89 | });
90 | };
91 |
92 | export function fetchDataMultiple(fetchDataArguments) {
93 | return (dispatch, getState) => {
94 | const state = getState();
95 | const promises = fetchDataArguments.map(fetchDataArgument => fetchData(dispatch, fetchDataArgument));
96 | const promise = Promise.all(promises);
97 | return promise;
98 | };
99 | }
100 |
101 | export default actionReducer(actions, initialState);
102 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "batchiepatchie-frontend",
3 | "version": "0.1.0",
4 | "private": true,
5 | "dependencies": {
6 | "bootstrap": "4.3.1",
7 | "bootstrap-sass": "3.4.1",
8 | "bootswatch": "4.1.1",
9 | "classnames": "2.2.5",
10 | "font-awesome": "4.7.0",
11 | "humanize-duration": "3.18.0",
12 | "mixin-deep": "1.3.2",
13 | "moment": "^2.18.1",
14 | "moment-timezone": "^0.5.13",
15 | "numeral": "2.0.6",
16 | "promise-polyfill": "6.0.2",
17 | "query-string": "4.3.4",
18 | "react": "15.4.2",
19 | "react-addons-pure-render-mixin": "15.4.2",
20 | "react-addons-shallow-compare": "15.4.2",
21 | "react-bootstrap": "0.31.0",
22 | "react-data-grid": "^5.0.0",
23 | "react-datetime": "2.16.3",
24 | "react-dom": "15.4.2",
25 | "react-highlight-words": "0.11.0",
26 | "react-input-autosize": "2.2.1",
27 | "react-redux": "5.0.5",
28 | "react-router": "3.0.5",
29 | "react-router-redux": "4.0.8",
30 | "react-select": "1.2.1",
31 | "react-virtualized": "9.8.0",
32 | "recharts": "1.6.2",
33 | "redux": "3.7.0",
34 | "redux-thunk": "^2.2.0"
35 | },
36 | "devDependencies": {
37 | "babel-core": "6.25.0",
38 | "babel-eslint": "7.2.3",
39 | "babel-loader": "7.1.0",
40 | "babel-plugin-lodash": "^3.3.4",
41 | "babel-plugin-recharts": "1.2.1",
42 | "babel-plugin-transform-async-to-generator": "6.24.1",
43 | "babel-plugin-transform-class-properties": "6.24.1",
44 | "babel-plugin-transform-decorators": "6.24.1",
45 | "babel-plugin-transform-decorators-legacy": "1.3.4",
46 | "babel-plugin-transform-object-rest-spread": "6.23.0",
47 | "babel-plugin-transform-react-remove-prop-types": "0.4.6",
48 | "babel-polyfill": "6.23.0",
49 | "babel-preset-es2015": "6.18.0",
50 | "babel-preset-react": "6.16.0",
51 | "babel-preset-react-app": "3.0.0",
52 | "babel-preset-stage-2": "6.24.1",
53 | "babel-runtime": "6.23.0",
54 | "chalk": "1.1.3",
55 | "css-loader": "0.28.4",
56 | "eslint": "4.18.2",
57 | "eslint-config-react-app": "1.0.4",
58 | "eslint-loader": "1.8.0",
59 | "eslint-plugin-flowtype": "2.34.0",
60 | "eslint-plugin-import": "2.6.0",
61 | "eslint-plugin-jsx-a11y": "5.0.3",
62 | "eslint-plugin-react": "7.1.0",
63 | "extract-text-webpack-plugin": "2.1.2",
64 | "file-loader": "0.11.2",
65 | "fs-extra": "3.0.1",
66 | "html-webpack-plugin": "2.29.0",
67 | "node-sass": "4.14.1",
68 | "react-tooltip": "4.2.21",
69 | "react-dev-utils": "3.1.2",
70 | "react-error-overlay": "1.0.7",
71 | "sass-loader": "6.0.7",
72 | "source-map-loader": "0.2.1",
73 | "style-loader": "0.18.2",
74 | "url-loader": "0.5.9",
75 | "webpack": "3.12.0",
76 | "webpack-cli": "2.1.5",
77 | "webpack-dev-server": "2.11.5",
78 | "webpack-manifest-plugin": "1.1.0",
79 | "whatwg-fetch": "2.0.3"
80 | },
81 | "scripts": {
82 | "dev": "webpack-dev-server --host 0.0.0.0",
83 | "build": "webpack",
84 | "build:dist": "webpack -p"
85 | },
86 | "babel": {
87 | "presets": [
88 | "react-app"
89 | ]
90 | },
91 | "eslintConfig": {
92 | "extends": "react-app"
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/frontend/src/api/api.js:
--------------------------------------------------------------------------------
1 | import jobs from './jobs.json';
2 |
3 | class API {
4 | static baseURL = process.env.API_BASE_URL;
5 |
6 | getJob(id) {
7 | return this.get(this.joinUrls(`jobs/${id}`));
8 | }
9 |
10 | getJobs(params) {
11 | return this.get(this.joinUrls('jobs', params));
12 | }
13 |
14 | getLogs(id) {
15 | // This is some machinery to turn text response into a list of {
16 | // 'Message': line } objects.
17 | function to_text(response) {
18 | return response.text();
19 | }
20 |         function parse_text(text) {
21 |             const lines = text.split(/\n/);
22 |             const parsed = [];
23 |             for (const line of lines) {
24 |                 parsed.push({ 'Message': line });
25 |             }
26 |             return Promise.resolve(parsed);
27 |         }
28 | return window.fetch(this.joinUrls(`jobs/${id}/logs?format=text`), { 'method': 'GET' }).then(this.checkStatus).then(to_text).then(parse_text);
29 | }
30 |
31 | getStats(params) {
32 | return this.get(this.joinUrls('jobs/stats', params));
33 | }
34 |
35 | getJobQueues() {
36 | return this.get(this.joinUrls('job_queues/active'));
37 | }
38 |
39 | getAllJobQueues() {
40 | return this.get(this.joinUrls('job_queues/all'));
41 | }
42 |
43 | activateJobQueue(job_queue_name) {
44 | return this.post(this.joinUrls(`job_queues/${job_queue_name}/activate`), []);
45 | }
46 |
47 | deactivateJobQueue(job_queue_name) {
48 | return this.post(this.joinUrls(`job_queues/${job_queue_name}/deactivate`), []);
49 | }
50 |
51 | killJobs(ids) {
52 | return this.post(this.joinUrls('jobs/kill'), { ids });
53 | }
54 |
55 | get(url) {
56 | return this.fetch('get', url);
57 | }
58 |
59 | post(url, body) {
60 | return this.fetch('post', url, JSON.stringify(body));
61 | }
62 |
63 | put(url, body) {
64 | return this.fetch('put', url, JSON.stringify(body));
65 | }
66 |
67 | delete(url) {
68 | return this.fetch('delete', url);
69 | }
70 |
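    // Thin wrapper over window.fetch: rejects on non-2xx responses and parses
    // the body as JSON.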
71 | fetch(method, url, body) {
72 | return window.fetch(url, { method, body })
73 | .then(this.checkStatus)
74 | .then(this.parseJSON);
75 | }
76 |
77 | checkStatus(response) {
78 | // Request is good
79 | if (response.ok) {
80 | return response;
81 | }
82 |
83 | // Request failed
84 | const error = new Error(response.statusText);
85 | error.response = response;
86 | throw error;
87 | }
88 |
89 | parseJSON(response) {
90 | return response.json();
91 | };
92 |
93 | joinUrls(endpoint, params) {
94 | const formattedParams = params ?
95 | '?' + this.formatQueryParams(params) :
96 | '';
97 |
98 | return `${API.baseURL}/${endpoint}${formattedParams}`;
99 | }
100 |
101 | formatQueryParams(params) {
102 | return Object.keys(params)
103 | .filter(k => !!params[k])
104 | .map(k => encodeURIComponent(k) + '=' + encodeURIComponent(params[k]))
105 | .join('&');
106 | }
107 | }
108 |
109 | export default new API();
110 |
--------------------------------------------------------------------------------
/handlers/job_status_subscriptions.go:
--------------------------------------------------------------------------------
1 | package handlers
2 |
3 | import (
4 | "encoding/json"
5 | "time"
6 |
7 | "github.com/AdRoll/batchiepatchie/jobs"
8 | "github.com/gorilla/websocket"
9 | "github.com/labstack/echo"
10 | log "github.com/sirupsen/logrus"
11 | )
12 |
13 | var (
14 | upgrader = websocket.Upgrader{}
15 | )
16 |
17 | func (s *Server) SubscribeToJobEvent(c echo.Context) error {
18 | job_id := c.Param("id")
19 |
20 | ws, err := upgrader.Upgrade(c.Response(), c.Request(), nil)
21 | if err != nil {
22 | log.Warning("Invalid WebSocket attempt: ", err)
23 | return err
24 | }
25 | defer ws.Close()
26 |
27 | ws.SetReadLimit(1000) // We are not expecting to read anything so set low limit for reads
28 |
29 | events, unsubscribe := s.Storage.SubscribeToJobStatus(job_id)
30 | defer unsubscribe()
31 |
32 | // Launch a reader. We need it to detect if the connection closes
33 | // suddenly.
34 | go func() {
35 | _, _, _ = ws.ReadMessage()
36 | ws.Close() // Close is safe to run concurrently.
37 | log.Info("Stopped reading from websocket.")
38 | }()
39 |
40 | var previous_status *jobs.Job
41 |     // Immediately send a status update for the job, if such a job exists.
42 | job, err := s.Storage.FindOne(job_id)
43 | previous_status = job
44 | if err == nil && job != nil {
45 | marshalled, err := json.Marshal(*job)
46 | if err != nil {
47 | log.Warning("Cannot marshal job status to be sent to WebSocket: ", err)
48 | return err
49 | }
50 | now := time.Now()
51 | err = ws.SetWriteDeadline(now.Add(time.Second * 5))
52 | if err != nil {
53 | log.Warning("Cannot set write deadline: ", err)
54 | return err
55 | }
56 | err = ws.WriteMessage(websocket.TextMessage, marshalled)
57 | if err != nil {
58 | log.Warning("Cannot send job status to WebSocket: ", err)
59 | return err
60 | }
61 | }
62 |
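    // Main loop: forward status updates to the client as they arrive; if
    // nothing arrives within five seconds, re-send the last known status as a
    // heartbeat so dead connections are detected promptly.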
63 | for {
64 | var job_status *jobs.Job
65 | select {
66 | case stat := <-events:
67 | job_status = &stat
68 | case <-time.After(time.Second * 5):
69 | job_status = nil
70 | }
71 |
72 | if job_status != nil {
73 | previous_status = job_status
74 | marshalled, err := json.Marshal(*job_status)
75 | if err != nil {
76 | log.Warning("Cannot marshal job status to be sent to WebSocket: ", err)
77 | return err
78 | }
79 |
80 | now := time.Now()
81 | err = ws.SetWriteDeadline(now.Add(time.Second * 5))
82 | if err != nil {
83 | log.Warning("Cannot set write deadline: ", err)
84 | return err
85 | }
86 | err = ws.WriteMessage(websocket.TextMessage, marshalled)
87 | if err != nil {
88 | log.Warning("Cannot send job status to WebSocket: ", err)
89 | return err
90 | }
91 | } else {
92 | marshalled := []byte("")
93 | if previous_status != nil {
94 | marshalled, err = json.Marshal(*previous_status)
95 | if err != nil {
96 |                 log.Warning("Cannot marshal job status to be sent to WebSocket: ", err)
97 | return err
98 | }
99 | }
100 | now := time.Now()
101 | err = ws.SetWriteDeadline(now.Add(time.Second * 5))
102 | if err != nil {
103 | log.Warning("Cannot set write deadline: ", err)
104 | return err
105 | }
106 |
107 | err = ws.WriteMessage(websocket.TextMessage, marshalled)
108 | if err != nil {
109 | log.Warning("Cannot write to websocket: ", err)
110 | return err
111 | }
112 | }
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/frontend/src/components/Terminal/Terminal.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import AutoSizer from 'react-virtualized/dist/commonjs/AutoSizer';
3 | import List from 'react-virtualized/dist/commonjs/List';
4 | import Highlighter from "react-highlight-words";
5 | import './Terminal.scss';
6 |
7 | const LOG_ROW_HEIGHT = 18;
8 | const CHAR_WIDTH = 8;
9 |
10 | export default class Terminal extends React.Component {
11 | static propTypes = {
12 | height: PropTypes.number.isRequired,
13 | autoScrollToBottom: PropTypes.bool.isRequired,
14 | // Search text to highlight.
15 | searchText: PropTypes.string.isRequired,
16 | // Index of the row with the current search result, or -1 if not found.
17 | currentSearchRow: PropTypes.number.isRequired,
18 | log: PropTypes.array.isRequired
19 | };
20 |
21 | constructor(props) {
22 | super(props);
23 | this.state = {
24 | // key for the List component. This is incremented to force-refresh the List component.
25 | listKey: 0,
26 | };
27 | }
28 |
29 | componentDidUpdate(prevProps) {
30 | if (prevProps.searchText !== this.props.searchText || prevProps.currentSearchRow !== this.props.currentSearchRow) {
31 | // If the search text or current search row changes, force-update the List so that the
32 | // Highlighter will re-render. The List is pretty aggressive about not rendering
33 | // when it doesn't have to.
34 | const { listKey } = this.state;
35 | this.setState({listKey: listKey + 1})
36 | }
37 | }
38 |
39 | render() {
40 | const { log, height, autoScrollToBottom, currentSearchRow } = this.props;
41 | const { listKey } = this.state;
42 | const maxLength = log.reduce((memo, item) => Math.max(memo, item.length), 0);
43 | let listProps = {};
44 | if (currentSearchRow > -1) {
45 | listProps = { scrollToIndex: currentSearchRow };
46 | }
47 | if (autoScrollToBottom) {
48 | listProps = { scrollToIndex: log.length-1 };
49 | }
50 | return (
51 |
;
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/frontend/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "parser": "babel-eslint",
3 | "plugins": [
4 | "react"
5 | ],
6 | "env": {
7 | "browser": true,
8 | "node": true,
9 | "es6": true,
10 | "mocha": true
11 | },
12 | "globals": {
13 | "heap": false,
14 | "internalTrackingEnabled": false
15 | },
16 | "rules": {
17 | "block-scoped-var": 2,
18 | "brace-style": [1, "1tbs", { "allowSingleLine": true }],
19 | "comma-dangle": [2, "only-multiline"],
20 | "comma-spacing": [1, { "before": false, "after": true }],
21 | "comma-style": [1, "last"],
22 | "consistent-return": 2,
23 | "consistent-this": [1, "that"],
24 | "curly": [2, "multi-line"],
25 | "default-case": 2,
26 | "dot-notation": 1,
27 | "eol-last": 1,
28 | "eqeqeq": 2,
29 | "no-lonely-if": 2,
30 | "new-parens": 2,
31 | "no-catch-shadow": 1,
32 | "no-delete-var": 2,
33 | "no-dupe-args": 2,
34 | "no-duplicate-case": 2,
35 | "no-else-return": 1,
36 | "no-extra-bind": 1,
37 | "no-empty": 2,
38 | "no-extra-boolean-cast": 2,
39 | "no-fallthrough": 2,
40 | "no-inner-declarations": 2,
41 | "no-irregular-whitespace": 1,
42 | "no-func-assign": 1,
43 | "no-lone-blocks": 2,
44 | "no-mixed-spaces-and-tabs": 1,
45 | "no-multi-str": 1,
46 | "no-multiple-empty-lines": [1, { max: 2 }],
47 | "no-native-reassign": 2,
48 | "no-redeclare": 2,
49 | "no-return-assign": 2,
50 | "no-shadow": 2,
51 | "no-shadow-restricted-names": 2,
52 | "no-self-compare": 2,
53 | "no-sequences": 1,
54 | "no-spaced-func": 2,
55 | "no-throw-literal": 2,
56 | "no-trailing-spaces": 1,
57 | "no-undef-init": 1,
58 | "no-undefined": 2,
59 | "no-undef": 2,
60 | "no-unreachable": 2,
61 | "no-unused-expressions": 2,
62 | "no-unused-vars": 1,
63 | "no-use-before-define": [2, "nofunc"],
64 | "no-var": 1,
65 | "no-void": 2,
66 | "no-cond-assign": 2,
67 | "operator-linebreak": [1, "after"],
68 | "padded-blocks": [1, "never"],
69 | "prefer-const": [2, {
70 | "destructuring": "all"
71 | }],
72 | "quote-props": [2, "as-needed", { keywords: true, unnecessary: false }],
73 | "quotes": [1, "single", { "allowTemplateLiterals": true }],
74 | "jsx-quotes": [1, "prefer-single"],
75 | "react/jsx-no-undef": 1,
76 | "react/jsx-uses-react": 1,
77 | "react/jsx-uses-vars": 1,
78 | "react/no-did-mount-set-state": 1,
79 | "react/no-did-update-set-state": 1,
80 | "react/no-multi-comp": 1,
81 | "react/no-unknown-property": 1,
82 | "react/prop-types": 1,
83 | "react/react-in-jsx-scope": 1,
84 | "react/sort-comp": [1, {
85 | order: [
86 | 'lifecycle',
87 | 'render',
88 | '/^render.+$/',
89 | 'everything-else'
90 | ]
91 | }],
92 | "react/self-closing-comp": 1,
93 | "react/jsx-curly-spacing": [1, "always"],
94 | "react/jsx-equals-spacing": [1, "never"],
95 | "space-before-blocks": [1, "always"],
96 | "react/no-direct-mutation-state": 1,
97 | "react/jsx-closing-bracket-location": 0,
98 | "react/jsx-indent": 1,
99 | "react/jsx-indent-props": 1,
100 | "react/jsx-key": 1,
101 | "react/jsx-no-duplicate-props": 1,
102 | "react/jsx-wrap-multilines": 1,
103 | "react/jsx-no-bind": 1,
104 | "semi": [1, "always"],
105 | "no-debugger": 1,
106 | "no-unexpected-multiline": 1,
107 | "max-len": [2, 120, 4],
108 | "spaced-comment": [1, "always", { "exceptions": ["-", "*"] }],
109 | "space-infix-ops": 1,
110 | "valid-typeof": 2,
111 | "vars-on-top": 2,
112 | "wrap-iife": 2,
113 | "yoda": 1
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/docs/docs/quickstart.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Quick start
2 | ============================
3 |
4 | This page describes how to quickly get Batchiepatchie running.
5 |
6 | The process here is based on the `docker-compose` tool, which brings up the necessary
7 | infrastructure locally. This is useful for development purposes but also to
8 | evaluate and test Batchiepatchie itself. For actual production deployment
9 | instructions, see [documentation on deployment page](deployment.md).
10 |
11 | Prerequisites
12 | -------------
13 |
14 | You will need to set up some AWS Batch infrastructure or Batchiepatchie will
15 | not show anything. For this, we suggest you follow the ["Getting Started" guide
16 | on AWS Batch on AWS
17 | documentation](https://docs.aws.amazon.com/batch/latest/userguide/Batch_GetStarted.html).
18 |
19 | Aside from that, all you need is a working Docker and `docker-compose` tool.
20 | Docker Compose is usually installed with `docker` on most systems. Follow the
21 | instructions for your operating system to install these tools.
22 |
23 | Setting up
24 | ----------
25 |
26 | The machine you are running Batchiepatchie on needs to have AWS credentials
27 | available in some way. If you are running Docker on an EC2 instance, you are
28 | likely already good to go, as Batchiepatchie can use the instance metadata
29 | service to obtain IAM credentials. Otherwise, you need to pass credentials to
30 | Docker Compose. Our `docker-compose.yml` file passes the environment variables
31 | `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` to the Batchiepatchie
32 | container, so if you have these variables set up on your host
33 | system, the credentials should be passed correctly. Be aware that this is
34 | something of [a security
35 | issue](https://diogomonica.com/2017/03/27/why-you-shouldnt-use-env-variables-for-secret-data/)
36 | so we recommend that you do not use `docker-compose.yml` for actual deploys.
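
For local evaluation, one way to supply these (values here are placeholders)
is to export them in the shell you will run Docker Compose from:

    $ export AWS_ACCESS_KEY_ID=your-access-key-id
    $ export AWS_SECRET_ACCESS_KEY=your-secret-access-key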
37 |
38 | Assuming that you have `docker` and `docker-compose` installed and usable,
39 | along with some AWS credentials, you can start Batchiepatchie:
40 |
41 | $ docker-compose up
42 |
43 | This will take a few minutes on the first run. Docker Compose will run 4 containers in total:
44 |
45 | * A frontend container, designed for frontend development. This will listen on http://127.0.0.1:8080/
46 |
47 | * An API container, which runs the Batchiepatchie backend. This will listen on http://127.0.0.1:5454/, but you should use the 8080 endpoint instead.
48 |
49 | * A migration container. This runs once at the beginning of the Docker Compose startup to set up the database schema for the PostgreSQL database used by Batchiepatchie.
50 |
51 | * A PostgreSQL container that runs a database used by Batchiepatchie.
52 |
53 | If everything went without errors, you should be able to access the
54 | Batchiepatchie frontend at http://127.0.0.1:8080/. This setup is also designed
55 | for development, so modifying any code should automatically rebuild and reload
56 | Batchiepatchie. Docker Compose mounts the current directory from the host
57 | inside the containers, so the containers use the files from the host.
58 |
59 | Adding job queues
60 | -----------------
61 |
62 | When you first start Batchiepatchie, there are no jobs to be listed. If you have
63 | followed the prerequisites section on this page, you should have some AWS Batch
64 | infrastructure set up.
65 |
66 | You will need to manually add job queues to the system. This is easy: navigate
67 | to the "Job queues" tab in the Batchiepatchie UI and click "ACTIVATE" on some of
68 | the job queues (you need to set up some job queues in AWS Batch before they
69 | appear in Batchiepatchie).
70 |
71 | Another way to do this is to manually log into the PostgreSQL database and add
72 | your queue:
73 |
74 | $ docker exec -it batchiepatchie_postgres_1 sh -c 'psql --user postgres --dbname postgres'
75 | postgres=# INSERT INTO activated_job_queues VALUES ( 'name-of-your-job-queue' );
76 | INSERT 0 1
77 | postgres=# SELECT * FROM activated_job_queues;
78 | job_queue
79 | -----------
80 | name-of-your-job-queue
81 | (1 row)
82 |
83 | postgres=#
84 |
85 | Once your job queue is inserted, Batchiepatchie will periodically poll AWS
86 | Batch to update its understanding of the current state of your batch jobs.
87 |
--------------------------------------------------------------------------------
/frontend/src/components/SearchBox/SearchBox.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import debounce from 'utils/debounce';
3 | import './SearchBox.scss';
4 |
5 | /**
6 | * A search field with next and previous buttons
7 | */
8 | export default class SearchBox extends React.Component {
9 | static propTypes = {
10 | // The lines of text to search.
11 | rows: PropTypes.array.isRequired,
12 |         // Callback with the new searchText and currentSearchRow.
13 | onSearchChanged: PropTypes.func.isRequired,
14 | };
15 |
16 | constructor(props) {
17 | super(props);
18 | this.state = {
19 | // Search text to highlight.
20 | searchText: '',
21 | // Index of the row with the current search result, or -1 if not found.
22 | currentSearchRow: -1,
23 | // Whether to display the "Not found" message.
24 | notFound: false,
25 | };
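        // Wrap the handler so rapid keystrokes trigger at most one search per second.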
26 | this.onSearchTextChangedDebounced = debounce(this.onSearchTextChangedDebounced, 1000);
27 | }
28 |
29 | render() {
30 | const { searchText, notFound } = this.state;
31 | return (
32 |
33 | Search:
34 |
35 |
36 |
37 | { notFound && Not found }
38 |
39 | );
40 | }
41 |
42 | /**
43 | * Non-debounced text change handler.
44 | */
45 | onSearchTextChanged = (event) => {
46 | this.setState({searchText: event.target.value, notFound: false});
47 | this.onSearchTextChangedDebounced(event.target.value);
48 | }
49 |
50 | /**
51 | * Debounced text change handler.
52 | */
53 | onSearchTextChangedDebounced = (searchText) => {
54 | const { onSearchChanged } = this.props;
55 | const newSearchRow = searchText === '' ? -1 : this.find(searchText, -1, 1);
56 | onSearchChanged(searchText, newSearchRow);
57 | if (newSearchRow === -1) {
58 | this.setState({notFound: searchText !== ''});
59 | }
60 | this.setState({currentSearchRow: newSearchRow});
61 | }
62 |
63 | /**
64 | * The Next button was clicked.
65 | */
66 | onClickNext = () => {
67 | const { onSearchChanged } = this.props;
68 | const { currentSearchRow, searchText } = this.state;
69 | if (searchText === '') {
70 | return;
71 | }
72 | const newSearchRow = this.find(searchText, currentSearchRow, 1);
73 | if (newSearchRow === -1) {
74 | this.setState({notFound: true});
75 | // Don't set currentSearchRow to -1 if the user tries to go past the last occurrence.
76 | // Just leave them at the last occurrence.
77 | } else {
78 | this.setState({notFound: false, currentSearchRow: newSearchRow});
79 | onSearchChanged(searchText, newSearchRow);
80 | }
81 | }
82 |
83 | /**
84 | * The Prev button was clicked.
85 | */
86 | onClickPrev = () => {
87 | const { onSearchChanged } = this.props;
88 | const { currentSearchRow, searchText } = this.state;
89 | if (searchText === '') {
90 | return;
91 | }
92 | const newSearchRow = this.find(searchText, currentSearchRow, -1);
93 | if (newSearchRow === -1) {
94 | this.setState({notFound: true});
95 | // Don't set currentSearchRow to -1 if the user tries to go past the first occurrence.
96 | // Just leave them at the first occurrence.
97 | } else {
98 | this.setState({notFound: false, currentSearchRow: newSearchRow});
99 | onSearchChanged(searchText, newSearchRow);
100 | }
101 | }
102 |
103 | /**
104 | * Looks in the rows for the search text and returns the index of the next matching row,
105 | * or -1 if not found.
106 | *
107 | * delta is +1 for Next and -1 for Prev.
108 | */
109 | find = (searchText, currentSearchRow, delta) => {
110 | const { rows } = this.props;
111 | let i = currentSearchRow;
112 | i += delta;
113 | while (0 <= i && i <= rows.length - 1) {
114 | if (rows[i].toLowerCase().indexOf(searchText.toLowerCase()) > -1) {
115 | return i;
116 | }
117 | i += delta;
118 | }
119 | return -1;
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/handlers/job_status_notification.go:
--------------------------------------------------------------------------------
1 | package handlers
2 |
3 | import (
4 | "encoding/json"
5 | "io"
6 | "regexp"
7 | "strconv"
8 | "time"
9 |
10 | "github.com/AdRoll/batchiepatchie/jobs"
11 | "github.com/labstack/echo"
12 | "github.com/labstack/gommon/log"
13 | "github.com/opentracing/opentracing-go"
14 | )
15 |
16 | // This structure and the ones below it match the CloudWatch event JSON we get from the AWS Lambda function.
17 | // They don't cover all of the fields, just most of the useful ones we track.
18 | type JobStatusNotification struct {
19 | Time string `json:"time"`
20 | Detail JobStatusNotificationDetail `json:"detail"`
21 | }
22 |
23 | type JobStatusNotificationDetail struct {
24 | JobName string `json:"jobName"`
25 | JobId string `json:"jobId"`
26 | JobQueue string `json:"jobQueue"`
27 | Status string `json:"status"`
28 | CreatedAt int64 `json:"createdAt"`
29 | StartedAt *int64 `json:"startedAt"`
30 | Container JobStatusNotificationContainer `json:"container"`
31 | JobDefinition string `json:"jobDefinition"`
32 | }
33 |
34 | type env struct {
35 | Key string `json:"name"`
36 | Value string `json:"value"`
37 | }
38 |
39 | type JobStatusNotificationContainer struct {
40 | Image string `json:"image"`
41 | Vcpus int64 `json:"vcpus"`
42 | Memory int64 `json:"memory"`
43 | Command []string `json:"command"`
44 | Environment []env `json:"environment"`
45 | TaskArn *string `json:"taskArn"`
46 | }
47 |
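// For illustration, an abridged event of the shape these structs expect
// (all values are made up):
//
//   {
//     "time": "2018-01-01T12:00:00Z",
//     "detail": {
//       "jobName": "example-job",
//       "jobId": "11111111-2222-3333-4444-555555555555",
//       "jobQueue": "arn:aws:batch:us-east-1:123456789012:job-queue/example-queue",
//       "status": "RUNNING",
//       "createdAt": 1514808000000,
//       "startedAt": 1514808060000,
//       "jobDefinition": "arn:aws:batch:us-east-1:123456789012:job-definition/example:1",
//       "container": {
//         "image": "busybox",
//         "vcpus": 1,
//         "memory": 128,
//         "command": ["echo", "hello"],
//         "environment": [{ "name": "PYBATCH_TIMEOUT", "value": "3600" }],
//         "taskArn": "arn:aws:ecs:us-east-1:123456789012:task/abcdef"
//       }
//     }
//   }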
48 | var arnRegex = regexp.MustCompile("^arn.*/(.+?)$")
49 |
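// stripArn returns the final path component of an ARN-style name, so a queue
// ARN like "arn:aws:batch:us-east-1:123456789012:job-queue/my-queue" becomes
// "my-queue"; names that don't look like ARNs are returned unchanged.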
50 | func stripArn(arnied_name string) string {
51 | match := arnRegex.FindStringSubmatch(arnied_name)
52 | if match == nil {
53 | return arnied_name
54 | }
55 | return match[1]
56 | }
57 |
58 | func (s *Server) JobStatusNotification(c echo.Context) error {
59 | span := opentracing.StartSpan("API.JobStatusNotification")
60 | defer span.Finish()
61 |
62 | // This function can be called from outside to update job status.
63 |     // It's meant to be used from an AWS Lambda function that is triggered on AWS Batch events.
64 | body, err := io.ReadAll(io.LimitReader(c.Request().Body, 100000))
65 | if err != nil {
66 | log.Warn("Failed reading job status notification posted on our API: ", err)
67 | return err
68 | }
69 |
70 | var job_status_notification JobStatusNotification
71 |
72 | if err = json.Unmarshal(body, &job_status_notification); err != nil {
73 | log.Warn("Cannot unmarshal JSON for job status notification posted on our API: ", err)
74 | return err
75 | }
76 |
77 | now := time.Now()
78 |
79 | // Sometimes we get these jobs that have barely any details in them.
80 | // The UI and the database can't deal with them so we skip them if it happens.
81 | if job_status_notification.Detail.JobName == "" {
82 | return nil
83 | }
84 |
85 | // Convert jobStatusNotification into jobs.Job definition that our
86 | // PostgreSQL storer understands.
87 | job := jobs.Job{}
88 | job.Id = job_status_notification.Detail.JobId
89 | job.Name = job_status_notification.Detail.JobName
90 | job.Status = job_status_notification.Detail.Status
91 | job.Description = job_status_notification.Detail.JobDefinition
92 | job.LastUpdated = now
93 | job.JobQueue = stripArn(job_status_notification.Detail.JobQueue)
94 | job.Image = job_status_notification.Detail.Container.Image
95 | job.CreatedAt = time.Unix(job_status_notification.Detail.CreatedAt/1000, 0)
96 | if job_status_notification.Detail.StartedAt != nil {
97 | time := time.Unix(*job_status_notification.Detail.StartedAt/1000, 0)
98 | job.RunStartTime = &time
99 | } else {
100 | job.RunStartTime = nil
101 | }
102 | job.VCpus = job_status_notification.Detail.Container.Vcpus
103 | job.Memory = job_status_notification.Detail.Container.Memory
104 | cmd, _ := json.Marshal(job_status_notification.Detail.Container.Command)
105 | job.CommandLine = string(cmd)
106 |
107 | timeout := -1
108 | for _, value := range job_status_notification.Detail.Container.Environment {
109 | if value.Key == "PYBATCH_TIMEOUT" {
110 | timeout, err = strconv.Atoi(value.Value)
111 | if err != nil {
112 | timeout = -1
113 | log.Warn("Cannot make sense of PYBATCH_TIMEOUT in job status notification: ", value.Value, " : ", err)
114 | }
115 | break
116 | }
117 | }
118 | job.Timeout = timeout
119 |
120 | jobs := make([]*jobs.Job, 1)
121 | jobs[0] = &job
122 |
123 | err = s.Storage.Store(jobs)
124 | if err != nil {
125 | log.Warn("Failed to store job status notification: ", err)
126 | return err
127 | }
128 | log.Info("Got job status notification for job: ", job_status_notification.Detail.JobId)
129 | return nil
130 | }
131 |
--------------------------------------------------------------------------------
/frontend/src/components/Search/Search.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import { connect } from 'react-redux';
3 | import ReactTooltip from 'react-tooltip';
4 | import debounce from 'utils/debounce';
5 | import {
6 | setParams,
7 | setLocationToSearch
8 | } from 'stores/job';
9 | import SectionLoader from 'components/SectionLoader/SectionLoader';
10 | import {
11 | JOB,
12 | JOBS,
13 | STATS
14 | } from 'stores/status';
15 | import './Search.scss';
16 |
17 | // Drives the loading spinner at the top of the page
18 | function getStatusKey(path) {
19 | if (path.startsWith('/job')) {
20 | return JOB;
21 | } else if (path.startsWith('/stats')) {
22 | return STATS;
23 | }
24 | return JOBS;
25 | }
26 |
27 | class Search extends React.Component {
28 | static propTypes = {
29 | loading: PropTypes.bool.isRequired,
30 | qTemp: PropTypes.string.isRequired,
31 | dateRange: PropTypes.string.isRequired,
32 | setParams: PropTypes.func.isRequired,
33 | statusKey: PropTypes.string.isRequired,
34 | };
35 |
36 | render() {
37 | const {
38 | loading,
39 | qTemp,
40 | dateRange
41 | } = this.props;
42 |
43 | return (
44 |
45 |
46 |
47 | { loading && }
48 |
49 |
50 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
77 |
78 |
79 | ℹ️
80 |
81 |
82 |
83 | Search is a case-insensitive, partial-word AND search on individual words.
84 | The following fields are searched: ID, Name, Image, and Queue.
85 |
86 | 
--------------------------------------------------------------------------------
/frontend/src/pages/JobQueuesPage/JobQueuesPage.jsx:
--------------------------------------------------------------------------------
68 |                     Could not load API responses for job queues.
69 |
70 |
71 | );
72 | }
73 |
74 | let queues_activated = [];
75 | for ( let key in this.props.activatedJobQueues ) {
76 | const queue = this.props.activatedJobQueues[key];
77 | queues_activated.push(queue);
78 | }
79 |
80 | let queues_all = [];
81 | for ( let key in this.props.allJobQueues ) {
82 | const queue = this.props.allJobQueues[key];
83 |
84 |             // TODO: this is a quadratic check for whether the queue is already in the activated list.
85 |             // With some small effort, we could make it faster.
86 | let ok_to_add = true;
87 | for ( let key2 in queues_activated ) {
88 | if ( queue === queues_activated[key2] ) {
89 | ok_to_add = false;
90 | break;
91 | }
92 | }
93 | if ( ok_to_add ) {
94 | queues_all.push(queue);
95 | }
96 | }
97 |
98 | queues_all.sort();
99 | queues_activated.sort();
100 |
101 | const make_row_getter = (lst, act) => (i) => {
102 | if ( i < lst.length ) {
103 | return { name: lst[i], activation: { action: act, onClick: () => { this.setJobQueue(act, lst[i]); } } };
104 | } else {
105 | return { name: '', activation: { action: '', onClick: () => {} } };
106 | }
107 | };
108 |
109 | const row_getter_all = make_row_getter(queues_all, 'ACTIVATE');
110 | const row_getter_activated = make_row_getter(queues_activated, 'DEACTIVATE');
111 |
112 | const height = 35+35*Math.max(queues_all.length, queues_activated.length);
113 |
114 | return (
115 |
116 |
117 |
118 |
119 |
Batchiepatchie registered job queues
120 |
121 |
129 |
130 |
131 |
132 |
All job queues
133 |
134 |
142 |
143 |
144 |
145 |
146 |
147 | );
148 | }
149 | }
150 |
151 | const mapStateToProps = state => ({
152 | allJobQueues: state.jobqueue.allJobQueues,
153 | activatedJobQueues: state.jobqueue.activatedJobQueues,
154 | status_all_job_queues: state.status[JOB_QUEUES_ALL],
155 | status_activated_job_queues: state.status[JOB_QUEUES_ACTIVATED]
156 | });
157 |
158 | const actions = {
159 | fetchAllJobQueues,
160 | fetchJobQueues,
161 | activateJobQueue,
162 | deactivateJobQueue
163 | };
164 |
165 | export default connect(mapStateToProps, actions)(JobQueuesPage);
166 |
--------------------------------------------------------------------------------
/config/config.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | /*
4 | This module reads and does some basic validation on the TOML file used for
5 | Batchiepatchie configuration. It also fetches things from S3 (the database
6 | password) if it is configured to do so.
7 | 
8 | An exported structure, Config, is then made available to the rest of Batchiepatchie.
9 | */
10 |
11 | import (
12 | "fmt"
13 | "reflect"
14 |
15 | "github.com/AdRoll/batchiepatchie/awsclients"
16 | "github.com/AdRoll/batchiepatchie/envsubstituter"
17 | "github.com/AdRoll/batchiepatchie/fetcher"
18 | "github.com/BurntSushi/toml"
19 | log "github.com/sirupsen/logrus"
20 | )
21 |
22 | type Config struct {
23 | Port int `toml:"port"`
24 | Host string `toml:"host"`
25 | DatabaseHost string `toml:"database_host"`
26 | DatabasePort int `toml:"database_port"`
27 | DatabaseUsername string `toml:"database_username"`
28 | DatabaseName string `toml:"database_name"`
29 | DatabasePassword string `toml:"database_password"`
30 | DatabaseRootCertificate string `toml:"database_root_certificate"`
31 |
32 | LogEntriesHost string `toml:"logentries_host"`
33 | LogEntriesKey string `toml:"logentries_token"`
34 |
35 | Region string `toml:"region"`
36 |
37 | PasswordBucket string `toml:"password_bucket"`
38 | PasswordKey string `toml:"password_key"`
39 |
40 | FrontendAssets string `toml:"frontend_assets"`
41 | FrontendAssetsLocalPrefix string `toml:"frontend_assets_local_prefix"`
42 | FrontendAssetsBucket string `toml:"frontend_assets_bucket"`
43 | FrontendAssetsKey string `toml:"frontend_assets_key"`
44 |
45 | SyncPeriod int64 `toml:"sync_period"`
46 | ScalePeriod int64 `toml:"scale_period"`
47 | CleanPeriod int64 `toml:"clean_period"`
48 |
49 | KillStuckJobs bool `toml:"kill_stuck_jobs"`
50 |
51 | UseDatadogTracing bool `toml:"use_datadog_tracing"`
52 |
53 | UseAutoScaler bool `toml:"use_auto_scaler"`
54 | UseCleaner bool `toml:"use_cleaner"`
55 | }
56 |
57 | // Store config in a global variable
58 | var Conf Config
59 |
60 | func readPasswordConfiguration(contents string) (*string, error) {
61 | var pw_conf Config
62 | if _, err := toml.Decode(contents, &pw_conf); err != nil {
63 | return nil, err
64 | }
65 |
66 | if pw_conf.DatabasePassword == "" {
67 |         return nil, fmt.Errorf("no password specified in password file")
68 | }
69 |
70 | return &pw_conf.DatabasePassword, nil
71 | }
72 |
73 | func ReadConfiguration(filename string) error {
74 | tomlData, err := fetcher.ReadAllNoSessions(filename)
75 | if err != nil {
76 | return err
77 | }
78 |
79 | Conf = Config{
80 | // Default values here
81 | SyncPeriod: 30,
82 | ScalePeriod: 30,
83 | CleanPeriod: 30 * 60, // 30 minutes in seconds
84 | KillStuckJobs: false,
85 | UseAutoScaler: true,
86 | UseCleaner: false,
87 | }
88 | if _, err := toml.Decode(string(tomlData), &Conf); err != nil {
89 | return err
90 | }
91 |
92 |     // Substitute environment variables into every string field (using reflection).
93 |     // See the envsubstituter module for how the values are injected.
94 | rconf := reflect.ValueOf(&Conf)
95 | for i := 0; i < rconf.Elem().NumField(); i++ {
96 | struct_elem_v := rconf.Elem().Field(i)
97 | if struct_elem_v.Kind().String() == reflect.ValueOf("str").Kind().String() {
98 | ptr := struct_elem_v.Addr().Interface().(*string)
99 | sub, err := envsubstituter.EnvironmentSubstitute(*ptr)
100 | if err != nil {
101 | return err
102 | }
103 | *ptr = sub
104 | }
105 | }
106 |
107 | if Conf.Region == "" {
108 | log.Fatal("AWS region must be supplied.")
109 | }
110 |
111 | /* Sanity check configuration (Port == 0 if not supplied) */
112 | if Conf.Port < 1 || Conf.Port > 65535 {
113 | log.Fatal("Port is invalid; expecting port between 1 and 65535")
114 | }
115 |
116 | // Note: not checking password; it can be legitimately empty
117 | if Conf.DatabaseHost == "" || Conf.DatabaseUsername == "" || Conf.DatabaseName == "" {
118 | log.Fatal("Incomplete Database configuration. database_host, database_port, database_username and database_name must be supplied in .toml configuration or you must use S3 configuration.")
119 | }
120 |
121 | if Conf.DatabasePort < 1 || Conf.DatabasePort > 65535 {
122 | log.Fatal("Database port is invalid; expecting port between 1 and 65535.")
123 | }
124 |
125 | // Where are my frontend assets? Check that the configuration makes sense
126 | if Conf.FrontendAssets != "local" && Conf.FrontendAssets != "s3" {
127 | log.Fatal("frontend_assets must be either 'local' or 's3'.")
128 | }
129 |
130 | err = awsclients.OpenSessions(Conf.Region)
131 | if err != nil {
132 | log.Fatal("Cannot open AWS sessions: ", err)
133 | }
134 |
135 | if Conf.FrontendAssets == "local" {
136 | if Conf.FrontendAssetsBucket != "" || Conf.FrontendAssetsKey != "" {
137 |             log.Fatal("When using frontend_assets=\"local\" then neither frontend_assets_bucket nor frontend_assets_key should be specified.")
138 | }
139 | } else if Conf.FrontendAssets == "s3" {
140 | if Conf.FrontendAssetsLocalPrefix != "" {
141 | log.Fatal("When using frontend_assets=\"s3\" then frontend_assets_local_prefix should not be specified.")
142 | }
143 | if Conf.FrontendAssetsBucket == "" {
144 | log.Fatal("frontend_assets_bucket is empty. You need to set it.")
145 | }
146 | if Conf.FrontendAssetsKey == "" {
147 | log.Fatal("frontend_assets_key is empty. You need to set it.")
148 | }
149 | }
150 |
151 | if Conf.PasswordKey != "" {
152 | // Using S3 for passwords? Fetch the keys from AWS bucket.
153 | // Check that we are not using both database + KMS conf
154 | if Conf.DatabasePassword != "" {
155 | log.Fatal("Both KMS and non-KMS password supplied; can't decide which one to use.")
156 | }
157 | secret_key := Conf.PasswordKey
158 |
159 | s3path := "s3://" + Conf.PasswordBucket + "/" + secret_key
160 |
161 | log.Info("Fetching secret key from ", s3path)
162 | out, err := fetcher.ReadAll(s3path)
163 | if err != nil {
164 | log.Fatal("Cannot get secret key file: ", err)
165 | }
166 |
167 | pw, err := readPasswordConfiguration(string(out))
168 | if err != nil {
169 | log.Fatal("Cannot parse password file: ", err)
170 | }
171 |
172 | Conf.DatabasePassword = *pw
173 | }
174 |
175 | return nil
176 | }
177 |
--------------------------------------------------------------------------------
/batchiepatchie.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "net/http"
5 | "os"
6 | "path"
7 | "strconv"
8 |
9 | "github.com/AdRoll/batchiepatchie/config"
10 | "github.com/AdRoll/batchiepatchie/fetcher"
11 | "github.com/AdRoll/batchiepatchie/handlers"
12 | "github.com/AdRoll/batchiepatchie/jobs"
13 | "github.com/AdRoll/batchiepatchie/syncer"
14 | "github.com/bakatz/echo-logrusmiddleware"
15 | "github.com/labstack/echo"
16 | "github.com/opentracing/opentracing-go"
17 | log "github.com/sirupsen/logrus"
18 | "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/opentracer"
19 | "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer"
20 | )
21 |
22 | // fetchIndex fetches the index.html from s3
23 | func fetchIndex() ([]byte, error) {
24 | if config.Conf.FrontendAssets == "local" {
25 | dir := path.Join(config.Conf.FrontendAssetsLocalPrefix, "index.html")
26 | log.Info("Getting index.html from local file:", dir)
27 | return fetcher.ReadAll(dir)
28 | }
29 | s3path := "s3://" + config.Conf.FrontendAssetsBucket + "/" + config.Conf.FrontendAssetsKey
30 | log.Info("Downloading index.html from ", s3path)
31 | return fetcher.ReadAll(s3path)
32 | }
33 |
34 | func pingHandler(c echo.Context) error {
35 | return c.String(http.StatusOK, "pong")
36 | }
37 |
38 | func main() {
39 | configurationFile := ""
40 | if len(os.Args) > 2 {
41 |         log.Fatal("batchiepatchie expects exactly one argument: the filename of the .toml configuration.")
42 | } else if len(os.Args) == 2 {
43 | configurationFile = os.Args[1]
44 | } else {
45 | /* Fallback to using environment variables */
46 | configurationFile = os.Getenv("BATCHIEPATCHIE_CONFIG")
47 | if configurationFile == "" {
48 | log.Fatal("No configuration file passed through either command line argument or BATCHIEPATCHIE_CONFIG environment variable.")
49 | }
50 | }
51 |
52 | log.SetFormatter(&log.JSONFormatter{})
53 | log.SetOutput(os.Stderr)
54 |
55 | // Sets the global config.Conf
56 | err := config.ReadConfiguration(configurationFile)
57 | if err != nil {
58 | log.Fatal("Reading configuration failed, ", err)
59 | }
60 |
61 | if config.Conf.LogEntriesKey != "" {
62 | log.Info("logentries_token supplied, will connect to LogEntries.")
63 | logentries_host := "data.logentries.com:443"
64 | if config.Conf.LogEntriesHost != "" {
65 | logentries_host = config.Conf.LogEntriesHost
66 | }
67 | setUpLogEntriesHooks(logentries_host, config.Conf.LogEntriesKey)
68 | }
69 |
70 | var trace opentracing.Tracer
71 | if config.Conf.UseDatadogTracing {
72 | ip := os.Getenv("BATCHIEPATCHIE_IP")
73 | if ip != "" {
74 | // If we have been passed an IP explicitly, attempt to
75 | // use it to connect to the DataDog tracer. When we run
76 | // batchiepatchie inside a Docker container and the trace
77 | // agent on the host, this lets us connect to the agent
78 | // running on the host.
79 | agentAddr := ip + ":8126"
80 | log.Info("Will attempt to ddtrace into ", agentAddr)
81 | trace = opentracer.New(tracer.WithServiceName("batchiepatchie"), tracer.WithAgentAddr(agentAddr))
82 | } else {
83 | trace = opentracer.New(tracer.WithServiceName("batchiepatchie"))
84 | }
85 | } else {
86 | trace = opentracing.NoopTracer{}
87 | }
88 | opentracing.SetGlobalTracer(trace)
89 |
90 | storage, err := jobs.NewPostgreSQLStore(config.Conf.DatabaseHost, config.Conf.DatabasePort, config.Conf.DatabaseUsername, config.Conf.DatabaseName, config.Conf.DatabasePassword, config.Conf.DatabaseRootCertificate)
91 | if err != nil {
92 | log.Fatal("Creating postgresql store failed, ", err)
93 | }
94 | log.Info("Successfully connected to PostgreSQL database.")
95 |
96 | killer, err := jobs.NewKillerHandler()
97 | if err != nil {
98 | log.Fatal("Creating killer handler failed, ", err)
99 | }
100 | log.Info("Killer handler started.")
101 |
102 | index, err := fetchIndex()
103 | if err != nil {
104 | log.Error("Falling back to basic index.html: ", err)
105 | version := os.Getenv("VERSION")
106 | if version == "" {
107 | index = []byte("Cannot find index.html. VERSION environment variable is not set. Check that frontend has been deployed correctly and then restart backend.")
108 | } else {
109 | index = []byte("Cannot find index.html. (VERSION environment variable has been set but no file could be fetched). Check that frontend has been deployed correctly and then restart backend.")
110 | }
111 | }
112 |
113 | // Launch the periodic synchronizer
114 | syncer.RunPeriodicSynchronizer(storage, killer)
115 | // Launch the periodic scaler
116 | if config.Conf.UseAutoScaler {
117 | log.Info("Auto-scaler enabled.")
118 | syncer.RunPeriodicScaler(storage)
119 | } else {
120 | log.Info("Auto-scaler disabled.")
121 | }
122 | // Launch the periodic cleaner
123 | if config.Conf.UseCleaner {
124 | syncer.RunPeriodicCleaner(storage)
125 | } else {
126 | log.Info("Cleaner disabled.")
127 | }
128 |
129 | // handlers.Server is a structure holding context shared between requests
130 | s := &handlers.Server{
131 | Storage: storage,
132 | Killer: killer,
133 | Index: index,
134 | }
135 |
136 | e := echo.New()
137 |
138 | // Logging middleware for API requests
139 | e.Logger = logrusmiddleware.Logger{Logger: log.StandardLogger()}
140 | e.Use(logrusmiddleware.Hook())
141 |
142 | // Jobs API
143 | api := e.Group("/api/v1")
144 | {
145 | api.GET("/jobs/:id", s.FindOne)
146 | api.GET("/jobs", s.Find)
147 | api.POST("/jobs/kill", s.KillMany)
148 | api.GET("/jobs/:id/logs", s.FetchLogs)
149 | api.GET("/job_queues/active", s.ListActiveJobQueues)
150 | api.GET("/job_queues/all", s.ListAllJobQueues)
151 | api.POST("/job_queues/:name/activate", s.ActivateJobQueue)
152 | api.POST("/job_queues/:name/deactivate", s.DeactivateJobQueue)
153 | api.GET("/jobs/:id/status", s.GetStatus)
154 | api.POST("/jobs/notify", s.JobStatusNotification)
155 | api.GET("/jobs/:id/status_websocket", s.SubscribeToJobEvent)
156 | api.GET("/jobs/stats", s.JobStats)
157 | }
158 |
159 | e.GET("/ping", pingHandler)
160 | e.GET("/", s.IndexHandler)
161 | e.GET("/stats", s.IndexHandler)
162 | e.GET("/index.html", s.IndexHandler)
163 |
164 | // These are pseudo-URLs; the frontend will handle displaying the correct page
165 | e.GET("/job/:id", s.IndexHandler)
166 | e.GET("/job_queues", s.IndexHandler)
167 |
168 | if config.Conf.FrontendAssets == "local" {
169 | e.Static("/*", config.Conf.FrontendAssetsLocalPrefix)
170 | }
171 |
172 | // Launch web server
173 | e.Logger.Fatal(e.Start(config.Conf.Host + ":" + strconv.Itoa(config.Conf.Port)))
174 | }
175 |
--------------------------------------------------------------------------------
/docs/docs/deployment.md:
--------------------------------------------------------------------------------
1 | Batchiepatchie - Deployment
2 | ===========================
3 |
4 | This page describes how to deploy Batchiepatchie in a production environment. At
5 | the same time, it describes how Batchiepatchie is designed to be run.
6 |
7 | Operation
8 | ---------
9 |
10 | Batchiepatchie works by mirroring the state of AWS Batch in a PostgreSQL
11 | database. Unlike AWS Batch, Batchiepatchie will not forget about historical
12 | jobs (unless we manually delete old jobs from the database).
13 |
14 | There are two mechanisms Batchiepatchie can use to keep its mirrored state up to date:
15 |
16 | * Batchiepatchie polls periodically for all state from AWS Batch.
17 |
18 | * Batchiepatchie can be called by an AWS Lambda function to instantly update the state of a job.
19 |
20 | Out of the box, the polling mechanism is enabled and will keep the jobs up to date
21 | in Batchiepatchie's eyes. The AWS Lambda setup is more complicated and is
22 | currently undocumented; we will fix this in the future.
23 |
24 | Building
25 | --------
26 |
27 | Batchiepatchie is a Go project and if you have Go set up correctly, `go get`
28 | (to get dependencies) and `go build` should be sufficient inside
29 | Batchiepatchie's source code directory.
30 |
31 | $ go get
32 | $ go build
33 |
34 | You should end up with a `batchiepatchie` executable file in the current
35 | directory.
36 |
37 | Configuration file
38 | ------------------
39 |
40 | Batchiepatchie is driven by a configuration file. An example is provided in the
41 | Batchiepatchie repository, called `test.toml`. Its contents are reproduced below:
42 |
43 | ```toml
44 | host = "0.0.0.0"
45 | port = 5454
46 | region = "us-west-2"
47 | database_host = "postgres"
48 | database_port = 5432
49 | database_username = "postgres"
50 | database_name = "postgres"
51 | database_password = "123456"
52 | frontend_assets = "local"
53 | frontend_assets_local_prefix = "frontend/dist"
54 | ```
55 |
56 | We will go through possible settings one by one.
57 |
58 | * `host` and `port`: These define which host and port Batchiepatchie should listen on.
59 | * `region`: This specifies which AWS region Batchiepatchie should operate in.
60 | * `database_host`: This describes the hostname to use for PostgreSQL store.
61 | * `database_port`: This describes the port to connect to for PostgreSQL store.
62 | * `database_username`: This specifies the username to use for PostgreSQL store.
63 | * `database_name`: This specifies the database name to use for PostgreSQL store.
64 | * `database_password`: This specifies the password to use to connect to PostgreSQL store. Mutually exclusive with `password_bucket` and `password_key` settings.
65 | * `password_bucket` and `password_key`: These specify an S3 bucket and key for an S3 object that contains the password. This way you can store your passwords encrypted in S3. The S3 object should contain a single line of the form `database_password = "..."` (see the example at the end of this page). These settings are mutually exclusive with the plain `database_password` setting.
66 | * `frontend_assets`: This must be either `local` or `s3`. Batchiepatchie needs static files to show its UI and these static files can be stored locally or in S3.
67 | * `frontend_assets_local_prefix`: When `frontend_assets` is `local`, this must point to the directory where `index.html` is located. Note that Batchiepatchie does not come with pre-built assets; you will need to build them in the `frontend/` directory in the Batchiepatchie repository first. Refer to [frontend build instructions](frontend.md) for more information.
68 | * `frontend_assets_bucket`: When `frontend_assets` is `s3`, this must point to the S3 bucket name where static assets are located.
69 | * `frontend_assets_key`: When `frontend_assets` is `s3`, this must point to the key name that contains `index.html` for Batchiepatchie. Batchiepatchie will load this file from S3 at start up. Note that other static files are not loaded through S3.
70 | * `sync_period`: This specifies the number of seconds between polls of AWS Batch. By default, it is 30 seconds.
71 | * `scale_period`: This specifies the number of seconds between scaling hack polls. See more information about the scaling hack on [this page](scaling.md). By default, this setting is 30 seconds.
72 |
73 | The configuration file is passed when invoking Batchiepatchie.
74 |
75 | $ ./batchiepatchie configuration.toml
76 |
77 | The configuration file can also be placed in S3:
78 |
79 | $ ./batchiepatchie s3://my-bucket/configuration.toml
80 |
81 | Settings about which job queues to ingest into the Batchiepatchie database are
82 | not in the configuration file. These are stored in the database instead.
83 |
84 | Database
85 | --------
86 |
87 | Batchiepatchie requires a PostgreSQL database to store persistent data. We have
88 | tested Batchiepatchie with PostgreSQL 9.6, so we know the 9.6 family works. The most
89 | exotic feature Batchiepatchie makes use of is [trigram
90 | indexes](https://www.postgresql.org/docs/9.6/static/pgtrgm.html) and these seem
91 | to have been available since PostgreSQL 9.1. It is possible Batchiepatchie will
92 | work with older PostgreSQL versions, such as 9.1, but we have not tested this.
93 |
94 | The database must be initialized with a schema. The Batchiepatchie project uses
95 | [goose](https://github.com/pressly/goose) for migrations, and the migrations
96 | are located in `migrations/` directory in Batchiepatchie repository.
97 |
98 | If you have credentials to a PostgreSQL database, you can run migrations
99 | with goose as in the example below:
100 |
101 | $ go get -u github.com/pressly/goose/cmd/goose # Install goose
102 | $ cd migrations
103 | $ goose postgres "user=batchiepatchie dbname=batchiepatchie password=blahblah" up
104 |
105 | Once the database has been initialized with the proper schema, Batchiepatchie
106 | can be started.
107 |
108 | IAM policies
109 | ------------
110 |
111 | During its operation, Batchiepatchie makes various AWS calls and thus requires
112 | permissions to do these operations. Below is a list of permissions
113 | Batchiepatchie needs:
114 |
115 | ### Essential permissions:
116 |
117 | batch:DescribeJobs
118 | batch:DescribeJobQueues
119 | batch:DescribeComputeEnvironments
120 | batch:ListJobs
121 | batch:TerminateJob
122 | ec2:DescribeInstances
123 | ecs:DescribeContainerInstances
124 | ecs:DescribeTasks
125 | ecs:ListContainerInstances
126 | ecs:ListTasks
127 | logs:DescribeLogStreams
128 | logs:GetLogEvents
129 |
130 | Aside from `batch:TerminateJob`, the essential permissions are all about
131 | fetching information from AWS.
132 |
133 | ### Optional permissions:
134 |
135 | batch:UpdateComputeEnvironment
136 | ec2:TerminateInstances
137 | s3:GetObject
138 |
139 | S3 permissions are required if you place any configuration in S3; Batchiepatchie needs to be able to fetch it.
140 |
141 | If you want to use the [scaling hack feature](scaling.md) of Batchiepatchie, you will need
142 | to let it modify compute environments with `batch:UpdateComputeEnvironment`.
143 |
144 | If you want to use the [terminate instance hack feature](terminator.md) of
145 | Batchiepatchie, you will need to give it permission to terminate instances.
146 |
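147 | Examples
148 | --------
149 | 
150 | ### Example: password file in S3
151 | 
152 | As a sketch of the `password_bucket`/`password_key` mechanism described
153 | above (the bucket and key names below are made up), the S3 object is a
154 | one-line TOML fragment containing only the password:
155 | 
156 | ```toml
157 | database_password = "123456"
158 | ```
159 | 
160 | The main configuration file then references the object instead of carrying
161 | a plain `database_password`:
162 | 
163 | ```toml
164 | password_bucket = "my-secrets-bucket"
165 | password_key = "batchiepatchie/password.toml"
166 | ```
167 | 
168 | ### Example: IAM policy document
169 | 
170 | The permission lists above translate directly into an IAM policy document.
171 | The sketch below grants the essential permissions and leaves `Resource`
172 | unscoped; you will likely want to tighten that for production:
173 | 
174 |     {
175 |         "Version": "2012-10-17",
176 |         "Statement": [
177 |             {
178 |                 "Effect": "Allow",
179 |                 "Action": [
180 |                     "batch:DescribeJobs",
181 |                     "batch:DescribeJobQueues",
182 |                     "batch:DescribeComputeEnvironments",
183 |                     "batch:ListJobs",
184 |                     "batch:TerminateJob",
185 |                     "ec2:DescribeInstances",
186 |                     "ecs:DescribeContainerInstances",
187 |                     "ecs:DescribeTasks",
188 |                     "ecs:ListContainerInstances",
189 |                     "ecs:ListTasks",
190 |                     "logs:DescribeLogStreams",
191 |                     "logs:GetLogEvents"
192 |                 ],
193 |                 "Resource": "*"
194 |             }
195 |         ]
196 |     }
197 | 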
--------------------------------------------------------------------------------
/jobs/jobs.go:
--------------------------------------------------------------------------------
1 | /*
2 | Package jobs implements the basic Job structure and related functionality
3 | */
4 | package jobs
5 |
6 | import (
7 | "database/sql/driver"
8 | "encoding/json"
9 | "errors"
10 | "time"
11 | )
12 |
13 | // Job Status Constants
14 | const (
15 | StatusFailed = "FAILED"
16 | StatusPending = "PENDING"
17 | StatusRunnable = "RUNNABLE"
18 | StatusRunning = "RUNNING"
19 | StatusStarting = "STARTING"
20 | StatusSubmitted = "SUBMITTED"
21 | StatusSucceeded = "SUCCEEDED"
22 | )
23 |
24 | // StatusList is a list of all possible job statuses
25 | var StatusList = [...]string{
26 | StatusFailed,
27 | StatusPending,
28 | StatusRunnable,
29 | StatusRunning,
30 | StatusStarting,
31 | StatusSubmitted,
32 | StatusSucceeded,
33 | }
34 |
35 | type JobStatus struct {
36 | Id string `json:"id"`
37 | Status string `json:"status"`
38 | }
39 |
40 | type Job struct {
41 | Id string `json:"id"`
42 | Name string `json:"name"`
43 | Status string `json:"status"`
44 | Description string `json:"desc"`
45 | LastUpdated time.Time `json:"last_updated"`
46 | JobQueue string `json:"job_queue"`
47 | Image string `json:"image"`
48 | CreatedAt time.Time `json:"created_at"`
49 | StoppedAt *time.Time `json:"stopped_at"`
50 | VCpus int64 `json:"vcpus"`
51 | Memory int64 `json:"memory"`
52 | Timeout int `json:"timeout"`
53 | CommandLine string `json:"command_line"`
54 | StatusReason *string `json:"status_reason"`
55 | RunStartTime *time.Time `json:"run_start_time"`
56 | ExitCode *int64 `json:"exitcode"`
57 | LogStreamName *string `json:"log_stream_name"`
58 | TerminationRequested bool `json:"termination_requested"`
59 | TaskARN *string `json:"task_arn"`
60 | InstanceID *string `json:"instance_id"`
61 | PublicIP *string `json:"public_ip"`
62 | PrivateIP *string `json:"private_ip"`
63 | ArrayProperties *ArrayProperties `json:"array_properties,omitempty"`
64 | }
65 |
66 | // ArrayProperties are properties of a parent array job.
67 | type ArrayProperties struct {
68 | Size int64 `json:"size"`
69 | StatusSummary StatusSummary `json:"status_summary"`
70 | }
71 |
72 | // Value implements the driver.Valuer interface. This method
73 | // is needed for JSONB serialization to the database.
74 | func (a ArrayProperties) Value() (driver.Value, error) {
75 | return json.Marshal(a)
76 | }
77 |
78 | // Scan implements the sql.Scanner interface. This method
79 | // is needed for JSONB deserialization from the database.
80 | func (a *ArrayProperties) Scan(value interface{}) error {
81 | b, ok := value.([]byte)
82 | if !ok {
83 | return errors.New("type assertion to []byte failed")
84 | }
85 |
86 | return json.Unmarshal(b, &a)
87 | }
88 |
89 | // StatusSummary holds the counts of statuses of child array jobs
90 | type StatusSummary struct {
91 | Starting int64 `json:"starting"`
92 | Failed int64 `json:"failed"`
93 | Running int64 `json:"running"`
94 | Succeeded int64 `json:"succeeded"`
95 | Runnable int64 `json:"runnable"`
96 | Submitted int64 `json:"submitted"`
97 | Pending int64 `json:"pending"`
98 | }
99 |
100 | // Options is the query options for the Find method to use
101 | type Options struct {
102 | Search string
103 | DateRange string
104 | Limit int
105 | Offset int
106 | Queues []string
107 | SortBy string
108 | SortAsc bool
109 | Status []string
110 | }
111 |
112 | type JobStatsOptions struct {
113 | Queues []string
114 | Status []string
115 | Interval int64
116 | Start int64
117 | End int64
118 | }
119 |
120 | type JobStats struct {
121 | JobQueue string `json:"job_queue"`
122 | Status string `json:"status"`
123 | Timestamp float64 `json:"timestamp"`
124 | VCPUSeconds float64 `json:"vcpu_seconds"`
125 | MemorySeconds float64 `json:"memory_seconds"`
126 | InstanceSeconds float64 `json:"instance_seconds"`
127 | JobCount int `json:"job_count"`
128 | Interval int64 `json:"interval"`
129 | }
130 |
131 | // KillTaskID is a struct to handle a JSON request to kill a task
132 | type KillTaskID struct {
133 | ID string `json:"id" form:"id" query:"id"`
134 | }
135 |
136 | // FinderStorer is an interface that can both save and retrieve jobs
137 | type FinderStorer interface {
138 | Finder
139 | Storer
140 |
141 | // Methods to get information about Job Queues
142 | ListActiveJobQueues() ([]string, error)
143 | ListForcedScalingJobQueues() ([]string, error)
144 |
145 | ActivateJobQueue(string) error
146 | DeactivateJobQueue(string) error
147 | }
148 |
149 | // Finder is an interface to find jobs in a database/store
150 | type Finder interface {
151 | // Find finds jobs matching the query
152 | Find(opts *Options) ([]*Job, error)
153 |
154 | // FindOne finds a job matching the query
155 | FindOne(query string) (*Job, error)
156 |
157 | // FindTimedoutJobs finds all job IDs that should have timed out by now
158 | FindTimedoutJobs() ([]string, error)
159 |
160 | // Simple endpoint that returns a string for job status.
161 | GetStatus(jobid string) (*JobStatus, error)
162 |
163 | JobStats(opts *JobStatsOptions) ([]*JobStats, error)
164 | }
165 |
166 | // Storer is an interface to save jobs in a database/store
167 | type Storer interface {
168 | // Store saves a batch of jobs
169 | Store(job []*Job) error
170 |
171 | // Gives the store a chance to mark as stale any jobs we no longer know about.
172 | // The argument is a set (value is ignored) of all job_ids currently known to AWS Batch.
173 | StaleOldJobs(map[string]bool) error
174 |
175 | // Finds estimated load per job queue
176 | EstimateRunningLoadByJobQueue([]string) (map[string]RunningLoad, error)
177 |
178 | // Update compute environment logs
179 | UpdateComputeEnvironmentsLog([]ComputeEnvironment) error
180 |
181 | // Update job summaries
182 | UpdateJobSummaryLog([]JobSummary) error
183 |
184 | // Marks a job as having had its termination requested
185 | UpdateJobLogTerminationRequested(string) error
186 |
187 | // Updates information on task ARNs and EC2 metadata
188 | UpdateTaskArnsInstanceIDs(map[string]Ec2Info, map[string]string) error
189 |
190 | // Updates information on EC2 instances running on ECS
191 | UpdateECSInstances(map[string]Ec2Info, map[string][]string) error
192 |
193 | // Gets alive EC2 instances (according to database)
194 | GetAliveEC2Instances() ([]string, error)
195 |
196 | // Gets all instance IDs that have jobs stuck in "STARTING" status
197 | GetStartingStateStuckEC2Instances() ([]string, error)
198 |
199 | // Subscribes to updates about a job status. (see more info on this
200 | // function in postgres_store.go)
201 | SubscribeToJobStatus(jobID string) (<-chan Job, func())
202 | }
203 |
204 | // Cleaner allows you to clean the database
205 | type Cleaner interface {
206 | // CleanOldJobs cleans old jobs from the database
207 | CleanOldJobs() error
208 |
209 | // CleanOldInstanceEventLogs cleans old instance event logs from the database
210 | CleanOldInstanceEventLogs() error
211 | }
212 |
213 | // Killer is an interface to kill jobs in the queue
214 | type Killer interface {
215 | // KillOne kills a job matching the query
216 | KillOne(jobID string, reason string, store Storer) error
217 |
218 | // Kills jobs and instances that are stuck in STARTING status
219 | KillInstances(instances []string) error
220 | }
221 |
222 | // This structure describes how many vCPUs and how much memory the currently queued jobs require
223 | type RunningLoad struct {
224 | WantedVCpus int64
225 | WantedMemory int64
226 | }
227 |
228 | type ComputeEnvironment struct {
229 | Name string
230 | WantedvCpus int64
231 | MinvCpus int64
232 | MaxvCpus int64
233 | State string
234 | ServiceRole string
235 | }
236 |
237 | type JobSummary struct {
238 | JobQueue string
239 | Submitted int64
240 | Pending int64
241 | Runnable int64
242 | Starting int64
243 | Running int64
244 | }
245 |
246 | type Ec2Info struct {
247 | PrivateIP *string
248 | PublicIP *string
249 | AMI string
250 | ComputeEnvironmentARN string
251 | ECSClusterARN string
252 | AvailabilityZone string
253 | SpotInstanceRequestID *string
254 | InstanceType string
255 | LaunchedAt *time.Time
256 | }
257 |
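258 | // The sketch below is illustrative documentation rather than part of the
259 | // original file: it shows how the interfaces above compose, using only
260 | // methods declared in this package. The name KillTimedoutJobs is
261 | // hypothetical; the actual wiring is done by the syncer, which main hands
262 | // both a store and a killer.
263 | func KillTimedoutJobs(fs FinderStorer, killer Killer) error {
264 |     // Ask the store for every job that should have timed out by now.
265 |     ids, err := fs.FindTimedoutJobs()
266 |     if err != nil {
267 |         return err
268 |     }
269 |     for _, id := range ids {
270 |         // KillOne also takes the store, per its signature, so the
271 |         // killer can record the termination request.
272 |         if err := killer.KillOne(id, "timed out", fs); err != nil {
273 |             return err
274 |         }
275 |     }
276 |     return nil
277 | }
278 | 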
--------------------------------------------------------------------------------
/jobs/monitor_ecs_clusters.go:
--------------------------------------------------------------------------------
1 | package jobs
2 |
3 | import (
4 | "github.com/AdRoll/batchiepatchie/awsclients"
5 | "github.com/aws/aws-sdk-go/service/batch"
6 | "github.com/aws/aws-sdk-go/service/ec2"
7 | "github.com/aws/aws-sdk-go/service/ecs"
8 | "github.com/opentracing/opentracing-go"
9 | log "github.com/sirupsen/logrus"
10 | )
11 |
12 | type arnInfo struct {
13 | ecsClusterARN string
14 | computeEnvironmentARN string
15 | }
16 |
17 | func MonitorECSClusters(fs Storer, queues []string) error {
18 | span := opentracing.StartSpan("MonitorECSClusters")
19 | defer span.Finish()
20 |
21 | /* TODO: handle pagination in all these API calls. */
22 |
23 | /* First we collect all compute environments referenced by any of the
24 |  * given queues. */
25 | job_queue_names := make([]*string, 0)
26 | for _, job_queue := range queues {
27 | jq := job_queue
28 | job_queue_names = append(job_queue_names, &jq)
29 | }
30 |
31 | job_queues := &batch.DescribeJobQueuesInput{
32 | JobQueues: job_queue_names,
33 | }
34 |
35 | job_queue_descs, err := awsclients.Batch.DescribeJobQueues(job_queues)
36 | if err != nil {
37 | log.Warning("Failed to describe job queues: ", err)
38 | return err
39 | }
40 |
41 | compute_environments := make(map[string]bool)
42 | for _, job_queue_desc := range job_queue_descs.JobQueues {
43 | for _, compute_env_order := range job_queue_desc.ComputeEnvironmentOrder {
44 | compute_environments[*compute_env_order.ComputeEnvironment] = true
45 | }
46 | }
47 |
48 | /* Now that we have the compute environments (in the map above), we can get
49 |  * their descriptions and the ECS cluster names they point to. */
50 | compute_environments_lst := make([]*string, len(compute_environments))
51 | i := 0
52 | for name := range compute_environments {
53 | n := name
54 | compute_environments_lst[i] = &n
55 | i++
56 | }
57 |
58 | compute_environments_input := &batch.DescribeComputeEnvironmentsInput{
59 | ComputeEnvironments: compute_environments_lst,
60 | }
61 |
62 | compute_environment_descs, err := awsclients.Batch.DescribeComputeEnvironments(compute_environments_input)
63 | if err != nil {
64 | log.Warning("Failed to describe compute environments: ", err)
65 | return err
66 | }
67 |
68 | ecs_clusters := make(map[string]string)
69 | for _, compute_environment_desc := range compute_environment_descs.ComputeEnvironments {
70 | if compute_environment_desc.EcsClusterArn != nil {
71 | ecs_clusters[*compute_environment_desc.EcsClusterArn] = *compute_environment_desc.ComputeEnvironmentArn
72 | }
73 | }
74 |
75 | ecs_clusters_lst := make([]*string, len(ecs_clusters))
76 | i = 0
77 |
78 | task_ec2_mapping := make(map[string]string)
79 | ec2instances_set := make(map[string]arnInfo)
80 | tasks_per_ec2instance := make(map[string][]string)
81 |
82 | for name := range ecs_clusters {
83 | n := name
84 | ecs_clusters_lst[i] = &n
85 | i++
86 |
87 | task_mapping := make(map[string]string)
88 | var next_token *string
89 | for {
90 | var tasks_input *ecs.ListTasksInput
91 | if next_token == nil {
92 | tasks_input = &ecs.ListTasksInput{
93 | Cluster: &n,
94 | }
95 | } else {
96 | tasks_input = &ecs.ListTasksInput{
97 | Cluster: &n,
98 | NextToken: next_token,
99 | }
100 | }
101 | task_listing, err := awsclients.ECS.ListTasks(tasks_input)
102 | if err != nil {
103 | log.Warning("Failed to list tasks: ", err)
104 | return err
105 | }
106 |
107 | task_arns := make([]*string, 0)
108 | for _, task := range task_listing.TaskArns {
109 | n := *task
110 | task_arns = append(task_arns, &n)
111 | }
112 |
113 | if len(task_arns) > 0 {
114 | describe_tasks := &ecs.DescribeTasksInput{
115 | Cluster: &n,
116 | Tasks: task_arns,
117 | }
118 |
119 | task_descs, err := awsclients.ECS.DescribeTasks(describe_tasks)
120 | if err != nil {
121 | log.Warning("Failed to describe tasks: ", err)
122 | return err
123 | }
124 |
125 | for _, task_desc := range task_descs.Tasks {
126 | task_mapping[*task_desc.TaskArn] = *task_desc.ContainerInstanceArn
127 | }
128 | }
129 |
130 | next_token = task_listing.NextToken
131 | if next_token == nil {
132 | break
133 | }
134 | }
135 | /* task_mapping should now contain a mapping from task ARNs to container instance ARNs.
136 | Now, figure out the actual instance IDs for those container instance ARNs.
137 | 
138 | We first get all container instance ARNs by API call and then
139 | complement them with the ones we got from tasks. */
140 |
141 | next_token = nil
142 | container_arn_set := make(map[string]bool, 0)
143 | for {
144 | var describe_container_instances *ecs.ListContainerInstancesInput
145 | if next_token == nil {
146 | describe_container_instances = &ecs.ListContainerInstancesInput{
147 | Cluster: &n,
148 | }
149 | } else {
150 | describe_container_instances = &ecs.ListContainerInstancesInput{
151 | Cluster: &n,
152 | NextToken: next_token,
153 | }
154 | }
155 |
156 | container_arns, err := awsclients.ECS.ListContainerInstances(describe_container_instances)
157 | if err != nil {
158 | log.Warning("Failed to list container instances: ", err)
159 | return err
160 | }
161 |
162 | for _, arn_ref := range container_arns.ContainerInstanceArns {
163 | if arn_ref != nil {
164 | arn := *arn_ref
165 | container_arn_set[arn] = true
166 | }
167 | }
168 |
169 | next_token = container_arns.NextToken
170 | if next_token == nil {
171 | break
172 | }
173 | }
174 |
175 | for _, container_arn := range task_mapping {
176 | container_arn_set[container_arn] = true
177 | }
178 | container_arn_lst := make([]*string, len(container_arn_set))
179 | j := 0
180 | for container_arn := range container_arn_set {
181 | n := container_arn
182 | container_arn_lst[j] = &n
183 | j++
184 | }
185 |
186 | /* now, describe container_arns */
187 | cursor := 0
188 | for {
189 | if cursor >= len(container_arn_lst) {
190 | break
191 | }
192 | cursor_end := cursor + 50
193 | if cursor_end > len(container_arn_lst) {
194 | cursor_end = len(container_arn_lst)
195 | }
196 |
197 | lst := make([]*string, cursor_end-cursor)
198 | for i, v := range container_arn_lst[cursor:cursor_end] {
199 | n := *v
200 | lst[i] = &n
201 | }
202 | container_input := &ecs.DescribeContainerInstancesInput{
203 | Cluster: &n,
204 | ContainerInstances: lst,
205 | }
206 | cursor += 50
207 | container_descs, err := awsclients.ECS.DescribeContainerInstances(container_input)
208 | if err != nil {
209 | log.Warning("Cannot describe container instances: ", err)
210 | return err
211 | }
212 |
213 | for _, container_desc := range container_descs.ContainerInstances {
214 | /* TODO: this is quadratic. Fix it at some point */
215 | for task_arn, container_arn := range task_mapping {
216 | if container_arn == *container_desc.ContainerInstanceArn {
217 | task_ec2_mapping[task_arn] = *container_desc.Ec2InstanceId
218 | lst, ok := tasks_per_ec2instance[*container_desc.Ec2InstanceId]
219 | if ok {
220 | tasks_per_ec2instance[*container_desc.Ec2InstanceId] = append(lst, task_arn)
221 | } else {
222 | new_lst := make([]string, 1)
223 | new_lst[0] = task_arn
224 | tasks_per_ec2instance[*container_desc.Ec2InstanceId] = new_lst
225 | }
226 | }
227 | }
228 | if container_desc.Ec2InstanceId != nil {
229 | ec2instances_set[*container_desc.Ec2InstanceId] = arnInfo{
230 | ecsClusterARN: n,
231 | computeEnvironmentARN: ecs_clusters[n],
232 | }
233 | /* Make sure there is an empty job listing when there are no tasks on the instance */
234 | _, ok := tasks_per_ec2instance[*container_desc.Ec2InstanceId]
235 | if !ok {
236 | new_lst := make([]string, 0)
237 | tasks_per_ec2instance[*container_desc.Ec2InstanceId] = new_lst
238 | }
239 | }
240 | }
241 | }
242 | }
243 |
244 | ec2instances_lst := make([]*string, 0)
245 | for ec2instance := range ec2instances_set {
246 | n := ec2instance
247 | ec2instances_lst = append(ec2instances_lst, &n)
248 | }
249 |
250 | ec2instances_info := make(map[string]Ec2Info)
251 |
252 | cursor := 0
253 | for {
254 | cursor_end := cursor + 50
255 | if cursor >= len(ec2instances_lst) {
256 | break
257 | }
258 | if cursor_end > len(ec2instances_lst) {
259 | cursor_end = len(ec2instances_lst)
260 | }
261 |
262 | lst := make([]*string, cursor_end-cursor)
263 | for i, v := range ec2instances_lst[cursor:cursor_end] {
264 | n := *v
265 | lst[i] = &n
266 | }
267 | cursor += 50
268 |
269 | instances_input := &ec2.DescribeInstancesInput{
270 | InstanceIds: lst,
271 | }
272 | instances_descs, err := awsclients.EC2.DescribeInstances(instances_input)
273 | if err != nil {
274 | log.Warning("Cannot describe instances: ", err)
275 | return err
276 | }
277 |
278 | for _, reservation := range instances_descs.Reservations {
279 | for _, instance := range reservation.Instances {
280 | // Go has no `fromMaybe`; unwrap the nilable fields by hand.
281 | public_ip := instance.PublicIpAddress
282 | private_ip := instance.PrivateIpAddress
283 | ami := ""
284 | if instance.ImageId != nil {
285 | ami = *instance.ImageId
286 | }
287 | instance_id := ""
288 | if instance.InstanceId != nil {
289 | instance_id = *instance.InstanceId
290 | }
291 | compute_environment_arn := ""
292 | ecs_cluster_arn := ""
293 | info, ok := ec2instances_set[instance_id]
294 | if ok {
295 | compute_environment_arn = info.computeEnvironmentARN
296 | ecs_cluster_arn = info.ecsClusterARN
297 | }
298 | az := ""
299 | if instance.Placement != nil && instance.Placement.AvailabilityZone != nil {
300 | az = *instance.Placement.AvailabilityZone
301 | }
302 | sir := instance.SpotInstanceRequestId
303 | instance_type := ""
304 | if instance.InstanceType != nil {
305 | instance_type = *instance.InstanceType
306 | }
307 | launched_at := instance.LaunchTime
308 | ec2instances_info[*instance.InstanceId] = Ec2Info{
309 | PublicIP: public_ip,
310 | PrivateIP: private_ip,
311 | AMI: ami,
312 | ComputeEnvironmentARN: compute_environment_arn,
313 | ECSClusterARN: ecs_cluster_arn,
314 | AvailabilityZone: az,
315 | SpotInstanceRequestID: sir,
316 | InstanceType: instance_type,
317 | LaunchedAt: launched_at,
318 | }
319 | }
320 | }
321 | }
322 |
323 | err1 := fs.UpdateTaskArnsInstanceIDs(ec2instances_info, task_ec2_mapping)
324 | err2 := fs.UpdateECSInstances(ec2instances_info, tasks_per_ec2instance)
325 |
326 | if err1 != nil {
327 | return err1
328 | }
329 | return err2
330 | }
331 |
--------------------------------------------------------------------------------
/frontend/src/pages/JobsPage/JobsPage.jsx:
--------------------------------------------------------------------------------
1 | import React, { PropTypes } from 'react';
2 | import { connect } from 'react-redux';
3 | import classNames from 'classnames';
4 | import ReactDataGrid from 'react-data-grid';
5 | import {
6 | fetchJobsPage,
7 | killJobs,
8 | setSelectedIds,
9 | setParams,
10 | syncJobQueues,
11 | updateJobsQueryParams,
12 | QUERY_PARAM_DEFAULTS
13 | } from 'stores/job';
14 | import { JOBS } from 'stores/status';
15 | import CommandLineFormatter from 'components/CommandLineFormatter/CommandLineFormatter';
16 | import DateTimeFormatter from 'components/DateTimeFormatter/DateTimeFormatter';
17 | import StatusFormatter from 'components/StatusFormatter/StatusFormatter';
18 | import JobLinkFormatter from 'components/JobLinkFormatter/JobLinkFormatter';
19 | import NameFormatter from 'components/NameFormatter/NameFormatter';
20 | import ImageFormatter from 'components/ImageFormatter/ImageFormatter';
21 | import DurationFormatter from 'components/DurationFormatter/DurationFormatter';
22 | import RowRenderer from 'components/RowRenderer/RowRenderer';
23 | import QueueSelector from 'components/QueueSelector/QueueSelector';
24 | import StatusSelector from 'components/StatusSelector/StatusSelector';
25 | import './JobsPage.scss';
26 | import 'react-select/dist/react-select.css';
27 |
28 | const AUTO_REFRESH_TIMEOUT = 5000; // ms
29 |
30 | const COLUMNS = [
31 | {
32 | key: 'id',
33 | name: 'ID',
34 | resizable: false,
35 | sortable: true,
36 | width: 95,
37 | formatter: JobLinkFormatter
38 | },
39 | {
40 | key: 'status',
41 | name: 'Status',
42 | resizable: false,
43 | sortable: true,
44 | width: 120,
45 | formatter: StatusFormatter
46 | },
47 | {
48 | key: 'name',
49 | name: 'Name',
50 | resizable: true,
51 | sortable: true,
52 | width: 310,
53 | getRowMetaData: (job) => job,
54 | formatter: NameFormatter
55 | },
56 | {
57 | key: 'image',
58 | name: 'Image',
59 | resizable: true,
60 | width: 270,
61 | formatter: ImageFormatter
62 | },
63 | {
64 | key: 'runtime',
65 | name: 'Runtime',
66 | resizable: true,
67 | width: 140,
68 | formatter: DurationFormatter
69 | },
70 | {
71 | key: 'total_elapsed_time',
72 | name: 'Total elapsed',
73 | resizable: true,
74 | width: 140,
75 | formatter: DurationFormatter
76 | },
77 | {
78 | key: 'stopped_at',
79 | name: 'Stopped At',
80 | resizable: true,
81 | sortable: true,
82 | width: 280,
83 | formatter: DateTimeFormatter
84 | },
85 | {
86 | key: 'job_queue',
87 | name: 'Queue',
88 | resizable: true,
89 | width: 270
90 | },
91 | {
92 | key: 'last_updated',
93 | name: 'Last Updated',
94 | resizable: true,
95 | sortable: true,
96 | width: 280,
97 | formatter: DateTimeFormatter
98 | },
99 | {
100 | key: 'vcpus',
101 | name: 'CPUs',
102 | width: 80
103 | },
104 | {
105 | key: 'memory',
106 | name: 'Memory',
107 | width: 80
108 | },
109 | {
110 | key: 'command_line',
111 | name: 'Command Line',
112 | width: 800,
113 | resizable: true,
114 | formatter: CommandLineFormatter
115 | }
116 | ];
117 |
118 | const MIN_WIDTH = COLUMNS.reduce((memo, column) => memo + column.width, 0);
119 |
120 | const PAGE_SIZE = 100;
121 |
122 | class JobsPage extends React.Component {
123 | static propTypes = {
124 | fetchJobsPage: PropTypes.func.isRequired,
125 | height: PropTypes.number.isRequired,
126 | jobs: PropTypes.array.isRequired,
127 | killJobs: PropTypes.func.isRequired,
128 | q: PropTypes.string,
129 | dateRange: PropTypes.string,
130 | routing: PropTypes.object.isRequired,
131 | selectedIds: PropTypes.array.isRequired,
132 | setParams: PropTypes.func.isRequired,
133 | setSelectedIds: PropTypes.func.isRequired,
134 | sortColumn: PropTypes.string,
135 | sortDirection: PropTypes.string,
136 | status: PropTypes.object.isRequired,
137 | syncJobQueues: PropTypes.func.isRequired,
138 | updateJobsQueryParams: PropTypes.func.isRequired,
139 | };
140 |
141 | constructor(props) {
142 | super(props);
143 | // Using state for autoRefresh so it resets to false on navigation
144 | this.state = {
145 | autoRefresh: false
146 | };
147 | }
148 |
149 | componentDidMount() {
150 | this.loadStateFromQueryParams();
151 | this.props.syncJobQueues();
152 | this.props.fetchJobsPage();
153 | }
154 |
155 | componentDidUpdate(prevProps) {
156 | if (this.props.q !== prevProps.q ||
157 | this.props.dateRange !== prevProps.dateRange ||
158 | this.props.sortColumn !== prevProps.sortColumn ||
159 | this.props.sortDirection !== prevProps.sortDirection ||
160 | this.props.page !== prevProps.page ||
161 | this.props.selectedStatus !== prevProps.selectedStatus ||
162 | this.props.selectedQueue !== prevProps.selectedQueue) {
163 | this.props.updateJobsQueryParams();
164 | this.props.fetchJobsPage();
165 | }
166 | }
167 |
168 | componentWillUnmount() {
169 | // Stop the auto-refresh loop. The direct mutation lets any in-flight
170 | // timer callback see the change; setState would have no effect once
171 | // the component has unmounted.
172 | this.state.autoRefresh = false;
173 | }
173 |
174 | render() {
175 | const {
176 | jobs,
177 | height,
178 | queues,
179 | status
180 | } = this.props;
181 |
182 | if (!status.loading && status.error) {
183 | return (
184 |