├── .env.template
├── .gitignore
├── README.md
├── cassandra-reaper
│   └── cassandra-reaper.env
├── cassandra
│   ├── Dockerfile
│   ├── cassandra.env
│   ├── config
│   │   ├── collectd.cassandra.conf
│   │   ├── filebeat.yml
│   │   ├── graphite.cassandra.yaml
│   │   ├── jmxremote.access
│   │   ├── jmxremote.password
│   │   └── prometheus.yml
│   ├── docker-entrypoint.sh
│   ├── lib
│   │   ├── jmx_prometheus_javaagent-0.9.jar
│   │   ├── metrics-core-3.1.2.jar
│   │   ├── metrics-graphite-3.1.2.jar
│   │   ├── reporter-config-base-3.0.3.jar
│   │   └── reporter-config3-3.0.3.jar
│   └── schema.cql
├── docker-compose.yml
├── grafana
│   ├── bin
│   │   ├── create-data-sources.sh
│   │   └── upload-dashboards.sh
│   ├── dashboards
│   │   ├── tlp-cassandra-alerts.final.json
│   │   ├── tlp-cassandra-big-picture.final.json
│   │   ├── tlp-cassandra-client-connections.final.json
│   │   ├── tlp-cassandra-overview.final.json
│   │   ├── tlp-cassandra-read-path.final.json
│   │   ├── tlp-cassandra-reaper.final.json
│   │   └── tlp-cassandra-write-path.final.json
│   └── grafana.env
├── logspout
│   ├── Dockerfile
│   ├── build.sh
│   ├── logspout.env
│   └── modules.go
├── pickle-factory
│   ├── Dockerfile
│   ├── docker-entrypoint.sh
│   ├── factory.py
│   └── requirements.txt
├── pickle-shop
│   ├── Dockerfile
│   ├── docker-entrypoint.sh
│   ├── requirements.txt
│   └── shop.py
├── pickle.env
└── prometheus
    └── config
        └── prometheus.yml
/.env.template:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PAPERTRAIL_PORT=55555
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .DS_Store
3 |
4 | data/
5 | .env
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Blog Post
2 |
3 | A new blog post covering each of the main components of this project can be found here:
4 |
5 | http://thelastpickle.com/blog/2018/01/23/docker-meet-cassandra.html
6 |
7 | # Pre-Meetup Setup
8 |
9 | ```bash
10 | git clone git@github.com:thelastpickle/docker-cassandra-bootstrap.git
11 | cd docker-cassandra-bootstrap
12 | cp .env.template .env
13 | docker-compose build
14 | ```
15 |
16 | If you would like to see a hosted log service interact seamlessly with this
17 | Docker Compose stack, sign up for [Papertrail](https://papertrailapp.com/?thank=1ad15b).
18 |
19 | Then find your specific port number by looking at your
20 | [Log Destinations](https://papertrailapp.com/account/destinations) and update
21 | your `.env` setting accordingly.
22 |
23 | # Starting From Scratch
24 |
25 | ```bash
26 | # turn off all running Docker containers
27 | docker-compose down
28 |
29 | # delete any persistent data
30 | rm -rf data/
31 |
32 | # rebuild the images
33 | docker-compose build
34 | ```
35 |
36 |
37 | # Meetup Workflow
38 |
39 | Start our Docker-integrated logging connector:
40 |
41 | ```bash
42 | # start Docker logging connector
43 | docker-compose up logspout
44 |
45 | # view logging HTTP endpoint
46 | curl http://localhost:8000/logs
47 | ```
48 |
49 | Start Cassandra and set up the required schema:
50 |
51 | ```bash
52 | # start Cassandra
53 | docker-compose up cassandra
54 |
55 | # view cluster status
56 | docker-compose run nodetool status
57 |
58 | # create schema
59 | docker-compose run cqlsh -f /schema.cql
60 |
61 | # confirm schema
62 | docker-compose run cqlsh -e "DESCRIBE SCHEMA;"
63 | ```
64 |
65 | Start Reaper for Apache Cassandra and monitor your new cluster:
66 |
67 | ```bash
68 | # start Reaper for Apache Cassandra
69 | docker-compose up cassandra-reaper
70 |
71 | open http://localhost:8080/webui/
72 |
73 | # add one-off repair
74 |
75 | # add scheduled repair
76 | ```
77 |
78 | Start Prometheus and become familiar with the UI:
79 |
80 | ```bash
81 | # start Prometheus
82 | docker-compose up prometheus
83 |
84 | open http://localhost:9090
85 | ```
86 |
87 | Start Grafana, connect it to the Prometheus data source, and upload the TLP
88 | Dashboards.
89 |
90 | ```bash
91 | # start Grafana
92 | docker-compose up grafana
93 |
94 | # create the Grafana data sources
95 | ./grafana/bin/create-data-sources.sh
96 |
97 | # user/pass: admin/admin
98 | open http://localhost:3000
99 |
100 | # upload dashboards
101 | ./grafana/bin/upload-dashboards.sh
102 | ```
103 |
104 | # Sample Application
105 |
106 | Generate fake workforce and activity:
107 |
108 | ```bash
109 | docker-compose run pickle-factory
110 | ```
111 |
112 | Sample timesheets:
113 |
114 | ```bash
115 | docker-compose run pickle-shop
116 | ```
117 |
--------------------------------------------------------------------------------
/cassandra-reaper/cassandra-reaper.env:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | REAPER_CASS_KEYSPACE=reaper_db
4 | REAPER_STORAGE_TYPE=cassandra
5 | REAPER_CASS_CLUSTER_NAME=pickle-db
6 | REAPER_CASS_CONTACT_POINTS=[cassandra]
7 |
8 | # use the credentials that match the ./cassandra/config/jmxremote.* configurations
9 | REAPER_JMX_USERNAME=reaperUser
10 | REAPER_JMX_PASSWORD=reaperPass
11 |
--------------------------------------------------------------------------------
/cassandra/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM cassandra:3.11
2 |
3 | # install wget, for the custom metrics-graphite reporter
4 | #RUN set -x \
5 | # && apt-get update \
6 | # && apt-get install -y --no-install-recommends \
7 | # wget \
8 | # && rm -rf /var/lib/apt/lists/*
9 |
10 | # install the custom metrics-graphite reporter, to allow measurement filtering
11 | #RUN echo "JVM_OPTS=\"\$JVM_OPTS -Dcassandra.metricsReporterConfigFile=graphite.yaml\"" >> /etc/cassandra/cassandra-env.sh
12 | #RUN wget -P /usr/share/cassandra/lib/ \
13 | # http://central.maven.org/maven2/net/java/dev/jna/jna/4.0.0/jna-4.0.0.jar
14 | #RUN rm /usr/share/cassandra/lib/metrics-core-3.1.0.jar \
15 | # /usr/share/cassandra/lib/reporter-config-base-3.0.0.jar \
16 | # /usr/share/cassandra/lib/reporter-config3-3.0.0.jar
17 | #COPY lib/metrics-core-3.1.2.jar \
18 | # lib/metrics-graphite-3.1.2.jar \
19 | # lib/reporter-config-base-3.0.3.jar \
20 | # lib/reporter-config3-3.0.3.jar \
21 | # /usr/share/cassandra/lib/
22 |
23 | # install Java 8, which is required for the custom metrics reporter used above
24 | #RUN set -x \
25 | # && echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu xenial main" \
26 | # | tee /etc/apt/sources.list.d/webupd8team-java.list \
27 | # && echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu xenial main" \
28 | # | tee -a /etc/apt/sources.list.d/webupd8team-java.list \
29 | # && apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886 \
30 | # && apt-get update \
31 | # && echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true \
32 | # | /usr/bin/debconf-set-selections \
33 | # && apt-get install -y \
34 | # oracle-java8-installer \
35 | # oracle-java8-set-default
36 |
37 | # install filebeat for Logstash ingestion
38 | #ENV ELK_VERSION 5.3.0
39 | #ENV DOWNLOAD_URL https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-${ELK_VERSION}-amd64.deb
40 | #RUN set -x \
41 | # && apt-get update \
42 | # && apt-get install -y --no-install-recommends \
43 | # curl \
44 | # && rm -rf /var/lib/apt/lists/* \
45 | # && curl -L -O ${DOWNLOAD_URL} \
46 | # && dpkg -i filebeat-${ELK_VERSION}-amd64.deb \
47 | # && rm filebeat-${ELK_VERSION}-amd64.deb \
48 | # && update-rc.d filebeat defaults 95 10 \
49 | # && echo "/etc/init.d/filebeat start" \
50 | # >> /etc/cassandra/cassandra-env.sh \
51 | # && apt-get purge -y --auto-remove \
52 | # curl
53 | #RUN mkdir \
54 | # /var/lib/filebeat \
55 | # /var/log/filebeat \
56 | # && touch /var/run/filebeat.pid \
57 | # && chown cassandra:cassandra \
58 | # /var/lib/filebeat \
59 | # /var/log/filebeat \
60 | # /var/run/filebeat.pid
61 |
62 | # install collectd
63 | # NOTE: jessie packages are now being included since librrd4 and
64 | # libmicrohttpd10 were missing from the stretch repos
65 | RUN set -x \
66 | && echo "deb http://pkg.ci.collectd.org/deb jessie collectd-5.7" \
67 | > /etc/apt/sources.list.d/pkg.ci.collectd.org.list \
68 | && gpg --keyserver hkp://pgp.mit.edu:80 --recv-keys 3994D24FB8543576 \
69 | && gpg --export -a 3994D24FB8543576 | apt-key add - \
70 | && apt-get update \
71 | && apt-get install -y --no-install-recommends \
72 | collectd=5.7.1-1.1 \
73 | collectd-utils \
74 | libprotobuf-c-dev \
75 | libmicrohttpd-dev \
76 | && echo "deb http://deb.debian.org/debian jessie main" \
77 | >> /etc/apt/sources.list.d/pkg.ci.collectd.org.list \
78 | && apt-get update \
79 | && apt-get install -y \
80 | librrd4 \
81 | libmicrohttpd10 \
82 | && rm -rf /var/lib/apt/lists/*
83 | RUN touch /var/log/collectd.log \
84 | && chown cassandra:cassandra /var/log/collectd.log
85 |
86 | # install Prometheus JMX exporter
87 | # NOTE: 0.10 will not work until this issue is resolved:
88 | # https://github.com/prometheus/jmx_exporter/issues/170
89 | ENV JMX_EXPORTER_VERSION 0.9
90 | COPY lib/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar \
91 | /prometheus/
92 | RUN echo 'JVM_OPTS="$JVM_OPTS -javaagent:'/prometheus/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar=7070:/prometheus/prometheus.yml'"' \
93 | | tee -a /etc/cassandra/cassandra-env.sh
94 |
95 | # add JMX authentication files for Reaper access
96 | COPY config/jmxremote.access /usr/lib/jvm/java-8-openjdk-amd64/jre/lib/management/jmxremote.access
97 | COPY config/jmxremote.password /etc/cassandra/jmxremote.password
98 | RUN chown cassandra:cassandra \
99 | /usr/lib/jvm/java-8-openjdk-amd64/jre/lib/management/jmxremote.access \
100 | /etc/cassandra/jmxremote.password \
101 | && chmod 600 \
102 | /usr/lib/jvm/java-8-openjdk-amd64/jre/lib/management/jmxremote.access \
103 | /etc/cassandra/jmxremote.password
104 |
105 | # overwrite the base docker-entrypoint.sh with a modified one that starts collectd (its filebeat chown is currently disabled)
106 | COPY docker-entrypoint.sh /docker-entrypoint.sh
107 |
108 | # does not work for some reason
109 | #RUN echo "exec service collectd start &" \
110 | # >> /etc/cassandra/cassandra-env.sh
111 |
--------------------------------------------------------------------------------
/cassandra/cassandra.env:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # define heap size for local development
4 | MAX_HEAP_SIZE=500M
5 | HEAP_NEWSIZE=100M
6 |
7 | # define cluster topology
8 | CASSANDRA_CLUSTER_NAME=pickle-db
9 | CASSANDRA_DC=pickle-east
10 | CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch
11 |
12 | # define gossip entrypoints
13 | CASSANDRA_SEEDS=cassandra,cassandra2,cassandra3
14 |
15 | # open JMX port for access by Reaper
16 | # WARNING: this is unsafe in production without proper firewall settings
17 | LOCAL_JMX=no
18 |
--------------------------------------------------------------------------------
/cassandra/config/collectd.cassandra.conf:
--------------------------------------------------------------------------------
1 | LoadPlugin logfile
2 | <Plugin logfile>
3 | LogLevel "debug"
4 | File "/var/log/collectd.log"
5 | Timestamp true
6 | </Plugin>
7 | # host-level metric plugins; options for a plugin go in its <Plugin> block
8 | LoadPlugin disk
9 | LoadPlugin load
10 | LoadPlugin syslog
11 | LoadPlugin interface
12 | LoadPlugin memory
13 | LoadPlugin rrdtool
14 | #LoadPlugin syslog  (duplicate: syslog is already loaded above)
15 |
16 | Interval 30
17 | Hostname "cassandra"
18 |
19 | LoadPlugin df
20 | <Plugin df>
21 | MountPoint "/"
22 | MountPoint "/var"
23 | MountPoint "/var/lib/cassandra"
24 | MountPoint "/var/lib/cassandra/commitlog"
25 | MountPoint "/var/lib/cassandra/data"
26 | MountPoint "/var/lib/cassandra/saved_caches"
27 | MountPoint "/var/log"
28 | MountPoint "/var/log/cassandra"
29 | IgnoreSelected false
30 | ValuesPercentage true
31 | </Plugin>
32 |
33 | LoadPlugin cpu
34 | <Plugin cpu>
35 | ReportByState true
36 | ReportByCpu false
37 | ValuesPercentage true
38 | ReportNumCpu true
39 | </Plugin>
40 |
41 | #LoadPlugin write_graphite
42 | #
43 | #
44 | # Host "graphite"
45 | # Port "2003"
46 | # Prefix "collectd.environment.cassandra.data_center."
47 | # #Protocol "udp"
48 | # EscapeCharacter "_"
49 | # SeparateInstances true
50 | # StoreRates false
51 | # AlwaysAppendDS false
52 | #
53 | #
54 |
55 | LoadPlugin write_prometheus
56 | <Plugin write_prometheus>
57 | Port "9103"
58 | </Plugin>
59 |
--------------------------------------------------------------------------------
/cassandra/config/filebeat.yml:
--------------------------------------------------------------------------------
1 | output:
2 | logstash:
3 | enabled: true
4 | hosts:
5 | - logstash:5044
6 | filebeat:
7 | prospectors:
8 | - input_type: log
9 | paths:
10 | - "/var/log/cassandra/system.log*"
11 | document_type: cassandra_system_logs
12 | exclude_files: ['\.zip$']
13 | multiline.pattern: '^(TRACE|DEBUG|WARN|INFO|ERROR)'  # grouped so '^' anchors every level, not only TRACE
14 | multiline.negate: true  # lines that do NOT start with a level...
15 | multiline.match: after  # ...are appended to the preceding event (e.g. stack traces)
16 | - input_type: log
17 | paths:
18 | - "/var/log/cassandra/gc.log.*.current"
19 | document_type: cassandra_gc_logs
20 | exclude_files: ['\.zip$']
21 | multiline.pattern: '^(TRACE|DEBUG|WARN|INFO|ERROR)'  # NOTE(review): GC log lines presumably never start with a log level - confirm this pattern is wanted here
22 | multiline.negate: true
23 | multiline.match: after
24 |
--------------------------------------------------------------------------------
/cassandra/config/graphite.cassandra.yaml:
--------------------------------------------------------------------------------
1 | graphite:
2 | -
3 | # udp: true
4 | pickled: true
5 | period: 1
6 | timeunit: 'SECONDS'
7 | prefix: 'cassandra.environment.data_center.cassandra-3_0'
8 | hosts:
9 | - host: 'graphite'
10 | port: 2004 # port: 2003, for non-pickled use
11 | predicate:
12 | color: "white"
13 | useQualifiedName: true
14 | patterns:
15 | - "^jvm.gc.*"
16 | - "^jvm.memory.*"
17 | - "^org.apache.cassandra.metrics.Client.*"
18 | - "^org.apache.cassandra.metrics.ClientRequest.ConditionNotMet.*"
19 | - "^org.apache.cassandra.metrics.ClientRequest.ContentionHistogram.*"
20 | - "^org.apache.cassandra.metrics.ClientRequest.Latency.*"
21 | - "^org.apache.cassandra.metrics.ClientRequest.Timeouts.*"
22 | - "^org.apache.cassandra.metrics.ClientRequest.Unavailables.*"
23 | - "^org.apache.cassandra.metrics.ClientRequest.UnfinishedCommit.*"
24 | - "^org.apache.cassandra.metrics.ClientRequestMetrics.*"
25 | - "^org.apache.cassandra.metrics.Table.AllMemtablesHeapSize.*"
26 | - "^org.apache.cassandra.metrics.Table.AllMemtablesLiveDataSize.*"
27 | - "^org.apache.cassandra.metrics.Table.AllMemtablesOffHeapSize.*"
28 | - "^org.apache.cassandra.metrics.Table.AllMemtablesOnHeapSize.*"
29 | - "^org.apache.cassandra.metrics.Table.AnticompactionTime.*"
30 | - "^org.apache.cassandra.metrics.Table.BloomFilterDiskSpaceUsed.*"
31 | - "^org.apache.cassandra.metrics.Table.BloomFilterFalseRatio.*"
32 | - "^org.apache.cassandra.metrics.Table.BloomFilterOffHeapMemoryUsed.*"
33 | - "^org.apache.cassandra.metrics.Table.CasCommitLatency.*"
34 | - "^org.apache.cassandra.metrics.Table.CasPrepareLatency.*"
35 | - "^org.apache.cassandra.metrics.Table.CasProposeLatency.*"
36 | - "^org.apache.cassandra.metrics.Table.CompressionMetadataOffHeapMemoryUsed.*"
37 | - "^org.apache.cassandra.metrics.Table.CompressionRatio.*"
38 | - "^org.apache.cassandra.metrics.Table.CoordinatorReadLatency.*"
39 | - "^org.apache.cassandra.metrics.Table.CoordinatorScanLatency.*"
40 | - "^org.apache.cassandra.metrics.Table.EstimatedColumnCountHistogram.*"
41 | - "^org.apache.cassandra.metrics.Table.EstimatedPartitionCount.*"
42 | - "^org.apache.cassandra.metrics.Table.EstimatedPartitionSizeHistogram.*"
43 | - "^org.apache.cassandra.metrics.Table.IndexSummaryOffHeapMemoryUsed.*"
44 | - "^org.apache.cassandra.metrics.Table.KeyCacheHitRate.*"
45 | - "^org.apache.cassandra.metrics.Table.LiveDiskSpaceUsed.*"
46 | - "^org.apache.cassandra.metrics.Table.LiveScannedHistogram.*"
47 | - "^org.apache.cassandra.metrics.Table.LiveSSTableCount.*"
48 | - "^org.apache.cassandra.metrics.Table.MaxPartitionSize.*"
49 | - "^org.apache.cassandra.metrics.Table.MeanPartitionSize.*"
50 | - "^org.apache.cassandra.metrics.Table.MemtableColumnsCount.*"
51 | - "^org.apache.cassandra.metrics.Table.MemtableLiveDataSize.*"
52 | - "^org.apache.cassandra.metrics.Table.MemtableOffHeapSize.*"
53 | - "^org.apache.cassandra.metrics.Table.MemtableOnHeapSize.*"
54 | - "^org.apache.cassandra.metrics.Table.PercentRepaired.*"
55 | - "^org.apache.cassandra.metrics.Table.RangeLatency.*"
56 | - "^org.apache.cassandra.metrics.Table.ReadLatency.*"
57 | - "^org.apache.cassandra.metrics.Table.RowCacheHit.*"
58 | - "^org.apache.cassandra.metrics.Table.RowCacheMiss.*"
59 | - "^org.apache.cassandra.metrics.Table.SSTablesPerReadHistogram.*"
60 | - "^org.apache.cassandra.metrics.Table.SyncTime.*"
61 | - "^org.apache.cassandra.metrics.Table.TombstoneScannedHistogram.*"
62 | - "^org.apache.cassandra.metrics.Table.TotalDiskSpaceUsed.*"
63 | - "^org.apache.cassandra.metrics.Table.TrueSnapshotsSize.*"
64 | - "^org.apache.cassandra.metrics.Table.ValidationTime.*"
65 | - "^org.apache.cassandra.metrics.Table.WriteLatency.*"
66 | - "^org.apache.cassandra.metrics.Table.*.PendingCompactions.*"
67 | - "^org.apache.cassandra.metrics.CommitLog.ActiveTasks.*"
68 | - "^org.apache.cassandra.metrics.CommitLog.CurrentlyBlockedTask.*"
69 | - "^org.apache.cassandra.metrics.CommitLog.PendingTasks.*"
70 | - "^org.apache.cassandra.metrics.CommitLog.TotalCommitLogSize.*"
71 | - "^org.apache.cassandra.metrics.Compaction.BytesCompacted.*"
72 | - "^org.apache.cassandra.metrics.Compaction.PendingTasks.*"
73 | - "^org.apache.cassandra.metrics.Compaction.TotalCompactionsCompleted.*"
74 | - "^org.apache.cassandra.metrics.Connection.TotalTimeouts.*"
75 | - "^org.apache.cassandra.metrics.CQL.PreparedStatementsCount.*"
76 | - "^org.apache.cassandra.metrics.CQL.PreparedStatementsEvicted.*"
77 | - "^org.apache.cassandra.metrics.CQL.PreparedStatementsRatio.*"
78 | - "^org.apache.cassandra.metrics.CQL.RegularStatementsExecuted.*"
79 | - "^org.apache.cassandra.metrics.DroppedMessage.Dropped.*"
80 | - "^org.apache.cassandra.metrics.HintedHandOffManager.*"
81 | - "^org.apache.cassandra.metrics.ReadRepair.*"
82 | - "^org.apache.cassandra.metrics.Storage.Exceptions.*"
83 | - "^org.apache.cassandra.metrics.Storage.Load.*"
84 | - "^org.apache.cassandra.metrics.Storage.TotalHints.*"
85 | - "^org.apache.cassandra.metrics.Storage.TotalHintsInProgress.*"
86 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.AntiEntropyStage.*"
87 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.AntiEntropyStage.*"
88 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.internal.CompactionExecutor.*"
89 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.CompactionExecutor.*"
90 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.CompactionExecutor.*"
91 | - "^org.apache.cassandra.metrics.ThreadPools.CompletedTasks.internal.GossipStage.*"
92 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.HintsDispatcher.*"
93 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.HintsDispatcher.*"
94 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.internal.MemtableFlushWriter.*"
95 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.MemtableFlushWriter.*"
96 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.MemtableFlushWriter.*"
97 | - "^org.apache.cassandra.metrics.ThreadPools.CompletedTasks.internal.MigrationStage.*"
98 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.MigrationStage.*"
99 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.MigrationStage.*"
100 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.ValidationExecutor.*"
101 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.ValidationExecutor.*"
102 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.request.MutationStage.*"
103 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.MutationStage.*"
104 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.MutationStage.*"
105 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.ReadRepairStage.*"
106 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.ReadRepairStage.*"
107 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.ReadStage.*"
108 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.ReadStage.*"
109 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.ReplicateOnWriteStage.*"
110 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.ReplicateOnWriteStage.*"
111 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.request.RequestResponseStage.*"
112 | - "^org.apache.cassandra.metrics.ThreadPools.CompletedTasks.request.RequestResponseStage.*"
113 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.RequestResponseStage.*"
114 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.RequestResponseStage.*"
115 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.transport.Native-Transport-Requests.*"
116 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.transport.Native-Transport-Requests.*"
117 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.transport.Native-Transport-Requests.*"
118 | histogram:
119 | color: "white"
120 | useQualifiedName: true
121 | patterns:
122 | - metric: ".*"
123 | measure: "p75|p95|max"
124 | timer:
125 | color: "white"
126 | useQualifiedName: true
127 | patterns:
128 | - metric: ".*"
129 | measure: "p75|p95|max"
130 | - metric: "^org.apache.cassandra.metrics.ClientRequest.Latency.*"
131 | measure: "m1_rate|p99"
132 | - metric: "^org.apache.cassandra.metrics.Table.*Latency.*"
133 | measure: "m1_rate"
134 | meter:
135 | color: "white"
136 | useQualifiedName: true
137 | patterns:
138 | - metric: "^org.apache.cassandra.metrics.ClientRequest.*"
139 | measure: "m1_rate"
140 | - metric: "^org.apache.cassandra.metrics.Compaction.TotalCompactionsCompleted.*"
141 | measure: "m1_rate"
142 | - metric: "^org.apache.cassandra.metrics.DroppedMessage.Dropped.*"
143 | measure: "m1_rate"
144 |
--------------------------------------------------------------------------------
/cassandra/config/jmxremote.access:
--------------------------------------------------------------------------------
1 | cassandraUser readwrite
2 | reaperUser readwrite
3 |
4 |
--------------------------------------------------------------------------------
/cassandra/config/jmxremote.password:
--------------------------------------------------------------------------------
1 | cassandraUser cassandraPass
2 | reaperUser reaperPass
3 |
4 |
--------------------------------------------------------------------------------
/cassandra/config/prometheus.yml:
--------------------------------------------------------------------------------
1 | ---
2 | lowercaseOutputLabelNames: false
3 | lowercaseOutputName: false
4 | blacklistObjectNames:
5 | - "org.apache.cassandra.metrics:type=*,keyspace=system*,*"
6 |
7 | rules:
8 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value)'
9 | name: org.apache.cassandra.metrics.Client.$1
10 |
11 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max|OneMinuteRate)'
12 | name: org.apache.cassandra.metrics.ClientRequest.$2
13 | labels:
14 | scope: $1
15 | unit: $3
16 |
17 | - pattern: 'org.apache.cassandra.metrics<>(99thPercentile)'
18 | name: org.apache.cassandra.metrics.ClientRequest.$2
19 | labels:
20 | scope: $1
21 | unit: $3
22 |
23 | # Blacklisted: TrueSnapshotsSize, EstimatedPartitionCount
24 | # Enabling these metrics will cause excessive system load since Cassandra has to hit disk
25 | # to populate both of these metrics
26 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max|OneMinuteRate)'
27 | # name: org.apache.cassandra.metrics.Table.$4.$2.$3.$5
28 | name: org.apache.cassandra.metrics.Table.$4
29 | labels:
30 | keyspace: $2
31 | table: $3
32 | unit: $5
33 |
34 | # Missing: ActiveTasks|CurrentlyBlockedTask
35 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max)'
36 | name: org.apache.cassandra.metrics.CommitLog.$1
37 | labels:
38 | unit: $2
39 |
40 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|OneMinuteRate)'
41 | name: org.apache.cassandra.metrics.Compaction.$1
42 | labels:
43 | unit: $2
44 |
45 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max)'
46 | name: org.apache.cassandra.metrics.CQL.$1
47 | labels:
48 | unit: $2
49 |
50 | - pattern: 'org.apache.cassandra.metrics<>(OneMinuteRate)'
51 | name: org.apache.cassandra.metrics.DroppedMessages.Dropped
52 | labels:
53 | message_type: $1
54 | unit: $2
55 |
56 | # HintedHandOffManager introduced in 3.0:
57 | #- pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max)'
58 | # name: org.apache.cassandra.metrics.HintedHandOffManager.$1
59 | # labels:
60 | # unit: $2
61 |
62 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max)'
63 | name: org.apache.cassandra.metrics.Storage.$1
64 | labels:
65 | unit: $2
66 |
67 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value)'
68 | name: org.apache.cassandra.metrics.ThreadPools
69 | labels:
70 | status_pool: $3
71 | thread_type: $1
72 | thread_pool: $2
73 |
74 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value)'
75 | name: org.apache.cassandra.metrics.ThreadPools
76 | labels:
77 | status_pool: $3
78 | thread_type: $1
79 | thread_pool: $2
80 |
81 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value)'
82 | name: org.apache.cassandra.metrics.ThreadPools
83 | labels:
84 | status_pool: $3
85 | thread_type: $1
86 | thread_pool: $2
87 |
88 | # jvm.fd introduced in 3.0:
89 | #- pattern: 'java.lang<>(\w*)'
90 | # name: jvm.fd.$1.$2
91 |
92 | - pattern: 'java.lang<>(\w*)'
93 | name: jvm.gc.$1
94 | labels:
95 | unit: $2
96 |
97 | - pattern: 'java.lang<>(\w*)'
98 | name: jvm.memory.pools.$1
99 | labels:
100 | unit: $2
101 |
--------------------------------------------------------------------------------
/cassandra/docker-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # copied from:
4 | # https://github.com/docker-library/cassandra/blob/b77e932d6935318f599026cd1ccf0a2697b3224a/3.0/docker-entrypoint.sh
5 |
6 | set -e
7 |
8 | # modifications here:
9 | # must be owned by the current user or root
10 | # chown cassandra /etc/filebeat/filebeat.yml
11 | [ "$(id -u)" != '0' ] || service collectd start  # root only: this script re-execs itself via gosu below, so an unguarded start would run a second time as the cassandra user
12 | # modifications end
13 |
14 | # first arg is `-f` or `--some-option`
15 | if [ "${1:0:1}" = '-' ]; then
16 | set -- cassandra -f "$@"
17 | fi
18 |
19 | # allow the container to be started with `--user`
20 | if [ "$1" = 'cassandra' -a "$(id -u)" = '0' ]; then
21 | chown -R cassandra /var/lib/cassandra /var/log/cassandra "$CASSANDRA_CONFIG"
22 | exec gosu cassandra "$BASH_SOURCE" "$@"
23 | fi
24 |
25 | if [ "$1" = 'cassandra' ]; then
26 | : ${CASSANDRA_RPC_ADDRESS='0.0.0.0'}
27 |
28 | : ${CASSANDRA_LISTEN_ADDRESS='auto'}
29 | if [ "$CASSANDRA_LISTEN_ADDRESS" = 'auto' ]; then
30 | CASSANDRA_LISTEN_ADDRESS="$(hostname --ip-address)"
31 | fi
32 |
33 | : ${CASSANDRA_BROADCAST_ADDRESS="$CASSANDRA_LISTEN_ADDRESS"}
34 |
35 | if [ "$CASSANDRA_BROADCAST_ADDRESS" = 'auto' ]; then
36 | CASSANDRA_BROADCAST_ADDRESS="$(hostname --ip-address)"
37 | fi
38 | : ${CASSANDRA_BROADCAST_RPC_ADDRESS:=$CASSANDRA_BROADCAST_ADDRESS}
39 |
40 | if [ -n "${CASSANDRA_NAME:+1}" ]; then
41 | : ${CASSANDRA_SEEDS:="cassandra"}
42 | fi
43 | : ${CASSANDRA_SEEDS:="$CASSANDRA_BROADCAST_ADDRESS"}
44 |
45 | sed -ri 's/(- seeds:).*/\1 "'"$CASSANDRA_SEEDS"'"/' "$CASSANDRA_CONFIG/cassandra.yaml"
46 |
47 | for yaml in \
48 | broadcast_address \
49 | broadcast_rpc_address \
50 | cluster_name \
51 | endpoint_snitch \
52 | listen_address \
53 | num_tokens \
54 | rpc_address \
55 | start_rpc \
56 | ; do
57 | var="CASSANDRA_${yaml^^}"
58 | val="${!var}"
59 | if [ "$val" ]; then
60 | sed -ri 's/^(# )?('"$yaml"':).*/\2 '"$val"'/' "$CASSANDRA_CONFIG/cassandra.yaml"
61 | fi
62 | done
63 |
64 | for rackdc in dc rack; do
65 | var="CASSANDRA_${rackdc^^}"
66 | val="${!var}"
67 | if [ "$val" ]; then
68 | sed -ri 's/^('"$rackdc"'=).*/\1 '"$val"'/' "$CASSANDRA_CONFIG/cassandra-rackdc.properties"
69 | fi
70 | done
71 | fi
72 |
73 | exec "$@"
74 |
--------------------------------------------------------------------------------
/cassandra/lib/jmx_prometheus_javaagent-0.9.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/jmx_prometheus_javaagent-0.9.jar
--------------------------------------------------------------------------------
/cassandra/lib/metrics-core-3.1.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/metrics-core-3.1.2.jar
--------------------------------------------------------------------------------
/cassandra/lib/metrics-graphite-3.1.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/metrics-graphite-3.1.2.jar
--------------------------------------------------------------------------------
/cassandra/lib/reporter-config-base-3.0.3.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/reporter-config-base-3.0.3.jar
--------------------------------------------------------------------------------
/cassandra/lib/reporter-config3-3.0.3.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/reporter-config3-3.0.3.jar
--------------------------------------------------------------------------------
/cassandra/schema.cql:
--------------------------------------------------------------------------------
1 | CREATE KEYSPACE IF NOT EXISTS pickle
2 | WITH replication = {'class': 'NetworkTopologyStrategy', 'pickle-east': 1 };
3 |
4 | CREATE TABLE IF NOT EXISTS pickle.employees (
5 | employee_id uuid,
6 | PRIMARY KEY ((employee_id))
7 | ) WITH comment = 'Table with all employee IDs.';
8 |
9 | CREATE TABLE IF NOT EXISTS pickle.timesheets (
10 | employee_id uuid,
11 | pickle_tree_id uuid,
12 | timestamp timeuuid,
13 | pickle_count int,
14 | pickle_avg_size float,
15 | watered boolean,
16 | PRIMARY KEY ((employee_id), pickle_tree_id, timestamp)
17 | ) WITH CLUSTERING ORDER BY (pickle_tree_id ASC, timestamp DESC)
18 | AND comment = 'The most recent employee visits for each pickle tree.';
19 |
20 | CREATE TABLE IF NOT EXISTS pickle.trees (
21 | pickle_tree_id uuid,
22 | timestamp timeuuid,
23 | employee_id uuid,
24 | pickle_count int,
25 | pickle_avg_size float,
26 | watered boolean,
27 | PRIMARY KEY ((pickle_tree_id), timestamp)
28 | ) WITH CLUSTERING ORDER BY (timestamp DESC)
29 | AND comment = 'The most recent history for each pickle tree';
30 |
31 | CREATE TABLE IF NOT EXISTS pickle.production (
32 | pickle_count int,
33 | pickle_tree_id uuid,
34 | timestamp timeuuid,
35 | PRIMARY KEY ((pickle_count), pickle_tree_id, timestamp)
36 | ) WITH CLUSTERING ORDER BY (pickle_tree_id ASC, timestamp ASC)
37 | AND comment = 'How often a pickle tree produces the same number of pickles.';
38 |
39 |
40 | CREATE KEYSPACE IF NOT EXISTS reaper_db
41 | WITH replication = {'class': 'NetworkTopologyStrategy', 'pickle-east': 1 };
42 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '2.1'
2 |
3 | services:
4 |   cassandra:  # Cassandra node, image built from ./cassandra
5 |     build: cassandra
6 |     env_file:
7 |       - cassandra/cassandra.env
8 |     # ports:
9 |     #   - "7199:7199" # JMX
10 |     #   - "7000:7000" # cluster communication
11 |     #   - "7001:7001" # cluster communication (SSL)
12 |     #   - "9042:9042" # native protocol clients
13 |     #   - "9160:9160" # thrift clients
14 |     volumes:
15 |       - ./cassandra/config/collectd.cassandra.conf:/etc/collectd/collectd.conf
16 |       # - ./cassandra/config/graphite.cassandra.yaml:/etc/cassandra/graphite.yaml
17 |       # - ./cassandra/config/filebeat.yml:/etc/filebeat/filebeat.yml
18 |       - ./cassandra/config/prometheus.yml:/prometheus/prometheus.yml
19 |       - ./data/cassandra:/var/lib/cassandra
20 |
21 |   cqlsh:  # CQL shell pointed at the cassandra service; schema.cql is mounted at /schema.cql
22 |     image: cassandra:3.11
23 |     entrypoint: cqlsh cassandra
24 |     volumes:
25 |       - ./cassandra/schema.cql:/schema.cql
26 |
27 |   nodetool:  # nodetool against the cassandra host; `command` supplies the subcommand (default: help)
28 |     image: cassandra:3.11
29 |     entrypoint: nodetool -h cassandra -u cassandraUser -pw cassandraPass
30 |     command: help
31 |
32 |   cassandra-reaper:  # publishes container ports 8080 and 8081 on the host
33 |     image: thelastpickle/cassandra-reaper:ab0fff2
34 |     env_file:
35 |       - cassandra-reaper/cassandra-reaper.env
36 |     links:
37 |       - cassandra:cassandra
38 |     ports:
39 |       - "8080:8080"
40 |       - "8081:8081"
41 |
42 |   grafana:  # published on host port 3000; state persisted under ./data/grafana
43 |     image: grafana/grafana:4.5.2
44 |     env_file:
45 |       - grafana/grafana.env
46 |     links:
47 |       - prometheus:prometheus
48 |     ports:
49 |       - "3000:3000"
50 |     restart: always
51 |     volumes:
52 |       - ./data/grafana:/var/lib/grafana
53 |
54 |   logspout:  # image built from ./logspout; container port 80 is published on host port 8000
55 |     build: logspout
56 |     # command: syslog+tcp://logs.papertrailapp.com:$PAPERTRAIL_PORT
57 |     env_file:
58 |       - logspout/logspout.env
59 |     restart: always
60 |     ports:
61 |       - "8000:80"
62 |     volumes:
63 |       # security concern:
64 |       #   https://raesene.github.io/blog/2016/03/06/The-Dangers-Of-Docker.sock/
65 |       #   http://stackoverflow.com/questions/40844197
66 |       - /var/run/docker.sock:/var/run/docker.sock:ro
67 |
68 |   pickle-factory:  # image built from ./pickle-factory; the source directory is bind-mounted over /usr/src/app
69 |     build: pickle-factory
70 |     env_file:
71 |       - pickle.env
72 |     volumes:
73 |       - ./pickle-factory:/usr/src/app
74 |
75 |   pickle-shop:  # image built from ./pickle-shop; shares pickle.env with pickle-factory
76 |     build: pickle-shop
77 |     env_file:
78 |       - pickle.env
79 |
80 |   prometheus:  # published on host port 9090; linked to cassandra and cassandra-reaper
81 |     image: prom/prometheus:v1.7.1
82 |     links:
83 |       - cassandra:cassandra
84 |       - cassandra-reaper:cassandra-reaper
85 |     ports:
86 |       - "9090:9090"
87 |     volumes:
88 |       - ./data/prometheus:/prometheus
89 |       - ./prometheus/config/prometheus.yml:/etc/prometheus/prometheus.yml
90 |
--------------------------------------------------------------------------------
/grafana/bin/create-data-sources.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -ex
4 |
5 | GRAFANA_USER=admin
6 | GRAFANA_PASS=admin
7 | GRAFANA_IP=localhost
8 | GRAFANA_PORT=3000
9 |
10 | while :
11 | do
12 | curl -H 'Content-Type: application/json' \
13 | -X POST http://${GRAFANA_USER}:${GRAFANA_PASS}@${GRAFANA_IP}:${GRAFANA_PORT}/api/datasources \
14 | --data-binary '{
15 | "name":"graphite",
16 | "type":"graphite",
17 | "url":"http://graphite:80",
18 | "access":"proxy",
19 | "basicAuth":true,
20 | "basicAuthUser":"guest",
21 | "basicAuthPassword":"guest"}' \
22 | && echo \
23 | && curl -H 'Content-Type: application/json' \
24 | -X POST http://${GRAFANA_USER}:${GRAFANA_PASS}@${GRAFANA_IP}:${GRAFANA_PORT}/api/datasources \
25 | --data-binary '{
26 | "name":"prometheus",
27 | "type":"prometheus",
28 | "isDefault":true,
29 | "url":"http://prometheus:9090",
30 | "access":"proxy"}' \
31 | && break
32 | sleep 1
33 | done
34 |
--------------------------------------------------------------------------------
/grafana/bin/upload-dashboards.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Imports every regular file found directly inside GRAFANA_DASHBOARD_DIR into
4 | # Grafana by POSTing it to /api/dashboards/import.
5 | #
6 | # All settings default to the previously hard-coded values and may be
7 | # overridden from the environment. The default dashboard path is relative, so
8 | # run the script from the repository root.
9 |
10 | set -ex
11 |
12 | GRAFANA_USER=${GRAFANA_USER:-admin}
13 | GRAFANA_PASS=${GRAFANA_PASS:-admin}
14 | GRAFANA_API_URL=${GRAFANA_API_URL:-localhost}
15 | GRAFANA_API_PORT=${GRAFANA_API_PORT:-3000}
16 |
17 | GRAFANA_DASHBOARD_DIR=${GRAFANA_DASHBOARD_DIR:-grafana/dashboards/}
18 |
19 | # Iterate with a glob instead of parsing `ls` output, so file names containing
20 | # whitespace are handled correctly. The %/ expansion accepts the directory
21 | # with or without a trailing slash.
22 | for json_dashboard in "${GRAFANA_DASHBOARD_DIR%/}"/*
23 | do
24 |     # Skip sub-directories, and the unexpanded pattern when the directory is empty.
25 |     [ -f "${json_dashboard}" ] || continue
26 |
27 |     # --data-binary sends the file exactly as stored; no `cat` pipe is needed.
28 |     curl \
29 |         -u "${GRAFANA_USER}:${GRAFANA_PASS}" \
30 |         -X POST \
31 |         -H "Content-Type: application/json" -H "Accept: application/json" \
32 |         --data-binary "@${json_dashboard}" \
33 |         "${GRAFANA_API_URL}:${GRAFANA_API_PORT}/api/dashboards/import"
34 | done
35 |
--------------------------------------------------------------------------------
/grafana/dashboards/tlp-cassandra-big-picture.final.json:
--------------------------------------------------------------------------------
1 | {
2 | "dashboard": {
3 | "annotations": {
4 | "list": []
5 | },
6 | "editable": true,
7 | "gnetId": null,
8 | "graphTooltip": 0,
9 | "hideControls": false,
10 | "id": null,
11 | "links": [
12 | {
13 | "asDropdown": true,
14 | "icon": "external link",
15 | "includeVars": false,
16 | "keepTime": true,
17 | "tags": [
18 | "tlp",
19 | "cassandra"
20 | ],
21 | "title": "Other TLP Dashboards",
22 | "type": "dashboards"
23 | }
24 | ],
25 | "refresh": "5m",
26 | "rows": [
27 | {
28 | "collapse": false,
29 | "height": "150px",
30 | "panels": [
31 | {
32 | "cacheTimeout": null,
33 | "colorBackground": false,
34 | "colorValue": false,
35 | "colors": [
36 | "rgba(245, 54, 54, 0.9)",
37 | "rgba(237, 129, 40, 0.89)",
38 | "rgba(50, 172, 45, 0.97)"
39 | ],
40 | "datasource": null,
41 | "decimals": 0,
42 | "format": "none",
43 | "gauge": {
44 | "maxValue": 100,
45 | "minValue": 0,
46 | "show": true,
47 | "thresholdLabels": false,
48 | "thresholdMarkers": true
49 | },
50 | "height": "200",
51 | "hideTimeOverride": true,
52 | "id": 24,
53 | "interval": null,
54 | "links": [],
55 | "mappingType": 1,
56 | "mappingTypes": [
57 | {
58 | "name": "value to text",
59 | "value": 1
60 | },
61 | {
62 | "name": "range to text",
63 | "value": 2
64 | }
65 | ],
66 | "maxDataPoints": 100,
67 | "nullPointMode": "connected",
68 | "nullText": null,
69 | "postfix": "%",
70 | "postfixFontSize": "50%",
71 | "prefix": "",
72 | "prefixFontSize": "50%",
73 | "rangeMaps": [
74 | {
75 | "from": "null",
76 | "text": "N/A",
77 | "to": "null"
78 | }
79 | ],
80 | "span": 4,
81 | "sparkline": {
82 | "fillColor": "rgba(31, 118, 189, 0.18)",
83 | "full": false,
84 | "lineColor": "rgb(31, 120, 193)",
85 | "show": false
86 | },
87 | "tableColumn": "",
88 | "targets": [
89 | {
90 | "expr": "sum(collectd_cpu_percent{environment=\"$env\", data_center=\"$dc\", cpu!=\"idle\"}) / sum(collectd_cpu_count{environment=\"$env\", data_center=\"$dc\"})",
91 | "format": "time_series",
92 | "legendFormat": "",
93 | "refId": "A",
94 | "target": "divideSeries(sumSeries(absolute(offset(collectd.$env.cassandra.$dc.$host.cpu.percent.idle, -100))), #B)",
95 | "targetFull": "divideSeries(sumSeries(absolute(offset(collectd.$env.cassandra.$dc.$host.cpu.percent.idle, -100))), sumSeries(collectd.$env.cassandra.$dc.$host.cpu.count))",
96 | "textEditor": false
97 | },
98 | {
99 | "expr": "sum(collectd_cpu_count{environment=\"$env\", data_center=\"$dc\"})",
100 | "format": "time_series",
101 | "hide": true,
102 | "legendFormat": "",
103 | "refId": "B",
104 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.cpu.count)",
105 | "textEditor": false
106 | }
107 | ],
108 | "thresholds": "",
109 | "timeFrom": null,
110 | "timeShift": "10s",
111 | "title": "Cluster-Wide CPU Usage",
112 | "transparent": true,
113 | "type": "singlestat",
114 | "valueFontSize": "80%",
115 | "valueMaps": [
116 | {
117 | "op": "=",
118 | "text": "N/A",
119 | "value": "null"
120 | }
121 | ],
122 | "valueName": "avg"
123 | },
124 | {
125 | "cacheTimeout": null,
126 | "colorBackground": false,
127 | "colorValue": false,
128 | "colors": [
129 | "rgba(245, 54, 54, 0.9)",
130 | "rgba(237, 129, 40, 0.89)",
131 | "rgba(50, 172, 45, 0.97)"
132 | ],
133 | "datasource": null,
134 | "decimals": 0,
135 | "format": "none",
136 | "gauge": {
137 | "maxValue": 100,
138 | "minValue": 0,
139 | "show": true,
140 | "thresholdLabels": false,
141 | "thresholdMarkers": true
142 | },
143 | "height": "200",
144 | "hideTimeOverride": true,
145 | "id": 27,
146 | "interval": null,
147 | "links": [],
148 | "mappingType": 1,
149 | "mappingTypes": [
150 | {
151 | "name": "value to text",
152 | "value": 1
153 | },
154 | {
155 | "name": "range to text",
156 | "value": 2
157 | }
158 | ],
159 | "maxDataPoints": 100,
160 | "nullPointMode": "connected",
161 | "nullText": null,
162 | "postfix": "%",
163 | "postfixFontSize": "50%",
164 | "prefix": "",
165 | "prefixFontSize": "50%",
166 | "rangeMaps": [
167 | {
168 | "from": "null",
169 | "text": "N/A",
170 | "to": "null"
171 | }
172 | ],
173 | "span": 4,
174 | "sparkline": {
175 | "fillColor": "rgba(31, 118, 189, 0.18)",
176 | "full": false,
177 | "lineColor": "rgb(31, 120, 193)",
178 | "show": false
179 | },
180 | "tableColumn": "",
181 | "targets": [
182 | {
183 | "expr": "sum(collectd_memory{environment=\"$env\", data_center=\"$dc\", memory!=\"free\"}) / sum(collectd_memory{environment=\"$env\", data_center=\"$dc\"}) * 100",
184 | "format": "time_series",
185 | "legendFormat": "",
186 | "refId": "A",
187 | "target": "scale(divideSeries(sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.{buffered,cached,slab_recl,slab_unrecl,used}), #B), 100)",
188 | "targetFull": "scale(divideSeries(sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.{buffered,cached,slab_recl,slab_unrecl,used}), sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.*)), 100)",
189 | "textEditor": false
190 | },
191 | {
192 | "expr": "sum(collectd_memory{environment=\"$env\", data_center=\"$dc\"})",
193 | "format": "time_series",
194 | "hide": true,
195 | "legendFormat": "",
196 | "refId": "B",
197 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.*)",
198 | "textEditor": false
199 | }
200 | ],
201 | "thresholds": "",
202 | "timeFrom": null,
203 | "timeShift": "10s",
204 | "title": "Cluster-Wide Memory Usage",
205 | "transparent": true,
206 | "type": "singlestat",
207 | "valueFontSize": "80%",
208 | "valueMaps": [
209 | {
210 | "op": "=",
211 | "text": "N/A",
212 | "value": "null"
213 | }
214 | ],
215 | "valueName": "avg"
216 | },
217 | {
218 | "cacheTimeout": null,
219 | "colorBackground": false,
220 | "colorValue": false,
221 | "colors": [
222 | "rgba(245, 54, 54, 0.9)",
223 | "rgba(237, 129, 40, 0.89)",
224 | "rgba(50, 172, 45, 0.97)"
225 | ],
226 | "datasource": null,
227 | "decimals": 0,
228 | "format": "none",
229 | "gauge": {
230 | "maxValue": 100,
231 | "minValue": 0,
232 | "show": true,
233 | "thresholdLabels": false,
234 | "thresholdMarkers": true
235 | },
236 | "height": "200",
237 | "hideTimeOverride": true,
238 | "id": 31,
239 | "interval": null,
240 | "links": [],
241 | "mappingType": 1,
242 | "mappingTypes": [
243 | {
244 | "name": "value to text",
245 | "value": 1
246 | },
247 | {
248 | "name": "range to text",
249 | "value": 2
250 | }
251 | ],
252 | "maxDataPoints": 100,
253 | "nullPointMode": "connected",
254 | "nullText": null,
255 | "postfix": "%",
256 | "postfixFontSize": "50%",
257 | "prefix": "",
258 | "prefixFontSize": "50%",
259 | "rangeMaps": [
260 | {
261 | "from": "null",
262 | "text": "N/A",
263 | "to": "null"
264 | }
265 | ],
266 | "span": 4,
267 | "sparkline": {
268 | "fillColor": "rgba(31, 118, 189, 0.18)",
269 | "full": false,
270 | "lineColor": "rgb(31, 120, 193)",
271 | "show": false
272 | },
273 | "tableColumn": "",
274 | "targets": [
275 | {
276 | "expr": "sum(collectd_df_df_complex{environment=\"$env\", data_center=\"$dc\", df=\"var-lib-cassandra\", type=\"used\"}) / sum(collectd_df_df_complex{environment=\"$env\", data_center=\"$dc\", df=\"var-lib-cassandra\"}) * 100",
277 | "format": "time_series",
278 | "legendFormat": "",
279 | "refId": "A",
280 | "target": "scale(divideSeries(sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.used), #B), 100)",
281 | "targetFull": "scale(divideSeries(sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.used), sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.*)), 100)",
282 | "textEditor": false
283 | },
284 | {
285 | "expr": "sum(collectd_df_df_complex{environment=\"$env\", data_center=\"$dc\", df=\"var-lib-cassandra\"})",
286 | "format": "time_series",
287 | "hide": true,
288 | "legendFormat": "",
289 | "refId": "B",
290 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.*)",
291 | "textEditor": false
292 | }
293 | ],
294 | "thresholds": "",
295 | "timeShift": "30s",
296 | "title": "Cluster-Wide Disk Usage",
297 | "transparent": true,
298 | "type": "singlestat",
299 | "valueFontSize": "80%",
300 | "valueMaps": [
301 | {
302 | "op": "=",
303 | "text": "N/A",
304 | "value": "null"
305 | }
306 | ],
307 | "valueName": "avg"
308 | },
309 | {
310 | "cacheTimeout": null,
311 | "colorBackground": false,
312 | "colorValue": false,
313 | "colors": [
314 | "rgba(245, 54, 54, 0.9)",
315 | "rgba(237, 129, 40, 0.89)",
316 | "rgba(50, 172, 45, 0.97)"
317 | ],
318 | "datasource": null,
319 | "decimals": 1,
320 | "format": "none",
321 | "gauge": {
322 | "maxValue": null,
323 | "minValue": 0,
324 | "show": false,
325 | "thresholdLabels": false,
326 | "thresholdMarkers": true
327 | },
328 | "hideTimeOverride": true,
329 | "id": 23,
330 | "interval": null,
331 | "links": [],
332 | "mappingType": 1,
333 | "mappingTypes": [
334 | {
335 | "name": "value to text",
336 | "value": 1
337 | },
338 | {
339 | "name": "range to text",
340 | "value": 2
341 | }
342 | ],
343 | "maxDataPoints": 100,
344 | "nullPointMode": "connected",
345 | "nullText": null,
346 | "postfix": " cores",
347 | "postfixFontSize": "50%",
348 | "prefix": "",
349 | "prefixFontSize": "50%",
350 | "rangeMaps": [
351 | {
352 | "from": "null",
353 | "text": "N/A",
354 | "to": "null"
355 | }
356 | ],
357 | "span": 2,
358 | "sparkline": {
359 | "fillColor": "rgba(31, 118, 189, 0.18)",
360 | "full": false,
361 | "lineColor": "rgb(31, 120, 193)",
362 | "show": true
363 | },
364 | "tableColumn": "",
365 | "targets": [
366 | {
367 | "expr": "sum(collectd_cpu_percent{environment=\"$env\", data_center=\"$dc\", cpu!=\"idle\"}) * 0.01",
368 | "format": "time_series",
369 | "legendFormat": "",
370 | "refId": "A",
371 | "target": "scale(sumSeries(absolute(offset(collectd.$env.cassandra.$dc.$host.cpu.percent.idle, -100))), 0.01)",
372 | "textEditor": false
373 | }
374 | ],
375 | "thresholds": "",
376 | "timeShift": "10s",
377 | "title": "Cluster-Wide CPU Usage",
378 | "transparent": true,
379 | "type": "singlestat",
380 | "valueFontSize": "80%",
381 | "valueMaps": [
382 | {
383 | "op": "=",
384 | "text": "N/A",
385 | "value": "null"
386 | }
387 | ],
388 | "valueName": "avg"
389 | },
390 | {
391 | "cacheTimeout": null,
392 | "colorBackground": false,
393 | "colorValue": false,
394 | "colors": [
395 | "rgba(245, 54, 54, 0.9)",
396 | "rgba(237, 129, 40, 0.89)",
397 | "rgba(50, 172, 45, 0.97)"
398 | ],
399 | "datasource": null,
400 | "decimals": 0,
401 | "format": "none",
402 | "gauge": {
403 | "maxValue": 100,
404 | "minValue": 0,
405 | "show": false,
406 | "thresholdLabels": false,
407 | "thresholdMarkers": true
408 | },
409 | "id": 22,
410 | "interval": null,
411 | "links": [],
412 | "mappingType": 1,
413 | "mappingTypes": [
414 | {
415 | "name": "value to text",
416 | "value": 1
417 | },
418 | {
419 | "name": "range to text",
420 | "value": 2
421 | }
422 | ],
423 | "maxDataPoints": 100,
424 | "nullPointMode": "connected",
425 | "nullText": null,
426 | "postfix": " cores",
427 | "postfixFontSize": "50%",
428 | "prefix": "",
429 | "prefixFontSize": "50%",
430 | "rangeMaps": [
431 | {
432 | "from": "null",
433 | "text": "N/A",
434 | "to": "null"
435 | }
436 | ],
437 | "span": 2,
438 | "sparkline": {
439 | "fillColor": "rgba(31, 118, 189, 0.18)",
440 | "full": false,
441 | "lineColor": "rgb(31, 120, 193)",
442 | "show": false
443 | },
444 | "tableColumn": "",
445 | "targets": [
446 | {
447 | "expr": "sum(collectd_cpu_count{environment=\"$env\", data_center=\"$dc\"})",
448 | "format": "time_series",
449 | "legendFormat": "",
450 | "refId": "A",
451 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.cpu.count)",
452 | "textEditor": false
453 | }
454 | ],
455 | "thresholds": "",
456 | "title": "Cluster-Wide CPU Allocation",
457 | "transparent": true,
458 | "type": "singlestat",
459 | "valueFontSize": "80%",
460 | "valueMaps": [
461 | {
462 | "op": "=",
463 | "text": "N/A",
464 | "value": "null"
465 | }
466 | ],
467 | "valueName": "max"
468 | },
469 | {
470 | "cacheTimeout": null,
471 | "colorBackground": false,
472 | "colorValue": false,
473 | "colors": [
474 | "rgba(245, 54, 54, 0.9)",
475 | "rgba(237, 129, 40, 0.89)",
476 | "rgba(50, 172, 45, 0.97)"
477 | ],
478 | "datasource": null,
479 | "decimals": 1,
480 | "format": "kbytes",
481 | "gauge": {
482 | "maxValue": null,
483 | "minValue": 0,
484 | "show": false,
485 | "thresholdLabels": false,
486 | "thresholdMarkers": true
487 | },
488 | "hideTimeOverride": true,
489 | "id": 25,
490 | "interval": null,
491 | "links": [],
492 | "mappingType": 1,
493 | "mappingTypes": [
494 | {
495 | "name": "value to text",
496 | "value": 1
497 | },
498 | {
499 | "name": "range to text",
500 | "value": 2
501 | }
502 | ],
503 | "maxDataPoints": 100,
504 | "nullPointMode": "connected",
505 | "nullText": null,
506 | "postfix": "",
507 | "postfixFontSize": "50%",
508 | "prefix": "",
509 | "prefixFontSize": "50%",
510 | "rangeMaps": [
511 | {
512 | "from": "null",
513 | "text": "N/A",
514 | "to": "null"
515 | }
516 | ],
517 | "span": 2,
518 | "sparkline": {
519 | "fillColor": "rgba(31, 118, 189, 0.18)",
520 | "full": false,
521 | "lineColor": "rgb(31, 120, 193)",
522 | "show": true
523 | },
524 | "tableColumn": "",
525 | "targets": [
526 | {
527 | "expr": "sum(collectd_memory{environment=\"$env\", data_center=\"$dc\", memory!=\"free\"})",
528 | "format": "time_series",
529 | "legendFormat": "",
530 | "refId": "A",
531 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.{buffered,cached,slab_recl,slab_unrecl,used})",
532 | "textEditor": false
533 | }
534 | ],
535 | "thresholds": "",
536 | "timeShift": "10s",
537 | "title": "Cluster-Wide Memory Usage",
538 | "transparent": true,
539 | "type": "singlestat",
540 | "valueFontSize": "80%",
541 | "valueMaps": [
542 | {
543 | "op": "=",
544 | "text": "N/A",
545 | "value": "null"
546 | }
547 | ],
548 | "valueName": "avg"
549 | },
550 | {
551 | "cacheTimeout": null,
552 | "colorBackground": false,
553 | "colorValue": false,
554 | "colors": [
555 | "rgba(245, 54, 54, 0.9)",
556 | "rgba(237, 129, 40, 0.89)",
557 | "rgba(50, 172, 45, 0.97)"
558 | ],
559 | "datasource": null,
560 | "decimals": 1,
561 | "format": "kbytes",
562 | "gauge": {
563 | "maxValue": 100,
564 | "minValue": 0,
565 | "show": false,
566 | "thresholdLabels": false,
567 | "thresholdMarkers": true
568 | },
569 | "id": 26,
570 | "interval": null,
571 | "links": [],
572 | "mappingType": 1,
573 | "mappingTypes": [
574 | {
575 | "name": "value to text",
576 | "value": 1
577 | },
578 | {
579 | "name": "range to text",
580 | "value": 2
581 | }
582 | ],
583 | "maxDataPoints": 100,
584 | "nullPointMode": "connected",
585 | "nullText": null,
586 | "postfix": "",
587 | "postfixFontSize": "50%",
588 | "prefix": "",
589 | "prefixFontSize": "50%",
590 | "rangeMaps": [
591 | {
592 | "from": "null",
593 | "text": "N/A",
594 | "to": "null"
595 | }
596 | ],
597 | "span": 2,
598 | "sparkline": {
599 | "fillColor": "rgba(31, 118, 189, 0.18)",
600 | "full": false,
601 | "lineColor": "rgb(31, 120, 193)",
602 | "show": false
603 | },
604 | "tableColumn": "",
605 | "targets": [
606 | {
607 | "expr": "sum(collectd_memory{environment=\"$env\", data_center=\"$dc\"})",
608 | "format": "time_series",
609 | "legendFormat": "",
610 | "refId": "A",
611 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.*)",
612 | "textEditor": false
613 | }
614 | ],
615 | "thresholds": "",
616 | "title": "Cluster-Wide Memory Allocation",
617 | "transparent": true,
618 | "type": "singlestat",
619 | "valueFontSize": "80%",
620 | "valueMaps": [
621 | {
622 | "op": "=",
623 | "text": "N/A",
624 | "value": "null"
625 | }
626 | ],
627 | "valueName": "max"
628 | },
629 | {
630 | "cacheTimeout": null,
631 | "colorBackground": false,
632 | "colorValue": false,
633 | "colors": [
634 | "rgba(245, 54, 54, 0.9)",
635 | "rgba(237, 129, 40, 0.89)",
636 | "rgba(50, 172, 45, 0.97)"
637 | ],
638 | "datasource": null,
639 | "decimals": 1,
640 | "format": "bytes",
641 | "gauge": {
642 | "maxValue": 100,
643 | "minValue": 0,
644 | "show": false,
645 | "thresholdLabels": false,
646 | "thresholdMarkers": true
647 | },
648 | "hideTimeOverride": true,
649 | "id": 29,
650 | "interval": null,
651 | "links": [],
652 | "mappingType": 1,
653 | "mappingTypes": [
654 | {
655 | "name": "value to text",
656 | "value": 1
657 | },
658 | {
659 | "name": "range to text",
660 | "value": 2
661 | }
662 | ],
663 | "maxDataPoints": 100,
664 | "nullPointMode": "connected",
665 | "nullText": null,
666 | "postfix": "",
667 | "postfixFontSize": "50%",
668 | "prefix": "",
669 | "prefixFontSize": "50%",
670 | "rangeMaps": [
671 | {
672 | "from": "null",
673 | "text": "N/A",
674 | "to": "null"
675 | }
676 | ],
677 | "span": 2,
678 | "sparkline": {
679 | "fillColor": "rgba(31, 118, 189, 0.18)",
680 | "full": false,
681 | "lineColor": "rgb(31, 120, 193)",
682 | "show": true
683 | },
684 | "tableColumn": "",
685 | "targets": [
686 | {
687 | "expr": "sum(collectd_df_df_complex{environment=\"$env\", data_center=\"$dc\", df=\"var-lib-cassandra\", type=\"used\"})",
688 | "format": "time_series",
689 | "legendFormat": "",
690 | "refId": "A",
691 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.used)",
692 | "textEditor": false
693 | }
694 | ],
695 | "thresholds": "",
696 | "timeShift": "30s",
697 | "title": "Cluster-Wide Disk Usage",
698 | "transparent": true,
699 | "type": "singlestat",
700 | "valueFontSize": "80%",
701 | "valueMaps": [
702 | {
703 | "op": "=",
704 | "text": "N/A",
705 | "value": "null"
706 | }
707 | ],
708 | "valueName": "avg"
709 | },
710 | {
711 | "cacheTimeout": null,
712 | "colorBackground": false,
713 | "colorValue": false,
714 | "colors": [
715 | "rgba(245, 54, 54, 0.9)",
716 | "rgba(237, 129, 40, 0.89)",
717 | "rgba(50, 172, 45, 0.97)"
718 | ],
719 | "datasource": null,
720 | "decimals": 1,
721 | "format": "bytes",
722 | "gauge": {
723 | "maxValue": 100,
724 | "minValue": 0,
725 | "show": false,
726 | "thresholdLabels": false,
727 | "thresholdMarkers": true
728 | },
729 | "id": 30,
730 | "interval": null,
731 | "links": [],
732 | "mappingType": 1,
733 | "mappingTypes": [
734 | {
735 | "name": "value to text",
736 | "value": 1
737 | },
738 | {
739 | "name": "range to text",
740 | "value": 2
741 | }
742 | ],
743 | "maxDataPoints": 100,
744 | "nullPointMode": "connected",
745 | "nullText": null,
746 | "postfix": "",
747 | "postfixFontSize": "50%",
748 | "prefix": "",
749 | "prefixFontSize": "50%",
750 | "rangeMaps": [
751 | {
752 | "from": "null",
753 | "text": "N/A",
754 | "to": "null"
755 | }
756 | ],
757 | "span": 2,
758 | "sparkline": {
759 | "fillColor": "rgba(31, 118, 189, 0.18)",
760 | "full": false,
761 | "lineColor": "rgb(31, 120, 193)",
762 | "show": false
763 | },
764 | "tableColumn": "",
765 | "targets": [
766 | {
767 | "expr": "sum(collectd_df_df_complex{environment=\"$env\", data_center=\"$dc\", df=\"var-lib-cassandra\"})",
768 | "format": "time_series",
769 | "legendFormat": "",
770 | "refId": "A",
771 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.*)",
772 | "textEditor": false
773 | }
774 | ],
775 | "thresholds": "",
776 | "title": "Cluster-Wide Disk Allocation",
777 | "transparent": true,
778 | "type": "singlestat",
779 | "valueFontSize": "80%",
780 | "valueMaps": [
781 | {
782 | "op": "=",
783 | "text": "N/A",
784 | "value": "null"
785 | }
786 | ],
787 | "valueName": "max"
788 | }
789 | ],
790 | "repeat": null,
791 | "repeatIteration": null,
792 | "repeatRowId": null,
793 | "showTitle": true,
794 | "title": "System-Level Metrics",
795 | "titleSize": "h6"
796 | }
797 | ],
798 | "schemaVersion": 14,
799 | "style": "dark",
800 | "tags": [
801 | "tlp",
802 | "cassandra"
803 | ],
804 | "templating": {
805 | "list": [
806 | {
807 | "allValue": null,
808 | "current": {
809 | "text": "environment",
810 | "value": "environment"
811 | },
812 | "datasource": "prometheus",
813 | "hide": 0,
814 | "includeAll": false,
815 | "label": "Environment",
816 | "multi": false,
817 | "name": "env",
818 | "options": [],
819 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, environment)",
820 | "refresh": 2,
821 | "regex": "",
822 | "sort": 1,
823 | "tagValuesQuery": "",
824 | "tags": [],
825 | "tagsQuery": "",
826 | "type": "query",
827 | "useTags": true
828 | },
829 | {
830 | "allValue": "",
831 | "current": {
832 | "text": "data_center",
833 | "value": "data_center"
834 | },
835 | "datasource": "prometheus",
836 | "hide": 0,
837 | "includeAll": false,
838 | "label": "Data Center",
839 | "multi": false,
840 | "name": "dc",
841 | "options": [],
842 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, data_center)",
843 | "refresh": 2,
844 | "regex": "",
845 | "sort": 1,
846 | "tagValuesQuery": null,
847 | "tags": [],
848 | "tagsQuery": null,
849 | "type": "query",
850 | "useTags": false
851 | },
852 | {
853 | "allValue": null,
854 | "current": {
855 | "text": "All",
856 | "value": "$__all"
857 | },
858 | "datasource": "prometheus",
859 | "hide": 0,
860 | "includeAll": true,
861 | "label": "Host",
862 | "multi": false,
863 | "name": "host",
864 | "options": [],
865 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, host)",
866 | "refresh": 2,
867 | "regex": "",
868 | "sort": 1,
869 | "tagValuesQuery": null,
870 | "tags": [],
871 | "tagsQuery": null,
872 | "type": "query",
873 | "useTags": false
874 | }
875 | ]
876 | },
877 | "time": {
878 | "from": "now-6h",
879 | "to": "now"
880 | },
881 | "timepicker": {
882 | "refresh_intervals": [
883 | "5s",
884 | "10s",
885 | "30s",
886 | "1m",
887 | "5m",
888 | "15m",
889 | "30m",
890 | "1h",
891 | "2h",
892 | "1d"
893 | ],
894 | "time_options": [
895 | "5m",
896 | "15m",
897 | "1h",
898 | "6h",
899 | "12h",
900 | "24h",
901 | "2d",
902 | "7d",
903 | "30d"
904 | ]
905 | },
906 | "timezone": "browser",
907 | "title": "TLP - Cassandra - Big Picture",
908 | "version": null
909 | },
910 | "overwrite": true
911 | }
912 |
--------------------------------------------------------------------------------
/grafana/dashboards/tlp-cassandra-client-connections.final.json:
--------------------------------------------------------------------------------
1 | {
2 | "dashboard": {
3 | "annotations": {
4 | "list": []
5 | },
6 | "editable": true,
7 | "gnetId": null,
8 | "graphTooltip": 0,
9 | "hideControls": false,
10 | "id": null,
11 | "links": [
12 | {
13 | "asDropdown": true,
14 | "icon": "external link",
15 | "includeVars": false,
16 | "keepTime": true,
17 | "tags": [
18 | "tlp",
19 | "cassandra"
20 | ],
21 | "title": "Other TLP Dashboards",
22 | "type": "dashboards"
23 | }
24 | ],
25 | "refresh": "5m",
26 | "rows": [
27 | {
28 | "collapse": false,
29 | "height": 250,
30 | "panels": [
31 | {
32 | "aliasColors": {},
33 | "bars": false,
34 | "dashLength": 10,
35 | "dashes": false,
36 | "datasource": null,
37 | "description": "The number of connected clients to each Cassandra node. Plots both native and Thrift protocol connections if available. Since Cassandra 2.2, Thrift connections are not opened by default.\n\n##### Values\n\nA Cassandra node supports up to 128 connections per node. Values above 128 are not to be expected.\n\nIt is important, however, that there is a balanced number of connections among the nodes.\n\n##### False Positives\n\nThis metric is pretty straightforward and should not fluctuate unexpectedly.\n\n##### Required Actions\n\nIf the number of connections approaches 128 per node, the client connection options and the load balancing strategy need to be revised.\n\n##### Warning\n\nAn unbalanced number of connections needs to be acted on.",
38 | "fill": 1,
39 | "id": 13,
40 | "legend": {
41 | "alignAsTable": true,
42 | "avg": true,
43 | "current": true,
44 | "hideZero": false,
45 | "max": true,
46 | "min": false,
47 | "rightSide": false,
48 | "show": false,
49 | "sort": "max",
50 | "sortDesc": true,
51 | "total": false,
52 | "values": true
53 | },
54 | "lines": true,
55 | "linewidth": 1,
56 | "links": [],
57 | "nullPointMode": "null",
58 | "percentage": false,
59 | "pointradius": 5,
60 | "points": false,
61 | "renderer": "flot",
62 | "seriesOverrides": [],
63 | "spaceLength": 10,
64 | "span": 6,
65 | "stack": false,
66 | "steppedLine": false,
67 | "targets": [
68 | {
69 | "expr": "org_apache_cassandra_metrics_Client_connectedNativeClients{environment=\"$env\", data_center=\"$dc\"}",
70 | "format": "time_series",
71 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.connectedNativeClients",
72 | "refId": "A",
73 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.Client.connectedNativeClients.value, 3, 9)"
74 | },
75 | {
76 | "expr": "org_apache_cassandra_metrics_Client_connectedThriftClients{environment=\"$env\", data_center=\"$dc\"}",
77 | "format": "time_series",
78 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.connectedThriftClients",
79 | "refId": "B",
80 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.Client.connectedThriftClients.value, 3, 9)"
81 | }
82 | ],
83 | "thresholds": [],
84 | "timeFrom": null,
85 | "timeShift": null,
86 | "title": "Connections (per host)",
87 | "tooltip": {
88 | "shared": false,
89 | "sort": 2,
90 | "value_type": "individual"
91 | },
92 | "transparent": true,
93 | "type": "graph",
94 | "xaxis": {
95 | "buckets": null,
96 | "mode": "time",
97 | "name": null,
98 | "show": true,
99 | "values": []
100 | },
101 | "yaxes": [
102 | {
103 | "format": "short",
104 | "label": "Client connections",
105 | "logBase": 1,
106 | "max": null,
107 | "min": "0",
108 | "show": true
109 | },
110 | {
111 | "format": "short",
112 | "label": null,
113 | "logBase": 1,
114 | "max": null,
115 | "min": null,
116 | "show": false
117 | }
118 | ]
119 | },
120 | {
121 | "aliasColors": {},
122 | "bars": false,
123 | "dashLength": 10,
124 | "dashes": false,
125 | "datasource": null,
126 | "description": "Number of client connections summed per data center.\n\n##### Values\n\nAbsolute values depend on the number of nodes per data center.\n\n##### False Positives\n\nIt is possible to have significant differences among the sums of connections to each data center. This can happen in situations when different data centers serve different cohorts of clients having peak load happening at different times.",
127 | "fill": 1,
128 | "id": 16,
129 | "legend": {
130 | "alignAsTable": true,
131 | "avg": true,
132 | "current": true,
133 | "max": true,
134 | "min": false,
135 | "show": false,
136 | "sort": "max",
137 | "sortDesc": true,
138 | "total": false,
139 | "values": true
140 | },
141 | "lines": true,
142 | "linewidth": 1,
143 | "links": [],
144 | "nullPointMode": "null",
145 | "percentage": false,
146 | "pointradius": 5,
147 | "points": false,
148 | "renderer": "flot",
149 | "seriesOverrides": [],
150 | "spaceLength": 10,
151 | "span": 6,
152 | "stack": false,
153 | "steppedLine": false,
154 | "targets": [
155 | {
156 | "expr": "sum(org_apache_cassandra_metrics_Client_connectedNativeClients{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center)",
157 | "format": "time_series",
158 | "hide": false,
159 | "legendFormat": "{{environment}}.{{data_center}}.connectedNativeClients",
160 | "refId": "A",
161 | "target": "aliasByNode(sumSeriesWithWildcards(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.Client.connectedNativeClients.value, 3), 2, 8)"
162 | },
163 | {
164 | "expr": "sum(org_apache_cassandra_metrics_Client_connectedThriftClients{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center)",
165 | "format": "time_series",
166 | "legendFormat": "{{environment}}.{{data_center}}.connectedThriftClients",
167 | "refId": "B",
168 | "target": "aliasByNode(sumSeriesWithWildcards(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.Client.connectedThriftClients.value, 3), 2, 8)"
169 | }
170 | ],
171 | "thresholds": [],
172 | "timeFrom": null,
173 | "timeShift": null,
174 | "title": "Connections (per data center)",
175 | "tooltip": {
176 | "shared": false,
177 | "sort": 2,
178 | "value_type": "individual"
179 | },
180 | "transparent": true,
181 | "type": "graph",
182 | "xaxis": {
183 | "buckets": null,
184 | "mode": "time",
185 | "name": null,
186 | "show": true,
187 | "values": []
188 | },
189 | "yaxes": [
190 | {
191 | "format": "short",
192 | "label": "Client connections",
193 | "logBase": 1,
194 | "max": null,
195 | "min": "0",
196 | "show": true
197 | },
198 | {
199 | "format": "short",
200 | "label": "",
201 | "logBase": 1,
202 | "max": null,
203 | "min": null,
204 | "show": false
205 | }
206 | ]
207 | },
208 | {
209 | "aliasColors": {},
210 | "bars": false,
211 | "dashLength": 10,
212 | "dashes": false,
213 | "datasource": null,
214 | "description": "A Cassandra client typically interacts with the cluster via a coordinator node, which is responsible for arranging the operation to be carried out.\n\nIf the operation takes too long the coordinator will time out the operation. This graph shows a rate of the timeouts happening per node.\n\n##### Values\n\nThis graph shows a per-minute rate of timeouts happening on each node, further broken down by the operation type.\n\n##### False Positives\n\nTimeouts should not happen under any circumstances. There are no false positives.\n\n##### Required Actions\n\nIf timeouts happen, their cause needs to be investigated. The top-level reasons for timeouts occurring include over-loaded nodes, misconfigured clients or ill-fitting data model (including sub-optimal queries).\n\n##### Warning\n\nTimeouts might percolate all the way to application level, where they can cause SLA degradation.",
215 | "fill": 1,
216 | "id": 14,
217 | "legend": {
218 | "alignAsTable": true,
219 | "avg": true,
220 | "current": true,
221 | "hideZero": false,
222 | "max": true,
223 | "min": false,
224 | "rightSide": false,
225 | "show": false,
226 | "sort": "max",
227 | "sortDesc": true,
228 | "total": false,
229 | "values": true
230 | },
231 | "lines": true,
232 | "linewidth": 1,
233 | "links": [],
234 | "nullPointMode": "null",
235 | "percentage": false,
236 | "pointradius": 5,
237 | "points": false,
238 | "renderer": "flot",
239 | "seriesOverrides": [],
240 | "spaceLength": 10,
241 | "span": 6,
242 | "stack": false,
243 | "steppedLine": false,
244 | "targets": [
245 | {
246 | "expr": "org_apache_cassandra_metrics_ClientRequest_Timeouts{environment=\"$env\", data_center=\"$dc\", unit=\"OneMinuteRate\"}",
247 | "format": "time_series",
248 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}.m1",
249 | "refId": "A",
250 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.Timeouts.1MinuteRate, 3, 9)",
251 | "textEditor": false
252 | }
253 | ],
254 | "thresholds": [],
255 | "timeFrom": null,
256 | "timeShift": null,
257 | "title": "Timeouts (per host)",
258 | "tooltip": {
259 | "shared": false,
260 | "sort": 2,
261 | "value_type": "individual"
262 | },
263 | "transparent": true,
264 | "type": "graph",
265 | "xaxis": {
266 | "buckets": null,
267 | "mode": "time",
268 | "name": null,
269 | "show": true,
270 | "values": []
271 | },
272 | "yaxes": [
273 | {
274 | "format": "opm",
275 | "label": "Timeouts",
276 | "logBase": 1,
277 | "max": null,
278 | "min": "0",
279 | "show": true
280 | },
281 | {
282 | "format": "short",
283 | "label": null,
284 | "logBase": 1,
285 | "max": null,
286 | "min": null,
287 | "show": false
288 | }
289 | ]
290 | },
291 | {
292 | "aliasColors": {},
293 | "bars": false,
294 | "dashLength": 10,
295 | "dashes": false,
296 | "datasource": null,
297 | "description": "Similar to `Timeouts (per host)` graph, with the difference that this graph sums the timeouts per data center.\n\nThe purpose of this graph is to provide insight if it is isolated nodes having problems, or if the problem is more wide-spread.\n\n##### Values\n\nAmount of timeouts happening in each datacenter per minute, further split among each operation type.",
298 | "fill": 1,
299 | "id": 15,
300 | "legend": {
301 | "alignAsTable": true,
302 | "avg": true,
303 | "current": true,
304 | "max": true,
305 | "min": false,
306 | "show": false,
307 | "sort": "max",
308 | "sortDesc": true,
309 | "total": false,
310 | "values": true
311 | },
312 | "lines": true,
313 | "linewidth": 1,
314 | "links": [],
315 | "nullPointMode": "null",
316 | "percentage": false,
317 | "pointradius": 5,
318 | "points": false,
319 | "renderer": "flot",
320 | "seriesOverrides": [],
321 | "spaceLength": 10,
322 | "span": 6,
323 | "stack": false,
324 | "steppedLine": false,
325 | "targets": [
326 | {
327 | "expr": "sum(org_apache_cassandra_metrics_ClientRequest_Timeouts{environment=\"$env\", data_center=\"$dc\", unit=\"OneMinuteRate\"}) by (environment, data_center, scope)",
328 | "format": "time_series",
329 | "legendFormat": "{{environment}}.{{data_center}}.{{scope}}.m1",
330 | "refId": "A",
331 | "target": "aliasByNode(sumSeriesWithWildcards(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.Timeouts.1MinuteRate, 3), 2, 8)",
332 | "textEditor": false
333 | }
334 | ],
335 | "thresholds": [],
336 | "timeFrom": null,
337 | "timeShift": null,
338 | "title": "Timeouts (per data center)",
339 | "tooltip": {
340 | "shared": false,
341 | "sort": 2,
342 | "value_type": "individual"
343 | },
344 | "transparent": true,
345 | "type": "graph",
346 | "xaxis": {
347 | "buckets": null,
348 | "mode": "time",
349 | "name": null,
350 | "show": true,
351 | "values": []
352 | },
353 | "yaxes": [
354 | {
355 | "format": "opm",
356 | "label": "Timeouts",
357 | "logBase": 1,
358 | "max": null,
359 | "min": "0",
360 | "show": true
361 | },
362 | {
363 | "format": "short",
364 | "label": "",
365 | "logBase": 1,
366 | "max": null,
367 | "min": null,
368 | "show": false
369 | }
370 | ]
371 | },
372 | {
373 | "aliasColors": {},
374 | "bars": false,
375 | "dashLength": 10,
376 | "dashes": false,
377 | "datasource": null,
378 | "description": "A Cassandra client typically interacts with the cluster via a coordinator node, which is responsible for arranging the operation to be carried out.\n\nFor certain operations, the coordinator needs responses from multiple nodes. If the coordinator can not possibly obtain responses from these nodes, an `Unavailable` error occurs.\n\nFor example, in a cluster with replication factor of three and two replicas down for a given partition, executing an operation with a `QUORUM` consistency will trigger an `Unavailable` error.\n\n##### Values\n\nThe values are a per-second rate of `Unavailable` errors occurring per node, further broken down by operation type.\n\n##### False Positives\n\nFalse positives should not happen.\n\nHowever, sometimes the cause of `Unavailable` errors is a discrepancy between keyspace topology and consistency used by the application. \n\n##### Required Actions\n\n`Unavailable` errors should not happen in clusters. If they do, their cause needs to be investigated and promptly addressed.\n\n##### Warning\n\n`Unavailable` errors can be a symptom of down nodes. Down nodes are generally a bad thing and should be investigated and promptly fixed.",
379 | "fill": 1,
380 | "id": 17,
381 | "legend": {
382 | "alignAsTable": true,
383 | "avg": true,
384 | "current": true,
385 | "hideZero": false,
386 | "max": true,
387 | "min": false,
388 | "show": false,
389 | "sort": "max",
390 | "sortDesc": true,
391 | "total": false,
392 | "values": true
393 | },
394 | "lines": true,
395 | "linewidth": 1,
396 | "links": [],
397 | "nullPointMode": "null",
398 | "percentage": false,
399 | "pointradius": 5,
400 | "points": false,
401 | "renderer": "flot",
402 | "seriesOverrides": [],
403 | "spaceLength": 10,
404 | "span": 6,
405 | "stack": false,
406 | "steppedLine": false,
407 | "targets": [
408 | {
409 | "expr": "org_apache_cassandra_metrics_ClientRequest_Unavailables{environment=\"$env\", data_center=\"$dc\", unit=\"OneMinuteRate\"}",
410 | "format": "time_series",
411 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}.m1",
412 | "refId": "A",
413 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.Unavailables.1MinuteRate, 3, 9)",
414 | "textEditor": false
415 | }
416 | ],
417 | "thresholds": [],
418 | "timeFrom": null,
419 | "timeShift": null,
420 | "title": "Unavailables (per host)",
421 | "tooltip": {
422 | "shared": false,
423 | "sort": 2,
424 | "value_type": "individual"
425 | },
426 | "transparent": true,
427 | "type": "graph",
428 | "xaxis": {
429 | "buckets": null,
430 | "mode": "time",
431 | "name": null,
432 | "show": true,
433 | "values": []
434 | },
435 | "yaxes": [
436 | {
437 | "format": "opm",
438 | "label": "Unavailable errors",
439 | "logBase": 1,
440 | "max": null,
441 | "min": "0",
442 | "show": true
443 | },
444 | {
445 | "format": "short",
446 | "label": "",
447 | "logBase": 1,
448 | "max": null,
449 | "min": null,
450 | "show": false
451 | }
452 | ]
453 | },
454 | {
455 | "aliasColors": {},
456 | "bars": false,
457 | "dashLength": 10,
458 | "dashes": false,
459 | "datasource": null,
460 | "description": "Similar to `Unavailables (per host)`, this graph shows the number of `Unavailable` errors occurring in the cluster, but sums the errors by data center.\n\n##### Values\n\nThe values are the number of `Unavailable` errors happening per minute in each data center, further broken down by operation type.",
461 | "fill": 1,
462 | "id": 18,
463 | "legend": {
464 | "alignAsTable": true,
465 | "avg": true,
466 | "current": true,
467 | "hideZero": false,
468 | "max": true,
469 | "min": false,
470 | "show": false,
471 | "sort": "max",
472 | "sortDesc": true,
473 | "total": false,
474 | "values": true
475 | },
476 | "lines": true,
477 | "linewidth": 1,
478 | "links": [],
479 | "nullPointMode": "null",
480 | "percentage": false,
481 | "pointradius": 5,
482 | "points": false,
483 | "renderer": "flot",
484 | "seriesOverrides": [],
485 | "spaceLength": 10,
486 | "span": 6,
487 | "stack": false,
488 | "steppedLine": false,
489 | "targets": [
490 | {
491 | "expr": "sum(org_apache_cassandra_metrics_ClientRequest_Unavailables{environment=\"$env\", data_center=\"$dc\", unit=\"OneMinuteRate\"}) by (environment, data_center, scope)",
492 | "format": "time_series",
493 | "legendFormat": "{{environment}}.{{data_center}}.{{scope}}.m1",
494 | "refId": "B",
495 | "target": "aliasByNode(sumSeriesWithWildcards(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.Unavailables.1MinuteRate, 3), 2, 8)",
496 | "textEditor": false
497 | }
498 | ],
499 | "thresholds": [],
500 | "timeFrom": null,
501 | "timeShift": null,
502 | "title": "Unavailables (per data center)",
503 | "tooltip": {
504 | "shared": false,
505 | "sort": 2,
506 | "value_type": "individual"
507 | },
508 | "transparent": true,
509 | "type": "graph",
510 | "xaxis": {
511 | "buckets": null,
512 | "mode": "time",
513 | "name": null,
514 | "show": true,
515 | "values": []
516 | },
517 | "yaxes": [
518 | {
519 | "format": "opm",
520 | "label": "Unavailable errors",
521 | "logBase": 1,
522 | "max": null,
523 | "min": "0",
524 | "show": true
525 | },
526 | {
527 | "format": "short",
528 | "label": "",
529 | "logBase": 1,
530 | "max": null,
531 | "min": null,
532 | "show": false
533 | }
534 | ]
535 | }
536 | ],
537 | "repeat": null,
538 | "repeatIteration": null,
539 | "repeatRowId": null,
540 | "showTitle": true,
541 | "title": "Client Connections Overview",
542 | "titleSize": "h6"
543 | },
544 | {
545 | "collapse": false,
546 | "height": 250,
547 | "panels": [
548 | {
549 | "aliasColors": {},
550 | "bars": false,
551 | "dashLength": 10,
552 | "dashes": false,
553 | "datasource": null,
554 | "description": "Cassandra groups tasks of a particular type into their own thread pool. Monitoring thread pools is important to understand the saturation of the node.\n\nIn this graph, a thread pool responsible for handling client CQL requests is shown.\n\n##### Values\n\nThe values are the current number of tasks in a particular state. The states tracked are:\n* **Active**: tasks actively worked on.\n* **Pending**: queued tasks.\n* **Blocked**: tasks blocked due to queue saturation. \n\n##### False Positives\n\nIt is acceptable for the `Pending` and `Blocked` tasks to have non-zero value.\n\n##### Required Actions\n\nIn case of sharp increase, or constant values in order of several tens, node capacity assessment needs to happen.",
555 | "fill": 1,
556 | "id": 3,
557 | "legend": {
558 | "alignAsTable": true,
559 | "avg": true,
560 | "current": true,
561 | "max": true,
562 | "min": false,
563 | "show": false,
564 | "sort": "max",
565 | "sortDesc": true,
566 | "total": false,
567 | "values": true
568 | },
569 | "lines": true,
570 | "linewidth": 1,
571 | "links": [],
572 | "nullPointMode": "null",
573 | "percentage": false,
574 | "pointradius": 5,
575 | "points": false,
576 | "renderer": "flot",
577 | "seriesOverrides": [],
578 | "spaceLength": 10,
579 | "span": 6,
580 | "stack": false,
581 | "steppedLine": false,
582 | "targets": [
583 | {
584 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"ActiveTasks\", thread_type=\"transport\", thread_pool=\"Native-Transport-Requests\"}",
585 | "format": "time_series",
586 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}",
587 | "refId": "A",
588 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.transport.Native-Transport-Requests.ActiveTasks.value, 3, 11)"
589 | },
590 | {
591 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"PendingTasks\", thread_type=\"transport\", thread_pool=\"Native-Transport-Requests\"}",
592 | "format": "time_series",
593 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}",
594 | "refId": "D",
595 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.transport.Native-Transport-Requests.PendingTasks.value, 3, 11)"
596 | },
597 | {
598 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"CurrentlyBlockedTasks\", thread_type=\"transport\", thread_pool=\"Native-Transport-Requests\"}",
599 | "format": "time_series",
600 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}",
601 | "refId": "C",
602 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.transport.Native-Transport-Requests.CurrentlyBlockedTasks.value, 3, 11)"
603 | }
604 | ],
605 | "thresholds": [],
606 | "timeFrom": null,
607 | "timeShift": null,
608 | "title": "Native Requests Pool (per host)",
609 | "tooltip": {
610 | "shared": false,
611 | "sort": 2,
612 | "value_type": "individual"
613 | },
614 | "transparent": true,
615 | "type": "graph",
616 | "xaxis": {
617 | "buckets": null,
618 | "mode": "time",
619 | "name": null,
620 | "show": true,
621 | "values": []
622 | },
623 | "yaxes": [
624 | {
625 | "format": "short",
626 | "label": "Threads",
627 | "logBase": 1,
628 | "max": null,
629 | "min": "0",
630 | "show": true
631 | },
632 | {
633 | "format": "short",
634 | "label": null,
635 | "logBase": 1,
636 | "max": null,
637 | "min": null,
638 | "show": false
639 | }
640 | ]
641 | },
642 | {
643 | "aliasColors": {},
644 | "bars": false,
645 | "dashLength": 10,
646 | "dashes": false,
647 | "datasource": null,
648 | "description": "Cassandra groups tasks of a particular type into their own thread pool. Monitoring thread pools is important to understand the saturation of the node.\n\nThis thread pool handles coordinator requests to the cluster.\n\n##### Values\n\nThe values are the current number of tasks in a particular state. The states tracked are:\n\n* **Active**: tasks actively worked on.\n* **Pending**: queued tasks.\n* **Blocked**: tasks blocked due to queue saturation.\n\n##### False Positives\n\nIt is acceptable for the `Pending` and `Blocked` tasks to have non-zero value.\n\n##### Required Actions\n\nIn case of sharp increase, or constant values in order of several tens, node capacity assessment needs to happen.",
649 | "fill": 1,
650 | "id": 4,
651 | "legend": {
652 | "alignAsTable": true,
653 | "avg": true,
654 | "current": true,
655 | "max": true,
656 | "min": false,
657 | "show": false,
658 | "sort": "max",
659 | "sortDesc": true,
660 | "total": false,
661 | "values": true
662 | },
663 | "lines": true,
664 | "linewidth": 1,
665 | "links": [],
666 | "nullPointMode": "null",
667 | "percentage": false,
668 | "pointradius": 5,
669 | "points": false,
670 | "renderer": "flot",
671 | "seriesOverrides": [],
672 | "spaceLength": 10,
673 | "span": 6,
674 | "stack": false,
675 | "steppedLine": false,
676 | "targets": [
677 | {
678 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"ActiveTasks\", thread_type=\"request\", thread_pool=\"RequestResponseStage\"}",
679 | "format": "time_series",
680 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}",
681 | "refId": "A",
682 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.request.RequestResponseStage.ActiveTasks.value, 3, 11)"
683 | },
684 | {
685 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"PendingTasks\", thread_type=\"request\", thread_pool=\"RequestResponseStage\"}",
686 | "format": "time_series",
687 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}",
688 | "refId": "B",
689 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.request.RequestResponseStage.PendingTasks.value, 3, 11)"
690 | },
691 | {
692 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"CurrentlyBlockedTasks\", thread_type=\"request\", thread_pool=\"RequestResponseStage\"}",
693 | "format": "time_series",
694 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}",
695 | "refId": "C",
696 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.request.RequestResponseStage.CurrentlyBlockedTasks.value, 3, 11)"
697 | }
698 | ],
699 | "thresholds": [],
700 | "timeFrom": null,
701 | "timeShift": null,
702 | "title": "Coordinator Requests Pool (per host)",
703 | "tooltip": {
704 | "shared": false,
705 | "sort": 2,
706 | "value_type": "individual"
707 | },
708 | "transparent": true,
709 | "type": "graph",
710 | "xaxis": {
711 | "buckets": null,
712 | "mode": "time",
713 | "name": null,
714 | "show": true,
715 | "values": []
716 | },
717 | "yaxes": [
718 | {
719 | "format": "short",
720 | "label": "Threads",
721 | "logBase": 1,
722 | "max": null,
723 | "min": "0",
724 | "show": true
725 | },
726 | {
727 | "format": "short",
728 | "label": null,
729 | "logBase": 1,
730 | "max": null,
731 | "min": null,
732 | "show": false
733 | }
734 | ]
735 | }
736 | ],
737 | "repeat": null,
738 | "repeatIteration": null,
739 | "repeatRowId": null,
740 | "showTitle": true,
741 | "title": "Thread Pools",
742 | "titleSize": "h6"
743 | },
744 | {
745 | "collapse": false,
746 | "height": 250,
747 | "panels": [
748 | {
749 | "aliasColors": {},
750 | "bars": false,
751 | "dashLength": 10,
752 | "dashes": false,
753 | "datasource": null,
754 | "description": "Cassandra allows server-side caching of CQL queries to avoid their repetitive parsing, which yields certain performance benefits.\n\nThis graph shows the number of statements cached. \n\n##### Values\n\nThe values are a number of cached statements per node. The exact value of the statements in cache varies, because the cache size is configured (since Cassandra 3.6) in bytes, rather than statement counts.\n\n##### False Positives\n\nMonotonic values in orders of hundreds can signal saturated cache. However, this situation needs to be correlated with the `Prepared Statements Eviction` graph to prove the saturation.\n\n##### Required Actions\n\nIt is unlikely the application actually needs the whole cache. If the saturation happens, it's typically a sign of repetitive statement preparation, which is sub-optimal.\n\n##### Warning\n\nSaturated prepared statements cache signals misuse on the application level, which should be corrected.",
755 | "fill": 1,
756 | "id": 5,
757 | "legend": {
758 | "alignAsTable": true,
759 | "avg": true,
760 | "current": true,
761 | "max": true,
762 | "min": false,
763 | "show": false,
764 | "sort": "max",
765 | "sortDesc": true,
766 | "total": false,
767 | "values": true
768 | },
769 | "lines": true,
770 | "linewidth": 1,
771 | "links": [],
772 | "nullPointMode": "null",
773 | "percentage": false,
774 | "pointradius": 5,
775 | "points": false,
776 | "renderer": "flot",
777 | "seriesOverrides": [],
778 | "spaceLength": 10,
779 | "span": 6,
780 | "stack": false,
781 | "steppedLine": false,
782 | "targets": [
783 | {
784 | "expr": "org_apache_cassandra_metrics_CQL_PreparedStatementsCount{environment=\"$env\", data_center=\"$dc\"}",
785 | "format": "time_series",
786 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.PreparedStatementsCount",
787 | "refId": "A",
788 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.CQL.PreparedStatementsCount.value, 3)"
789 | }
790 | ],
791 | "thresholds": [],
792 | "timeFrom": null,
793 | "timeShift": null,
794 | "title": "Prepared Statements Cache (by host)",
795 | "tooltip": {
796 | "shared": false,
797 | "sort": 2,
798 | "value_type": "individual"
799 | },
800 | "transparent": true,
801 | "type": "graph",
802 | "xaxis": {
803 | "buckets": null,
804 | "mode": "time",
805 | "name": null,
806 | "show": true,
807 | "values": []
808 | },
809 | "yaxes": [
810 | {
811 | "format": "short",
812 | "label": "Statements",
813 | "logBase": 1,
814 | "max": null,
815 | "min": "0",
816 | "show": true
817 | },
818 | {
819 | "format": "short",
820 | "label": null,
821 | "logBase": 1,
822 | "max": null,
823 | "min": null,
824 | "show": false
825 | }
826 | ]
827 | },
828 | {
829 | "aliasColors": {},
830 | "bars": false,
831 | "dashLength": 10,
832 | "dashes": false,
833 | "datasource": null,
834 | "description": "Cassandra allows server-side caching of CQL queries to avoid their repetitive parsing, which yields certain performance benefits.\n\nThis graph shows the eviction of prepared statements from their cache.\n\n##### Values\n\nThe values represent the number of statements evicted per second per node.\n\n##### False Positives\n\nIt is acceptable for a Cassandra node to evict some statements. This should happen sporadically, and exclusively after operator-triggered operations.\n\n##### Required Actions\n\nPrepared statement eviction occurring signals misuse at the application level which needs to be investigated and remedied.",
835 | "fill": 1,
836 | "id": 8,
837 | "legend": {
838 | "alignAsTable": true,
839 | "avg": true,
840 | "current": true,
841 | "max": true,
842 | "min": false,
843 | "show": false,
844 | "sort": "max",
845 | "sortDesc": true,
846 | "total": false,
847 | "values": true
848 | },
849 | "lines": true,
850 | "linewidth": 1,
851 | "links": [],
852 | "nullPointMode": "null",
853 | "percentage": false,
854 | "pointradius": 5,
855 | "points": false,
856 | "renderer": "flot",
857 | "seriesOverrides": [],
858 | "spaceLength": 10,
859 | "span": 6,
860 | "stack": false,
861 | "steppedLine": false,
862 | "targets": [
863 | {
864 | "expr": "deriv(org_apache_cassandra_metrics_CQL_PreparedStatementsEvicted{environment=\"$env\", data_center=\"$dc\"}[5m])",
865 | "format": "time_series",
866 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.PreparedStatementsEvicted",
867 | "refId": "A",
868 | "target": "aliasByNode(perSecond(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.CQL.PreparedStatementsEvicted.count), 3)"
869 | }
870 | ],
871 | "thresholds": [],
872 | "timeFrom": null,
873 | "timeShift": null,
874 | "title": "Prepared Statements Eviction (by host, perSecond() issues)",
875 | "tooltip": {
876 | "shared": false,
877 | "sort": 2,
878 | "value_type": "individual"
879 | },
880 | "transparent": true,
881 | "type": "graph",
882 | "xaxis": {
883 | "buckets": null,
884 | "mode": "time",
885 | "name": null,
886 | "show": true,
887 | "values": []
888 | },
889 | "yaxes": [
890 | {
891 | "format": "ops",
892 | "label": "Eviction rate",
893 | "logBase": 1,
894 | "max": null,
895 | "min": "0",
896 | "show": true
897 | },
898 | {
899 | "format": "short",
900 | "label": null,
901 | "logBase": 1,
902 | "max": null,
903 | "min": null,
904 | "show": false
905 | }
906 | ]
907 | },
908 | {
909 | "aliasColors": {},
910 | "bars": false,
911 | "dashLength": 10,
912 | "dashes": false,
913 | "datasource": null,
914 | "description": "This graph shows the ratio between prepared and regular statements executed.\n\n##### Values\n\nThe closer to `1.0` the value, the bigger the portion of prepared statements executed.\n\nThe values are reported per each host.\n\n##### Required Actions\n\nBased on the work load, if this ratio swings towards the unexpected type of requests executed, an investigation of the application should ensue.",
915 | "fill": 1,
916 | "id": 6,
917 | "legend": {
918 | "alignAsTable": true,
919 | "avg": true,
920 | "current": true,
921 | "max": true,
922 | "min": false,
923 | "show": false,
924 | "sort": "max",
925 | "sortDesc": true,
926 | "total": false,
927 | "values": true
928 | },
929 | "lines": true,
930 | "linewidth": 1,
931 | "links": [],
932 | "nullPointMode": "null",
933 | "percentage": false,
934 | "pointradius": 5,
935 | "points": false,
936 | "renderer": "flot",
937 | "seriesOverrides": [],
938 | "spaceLength": 10,
939 | "span": 6,
940 | "stack": false,
941 | "steppedLine": false,
942 | "targets": [
943 | {
944 | "expr": "org_apache_cassandra_metrics_CQL_PreparedStatementsRatio{environment=\"$env\", data_center=\"$dc\"}",
945 | "format": "time_series",
946 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.PreparedStatementsRatio",
947 | "refId": "A",
948 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.CQL.PreparedStatementsRatio.value, 3)"
949 | }
950 | ],
951 | "thresholds": [],
952 | "timeFrom": null,
953 | "timeShift": null,
954 | "title": "Prepared vs Unprepared (by host)",
955 | "tooltip": {
956 | "shared": false,
957 | "sort": 2,
958 | "value_type": "individual"
959 | },
960 | "transparent": true,
961 | "type": "graph",
962 | "xaxis": {
963 | "buckets": null,
964 | "mode": "time",
965 | "name": null,
966 | "show": true,
967 | "values": []
968 | },
969 | "yaxes": [
970 | {
971 | "format": "short",
972 | "label": null,
973 | "logBase": 1,
974 | "max": "1",
975 | "min": "0",
976 | "show": true
977 | },
978 | {
979 | "format": "short",
980 | "label": null,
981 | "logBase": 1,
982 | "max": null,
983 | "min": null,
984 | "show": false
985 | }
986 | ]
987 | },
988 | {
989 | "aliasColors": {},
990 | "bars": false,
991 | "dashLength": 10,
992 | "dashes": false,
993 | "datasource": null,
994 | "description": "Unlike prepared statements, the regular statements need to be parsed every time they are executed. Because of this, they might not be desired if performance is critical.\n\n##### Values\n\nCassandra reports only the total number of regular statements executed over its uptime. Therefore we're using Grafana's `perSecond()` aggregation to visualise a per-second rate of regular statement execution per host.\n\n##### False Positives\n\nRegular statements are sometimes inevitable. For example, the java-driver performs several regular statements upon initialising its connection in order to learn the cluster topology etc.",
995 | "fill": 1,
996 | "id": 19,
997 | "legend": {
998 | "alignAsTable": true,
999 | "avg": true,
1000 | "current": true,
1001 | "max": true,
1002 | "min": false,
1003 | "show": false,
1004 | "sort": "max",
1005 | "sortDesc": true,
1006 | "total": false,
1007 | "values": true
1008 | },
1009 | "lines": true,
1010 | "linewidth": 1,
1011 | "links": [],
1012 | "nullPointMode": "null",
1013 | "percentage": false,
1014 | "pointradius": 5,
1015 | "points": false,
1016 | "renderer": "flot",
1017 | "seriesOverrides": [],
1018 | "spaceLength": 10,
1019 | "span": 6,
1020 | "stack": false,
1021 | "steppedLine": false,
1022 | "targets": [
1023 | {
1024 | "expr": "deriv(org_apache_cassandra_metrics_CQL_RegularStatementsExecuted{environment=\"$env\", data_center=\"$dc\"}[5m])",
1025 | "format": "time_series",
1026 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.RegularStatementsExecuted",
1027 | "refId": "A",
1028 | "target": "aliasByNode(perSecond(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.CQL.RegularStatementsExecuted.count), 3)"
1029 | }
1030 | ],
1031 | "thresholds": [],
1032 | "timeFrom": null,
1033 | "timeShift": null,
1034 | "title": "Regular Statements Executed (by host, perSecond() issues)",
1035 | "tooltip": {
1036 | "shared": false,
1037 | "sort": 2,
1038 | "value_type": "individual"
1039 | },
1040 | "transparent": true,
1041 | "type": "graph",
1042 | "xaxis": {
1043 | "buckets": null,
1044 | "mode": "time",
1045 | "name": null,
1046 | "show": true,
1047 | "values": []
1048 | },
1049 | "yaxes": [
1050 | {
1051 | "format": "ops",
1052 | "label": "Statement rate",
1053 | "logBase": 1,
1054 | "max": null,
1055 | "min": "0",
1056 | "show": true
1057 | },
1058 | {
1059 | "format": "short",
1060 | "label": null,
1061 | "logBase": 1,
1062 | "max": null,
1063 | "min": null,
1064 | "show": false
1065 | }
1066 | ]
1067 | }
1068 | ],
1069 | "repeat": null,
1070 | "repeatIteration": null,
1071 | "repeatRowId": null,
1072 | "showTitle": true,
1073 | "title": "Statements",
1074 | "titleSize": "h6"
1075 | },
1076 | {
1077 | "collapse": false,
1078 | "height": 250,
1079 | "panels": [
1080 | {
1081 | "aliasColors": {},
1082 | "bars": false,
1083 | "dashLength": 10,
1084 | "dashes": false,
1085 | "datasource": null,
1086 | "description": "Cassandra exposes certain metrics related to Light-Weight Transactions (LWT) that might indicate problems:\n* **ConditionNotMet**: indicates a failure to meet a condition requested by the transaction. This might indicate a contention in modifying a single cell.\n* **UnfinishedCommit** indicates premature conclusions of Paxos rounds, which is mundane by itself and has mundane consequences too.\n\n##### Values\n\nThe values are per-second rates of both problematic behaviours occurring, broken down per node and per LWT type (read or write).\n\n##### False Positives\n\nIt is acceptable for these metrics to be non-zero if their values are small and happen infrequently.\n\n##### Required Actions\n\nIn case the values are constantly non-zero, the use-case needs to be revised as it might be designed in a sub-optimal way.",
1087 | "fill": 1,
1088 | "id": 11,
1089 | "legend": {
1090 | "alignAsTable": true,
1091 | "avg": true,
1092 | "current": true,
1093 | "max": true,
1094 | "min": false,
1095 | "show": false,
1096 | "sort": "max",
1097 | "sortDesc": true,
1098 | "total": false,
1099 | "values": true
1100 | },
1101 | "lines": true,
1102 | "linewidth": 1,
1103 | "links": [],
1104 | "nullPointMode": "null",
1105 | "percentage": false,
1106 | "pointradius": 5,
1107 | "points": false,
1108 | "renderer": "flot",
1109 | "seriesOverrides": [],
1110 | "spaceLength": 10,
1111 | "span": 6,
1112 | "stack": false,
1113 | "steppedLine": false,
1114 | "targets": [
1115 | {
1116 | "expr": "deriv(org_apache_cassandra_metrics_ClientRequest_ConditionNotMet{environment=\"$env\", data_center=\"$dc\"}[5m])",
1117 | "format": "time_series",
1118 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}.ConditionNotMet",
1119 | "refId": "A",
1120 | "target": "aliasByNode(perSecond(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.ConditionNotMet.count), 3, 9, 10)"
1121 | },
1122 | {
1123 | "expr": "deriv(org_apache_cassandra_metrics_ClientRequest_UnfinishedCommit{environment=\"$env\", data_center=\"$dc\"}[5m])",
1124 | "format": "time_series",
1125 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}.UnfinishedCommit",
1126 | "refId": "C",
1127 | "target": "aliasByNode(perSecond(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.UnfinishedCommit.count), 3, 9, 10)"
1128 | }
1129 | ],
1130 | "thresholds": [],
1131 | "timeFrom": null,
1132 | "timeShift": null,
1133 | "title": "LWT Problems (by host, perSecond() issues)",
1134 | "tooltip": {
1135 | "shared": false,
1136 | "sort": 2,
1137 | "value_type": "individual"
1138 | },
1139 | "transparent": true,
1140 | "type": "graph",
1141 | "xaxis": {
1142 | "buckets": null,
1143 | "mode": "time",
1144 | "name": null,
1145 | "show": true,
1146 | "values": []
1147 | },
1148 | "yaxes": [
1149 | {
1150 | "format": "ops",
1151 | "label": null,
1152 | "logBase": 1,
1153 | "max": null,
1154 | "min": "0",
1155 | "show": true
1156 | },
1157 | {
1158 | "format": "short",
1159 | "label": null,
1160 | "logBase": 1,
1161 | "max": null,
1162 | "min": null,
1163 | "show": false
1164 | }
1165 | ]
1166 | },
1167 | {
1168 | "aliasColors": {},
1169 | "bars": false,
1170 | "dashLength": 10,
1171 | "dashes": false,
1172 | "datasource": null,
1173 | "description": "Cassandra tracks total latency accumulated while carrying out a particular request. This happens at the coordinator level and represents the total time the request has spent in the cluster.\n\n##### Values\n\nCassandra reports an ever-increasing value representing microseconds spent. For visualisation we are using Grafana's derivative to gauge any trends.\n\n##### Required Actions\n\nTrend changes in the values shown in these graphs indicate the subtle degradations that can happen in a Cassandra cluster and should trigger an investigation.",
1174 | "fill": 1,
1175 | "id": 12,
1176 | "legend": {
1177 | "alignAsTable": true,
1178 | "avg": true,
1179 | "current": true,
1180 | "max": true,
1181 | "min": false,
1182 | "show": false,
1183 | "sort": "max",
1184 | "sortDesc": true,
1185 | "total": false,
1186 | "values": true
1187 | },
1188 | "lines": true,
1189 | "linewidth": 1,
1190 | "links": [],
1191 | "nullPointMode": "null",
1192 | "percentage": false,
1193 | "pointradius": 5,
1194 | "points": false,
1195 | "renderer": "flot",
1196 | "seriesOverrides": [],
1197 | "spaceLength": 10,
1198 | "span": 6,
1199 | "stack": false,
1200 | "steppedLine": false,
1201 | "targets": [
1202 | {
1203 | "expr": "deriv(org_apache_cassandra_metrics_ClientRequest_TotalLatency[5m])",
1204 | "format": "time_series",
1205 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}",
1206 | "refId": "D",
1207 | "target": "aliasByNode(derivative(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.TotalLatency.count), 3, 9)"
1208 | }
1209 | ],
1210 | "thresholds": [],
1211 | "timeFrom": null,
1212 | "timeShift": null,
1213 | "title": "Total Latency (possible derivative() issues)",
1214 | "tooltip": {
1215 | "shared": false,
1216 | "sort": 2,
1217 | "value_type": "individual"
1218 | },
1219 | "transparent": true,
1220 | "type": "graph",
1221 | "xaxis": {
1222 | "buckets": null,
1223 | "mode": "time",
1224 | "name": null,
1225 | "show": true,
1226 | "values": []
1227 | },
1228 | "yaxes": [
1229 | {
1230 | "format": "µs",
1231 | "label": null,
1232 | "logBase": 1,
1233 | "max": null,
1234 | "min": "0",
1235 | "show": true
1236 | },
1237 | {
1238 | "format": "short",
1239 | "label": null,
1240 | "logBase": 1,
1241 | "max": null,
1242 | "min": null,
1243 | "show": true
1244 | }
1245 | ]
1246 | }
1247 | ],
1248 | "repeat": null,
1249 | "repeatIteration": null,
1250 | "repeatRowId": null,
1251 | "showTitle": true,
1252 | "title": "Client Requests",
1253 | "titleSize": "h6"
1254 | }
1255 | ],
1256 | "schemaVersion": 14,
1257 | "style": "light",
1258 | "tags": [
1259 | "tlp",
1260 | "cassandra",
1261 | "beta"
1262 | ],
1263 | "templating": {
1264 | "list": [
1265 | {
1266 | "allValue": null,
1267 | "current": {
1268 | "text": "environment",
1269 | "value": "environment"
1270 | },
1271 | "datasource": "prometheus",
1272 | "hide": 0,
1273 | "includeAll": false,
1274 | "label": "Environment",
1275 | "multi": false,
1276 | "name": "env",
1277 | "options": [],
1278 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, environment)",
1279 | "refresh": 2,
1280 | "regex": "",
1281 | "sort": 1,
1282 | "tagValuesQuery": "",
1283 | "tags": [],
1284 | "tagsQuery": "",
1285 | "type": "query",
1286 | "useTags": true
1287 | },
1288 | {
1289 | "allValue": "",
1290 | "current": {
1291 | "text": "data_center",
1292 | "value": "data_center"
1293 | },
1294 | "datasource": "prometheus",
1295 | "hide": 0,
1296 | "includeAll": false,
1297 | "label": "Data Center",
1298 | "multi": false,
1299 | "name": "dc",
1300 | "options": [],
1301 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, data_center)",
1302 | "refresh": 2,
1303 | "regex": "",
1304 | "sort": 1,
1305 | "tagValuesQuery": null,
1306 | "tags": [],
1307 | "tagsQuery": null,
1308 | "type": "query",
1309 | "useTags": false
1310 | },
1311 | {
1312 | "allValue": null,
1313 | "current": {
1314 | "text": "All",
1315 | "value": "$__all"
1316 | },
1317 | "datasource": "prometheus",
1318 | "hide": 0,
1319 | "includeAll": true,
1320 | "label": "Host",
1321 | "multi": false,
1322 | "name": "host",
1323 | "options": [],
1324 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, host)",
1325 | "refresh": 2,
1326 | "regex": "",
1327 | "sort": 1,
1328 | "tagValuesQuery": null,
1329 | "tags": [],
1330 | "tagsQuery": null,
1331 | "type": "query",
1332 | "useTags": false
1333 | }
1334 | ]
1335 | },
1336 | "time": {
1337 | "from": "now-6h",
1338 | "to": "now"
1339 | },
1340 | "timepicker": {
1341 | "refresh_intervals": [
1342 | "5s",
1343 | "10s",
1344 | "30s",
1345 | "1m",
1346 | "5m",
1347 | "15m",
1348 | "30m",
1349 | "1h",
1350 | "2h",
1351 | "1d"
1352 | ],
1353 | "time_options": [
1354 | "5m",
1355 | "15m",
1356 | "1h",
1357 | "6h",
1358 | "12h",
1359 | "24h",
1360 | "2d",
1361 | "7d",
1362 | "30d"
1363 | ]
1364 | },
1365 | "timezone": "browser",
1366 | "title": "TLP - Cassandra - Client Connections",
1367 | "version": null
1368 | },
1369 | "overwrite": true
1370 | }
1371 |
--------------------------------------------------------------------------------
/grafana/dashboards/tlp-cassandra-reaper.final.json:
--------------------------------------------------------------------------------
1 | {
2 | "dashboard": {
3 | "annotations": {
4 | "list": []
5 | },
6 | "editable": true,
7 | "gnetId": null,
8 | "graphTooltip": 0,
9 | "hideControls": false,
10 | "id": null,
11 | "links": [
12 | {
13 | "asDropdown": true,
14 | "icon": "external link",
15 | "includeVars": false,
16 | "keepTime": true,
17 | "tags": [
18 | "tlp",
19 | "cassandra"
20 | ],
21 | "title": "Other TLP Dashboards",
22 | "type": "dashboards"
23 | }
24 | ],
25 | "refresh": "5m",
26 | "rows": [
27 | {
28 | "collapse": false,
29 | "height": "250px",
30 | "panels": [
31 | {
32 | "aliasColors": {},
33 | "bars": false,
34 | "dashLength": 10,
35 | "dashes": false,
36 | "datasource": null,
37 | "fill": 1,
38 | "id": 1,
39 | "legend": {
40 | "alignAsTable": true,
41 | "avg": true,
42 | "current": true,
43 | "max": true,
44 | "min": false,
45 | "show": false,
46 | "sort": "max",
47 | "sortDesc": true,
48 | "total": false,
49 | "values": true
50 | },
51 | "lines": true,
52 | "linewidth": 1,
53 | "links": [],
54 | "nullPointMode": "null",
55 | "percentage": false,
56 | "pointradius": 5,
57 | "points": false,
58 | "renderer": "flot",
59 | "seriesOverrides": [],
60 | "spaceLength": 10,
61 | "span": 12,
62 | "stack": false,
63 | "steppedLine": false,
64 | "targets": [
65 | {
66 | "expr": "avg(io_cassandrareaper_jmx_JmxConnectionFactory_jmxConnectionsIntializer{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center, host, quantile)",
67 | "format": "time_series",
68 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{quantile}}",
69 | "refId": "A",
70 | "target": ""
71 | }
72 | ],
73 | "thresholds": [],
74 | "timeFrom": null,
75 | "timeShift": null,
76 | "title": "JMX Connections to Cassandra",
77 | "tooltip": {
78 | "shared": false,
79 | "sort": 2,
80 | "value_type": "individual"
81 | },
82 | "transparent": true,
83 | "type": "graph",
84 | "xaxis": {
85 | "buckets": null,
86 | "mode": "time",
87 | "name": null,
88 | "show": true,
89 | "values": []
90 | },
91 | "yaxes": [
92 | {
93 | "format": "short",
94 | "label": "DUNNO",
95 | "logBase": 1,
96 | "max": null,
97 | "min": null,
98 | "show": true
99 | },
100 | {
101 | "format": "short",
102 | "label": null,
103 | "logBase": 1,
104 | "max": null,
105 | "min": null,
106 | "show": true
107 | }
108 | ]
109 | }
110 | ],
111 | "repeat": null,
112 | "repeatIteration": null,
113 | "repeatRowId": null,
114 | "showTitle": false,
115 | "title": "Dashboard Row",
116 | "titleSize": "h6"
117 | },
118 | {
119 | "collapse": false,
120 | "height": 250,
121 | "panels": [
122 | {
123 | "aliasColors": {},
124 | "bars": false,
125 | "dashLength": 10,
126 | "dashes": false,
127 | "datasource": null,
128 | "fill": 1,
129 | "id": 2,
130 | "legend": {
131 | "alignAsTable": true,
132 | "avg": true,
133 | "current": true,
134 | "max": true,
135 | "min": false,
136 | "show": false,
137 | "sort": "max",
138 | "sortDesc": true,
139 | "total": false,
140 | "values": true
141 | },
142 | "lines": true,
143 | "linewidth": 1,
144 | "links": [],
145 | "nullPointMode": "null",
146 | "percentage": false,
147 | "pointradius": 5,
148 | "points": false,
149 | "renderer": "flot",
150 | "seriesOverrides": [],
151 | "spaceLength": 10,
152 | "span": 12,
153 | "stack": false,
154 | "steppedLine": false,
155 | "targets": [
156 | {
157 | "expr": "io_cassandrareaper_jmx_JmxConnectionFactory_jmxConnectionsIntializer_count{environment=\"$env\", data_center=\"$dc\"}",
158 | "format": "time_series",
159 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}",
160 | "refId": "A",
161 | "target": ""
162 | }
163 | ],
164 | "thresholds": [],
165 | "timeFrom": null,
166 | "timeShift": null,
167 | "title": "Number of JMX Connections to Cassandra",
168 | "tooltip": {
169 | "shared": false,
170 | "sort": 2,
171 | "value_type": "individual"
172 | },
173 | "transparent": true,
174 | "type": "graph",
175 | "xaxis": {
176 | "buckets": null,
177 | "mode": "time",
178 | "name": null,
179 | "show": true,
180 | "values": []
181 | },
182 | "yaxes": [
183 | {
184 | "decimals": 0,
185 | "format": "short",
186 | "label": "Number of connections to Cassandra",
187 | "logBase": 1,
188 | "max": null,
189 | "min": null,
190 | "show": true
191 | },
192 | {
193 | "format": "short",
194 | "label": null,
195 | "logBase": 1,
196 | "max": null,
197 | "min": null,
198 | "show": true
199 | }
200 | ]
201 | }
202 | ],
203 | "repeat": null,
204 | "repeatIteration": null,
205 | "repeatRowId": null,
206 | "showTitle": false,
207 | "title": "Dashboard Row",
208 | "titleSize": "h6"
209 | },
210 | {
211 | "collapse": false,
212 | "height": 250,
213 | "panels": [
214 | {
215 | "aliasColors": {},
216 | "bars": false,
217 | "dashLength": 10,
218 | "dashes": false,
219 | "datasource": null,
220 | "fill": 1,
221 | "id": 3,
222 | "legend": {
223 | "alignAsTable": true,
224 | "avg": true,
225 | "current": true,
226 | "max": true,
227 | "min": false,
228 | "show": false,
229 | "sort": "max",
230 | "sortDesc": true,
231 | "total": false,
232 | "values": true
233 | },
234 | "lines": true,
235 | "linewidth": 1,
236 | "links": [],
237 | "nullPointMode": "null",
238 | "percentage": false,
239 | "pointradius": 5,
240 | "points": false,
241 | "renderer": "flot",
242 | "seriesOverrides": [],
243 | "spaceLength": 10,
244 | "span": 12,
245 | "stack": false,
246 | "steppedLine": false,
247 | "targets": [
248 | {
249 | "expr": "avg(io_cassandrareaper_service_SegmentRunner_open_files{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center, host, quantile)",
250 | "format": "time_series",
251 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{quantile}}",
252 | "refId": "A",
253 | "target": ""
254 | }
255 | ],
256 | "thresholds": [],
257 | "timeFrom": null,
258 | "timeShift": null,
259 | "title": "Number of Open Files",
260 | "tooltip": {
261 | "shared": false,
262 | "sort": 2,
263 | "value_type": "individual"
264 | },
265 | "transparent": true,
266 | "type": "graph",
267 | "xaxis": {
268 | "buckets": null,
269 | "mode": "time",
270 | "name": null,
271 | "show": true,
272 | "values": []
273 | },
274 | "yaxes": [
275 | {
276 | "decimals": 0,
277 | "format": "short",
278 | "label": "Open files",
279 | "logBase": 1,
280 | "max": null,
281 | "min": null,
282 | "show": true
283 | },
284 | {
285 | "format": "short",
286 | "label": null,
287 | "logBase": 1,
288 | "max": null,
289 | "min": null,
290 | "show": true
291 | }
292 | ]
293 | }
294 | ],
295 | "repeat": null,
296 | "repeatIteration": null,
297 | "repeatRowId": null,
298 | "showTitle": false,
299 | "title": "Dashboard Row",
300 | "titleSize": "h6"
301 | },
302 | {
303 | "collapse": false,
304 | "height": 250,
305 | "panels": [
306 | {
307 | "aliasColors": {},
308 | "bars": false,
309 | "dashLength": 10,
310 | "dashes": false,
311 | "datasource": null,
312 | "fill": 1,
313 | "id": 4,
314 | "legend": {
315 | "alignAsTable": true,
316 | "avg": true,
317 | "current": true,
318 | "max": true,
319 | "min": false,
320 | "show": false,
321 | "sort": "max",
322 | "sortDesc": true,
323 | "total": false,
324 | "values": true
325 | },
326 | "lines": true,
327 | "linewidth": 1,
328 | "links": [],
329 | "nullPointMode": "null",
330 | "percentage": false,
331 | "pointradius": 5,
332 | "points": false,
333 | "renderer": "flot",
334 | "seriesOverrides": [],
335 | "spaceLength": 10,
336 | "span": 12,
337 | "stack": false,
338 | "steppedLine": false,
339 | "targets": [
340 | {
341 | "expr": "avg(io_cassandrareaper_service_SegmentRunner_releaseLead{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center, host, quantile)",
342 | "format": "time_series",
343 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{quantile}}",
344 | "refId": "A",
345 | "target": ""
346 | }
347 | ],
348 | "thresholds": [],
349 | "timeFrom": null,
350 | "timeShift": null,
351 | "title": "SegmentRunner releaseLead",
352 | "tooltip": {
353 | "shared": false,
354 | "sort": 2,
355 | "value_type": "individual"
356 | },
357 | "transparent": true,
358 | "type": "graph",
359 | "xaxis": {
360 | "buckets": null,
361 | "mode": "time",
362 | "name": null,
363 | "show": true,
364 | "values": []
365 | },
366 | "yaxes": [
367 | {
368 | "format": "short",
369 | "label": "DUNNO",
370 | "logBase": 1,
371 | "max": null,
372 | "min": null,
373 | "show": true
374 | },
375 | {
376 | "format": "short",
377 | "label": null,
378 | "logBase": 1,
379 | "max": null,
380 | "min": null,
381 | "show": true
382 | }
383 | ]
384 | }
385 | ],
386 | "repeat": null,
387 | "repeatIteration": null,
388 | "repeatRowId": null,
389 | "showTitle": false,
390 | "title": "Dashboard Row",
391 | "titleSize": "h6"
392 | },
393 | {
394 | "collapse": false,
395 | "height": 250,
396 | "panels": [
397 | {
398 | "aliasColors": {},
399 | "bars": false,
400 | "dashLength": 10,
401 | "dashes": false,
402 | "datasource": null,
403 | "fill": 1,
404 | "id": 5,
405 | "legend": {
406 | "alignAsTable": true,
407 | "avg": true,
408 | "current": true,
409 | "max": true,
410 | "min": false,
411 | "show": false,
412 | "sort": "max",
413 | "sortDesc": true,
414 | "total": false,
415 | "values": true
416 | },
417 | "lines": true,
418 | "linewidth": 1,
419 | "links": [],
420 | "nullPointMode": "null",
421 | "percentage": false,
422 | "pointradius": 5,
423 | "points": false,
424 | "renderer": "flot",
425 | "seriesOverrides": [],
426 | "spaceLength": 10,
427 | "span": 12,
428 | "stack": false,
429 | "steppedLine": false,
430 | "targets": [
431 | {
432 | "expr": "avg(io_cassandrareaper_service_SegmentRunner_takeLead{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center, host, quantile)",
433 | "format": "time_series",
434 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{quantile}}",
435 | "refId": "A",
436 | "target": ""
437 | }
438 | ],
439 | "thresholds": [],
440 | "timeFrom": null,
441 | "timeShift": null,
442 | "title": "SegmentRunner takeLead",
443 | "tooltip": {
444 | "shared": false,
445 | "sort": 2,
446 | "value_type": "individual"
447 | },
448 | "transparent": true,
449 | "type": "graph",
450 | "xaxis": {
451 | "buckets": null,
452 | "mode": "time",
453 | "name": null,
454 | "show": true,
455 | "values": []
456 | },
457 | "yaxes": [
458 | {
459 | "format": "short",
460 | "label": "DUNNO",
461 | "logBase": 1,
462 | "max": null,
463 | "min": null,
464 | "show": true
465 | },
466 | {
467 | "format": "short",
468 | "label": null,
469 | "logBase": 1,
470 | "max": null,
471 | "min": null,
472 | "show": true
473 | }
474 | ]
475 | }
476 | ],
477 | "repeat": null,
478 | "repeatIteration": null,
479 | "repeatRowId": null,
480 | "showTitle": false,
481 | "title": "Dashboard Row",
482 | "titleSize": "h6"
483 | }
484 | ],
485 | "schemaVersion": 14,
486 | "style": "dark",
487 | "tags": [
488 | "tlp",
489 | "cassandra",
490 | "beta"
491 | ],
492 | "templating": {
493 | "list": [
494 | {
495 | "allValue": null,
496 | "current": {
497 | "text": "environment",
498 | "value": "environment"
499 | },
500 | "datasource": "prometheus",
501 | "hide": 0,
502 | "includeAll": false,
503 | "label": "Environment",
504 | "multi": false,
505 | "name": "env",
506 | "options": [],
507 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, environment)",
508 | "refresh": 2,
509 | "regex": "",
510 | "sort": 1,
511 | "tagValuesQuery": "",
512 | "tags": [],
513 | "tagsQuery": "",
514 | "type": "query",
515 | "useTags": true
516 | },
517 | {
518 | "allValue": "",
519 | "current": {
520 | "text": "data_center",
521 | "value": "data_center"
522 | },
523 | "datasource": "prometheus",
524 | "hide": 0,
525 | "includeAll": false,
526 | "label": "Data Center",
527 | "multi": false,
528 | "name": "dc",
529 | "options": [],
530 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, data_center)",
531 | "refresh": 2,
532 | "regex": "",
533 | "sort": 1,
534 | "tagValuesQuery": null,
535 | "tags": [],
536 | "tagsQuery": null,
537 | "type": "query",
538 | "useTags": false
539 | },
540 | {
541 | "allValue": null,
542 | "current": {
543 | "text": "All",
544 | "value": "$__all"
545 | },
546 | "datasource": "prometheus",
547 | "hide": 0,
548 | "includeAll": true,
549 | "label": "Host",
550 | "multi": false,
551 | "name": "host",
552 | "options": [],
553 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, host)",
554 | "refresh": 2,
555 | "regex": "",
556 | "sort": 1,
557 | "tagValuesQuery": null,
558 | "tags": [],
559 | "tagsQuery": null,
560 | "type": "query",
561 | "useTags": false
562 | }
563 | ]
564 | },
565 | "time": {
566 | "from": "now-6h",
567 | "to": "now"
568 | },
569 | "timepicker": {
570 | "refresh_intervals": [
571 | "5s",
572 | "10s",
573 | "30s",
574 | "1m",
575 | "5m",
576 | "15m",
577 | "30m",
578 | "1h",
579 | "2h",
580 | "1d"
581 | ],
582 | "time_options": [
583 | "5m",
584 | "15m",
585 | "1h",
586 | "6h",
587 | "12h",
588 | "24h",
589 | "2d",
590 | "7d",
591 | "30d"
592 | ]
593 | },
594 | "timezone": "",
595 | "title": "TLP - Cassandra - Reaper",
596 | "version": null
597 | },
598 | "overwrite": true
599 | }
600 |
--------------------------------------------------------------------------------
/grafana/grafana.env:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # the name of the grafana-server instance
4 | GF_DEFAULT_INSTANCE_NAME=pickle-grafana
5 |
6 | # only log to console
7 | GF_LOG_MODE=console
8 | GF_LOG_CONSOLE_LEVEL=info
9 | # GF_LOG_CONSOLE_FORMAT=json
10 |
11 | # install plugins
12 | GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-piechart-panel
13 |
14 | # debug purposes
15 | GF_SERVER_ROUTER_LOGGING=true
16 |
17 | # define url used in Alert messages
18 | GF_SERVER_DOMAIN=localhost
19 |
--------------------------------------------------------------------------------
/logspout/Dockerfile:
--------------------------------------------------------------------------------
1 | # uses ONBUILD instructions described here:
2 | # https://github.com/gliderlabs/logspout/tree/master/custom
3 |
4 | FROM gliderlabs/logspout:master
5 | ENV SYSLOG_FORMAT rfc3164
6 |
--------------------------------------------------------------------------------
/logspout/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Builds the logspout binary from the sources in /src; $1 is stamped in as main.Version.
3 | # commands unmodified from:
4 | # https://github.com/gliderlabs/logspout/blob/d6fe1803e9d9637d707ed57a873e46e6d0f0b2e6/custom/build.sh
5 | # Installs the Go toolchain, compiles, then removes the toolchain and the build tree again.
6 | set -e
7 | apk add --update go build-base git mercurial ca-certificates
8 | mkdir -p /go/src/github.com/gliderlabs
9 | cp -r /src /go/src/github.com/gliderlabs/logspout
10 | cd /go/src/github.com/gliderlabs/logspout
11 | export GOPATH=/go
12 | go get
13 | go build -ldflags "-X main.Version=$1" -o /bin/logspout
14 | apk del go git mercurial build-base
15 | rm -rf /go /var/cache/apk/* /root/.glide
16 |
17 | # backwards compatibility: make /var/run/docker.sock a symlink to /tmp/docker.sock
18 | ln -fs /tmp/docker.sock /var/run/docker.sock
19 |
--------------------------------------------------------------------------------
/logspout/logspout.env:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # tail settings
4 | # BACKLOG=false
5 | # TAIL=100
6 |
7 | # Logstash-specific
8 | # ROUTE_URIS=logstash+tcp://logstash:5000
9 | # LOGSTASH_TAGS=docker-elk
10 |
--------------------------------------------------------------------------------
/logspout/modules.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | // installs the Logstash adapter for Logspout, and required dependencies
4 | // https://github.com/looplab/logspout-logstash
5 | import (
6 | _ "github.com/gliderlabs/logspout/adapters/syslog"
7 | _ "github.com/gliderlabs/logspout/httpstream"
8 | _ "github.com/gliderlabs/logspout/transports/tcp"
9 | _ "github.com/looplab/logspout-logstash"
10 | )
11 |
--------------------------------------------------------------------------------
/pickle-factory/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:2
2 |
3 | WORKDIR /usr/src/app
4 |
5 | # copied from: https://github.com/tianon/gosu/blob/e87cf95808a7b16208515c49012aa3410bc5bba8/INSTALL.md
6 | #ENV GOSU_VERSION 1.10
7 | #RUN set -ex; \
8 | # \
9 | # fetchDeps=' \
10 | # ca-certificates \
11 | # wget \
12 | # '; \
13 | # apt-get update; \
14 | # apt-get install -y --no-install-recommends $fetchDeps; \
15 | # rm -rf /var/lib/apt/lists/*; \
16 | # \
17 | # dpkgArch="$(dpkg --print-architecture | awk -F- '{ print $NF }')"; \
18 | # wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch"; \
19 | # wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch.asc"; \
20 | # \
21 | ## verify the signature
22 | # export GNUPGHOME="$(mktemp -d)"; \
23 | # gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4; \
24 | # gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu; \
25 | # rm -r "$GNUPGHOME" /usr/local/bin/gosu.asc; \
26 | # \
27 | # chmod +x /usr/local/bin/gosu; \
28 | ## verify that the binary works
29 | # gosu nobody true; \
30 | # \
31 | # apt-get purge -y --auto-remove $fetchDeps
32 |
33 | # OS packages for the Python dependencies (libev presumably backs the driver's LibevConnection).
34 | # The apt package lists are removed in the same layer so they are not baked into the image.
35 | RUN apt-get update \
36 |  && apt-get install -y \
37 |       gcc \
38 |       python-dev python-snappy \
39 |       libev4 libev-dev \
40 |  && rm -rf /var/lib/apt/lists/*
41 | COPY requirements.txt ./
42 | RUN pip install --no-cache-dir -r requirements.txt
43 |
44 | COPY . .
45 |
46 | ENV USER pickle
47 | #RUN adduser pickle
48 | ENTRYPOINT ["/usr/src/app/docker-entrypoint.sh"]
49 |
50 | CMD [ "python", "./factory.py" ]
51 |
--------------------------------------------------------------------------------
/pickle-factory/docker-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Container entrypoint: runs whatever command was passed as arguments.
3 | set -ex
4 |
5 | #exec gosu ${USER} "$@"
6 | # exec replaces this shell with the command; quoting "$@" keeps each argument intact
7 | exec "$@"
8 |
--------------------------------------------------------------------------------
/pickle-factory/factory.py:
--------------------------------------------------------------------------------
1 | #!/usr/local/bin/env python
2 |
3 | import os
4 | import random
5 | import uuid
6 |
7 | from cassandra import ConsistencyLevel
8 | from cassandra.io.libevreactor import LibevConnection
9 | from cassandra.cluster import Cluster
10 | from cassandra.policies import DCAwareRoundRobinPolicy
11 |
12 |
13 |
14 | # connect to Cassandra
15 |
16 | CASSANDRA_HOST = os.environ['CASSANDRA_HOST']
17 | CASSANDRA_DC = os.environ['CASSANDRA_DC']
18 | cluster = Cluster([CASSANDRA_HOST],
19 | load_balancing_policy=DCAwareRoundRobinPolicy(
20 | local_dc=CASSANDRA_DC), )
21 | cluster.connection_class = LibevConnection
22 | session = cluster.connect()
23 |
24 |
25 |
26 | # prepare statements
27 | print 'Preparing statements...'
28 |
29 | insert_employee = session.prepare("""
30 | INSERT INTO pickle.employees
31 | (employee_id)
32 | VALUES
33 | (?)
34 | """)
35 | insert_employee.consistency_level = ConsistencyLevel.QUORUM
36 |
37 | insert_timesheet = session.prepare("""
38 | INSERT INTO pickle.timesheets
39 | (employee_id, pickle_tree_id, timestamp, pickle_count, pickle_avg_size, watered)
40 | VALUES
41 | (?, ?, ?, ?, ?, ?)
42 | """)
43 | insert_timesheet.consistency_level = ConsistencyLevel.QUORUM
44 |
45 | insert_tree = session.prepare("""
46 | INSERT INTO pickle.trees
47 | (pickle_tree_id, timestamp, employee_id, pickle_count, pickle_avg_size, watered)
48 | VALUES
49 | (?, ?, ?, ?, ?, ?)
50 | """)
51 | insert_tree.consistency_level = ConsistencyLevel.ONE
52 |
53 | insert_production = session.prepare("""
54 | INSERT INTO pickle.production
55 | (pickle_count, pickle_tree_id, timestamp)
56 | VALUES
57 | (?, ?, ?)
58 | """)
59 | insert_production.consistency_level = ConsistencyLevel.ONE
60 |
61 |
62 |
63 | # generate employee_ids
64 | print 'Generating employee IDs...'
65 |
66 | futures = []
67 | employee_uuids = []
68 | pickle_tree_ids = []
69 | for _ in xrange(100):
70 | employee_uuid = uuid.uuid4() # random uuid
71 |
72 | future = session.execute_async(insert_employee, (employee_uuid,))
73 | employee_uuids.append(employee_uuid)
74 |
75 | pickle_tree_id = uuid.uuid4() # randome uuid
76 | pickle_tree_ids.append(pickle_tree_id)
77 |
78 | # confirm all futures were written
79 | while futures:
80 | print 'Committing employee information...'
81 | future = futures.pop()
82 | future.result()
83 |
84 |
85 |
86 | # generate simulated workforce
87 |
88 | for _ in xrange(100000):
89 | employee_uuid = random.choice(employee_uuids)
90 | pickle_tree_id = random.choice(pickle_tree_ids)
91 | timestamp = uuid.uuid1() # contains time information
92 | pickle_count = random.randint(0, 100)
93 | pickle_avg_size = random.uniform(0, 2)
94 | watered = random.randint(0, 1)
95 |
96 | future = session.execute_async(insert_timesheet,
97 | (employee_uuid, pickle_tree_id, timestamp,
98 | pickle_count, pickle_avg_size, watered))
99 | futures.append(future)
100 |
101 | future = session.execute_async(insert_tree,
102 | (pickle_tree_id, timestamp, employee_uuid,
103 | pickle_count, pickle_avg_size, watered))
104 | futures.append(future)
105 |
106 | future = session.execute_async(insert_production,
107 | (pickle_count, pickle_tree_id, timestamp))
108 | futures.append(future)
109 |
110 | if len(futures) > 3000:
111 | print 'Committing timesheets...'
112 | while futures:
113 | future = futures.pop()
114 | future.result()
115 |
116 | # confirm all futures were written
117 | while futures:
118 | print 'Committing last timesheets...'
119 | future = futures.pop()
120 | future.result()
121 |
122 | print 'Done.'
123 |
--------------------------------------------------------------------------------
/pickle-factory/requirements.txt:
--------------------------------------------------------------------------------
1 | cassandra-driver==3.11.0
2 | lz4==0.10.1
3 | scales==1.0.9
4 |
--------------------------------------------------------------------------------
/pickle-shop/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:2
2 |
3 | WORKDIR /usr/src/app
4 |
5 | # copied from: https://github.com/tianon/gosu/blob/e87cf95808a7b16208515c49012aa3410bc5bba8/INSTALL.md
6 | #ENV GOSU_VERSION 1.10
7 | #RUN set -ex; \
8 | # \
9 | # fetchDeps=' \
10 | # ca-certificates \
11 | # wget \
12 | # '; \
13 | # apt-get update; \
14 | # apt-get install -y --no-install-recommends $fetchDeps; \
15 | # rm -rf /var/lib/apt/lists/*; \
16 | # \
17 | # dpkgArch="$(dpkg --print-architecture | awk -F- '{ print $NF }')"; \
18 | # wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch"; \
19 | # wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch.asc"; \
20 | # \
21 | ## verify the signature
22 | # export GNUPGHOME="$(mktemp -d)"; \
23 | # gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4; \
24 | # gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu; \
25 | # rm -r "$GNUPGHOME" /usr/local/bin/gosu.asc; \
26 | # \
27 | # chmod +x /usr/local/bin/gosu; \
28 | ## verify that the binary works
29 | # gosu nobody true; \
30 | # \
31 | # apt-get purge -y --auto-remove $fetchDeps
32 |
33 | # OS packages for the Python dependencies (libev presumably backs the driver's LibevConnection).
34 | # The apt package lists are removed in the same layer so they are not baked into the image.
35 | RUN apt-get update \
36 |  && apt-get install -y \
37 |       gcc \
38 |       python-dev python-snappy \
39 |       libev4 libev-dev \
40 |  && rm -rf /var/lib/apt/lists/*
41 | COPY requirements.txt ./
42 | RUN pip install --no-cache-dir -r requirements.txt
43 |
44 | COPY . .
45 |
46 | ENV USER pickle
47 | #RUN adduser pickle
48 | ENTRYPOINT ["/usr/src/app/docker-entrypoint.sh"]
49 |
50 | CMD [ "python", "./shop.py" ]
51 |
--------------------------------------------------------------------------------
/pickle-shop/docker-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Container entrypoint: runs whatever command was passed as arguments.
3 | set -ex
4 |
5 | #exec gosu ${USER} "$@"
6 | # exec replaces this shell with the command; quoting "$@" keeps each argument intact
7 | exec "$@"
8 |
--------------------------------------------------------------------------------
/pickle-shop/requirements.txt:
--------------------------------------------------------------------------------
1 | cassandra-driver==3.11.0
2 | lz4==0.10.1
3 | scales==1.0.9
4 |
--------------------------------------------------------------------------------
/pickle-shop/shop.py:
--------------------------------------------------------------------------------
1 | #!/usr/local/bin/env python
2 |
3 | import os
4 | import random
5 |
6 | from cassandra import ConsistencyLevel
7 | from cassandra.io.libevreactor import LibevConnection
8 | from cassandra.cluster import Cluster
9 | from cassandra.policies import DCAwareRoundRobinPolicy
10 |
11 |
12 |
13 | # connect to Cassandra
14 |
15 | CASSANDRA_HOST = os.environ['CASSANDRA_HOST']
16 | CASSANDRA_DC = os.environ['CASSANDRA_DC']
17 | cluster = Cluster([CASSANDRA_HOST],
18 | load_balancing_policy=DCAwareRoundRobinPolicy(
19 | local_dc=CASSANDRA_DC), )
20 | cluster.connection_class = LibevConnection
21 | session = cluster.connect()
22 |
23 |
24 |
25 | # Prepare the two read statements; both are executed at QUORUM consistency.
26 | print 'Preparing statements...'
27 |
28 | select_employee = session.prepare("""
29 | SELECT * FROM pickle.employees
30 | """)
31 | select_employee.consistency_level = ConsistencyLevel.QUORUM
32 | # The ? placeholder is bound to an employee_id when the statement is executed.
33 | select_timesheet = session.prepare("""
34 | SELECT * FROM pickle.timesheets
35 | WHERE employee_id = ?
36 | """)
37 | select_timesheet.consistency_level = ConsistencyLevel.QUORUM
38 |
39 |
40 |
41 | # synchronous execution of prepared statements
42 | print 'Finding all employees...'
43 |
44 | employee_ids = []
45 | result = session.execute(select_employee)
46 | for row in result:
47 | employee_ids.append(row.employee_id)
48 |
49 | print 'Found %s employees!' % len(employee_ids)
50 |
51 |
52 |
53 | # sample workforce activity for 10 employees
54 | sample = random.sample(employee_ids, 10)
55 |
56 | # asynchronous multi-get
57 | print 'Perform multiple asynchronous read queries...'
58 |
59 | futures = []
60 | for employee_id in sample:
61 | future = session.execute_async(select_timesheet, (employee_id,))
62 | futures.append(future)
63 |
64 |
65 |
66 | # process returned results
67 |
68 | print 'Total Pickles Picked'
69 | print '===================='
70 |
71 | for future in futures:
72 | result = future.result()
73 |
74 | employee_id = None
75 | pickle_counts = 0
76 | for row in result:
77 | employee_id = row.employee_id
78 | pickle_counts += row.pickle_count
79 |
80 | if employee_id:
81 | print '%s: %s' % (employee_id, pickle_counts)
82 |
--------------------------------------------------------------------------------
/pickle.env:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CASSANDRA_HOST=cassandra
4 | CASSANDRA_DC=pickle-east
5 |
--------------------------------------------------------------------------------
/prometheus/config/prometheus.yml:
--------------------------------------------------------------------------------
1 | global:
2 | scrape_interval: 30s
3 | evaluation_interval: 15s
4 | # scrape_timeout is set to the global default (10s).
5 |
6 | # Attach these labels to any time series or alerts when communicating with
7 | # external systems (federation, remote storage, Alertmanager).
8 | external_labels:
9 | monitor: 'pickle-farm'
10 |
11 | scrape_configs:
12 | - job_name: 'prometheus'
13 | static_configs:
14 | - targets: ['localhost:9090']
15 |
16 | - job_name: 'cassandra'
17 | static_configs:
18 | - targets: ['cassandra:7070']
19 | labels:
20 | service: 'cassandra'
21 | component: 'cassandra'
22 | environment: 'environment'
23 | data_center: 'data_center'
24 | host: 'cassandra'
25 | - targets: ['cassandra:9103']
26 | labels:
27 | service: 'cassandra'
28 | component: 'collectd'
29 | environment: 'environment'
30 | data_center: 'data_center'
31 | host: 'cassandra'
32 |
33 | - job_name: 'cassandra-reaper'
34 | metrics_path: '/prometheusMetrics'
35 | static_configs:
36 | - targets: ['cassandra-reaper:8081']
37 | labels:
38 | service: 'cassandra'
39 | component: 'cassandra-reaper'
40 | environment: 'environment'
41 | data_center: 'data_center'
42 | host: 'cassandra-reaper'
43 |
--------------------------------------------------------------------------------