├── .env.template ├── .gitignore ├── README.md ├── cassandra-reaper └── cassandra-reaper.env ├── cassandra ├── Dockerfile ├── cassandra.env ├── config │ ├── collectd.cassandra.conf │ ├── filebeat.yml │ ├── graphite.cassandra.yaml │ ├── jmxremote.access │ ├── jmxremote.password │ └── prometheus.yml ├── docker-entrypoint.sh ├── lib │ ├── jmx_prometheus_javaagent-0.9.jar │ ├── metrics-core-3.1.2.jar │ ├── metrics-graphite-3.1.2.jar │ ├── reporter-config-base-3.0.3.jar │ └── reporter-config3-3.0.3.jar └── schema.cql ├── docker-compose.yml ├── grafana ├── bin │ ├── create-data-sources.sh │ └── upload-dashboards.sh ├── dashboards │ ├── tlp-cassandra-alerts.final.json │ ├── tlp-cassandra-big-picture.final.json │ ├── tlp-cassandra-client-connections.final.json │ ├── tlp-cassandra-overview.final.json │ ├── tlp-cassandra-read-path.final.json │ ├── tlp-cassandra-reaper.final.json │ └── tlp-cassandra-write-path.final.json └── grafana.env ├── logspout ├── Dockerfile ├── build.sh ├── logspout.env └── modules.go ├── pickle-factory ├── Dockerfile ├── docker-entrypoint.sh ├── factory.py └── requirements.txt ├── pickle-shop ├── Dockerfile ├── docker-entrypoint.sh ├── requirements.txt └── shop.py ├── pickle.env └── prometheus └── config └── prometheus.yml /.env.template: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PAPERTRAIL_PORT=55555 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .DS_Store 3 | 4 | data/ 5 | .env 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Blog Post 2 | 3 | A new blog post covering each of the main components of this project can be found here: 4 | 5 | 
http://thelastpickle.com/blog/2018/01/23/docker-meet-cassandra.html 6 | 7 | # Pre-Meetup Setup 8 | 9 | ```bash 10 | git clone git@github.com:thelastpickle/docker-cassandra-bootstrap.git 11 | cd docker-cassandra-bootstrap 12 | cp .env.template .env 13 | docker-compose build 14 | ``` 15 | 16 | If you would like to see a hosted log service interact seamlessly with this 17 | Docker Compose stack, sign up for [Papertrail](https://papertrailapp.com/?thank=1ad15b). 18 | 19 | Then find your specific port number by looking at your 20 | [Log Destinations](https://papertrailapp.com/account/destinations) and update 21 | your `.env` setting accordingly. 22 | 23 | # Starting From Scratch 24 | 25 | ```bash 26 | # turn off all running Docker containers 27 | docker-compose down 28 | 29 | # delete any persistent data 30 | rm -rf data/ 31 | 32 | # rebuild the images 33 | docker-compose build 34 | ``` 35 | 36 | 37 | # Meetup Workflow 38 | 39 | Start our Docker-integrated logging connector: 40 | 41 | ```bash 42 | # start Docker logging connector 43 | docker-compose up logspout 44 | 45 | # view logging HTTP endpoint 46 | curl http://localhost:8000/logs 47 | ``` 48 | 49 | Start Cassandra and set up the required schema: 50 | 51 | ```bash 52 | # start Cassandra 53 | docker-compose up cassandra 54 | 55 | # view cluster status 56 | docker-compose run nodetool status 57 | 58 | # create schema 59 | docker-compose run cqlsh -f /schema.cql 60 | 61 | # confirm schema 62 | docker-compose run cqlsh -e "DESCRIBE SCHEMA;" 63 | ``` 64 | 65 | Start Reaper for Apache Cassandra and monitor your new cluster: 66 | 67 | ```bash 68 | # start Reaper for Apache Cassandra 69 | docker-compose up cassandra-reaper 70 | 71 | open http://localhost:8080/webui/ 72 | 73 | # add one-off repair 74 | 75 | # add scheduled repair 76 | ``` 77 | 78 | Start Prometheus and become familiar with the UI: 79 | 80 | ```bash 81 | # start Prometheus 82 | docker-compose up prometheus 83 | 84 | open http://localhost:9090 85 | ``` 86 | 87 
| Start Grafana, connect it to the Prometheus data source, and upload the TLP 88 | Dashboards. 89 | 90 | ```bash 91 | # start Grafana 92 | docker-compose up grafana 93 | 94 | # create data sources 95 | ./grafana/bin/create-data-sources.sh 96 | 97 | # user/pass: admin/admin 98 | open http://localhost:3000 99 | 100 | # upload dashboards 101 | ./grafana/bin/upload-dashboards.sh 102 | ``` 103 | 104 | # Sample Application 105 | 106 | Generate fake workforce and activity: 107 | 108 | ```bash 109 | docker-compose run pickle-factory 110 | ``` 111 | 112 | Sample timesheets: 113 | 114 | ```bash 115 | docker-compose run pickle-shop 116 | ``` 117 | -------------------------------------------------------------------------------- /cassandra-reaper/cassandra-reaper.env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | REAPER_CASS_KEYSPACE=reaper_db 4 | REAPER_STORAGE_TYPE=cassandra 5 | REAPER_CASS_CLUSTER_NAME=pickle-db 6 | REAPER_CASS_CONTACT_POINTS=[cassandra] 7 | 8 | # use the credentials that match the ./cassandra/config/jmxremote.* configurations 9 | REAPER_JMX_USERNAME=reaperUser 10 | REAPER_JMX_PASSWORD=reaperPass 11 | -------------------------------------------------------------------------------- /cassandra/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM cassandra:3.11 2 | 3 | # install wget, for the custom metrics-graphite reporter 4 | #RUN set -x \ 5 | # && apt-get update \ 6 | # && apt-get install -y --no-install-recommends \ 7 | # wget \ 8 | # && rm -rf /var/lib/apt/lists/* 9 | 10 | # install the custom metrics-graphite reporter, to allow measurement filtering 11 | #RUN echo "JVM_OPTS=\"\$JVM_OPTS -Dcassandra.metricsReporterConfigFile=graphite.yaml\"" >> /etc/cassandra/cassandra-env.sh 12 | #RUN wget -P /usr/share/cassandra/lib/ \ 13 | # http://central.maven.org/maven2/net/java/dev/jna/jna/4.0.0/jna-4.0.0.jar 14 | #RUN rm 
/usr/share/cassandra/lib/metrics-core-3.1.0.jar \ 15 | # /usr/share/cassandra/lib/reporter-config-base-3.0.0.jar \ 16 | # /usr/share/cassandra/lib/reporter-config3-3.0.0.jar 17 | #COPY lib/metrics-core-3.1.2.jar \ 18 | # lib/metrics-graphite-3.1.2.jar \ 19 | # lib/reporter-config-base-3.0.3.jar \ 20 | # lib/reporter-config3-3.0.3.jar \ 21 | # /usr/share/cassandra/lib/ 22 | 23 | # install Java 8, which is required for the custom metrics reporter used above 24 | #RUN set -x \ 25 | # && echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu xenial main" \ 26 | # | tee /etc/apt/sources.list.d/webupd8team-java.list \ 27 | # && echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu xenial main" \ 28 | # | tee -a /etc/apt/sources.list.d/webupd8team-java.list \ 29 | # && apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886 \ 30 | # && apt-get update \ 31 | # && echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true \ 32 | # | /usr/bin/debconf-set-selections \ 33 | # && apt-get install -y \ 34 | # oracle-java8-installer \ 35 | # oracle-java8-set-default 36 | 37 | # install filebeat for Logstash ingestion 38 | #ENV ELK_VERSION 5.3.0 39 | #ENV DOWNLOAD_URL https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-${ELK_VERSION}-amd64.deb 40 | #RUN set -x \ 41 | # && apt-get update \ 42 | # && apt-get install -y --no-install-recommends \ 43 | # curl \ 44 | # && rm -rf /var/lib/apt/lists/* \ 45 | # && curl -L -O ${DOWNLOAD_URL} \ 46 | # && dpkg -i filebeat-${ELK_VERSION}-amd64.deb \ 47 | # && rm filebeat-${ELK_VERSION}-amd64.deb \ 48 | # && update-rc.d filebeat defaults 95 10 \ 49 | # && echo "/etc/init.d/filebeat start" \ 50 | # >> /etc/cassandra/cassandra-env.sh \ 51 | # && apt-get purge -y --auto-remove \ 52 | # curl 53 | #RUN mkdir \ 54 | # /var/lib/filebeat \ 55 | # /var/log/filebeat \ 56 | # && touch /var/run/filebeat.pid \ 57 | # && chown cassandra:cassandra \ 58 | # /var/lib/filebeat \ 59 | # /var/log/filebeat 
\ 60 | # /var/run/filebeat.pid 61 | 62 | # install collectd 63 | # NOTE: jessie packages are now being included since librrd4 and 64 | # libmicrohttpd10 were missing from the stretch repos 65 | RUN set -x \ 66 | && echo "deb http://pkg.ci.collectd.org/deb jessie collectd-5.7" \ 67 | > /etc/apt/sources.list.d/pkg.ci.collectd.org.list \ 68 | && gpg --keyserver hkp://pgp.mit.edu:80 --recv-keys 3994D24FB8543576 \ 69 | && gpg --export -a 3994D24FB8543576 | apt-key add - \ 70 | && apt-get update \ 71 | && apt-get install -y --no-install-recommends \ 72 | collectd=5.7.1-1.1 \ 73 | collectd-utils \ 74 | libprotobuf-c-dev \ 75 | libmicrohttpd-dev \ 76 | && echo "deb http://deb.debian.org/debian jessie main" \ 77 | >> /etc/apt/sources.list.d/pkg.ci.collectd.org.list \ 78 | && apt-get update \ 79 | && apt-get install -y \ 80 | librrd4 \ 81 | libmicrohttpd10 \ 82 | && rm -rf /var/lib/apt/lists/* 83 | RUN touch /var/log/collectd.log \ 84 | && chown cassandra:cassandra /var/log/collectd.log 85 | 86 | # install Prometheus JMX exporter 87 | # NOTE: 0.10 will not work until this issue is resolved: 88 | # https://github.com/prometheus/jmx_exporter/issues/170 89 | ENV JMX_EXPORTER_VERSION 0.9 90 | COPY lib/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar \ 91 | /prometheus/ 92 | RUN echo 'JVM_OPTS="$JVM_OPTS -javaagent:'/prometheus/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar=7070:/prometheus/prometheus.yml'"' \ 93 | | tee -a /etc/cassandra/cassandra-env.sh 94 | 95 | # add JMX authentication files for Reaper access 96 | COPY config/jmxremote.access /usr/lib/jvm/java-8-openjdk-amd64/jre/lib/management/jmxremote.access 97 | COPY config/jmxremote.password /etc/cassandra/jmxremote.password 98 | RUN chown cassandra:cassandra \ 99 | /usr/lib/jvm/java-8-openjdk-amd64/jre/lib/management/jmxremote.access \ 100 | /etc/cassandra/jmxremote.password \ 101 | && chmod 600 \ 102 | /usr/lib/jvm/java-8-openjdk-amd64/jre/lib/management/jmxremote.access \ 103 | 
/etc/cassandra/jmxremote.password 104 | 105 | # overwrite the base docker-entrypoint.sh with modified one, for filebeat perms 106 | COPY docker-entrypoint.sh /docker-entrypoint.sh 107 | 108 | # does not work for some reason 109 | #RUN echo "exec service collectd start &" \ 110 | # >> /etc/cassandra/cassandra-env.sh 111 | -------------------------------------------------------------------------------- /cassandra/cassandra.env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # define heap size for local development 4 | MAX_HEAP_SIZE=500M 5 | HEAP_NEWSIZE=100M 6 | 7 | # define cluster topology 8 | CASSANDRA_CLUSTER_NAME=pickle-db 9 | CASSANDRA_DC=pickle-east 10 | CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch 11 | 12 | # define gossip entrypoints 13 | CASSANDRA_SEEDS=cassandra,cassandra2,cassandra3 14 | 15 | # open JMX port for access by Reaper 16 | # WARNING: this is unsafe in production without proper firewall settings 17 | LOCAL_JMX=no 18 | -------------------------------------------------------------------------------- /cassandra/config/collectd.cassandra.conf: -------------------------------------------------------------------------------- 1 | LoadPlugin logfile 2 | 3 | LogLevel "debug" 4 | File "/var/log/collectd.log" 5 | Timestamp true 6 | 7 | 8 | LoadPlugin disk 9 | LoadPlugin load 10 | LoadPlugin syslog 11 | LoadPlugin interface 12 | LoadPlugin memory 13 | LoadPlugin rrdtool 14 | LoadPlugin syslog 15 | 16 | Interval 30 17 | Hostname "cassandra" 18 | 19 | LoadPlugin df 20 | 21 | MountPoint "/" 22 | MountPoint "/var" 23 | MountPoint "/var/lib/cassandra" 24 | MountPoint "/var/lib/cassandra/commitlog" 25 | MountPoint "/var/lib/cassandra/data" 26 | MountPoint "/var/lib/cassandra/saved_caches" 27 | MountPoint "/var/log" 28 | MountPoint "/var/log/cassandra" 29 | IgnoreSelected false 30 | ValuesPercentage true 31 | 32 | 33 | LoadPlugin cpu 34 | 35 | ReportByState true 36 | ReportByCpu false 37 | 
ValuesPercentage true 38 | ReportNumCpu true 39 | 40 | 41 | #LoadPlugin write_graphite 42 | # 43 | # 44 | # Host "graphite" 45 | # Port "2003" 46 | # Prefix "collectd.environment.cassandra.data_center." 47 | # #Protocol "udp" 48 | # EscapeCharacter "_" 49 | # SeparateInstances true 50 | # StoreRates false 51 | # AlwaysAppendDS false 52 | # 53 | # 54 | 55 | LoadPlugin write_prometheus 56 | 57 | Port "9103" 58 | 59 | -------------------------------------------------------------------------------- /cassandra/config/filebeat.yml: -------------------------------------------------------------------------------- 1 | output: 2 | logstash: 3 | enabled: true 4 | hosts: 5 | - logstash:5044 6 | filebeat: 7 | prospectors: 8 | - input_type: log 9 | paths: 10 | - "/var/log/cassandra/system.log*" 11 | document_type: cassandra_system_logs 12 | exclude_files: ['\.zip$'] 13 | multiline.pattern: '^(TRACE|DEBUG|WARN|INFO|ERROR)' # grouped so ^ anchors every level, not only TRACE 14 | multiline.negate: true 15 | multiline.match: after 16 | - input_type: log 17 | paths: 18 | - "/var/log/cassandra/gc.log.*.current" 19 | document_type: cassandra_gc_logs 20 | exclude_files: ['\.zip$'] 21 | multiline.pattern: '^(TRACE|DEBUG|WARN|INFO|ERROR)' # grouped so ^ anchors every level, not only TRACE 22 | multiline.negate: true 23 | multiline.match: after 24 | -------------------------------------------------------------------------------- /cassandra/config/graphite.cassandra.yaml: -------------------------------------------------------------------------------- 1 | graphite: 2 | - 3 | # udp: true 4 | pickled: true 5 | period: 1 6 | timeunit: 'SECONDS' 7 | prefix: 'cassandra.environment.data_center.cassandra-3_0' 8 | hosts: 9 | - host: 'graphite' 10 | port: 2004 # port: 2003, for non-pickled use 11 | predicate: 12 | color: "white" 13 | useQualifiedName: true 14 | patterns: 15 | - "^jvm.gc.*" 16 | - "^jvm.memory.*" 17 | - "^org.apache.cassandra.metrics.Client.*" 18 | - "^org.apache.cassandra.metrics.ClientRequest.ConditionNotMet.*" 19 | - "^org.apache.cassandra.metrics.ClientRequest.ContentionHistogram.*" 
20 | - "^org.apache.cassandra.metrics.ClientRequest.Latency.*" 21 | - "^org.apache.cassandra.metrics.ClientRequest.Timeouts.*" 22 | - "^org.apache.cassandra.metrics.ClientRequest.Unavailables.*" 23 | - "^org.apache.cassandra.metrics.ClientRequest.UnfinishedCommit.*" 24 | - "^org.apache.cassandra.metrics.ClientRequestMetrics.*" 25 | - "^org.apache.cassandra.metrics.Table.AllMemtablesHeapSize.*" 26 | - "^org.apache.cassandra.metrics.Table.AllMemtablesLiveDataSize.*" 27 | - "^org.apache.cassandra.metrics.Table.AllMemtablesOffHeapSize.*" 28 | - "^org.apache.cassandra.metrics.Table.AllMemtablesOnHeapSize.*" 29 | - "^org.apache.cassandra.metrics.Table.AnticompactionTime.*" 30 | - "^org.apache.cassandra.metrics.Table.BloomFilterDiskSpaceUsed.*" 31 | - "^org.apache.cassandra.metrics.Table.BloomFilterFalseRatio.*" 32 | - "^org.apache.cassandra.metrics.Table.BloomFilterOffHeapMemoryUsed.*" 33 | - "^org.apache.cassandra.metrics.Table.CasCommitLatency.*" 34 | - "^org.apache.cassandra.metrics.Table.CasPrepareLatency.*" 35 | - "^org.apache.cassandra.metrics.Table.CasProposeLatency.*" 36 | - "^org.apache.cassandra.metrics.Table.CompressionMetadataOffHeapMemoryUsed.*" 37 | - "^org.apache.cassandra.metrics.Table.CompressionRatio.*" 38 | - "^org.apache.cassandra.metrics.Table.CoordinatorReadLatency.*" 39 | - "^org.apache.cassandra.metrics.Table.CoordinatorScanLatency.*" 40 | - "^org.apache.cassandra.metrics.Table.EstimatedColumnCountHistogram.*" 41 | - "^org.apache.cassandra.metrics.Table.EstimatedPartitionCount.*" 42 | - "^org.apache.cassandra.metrics.Table.EstimatedPartitionSizeHistogram.*" 43 | - "^org.apache.cassandra.metrics.Table.IndexSummaryOffHeapMemoryUsed.*" 44 | - "^org.apache.cassandra.metrics.Table.KeyCacheHitRate.*" 45 | - "^org.apache.cassandra.metrics.Table.LiveDiskSpaceUsed.*" 46 | - "^org.apache.cassandra.metrics.Table.LiveScannedHistogram.*" 47 | - "^org.apache.cassandra.metrics.Table.LiveSSTableCount.*" 48 | - 
"^org.apache.cassandra.metrics.Table.MaxPartitionSize.*" 49 | - "^org.apache.cassandra.metrics.Table.MeanPartitionSize.*" 50 | - "^org.apache.cassandra.metrics.Table.MemtableColumnsCount.*" 51 | - "^org.apache.cassandra.metrics.Table.MemtableLiveDataSize.*" 52 | - "^org.apache.cassandra.metrics.Table.MemtableOffHeapSize.*" 53 | - "^org.apache.cassandra.metrics.Table.MemtableOnHeapSize.*" 54 | - "^org.apache.cassandra.metrics.Table.PercentRepaired.*" 55 | - "^org.apache.cassandra.metrics.Table.RangeLatency.*" 56 | - "^org.apache.cassandra.metrics.Table.ReadLatency.*" 57 | - "^org.apache.cassandra.metrics.Table.RowCacheHit.*" 58 | - "^org.apache.cassandra.metrics.Table.RowCacheMiss.*" 59 | - "^org.apache.cassandra.metrics.Table.SSTablesPerReadHistogram.*" 60 | - "^org.apache.cassandra.metrics.Table.SyncTime.*" 61 | - "^org.apache.cassandra.metrics.Table.TombstoneScannedHistogram.*" 62 | - "^org.apache.cassandra.metrics.Table.TotalDiskSpaceUsed.*" 63 | - "^org.apache.cassandra.metrics.Table.TrueSnapshotsSize.*" 64 | - "^org.apache.cassandra.metrics.Table.ValidationTime.*" 65 | - "^org.apache.cassandra.metrics.Table.WriteLatency.*" 66 | - "^org.apache.cassandra.metrics.Table.*.PendingCompactions.*" 67 | - "^org.apache.cassandra.metrics.CommitLog.ActiveTasks.*" 68 | - "^org.apache.cassandra.metrics.CommitLog.CurrentlyBlockedTask.*" 69 | - "^org.apache.cassandra.metrics.CommitLog.PendingTasks.*" 70 | - "^org.apache.cassandra.metrics.CommitLog.TotalCommitLogSize.*" 71 | - "^org.apache.cassandra.metrics.Compaction.BytesCompacted.*" 72 | - "^org.apache.cassandra.metrics.Compaction.PendingTasks.*" 73 | - "^org.apache.cassandra.metrics.Compaction.TotalCompactionsCompleted.*" 74 | - "^org.apache.cassandra.metrics.Connection.TotalTimeouts.*" 75 | - "^org.apache.cassandra.metrics.CQL.PreparedStatementsCount.*" 76 | - "^org.apache.cassandra.metrics.CQL.PreparedStatementsEvicted.*" 77 | - "^org.apache.cassandra.metrics.CQL.PreparedStatementsRatio.*" 78 | - 
"^org.apache.cassandra.metrics.CQL.RegularStatementsExecuted.*" 79 | - "^org.apache.cassandra.metrics.DroppedMessage.Dropped.*" 80 | - "^org.apache.cassandra.metrics.HintedHandOffManager.*" 81 | - "^org.apache.cassandra.metrics.ReadRepair.*" 82 | - "^org.apache.cassandra.metrics.Storage.Exceptions.*" 83 | - "^org.apache.cassandra.metrics.Storage.Load.*" 84 | - "^org.apache.cassandra.metrics.Storage.TotalHints.*" 85 | - "^org.apache.cassandra.metrics.Storage.TotalHintsInProgress.*" 86 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.AntiEntropyStage.*" 87 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.AntiEntropyStage.*" 88 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.internal.CompactionExecutor.*" 89 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.CompactionExecutor.*" 90 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.CompactionExecutor.*" 91 | - "^org.apache.cassandra.metrics.ThreadPools.CompletedTasks.internal.GossipStage.*" 92 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.HintsDispatcher.*" 93 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.HintsDispatcher.*" 94 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.internal.MemtableFlushWriter.*" 95 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.MemtableFlushWriter.*" 96 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.MemtableFlushWriter.*" 97 | - "^org.apache.cassandra.metrics.ThreadPools.CompletedTasks.internal.MigrationStage.*" 98 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.MigrationStage.*" 99 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.MigrationStage.*" 100 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.internal.ValidationExecutor.*" 101 | - 
"^org.apache.cassandra.metrics.ThreadPools.PendingTasks.internal.ValidationExecutor.*" 102 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.request.MutationStage.*" 103 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.MutationStage.*" 104 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.MutationStage.*" 105 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.ReadRepairStage.*" 106 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.ReadRepairStage.*" 107 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.ReadStage.*" 108 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.ReadStage.*" 109 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.ReplicateOnWriteStage.*" 110 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.ReplicateOnWriteStage.*" 111 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.request.RequestResponseStage.*" 112 | - "^org.apache.cassandra.metrics.ThreadPools.CompletedTasks.request.RequestResponseStage.*" 113 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.request.RequestResponseStage.*" 114 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.request.RequestResponseStage.*" 115 | - "^org.apache.cassandra.metrics.ThreadPools.ActiveTasks.transport.Native-Transport-Requests.*" 116 | - "^org.apache.cassandra.metrics.ThreadPools.CurrentlyBlockedTasks.transport.Native-Transport-Requests.*" 117 | - "^org.apache.cassandra.metrics.ThreadPools.PendingTasks.transport.Native-Transport-Requests.*" 118 | histogram: 119 | color: "white" 120 | useQualifiedName: true 121 | patterns: 122 | - metric: ".*" 123 | measure: "p75|p95|max" 124 | timer: 125 | color: "white" 126 | useQualifiedName: true 127 | patterns: 128 | - metric: ".*" 129 | measure: "p75|p95|max" 130 | - metric: "^org.apache.cassandra.metrics.ClientRequest.Latency.*" 131 | measure: 
"m1_rate|p99" 132 | - metric: "^org.apache.cassandra.metrics.Table.*Latency.*" 133 | measure: "m1_rate" 134 | meter: 135 | color: "white" 136 | useQualifiedName: true 137 | patterns: 138 | - metric: "^org.apache.cassandra.metrics.ClientRequest.*" 139 | measure: "m1_rate" 140 | - metric: "^org.apache.cassandra.metrics.Compaction.TotalCompactionsCompleted.*" 141 | measure: "m1_rate" 142 | - metric: "^org.apache.cassandra.metrics.DroppedMessage.Dropped.*" 143 | measure: "m1_rate" 144 | -------------------------------------------------------------------------------- /cassandra/config/jmxremote.access: -------------------------------------------------------------------------------- 1 | cassandraUser readwrite 2 | reaperUser readwrite 3 | 4 | -------------------------------------------------------------------------------- /cassandra/config/jmxremote.password: -------------------------------------------------------------------------------- 1 | cassandraUser cassandraPass 2 | reaperUser reaperPass 3 | 4 | -------------------------------------------------------------------------------- /cassandra/config/prometheus.yml: -------------------------------------------------------------------------------- 1 | --- 2 | lowercaseOutputLabelNames: false 3 | lowercaseOutputName: false 4 | blacklistObjectNames: 5 | - "org.apache.cassandra.metrics:type=*,keyspace=system*,*" 6 | 7 | rules: 8 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value)' 9 | name: org.apache.cassandra.metrics.Client.$1 10 | 11 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max|OneMinuteRate)' 12 | name: org.apache.cassandra.metrics.ClientRequest.$2 13 | labels: 14 | scope: $1 15 | unit: $3 16 | 17 | - pattern: 'org.apache.cassandra.metrics<>(99thPercentile)' 18 | name: org.apache.cassandra.metrics.ClientRequest.$2 19 | labels: 20 | scope: $1 21 | unit: $3 22 | 23 | # Blacklisted: TrueSnapshotsSize, EstimatedPartitionCount 24 | # Enabling these metrics will cause 
excessive system load since Cassandra has to hit disk 25 | # to populate both of these metrics 26 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max|OneMinuteRate)' 27 | # name: org.apache.cassandra.metrics.Table.$4.$2.$3.$5 28 | name: org.apache.cassandra.metrics.Table.$4 29 | labels: 30 | keyspace: $2 31 | table: $3 32 | unit: $5 33 | 34 | # Missing: ActiveTasks|CurrentlyBlockedTask 35 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max)' 36 | name: org.apache.cassandra.metrics.CommitLog.$1 37 | labels: 38 | unit: $2 39 | 40 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|OneMinuteRate)' 41 | name: org.apache.cassandra.metrics.Compaction.$1 42 | labels: 43 | unit: $2 44 | 45 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max)' 46 | name: org.apache.cassandra.metrics.CQL.$1 47 | labels: 48 | unit: $2 49 | 50 | - pattern: 'org.apache.cassandra.metrics<>(OneMinuteRate)' 51 | name: org.apache.cassandra.metrics.DroppedMessages.Dropped 52 | labels: 53 | message_type: $1 54 | unit: $2 55 | 56 | # HintedHandOffManager introduced in 3.0: 57 | #- pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max)' 58 | # name: org.apache.cassandra.metrics.HintedHandOffManager.$1 59 | # labels: 60 | # unit: $2 61 | 62 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value|75thPercentile|95thPercentile|Max)' 63 | name: org.apache.cassandra.metrics.Storage.$1 64 | labels: 65 | unit: $2 66 | 67 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value)' 68 | name: org.apache.cassandra.metrics.ThreadPools 69 | labels: 70 | status_pool: $3 71 | thread_type: $1 72 | thread_pool: $2 73 | 74 | - pattern: 'org.apache.cassandra.metrics<>(Count|Value)' 75 | name: org.apache.cassandra.metrics.ThreadPools 76 | labels: 77 | status_pool: $3 78 | thread_type: $1 79 | thread_pool: $2 80 | 81 | - pattern: 
'org.apache.cassandra.metrics<>(Count|Value)' 82 | name: org.apache.cassandra.metrics.ThreadPools 83 | labels: 84 | status_pool: $3 85 | thread_type: $1 86 | thread_pool: $2 87 | 88 | # jvm.fd introduced in 3.0: 89 | #- pattern: 'java.lang<>(\w*)' 90 | # name: jvm.fd.$1.$2 91 | 92 | - pattern: 'java.lang<>(\w*)' 93 | name: jvm.gc.$1 94 | labels: 95 | unit: $2 96 | 97 | - pattern: 'java.lang<>(\w*)' 98 | name: jvm.memory.pools.$1 99 | labels: 100 | unit: $2 101 | -------------------------------------------------------------------------------- /cassandra/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # copied from: 4 | # https://github.com/docker-library/cassandra/blob/b77e932d6935318f599026cd1ccf0a2697b3224a/3.0/docker-entrypoint.sh 5 | 6 | set -e 7 | 8 | # modifications here: 9 | # must be owned by the current user or root 10 | # chown cassandra /etc/filebeat/filebeat.yml 11 | service collectd start 12 | # modifications end 13 | 14 | # first arg is `-f` or `--some-option` 15 | if [ "${1:0:1}" = '-' ]; then 16 | set -- cassandra -f "$@" 17 | fi 18 | 19 | # allow the container to be started with `--user` 20 | if [ "$1" = 'cassandra' -a "$(id -u)" = '0' ]; then 21 | chown -R cassandra /var/lib/cassandra /var/log/cassandra "$CASSANDRA_CONFIG" 22 | exec gosu cassandra "$BASH_SOURCE" "$@" 23 | fi 24 | 25 | if [ "$1" = 'cassandra' ]; then 26 | : ${CASSANDRA_RPC_ADDRESS='0.0.0.0'} 27 | 28 | : ${CASSANDRA_LISTEN_ADDRESS='auto'} 29 | if [ "$CASSANDRA_LISTEN_ADDRESS" = 'auto' ]; then 30 | CASSANDRA_LISTEN_ADDRESS="$(hostname --ip-address)" 31 | fi 32 | 33 | : ${CASSANDRA_BROADCAST_ADDRESS="$CASSANDRA_LISTEN_ADDRESS"} 34 | 35 | if [ "$CASSANDRA_BROADCAST_ADDRESS" = 'auto' ]; then 36 | CASSANDRA_BROADCAST_ADDRESS="$(hostname --ip-address)" 37 | fi 38 | : ${CASSANDRA_BROADCAST_RPC_ADDRESS:=$CASSANDRA_BROADCAST_ADDRESS} 39 | 40 | if [ -n "${CASSANDRA_NAME:+1}" ]; then 41 | : 
${CASSANDRA_SEEDS:="cassandra"} 42 | fi 43 | : ${CASSANDRA_SEEDS:="$CASSANDRA_BROADCAST_ADDRESS"} 44 | 45 | sed -ri 's/(- seeds:).*/\1 "'"$CASSANDRA_SEEDS"'"/' "$CASSANDRA_CONFIG/cassandra.yaml" 46 | 47 | for yaml in \ 48 | broadcast_address \ 49 | broadcast_rpc_address \ 50 | cluster_name \ 51 | endpoint_snitch \ 52 | listen_address \ 53 | num_tokens \ 54 | rpc_address \ 55 | start_rpc \ 56 | ; do 57 | var="CASSANDRA_${yaml^^}" 58 | val="${!var}" 59 | if [ "$val" ]; then 60 | sed -ri 's/^(# )?('"$yaml"':).*/\2 '"$val"'/' "$CASSANDRA_CONFIG/cassandra.yaml" 61 | fi 62 | done 63 | 64 | for rackdc in dc rack; do 65 | var="CASSANDRA_${rackdc^^}" 66 | val="${!var}" 67 | if [ "$val" ]; then 68 | sed -ri 's/^('"$rackdc"'=).*/\1 '"$val"'/' "$CASSANDRA_CONFIG/cassandra-rackdc.properties" 69 | fi 70 | done 71 | fi 72 | 73 | exec "$@" 74 | -------------------------------------------------------------------------------- /cassandra/lib/jmx_prometheus_javaagent-0.9.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/jmx_prometheus_javaagent-0.9.jar -------------------------------------------------------------------------------- /cassandra/lib/metrics-core-3.1.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/metrics-core-3.1.2.jar -------------------------------------------------------------------------------- /cassandra/lib/metrics-graphite-3.1.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/metrics-graphite-3.1.2.jar 
-------------------------------------------------------------------------------- /cassandra/lib/reporter-config-base-3.0.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/reporter-config-base-3.0.3.jar -------------------------------------------------------------------------------- /cassandra/lib/reporter-config3-3.0.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelastpickle/docker-cassandra-bootstrap/4a9d570496bef07153debb99b4feb806716ff2c4/cassandra/lib/reporter-config3-3.0.3.jar -------------------------------------------------------------------------------- /cassandra/schema.cql: -------------------------------------------------------------------------------- 1 | CREATE KEYSPACE IF NOT EXISTS pickle 2 | WITH replication = {'class': 'NetworkTopologyStrategy', 'pickle-east': 1 }; 3 | 4 | CREATE TABLE IF NOT EXISTS pickle.employees ( 5 | employee_id uuid, 6 | PRIMARY KEY ((employee_id)) 7 | ) WITH comment = 'Table with all employee IDs.'; 8 | 9 | CREATE TABLE IF NOT EXISTS pickle.timesheets ( 10 | employee_id uuid, 11 | pickle_tree_id uuid, 12 | timestamp timeuuid, 13 | pickle_count int, 14 | pickle_avg_size float, 15 | watered boolean, 16 | PRIMARY KEY ((employee_id), pickle_tree_id, timestamp) 17 | ) WITH CLUSTERING ORDER BY (pickle_tree_id ASC, timestamp DESC) 18 | AND comment = 'The most recent employee visits for each pickle tree.'; 19 | 20 | CREATE TABLE IF NOT EXISTS pickle.trees ( 21 | pickle_tree_id uuid, 22 | timestamp timeuuid, 23 | employee_id uuid, 24 | pickle_count int, 25 | pickle_avg_size float, 26 | watered boolean, 27 | PRIMARY KEY ((pickle_tree_id), timestamp) 28 | ) WITH CLUSTERING ORDER BY (timestamp DESC) 29 | AND comment = 'The most recent history for each pickle tree'; 30 | 31 | CREATE TABLE IF NOT 
EXISTS pickle.production ( 32 | pickle_count int, 33 | pickle_tree_id uuid, 34 | timestamp timeuuid, 35 | PRIMARY KEY ((pickle_count), pickle_tree_id, timestamp) 36 | ) WITH CLUSTERING ORDER BY (pickle_tree_id ASC, timestamp ASC) 37 | AND comment = 'How often a pickle tree produces the same number of pickles.'; 38 | 39 | 40 | CREATE KEYSPACE IF NOT EXISTS reaper_db 41 | WITH replication = {'class': 'NetworkTopologyStrategy', 'pickle-east': 1 }; 42 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2.1' 2 | 3 | services: 4 | cassandra: 5 | build: cassandra 6 | env_file: 7 | - cassandra/cassandra.env 8 | # ports: 9 | # - "7199:7199" # JMX 10 | # - "7000:7000" # cluster communication 11 | # - "7001:7001" # cluster communication (SSL) 12 | # - "9042:9042" # native protocol clients 13 | # - "9160:9160" # thrift clients 14 | volumes: 15 | - ./cassandra/config/collectd.cassandra.conf:/etc/collectd/collectd.conf 16 | # - ./cassandra/config/graphite.cassandra.yaml:/etc/cassandra/graphite.yaml 17 | # - ./cassandra/config/filebeat.yml:/etc/filebeat/filebeat.yml 18 | - ./cassandra/config/prometheus.yml:/prometheus/prometheus.yml 19 | - ./data/cassandra:/var/lib/cassandra 20 | 21 | cqlsh: 22 | image: cassandra:3.11 23 | entrypoint: cqlsh cassandra 24 | volumes: 25 | - ./cassandra/schema.cql:/schema.cql 26 | 27 | nodetool: 28 | image: cassandra:3.11 29 | entrypoint: nodetool -h cassandra -u cassandraUser -pw cassandraPass 30 | command: help 31 | 32 | cassandra-reaper: 33 | image: thelastpickle/cassandra-reaper:ab0fff2 34 | env_file: 35 | - cassandra-reaper/cassandra-reaper.env 36 | links: 37 | - cassandra:cassandra 38 | ports: 39 | - "8080:8080" 40 | - "8081:8081" 41 | 42 | grafana: 43 | image: grafana/grafana:4.5.2 44 | env_file: 45 | - grafana/grafana.env 46 | links: 47 | - prometheus:prometheus 48 | ports: 49 | - "3000:3000" 50 | 
restart: always 51 | volumes: 52 | - ./data/grafana:/var/lib/grafana 53 | 54 | logspout: 55 | build: logspout 56 | # command: syslog+tcp://logs.papertrailapp.com:$PAPERTRAIL_PORT 57 | env_file: 58 | - logspout/logspout.env 59 | restart: always 60 | ports: 61 | - "8000:80" 62 | volumes: 63 | # security concern: 64 | # https://raesene.github.io/blog/2016/03/06/The-Dangers-Of-Docker.sock/ 65 | # http://stackoverflow.com/questions/40844197 66 | - /var/run/docker.sock:/var/run/docker.sock:ro 67 | 68 | pickle-factory: 69 | build: pickle-factory 70 | env_file: 71 | - pickle.env 72 | volumes: 73 | - ./pickle-factory:/usr/src/app 74 | 75 | pickle-shop: 76 | build: pickle-shop 77 | env_file: 78 | - pickle.env 79 | 80 | prometheus: 81 | image: prom/prometheus:v1.7.1 82 | links: 83 | - cassandra:cassandra 84 | - cassandra-reaper:cassandra-reaper 85 | ports: 86 | - "9090:9090" 87 | volumes: 88 | - ./data/prometheus:/prometheus 89 | - ./prometheus/config/prometheus.yml:/etc/prometheus/prometheus.yml 90 | -------------------------------------------------------------------------------- /grafana/bin/create-data-sources.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | GRAFANA_USER=admin 6 | GRAFANA_PASS=admin 7 | GRAFANA_IP=localhost 8 | GRAFANA_PORT=3000 9 | 10 | while : 11 | do 12 | curl -H 'Content-Type: application/json' \ 13 | -X POST http://${GRAFANA_USER}:${GRAFANA_PASS}@${GRAFANA_IP}:${GRAFANA_PORT}/api/datasources \ 14 | --data-binary '{ 15 | "name":"graphite", 16 | "type":"graphite", 17 | "url":"http://graphite:80", 18 | "access":"proxy", 19 | "basicAuth":true, 20 | "basicAuthUser":"guest", 21 | "basicAuthPassword":"guest"}' \ 22 | && echo \ 23 | && curl -H 'Content-Type: application/json' \ 24 | -X POST http://${GRAFANA_USER}:${GRAFANA_PASS}@${GRAFANA_IP}:${GRAFANA_PORT}/api/datasources \ 25 | --data-binary '{ 26 | "name":"prometheus", 27 | "type":"prometheus", 28 | "isDefault":true, 29 | 
"url":"http://prometheus:9090", 30 | "access":"proxy"}' \ 31 | && break 32 | sleep 1 33 | done 34 | -------------------------------------------------------------------------------- /grafana/bin/upload-dashboards.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | GRAFANA_USER=admin 6 | GRAFANA_PASS=admin 7 | GRAFANA_API_URL=localhost 8 | GRAFANA_API_PORT=3000 9 | 10 | GRAFANA_DASHBOARD_DIR=grafana/dashboards/ 11 | 12 | for json_dashboard in `ls -p ${GRAFANA_DASHBOARD_DIR} | grep -v /` 13 | do 14 | cat ${GRAFANA_DASHBOARD_DIR}${json_dashboard} \ 15 | | curl \ 16 | -u ${GRAFANA_USER}:${GRAFANA_PASS} \ 17 | -X POST \ 18 | -H "Content-Type: application/json" -H "Accept: application/json" \ 19 | -d @- \ 20 | ${GRAFANA_API_URL}:${GRAFANA_API_PORT}/api/dashboards/import 21 | done 22 | -------------------------------------------------------------------------------- /grafana/dashboards/tlp-cassandra-big-picture.final.json: -------------------------------------------------------------------------------- 1 | { 2 | "dashboard": { 3 | "annotations": { 4 | "list": [] 5 | }, 6 | "editable": true, 7 | "gnetId": null, 8 | "graphTooltip": 0, 9 | "hideControls": false, 10 | "id": null, 11 | "links": [ 12 | { 13 | "asDropdown": true, 14 | "icon": "external link", 15 | "includeVars": false, 16 | "keepTime": true, 17 | "tags": [ 18 | "tlp", 19 | "cassandra" 20 | ], 21 | "title": "Other TLP Dashboards", 22 | "type": "dashboards" 23 | } 24 | ], 25 | "refresh": "5m", 26 | "rows": [ 27 | { 28 | "collapse": false, 29 | "height": "150px", 30 | "panels": [ 31 | { 32 | "cacheTimeout": null, 33 | "colorBackground": false, 34 | "colorValue": false, 35 | "colors": [ 36 | "rgba(245, 54, 54, 0.9)", 37 | "rgba(237, 129, 40, 0.89)", 38 | "rgba(50, 172, 45, 0.97)" 39 | ], 40 | "datasource": null, 41 | "decimals": 0, 42 | "format": "none", 43 | "gauge": { 44 | "maxValue": 100, 45 | "minValue": 0, 46 | "show": true, 47 | 
"thresholdLabels": false, 48 | "thresholdMarkers": true 49 | }, 50 | "height": "200", 51 | "hideTimeOverride": true, 52 | "id": 24, 53 | "interval": null, 54 | "links": [], 55 | "mappingType": 1, 56 | "mappingTypes": [ 57 | { 58 | "name": "value to text", 59 | "value": 1 60 | }, 61 | { 62 | "name": "range to text", 63 | "value": 2 64 | } 65 | ], 66 | "maxDataPoints": 100, 67 | "nullPointMode": "connected", 68 | "nullText": null, 69 | "postfix": "%", 70 | "postfixFontSize": "50%", 71 | "prefix": "", 72 | "prefixFontSize": "50%", 73 | "rangeMaps": [ 74 | { 75 | "from": "null", 76 | "text": "N/A", 77 | "to": "null" 78 | } 79 | ], 80 | "span": 4, 81 | "sparkline": { 82 | "fillColor": "rgba(31, 118, 189, 0.18)", 83 | "full": false, 84 | "lineColor": "rgb(31, 120, 193)", 85 | "show": false 86 | }, 87 | "tableColumn": "", 88 | "targets": [ 89 | { 90 | "expr": "sum(collectd_cpu_percent{environment=\"$env\", data_center=\"$dc\", cpu!=\"idle\"}) / sum(collectd_cpu_count{environment=\"$env\", data_center=\"$dc\"})", 91 | "format": "time_series", 92 | "legendFormat": "", 93 | "refId": "A", 94 | "target": "divideSeries(sumSeries(absolute(offset(collectd.$env.cassandra.$dc.$host.cpu.percent.idle, -100))), #B)", 95 | "targetFull": "divideSeries(sumSeries(absolute(offset(collectd.$env.cassandra.$dc.$host.cpu.percent.idle, -100))), sumSeries(collectd.$env.cassandra.$dc.$host.cpu.count))", 96 | "textEditor": false 97 | }, 98 | { 99 | "expr": "sum(collectd_cpu_count{environment=\"$env\", data_center=\"$dc\"})", 100 | "format": "time_series", 101 | "hide": true, 102 | "legendFormat": "", 103 | "refId": "B", 104 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.cpu.count)", 105 | "textEditor": false 106 | } 107 | ], 108 | "thresholds": "", 109 | "timeFrom": null, 110 | "timeShift": "10s", 111 | "title": "Cluster-Wide CPU Usage", 112 | "transparent": true, 113 | "type": "singlestat", 114 | "valueFontSize": "80%", 115 | "valueMaps": [ 116 | { 117 | "op": "=", 118 | "text": "N/A", 
119 | "value": "null" 120 | } 121 | ], 122 | "valueName": "avg" 123 | }, 124 | { 125 | "cacheTimeout": null, 126 | "colorBackground": false, 127 | "colorValue": false, 128 | "colors": [ 129 | "rgba(245, 54, 54, 0.9)", 130 | "rgba(237, 129, 40, 0.89)", 131 | "rgba(50, 172, 45, 0.97)" 132 | ], 133 | "datasource": null, 134 | "decimals": 0, 135 | "format": "none", 136 | "gauge": { 137 | "maxValue": 100, 138 | "minValue": 0, 139 | "show": true, 140 | "thresholdLabels": false, 141 | "thresholdMarkers": true 142 | }, 143 | "height": "200", 144 | "hideTimeOverride": true, 145 | "id": 27, 146 | "interval": null, 147 | "links": [], 148 | "mappingType": 1, 149 | "mappingTypes": [ 150 | { 151 | "name": "value to text", 152 | "value": 1 153 | }, 154 | { 155 | "name": "range to text", 156 | "value": 2 157 | } 158 | ], 159 | "maxDataPoints": 100, 160 | "nullPointMode": "connected", 161 | "nullText": null, 162 | "postfix": "%", 163 | "postfixFontSize": "50%", 164 | "prefix": "", 165 | "prefixFontSize": "50%", 166 | "rangeMaps": [ 167 | { 168 | "from": "null", 169 | "text": "N/A", 170 | "to": "null" 171 | } 172 | ], 173 | "span": 4, 174 | "sparkline": { 175 | "fillColor": "rgba(31, 118, 189, 0.18)", 176 | "full": false, 177 | "lineColor": "rgb(31, 120, 193)", 178 | "show": false 179 | }, 180 | "tableColumn": "", 181 | "targets": [ 182 | { 183 | "expr": "sum(collectd_memory{environment=\"$env\", data_center=\"$dc\", memory!=\"free\"}) / sum(collectd_memory{environment=\"$env\", data_center=\"$dc\"}) * 100", 184 | "format": "time_series", 185 | "legendFormat": "", 186 | "refId": "A", 187 | "target": "scale(divideSeries(sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.{buffered,cached,slab_recl,slab_unrecl,used}), #B), 100)", 188 | "targetFull": "scale(divideSeries(sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.{buffered,cached,slab_recl,slab_unrecl,used}), sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.*)), 100)", 189 | "textEditor": false 190 | }, 191 
| { 192 | "expr": "sum(collectd_memory{environment=\"$env\", data_center=\"$dc\"})", 193 | "format": "time_series", 194 | "hide": true, 195 | "legendFormat": "", 196 | "refId": "B", 197 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.*)", 198 | "textEditor": false 199 | } 200 | ], 201 | "thresholds": "", 202 | "timeFrom": null, 203 | "timeShift": "10s", 204 | "title": "Cluster-Wide Memory Usage", 205 | "transparent": true, 206 | "type": "singlestat", 207 | "valueFontSize": "80%", 208 | "valueMaps": [ 209 | { 210 | "op": "=", 211 | "text": "N/A", 212 | "value": "null" 213 | } 214 | ], 215 | "valueName": "avg" 216 | }, 217 | { 218 | "cacheTimeout": null, 219 | "colorBackground": false, 220 | "colorValue": false, 221 | "colors": [ 222 | "rgba(245, 54, 54, 0.9)", 223 | "rgba(237, 129, 40, 0.89)", 224 | "rgba(50, 172, 45, 0.97)" 225 | ], 226 | "datasource": null, 227 | "decimals": 0, 228 | "format": "none", 229 | "gauge": { 230 | "maxValue": 100, 231 | "minValue": 0, 232 | "show": true, 233 | "thresholdLabels": false, 234 | "thresholdMarkers": true 235 | }, 236 | "height": "200", 237 | "hideTimeOverride": true, 238 | "id": 31, 239 | "interval": null, 240 | "links": [], 241 | "mappingType": 1, 242 | "mappingTypes": [ 243 | { 244 | "name": "value to text", 245 | "value": 1 246 | }, 247 | { 248 | "name": "range to text", 249 | "value": 2 250 | } 251 | ], 252 | "maxDataPoints": 100, 253 | "nullPointMode": "connected", 254 | "nullText": null, 255 | "postfix": "%", 256 | "postfixFontSize": "50%", 257 | "prefix": "", 258 | "prefixFontSize": "50%", 259 | "rangeMaps": [ 260 | { 261 | "from": "null", 262 | "text": "N/A", 263 | "to": "null" 264 | } 265 | ], 266 | "span": 4, 267 | "sparkline": { 268 | "fillColor": "rgba(31, 118, 189, 0.18)", 269 | "full": false, 270 | "lineColor": "rgb(31, 120, 193)", 271 | "show": false 272 | }, 273 | "tableColumn": "", 274 | "targets": [ 275 | { 276 | "expr": "sum(collectd_df_df_complex{environment=\"$env\", 
data_center=\"$dc\", df=\"var-lib-cassandra\", type=\"used\"}) / sum(collectd_df_df_complex{environment=\"$env\", data_center=\"$dc\", df=\"var-lib-cassandra\"}) * 100", 277 | "format": "time_series", 278 | "legendFormat": "", 279 | "refId": "A", 280 | "target": "scale(divideSeries(sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.used), #B), 100)", 281 | "targetFull": "scale(divideSeries(sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.used), sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.*)), 100)", 282 | "textEditor": false 283 | }, 284 | { 285 | "expr": "sum(collectd_df_df_complex{environment=\"$env\", data_center=\"$dc\", df=\"var-lib-cassandra\"})", 286 | "format": "time_series", 287 | "hide": true, 288 | "legendFormat": "", 289 | "refId": "B", 290 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.*)", 291 | "textEditor": false 292 | } 293 | ], 294 | "thresholds": "", 295 | "timeShift": "30s", 296 | "title": "Cluster-Wide Disk Usage", 297 | "transparent": true, 298 | "type": "singlestat", 299 | "valueFontSize": "80%", 300 | "valueMaps": [ 301 | { 302 | "op": "=", 303 | "text": "N/A", 304 | "value": "null" 305 | } 306 | ], 307 | "valueName": "avg" 308 | }, 309 | { 310 | "cacheTimeout": null, 311 | "colorBackground": false, 312 | "colorValue": false, 313 | "colors": [ 314 | "rgba(245, 54, 54, 0.9)", 315 | "rgba(237, 129, 40, 0.89)", 316 | "rgba(50, 172, 45, 0.97)" 317 | ], 318 | "datasource": null, 319 | "decimals": 1, 320 | "format": "none", 321 | "gauge": { 322 | "maxValue": null, 323 | "minValue": 0, 324 | "show": false, 325 | "thresholdLabels": false, 326 | "thresholdMarkers": true 327 | }, 328 | "hideTimeOverride": true, 329 | "id": 23, 330 | "interval": null, 331 | "links": [], 332 | "mappingType": 1, 333 | "mappingTypes": [ 334 | { 335 | "name": "value to text", 336 | "value": 1 337 | }, 338 | { 339 | "name": "range to text", 340 | 
"value": 2 341 | } 342 | ], 343 | "maxDataPoints": 100, 344 | "nullPointMode": "connected", 345 | "nullText": null, 346 | "postfix": " cores", 347 | "postfixFontSize": "50%", 348 | "prefix": "", 349 | "prefixFontSize": "50%", 350 | "rangeMaps": [ 351 | { 352 | "from": "null", 353 | "text": "N/A", 354 | "to": "null" 355 | } 356 | ], 357 | "span": 2, 358 | "sparkline": { 359 | "fillColor": "rgba(31, 118, 189, 0.18)", 360 | "full": false, 361 | "lineColor": "rgb(31, 120, 193)", 362 | "show": true 363 | }, 364 | "tableColumn": "", 365 | "targets": [ 366 | { 367 | "expr": "sum(collectd_cpu_percent{environment=\"$env\", data_center=\"$dc\", cpu!=\"idle\"}) * 0.01", 368 | "format": "time_series", 369 | "legendFormat": "", 370 | "refId": "A", 371 | "target": "scale(sumSeries(absolute(offset(collectd.$env.cassandra.$dc.$host.cpu.percent.idle, -100))), 0.01)", 372 | "textEditor": false 373 | } 374 | ], 375 | "thresholds": "", 376 | "timeShift": "10s", 377 | "title": "Cluster-Wide CPU Usage", 378 | "transparent": true, 379 | "type": "singlestat", 380 | "valueFontSize": "80%", 381 | "valueMaps": [ 382 | { 383 | "op": "=", 384 | "text": "N/A", 385 | "value": "null" 386 | } 387 | ], 388 | "valueName": "avg" 389 | }, 390 | { 391 | "cacheTimeout": null, 392 | "colorBackground": false, 393 | "colorValue": false, 394 | "colors": [ 395 | "rgba(245, 54, 54, 0.9)", 396 | "rgba(237, 129, 40, 0.89)", 397 | "rgba(50, 172, 45, 0.97)" 398 | ], 399 | "datasource": null, 400 | "decimals": 0, 401 | "format": "none", 402 | "gauge": { 403 | "maxValue": 100, 404 | "minValue": 0, 405 | "show": false, 406 | "thresholdLabels": false, 407 | "thresholdMarkers": true 408 | }, 409 | "id": 22, 410 | "interval": null, 411 | "links": [], 412 | "mappingType": 1, 413 | "mappingTypes": [ 414 | { 415 | "name": "value to text", 416 | "value": 1 417 | }, 418 | { 419 | "name": "range to text", 420 | "value": 2 421 | } 422 | ], 423 | "maxDataPoints": 100, 424 | "nullPointMode": "connected", 425 | "nullText": null, 
426 | "postfix": " cores", 427 | "postfixFontSize": "50%", 428 | "prefix": "", 429 | "prefixFontSize": "50%", 430 | "rangeMaps": [ 431 | { 432 | "from": "null", 433 | "text": "N/A", 434 | "to": "null" 435 | } 436 | ], 437 | "span": 2, 438 | "sparkline": { 439 | "fillColor": "rgba(31, 118, 189, 0.18)", 440 | "full": false, 441 | "lineColor": "rgb(31, 120, 193)", 442 | "show": false 443 | }, 444 | "tableColumn": "", 445 | "targets": [ 446 | { 447 | "expr": "sum(collectd_cpu_count{environment=\"$env\", data_center=\"$dc\"})", 448 | "format": "time_series", 449 | "legendFormat": "", 450 | "refId": "A", 451 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.cpu.count)", 452 | "textEditor": false 453 | } 454 | ], 455 | "thresholds": "", 456 | "title": "Cluster-Wide CPU Allocation", 457 | "transparent": true, 458 | "type": "singlestat", 459 | "valueFontSize": "80%", 460 | "valueMaps": [ 461 | { 462 | "op": "=", 463 | "text": "N/A", 464 | "value": "null" 465 | } 466 | ], 467 | "valueName": "max" 468 | }, 469 | { 470 | "cacheTimeout": null, 471 | "colorBackground": false, 472 | "colorValue": false, 473 | "colors": [ 474 | "rgba(245, 54, 54, 0.9)", 475 | "rgba(237, 129, 40, 0.89)", 476 | "rgba(50, 172, 45, 0.97)" 477 | ], 478 | "datasource": null, 479 | "decimals": 1, 480 | "format": "kbytes", 481 | "gauge": { 482 | "maxValue": null, 483 | "minValue": 0, 484 | "show": false, 485 | "thresholdLabels": false, 486 | "thresholdMarkers": true 487 | }, 488 | "hideTimeOverride": true, 489 | "id": 25, 490 | "interval": null, 491 | "links": [], 492 | "mappingType": 1, 493 | "mappingTypes": [ 494 | { 495 | "name": "value to text", 496 | "value": 1 497 | }, 498 | { 499 | "name": "range to text", 500 | "value": 2 501 | } 502 | ], 503 | "maxDataPoints": 100, 504 | "nullPointMode": "connected", 505 | "nullText": null, 506 | "postfix": "", 507 | "postfixFontSize": "50%", 508 | "prefix": "", 509 | "prefixFontSize": "50%", 510 | "rangeMaps": [ 511 | { 512 | "from": "null", 513 | "text": 
"N/A", 514 | "to": "null" 515 | } 516 | ], 517 | "span": 2, 518 | "sparkline": { 519 | "fillColor": "rgba(31, 118, 189, 0.18)", 520 | "full": false, 521 | "lineColor": "rgb(31, 120, 193)", 522 | "show": true 523 | }, 524 | "tableColumn": "", 525 | "targets": [ 526 | { 527 | "expr": "sum(collectd_memory{environment=\"$env\", data_center=\"$dc\", memory!=\"free\"})", 528 | "format": "time_series", 529 | "legendFormat": "", 530 | "refId": "A", 531 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.{buffered,cached,slab_recl,slab_unrecl,used})", 532 | "textEditor": false 533 | } 534 | ], 535 | "thresholds": "", 536 | "timeShift": "10s", 537 | "title": "Cluster-Wide Memory Usage", 538 | "transparent": true, 539 | "type": "singlestat", 540 | "valueFontSize": "80%", 541 | "valueMaps": [ 542 | { 543 | "op": "=", 544 | "text": "N/A", 545 | "value": "null" 546 | } 547 | ], 548 | "valueName": "avg" 549 | }, 550 | { 551 | "cacheTimeout": null, 552 | "colorBackground": false, 553 | "colorValue": false, 554 | "colors": [ 555 | "rgba(245, 54, 54, 0.9)", 556 | "rgba(237, 129, 40, 0.89)", 557 | "rgba(50, 172, 45, 0.97)" 558 | ], 559 | "datasource": null, 560 | "decimals": 1, 561 | "format": "kbytes", 562 | "gauge": { 563 | "maxValue": 100, 564 | "minValue": 0, 565 | "show": false, 566 | "thresholdLabels": false, 567 | "thresholdMarkers": true 568 | }, 569 | "id": 26, 570 | "interval": null, 571 | "links": [], 572 | "mappingType": 1, 573 | "mappingTypes": [ 574 | { 575 | "name": "value to text", 576 | "value": 1 577 | }, 578 | { 579 | "name": "range to text", 580 | "value": 2 581 | } 582 | ], 583 | "maxDataPoints": 100, 584 | "nullPointMode": "connected", 585 | "nullText": null, 586 | "postfix": "", 587 | "postfixFontSize": "50%", 588 | "prefix": "", 589 | "prefixFontSize": "50%", 590 | "rangeMaps": [ 591 | { 592 | "from": "null", 593 | "text": "N/A", 594 | "to": "null" 595 | } 596 | ], 597 | "span": 2, 598 | "sparkline": { 599 | "fillColor": "rgba(31, 118, 189, 
0.18)", 600 | "full": false, 601 | "lineColor": "rgb(31, 120, 193)", 602 | "show": false 603 | }, 604 | "tableColumn": "", 605 | "targets": [ 606 | { 607 | "expr": "sum(collectd_memory{environment=\"$env\", data_center=\"$dc\"})", 608 | "format": "time_series", 609 | "legendFormat": "", 610 | "refId": "A", 611 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.memory.memory.*)", 612 | "textEditor": false 613 | } 614 | ], 615 | "thresholds": "", 616 | "title": "Cluster-Wide Memory Allocation", 617 | "transparent": true, 618 | "type": "singlestat", 619 | "valueFontSize": "80%", 620 | "valueMaps": [ 621 | { 622 | "op": "=", 623 | "text": "N/A", 624 | "value": "null" 625 | } 626 | ], 627 | "valueName": "max" 628 | }, 629 | { 630 | "cacheTimeout": null, 631 | "colorBackground": false, 632 | "colorValue": false, 633 | "colors": [ 634 | "rgba(245, 54, 54, 0.9)", 635 | "rgba(237, 129, 40, 0.89)", 636 | "rgba(50, 172, 45, 0.97)" 637 | ], 638 | "datasource": null, 639 | "decimals": 1, 640 | "format": "bytes", 641 | "gauge": { 642 | "maxValue": 100, 643 | "minValue": 0, 644 | "show": false, 645 | "thresholdLabels": false, 646 | "thresholdMarkers": true 647 | }, 648 | "hideTimeOverride": true, 649 | "id": 29, 650 | "interval": null, 651 | "links": [], 652 | "mappingType": 1, 653 | "mappingTypes": [ 654 | { 655 | "name": "value to text", 656 | "value": 1 657 | }, 658 | { 659 | "name": "range to text", 660 | "value": 2 661 | } 662 | ], 663 | "maxDataPoints": 100, 664 | "nullPointMode": "connected", 665 | "nullText": null, 666 | "postfix": "", 667 | "postfixFontSize": "50%", 668 | "prefix": "", 669 | "prefixFontSize": "50%", 670 | "rangeMaps": [ 671 | { 672 | "from": "null", 673 | "text": "N/A", 674 | "to": "null" 675 | } 676 | ], 677 | "span": 2, 678 | "sparkline": { 679 | "fillColor": "rgba(31, 118, 189, 0.18)", 680 | "full": false, 681 | "lineColor": "rgb(31, 120, 193)", 682 | "show": true 683 | }, 684 | "tableColumn": "", 685 | "targets": [ 686 | { 687 | "expr": 
"sum(collectd_df_df_complex{environment=\"$env\", data_center=\"$dc\", df=\"var-lib-cassandra\", type=\"used\"})", 688 | "format": "time_series", 689 | "legendFormat": "", 690 | "refId": "A", 691 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.used)", 692 | "textEditor": false 693 | } 694 | ], 695 | "thresholds": "", 696 | "timeShift": "30s", 697 | "title": "Cluster-Wide Disk Usage", 698 | "transparent": true, 699 | "type": "singlestat", 700 | "valueFontSize": "80%", 701 | "valueMaps": [ 702 | { 703 | "op": "=", 704 | "text": "N/A", 705 | "value": "null" 706 | } 707 | ], 708 | "valueName": "avg" 709 | }, 710 | { 711 | "cacheTimeout": null, 712 | "colorBackground": false, 713 | "colorValue": false, 714 | "colors": [ 715 | "rgba(245, 54, 54, 0.9)", 716 | "rgba(237, 129, 40, 0.89)", 717 | "rgba(50, 172, 45, 0.97)" 718 | ], 719 | "datasource": null, 720 | "decimals": 1, 721 | "format": "bytes", 722 | "gauge": { 723 | "maxValue": 100, 724 | "minValue": 0, 725 | "show": false, 726 | "thresholdLabels": false, 727 | "thresholdMarkers": true 728 | }, 729 | "id": 30, 730 | "interval": null, 731 | "links": [], 732 | "mappingType": 1, 733 | "mappingTypes": [ 734 | { 735 | "name": "value to text", 736 | "value": 1 737 | }, 738 | { 739 | "name": "range to text", 740 | "value": 2 741 | } 742 | ], 743 | "maxDataPoints": 100, 744 | "nullPointMode": "connected", 745 | "nullText": null, 746 | "postfix": "", 747 | "postfixFontSize": "50%", 748 | "prefix": "", 749 | "prefixFontSize": "50%", 750 | "rangeMaps": [ 751 | { 752 | "from": "null", 753 | "text": "N/A", 754 | "to": "null" 755 | } 756 | ], 757 | "span": 2, 758 | "sparkline": { 759 | "fillColor": "rgba(31, 118, 189, 0.18)", 760 | "full": false, 761 | "lineColor": "rgb(31, 120, 193)", 762 | "show": false 763 | }, 764 | "tableColumn": "", 765 | "targets": [ 766 | { 767 | "expr": "sum(collectd_df_df_complex{environment=\"$env\", data_center=\"$dc\", df=\"var-lib-cassandra\"})", 768 | 
"format": "time_series", 769 | "legendFormat": "", 770 | "refId": "A", 771 | "target": "sumSeries(collectd.$env.cassandra.$dc.$host.df.var-lib-cassandra.df_complex.*)", 772 | "textEditor": false 773 | } 774 | ], 775 | "thresholds": "", 776 | "title": "Cluster-Wide Disk Allocation", 777 | "transparent": true, 778 | "type": "singlestat", 779 | "valueFontSize": "80%", 780 | "valueMaps": [ 781 | { 782 | "op": "=", 783 | "text": "N/A", 784 | "value": "null" 785 | } 786 | ], 787 | "valueName": "max" 788 | } 789 | ], 790 | "repeat": null, 791 | "repeatIteration": null, 792 | "repeatRowId": null, 793 | "showTitle": true, 794 | "title": "System-Level Metrics", 795 | "titleSize": "h6" 796 | } 797 | ], 798 | "schemaVersion": 14, 799 | "style": "dark", 800 | "tags": [ 801 | "tlp", 802 | "cassandra" 803 | ], 804 | "templating": { 805 | "list": [ 806 | { 807 | "allValue": null, 808 | "current": { 809 | "text": "environment", 810 | "value": "environment" 811 | }, 812 | "datasource": "prometheus", 813 | "hide": 0, 814 | "includeAll": false, 815 | "label": "Environment", 816 | "multi": false, 817 | "name": "env", 818 | "options": [], 819 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, environment)", 820 | "refresh": 2, 821 | "regex": "", 822 | "sort": 1, 823 | "tagValuesQuery": "", 824 | "tags": [], 825 | "tagsQuery": "", 826 | "type": "query", 827 | "useTags": true 828 | }, 829 | { 830 | "allValue": "", 831 | "current": { 832 | "text": "data_center", 833 | "value": "data_center" 834 | }, 835 | "datasource": "prometheus", 836 | "hide": 0, 837 | "includeAll": false, 838 | "label": "Data Center", 839 | "multi": false, 840 | "name": "dc", 841 | "options": [], 842 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, data_center)", 843 | "refresh": 2, 844 | "regex": "", 845 | "sort": 1, 846 | "tagValuesQuery": null, 847 | "tags": [], 848 | "tagsQuery": null, 849 | "type": "query", 850 | "useTags": false 851 | }, 852 | { 853 | "allValue": null, 854 | 
"current": { 855 | "text": "All", 856 | "value": "$__all" 857 | }, 858 | "datasource": "prometheus", 859 | "hide": 0, 860 | "includeAll": true, 861 | "label": "Host", 862 | "multi": false, 863 | "name": "host", 864 | "options": [], 865 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, host)", 866 | "refresh": 2, 867 | "regex": "", 868 | "sort": 1, 869 | "tagValuesQuery": null, 870 | "tags": [], 871 | "tagsQuery": null, 872 | "type": "query", 873 | "useTags": false 874 | } 875 | ] 876 | }, 877 | "time": { 878 | "from": "now-6h", 879 | "to": "now" 880 | }, 881 | "timepicker": { 882 | "refresh_intervals": [ 883 | "5s", 884 | "10s", 885 | "30s", 886 | "1m", 887 | "5m", 888 | "15m", 889 | "30m", 890 | "1h", 891 | "2h", 892 | "1d" 893 | ], 894 | "time_options": [ 895 | "5m", 896 | "15m", 897 | "1h", 898 | "6h", 899 | "12h", 900 | "24h", 901 | "2d", 902 | "7d", 903 | "30d" 904 | ] 905 | }, 906 | "timezone": "browser", 907 | "title": "TLP - Cassandra - Big Picture", 908 | "version": null 909 | }, 910 | "overwrite": true 911 | } 912 | -------------------------------------------------------------------------------- /grafana/dashboards/tlp-cassandra-client-connections.final.json: -------------------------------------------------------------------------------- 1 | { 2 | "dashboard": { 3 | "annotations": { 4 | "list": [] 5 | }, 6 | "editable": true, 7 | "gnetId": null, 8 | "graphTooltip": 0, 9 | "hideControls": false, 10 | "id": null, 11 | "links": [ 12 | { 13 | "asDropdown": true, 14 | "icon": "external link", 15 | "includeVars": false, 16 | "keepTime": true, 17 | "tags": [ 18 | "tlp", 19 | "cassandra" 20 | ], 21 | "title": "Other TLP Dashboards", 22 | "type": "dashboards" 23 | } 24 | ], 25 | "refresh": "5m", 26 | "rows": [ 27 | { 28 | "collapse": false, 29 | "height": 250, 30 | "panels": [ 31 | { 32 | "aliasColors": {}, 33 | "bars": false, 34 | "dashLength": 10, 35 | "dashes": false, 36 | "datasource": null, 37 | "description": "The number of connected 
clients to each Cassandra node. Plots both native and Thrift protocol connections if available. Since Cassandra 2.2, Thrift connections are not opened by default.\n\n##### Values\n\nA Cassandra node supports up to 128 connections per node. Values above 128 are not to be expected.\n\nIt is important, however, that there is a balanced number of connections among the nodes.\n\n##### False Positives\n\nThis metric is pretty straightforward and should not fluctuate unexpectedly.\n\n##### Required Actions\n\nIf the number of connections approaches 128 per node, the client connection options and the load balancing strategy need to be revised.\n\n##### Warning\n\nAn unbalanced number of connections needs to be acted on.", 38 | "fill": 1, 39 | "id": 13, 40 | "legend": { 41 | "alignAsTable": true, 42 | "avg": true, 43 | "current": true, 44 | "hideZero": false, 45 | "max": true, 46 | "min": false, 47 | "rightSide": false, 48 | "show": false, 49 | "sort": "max", 50 | "sortDesc": true, 51 | "total": false, 52 | "values": true 53 | }, 54 | "lines": true, 55 | "linewidth": 1, 56 | "links": [], 57 | "nullPointMode": "null", 58 | "percentage": false, 59 | "pointradius": 5, 60 | "points": false, 61 | "renderer": "flot", 62 | "seriesOverrides": [], 63 | "spaceLength": 10, 64 | "span": 6, 65 | "stack": false, 66 | "steppedLine": false, 67 | "targets": [ 68 | { 69 | "expr": "org_apache_cassandra_metrics_Client_connectedNativeClients{environment=\"$env\", data_center=\"$dc\"}", 70 | "format": "time_series", 71 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.connectedNativeClients", 72 | "refId": "A", 73 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.Client.connectedNativeClients.value, 3, 9)" 74 | }, 75 | { 76 | "expr": "org_apache_cassandra_metrics_Client_connectedThriftClients{environment=\"$env\", data_center=\"$dc\"}", 77 | "format": "time_series", 78 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.connectedThriftClients", 79 | 
"refId": "B", 80 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.Client.connectedThriftClients.value, 3, 9)" 81 | } 82 | ], 83 | "thresholds": [], 84 | "timeFrom": null, 85 | "timeShift": null, 86 | "title": "Connections (per host)", 87 | "tooltip": { 88 | "shared": false, 89 | "sort": 2, 90 | "value_type": "individual" 91 | }, 92 | "transparent": true, 93 | "type": "graph", 94 | "xaxis": { 95 | "buckets": null, 96 | "mode": "time", 97 | "name": null, 98 | "show": true, 99 | "values": [] 100 | }, 101 | "yaxes": [ 102 | { 103 | "format": "short", 104 | "label": "Client connections", 105 | "logBase": 1, 106 | "max": null, 107 | "min": "0", 108 | "show": true 109 | }, 110 | { 111 | "format": "short", 112 | "label": null, 113 | "logBase": 1, 114 | "max": null, 115 | "min": null, 116 | "show": false 117 | } 118 | ] 119 | }, 120 | { 121 | "aliasColors": {}, 122 | "bars": false, 123 | "dashLength": 10, 124 | "dashes": false, 125 | "datasource": null, 126 | "description": "Number of client connections summed per data center.\n\n##### Values\n\nAbsolute values depend on the number of nodes per data center.\n\n##### False Positives\n\nIt is possible to have significant differences among sum of connections to each data center. 
This can happen in situations when different data centers serve different cohorts of clients having peak load happening at different times.", 127 | "fill": 1, 128 | "id": 16, 129 | "legend": { 130 | "alignAsTable": true, 131 | "avg": true, 132 | "current": true, 133 | "max": true, 134 | "min": false, 135 | "show": false, 136 | "sort": "max", 137 | "sortDesc": true, 138 | "total": false, 139 | "values": true 140 | }, 141 | "lines": true, 142 | "linewidth": 1, 143 | "links": [], 144 | "nullPointMode": "null", 145 | "percentage": false, 146 | "pointradius": 5, 147 | "points": false, 148 | "renderer": "flot", 149 | "seriesOverrides": [], 150 | "spaceLength": 10, 151 | "span": 6, 152 | "stack": false, 153 | "steppedLine": false, 154 | "targets": [ 155 | { 156 | "expr": "sum(org_apache_cassandra_metrics_Client_connectedNativeClients{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center)", 157 | "format": "time_series", 158 | "hide": false, 159 | "legendFormat": "{{environment}}.{{data_center}}.connectedNativeClients", 160 | "refId": "A", 161 | "target": "aliasByNode(sumSeriesWithWildcards(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.Client.connectedNativeClients.value, 3), 2, 8)" 162 | }, 163 | { 164 | "expr": "sum(org_apache_cassandra_metrics_Client_connectedThriftClients{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center)", 165 | "format": "time_series", 166 | "legendFormat": "{{environment}}.{{data_center}}.connectedThriftClients", 167 | "refId": "B", 168 | "target": "aliasByNode(sumSeriesWithWildcards(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.Client.connectedThriftClients.value, 3), 2, 8)" 169 | } 170 | ], 171 | "thresholds": [], 172 | "timeFrom": null, 173 | "timeShift": null, 174 | "title": "Connections (per data center)", 175 | "tooltip": { 176 | "shared": false, 177 | "sort": 2, 178 | "value_type": "individual" 179 | }, 180 | "transparent": true, 181 | "type": "graph", 182 | "xaxis": { 183 | "buckets": 
null, 184 | "mode": "time", 185 | "name": null, 186 | "show": true, 187 | "values": [] 188 | }, 189 | "yaxes": [ 190 | { 191 | "format": "short", 192 | "label": "Client connections", 193 | "logBase": 1, 194 | "max": null, 195 | "min": "0", 196 | "show": true 197 | }, 198 | { 199 | "format": "short", 200 | "label": "", 201 | "logBase": 1, 202 | "max": null, 203 | "min": null, 204 | "show": false 205 | } 206 | ] 207 | }, 208 | { 209 | "aliasColors": {}, 210 | "bars": false, 211 | "dashLength": 10, 212 | "dashes": false, 213 | "datasource": null, 214 | "description": "A Cassandra client typically interacts with the cluster via a coordinator node, which is responsible for arranging the operation to be carried out.\n\nIf the operation takes too long the coordinator will time out the operation. This graph shows a rate of the timeouts happening per node.\n\n##### Values\n\nThis graph shows a per-minute rate of timeouts happening on each node, further broken down by the operation type.\n\n##### False Positives\n\nTimeouts should not happen under any circumstances. There are no false positives.\n\n##### Required Actions\n\nIf timeouts happen, their cause needs to be investigated. 
The top-level reasons for timeouts occurring include over-loaded nodes, misconfigured clients or ill-fitting data model (including sub-optimal queries).\n\n##### Warning\n\nTimeouts might percolate all the way to application level, where they can cause SLA degradation.", 215 | "fill": 1, 216 | "id": 14, 217 | "legend": { 218 | "alignAsTable": true, 219 | "avg": true, 220 | "current": true, 221 | "hideZero": false, 222 | "max": true, 223 | "min": false, 224 | "rightSide": false, 225 | "show": false, 226 | "sort": "max", 227 | "sortDesc": true, 228 | "total": false, 229 | "values": true 230 | }, 231 | "lines": true, 232 | "linewidth": 1, 233 | "links": [], 234 | "nullPointMode": "null", 235 | "percentage": false, 236 | "pointradius": 5, 237 | "points": false, 238 | "renderer": "flot", 239 | "seriesOverrides": [], 240 | "spaceLength": 10, 241 | "span": 6, 242 | "stack": false, 243 | "steppedLine": false, 244 | "targets": [ 245 | { 246 | "expr": "org_apache_cassandra_metrics_ClientRequest_Timeouts{environment=\"$env\", data_center=\"$dc\", unit=\"OneMinuteRate\"}", 247 | "format": "time_series", 248 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}.m1", 249 | "refId": "A", 250 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.Timeouts.1MinuteRate, 3, 9)", 251 | "textEditor": false 252 | } 253 | ], 254 | "thresholds": [], 255 | "timeFrom": null, 256 | "timeShift": null, 257 | "title": "Timeouts (per host)", 258 | "tooltip": { 259 | "shared": false, 260 | "sort": 2, 261 | "value_type": "individual" 262 | }, 263 | "transparent": true, 264 | "type": "graph", 265 | "xaxis": { 266 | "buckets": null, 267 | "mode": "time", 268 | "name": null, 269 | "show": true, 270 | "values": [] 271 | }, 272 | "yaxes": [ 273 | { 274 | "format": "opm", 275 | "label": "Timeouts", 276 | "logBase": 1, 277 | "max": null, 278 | "min": "0", 279 | "show": true 280 | }, 281 | { 282 | "format": "short", 283 | "label": null, 284 | 
"logBase": 1, 285 | "max": null, 286 | "min": null, 287 | "show": false 288 | } 289 | ] 290 | }, 291 | { 292 | "aliasColors": {}, 293 | "bars": false, 294 | "dashLength": 10, 295 | "dashes": false, 296 | "datasource": null, 297 | "description": "Similar to `Timeouts (per host)` graph, with the difference that this graph sums the timeouts per data center.\n\nThe purpose of this graph is to provide insight if it is isolated nodes having problems, or if the problem is more wide-spread.\n\n##### Values\n\nAmount of timeouts happening in each datacenter per minute, further split among each operation type.", 298 | "fill": 1, 299 | "id": 15, 300 | "legend": { 301 | "alignAsTable": true, 302 | "avg": true, 303 | "current": true, 304 | "max": true, 305 | "min": false, 306 | "show": false, 307 | "sort": "max", 308 | "sortDesc": true, 309 | "total": false, 310 | "values": true 311 | }, 312 | "lines": true, 313 | "linewidth": 1, 314 | "links": [], 315 | "nullPointMode": "null", 316 | "percentage": false, 317 | "pointradius": 5, 318 | "points": false, 319 | "renderer": "flot", 320 | "seriesOverrides": [], 321 | "spaceLength": 10, 322 | "span": 6, 323 | "stack": false, 324 | "steppedLine": false, 325 | "targets": [ 326 | { 327 | "expr": "sum(org_apache_cassandra_metrics_ClientRequest_Timeouts{environment=\"$env\", data_center=\"$dc\", unit=\"OneMinuteRate\"}) by (environment, data_center, scope)", 328 | "format": "time_series", 329 | "legendFormat": "{{environment}}.{{data_center}}.{{scope}}.m1", 330 | "refId": "A", 331 | "target": "aliasByNode(sumSeriesWithWildcards(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.Timeouts.1MinuteRate, 3), 2, 8)", 332 | "textEditor": false 333 | } 334 | ], 335 | "thresholds": [], 336 | "timeFrom": null, 337 | "timeShift": null, 338 | "title": "Timeouts (per data center)", 339 | "tooltip": { 340 | "shared": false, 341 | "sort": 2, 342 | "value_type": "individual" 343 | }, 344 | "transparent": true, 345 | "type": "graph", 346 
| "xaxis": { 347 | "buckets": null, 348 | "mode": "time", 349 | "name": null, 350 | "show": true, 351 | "values": [] 352 | }, 353 | "yaxes": [ 354 | { 355 | "format": "opm", 356 | "label": "Timeouts", 357 | "logBase": 1, 358 | "max": null, 359 | "min": "0", 360 | "show": true 361 | }, 362 | { 363 | "format": "short", 364 | "label": "", 365 | "logBase": 1, 366 | "max": null, 367 | "min": null, 368 | "show": false 369 | } 370 | ] 371 | }, 372 | { 373 | "aliasColors": {}, 374 | "bars": false, 375 | "dashLength": 10, 376 | "dashes": false, 377 | "datasource": null, 378 | "description": "A Cassandra client typically interacts with the cluster via a coordinator node, which is responsible for arranging the operation to be carried out.\n\nFor certain operations, the coordinator needs responses from multiple nodes. If the coordinator can not possibly obtain responses from these nodes, an `Unavailable` error occurs.\n\nFor example, in a cluster with replication factor of three and two replicas down for a given partition, executing an operation with a `QUORUM` consistency will trigger an `Unavailable` error.\n\n##### Values\n\nThe values are a per-second rate of `Unavailable` errors occurring per node, further broken down by operation type.\n\n##### False Positives\n\nFalse positives should not happen.\n\nHowever, sometimes the cause of `Unavailable` errors is a discrepancy between keyspace topology and consistency used by the application. \n\n##### Required Actions\n\n`Unavailable` errors should not happen in clusters. If they do, their cause needs to be investigated and promptly addressed.\n\n##### Warning\n\n`Unavailable` errors can be a symptom of down nodes. 
Down nodes are generally a bad thing and should be investigated and promptly fixed.", 379 | "fill": 1, 380 | "id": 17, 381 | "legend": { 382 | "alignAsTable": true, 383 | "avg": true, 384 | "current": true, 385 | "hideZero": false, 386 | "max": true, 387 | "min": false, 388 | "show": false, 389 | "sort": "max", 390 | "sortDesc": true, 391 | "total": false, 392 | "values": true 393 | }, 394 | "lines": true, 395 | "linewidth": 1, 396 | "links": [], 397 | "nullPointMode": "null", 398 | "percentage": false, 399 | "pointradius": 5, 400 | "points": false, 401 | "renderer": "flot", 402 | "seriesOverrides": [], 403 | "spaceLength": 10, 404 | "span": 6, 405 | "stack": false, 406 | "steppedLine": false, 407 | "targets": [ 408 | { 409 | "expr": "org_apache_cassandra_metrics_ClientRequest_Unavailables{environment=\"$env\", data_center=\"$dc\", unit=\"OneMinuteRate\"}", 410 | "format": "time_series", 411 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}.m1", 412 | "refId": "A", 413 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.Unavailables.1MinuteRate, 3, 9)", 414 | "textEditor": false 415 | } 416 | ], 417 | "thresholds": [], 418 | "timeFrom": null, 419 | "timeShift": null, 420 | "title": "Unavailables (per host)", 421 | "tooltip": { 422 | "shared": false, 423 | "sort": 2, 424 | "value_type": "individual" 425 | }, 426 | "transparent": true, 427 | "type": "graph", 428 | "xaxis": { 429 | "buckets": null, 430 | "mode": "time", 431 | "name": null, 432 | "show": true, 433 | "values": [] 434 | }, 435 | "yaxes": [ 436 | { 437 | "format": "opm", 438 | "label": "Unavailable errors", 439 | "logBase": 1, 440 | "max": null, 441 | "min": "0", 442 | "show": true 443 | }, 444 | { 445 | "format": "short", 446 | "label": "", 447 | "logBase": 1, 448 | "max": null, 449 | "min": null, 450 | "show": false 451 | } 452 | ] 453 | }, 454 | { 455 | "aliasColors": {}, 456 | "bars": false, 457 | "dashLength": 10, 458 | "dashes": false, 
459 | "datasource": null, 460 | "description": "Similar to `Unavailables (per host)`, this graph shows the number of `Unavailable` errors occurring in the cluster, but sums the errors by data center.\n\n##### Values\n\nThe values are the number of `Unavailable` errors happening per minute in each data center, further broken down by operation type.", 461 | "fill": 1, 462 | "id": 18, 463 | "legend": { 464 | "alignAsTable": true, 465 | "avg": true, 466 | "current": true, 467 | "hideZero": false, 468 | "max": true, 469 | "min": false, 470 | "show": false, 471 | "sort": "max", 472 | "sortDesc": true, 473 | "total": false, 474 | "values": true 475 | }, 476 | "lines": true, 477 | "linewidth": 1, 478 | "links": [], 479 | "nullPointMode": "null", 480 | "percentage": false, 481 | "pointradius": 5, 482 | "points": false, 483 | "renderer": "flot", 484 | "seriesOverrides": [], 485 | "spaceLength": 10, 486 | "span": 6, 487 | "stack": false, 488 | "steppedLine": false, 489 | "targets": [ 490 | { 491 | "expr": "sum(org_apache_cassandra_metrics_ClientRequest_Unavailables{environment=\"$env\", data_center=\"$dc\", unit=\"OneMinuteRate\"}) by (environment, data_center, scope)", 492 | "format": "time_series", 493 | "legendFormat": "{{environment}}.{{data_center}}.{{scope}}.m1", 494 | "refId": "B", 495 | "target": "aliasByNode(sumSeriesWithWildcards(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.Unavailables.1MinuteRate, 3), 2, 8)", 496 | "textEditor": false 497 | } 498 | ], 499 | "thresholds": [], 500 | "timeFrom": null, 501 | "timeShift": null, 502 | "title": "Unavailables (per data center)", 503 | "tooltip": { 504 | "shared": false, 505 | "sort": 2, 506 | "value_type": "individual" 507 | }, 508 | "transparent": true, 509 | "type": "graph", 510 | "xaxis": { 511 | "buckets": null, 512 | "mode": "time", 513 | "name": null, 514 | "show": true, 515 | "values": [] 516 | }, 517 | "yaxes": [ 518 | { 519 | "format": "opm", 520 | "label": "Unavailable errors", 521 | 
"logBase": 1, 522 | "max": null, 523 | "min": "0", 524 | "show": true 525 | }, 526 | { 527 | "format": "short", 528 | "label": "", 529 | "logBase": 1, 530 | "max": null, 531 | "min": null, 532 | "show": false 533 | } 534 | ] 535 | } 536 | ], 537 | "repeat": null, 538 | "repeatIteration": null, 539 | "repeatRowId": null, 540 | "showTitle": true, 541 | "title": "Client Connections Overview", 542 | "titleSize": "h6" 543 | }, 544 | { 545 | "collapse": false, 546 | "height": 250, 547 | "panels": [ 548 | { 549 | "aliasColors": {}, 550 | "bars": false, 551 | "dashLength": 10, 552 | "dashes": false, 553 | "datasource": null, 554 | "description": "Cassandra groups tasks of a particular type into their own thread pool. Monitoring thread pools is important to understand the saturation of the node.\n\nIn this graph, a thread pool responsible for handling client CQL requests is shown.\n\n##### Values\n\nThe values are the current number of tasks in a particular state. The states tracked are:\n* **Active**: tasks actively worked on.\n* **Pending**: queued tasks.\n* **Blocked**: tasks blocked due to queue saturation. 
\n\n##### False Positives\n\nIt is acceptable for the `Pending` and `Blocked` tasks to have non-zero value.\n\n##### Required Actions\n\nIn case of sharp increase, or constant values in order of several tens, node capacity assessment needs to happen.", 555 | "fill": 1, 556 | "id": 3, 557 | "legend": { 558 | "alignAsTable": true, 559 | "avg": true, 560 | "current": true, 561 | "max": true, 562 | "min": false, 563 | "show": false, 564 | "sort": "max", 565 | "sortDesc": true, 566 | "total": false, 567 | "values": true 568 | }, 569 | "lines": true, 570 | "linewidth": 1, 571 | "links": [], 572 | "nullPointMode": "null", 573 | "percentage": false, 574 | "pointradius": 5, 575 | "points": false, 576 | "renderer": "flot", 577 | "seriesOverrides": [], 578 | "spaceLength": 10, 579 | "span": 6, 580 | "stack": false, 581 | "steppedLine": false, 582 | "targets": [ 583 | { 584 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"ActiveTasks\", thread_type=\"transport\", thread_pool=\"Native-Transport-Requests\"}", 585 | "format": "time_series", 586 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}", 587 | "refId": "A", 588 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.transport.Native-Transport-Requests.ActiveTasks.value, 3, 11)" 589 | }, 590 | { 591 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"PendingTasks\", thread_type=\"transport\", thread_pool=\"Native-Transport-Requests\"}", 592 | "format": "time_series", 593 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}", 594 | "refId": "D", 595 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.transport.Native-Transport-Requests.PendingTasks.value, 3, 11)" 596 | }, 597 | { 598 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", 
data_center=\"$dc\", status_pool=\"CurrentlyBlockedTasks\", thread_type=\"transport\", thread_pool=\"Native-Transport-Requests\"}", 599 | "format": "time_series", 600 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}", 601 | "refId": "C", 602 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.transport.Native-Transport-Requests.CurrentlyBlockedTasks.value, 3, 11)" 603 | } 604 | ], 605 | "thresholds": [], 606 | "timeFrom": null, 607 | "timeShift": null, 608 | "title": "Native Requests Pool (per host)", 609 | "tooltip": { 610 | "shared": false, 611 | "sort": 2, 612 | "value_type": "individual" 613 | }, 614 | "transparent": true, 615 | "type": "graph", 616 | "xaxis": { 617 | "buckets": null, 618 | "mode": "time", 619 | "name": null, 620 | "show": true, 621 | "values": [] 622 | }, 623 | "yaxes": [ 624 | { 625 | "format": "short", 626 | "label": "Threads", 627 | "logBase": 1, 628 | "max": null, 629 | "min": "0", 630 | "show": true 631 | }, 632 | { 633 | "format": "short", 634 | "label": null, 635 | "logBase": 1, 636 | "max": null, 637 | "min": null, 638 | "show": false 639 | } 640 | ] 641 | }, 642 | { 643 | "aliasColors": {}, 644 | "bars": false, 645 | "dashLength": 10, 646 | "dashes": false, 647 | "datasource": null, 648 | "description": "Cassandra groups tasks of a particular type into their own thread pool. Monitoring thread pools is important to understand the saturation of the node.\n\nThis thread pool handles coordinator requests to the cluster.\n\n##### Values\n\nThe values are the current number of tasks in a particular state. 
The states tracked are:\n\n* **Active**: tasks actively worked on.\n* **Pending**: queued tasks.\n* **Blocked**: tasks blocked due to queue saturation.\n\n##### False Positives\n\nIt is acceptable for the `Pending` and `Blocked` tasks to have non-zero value.\n\n##### Required Actions\n\nIn case of sharp increase, or constant values in order of several tens, node capacity assessment needs to happen.", 649 | "fill": 1, 650 | "id": 4, 651 | "legend": { 652 | "alignAsTable": true, 653 | "avg": true, 654 | "current": true, 655 | "max": true, 656 | "min": false, 657 | "show": false, 658 | "sort": "max", 659 | "sortDesc": true, 660 | "total": false, 661 | "values": true 662 | }, 663 | "lines": true, 664 | "linewidth": 1, 665 | "links": [], 666 | "nullPointMode": "null", 667 | "percentage": false, 668 | "pointradius": 5, 669 | "points": false, 670 | "renderer": "flot", 671 | "seriesOverrides": [], 672 | "spaceLength": 10, 673 | "span": 6, 674 | "stack": false, 675 | "steppedLine": false, 676 | "targets": [ 677 | { 678 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"ActiveTasks\", thread_type=\"request\", thread_pool=\"RequestResponseStage\"}", 679 | "format": "time_series", 680 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}", 681 | "refId": "A", 682 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.request.RequestResponseStage.ActiveTasks.value, 3, 11)" 683 | }, 684 | { 685 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"PendingTasks\", thread_type=\"request\", thread_pool=\"RequestResponseStage\"}", 686 | "format": "time_series", 687 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}", 688 | "refId": "B", 689 | "target": 
"aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.request.RequestResponseStage.PendingTasks.value, 3, 11)" 690 | }, 691 | { 692 | "expr": "org_apache_cassandra_metrics_ThreadPools{environment=\"$env\", data_center=\"$dc\", status_pool=\"CurrentlyBlockedTasks\", thread_type=\"request\", thread_pool=\"RequestResponseStage\"}", 693 | "format": "time_series", 694 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{thread_pool}}.{{status_pool}}", 695 | "refId": "C", 696 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ThreadPools.request.RequestResponseStage.CurrentlyBlockedTasks.value, 3, 11)" 697 | } 698 | ], 699 | "thresholds": [], 700 | "timeFrom": null, 701 | "timeShift": null, 702 | "title": "Coordinator Requests Pool (per host)", 703 | "tooltip": { 704 | "shared": false, 705 | "sort": 2, 706 | "value_type": "individual" 707 | }, 708 | "transparent": true, 709 | "type": "graph", 710 | "xaxis": { 711 | "buckets": null, 712 | "mode": "time", 713 | "name": null, 714 | "show": true, 715 | "values": [] 716 | }, 717 | "yaxes": [ 718 | { 719 | "format": "short", 720 | "label": "Threads", 721 | "logBase": 1, 722 | "max": null, 723 | "min": "0", 724 | "show": true 725 | }, 726 | { 727 | "format": "short", 728 | "label": null, 729 | "logBase": 1, 730 | "max": null, 731 | "min": null, 732 | "show": false 733 | } 734 | ] 735 | } 736 | ], 737 | "repeat": null, 738 | "repeatIteration": null, 739 | "repeatRowId": null, 740 | "showTitle": true, 741 | "title": "Thread Pools", 742 | "titleSize": "h6" 743 | }, 744 | { 745 | "collapse": false, 746 | "height": 250, 747 | "panels": [ 748 | { 749 | "aliasColors": {}, 750 | "bars": false, 751 | "dashLength": 10, 752 | "dashes": false, 753 | "datasource": null, 754 | "description": "Cassandra allows server-side caching of CQL queries to avoid their repetitive parsing, which yields certain performance benefits.\n\nThis graph shows the number of statements cached. 
\n\n##### Values\n\nThe values are a number of cached statements per node. The exact value of the statements in cache varies, because the cache size is configured (since Cassandra 3.6) in bytes, rather than statement counts.\n\n##### False Positives\n\nMonotonic values in orders of hundreds can signal saturated cache. However, this situation needs to be correlated with the `Prepared Statements Eviction` graph to prove the saturation.\n\n##### Required Actions\n\nIt is unlikely the application actually needs the whole cache. If the saturation happens, it's typically a sign of repetitive statement preparation, which is sub-optimal.\n\n##### Warning\n\nSaturated prepared statements cache signals misuse on the application level, which should be corrected.", 755 | "fill": 1, 756 | "id": 5, 757 | "legend": { 758 | "alignAsTable": true, 759 | "avg": true, 760 | "current": true, 761 | "max": true, 762 | "min": false, 763 | "show": false, 764 | "sort": "max", 765 | "sortDesc": true, 766 | "total": false, 767 | "values": true 768 | }, 769 | "lines": true, 770 | "linewidth": 1, 771 | "links": [], 772 | "nullPointMode": "null", 773 | "percentage": false, 774 | "pointradius": 5, 775 | "points": false, 776 | "renderer": "flot", 777 | "seriesOverrides": [], 778 | "spaceLength": 10, 779 | "span": 6, 780 | "stack": false, 781 | "steppedLine": false, 782 | "targets": [ 783 | { 784 | "expr": "org_apache_cassandra_metrics_CQL_PreparedStatementsCount{environment=\"$env\", data_center=\"$dc\"}", 785 | "format": "time_series", 786 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.PreparedStatementsCount", 787 | "refId": "A", 788 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.CQL.PreparedStatementsCount.value, 3)" 789 | } 790 | ], 791 | "thresholds": [], 792 | "timeFrom": null, 793 | "timeShift": null, 794 | "title": "Prepared Statements Cache (by host)", 795 | "tooltip": { 796 | "shared": false, 797 | "sort": 2, 798 | "value_type": 
"individual" 799 | }, 800 | "transparent": true, 801 | "type": "graph", 802 | "xaxis": { 803 | "buckets": null, 804 | "mode": "time", 805 | "name": null, 806 | "show": true, 807 | "values": [] 808 | }, 809 | "yaxes": [ 810 | { 811 | "format": "short", 812 | "label": "Statements", 813 | "logBase": 1, 814 | "max": null, 815 | "min": "0", 816 | "show": true 817 | }, 818 | { 819 | "format": "short", 820 | "label": null, 821 | "logBase": 1, 822 | "max": null, 823 | "min": null, 824 | "show": false 825 | } 826 | ] 827 | }, 828 | { 829 | "aliasColors": {}, 830 | "bars": false, 831 | "dashLength": 10, 832 | "dashes": false, 833 | "datasource": null, 834 | "description": "Cassandra allows server-side caching of CQL queries to avoid their repetitive parsing, which yields certain performance benefits.\n\nThis graph shows the eviction of prepared statements from their cache.\n\n##### Values\n\nThe values represent the number of statements evicted per second per node.\n\n##### False Positives\n\nIt is acceptable for a Cassandra node to evict some statements. 
This should happen sporadically, and exclusively after operator-triggered operations.\n\n##### Required Actions\n\nPrepared statement eviction occurring signals misuse at the application level which needs to be investigated and remedied.", 835 | "fill": 1, 836 | "id": 8, 837 | "legend": { 838 | "alignAsTable": true, 839 | "avg": true, 840 | "current": true, 841 | "max": true, 842 | "min": false, 843 | "show": false, 844 | "sort": "max", 845 | "sortDesc": true, 846 | "total": false, 847 | "values": true 848 | }, 849 | "lines": true, 850 | "linewidth": 1, 851 | "links": [], 852 | "nullPointMode": "null", 853 | "percentage": false, 854 | "pointradius": 5, 855 | "points": false, 856 | "renderer": "flot", 857 | "seriesOverrides": [], 858 | "spaceLength": 10, 859 | "span": 6, 860 | "stack": false, 861 | "steppedLine": false, 862 | "targets": [ 863 | { 864 | "expr": "deriv(org_apache_cassandra_metrics_CQL_PreparedStatementsEvicted{environment=\"$env\", data_center=\"$dc\"}[5m])", 865 | "format": "time_series", 866 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.PreparedStatementsEvicted", 867 | "refId": "A", 868 | "target": "aliasByNode(perSecond(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.CQL.PreparedStatementsEvicted.count), 3)" 869 | } 870 | ], 871 | "thresholds": [], 872 | "timeFrom": null, 873 | "timeShift": null, 874 | "title": "Prepared Statements Eviction (by host, perSecond() issues)", 875 | "tooltip": { 876 | "shared": false, 877 | "sort": 2, 878 | "value_type": "individual" 879 | }, 880 | "transparent": true, 881 | "type": "graph", 882 | "xaxis": { 883 | "buckets": null, 884 | "mode": "time", 885 | "name": null, 886 | "show": true, 887 | "values": [] 888 | }, 889 | "yaxes": [ 890 | { 891 | "format": "ops", 892 | "label": "Eviction rate", 893 | "logBase": 1, 894 | "max": null, 895 | "min": "0", 896 | "show": true 897 | }, 898 | { 899 | "format": "short", 900 | "label": null, 901 | "logBase": 1, 902 | "max": null, 903 | "min": null, 904 | 
"show": false 905 | } 906 | ] 907 | }, 908 | { 909 | "aliasColors": {}, 910 | "bars": false, 911 | "dashLength": 10, 912 | "dashes": false, 913 | "datasource": null, 914 | "description": "This graph shows the ratio between prepared and regular statements executed.\n\n##### Values\n\nThe closer to `1.0` the value, the bigger the portion of prepared statements executed.\n\nThe values are reported per each host.\n\n##### Required Actions\n\nBased on the work load, if this ratio swings towards the unexpected type of requests executed, an investigation of the application should ensue.", 915 | "fill": 1, 916 | "id": 6, 917 | "legend": { 918 | "alignAsTable": true, 919 | "avg": true, 920 | "current": true, 921 | "max": true, 922 | "min": false, 923 | "show": false, 924 | "sort": "max", 925 | "sortDesc": true, 926 | "total": false, 927 | "values": true 928 | }, 929 | "lines": true, 930 | "linewidth": 1, 931 | "links": [], 932 | "nullPointMode": "null", 933 | "percentage": false, 934 | "pointradius": 5, 935 | "points": false, 936 | "renderer": "flot", 937 | "seriesOverrides": [], 938 | "spaceLength": 10, 939 | "span": 6, 940 | "stack": false, 941 | "steppedLine": false, 942 | "targets": [ 943 | { 944 | "expr": "org_apache_cassandra_metrics_CQL_PreparedStatementsRatio{environment=\"$env\", data_center=\"$dc\"}", 945 | "format": "time_series", 946 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.PreparedStatementsRatio", 947 | "refId": "A", 948 | "target": "aliasByNode(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.CQL.PreparedStatementsRatio.value, 3)" 949 | } 950 | ], 951 | "thresholds": [], 952 | "timeFrom": null, 953 | "timeShift": null, 954 | "title": "Prepared vs Unprepared (by host)", 955 | "tooltip": { 956 | "shared": false, 957 | "sort": 2, 958 | "value_type": "individual" 959 | }, 960 | "transparent": true, 961 | "type": "graph", 962 | "xaxis": { 963 | "buckets": null, 964 | "mode": "time", 965 | "name": null, 966 | "show": true, 967 | "values": [] 
968 | }, 969 | "yaxes": [ 970 | { 971 | "format": "short", 972 | "label": null, 973 | "logBase": 1, 974 | "max": "1", 975 | "min": "0", 976 | "show": true 977 | }, 978 | { 979 | "format": "short", 980 | "label": null, 981 | "logBase": 1, 982 | "max": null, 983 | "min": null, 984 | "show": false 985 | } 986 | ] 987 | }, 988 | { 989 | "aliasColors": {}, 990 | "bars": false, 991 | "dashLength": 10, 992 | "dashes": false, 993 | "datasource": null, 994 | "description": "Unlike prepared statements, the regular statements need to be parsed every time they are executed. Because of this, they might not be desired if performance is critical.\n\n##### Values\n\nCassandra reports only the total number of regular statements executed over its uptime. Therefore we're using Grafana's `perSecond()` aggregation to visualise a per-second rate of regular statement execution per host.\n\n##### False Positives\n\nRegular statements are sometimes inevitable. For example, the java-driver performs several regular statements upon initialising its connection in order to learn the cluster topology etc.", 995 | "fill": 1, 996 | "id": 19, 997 | "legend": { 998 | "alignAsTable": true, 999 | "avg": true, 1000 | "current": true, 1001 | "max": true, 1002 | "min": false, 1003 | "show": false, 1004 | "sort": "max", 1005 | "sortDesc": true, 1006 | "total": false, 1007 | "values": true 1008 | }, 1009 | "lines": true, 1010 | "linewidth": 1, 1011 | "links": [], 1012 | "nullPointMode": "null", 1013 | "percentage": false, 1014 | "pointradius": 5, 1015 | "points": false, 1016 | "renderer": "flot", 1017 | "seriesOverrides": [], 1018 | "spaceLength": 10, 1019 | "span": 6, 1020 | "stack": false, 1021 | "steppedLine": false, 1022 | "targets": [ 1023 | { 1024 | "expr": "deriv(org_apache_cassandra_metrics_CQL_RegularStatementsExecuted{environment=\"$env\", data_center=\"$dc\"}[5m])", 1025 | "format": "time_series", 1026 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.RegularStatementsExecuted", 1027 | 
"refId": "A", 1028 | "target": "aliasByNode(perSecond(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.CQL.RegularStatementsExecuted.count), 3)" 1029 | } 1030 | ], 1031 | "thresholds": [], 1032 | "timeFrom": null, 1033 | "timeShift": null, 1034 | "title": "Regular Statements Executed (by host, perSecond() issues)", 1035 | "tooltip": { 1036 | "shared": false, 1037 | "sort": 2, 1038 | "value_type": "individual" 1039 | }, 1040 | "transparent": true, 1041 | "type": "graph", 1042 | "xaxis": { 1043 | "buckets": null, 1044 | "mode": "time", 1045 | "name": null, 1046 | "show": true, 1047 | "values": [] 1048 | }, 1049 | "yaxes": [ 1050 | { 1051 | "format": "ops", 1052 | "label": "Statement rate", 1053 | "logBase": 1, 1054 | "max": null, 1055 | "min": "0", 1056 | "show": true 1057 | }, 1058 | { 1059 | "format": "short", 1060 | "label": null, 1061 | "logBase": 1, 1062 | "max": null, 1063 | "min": null, 1064 | "show": false 1065 | } 1066 | ] 1067 | } 1068 | ], 1069 | "repeat": null, 1070 | "repeatIteration": null, 1071 | "repeatRowId": null, 1072 | "showTitle": true, 1073 | "title": "Statements", 1074 | "titleSize": "h6" 1075 | }, 1076 | { 1077 | "collapse": false, 1078 | "height": 250, 1079 | "panels": [ 1080 | { 1081 | "aliasColors": {}, 1082 | "bars": false, 1083 | "dashLength": 10, 1084 | "dashes": false, 1085 | "datasource": null, 1086 | "description": "Cassandra exposes certain metrics related to Light-Weight Transactions (LWT) that might indicate problems:\n* **ConditionNotMet**: indicates a failure to meet a condition requested by the transaction. 
This might indicate a contention in modifying a single cell.\n* **UnfinishedCommit**: indicates premature conclusions of Paxos rounds, which is mundane by itself and has mundane consequences too.\n\n##### Values\n\nThe values are per-second rates of both problematic behaviours occurring, broken down per node and per LWT type (read or write).\n\n##### False Positives\n\nIt is acceptable for these metrics to be non-zero if their values are small and happen infrequently.\n\n##### Required Actions\n\nIn case the values are constantly non-zero, the use-case needs to be revised as it might be designed in a sub-optimal way.", 1087 | "fill": 1, 1088 | "id": 11, 1089 | "legend": { 1090 | "alignAsTable": true, 1091 | "avg": true, 1092 | "current": true, 1093 | "max": true, 1094 | "min": false, 1095 | "show": false, 1096 | "sort": "max", 1097 | "sortDesc": true, 1098 | "total": false, 1099 | "values": true 1100 | }, 1101 | "lines": true, 1102 | "linewidth": 1, 1103 | "links": [], 1104 | "nullPointMode": "null", 1105 | "percentage": false, 1106 | "pointradius": 5, 1107 | "points": false, 1108 | "renderer": "flot", 1109 | "seriesOverrides": [], 1110 | "spaceLength": 10, 1111 | "span": 6, 1112 | "stack": false, 1113 | "steppedLine": false, 1114 | "targets": [ 1115 | { 1116 | "expr": "deriv(org_apache_cassandra_metrics_ClientRequest_ConditionNotMet{environment=\"$env\", data_center=\"$dc\"}[5m])", 1117 | "format": "time_series", 1118 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}.ConditionNotMet", 1119 | "refId": "A", 1120 | "target": "aliasByNode(perSecond(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.ConditionNotMet.count), 3, 9, 10)" 1121 | }, 1122 | { 1123 | "expr": "deriv(org_apache_cassandra_metrics_ClientRequest_UnfinishedCommit{environment=\"$env\", data_center=\"$dc\"}[5m])", 1124 | "format": "time_series", 1125 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}.UnfinishedCommit", 1126 | "refId": "C", 1127 | 
"target": "aliasByNode(perSecond(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.UnfinishedCommit.count), 3, 9, 10)" 1128 | } 1129 | ], 1130 | "thresholds": [], 1131 | "timeFrom": null, 1132 | "timeShift": null, 1133 | "title": "LWT Problems (by host, perSecond() issues)", 1134 | "tooltip": { 1135 | "shared": false, 1136 | "sort": 2, 1137 | "value_type": "individual" 1138 | }, 1139 | "transparent": true, 1140 | "type": "graph", 1141 | "xaxis": { 1142 | "buckets": null, 1143 | "mode": "time", 1144 | "name": null, 1145 | "show": true, 1146 | "values": [] 1147 | }, 1148 | "yaxes": [ 1149 | { 1150 | "format": "ops", 1151 | "label": null, 1152 | "logBase": 1, 1153 | "max": null, 1154 | "min": "0", 1155 | "show": true 1156 | }, 1157 | { 1158 | "format": "short", 1159 | "label": null, 1160 | "logBase": 1, 1161 | "max": null, 1162 | "min": null, 1163 | "show": false 1164 | } 1165 | ] 1166 | }, 1167 | { 1168 | "aliasColors": {}, 1169 | "bars": false, 1170 | "dashLength": 10, 1171 | "dashes": false, 1172 | "datasource": null, 1173 | "description": "Cassandra tracks total latency accumulated while carrying out a particular request. This happens on the coordinator level and represents the total time the request has spent in the cluster.\n\n##### Values\n\nCassandra reports an ever-increasing value representing microseconds spent. 
For visualisation we are using Grafana's derivative to gauge any trends.\n\n##### Required Actions\n\nTrend changes in the values shown in these graphs indicate the subtle degradations that can happen in a Cassandra cluster and should trigger an investigation.", 1174 | "fill": 1, 1175 | "id": 12, 1176 | "legend": { 1177 | "alignAsTable": true, 1178 | "avg": true, 1179 | "current": true, 1180 | "max": true, 1181 | "min": false, 1182 | "show": false, 1183 | "sort": "max", 1184 | "sortDesc": true, 1185 | "total": false, 1186 | "values": true 1187 | }, 1188 | "lines": true, 1189 | "linewidth": 1, 1190 | "links": [], 1191 | "nullPointMode": "null", 1192 | "percentage": false, 1193 | "pointradius": 5, 1194 | "points": false, 1195 | "renderer": "flot", 1196 | "seriesOverrides": [], 1197 | "spaceLength": 10, 1198 | "span": 6, 1199 | "stack": false, 1200 | "steppedLine": false, 1201 | "targets": [ 1202 | { 1203 | "expr": "deriv(org_apache_cassandra_metrics_ClientRequest_TotalLatency[5m])", 1204 | "format": "time_series", 1205 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{scope}}", 1206 | "refId": "D", 1207 | "target": "aliasByNode(derivative(cassandra.$env.$dc.$host.org.apache.cassandra.metrics.ClientRequest.*.TotalLatency.count), 3, 9)" 1208 | } 1209 | ], 1210 | "thresholds": [], 1211 | "timeFrom": null, 1212 | "timeShift": null, 1213 | "title": "Total Latency (possible derivative() issues)", 1214 | "tooltip": { 1215 | "shared": false, 1216 | "sort": 2, 1217 | "value_type": "individual" 1218 | }, 1219 | "transparent": true, 1220 | "type": "graph", 1221 | "xaxis": { 1222 | "buckets": null, 1223 | "mode": "time", 1224 | "name": null, 1225 | "show": true, 1226 | "values": [] 1227 | }, 1228 | "yaxes": [ 1229 | { 1230 | "format": "µs", 1231 | "label": null, 1232 | "logBase": 1, 1233 | "max": null, 1234 | "min": "0", 1235 | "show": true 1236 | }, 1237 | { 1238 | "format": "short", 1239 | "label": null, 1240 | "logBase": 1, 1241 | "max": null, 1242 | "min": null, 
1243 | "show": true 1244 | } 1245 | ] 1246 | } 1247 | ], 1248 | "repeat": null, 1249 | "repeatIteration": null, 1250 | "repeatRowId": null, 1251 | "showTitle": true, 1252 | "title": "Client Requests", 1253 | "titleSize": "h6" 1254 | } 1255 | ], 1256 | "schemaVersion": 14, 1257 | "style": "light", 1258 | "tags": [ 1259 | "tlp", 1260 | "cassandra", 1261 | "beta" 1262 | ], 1263 | "templating": { 1264 | "list": [ 1265 | { 1266 | "allValue": null, 1267 | "current": { 1268 | "text": "environment", 1269 | "value": "environment" 1270 | }, 1271 | "datasource": "prometheus", 1272 | "hide": 0, 1273 | "includeAll": false, 1274 | "label": "Environment", 1275 | "multi": false, 1276 | "name": "env", 1277 | "options": [], 1278 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, environment)", 1279 | "refresh": 2, 1280 | "regex": "", 1281 | "sort": 1, 1282 | "tagValuesQuery": "", 1283 | "tags": [], 1284 | "tagsQuery": "", 1285 | "type": "query", 1286 | "useTags": true 1287 | }, 1288 | { 1289 | "allValue": "", 1290 | "current": { 1291 | "text": "data_center", 1292 | "value": "data_center" 1293 | }, 1294 | "datasource": "prometheus", 1295 | "hide": 0, 1296 | "includeAll": false, 1297 | "label": "Data Center", 1298 | "multi": false, 1299 | "name": "dc", 1300 | "options": [], 1301 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, data_center)", 1302 | "refresh": 2, 1303 | "regex": "", 1304 | "sort": 1, 1305 | "tagValuesQuery": null, 1306 | "tags": [], 1307 | "tagsQuery": null, 1308 | "type": "query", 1309 | "useTags": false 1310 | }, 1311 | { 1312 | "allValue": null, 1313 | "current": { 1314 | "text": "All", 1315 | "value": "$__all" 1316 | }, 1317 | "datasource": "prometheus", 1318 | "hide": 0, 1319 | "includeAll": true, 1320 | "label": "Host", 1321 | "multi": false, 1322 | "name": "host", 1323 | "options": [], 1324 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, host)", 1325 | "refresh": 2, 1326 | "regex": "", 1327 | "sort": 1, 
1328 | "tagValuesQuery": null, 1329 | "tags": [], 1330 | "tagsQuery": null, 1331 | "type": "query", 1332 | "useTags": false 1333 | } 1334 | ] 1335 | }, 1336 | "time": { 1337 | "from": "now-6h", 1338 | "to": "now" 1339 | }, 1340 | "timepicker": { 1341 | "refresh_intervals": [ 1342 | "5s", 1343 | "10s", 1344 | "30s", 1345 | "1m", 1346 | "5m", 1347 | "15m", 1348 | "30m", 1349 | "1h", 1350 | "2h", 1351 | "1d" 1352 | ], 1353 | "time_options": [ 1354 | "5m", 1355 | "15m", 1356 | "1h", 1357 | "6h", 1358 | "12h", 1359 | "24h", 1360 | "2d", 1361 | "7d", 1362 | "30d" 1363 | ] 1364 | }, 1365 | "timezone": "browser", 1366 | "title": "TLP - Cassandra - Client Connections", 1367 | "version": null 1368 | }, 1369 | "overwrite": true 1370 | } 1371 | -------------------------------------------------------------------------------- /grafana/dashboards/tlp-cassandra-reaper.final.json: -------------------------------------------------------------------------------- 1 | { 2 | "dashboard": { 3 | "annotations": { 4 | "list": [] 5 | }, 6 | "editable": true, 7 | "gnetId": null, 8 | "graphTooltip": 0, 9 | "hideControls": false, 10 | "id": null, 11 | "links": [ 12 | { 13 | "asDropdown": true, 14 | "icon": "external link", 15 | "includeVars": false, 16 | "keepTime": true, 17 | "tags": [ 18 | "tlp", 19 | "cassandra" 20 | ], 21 | "title": "Other TLP Dashboards", 22 | "type": "dashboards" 23 | } 24 | ], 25 | "refresh": "5m", 26 | "rows": [ 27 | { 28 | "collapse": false, 29 | "height": "250px", 30 | "panels": [ 31 | { 32 | "aliasColors": {}, 33 | "bars": false, 34 | "dashLength": 10, 35 | "dashes": false, 36 | "datasource": null, 37 | "fill": 1, 38 | "id": 1, 39 | "legend": { 40 | "alignAsTable": true, 41 | "avg": true, 42 | "current": true, 43 | "max": true, 44 | "min": false, 45 | "show": false, 46 | "sort": "max", 47 | "sortDesc": true, 48 | "total": false, 49 | "values": true 50 | }, 51 | "lines": true, 52 | "linewidth": 1, 53 | "links": [], 54 | "nullPointMode": "null", 55 | "percentage": 
false, 56 | "pointradius": 5, 57 | "points": false, 58 | "renderer": "flot", 59 | "seriesOverrides": [], 60 | "spaceLength": 10, 61 | "span": 12, 62 | "stack": false, 63 | "steppedLine": false, 64 | "targets": [ 65 | { 66 | "expr": "avg(io_cassandrareaper_jmx_JmxConnectionFactory_jmxConnectionsIntializer{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center, host, quantile)", 67 | "format": "time_series", 68 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{quantile}}", 69 | "refId": "A", 70 | "target": "" 71 | } 72 | ], 73 | "thresholds": [], 74 | "timeFrom": null, 75 | "timeShift": null, 76 | "title": "JMX Connections to Cassandra", 77 | "tooltip": { 78 | "shared": false, 79 | "sort": 2, 80 | "value_type": "individual" 81 | }, 82 | "transparent": true, 83 | "type": "graph", 84 | "xaxis": { 85 | "buckets": null, 86 | "mode": "time", 87 | "name": null, 88 | "show": true, 89 | "values": [] 90 | }, 91 | "yaxes": [ 92 | { 93 | "format": "short", 94 | "label": "DUNNO", 95 | "logBase": 1, 96 | "max": null, 97 | "min": null, 98 | "show": true 99 | }, 100 | { 101 | "format": "short", 102 | "label": null, 103 | "logBase": 1, 104 | "max": null, 105 | "min": null, 106 | "show": true 107 | } 108 | ] 109 | } 110 | ], 111 | "repeat": null, 112 | "repeatIteration": null, 113 | "repeatRowId": null, 114 | "showTitle": false, 115 | "title": "Dashboard Row", 116 | "titleSize": "h6" 117 | }, 118 | { 119 | "collapse": false, 120 | "height": 250, 121 | "panels": [ 122 | { 123 | "aliasColors": {}, 124 | "bars": false, 125 | "dashLength": 10, 126 | "dashes": false, 127 | "datasource": null, 128 | "fill": 1, 129 | "id": 2, 130 | "legend": { 131 | "alignAsTable": true, 132 | "avg": true, 133 | "current": true, 134 | "max": true, 135 | "min": false, 136 | "show": false, 137 | "sort": "max", 138 | "sortDesc": true, 139 | "total": false, 140 | "values": true 141 | }, 142 | "lines": true, 143 | "linewidth": 1, 144 | "links": [], 145 | "nullPointMode": "null", 146 
| "percentage": false, 147 | "pointradius": 5, 148 | "points": false, 149 | "renderer": "flot", 150 | "seriesOverrides": [], 151 | "spaceLength": 10, 152 | "span": 12, 153 | "stack": false, 154 | "steppedLine": false, 155 | "targets": [ 156 | { 157 | "expr": "io_cassandrareaper_jmx_JmxConnectionFactory_jmxConnectionsIntializer_count{environment=\"$env\", data_center=\"$dc\"}", 158 | "format": "time_series", 159 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}", 160 | "refId": "A", 161 | "target": "" 162 | } 163 | ], 164 | "thresholds": [], 165 | "timeFrom": null, 166 | "timeShift": null, 167 | "title": "Number of JMX Connections to Cassandra", 168 | "tooltip": { 169 | "shared": false, 170 | "sort": 2, 171 | "value_type": "individual" 172 | }, 173 | "transparent": true, 174 | "type": "graph", 175 | "xaxis": { 176 | "buckets": null, 177 | "mode": "time", 178 | "name": null, 179 | "show": true, 180 | "values": [] 181 | }, 182 | "yaxes": [ 183 | { 184 | "decimals": 0, 185 | "format": "short", 186 | "label": "Number of connections to Cassandra", 187 | "logBase": 1, 188 | "max": null, 189 | "min": null, 190 | "show": true 191 | }, 192 | { 193 | "format": "short", 194 | "label": null, 195 | "logBase": 1, 196 | "max": null, 197 | "min": null, 198 | "show": true 199 | } 200 | ] 201 | } 202 | ], 203 | "repeat": null, 204 | "repeatIteration": null, 205 | "repeatRowId": null, 206 | "showTitle": false, 207 | "title": "Dashboard Row", 208 | "titleSize": "h6" 209 | }, 210 | { 211 | "collapse": false, 212 | "height": 250, 213 | "panels": [ 214 | { 215 | "aliasColors": {}, 216 | "bars": false, 217 | "dashLength": 10, 218 | "dashes": false, 219 | "datasource": null, 220 | "fill": 1, 221 | "id": 3, 222 | "legend": { 223 | "alignAsTable": true, 224 | "avg": true, 225 | "current": true, 226 | "max": true, 227 | "min": false, 228 | "show": false, 229 | "sort": "max", 230 | "sortDesc": true, 231 | "total": false, 232 | "values": true 233 | }, 234 | "lines": true, 235 | 
"linewidth": 1, 236 | "links": [], 237 | "nullPointMode": "null", 238 | "percentage": false, 239 | "pointradius": 5, 240 | "points": false, 241 | "renderer": "flot", 242 | "seriesOverrides": [], 243 | "spaceLength": 10, 244 | "span": 12, 245 | "stack": false, 246 | "steppedLine": false, 247 | "targets": [ 248 | { 249 | "expr": "avg(io_cassandrareaper_service_SegmentRunner_open_files{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center, host, quantile)", 250 | "format": "time_series", 251 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{quantile}}", 252 | "refId": "A", 253 | "target": "" 254 | } 255 | ], 256 | "thresholds": [], 257 | "timeFrom": null, 258 | "timeShift": null, 259 | "title": "Number of Open Files", 260 | "tooltip": { 261 | "shared": false, 262 | "sort": 2, 263 | "value_type": "individual" 264 | }, 265 | "transparent": true, 266 | "type": "graph", 267 | "xaxis": { 268 | "buckets": null, 269 | "mode": "time", 270 | "name": null, 271 | "show": true, 272 | "values": [] 273 | }, 274 | "yaxes": [ 275 | { 276 | "decimals": 0, 277 | "format": "short", 278 | "label": "Open files", 279 | "logBase": 1, 280 | "max": null, 281 | "min": null, 282 | "show": true 283 | }, 284 | { 285 | "format": "short", 286 | "label": null, 287 | "logBase": 1, 288 | "max": null, 289 | "min": null, 290 | "show": true 291 | } 292 | ] 293 | } 294 | ], 295 | "repeat": null, 296 | "repeatIteration": null, 297 | "repeatRowId": null, 298 | "showTitle": false, 299 | "title": "Dashboard Row", 300 | "titleSize": "h6" 301 | }, 302 | { 303 | "collapse": false, 304 | "height": 250, 305 | "panels": [ 306 | { 307 | "aliasColors": {}, 308 | "bars": false, 309 | "dashLength": 10, 310 | "dashes": false, 311 | "datasource": null, 312 | "fill": 1, 313 | "id": 4, 314 | "legend": { 315 | "alignAsTable": true, 316 | "avg": true, 317 | "current": true, 318 | "max": true, 319 | "min": false, 320 | "show": false, 321 | "sort": "max", 322 | "sortDesc": true, 323 | "total": 
false, 324 | "values": true 325 | }, 326 | "lines": true, 327 | "linewidth": 1, 328 | "links": [], 329 | "nullPointMode": "null", 330 | "percentage": false, 331 | "pointradius": 5, 332 | "points": false, 333 | "renderer": "flot", 334 | "seriesOverrides": [], 335 | "spaceLength": 10, 336 | "span": 12, 337 | "stack": false, 338 | "steppedLine": false, 339 | "targets": [ 340 | { 341 | "expr": "avg(io_cassandrareaper_service_SegmentRunner_releaseLead{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center, host, quantile)", 342 | "format": "time_series", 343 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{quantile}}", 344 | "refId": "A", 345 | "target": "" 346 | } 347 | ], 348 | "thresholds": [], 349 | "timeFrom": null, 350 | "timeShift": null, 351 | "title": "DUNNO", 352 | "tooltip": { 353 | "shared": false, 354 | "sort": 2, 355 | "value_type": "individual" 356 | }, 357 | "transparent": true, 358 | "type": "graph", 359 | "xaxis": { 360 | "buckets": null, 361 | "mode": "time", 362 | "name": null, 363 | "show": true, 364 | "values": [] 365 | }, 366 | "yaxes": [ 367 | { 368 | "format": "short", 369 | "label": "DUNNO", 370 | "logBase": 1, 371 | "max": null, 372 | "min": null, 373 | "show": true 374 | }, 375 | { 376 | "format": "short", 377 | "label": null, 378 | "logBase": 1, 379 | "max": null, 380 | "min": null, 381 | "show": true 382 | } 383 | ] 384 | } 385 | ], 386 | "repeat": null, 387 | "repeatIteration": null, 388 | "repeatRowId": null, 389 | "showTitle": false, 390 | "title": "Dashboard Row", 391 | "titleSize": "h6" 392 | }, 393 | { 394 | "collapse": false, 395 | "height": 250, 396 | "panels": [ 397 | { 398 | "aliasColors": {}, 399 | "bars": false, 400 | "dashLength": 10, 401 | "dashes": false, 402 | "datasource": null, 403 | "fill": 1, 404 | "id": 5, 405 | "legend": { 406 | "alignAsTable": true, 407 | "avg": true, 408 | "current": true, 409 | "max": true, 410 | "min": false, 411 | "show": false, 412 | "sort": "max", 413 | "sortDesc": 
true, 414 | "total": false, 415 | "values": true 416 | }, 417 | "lines": true, 418 | "linewidth": 1, 419 | "links": [], 420 | "nullPointMode": "null", 421 | "percentage": false, 422 | "pointradius": 5, 423 | "points": false, 424 | "renderer": "flot", 425 | "seriesOverrides": [], 426 | "spaceLength": 10, 427 | "span": 12, 428 | "stack": false, 429 | "steppedLine": false, 430 | "targets": [ 431 | { 432 | "expr": "avg(io_cassandrareaper_service_SegmentRunner_takeLead{environment=\"$env\", data_center=\"$dc\"}) by (environment, data_center, host, quantile)", 433 | "format": "time_series", 434 | "legendFormat": "{{environment}}.{{data_center}}.{{host}}.{{quantile}}", 435 | "refId": "A", 436 | "target": "" 437 | } 438 | ], 439 | "thresholds": [], 440 | "timeFrom": null, 441 | "timeShift": null, 442 | "title": "DUNNO", 443 | "tooltip": { 444 | "shared": false, 445 | "sort": 2, 446 | "value_type": "individual" 447 | }, 448 | "transparent": true, 449 | "type": "graph", 450 | "xaxis": { 451 | "buckets": null, 452 | "mode": "time", 453 | "name": null, 454 | "show": true, 455 | "values": [] 456 | }, 457 | "yaxes": [ 458 | { 459 | "format": "short", 460 | "label": "DUNNO", 461 | "logBase": 1, 462 | "max": null, 463 | "min": null, 464 | "show": true 465 | }, 466 | { 467 | "format": "short", 468 | "label": null, 469 | "logBase": 1, 470 | "max": null, 471 | "min": null, 472 | "show": true 473 | } 474 | ] 475 | } 476 | ], 477 | "repeat": null, 478 | "repeatIteration": null, 479 | "repeatRowId": null, 480 | "showTitle": false, 481 | "title": "Dashboard Row", 482 | "titleSize": "h6" 483 | } 484 | ], 485 | "schemaVersion": 14, 486 | "style": "dark", 487 | "tags": [ 488 | "tlp", 489 | "cassandra", 490 | "beta" 491 | ], 492 | "templating": { 493 | "list": [ 494 | { 495 | "allValue": null, 496 | "current": { 497 | "text": "environment", 498 | "value": "environment" 499 | }, 500 | "datasource": "prometheus", 501 | "hide": 0, 502 | "includeAll": false, 503 | "label": "Environment", 504 | 
"multi": false, 505 | "name": "env", 506 | "options": [], 507 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, environment)", 508 | "refresh": 2, 509 | "regex": "", 510 | "sort": 1, 511 | "tagValuesQuery": "", 512 | "tags": [], 513 | "tagsQuery": "", 514 | "type": "query", 515 | "useTags": true 516 | }, 517 | { 518 | "allValue": "", 519 | "current": { 520 | "text": "data_center", 521 | "value": "data_center" 522 | }, 523 | "datasource": "prometheus", 524 | "hide": 0, 525 | "includeAll": false, 526 | "label": "Data Center", 527 | "multi": false, 528 | "name": "dc", 529 | "options": [], 530 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, data_center)", 531 | "refresh": 2, 532 | "regex": "", 533 | "sort": 1, 534 | "tagValuesQuery": null, 535 | "tags": [], 536 | "tagsQuery": null, 537 | "type": "query", 538 | "useTags": false 539 | }, 540 | { 541 | "allValue": null, 542 | "current": { 543 | "text": "All", 544 | "value": "$__all" 545 | }, 546 | "datasource": "prometheus", 547 | "hide": 0, 548 | "includeAll": true, 549 | "label": "Host", 550 | "multi": false, 551 | "name": "host", 552 | "options": [], 553 | "query": "label_values(org_apache_cassandra_metrics_Storage_Load, host)", 554 | "refresh": 2, 555 | "regex": "", 556 | "sort": 1, 557 | "tagValuesQuery": null, 558 | "tags": [], 559 | "tagsQuery": null, 560 | "type": "query", 561 | "useTags": false 562 | } 563 | ] 564 | }, 565 | "time": { 566 | "from": "now-6h", 567 | "to": "now" 568 | }, 569 | "timepicker": { 570 | "refresh_intervals": [ 571 | "5s", 572 | "10s", 573 | "30s", 574 | "1m", 575 | "5m", 576 | "15m", 577 | "30m", 578 | "1h", 579 | "2h", 580 | "1d" 581 | ], 582 | "time_options": [ 583 | "5m", 584 | "15m", 585 | "1h", 586 | "6h", 587 | "12h", 588 | "24h", 589 | "2d", 590 | "7d", 591 | "30d" 592 | ] 593 | }, 594 | "timezone": "", 595 | "title": "TLP - Cassandra - Reaper", 596 | "version": null 597 | }, 598 | "overwrite": true 599 | } 600 | 
-------------------------------------------------------------------------------- /grafana/grafana.env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # the name of the grafana-server instance 4 | GF_DEFAULT_INSTANCE_NAME=pickle-grafana 5 | 6 | # only log to console 7 | GF_LOG_MODE=console 8 | GF_LOG_CONSOLE_LEVEL=info 9 | # GF_LOG_CONSOLE_FORMAT=json 10 | 11 | # install plugins 12 | GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-piechart-panel 13 | 14 | # debug purposes 15 | GF_SERVER_ROUTER_LOGGING=true 16 | 17 | # define url used in Alert messages 18 | GF_SERVER_DOMAIN=localhost 19 | -------------------------------------------------------------------------------- /logspout/Dockerfile: -------------------------------------------------------------------------------- 1 | # uses ONBUILD instructions described here: 2 | # https://github.com/gliderlabs/logspout/tree/master/custom 3 | 4 | FROM gliderlabs/logspout:master 5 | ENV SYSLOG_FORMAT rfc3164 6 | -------------------------------------------------------------------------------- /logspout/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # unmodified from: 4 | # https://github.com/gliderlabs/logspout/blob/d6fe1803e9d9637d707ed57a873e46e6d0f0b2e6/custom/build.sh 5 | 6 | set -e 7 | apk add --update go build-base git mercurial ca-certificates 8 | mkdir -p /go/src/github.com/gliderlabs 9 | cp -r /src /go/src/github.com/gliderlabs/logspout 10 | cd /go/src/github.com/gliderlabs/logspout 11 | export GOPATH=/go 12 | go get 13 | go build -ldflags "-X main.Version=$1" -o /bin/logspout 14 | apk del go git mercurial build-base 15 | rm -rf /go /var/cache/apk/* /root/.glide 16 | 17 | # backwards compatibility 18 | ln -fs /tmp/docker.sock /var/run/docker.sock 19 | -------------------------------------------------------------------------------- /logspout/logspout.env: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # tail settings 4 | # BACKLOG=false 5 | # TAIL=100 6 | 7 | # Logstash-specific 8 | # ROUTE_URIS=logstash+tcp://logstash:5000 9 | # LOGSTASH_TAGS=docker-elk 10 | -------------------------------------------------------------------------------- /logspout/modules.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // installs the Logstash adapter for Logspout, and required dependencies 4 | // https://github.com/looplab/logspout-logstash 5 | import ( 6 | _ "github.com/gliderlabs/logspout/adapters/syslog" 7 | _ "github.com/gliderlabs/logspout/httpstream" 8 | _ "github.com/gliderlabs/logspout/transports/tcp" 9 | _ "github.com/looplab/logspout-logstash" 10 | ) 11 | -------------------------------------------------------------------------------- /pickle-factory/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2 2 | 3 | WORKDIR /usr/src/app 4 | 5 | # copied from: https://github.com/tianon/gosu/blob/e87cf95808a7b16208515c49012aa3410bc5bba8/INSTALL.md 6 | #ENV GOSU_VERSION 1.10 7 | #RUN set -ex; \ 8 | # \ 9 | # fetchDeps=' \ 10 | # ca-certificates \ 11 | # wget \ 12 | # '; \ 13 | # apt-get update; \ 14 | # apt-get install -y --no-install-recommends $fetchDeps; \ 15 | # rm -rf /var/lib/apt/lists/*; \ 16 | # \ 17 | # dpkgArch="$(dpkg --print-architecture | awk -F- '{ print $NF }')"; \ 18 | # wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch"; \ 19 | # wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch.asc"; \ 20 | # \ 21 | ## verify the signature 22 | # export GNUPGHOME="$(mktemp -d)"; \ 23 | # gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4; \ 24 | # gpg --batch --verify 
/usr/local/bin/gosu.asc /usr/local/bin/gosu; \ 25 | # rm -r "$GNUPGHOME" /usr/local/bin/gosu.asc; \ 26 | # \ 27 | # chmod +x /usr/local/bin/gosu; \ 28 | ## verify that the binary works 29 | # gosu nobody true; \ 30 | # \ 31 | # apt-get purge -y --auto-remove $fetchDeps 32 | 33 | RUN apt-get update \ 34 | && apt-get install -y \ 35 | gcc \ 36 | python-dev \ 37 | python-snappy \ 38 | libev4 \ 39 | libev-dev 40 | 41 | COPY requirements.txt ./ 42 | RUN pip install --no-cache-dir -r requirements.txt 43 | 44 | COPY . . 45 | 46 | ENV USER pickle 47 | #RUN adduser pickle 48 | ENTRYPOINT ["/usr/src/app/docker-entrypoint.sh"] 49 | 50 | CMD [ "python", "./factory.py" ] 51 | -------------------------------------------------------------------------------- /pickle-factory/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | #exec gosu ${USER} "$@" 6 | # replace this shell with the requested command; quoting keeps each argument intact 7 | exec "$@" 8 | -------------------------------------------------------------------------------- /pickle-factory/factory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import random 5 | import uuid 6 | 7 | from cassandra import ConsistencyLevel 8 | from cassandra.io.libevreactor import LibevConnection 9 | from cassandra.cluster import Cluster 10 | from cassandra.policies import DCAwareRoundRobinPolicy 11 | 12 | 13 | 14 | # connect to Cassandra 15 | 16 | CASSANDRA_HOST = os.environ['CASSANDRA_HOST'] 17 | CASSANDRA_DC = os.environ['CASSANDRA_DC'] 18 | cluster = Cluster([CASSANDRA_HOST], 19 | load_balancing_policy=DCAwareRoundRobinPolicy( 20 | local_dc=CASSANDRA_DC), ) 21 | cluster.connection_class = LibevConnection 22 | session = cluster.connect() 23 | 24 | 25 | 26 | # prepare statements 27 | print 'Preparing statements...' 28 | 29 | insert_employee = session.prepare(""" 30 | INSERT INTO pickle.employees 31 | (employee_id) 32 | VALUES 33 | (?) 
34 | """) 35 | insert_employee.consistency_level = ConsistencyLevel.QUORUM 36 | 37 | insert_timesheet = session.prepare(""" 38 | INSERT INTO pickle.timesheets 39 | (employee_id, pickle_tree_id, timestamp, pickle_count, pickle_avg_size, watered) 40 | VALUES 41 | (?, ?, ?, ?, ?, ?) 42 | """) 43 | insert_timesheet.consistency_level = ConsistencyLevel.QUORUM 44 | 45 | insert_tree = session.prepare(""" 46 | INSERT INTO pickle.trees 47 | (pickle_tree_id, timestamp, employee_id, pickle_count, pickle_avg_size, watered) 48 | VALUES 49 | (?, ?, ?, ?, ?, ?) 50 | """) 51 | insert_tree.consistency_level = ConsistencyLevel.ONE 52 | 53 | insert_production = session.prepare(""" 54 | INSERT INTO pickle.production 55 | (pickle_count, pickle_tree_id, timestamp) 56 | VALUES 57 | (?, ?, ?) 58 | """) 59 | insert_production.consistency_level = ConsistencyLevel.ONE 60 | 61 | 62 | 63 | # generate employee_ids 64 | print 'Generating employee IDs...' 65 | 66 | futures = [] 67 | employee_uuids = [] 68 | pickle_tree_ids = [] 69 | for _ in xrange(100): 70 | employee_uuid = uuid.uuid4() # random uuid 71 | 72 | future = session.execute_async(insert_employee, (employee_uuid,)) 73 | employee_uuids.append(employee_uuid) 74 | 75 | pickle_tree_id = uuid.uuid4() # randome uuid 76 | pickle_tree_ids.append(pickle_tree_id) 77 | 78 | # confirm all futures were written 79 | while futures: 80 | print 'Committing employee information...' 
81 | future = futures.pop() 82 | future.result() 83 | 84 | 85 | 86 | # generate simulated workforce 87 | 88 | for _ in xrange(100000): 89 | employee_uuid = random.choice(employee_uuids) 90 | pickle_tree_id = random.choice(pickle_tree_ids) 91 | timestamp = uuid.uuid1() # contains time information 92 | pickle_count = random.randint(0, 100) 93 | pickle_avg_size = random.uniform(0, 2) 94 | watered = random.randint(0, 1) 95 | 96 | future = session.execute_async(insert_timesheet, 97 | (employee_uuid, pickle_tree_id, timestamp, 98 | pickle_count, pickle_avg_size, watered)) 99 | futures.append(future) 100 | 101 | future = session.execute_async(insert_tree, 102 | (pickle_tree_id, timestamp, employee_uuid, 103 | pickle_count, pickle_avg_size, watered)) 104 | futures.append(future) 105 | 106 | future = session.execute_async(insert_production, 107 | (pickle_count, pickle_tree_id, timestamp)) 108 | futures.append(future) 109 | 110 | if len(futures) > 3000: 111 | print 'Committing timesheets...' 112 | while futures: 113 | future = futures.pop() 114 | future.result() 115 | 116 | # confirm all futures were written 117 | while futures: 118 | print 'Committing last timesheets...' 119 | future = futures.pop() 120 | future.result() 121 | 122 | print 'Done.' 
123 | -------------------------------------------------------------------------------- /pickle-factory/requirements.txt: -------------------------------------------------------------------------------- 1 | cassandra-driver==3.11.0 2 | lz4==0.10.1 3 | scales==1.0.9 4 | -------------------------------------------------------------------------------- /pickle-shop/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2 2 | 3 | WORKDIR /usr/src/app 4 | 5 | # copied from: https://github.com/tianon/gosu/blob/e87cf95808a7b16208515c49012aa3410bc5bba8/INSTALL.md 6 | #ENV GOSU_VERSION 1.10 7 | #RUN set -ex; \ 8 | # \ 9 | # fetchDeps=' \ 10 | # ca-certificates \ 11 | # wget \ 12 | # '; \ 13 | # apt-get update; \ 14 | # apt-get install -y --no-install-recommends $fetchDeps; \ 15 | # rm -rf /var/lib/apt/lists/*; \ 16 | # \ 17 | # dpkgArch="$(dpkg --print-architecture | awk -F- '{ print $NF }')"; \ 18 | # wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch"; \ 19 | # wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch.asc"; \ 20 | # \ 21 | ## verify the signature 22 | # export GNUPGHOME="$(mktemp -d)"; \ 23 | # gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4; \ 24 | # gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu; \ 25 | # rm -r "$GNUPGHOME" /usr/local/bin/gosu.asc; \ 26 | # \ 27 | # chmod +x /usr/local/bin/gosu; \ 28 | ## verify that the binary works 29 | # gosu nobody true; \ 30 | # \ 31 | # apt-get purge -y --auto-remove $fetchDeps 32 | 33 | RUN apt-get update \ 34 | && apt-get install -y \ 35 | gcc \ 36 | python-dev \ 37 | python-snappy \ 38 | libev4 \ 39 | libev-dev 40 | 41 | COPY requirements.txt ./ 42 | RUN pip install --no-cache-dir -r requirements.txt 43 | 44 | COPY . . 
45 | 46 | ENV USER pickle 47 | #RUN adduser pickle 48 | ENTRYPOINT ["/usr/src/app/docker-entrypoint.sh"] 49 | 50 | CMD [ "python", "./shop.py" ] 51 | -------------------------------------------------------------------------------- /pickle-shop/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | #exec gosu ${USER} "$@" 6 | # replace this shell with the requested command; quoting keeps each argument intact 7 | exec "$@" 8 | -------------------------------------------------------------------------------- /pickle-shop/requirements.txt: -------------------------------------------------------------------------------- 1 | cassandra-driver==3.11.0 2 | lz4==0.10.1 3 | scales==1.0.9 4 | -------------------------------------------------------------------------------- /pickle-shop/shop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import random 5 | 6 | from cassandra import ConsistencyLevel 7 | from cassandra.io.libevreactor import LibevConnection 8 | from cassandra.cluster import Cluster 9 | from cassandra.policies import DCAwareRoundRobinPolicy 10 | 11 | 12 | 13 | # connect to Cassandra 14 | 15 | CASSANDRA_HOST = os.environ['CASSANDRA_HOST'] 16 | CASSANDRA_DC = os.environ['CASSANDRA_DC'] 17 | cluster = Cluster([CASSANDRA_HOST], 18 | load_balancing_policy=DCAwareRoundRobinPolicy( 19 | local_dc=CASSANDRA_DC), ) 20 | cluster.connection_class = LibevConnection 21 | session = cluster.connect() 22 | 23 | 24 | 25 | # prepare statements 26 | print 'Preparing statements...' 27 | 28 | select_employee = session.prepare(""" 29 | SELECT * FROM pickle.employees 30 | """) 31 | select_employee.consistency_level = ConsistencyLevel.QUORUM 32 | 33 | select_timesheet = session.prepare(""" 34 | SELECT * FROM pickle.timesheets 35 | WHERE employee_id = ? 
36 | """) 37 | select_timesheet.consistency_level = ConsistencyLevel.QUORUM 38 | 39 | 40 | 41 | # synchronous execution of prepared statements 42 | print 'Finding all employees...' 43 | 44 | employee_ids = [] 45 | result = session.execute(select_employee) 46 | for row in result: 47 | employee_ids.append(row.employee_id) 48 | 49 | print 'Found %s employees!' % len(employee_ids) 50 | 51 | 52 | 53 | # sample workforce activity for 10 employees 54 | sample = random.sample(employee_ids, 10) 55 | 56 | # asynchronous multi-get 57 | print 'Perform multiple asynchronous read queries...' 58 | 59 | futures = [] 60 | for employee_id in sample: 61 | future = session.execute_async(select_timesheet, (employee_id,)) 62 | futures.append(future) 63 | 64 | 65 | 66 | # process returned results 67 | 68 | print 'Total Pickles Picked' 69 | print '====================' 70 | 71 | for future in futures: 72 | result = future.result() 73 | 74 | employee_id = None 75 | pickle_counts = 0 76 | for row in result: 77 | employee_id = row.employee_id 78 | pickle_counts += row.pickle_count 79 | 80 | if employee_id: 81 | print '%s: %s' % (employee_id, pickle_counts) 82 | -------------------------------------------------------------------------------- /pickle.env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CASSANDRA_HOST=cassandra 4 | CASSANDRA_DC=pickle-east 5 | -------------------------------------------------------------------------------- /prometheus/config/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 30s 3 | evaluation_interval: 15s 4 | # scrape_timeout is set to the global default (10s). 5 | 6 | # Attach these labels to any time series or alerts when communicating with 7 | # external systems (federation, remote storage, Alertmanager). 
8 | external_labels: 9 | monitor: 'pickle-farm' 10 | 11 | scrape_configs: 12 | - job_name: 'prometheus' 13 | static_configs: 14 | - targets: ['localhost:9090'] 15 | 16 | - job_name: 'cassandra' 17 | static_configs: 18 | - targets: ['cassandra:7070'] 19 | labels: 20 | service: 'cassandra' 21 | component: 'cassandra' 22 | environment: 'environment' 23 | data_center: 'data_center' 24 | host: 'cassandra' 25 | - targets: ['cassandra:9103'] 26 | labels: 27 | service: 'cassandra' 28 | component: 'collectd' 29 | environment: 'environment' 30 | data_center: 'data_center' 31 | host: 'cassandra' 32 | 33 | - job_name: 'cassandra-reaper' 34 | metrics_path: '/prometheusMetrics' 35 | static_configs: 36 | - targets: ['cassandra-reaper:8081'] 37 | labels: 38 | service: 'cassandra' 39 | component: 'cassandra-reaper' 40 | environment: 'environment' 41 | data_center: 'data_center' 42 | host: 'cassandra-reaper' 43 | --------------------------------------------------------------------------------