├── gcplot
├── gcserver
│ ├── gcplot.properties
│ ├── gcserver
│ └── logback.xml
├── cassandra
│ ├── cassandra-topology.properties
│ ├── cassandra-rackdc.properties
│ ├── cdb.cql
│ ├── jvm.properties
│ └── cassandra.yaml
├── nginx
│ ├── nginx
│ └── gcplot.conf
├── start.sh
├── orientdb
│ ├── orientdb.sh
│ ├── orientdb-server-log.properties
│ └── orientdb-server-config.xml
└── Dockerfile
└── README.md
/gcplot/gcserver/gcplot.properties:
--------------------------------------------------------------------------------
1 | orientdb.connection.string=remote:localhost:2424/gcplot
2 | orientdb.user=admin
3 | orientdb.password=admin
4 |
5 | bootstrap.server.host=0.0.0.0
6 | bootstrap.server.port=9091
7 |
8 | hosts.group=docker
9 | app.hostname=localhost
10 | app.hostaddr=127.0.0.1
11 | worker.tasks.enabled=false
12 |
13 | graphite.sink.enabled=false
14 | graphite.report.every.ms=30000
15 | graphite.host=
16 | graphite.port=2003
17 |
18 | gc.repository.type=cassandra
19 |
20 | cassandra.hosts=localhost
21 | cassandra.port=9042
22 | cassandra.username=cassandra
23 | cassandra.password=cassandra
24 | cassandra.keyspace=gcplot
25 | cassandra.reconnect.delay.ms=100
26 |
27 | gc.cassandra.fetchSize=5000
28 |
29 | s3.resource.bucket=
30 | s3.connector.bucket=
31 | s3.connector.path.base=connector-logs
32 |
33 | files.upload.dir=/tmp/gcserver-file-uploads
34 |
35 | worker.task.distributor.interval.ms=120000
36 | worker.processing.interval.ms=15000
37 |
38 | vertx.worker.pool.size=50
39 | vertx.worker.blocking.pool.size=150
40 |
41 | ui.host.protocol=http
42 | ui.host=localhost
43 |
44 | max.upload.file.size=2147483654
45 |
--------------------------------------------------------------------------------
/gcplot/cassandra/cassandra-topology.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Cassandra Node IP=Data Center:Rack
18 |
19 | # default for unknown nodes
20 | default=DC1:RAC1
21 |
22 | # Native IPv6 is supported, however you must escape the colon in the IPv6 Address
23 | # Also be sure to comment out JVM_OPTS="$JVM_OPTS -Djava.net.preferIPv4Stack=true"
24 | # in cassandra-env.sh
25 | #fe80\:0\:0\:0\:202\:b3ff\:fe1e\:8329=DC1:RAC3
26 |
--------------------------------------------------------------------------------
/gcplot/nginx/nginx:
--------------------------------------------------------------------------------
1 | user www-data;
2 | worker_processes auto;
3 | pid /run/nginx.pid;
4 |
5 | events {
6 | worker_connections 131072;
7 | # multi_accept on;
8 | use epoll;
9 | }
10 |
11 | http {
12 | client_max_body_size 16384M;
13 | ##
14 | # Basic Settings
15 | ##
16 |
17 | sendfile on;
18 | tcp_nopush on;
19 | tcp_nodelay on;
20 | keepalive_timeout 300;
21 | types_hash_max_size 2048;
22 | # server_tokens off;
23 |
24 | # server_names_hash_bucket_size 64;
25 | # server_name_in_redirect off;
26 |
27 | include /etc/nginx/mime.types;
28 | default_type application/octet-stream;
29 |
30 | ##
31 | # Logging Settings
32 | ##
33 |
34 | access_log off;
35 | #access_log /var/log/nginx/access.log;
36 | error_log /var/log/nginx/error.log;
37 |
38 | ##
39 | # Gzip Settings
40 | ##
41 |
42 | gzip on;
43 | gzip_disable "msie6";
44 |
45 | # gzip_vary on;
46 | # gzip_proxied any;
47 | # gzip_comp_level 6;
48 | # gzip_buffers 16 8k;
49 | # gzip_http_version 1.1;
50 | # gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
51 |
52 | ##
53 | # Virtual Host Configs
54 | ##
55 |
56 | include /etc/nginx/conf.d/*.conf;
57 | include /etc/nginx/sites-enabled/*;
58 | }
59 |
--------------------------------------------------------------------------------
/gcplot/cassandra/cassandra-rackdc.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # These properties are used with GossipingPropertyFileSnitch and will
18 | # indicate the rack and dc for this node
19 | dc=DC1
20 | rack=RAC1
21 |
22 | # Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch
23 | # to append a string to the EC2 region name.
24 | #dc_suffix=
25 |
26 | # Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does.
27 | prefer_local=true
28 |
--------------------------------------------------------------------------------
/gcplot/gcserver/gcserver:
--------------------------------------------------------------------------------
#!/bin/bash
# gcserver init script: starts/stops the GCPlot bootstrap JAR as the
# dedicated "gcserver" user, tracking the JVM PID in a pidfile.
SERVICE_NAME=gcserver
PATH_TO_JAR=/home/gcserver/lib/bootstrap.jar
PID_PATH_NAME=/tmp/gcserver.pid
USER=gcserver
# {GCPLOT_MEMORY} is substituted with the real heap size by start.sh (sed).
ARGS='-server -Xmx{GCPLOT_MEMORY} -Xms{GCPLOT_MEMORY} -XX:+DisableExplicitGC -XX:+PerfDisableSharedMem -XX:+UnlockExperimentalVMOptions -XX:+HeapDumpOnOutOfMemoryError -XX:+UnlockDiagnosticVMOptions'

# Start the service unless a pidfile already exists. A previous instance is
# always stopped first, so "start" behaves like a clean (re)start.
function start_service() {
    echo "Starting $SERVICE_NAME ..."
    stop_service
    if [ ! -f "$PID_PATH_NAME" ]; then
        su "$USER" -c "nohup java $ARGS -Dcurrent.host.name=$HOSTNAME -Dvertx.cacheDirBase=/tmp -Dlogback.configurationFile=/home/gcserver/config/logback.xml -jar $PATH_TO_JAR -cfg /home/gcserver/config >> /dev/null & echo \$! > $PID_PATH_NAME"
        sleep 5
        echo "$SERVICE_NAME started ..."
    else
        echo "$SERVICE_NAME is already running ..."
    fi
}

# Stop the service if the pidfile exists, waiting until the process has
# actually exited before removing the pidfile.
function stop_service() {
    if [ -f "$PID_PATH_NAME" ]; then
        PID=$(cat "$PID_PATH_NAME")
        echo "$SERVICE_NAME stopping ..."
        echo "$SERVICE_NAME killing $PID ..."
        kill "$PID"
        # BUGFIX: "wait" only works on children of the current shell; the JVM
        # was launched via su in a different shell, so poll for termination.
        while kill -0 "$PID" 2>/dev/null; do
            sleep 1
        done
        echo "$SERVICE_NAME stopped ..."
        rm -f "$PID_PATH_NAME"
        sleep 5
    else
        echo "$SERVICE_NAME is not running ..."
    fi
}

case $1 in
    start)
        start_service
    ;;
    stop)
        stop_service
    ;;
    restart)
        stop_service
        start_service
    ;;
    status)
        if [ -f "$PID_PATH_NAME" ]; then
            PID=$(cat "$PID_PATH_NAME")
            echo "Started $PID."
        else
            echo "Not started."
        fi
    ;;
esac
--------------------------------------------------------------------------------
/gcplot/gcserver/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | true
4 |
5 | %d{yyyy/MM/dd HH:mm:ss.SSS} %p [%c{1}] %m%n
6 |
7 |
8 |
9 | /home/gcserver/logs/app.debug.%d{yyyy-MM-dd}.log
10 |
11 |
12 | 31
13 | 30GB
14 |
15 |
16 |
17 |
18 | true
19 |
20 | %d{HH:mm:ss} %p [%c{1}] %m%n
21 |
22 |
23 |
24 | /home/gcserver/logs/analytics.%d{yyyy-MM-dd}.log
25 |
26 |
27 | 31
28 | 20GB
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
43 |
44 |
--------------------------------------------------------------------------------
/gcplot/start.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Container entrypoint: substitutes heap sizes into service configs, boots
# OrientDB, Cassandra, the gcserver backend and nginx, then follows the
# application log to keep the container in the foreground.

# Inject the heap sizes passed via environment (see Dockerfile ENV defaults).
sed -i "s/{GCPLOT_MEMORY}/$GCPLOT_MEMORY/g" /etc/init.d/gcserver \
  && sed -i "s/{ORIENTDB_MEMORY}/$ORIENTDB_MEMORY/g" /etc/init.d/orientdb \
  && sed -i "s/{CASSANDRA_MEMORY}/$CASSANDRA_MEMORY/g" /etc/cassandra/jvm.options

# Start & Init OrientDB

echo "Starting OrientDB ..."

service orientdb start

echo "Waiting for the OrientDB 2424 port to be opened ..."

while ! nc -z localhost 2424; do
    sleep 0.1 # check again after 1/10 of a second
done

echo "Creating OrientDB database ..."

/opt/orientdb/bin/console.sh "CREATE DATABASE remote:localhost:2424/gcplot admin admin plocal"

# Start & Init Cassandra

echo "Starting Cassandra ..."

service cassandra start

echo "Waiting for the Cassandra 9042 port to be opened ..."

while ! nc -z localhost 9042; do
    sleep 0.1 # check again after 1/10 of a second
done

# The default superuser is created asynchronously after the port opens;
# wait for it before authenticating with cqlsh as "cassandra".
until grep -q "Created default superuser role" /var/log/cassandra/debug.log 2>/dev/null; do
    sleep 0.1
done

echo "Performing CQL initialization ..."

cqlsh --username=cassandra --password=cassandra --file=/etc/cassandra/cdb.cql

echo "Running gcserver service ..."

service gcserver start

# Follow output

echo "GCPlot instance is initialized"

# Wait until gcserver has loaded its configuration from the main DB.
until grep -q "Trying to reload configuration from main DB" /home/gcserver/logs/app.debug.*.log 2>/dev/null; do
    sleep 0.1
done

echo "Registering admin user in GCPlot ..."

curl 127.0.0.1:9091/user/register_admin

echo "Starting nginx service ..."

service nginx start

# Keep PID 1 alive and stream the app log to the container's stdout.
tail -F /home/gcserver/logs/app.debug.*.log
64 |
--------------------------------------------------------------------------------
/gcplot/nginx/gcplot.conf:
--------------------------------------------------------------------------------
1 | # Expires map
2 | map $sent_http_content_type $expires {
3 | default off;
4 | text/html epoch;
5 | text/css max;
6 | application/javascript max;
7 | ~image/ max;
8 | }
9 |
10 | server {
11 | listen 80 default_server;
12 | gzip off;
13 |
14 | expires $expires;
15 | charset utf-8;
16 | location / {
17 | root /var/www/landing;
18 | index login.html;
19 | error_page 404 /404.html;
20 | }
21 | location /app {
22 | return 302 $scheme://$http_host$uri/;
23 | }
24 | location /app/ {
25 | root /var/www;
26 | index index.html;
27 | if (!-e $request_filename) {
28 | rewrite ^(.*)$ /app/ break;
29 | }
30 | }
31 | location /assets {
32 | root /var/www/app;
33 | }
34 | location /img {
35 | root /var/www/app;
36 | }
37 | location /rest {
38 | return 302 $scheme://$http_host$uri/;
39 | }
40 | location /rest/ {
41 | proxy_pass http://127.0.0.1:9091/;
42 | proxy_set_header X-Real-IP $remote_addr;
43 | proxy_set_header Host $http_host;
44 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
45 | add_header 'Access-Control-Allow-Origin' '*';
46 | add_header 'Access-Control-Allow-Credentials' 'true';
47 | add_header 'Access-Control-Allow-Methods' 'GET,HEAD,OPTIONS,POST,PUT,DELETE';
48 | add_header 'Access-Control-Allow-Headers' 'Access-Control-Allow-Headers, Origin,Accept, X-Requested-With, Content-Type, Access-Control-Request-Method, Access-Control-Request-Headers, Transfer-Encoding, X-Firefox-Spdy';
49 | add_header 'Access-Control-Expose-Headers' 'Transfer-Encoding';
50 | keepalive_timeout 120s;
51 | keepalive_requests 100;
52 | proxy_connect_timeout 600;
53 | proxy_send_timeout 600;
54 | proxy_read_timeout 600;
55 | send_timeout 600;
56 | proxy_intercept_errors on;
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/gcplot/orientdb/orientdb.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# OrientDB service script
#
# Copyright (c) OrientDB LTD (http://orientdb.com/)

# chkconfig: 2345 20 80
# description: OrientDb init script
# processname: orientdb.sh

# You have to SET the OrientDB installation directory here
ORIENTDB_DIR="/opt/orientdb"
ORIENTDB_USER="orientdb"
LOG_DIR="/var/log/orientdb"
# {ORIENTDB_MEMORY} is substituted with the real heap size by start.sh (sed).
export ORIENTDB_OPTS_MEMORY="-Xms{ORIENTDB_MEMORY} -Xmx{ORIENTDB_MEMORY}"

usage() {
    echo "Usage: `basename $0`: <start|stop|status>"
    exit 1
}

# Start the server daemon unless it is already running (per status/$PID).
start() {
    status
    if [ $PID -gt 0 ]
    then
        echo "OrientDB server daemon was already started. PID: $PID"
        return 0
    fi
    echo "Starting OrientDB server daemon..."
    cd "$ORIENTDB_DIR/bin"
    su $ORIENTDB_USER -c "cd \"$ORIENTDB_DIR/bin\"; /usr/bin/nohup ./server.sh 1>$LOG_DIR/orientdb.log 2>$LOG_DIR/orientdb.err &"
}

# Ask a running server daemon to shut down via shutdown.sh.
stop() {
    status
    if [ $PID -eq 0 ]
    then
        echo "OrientDB server daemon is already not running"
        return 0
    fi
    echo "Stopping OrientDB server daemon..."
    cd "$ORIENTDB_DIR/bin"
    su $ORIENTDB_USER -c "cd \"$ORIENTDB_DIR/bin\"; /usr/bin/nohup ./shutdown.sh 1>>$LOG_DIR/orientdb.log 2>>$LOG_DIR/orientdb.err &"
}

# Detect the running server and publish its PID through the global $PID
# variable (0 when not running). Callers must read $PID, not the exit
# status: BUGFIX — shell return values are truncated to 0-255, so the
# previous "return $PID" corrupted any real PID (e.g. 1234 became 210).
status() {
    PID=`ps auxw | grep 'orientdb.www.path' | grep java | grep -v grep | awk '{print $2}'`
    if [ "x$PID" = "x" ]
    then
        PID=0
    fi
    return 0
}

if [ "x$1" = "xstart" ]
then
    start
    exit 0
fi

if [ "x$1" = "xstop" ]
then
    stop
    exit 0
fi

if [ "x$1" = "xstatus" ]
then
    status
    if [ $PID -gt 0 ]
    then
        echo "OrientDB server daemon is running with PID: $PID"
        exit 0
    else
        echo "OrientDB server daemon is NOT running"
        exit 3
    fi
fi

usage
--------------------------------------------------------------------------------
/gcplot/orientdb/orientdb-server-log.properties:
--------------------------------------------------------------------------------
1 | #
2 | # /*
3 | # * Copyright 2014 Orient Technologies LTD (info(at)orientechnologies.com)
4 | # *
5 | # * Licensed under the Apache License, Version 2.0 (the "License");
6 | # * you may not use this file except in compliance with the License.
7 | # * You may obtain a copy of the License at
8 | # *
9 | # * http://www.apache.org/licenses/LICENSE-2.0
10 | # *
11 | # * Unless required by applicable law or agreed to in writing, software
12 | # * distributed under the License is distributed on an "AS IS" BASIS,
13 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # * See the License for the specific language governing permissions and
15 | # * limitations under the License.
16 | # *
17 | # * For more information: http://www.orientechnologies.com
18 | # */
19 | #
20 |
21 | # Specify the handlers to create in the root logger
22 | # (all loggers are children of the root logger)
23 | # The following creates two handlers
24 | handlers = java.util.logging.ConsoleHandler, java.util.logging.FileHandler
25 |
26 | # Set the default logging level for the root logger
27 | .level = INFO
28 | com.orientechnologies.level = INFO
29 | com.orientechnologies.orient.server.distributed.level = INFO
30 |
31 | # Set the default logging level for new ConsoleHandler instances
32 | java.util.logging.ConsoleHandler.level = INFO
33 | # Set the default formatter for new ConsoleHandler instances
34 | java.util.logging.ConsoleHandler.formatter = com.orientechnologies.common.log.OAnsiLogFormatter
35 |
36 | # Set the default logging level for new FileHandler instances
37 | java.util.logging.FileHandler.level = INFO
38 | # Naming style for the output file
39 | java.util.logging.FileHandler.pattern=/var/log/orientdb/orient-server.log
40 | # Set the default formatter for new FileHandler instances
41 | java.util.logging.FileHandler.formatter = com.orientechnologies.common.log.OLogFormatter
42 | # Limiting size of output file in bytes:
43 | java.util.logging.FileHandler.limit=1000000000
44 | # Number of output files to cycle through, by appending an
45 | # integer to the base file name:
46 | java.util.logging.FileHandler.count=10
47 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GCPlot - All-in-one JVM GC Logs Analyzer
2 |
3 | https://github.com/dmart28/gcplot
4 |
5 | https://github.com/dmart28/gcplot-ui
6 |
7 | GCPlot is a Java Garbage Collector (GC) logs analyzer. Basically, it's an effort to solve all GC logs reading/analyzing problems once and forever. As developers, we were tired about the current situation and efforts needed to just compare some number of GC configurations, so we decided to start from scratch and build a tool that suits best for us.
8 |
9 | The report itself consists of a lot of graphs, measurements, stats, etc about how exactly your GC works. You can also manage the timeline and decide - whether to dig deeper, by analyzing, for example, 2 minutes interval in the most details, or check everything from the bird's eye view by choosing the last month.
10 |
11 | # Installation
12 |
13 | ## Docker Installation
14 |
  15 | You can run GCPlot in a Docker container. Docker is supported by most modern operating systems; for more details, check the official [Docker Installation](https://docs.docker.com/engine/installation/) page.
16 |
  17 | In order to run GCPlot as-is, without additional configuration, run the following command:
18 |
19 | `docker run -d -p 80:80 gcplot/gcplot`
20 |
  21 | After that, the platform will eventually be accessible from your host machine at the `http://127.0.0.1` address. If you would like to use another port, just change it. For example, for the `http://127.0.0.1:8080` address, the command will look like:
22 |
23 | `docker run -d -p 8080:80 gcplot/gcplot`
24 |
  25 | By default, an admin user is already created, with username and password `admin`. For better security, please consider changing the password after the initial log-in.
26 |
27 | ### Versions
28 |
29 | You can check the Docker container versions available [here](https://hub.docker.com/r/gcplot/gcplot/tags/).
30 |
31 | ### Memory Settings
32 |
  33 | You can control the heap size of the services inside the container. GCPlot uses Cassandra and OrientDB under the hood, which are also present inside the container. Default values are:
34 |
35 | ```
36 | GCPLOT_MEMORY=512m
37 | ORIENTDB_MEMORY=256m
38 | CASSANDRA_MEMORY=1g
39 | ```
40 |
  41 | For example, to give the GCPlot service 1G of heap and Cassandra 4G, the command may look like:
42 |
43 | `docker run -d -p 80:80 -e "GCPLOT_MEMORY=1g" -e "CASSANDRA_MEMORY=4g" gcplot/gcplot`
44 |
--------------------------------------------------------------------------------
/gcplot/cassandra/cdb.cql:
--------------------------------------------------------------------------------
1 | CREATE KEYSPACE IF NOT EXISTS gcplot
2 | WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };
3 |
4 | USE gcplot;
5 |
6 | CREATE TABLE IF NOT EXISTS gc_analyse (
7 | id uuid,
8 | account_id varchar,
9 | analyse_name varchar,
10 | is_continuous boolean,
11 | start timestamp,
12 | first_event map,
13 | last_event map,
14 | timezone varchar,
15 | jvm_ids set,
16 | jvm_names map,
17 | jvm_versions map,
18 | jvm_gc_types map,
19 | jvm_headers map,
20 | jvm_md_page_size map,
21 | jvm_md_phys_total map,
22 | jvm_md_phys_free map,
23 | jvm_md_swap_total map,
24 | jvm_md_swap_free map,
25 | rc_source_type varchar,
26 | rc_source_config_string varchar,
27 | jvm_rc_source_type map,
28 | jvm_rc_source_config_string map,
29 | configs map,
30 | ext varchar,
31 | PRIMARY KEY (account_id, id)
32 | ) WITH gc_grace_seconds = 60;
33 |
34 | CREATE INDEX IF NOT EXISTS analyse_continuous ON gc_analyse( is_continuous );
35 |
36 | CREATE TABLE IF NOT EXISTS gc_event (
37 | id uuid,
38 | parent_id uuid,
39 | analyse_id uuid,
40 | bucket_id varchar,
41 | date varchar,
42 | jvm_id varchar,
43 | description varchar,
44 | written_at timeuuid,
45 | tmstm double,
46 | occurred timestamp,
47 | vm_event_type int,
48 | capacity list,
49 | total_capacity list,
50 | pause_mu bigint,
51 | generations bigint,
52 | phase int,
53 | cause int,
54 | properties bigint,
55 | concurrency int,
56 | gen_cap_before map,
57 | gen_cap_after map,
58 | gen_cap_total map,
59 | user_time double,
60 | sys_time double,
61 | real_time double,
62 | ext varchar,
63 | PRIMARY KEY ((analyse_id, jvm_id, date), written_at)
64 | ) WITH CLUSTERING ORDER BY (written_at DESC) AND gc_grace_seconds = 60;
65 |
66 | CREATE MATERIALIZED VIEW IF NOT EXISTS gc_event_by_bucket
67 | AS SELECT * FROM gc_event
68 | WHERE bucket_id IS NOT NULL AND analyse_id IS NOT NULL AND jvm_id IS NOT NULL AND date IS NOT NULL AND written_at IS NOT NULL
69 | PRIMARY KEY ((bucket_id, analyse_id, jvm_id, date), written_at);
70 |
71 | CREATE TABLE IF NOT EXISTS objects_ages (
72 | analyse_id uuid,
73 | occurred timestamp,
74 | written_at timeuuid,
75 | jvm_id varchar,
76 | desired_sv_size bigint,
77 | occupied list,
78 | total list,
79 | ext varchar,
80 | PRIMARY KEY ((analyse_id, jvm_id), written_at)
81 | ) WITH CLUSTERING ORDER BY (written_at DESC);
82 |
83 | CREATE INDEX IF NOT EXISTS objects_ages_occurred ON objects_ages( occurred );
84 |
--------------------------------------------------------------------------------
/gcplot/Dockerfile:
--------------------------------------------------------------------------------
#
# GCPlot all-in-one dockerfile
#

FROM openjdk:8-jdk

ARG GCPLOT_VERSION=2.0.1
ARG CASSANDRA_VERSION=311x
ARG ORIENTDB_VERSION=2.2.13

# Default heap sizes; override at "docker run" time with -e (see README).
ENV GCPLOT_MEMORY=512m
ENV ORIENTDB_MEMORY=256m
ENV CASSANDRA_MEMORY=1g

# Install packages and create appropriate user

RUN \
  apt-get update -y \
  && apt-get install -y wget curl python unzip nginx net-tools netcat \
  # drop apt package lists to keep the image layer small
  && rm -rf /var/lib/apt/lists/* \
  && groupadd --system gcserver \
  && useradd -d /home/gcserver -u 1040 -g gcserver -s /bin/bash gcserver \
  && mkdir -p /home/gcserver \
  && mkdir -m 755 -p /home/gcserver/logs \
  && mkdir -m 755 -p /home/gcserver/config \
  && mkdir -m 755 -p /home/gcserver/lib \
  && mkdir -m 777 -p /tmp/gcserver-file-uploads \
  && chown gcserver:gcserver /tmp/gcserver-file-uploads \
  && chown -R gcserver:gcserver /home/gcserver

COPY gcserver/logback.xml /home/gcserver/config
COPY gcserver/gcplot.properties /home/gcserver/config
COPY gcserver/gcserver /etc/init.d

RUN chmod 755 /etc/init.d/gcserver \
  && chmod -R 755 /home/gcserver/config/*

RUN \
  wget "https://downloads.gcplot.com/artifacts/gcserver/${GCPLOT_VERSION}/com.gcplot.web-${GCPLOT_VERSION}-all.jar" -O /home/gcserver/lib/bootstrap.jar \
  && chown -R gcserver:gcserver /home/gcserver

# Install OrientDB

RUN \
  groupadd --system orientdb \
  && useradd -d /var/orientdb -u 1041 -g orientdb -s /bin/bash orientdb \
  && mkdir -p /var/orientdb \
  && mkdir -p /var/lib/orientdb \
  && mkdir -p /var/log/orientdb \
  && chown -R orientdb:orientdb /var/orientdb \
  && chown -R orientdb:orientdb /var/lib/orientdb \
  && chown -R orientdb:orientdb /var/log/orientdb \
  # NOTE(review): plain-http download — verify whether an https mirror or a
  # checksum verification step is available for this artifact.
  && wget "http://orientdb.com/download.php?file=orientdb-community-$ORIENTDB_VERSION.tar.gz&os=multi" -O /tmp/orientdb.tar.gz \
  && tar -zvxf /tmp/orientdb.tar.gz --directory /opt \
  # remove the tarball so it is not baked into the image layer
  && rm -f /tmp/orientdb.tar.gz \
  && mv /opt/orientdb-community-$ORIENTDB_VERSION /opt/orientdb \
  && chown -R orientdb:orientdb /opt/orientdb

COPY orientdb/orientdb-server-config.xml /opt/orientdb/config/orientdb-server-config.xml
COPY orientdb/orientdb-server-log.properties /opt/orientdb/config/orientdb-server-log.properties
COPY orientdb/orientdb.sh /opt/orientdb/bin/orientdb.sh

RUN cp /opt/orientdb/bin/orientdb.sh /etc/init.d/orientdb \
  && chmod 755 /etc/init.d/orientdb

# Install Cassandra

RUN echo "deb http://www.apache.org/dist/cassandra/debian $CASSANDRA_VERSION main" | tee -a /etc/apt/sources.list.d/cassandra.sources.list \
  && curl https://www.apache.org/dist/cassandra/KEYS | apt-key add - \
  && apt-get update -y \
  && apt-get install cassandra -y \
  && rm -rf /var/lib/apt/lists/* \
  # the packaged init script's ulimit calls fail inside a container
  && sed -i '/ulimit/d' /etc/init.d/cassandra

COPY cassandra/cassandra-rackdc.properties /etc/cassandra/cassandra-rackdc.properties
COPY cassandra/cassandra-topology.properties /etc/cassandra/cassandra-topology.properties
COPY cassandra/cassandra.yaml /etc/cassandra/cassandra.yaml
COPY cassandra/jvm.properties /etc/cassandra/jvm.options
COPY cassandra/cdb.cql /etc/cassandra/cdb.cql
COPY start.sh /start.sh

RUN chmod 755 /start.sh

# Install UI

RUN wget "https://downloads.gcplot.com/artifacts/ui/$GCPLOT_VERSION/gcplot-ui-$GCPLOT_VERSION.tar.gz" -O /tmp/gcplot-ui.tar.gz \
  && tar -xvzf /tmp/gcplot-ui.tar.gz -C /tmp \
  # bust browser caches on every image build
  && sed -i "s/\${CACHE_BUSTER}/$(date +%s)/g" /tmp/dist/index.html \
  && mkdir /var/www/app \
  && mkdir /var/www/landing \
  && mv /tmp/dist/* /var/www/app \
  && mv /tmp/landing/* /var/www/landing \
  && chmod -R 755 /var/www/* \
  # clean up the download and emptied extraction dirs
  && rm -rf /tmp/gcplot-ui.tar.gz /tmp/dist /tmp/landing

# Configuring nginx

COPY nginx/gcplot.conf /etc/nginx/sites-available/gcplot.conf
COPY nginx/nginx /etc/nginx/nginx.conf
RUN ln -s /etc/nginx/sites-available/gcplot.conf /etc/nginx/sites-enabled/gcplot.conf \
  && rm -rf /etc/nginx/sites-enabled/default \
  && rm -rf /etc/nginx/sites-available/default

VOLUME ["/var/lib/cassandra", "/var/lib/orientdb"]
EXPOSE 9042 2424 2480 80
CMD ["/start.sh"]
--------------------------------------------------------------------------------
/gcplot/orientdb/orientdb-server-config.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 | true
86 |
87 |
--------------------------------------------------------------------------------
/gcplot/cassandra/jvm.properties:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # jvm.options #
3 | # #
4 | # - all flags defined here will be used by cassandra to startup the JVM #
5 | # - one flag should be specified per line #
6 | # - lines that do not start with '-' will be ignored #
7 | # - only static flags are accepted (no variables or parameters) #
8 | # - dynamic flags will be appended to these on cassandra-env #
9 | ###########################################################################
10 |
11 | ######################
12 | # STARTUP PARAMETERS #
13 | ######################
14 |
15 | # Uncomment any of the following properties to enable specific startup parameters
16 |
17 | # In a multi-instance deployment, multiple Cassandra instances will independently assume that all
18 | # CPU processors are available to it. This setting allows you to specify a smaller set of processors
19 | # and perhaps have affinity.
20 | #-Dcassandra.available_processors=number_of_processors
21 |
22 | # The directory location of the cassandra.yaml file.
23 | #-Dcassandra.config=directory
24 |
25 | # Sets the initial partitioner token for a node the first time the node is started.
26 | #-Dcassandra.initial_token=token
27 |
28 | # Set to false to start Cassandra on a node but not have the node join the cluster.
29 | #-Dcassandra.join_ring=true|false
30 |
31 | # Set to false to clear all gossip state for the node on restart. Use when you have changed node
32 | # information in cassandra.yaml (such as listen_address).
33 | #-Dcassandra.load_ring_state=true|false
34 |
35 | # Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2.
36 | #-Dcassandra.metricsReporterConfigFile=file
37 |
38 | # Set the port on which the CQL native transport listens for clients. (Default: 9042)
39 | #-Dcassandra.native_transport_port=port
40 |
41 | # Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner)
42 | #-Dcassandra.partitioner=partitioner
43 |
44 | # To replace a node that has died, restart a new node in its place specifying the address of the
45 | # dead node. The new node must not have any data in its data directory, that is, it must be in the
46 | # same state as before bootstrapping.
47 | #-Dcassandra.replace_address=listen_address or broadcast_address of dead node
48 |
49 | # Allow restoring specific tables from an archived commit log.
50 | #-Dcassandra.replayList=table
51 |
52 | # Allows overriding of the default RING_DELAY (1000ms), which is the amount of time a node waits
53 | # before joining the ring.
54 | #-Dcassandra.ring_delay_ms=ms
55 |
56 | # Set the port for the Thrift RPC service, which is used for client connections. (Default: 9160)
57 | #-Dcassandra.rpc_port=port
58 |
59 | # Set the SSL port for encrypted communication. (Default: 7001)
60 | #-Dcassandra.ssl_storage_port=port
61 |
62 | # Enable or disable the native transport server. See start_native_transport in cassandra.yaml.
63 | # cassandra.start_native_transport=true|false
64 |
65 | # Enable or disable the Thrift RPC server. (Default: true)
66 | #-Dcassandra.start_rpc=true/false
67 |
68 | # Set the port for inter-node communication. (Default: 7000)
69 | #-Dcassandra.storage_port=port
70 |
71 | # Set the default location for the trigger JARs. (Default: conf/triggers)
72 | #-Dcassandra.triggers_dir=directory
73 |
74 | # For testing new compaction and compression strategies. It allows you to experiment with different
75 | # strategies and benchmark write performance differences without affecting the production workload.
76 | #-Dcassandra.write_survey=true
77 |
78 | # To disable configuration via JMX of auth caches (such as those for credentials, permissions and
79 | # roles). This will mean those config options can only be set (persistently) in cassandra.yaml
80 | # and will require a restart for new values to take effect.
81 | #-Dcassandra.disable_auth_caches_remote_configuration=true
82 |
83 | ########################
84 | # GENERAL JVM SETTINGS #
85 | ########################
86 |
87 | # enable thread priorities, primarily so we can give periodic tasks
88 | # a lower priority to avoid interfering with client workload
89 | -XX:+UseThreadPriorities
90 |
91 | # allows lowering thread priority without being root on linux - probably
92 | # not necessary on Windows but doesn't harm anything.
93 | # see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workar
94 | -XX:ThreadPriorityPolicy=42
95 |
96 | # Enable heap-dump if there's an OOM
97 | -XX:+HeapDumpOnOutOfMemoryError
98 |
99 | # Per-thread stack size.
100 | -Xss256k
101 |
102 | # Larger interned string table, for gossip's benefit (CASSANDRA-6410)
103 | -XX:StringTableSize=1000003
104 |
105 | # Make sure all memory is faulted and zeroed on startup.
106 | # This helps prevent soft faults in containers and makes
107 | # transparent hugepage allocation more effective.
108 | -XX:+AlwaysPreTouch
109 |
110 | # Disable biased locking as it does not benefit Cassandra.
111 | -XX:-UseBiasedLocking
112 |
113 | # Enable thread-local allocation blocks and allow the JVM to automatically
114 | # resize them at runtime.
115 | -XX:+UseTLAB
116 | -XX:+ResizeTLAB
117 |
118 | # http://www.evanjones.ca/jvm-mmap-pause.html
119 | -XX:+PerfDisableSharedMem
120 |
121 | # Prefer binding to IPv4 network interfaces (when net.ipv6.bindv6only=1). See
122 | # http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version:
123 | # comment out this entry to enable IPv6 support).
124 | -Djava.net.preferIPv4Stack=true
125 |
126 | ### Debug options
127 |
128 | # uncomment to enable flight recorder
129 | #-XX:+UnlockCommercialFeatures
130 | #-XX:+FlightRecorder
131 |
132 | # uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414
133 | #-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414
134 |
135 | # uncomment to have Cassandra JVM log internal method compilation (developers only)
136 | #-XX:+UnlockDiagnosticVMOptions
137 | #-XX:+LogCompilation
138 |
139 | #################
140 | # HEAP SETTINGS #
141 | #################
142 |
143 | # Heap size is automatically calculated by cassandra-env based on this
144 | # formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
145 | # That is:
146 | # - calculate 1/2 ram and cap to 1024MB
147 | # - calculate 1/4 ram and cap to 8192MB
148 | # - pick the max
149 | #
150 | # For production use you may wish to adjust this for your environment.
151 | # If that's the case, uncomment the -Xmx and Xms options below to override the
152 | # automatic calculation of JVM heap memory.
153 | #
154 | # It is recommended to set min (-Xms) and max (-Xmx) heap sizes to
155 | # the same value to avoid stop-the-world GC pauses during resize, and
156 | # so that we can lock the heap in memory on startup to prevent any
157 | # of it from being swapped out.
158 | -Xms{CASSANDRA_MEMORY}
159 | -Xmx{CASSANDRA_MEMORY}
160 |
161 | # Young generation size is automatically calculated by cassandra-env
162 | # based on this formula: min(100 * num_cores, 1/4 * heap size)
163 | #
164 | # The main trade-off for the young generation is that the larger it
165 | # is, the longer GC pause times will be. The shorter it is, the more
166 | # expensive GC will be (usually).
167 | #
168 | # It is not recommended to set the young generation size if using the
169 | # G1 GC, since that will override the target pause-time goal.
170 | # More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
171 | #
172 | # The example below assumes a modern 8-core+ machine for decent
173 | # times. If in doubt, and if you do not particularly want to tweak, go
174 | # 100 MB per physical CPU core.
175 | #-Xmn800M
176 |
177 | #################
178 | # GC SETTINGS #
179 | #################
180 |
181 | ### CMS Settings
182 |
183 | -XX:+UseParNewGC
184 | -XX:+UseConcMarkSweepGC
185 | -XX:+CMSParallelRemarkEnabled
186 | -XX:SurvivorRatio=8
187 | -XX:MaxTenuringThreshold=1
188 | -XX:CMSInitiatingOccupancyFraction=75
189 | #-XX:+UseCMSInitiatingOccupancyOnly
190 | #-XX:CMSWaitDuration=10000
191 | #-XX:+CMSParallelInitialMarkEnabled
192 | -XX:+CMSEdenChunksRecordAlways
193 | # some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
194 | #-XX:+CMSClassUnloadingEnabled
195 |
196 |
197 | ### G1 Settings (experimental, comment previous section and uncomment section below to enable)
198 |
199 | ## Use the Hotspot garbage-first collector.
200 | # -XX:+UseG1GC
201 | #
202 | ## Have the JVM do less remembered set work during STW, instead
203 | ## preferring concurrent GC. Reduces p99.9 latency.
204 | #-XX:G1RSetUpdatingPauseTimePercent=5
205 | #
206 | ## Main G1GC tunable: lowering the pause target will lower throughput and vice versa.
207 | ## 200ms is the JVM default and lowest viable setting
208 | ## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
209 | # -XX:MaxGCPauseMillis=80
210 |
211 | ## Optional G1 Settings
212 |
213 | # Save CPU time on large (>= 16GB) heaps by delaying region scanning
214 | # until the heap is 70% full. The default in Hotspot 8u40 is 40%.
215 | -XX:InitiatingHeapOccupancyPercent=70
216 |
217 | # For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
218 | # Otherwise equal to the number of cores when 8 or less.
219 | # Machines with > 10 cores should try setting these to <= full cores.
220 | #-XX:ParallelGCThreads=16
221 | # By default, ConcGCThreads is 1/4 of ParallelGCThreads.
222 | # Setting both to the same value can reduce STW durations.
223 | #-XX:ConcGCThreads=16
224 |
225 | ### GC logging options -- uncomment to enable
226 |
227 | #-XX:+PrintGCDetails
228 | #-XX:+PrintGCDateStamps
229 | #-XX:+PrintHeapAtGC
230 | #-XX:+PrintTenuringDistribution
231 | #-XX:+PrintGCApplicationStoppedTime
232 | #-XX:+PrintPromotionFailure
233 | #-XX:PrintFLSStatistics=1
234 | #-Xloggc:/var/log/cassandra/gc.log
235 | #-XX:+UseGCLogFileRotation
236 | #-XX:NumberOfGCLogFiles=10
237 | #-XX:GCLogFileSize=200M
238 |
--------------------------------------------------------------------------------
/gcplot/cassandra/cassandra.yaml:
--------------------------------------------------------------------------------
1 | # Cassandra storage config YAML
2 |
3 | # NOTE:
4 | # See http://wiki.apache.org/cassandra/StorageConfiguration for
5 | # full explanations of configuration directives
6 | # /NOTE
7 |
8 | # The name of the cluster. This is mainly used to prevent machines in
9 | # one logical cluster from joining another.
10 | cluster_name: 'gcp_csd_cluster'
11 |
12 | # This defines the number of tokens randomly assigned to this node on the ring
13 | # The more tokens, relative to other nodes, the larger the proportion of data
14 | # that this node will store. You probably want all nodes to have the same number
15 | # of tokens assuming they have equal hardware capability.
16 | #
17 | # If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
18 | # and will use the initial_token as described below.
19 | #
20 | # Specifying initial_token will override this setting on the node's initial start,
21 | # on subsequent starts, this setting will apply even if initial token is set.
22 | #
23 | # If you already have a cluster with 1 token per node, and wish to migrate to
24 | # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
25 | num_tokens: 256
26 |
27 | # Triggers automatic allocation of num_tokens tokens for this node. The allocation
28 | # algorithm attempts to choose tokens in a way that optimizes replicated load over
29 | # the nodes in the datacenter for the replication strategy used by the specified
30 | # keyspace.
31 | #
32 | # The load assigned to each node will be close to proportional to its number of
33 | # vnodes.
34 | #
35 | # Only supported with the Murmur3Partitioner.
36 | # allocate_tokens_for_keyspace: KEYSPACE
37 |
38 | # initial_token allows you to specify tokens manually. While you can use it with
39 | # vnodes (num_tokens > 1, above) -- in which case you should provide a
40 | # comma-separated list -- it's primarily used when adding nodes to legacy clusters
41 | # that do not have vnodes enabled.
42 | # initial_token:
43 |
44 | # See http://wiki.apache.org/cassandra/HintedHandoff
45 | # May either be "true" or "false" to enable globally
46 | hinted_handoff_enabled: true
47 | # When hinted_handoff_enabled is true, a black list of data centers that will not
48 | # perform hinted handoff
49 | #hinted_handoff_disabled_datacenters:
50 | # - DC1
51 | # - DC2
52 | # this defines the maximum amount of time a dead host will have hints
53 | # generated. After it has been dead this long, new hints for it will not be
54 | # created until it has been seen alive and gone down again.
55 | max_hint_window_in_ms: 10800000 # 3 hours
56 |
57 | # Maximum throttle in KBs per second, per delivery thread. This will be
58 | # reduced proportionally to the number of nodes in the cluster. (If there
59 | # are two nodes in the cluster, each delivery thread will use the maximum
60 | # rate; if there are three, each will throttle to half of the maximum,
61 | # since we expect two nodes to be delivering hints simultaneously.)
62 | hinted_handoff_throttle_in_kb: 1024
63 |
64 | # Number of threads with which to deliver hints;
65 | # Consider increasing this number when you have multi-dc deployments, since
66 | # cross-dc handoff tends to be slower
67 | max_hints_delivery_threads: 2
68 |
69 | # Directory where Cassandra should store hints.
70 | # If not set, the default directory is $CASSANDRA_HOME/data/hints.
71 | hints_directory: /var/lib/cassandra/hints
72 |
73 | # How often hints should be flushed from the internal buffers to disk.
74 | # Will *not* trigger fsync.
75 | hints_flush_period_in_ms: 10000
76 |
77 | # Maximum size for a single hints file, in megabytes.
78 | max_hints_file_size_in_mb: 512
79 |
80 | # Compression to apply to the hint files. If omitted, hints files
81 | # will be written uncompressed. LZ4, Snappy, and Deflate compressors
82 | # are supported.
83 | #hints_compression:
84 | # - class_name: LZ4Compressor
85 | # parameters:
86 | # -
87 |
88 | # Maximum throttle in KBs per second, total. This will be
89 | # reduced proportionally to the number of nodes in the cluster.
90 | batchlog_replay_throttle_in_kb: 1024
91 |
92 | # Authentication backend, implementing IAuthenticator; used to identify users
93 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
94 | # PasswordAuthenticator}.
95 | #
96 | # - AllowAllAuthenticator performs no checks - set it to disable authentication.
97 | # - PasswordAuthenticator relies on username/password pairs to authenticate
98 | # users. It keeps usernames and hashed passwords in system_auth.credentials table.
99 | # Please increase system_auth keyspace replication factor if you use this authenticator.
100 | # If using PasswordAuthenticator, CassandraRoleManager must also be used (see below)
101 | authenticator: PasswordAuthenticator
102 |
103 | # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
104 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer,
105 | # CassandraAuthorizer}.
106 | #
107 | # - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
108 | # - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
109 | # increase system_auth keyspace replication factor if you use this authorizer.
110 | authorizer: CassandraAuthorizer
111 |
112 | # Part of the Authentication & Authorization backend, implementing IRoleManager; used
113 | # to maintain grants and memberships between roles.
114 | # Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager,
115 | # which stores role information in the system_auth keyspace. Most functions of the
116 | # IRoleManager require an authenticated login, so unless the configured IAuthenticator
117 | # actually implements authentication, most of this functionality will be unavailable.
118 | #
119 | # - CassandraRoleManager stores role data in the system_auth keyspace. Please
120 | # increase system_auth keyspace replication factor if you use this role manager.
121 | role_manager: CassandraRoleManager
122 |
123 | # Validity period for roles cache (fetching permissions can be an
124 | # expensive operation depending on the authorizer). Granted roles are cached for
125 | # authenticated sessions in AuthenticatedUser and after the period specified
126 | # here, become eligible for (async) reload.
127 | # Defaults to 2000, set to 0 to disable.
128 | # Will be disabled automatically for AllowAllAuthenticator.
129 | roles_validity_in_ms: 2000
130 |
131 | # Refresh interval for roles cache (if enabled).
132 | # After this interval, cache entries become eligible for refresh. Upon next
133 | # access, an async reload is scheduled and the old value returned until it
134 | # completes. If roles_validity_in_ms is non-zero, then this must be
135 | # non-zero as well.
136 | # Defaults to the same value as roles_validity_in_ms.
137 | # roles_update_interval_in_ms: 1000
138 |
139 | # Validity period for permissions cache (fetching permissions can be an
140 | # expensive operation depending on the authorizer, CassandraAuthorizer is
141 | # one example). Defaults to 2000, set to 0 to disable.
142 | # Will be disabled automatically for AllowAllAuthorizer.
143 | permissions_validity_in_ms: 2000
144 |
145 | # Refresh interval for permissions cache (if enabled).
146 | # After this interval, cache entries become eligible for refresh. Upon next
147 | # access, an async reload is scheduled and the old value returned until it
148 | # completes. If permissions_validity_in_ms is non-zero, then this must be
149 | # non-zero as well.
150 | # Defaults to the same value as permissions_validity_in_ms.
151 | # permissions_update_interval_in_ms: 1000
152 |
153 | # The partitioner is responsible for distributing groups of rows (by
154 | # partition key) across nodes in the cluster. You should leave this
155 | # alone for new clusters. The partitioner can NOT be changed without
156 | # reloading all data, so when upgrading you should set this to the
157 | # same partitioner you were already using.
158 | #
159 | # Besides Murmur3Partitioner, partitioners included for backwards
160 | # compatibility include RandomPartitioner, ByteOrderedPartitioner, and
161 | # OrderPreservingPartitioner.
162 | #
163 | partitioner: org.apache.cassandra.dht.Murmur3Partitioner
164 |
165 | # Directories where Cassandra should store data on disk. Cassandra
166 | # will spread data evenly across them, subject to the granularity of
167 | # the configured compaction strategy.
168 | # If not set, the default directory is $CASSANDRA_HOME/data/data.
169 | data_file_directories:
170 | - /var/lib/cassandra/data
171 |
172 | # commit log. when running on magnetic HDD, this should be a
173 | # separate spindle than the data directories.
174 | # If not set, the default directory is $CASSANDRA_HOME/data/commitlog.
175 | commitlog_directory: /var/lib/cassandra/commitlog
176 |
177 | # policy for data disk failures:
178 | # die: shut down gossip and client transports and kill the JVM for any fs errors or
179 | # single-sstable errors, so the node can be replaced.
180 | # stop_paranoid: shut down gossip and client transports even for single-sstable errors,
181 | # kill the JVM for errors during startup.
182 | # stop: shut down gossip and client transports, leaving the node effectively dead, but
183 | # can still be inspected via JMX, kill the JVM for errors during startup.
184 | # best_effort: stop using the failed disk and respond to requests based on
185 | # remaining available sstables. This means you WILL see obsolete
186 | # data at CL.ONE!
187 | # ignore: ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
188 | disk_failure_policy: stop
189 |
190 | # policy for commit disk failures:
191 | # die: shut down gossip and Thrift and kill the JVM, so the node can be replaced.
192 | # stop: shut down gossip and Thrift, leaving the node effectively dead, but
193 | # can still be inspected via JMX.
194 | # stop_commit: shutdown the commit log, letting writes collect but
195 | # continuing to service reads, as in pre-2.0.5 Cassandra
196 | # ignore: ignore fatal errors and let the batches fail
197 | commit_failure_policy: stop
198 |
199 | # Maximum size of the key cache in memory.
200 | #
201 | # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
202 | # minimum, sometimes more. The key cache is fairly tiny for the amount of
203 | # time it saves, so it's worthwhile to use it at large numbers.
204 | # The row cache saves even more time, but must contain the entire row,
205 | # so it is extremely space-intensive. It's best to only use the
206 | # row cache if you have hot rows or static rows.
207 | #
208 | # NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
209 | #
210 | # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
211 | key_cache_size_in_mb:
212 |
213 | # Duration in seconds after which Cassandra should
214 | # save the key cache. Caches are saved to saved_caches_directory as
215 | # specified in this configuration file.
216 | #
217 | # Saved caches greatly improve cold-start speeds, and are relatively cheap in
218 | # terms of I/O for the key cache. Row cache saving is much more expensive and
219 | # has limited use.
220 | #
221 | # Default is 14400 or 4 hours.
222 | key_cache_save_period: 14400
223 |
224 | # Number of keys from the key cache to save
225 | # Disabled by default, meaning all keys are going to be saved
226 | # key_cache_keys_to_save: 100
227 |
228 | # Row cache implementation class name.
229 | # Available implementations:
230 | # org.apache.cassandra.cache.OHCProvider Fully off-heap row cache implementation (default).
231 | #   org.apache.cassandra.cache.SerializingCacheProvider This is the row cache implementation available
232 | # in previous releases of Cassandra.
233 | # row_cache_class_name: org.apache.cassandra.cache.OHCProvider
234 |
235 | # Maximum size of the row cache in memory.
236 | # Please note that OHC cache implementation requires some additional off-heap memory to manage
237 | # the map structures and some in-flight memory during operations before/after cache entries can be
238 | # accounted against the cache capacity. This overhead is usually small compared to the whole capacity.
239 | # Do not specify more memory than the system can afford in the worst usual situation and leave some
240 | # headroom for OS block level cache. Never allow your system to swap.
241 | #
242 | # Default value is 0, to disable row caching.
243 | row_cache_size_in_mb: 256
244 |
245 | # Duration in seconds after which Cassandra should save the row cache.
246 | # Caches are saved to saved_caches_directory as specified in this configuration file.
247 | #
248 | # Saved caches greatly improve cold-start speeds, and are relatively cheap in
249 | # terms of I/O for the key cache. Row cache saving is much more expensive and
250 | # has limited use.
251 | #
252 | # Default is 0 to disable saving the row cache.
253 | row_cache_save_period: 0
254 |
255 | # Number of keys from the row cache to save.
256 | # Specify 0 (which is the default), meaning all keys are going to be saved
257 | # row_cache_keys_to_save: 100
258 |
259 | # Maximum size of the counter cache in memory.
260 | #
261 | # Counter cache helps to reduce counter locks' contention for hot counter cells.
262 | # In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before
263 | # write entirely. With RF > 1 a counter cache hit will still help to reduce the duration
264 | # of the lock hold, helping with hot counter cell updates, but will not allow skipping
265 | # the read entirely. Only the local (clock, count) tuple of a counter cell is kept
266 | # in memory, not the whole counter, so it's relatively cheap.
267 | #
268 | # NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
269 | #
270 | # Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.
271 | # NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache.
272 | counter_cache_size_in_mb:
273 |
274 | # Duration in seconds after which Cassandra should
275 | # save the counter cache (keys only). Caches are saved to saved_caches_directory as
276 | # specified in this configuration file.
277 | #
278 | # Default is 7200 or 2 hours.
279 | counter_cache_save_period: 7200
280 |
281 | # Number of keys from the counter cache to save
282 | # Disabled by default, meaning all keys are going to be saved
283 | # counter_cache_keys_to_save: 100
284 |
285 | # saved caches
286 | # If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
287 | saved_caches_directory: /var/lib/cassandra/saved_caches
288 |
289 | # commitlog_sync may be either "periodic" or "batch."
290 | #
291 | # When in batch mode, Cassandra won't ack writes until the commit log
292 | # has been fsynced to disk. It will wait
293 | # commitlog_sync_batch_window_in_ms milliseconds between fsyncs.
294 | # This window should be kept short because the writer threads will
295 | # be unable to do extra work while waiting. (You may need to increase
296 | # concurrent_writes for the same reason.)
297 | #
298 | # commitlog_sync: batch
299 | # commitlog_sync_batch_window_in_ms: 2
300 | #
301 | # the other option is "periodic" where writes may be acked immediately
302 | # and the CommitLog is simply synced every commitlog_sync_period_in_ms
303 | # milliseconds.
304 | commitlog_sync: periodic
305 | commitlog_sync_period_in_ms: 10000
306 |
307 | # The size of the individual commitlog file segments. A commitlog
308 | # segment may be archived, deleted, or recycled once all the data
309 | # in it (potentially from each columnfamily in the system) has been
310 | # flushed to sstables.
311 | #
312 | # The default size is 32, which is almost always fine, but if you are
313 | # archiving commitlog segments (see commitlog_archiving.properties),
314 | # then you probably want a finer granularity of archiving; 8 or 16 MB
315 | # is reasonable.
316 | # Max mutation size is also configurable via max_mutation_size_in_kb setting in
317 | # cassandra.yaml. The default is half the size of commitlog_segment_size_in_mb * 1024.
318 | #
319 | # NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must
320 | # be set to at least twice the size of max_mutation_size_in_kb / 1024
321 | #
322 | commitlog_segment_size_in_mb: 32
323 |
324 | # Compression to apply to the commit log. If omitted, the commit log
325 | # will be written uncompressed. LZ4, Snappy, and Deflate compressors
326 | # are supported.
327 | #commitlog_compression:
328 | # - class_name: LZ4Compressor
329 | # parameters:
330 | # -
331 |
332 | # any class that implements the SeedProvider interface and has a
333 | # constructor that takes a Map of parameters will do.
334 | seed_provider:
335 | # Addresses of hosts that are deemed contact points.
336 | # Cassandra nodes use this list of hosts to find each other and learn
337 | # the topology of the ring. You must change this if you are running
338 | # multiple nodes!
339 | - class_name: org.apache.cassandra.locator.SimpleSeedProvider
340 | parameters:
341 | # seeds is actually a comma-delimited list of addresses.
342 | #          # Ex: "<ip1>,<ip2>,<ip3>"
343 | - seeds: 127.0.0.1
344 |
345 | # For workloads with more data than can fit in memory, Cassandra's
346 | # bottleneck will be reads that need to fetch data from
347 | # disk. "concurrent_reads" should be set to (16 * number_of_drives) in
348 | # order to allow the operations to enqueue low enough in the stack
349 | # that the OS and drives can reorder them. Same applies to
350 | # "concurrent_counter_writes", since counter writes read the current
351 | # values before incrementing and writing them back.
352 | #
353 | # On the other hand, since writes are almost never IO bound, the ideal
354 | # number of "concurrent_writes" is dependent on the number of cores in
355 | # your system; (8 * number_of_cores) is a good rule of thumb.
356 | concurrent_reads: 96
357 | concurrent_writes: 96
358 | concurrent_counter_writes: 96
359 |
360 | # For materialized view writes, as there is a read involved, so this should
361 | # be limited by the lesser of concurrent reads or concurrent writes.
362 | concurrent_materialized_view_writes: 96
363 |
364 | # Maximum memory to use for pooling sstable buffers. Defaults to the smaller
365 | # of 1/4 of heap or 512MB. This pool is allocated off-heap, so is in addition
366 | # to the memory allocated for heap. Memory is only allocated as needed.
367 | # file_cache_size_in_mb: 512
368 |
369 | # Flag indicating whether to allocate on or off heap when the sstable buffer
370 | # pool is exhausted, that is when it has exceeded the maximum memory
371 | # file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request.
372 |
373 | # buffer_pool_use_heap_if_exhausted: true
374 |
375 | # The strategy for optimizing disk read
376 | # Possible values are:
377 | # ssd (for solid state disks, the default)
378 | # spinning (for spinning disks)
379 | # disk_optimization_strategy: ssd
380 |
381 | # Total permitted memory to use for memtables. Cassandra will stop
382 | # accepting writes when the limit is exceeded until a flush completes,
383 | # and will trigger a flush based on memtable_cleanup_threshold
384 | # If omitted, Cassandra will set both to 1/4 the size of the heap.
385 | # memtable_heap_space_in_mb: 2048
386 | # memtable_offheap_space_in_mb: 2048
387 |
388 | # Ratio of occupied non-flushing memtable size to total permitted size
389 | # that will trigger a flush of the largest memtable. Larger mct will
390 | # mean larger flushes and hence less compaction, but also less concurrent
391 | # flush activity which can make it difficult to keep your disks fed
392 | # under heavy write load.
393 | #
394 | # memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
395 | # memtable_cleanup_threshold: 0.11
396 |
397 | # Specify the way Cassandra allocates and manages memtable memory.
398 | # Options are:
399 | # heap_buffers: on heap nio buffers
400 | # offheap_buffers: off heap (direct) nio buffers
401 | memtable_allocation_type: heap_buffers
402 |
403 | # Total space to use for commit logs on disk.
404 | #
405 | # If space gets above this value, Cassandra will flush every dirty CF
406 | # in the oldest segment and remove it. So a small total commitlog space
407 | # will tend to cause more flush activity on less-active columnfamilies.
408 | #
409 | # The default value is the smaller of 8192, and 1/4 of the total space
410 | # of the commitlog volume.
411 | #
412 | # commitlog_total_space_in_mb: 8192
413 |
414 | # This sets the amount of memtable flush writer threads. These will
415 | # be blocked by disk io, and each one will hold a memtable in memory
416 | # while blocked.
417 | #
418 | # memtable_flush_writers defaults to the smaller of (number of disks,
419 | # number of cores), with a minimum of 2 and a maximum of 8.
420 | #
421 | # If your data directories are backed by SSD, you should increase this
422 | # to the number of cores.
423 | #memtable_flush_writers: 8
424 |
425 | # A fixed memory pool size in MB for SSTable index summaries. If left
426 | # empty, this will default to 5% of the heap size. If the memory usage of
427 | # all index summaries exceeds this limit, SSTables with low read rates will
428 | # shrink their index summaries in order to meet this limit. However, this
429 | # is a best-effort process. In extreme conditions Cassandra may need to use
430 | # more than this amount of memory.
431 | index_summary_capacity_in_mb:
432 |
433 | # How frequently index summaries should be resampled. This is done
434 | # periodically to redistribute memory from the fixed-size pool to sstables
435 | # proportional their recent read rates. Setting to -1 will disable this
436 | # process, leaving existing index summaries at their current sampling level.
437 | index_summary_resize_interval_in_minutes: 60
438 |
439 | # Whether to, when doing sequential writing, fsync() at intervals in
440 | # order to force the operating system to flush the dirty
441 | # buffers. Enable this to avoid sudden dirty buffer flushing from
442 | # impacting read latencies. Almost always a good idea on SSDs; not
443 | # necessarily on platters.
444 | trickle_fsync: false
445 | trickle_fsync_interval_in_kb: 10240
446 |
447 | # TCP port, for commands and data
448 | # For security reasons, you should not expose this port to the internet. Firewall it if needed.
449 | storage_port: 7000
450 |
451 | # SSL port, for encrypted communication. Unused unless enabled in
452 | # encryption_options
453 | # For security reasons, you should not expose this port to the internet. Firewall it if needed.
454 | ssl_storage_port: 7001
455 |
456 | # Address or interface to bind to and tell other Cassandra nodes to connect to.
457 | # You _must_ change this if you want multiple nodes to be able to communicate!
458 | #
459 | # Set listen_address OR listen_interface, not both. Interfaces must correspond
460 | # to a single address, IP aliasing is not supported.
461 | #
462 | # Leaving it blank leaves it up to InetAddress.getLocalHost(). This
463 | # will always do the Right Thing _if_ the node is properly configured
464 | # (hostname, name resolution, etc), and the Right Thing is to use the
465 | # address associated with the hostname (it might not be).
466 | #
467 | # Setting listen_address to 0.0.0.0 is always wrong.
468 | #
469 | # If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
470 | # you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4
471 | # address will be used. If true the first ipv6 address will be used. Defaults to false preferring
472 | # ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
473 | listen_interface: lo
474 | # listen_interface_prefer_ipv6: false
475 |
476 | # Address to broadcast to other Cassandra nodes
477 | # Leaving this blank will set it to the same value as listen_address
478 | broadcast_address: 127.0.0.1
479 |
480 | # When using multiple physical network interfaces, set this
481 | # to true to listen on broadcast_address in addition to
482 | # the listen_address, allowing nodes to communicate in both
483 | # interfaces.
484 | # Ignore this property if the network configuration automatically
485 | # routes between the public and private networks such as EC2.
486 | # listen_on_broadcast_address: false
487 |
488 | # Internode authentication backend, implementing IInternodeAuthenticator;
489 | # used to allow/disallow connections from peer nodes.
490 | # internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
491 |
492 | # Whether to start the native transport server.
493 | # Please note that the address on which the native transport is bound is the
494 | # same as the rpc_address. The port however is different and specified below.
495 | start_native_transport: true
496 | # port for the CQL native transport to listen for clients on
497 | # For security reasons, you should not expose this port to the internet. Firewall it if needed.
498 | native_transport_port: 9042
499 | # Enabling native transport encryption in client_encryption_options allows you to either use
500 | # encryption for the standard port or to use a dedicated, additional port along with the unencrypted
501 | # standard native_transport_port.
502 | # Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
503 | # for native_transport_port. Setting native_transport_port_ssl to a different value
504 | # from native_transport_port will use encryption for native_transport_port_ssl while
505 | # keeping native_transport_port unencrypted.
506 | # native_transport_port_ssl: 9142
507 | # The maximum threads for handling requests when the native transport is used.
508 | # This is similar to rpc_max_threads though the default differs slightly (and
509 | # there is no native_transport_min_threads, idle threads will always be stopped
510 | # after 30 seconds).
511 | # native_transport_max_threads: 128
512 | #
513 | # The maximum size of allowed frame. Frame (requests) larger than this will
514 | # be rejected as invalid. The default is 256MB. If you're changing this parameter,
515 | # you may want to adjust max_value_size_in_mb accordingly.
516 | # native_transport_max_frame_size_in_mb: 256
517 |
518 | # The maximum number of concurrent client connections.
519 | # The default is -1, which means unlimited.
520 | # native_transport_max_concurrent_connections: -1
521 |
522 | # The maximum number of concurrent client connections per source ip.
523 | # The default is -1, which means unlimited.
524 | # native_transport_max_concurrent_connections_per_ip: -1
525 |
526 | # Whether to start the thrift rpc server.
527 | start_rpc: false
528 |
529 | # The address or interface to bind the Thrift RPC service and native transport
530 | # server to.
531 | #
532 | # Set rpc_address OR rpc_interface, not both. Interfaces must correspond
533 | # to a single address, IP aliasing is not supported.
534 | #
535 | # Leaving rpc_address blank has the same effect as on listen_address
536 | # (i.e. it will be based on the configured hostname of the node).
537 | #
538 | # Note that unlike listen_address, you can specify 0.0.0.0, but you must also
539 | # set broadcast_rpc_address to a value other than 0.0.0.0.
540 | #
541 | # For security reasons, you should not expose this port to the internet. Firewall it if needed.
542 | #
543 | # If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
544 | # you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
545 | # address will be used. If true the first ipv6 address will be used. Defaults to false preferring
546 | # ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
547 | # rpc_address: 0.0.0.0
548 | rpc_interface: lo
549 | # rpc_interface_prefer_ipv6: false
550 |
551 | # port for Thrift to listen for clients on
552 | rpc_port: 9160
553 |
554 | # RPC address to broadcast to drivers and other Cassandra nodes. This cannot
555 | # be set to 0.0.0.0. If left blank, this will be set to the value of
556 | # rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
557 | # be set.
558 | broadcast_rpc_address: 127.0.0.1
559 |
560 | # enable or disable keepalive on rpc/native connections
561 | rpc_keepalive: true
562 |
563 | # Cassandra provides two out-of-the-box options for the RPC Server:
564 | #
565 | # sync -> One thread per thrift connection. For a very large number of clients, memory
566 | # will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size
567 | # per thread, and that will correspond to your use of virtual memory (but physical memory
568 | # may be limited depending on use of stack space).
569 | #
570 | # hsha -> Stands for "half synchronous, half asynchronous." All thrift clients are handled
571 | # asynchronously using a small number of threads that does not vary with the amount
572 | # of thrift clients (and thus scales well to many clients). The rpc requests are still
573 | # synchronous (one thread per active request). If hsha is selected then it is essential
574 | # that rpc_max_threads is changed from the default value of unlimited.
575 | #
576 | # The default is sync because on Windows hsha is about 30% slower. On Linux,
577 | # sync/hsha performance is about the same, with hsha of course using less memory.
578 | #
579 | # Alternatively, can provide your own RPC server by providing the fully-qualified class name
580 | # of an o.a.c.t.TServerFactory that can create an instance of it.
581 | rpc_server_type: sync
582 |
583 | # Uncomment rpc_min|max_thread to set request pool size limits.
584 | #
585 | # Regardless of your choice of RPC server (see above), the number of maximum requests in the
586 | # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync
587 | # RPC server, it also dictates the number of clients that can be connected at all).
588 | #
589 | # The default is unlimited and thus provides no protection against clients overwhelming the server. You are
590 | # encouraged to set a maximum that makes sense for you in production, but do keep in mind that
591 | # rpc_max_threads represents the maximum number of client requests this server may execute concurrently.
592 | #
593 | # rpc_min_threads: 16
594 | # rpc_max_threads: 2048
595 |
596 | # uncomment to set socket buffer sizes on rpc connections
597 | # rpc_send_buff_size_in_bytes:
598 | # rpc_recv_buff_size_in_bytes:
599 |
600 | # Uncomment to set socket buffer size for internode communication
601 | # Note that when setting this, the buffer size is limited by net.core.wmem_max
602 | # and when not setting it it is defined by net.ipv4.tcp_wmem
603 | # See:
604 | # /proc/sys/net/core/wmem_max
605 | # /proc/sys/net/core/rmem_max
606 | # /proc/sys/net/ipv4/tcp_wmem
607 | # /proc/sys/net/ipv4/tcp_wmem
608 | # and: man tcp
609 | # internode_send_buff_size_in_bytes:
610 | # internode_recv_buff_size_in_bytes:
611 |
612 | # Frame size for thrift (maximum message length).
613 | thrift_framed_transport_size_in_mb: 15
614 |
615 | # Set to true to have Cassandra create a hard link to each sstable
616 | # flushed or streamed locally in a backups/ subdirectory of the
617 | # keyspace data. Removing these links is the operator's
618 | # responsibility.
619 | incremental_backups: false
620 |
621 | # Whether or not to take a snapshot before each compaction. Be
622 | # careful using this option, since Cassandra won't clean up the
623 | # snapshots for you. Mostly useful if you're paranoid when there
624 | # is a data format change.
625 | snapshot_before_compaction: false
626 |
627 | # Whether or not a snapshot is taken of the data before keyspace truncation
628 | # or dropping of column families. The STRONGLY advised default of true
629 | # should be used to provide data safety. If you set this flag to false, you will
630 | # lose data on truncation or drop.
631 | auto_snapshot: true
632 |
633 | # When executing a scan, within or across a partition, we need to keep the
634 | # tombstones seen in memory so we can return them to the coordinator, which
635 | # will use them to make sure other replicas also know about the deleted rows.
636 | # With workloads that generate a lot of tombstones, this can cause performance
637 | # problems and even exhaust the server heap.
638 | # (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
639 | # Adjust the thresholds here if you understand the dangers and want to
640 | # scan more tombstones anyway. These thresholds may also be adjusted at runtime
641 | # using the StorageService mbean.
642 | tombstone_warn_threshold: 1000
643 | tombstone_failure_threshold: 100000
644 |
645 | # Granularity of the collation index of rows within a partition.
646 | # Increase if your rows are large, or if you have a very large
647 | # number of rows per partition. The competing goals are these:
648 | # 1) a smaller granularity means more index entries are generated
649 | #      and looking up rows within the partition by collation column
650 | # is faster
651 | # 2) but, Cassandra will keep the collation index in memory for hot
652 | # rows (as part of the key cache), so a larger granularity means
653 | # you can cache more hot rows
654 | column_index_size_in_kb: 64
655 |
656 |
657 | # Log WARN on any batch size exceeding this value. 5kb per batch by default.
658 | # Caution should be taken on increasing the size of this threshold as it can lead to node instability.
659 | batch_size_warn_threshold_in_kb: 5
660 |
661 | # Fail any batch exceeding this value. 50kb (10x warn threshold) by default.
662 | batch_size_fail_threshold_in_kb: 50
663 |
664 | # Log WARN on any batches not of type LOGGED that span across more partitions than this limit
665 | unlogged_batch_across_partitions_warn_threshold: 10
666 |
667 | # Number of simultaneous compactions to allow, NOT including
668 | # validation "compactions" for anti-entropy repair. Simultaneous
669 | # compactions can help preserve read performance in a mixed read/write
670 | # workload, by mitigating the tendency of small sstables to accumulate
671 | # during a single long running compaction. The default is usually
672 | # fine and if you experience problems with compaction running too
673 | # slowly or too fast, you should look at
674 | # compaction_throughput_mb_per_sec first.
675 | #
676 | # concurrent_compactors defaults to the smaller of (number of disks,
677 | # number of cores), with a minimum of 2 and a maximum of 8.
678 | #
679 | # If your data directories are backed by SSD, you should increase this
680 | # to the number of cores.
681 | #concurrent_compactors: 1
682 |
683 | # Throttles compaction to the given total throughput across the entire
684 | # system. The faster you insert data, the faster you need to compact in
685 | # order to keep the sstable count down, but in general, setting this to
686 | # 16 to 32 times the rate you are inserting data is more than sufficient.
687 | # Setting this to 0 disables throttling. Note that this accounts for all types
688 | # of compaction, including validation compaction.
689 | compaction_throughput_mb_per_sec: 16
690 |
691 | # Log a warning when compacting partitions larger than this value
692 | compaction_large_partition_warning_threshold_mb: 100
693 |
694 | # When compacting, the replacement sstable(s) can be opened before they
695 | # are completely written, and used in place of the prior sstables for
696 | # any range that has been written. This helps to smoothly transfer reads
697 | # between the sstables, reducing page cache churn and keeping hot rows hot
698 | sstable_preemptive_open_interval_in_mb: 50
699 |
700 | # Throttles all outbound streaming file transfers on this node to the
701 | # given total throughput in Mbps. This is necessary because Cassandra does
702 | # mostly sequential IO when streaming data during bootstrap or repair, which
703 | # can lead to saturating the network connection and degrading rpc performance.
704 | # When unset, the default is 200 Mbps or 25 MB/s.
705 | # stream_throughput_outbound_megabits_per_sec: 200
706 |
707 | # Throttles all streaming file transfer between the datacenters,
708 | # this setting allows users to throttle inter dc stream throughput in addition
709 | # to throttling all network stream traffic as configured with
710 | # stream_throughput_outbound_megabits_per_sec
711 | # When unset, the default is 200 Mbps or 25 MB/s
712 | # inter_dc_stream_throughput_outbound_megabits_per_sec: 200
713 |
714 | # How long the coordinator should wait for read operations to complete
715 | read_request_timeout_in_ms: 5000
716 | # How long the coordinator should wait for seq or index scans to complete
717 | range_request_timeout_in_ms: 10000
718 | # How long the coordinator should wait for writes to complete
719 | write_request_timeout_in_ms: 2000
720 | # How long the coordinator should wait for counter writes to complete
721 | counter_write_request_timeout_in_ms: 5000
722 | # How long a coordinator should continue to retry a CAS operation
723 | # that contends with other proposals for the same row
724 | cas_contention_timeout_in_ms: 1000
725 | # How long the coordinator should wait for truncates to complete
726 | # (This can be much longer, because unless auto_snapshot is disabled
727 | # we need to flush first so we can snapshot before removing the data.)
728 | truncate_request_timeout_in_ms: 60000
729 | # The default timeout for other, miscellaneous operations
730 | request_timeout_in_ms: 10000
731 |
732 | # Enable operation timeout information exchange between nodes to accurately
733 | # measure request timeouts. If disabled, replicas will assume that requests
734 | # were forwarded to them instantly by the coordinator, which means that
735 | # under overload conditions we will waste that much extra time processing
736 | # already-timed-out requests.
737 | #
738 | # Warning: before enabling this property make sure that ntp is installed
739 | # and the times are synchronized between the nodes.
740 | cross_node_timeout: false
741 |
742 | # Set socket timeout for streaming operation.
743 | # The stream session is failed if no data/ack is received by any of the participants
744 | # within that period, which means this should also be sufficient to stream a large
745 | # sstable or rebuild table indexes.
746 | # Default value is 86400000ms, which means stale streams timeout after 24 hours.
747 | # A value of zero means stream sockets should never time out.
748 | # streaming_socket_timeout_in_ms: 86400000
749 |
750 | # phi value that must be reached for a host to be marked down.
751 | # most users should never need to adjust this.
752 | phi_convict_threshold: 12
753 |
754 | # endpoint_snitch -- Set this to a class that implements
755 | # IEndpointSnitch. The snitch has two functions:
756 | # - it teaches Cassandra enough about your network topology to route
757 | # requests efficiently
758 | # - it allows Cassandra to spread replicas around your cluster to avoid
759 | # correlated failures. It does this by grouping machines into
760 | # "datacenters" and "racks." Cassandra will do its best not to have
761 | # more than one replica on the same "rack" (which may not actually
762 | # be a physical location)
763 | #
764 | # IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER,
765 | # YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS
766 | # ARE PLACED.
767 | #
768 | # IF THE RACK A REPLICA IS PLACED IN CHANGES AFTER THE REPLICA HAS BEEN
769 | # ADDED TO A RING, THE NODE MUST BE DECOMMISSIONED AND REBOOTSTRAPPED.
770 | #
771 | # Out of the box, Cassandra provides
772 | # - SimpleSnitch:
773 | # Treats Strategy order as proximity. This can improve cache
774 | # locality when disabling read repair. Only appropriate for
775 | # single-datacenter deployments.
776 | # - GossipingPropertyFileSnitch
777 | # This should be your go-to snitch for production use. The rack
778 | # and datacenter for the local node are defined in
779 | # cassandra-rackdc.properties and propagated to other nodes via
780 | # gossip. If cassandra-topology.properties exists, it is used as a
781 | # fallback, allowing migration from the PropertyFileSnitch.
782 | # - PropertyFileSnitch:
783 | # Proximity is determined by rack and data center, which are
784 | # explicitly configured in cassandra-topology.properties.
785 | # - Ec2Snitch:
786 | # Appropriate for EC2 deployments in a single Region. Loads Region
787 | # and Availability Zone information from the EC2 API. The Region is
788 | # treated as the datacenter, and the Availability Zone as the rack.
789 | # Only private IPs are used, so this will not work across multiple
790 | # Regions.
791 | # - Ec2MultiRegionSnitch:
792 | # Uses public IPs as broadcast_address to allow cross-region
793 | # connectivity. (Thus, you should set seed addresses to the public
794 | # IP as well.) You will need to open the storage_port or
795 | # ssl_storage_port on the public IP firewall. (For intra-Region
796 | # traffic, Cassandra will switch to the private IP after
797 | # establishing a connection.)
798 | # - RackInferringSnitch:
799 | # Proximity is determined by rack and data center, which are
800 | # assumed to correspond to the 3rd and 2nd octet of each node's IP
801 | # address, respectively. Unless this happens to match your
802 | # deployment conventions, this is best used as an example of
803 | # writing a custom Snitch class and is provided in that spirit.
804 | #
805 | # You can use a custom Snitch by setting this to the full class name
806 | # of the snitch, which will be assumed to be on your classpath.
807 | endpoint_snitch: GossipingPropertyFileSnitch
808 |
809 | # controls how often to perform the more expensive part of host score
810 | # calculation
811 | dynamic_snitch_update_interval_in_ms: 100
812 | # controls how often to reset all host scores, allowing a bad host to
813 | # possibly recover
814 | dynamic_snitch_reset_interval_in_ms: 600000
815 | # if set greater than zero and read_repair_chance is < 1.0, this will allow
816 | # 'pinning' of replicas to hosts in order to increase cache capacity.
817 | # The badness threshold will control how much worse the pinned host has to be
818 | # before the dynamic snitch will prefer other replicas over it. This is
819 | # expressed as a double which represents a percentage. Thus, a value of
820 | # 0.2 means Cassandra would continue to prefer the static snitch values
821 | # until the pinned host was 20% worse than the fastest.
822 | dynamic_snitch_badness_threshold: 0.1
823 |
824 | # request_scheduler -- Set this to a class that implements
825 | # RequestScheduler, which will schedule incoming client requests
826 | # according to the specific policy. This is useful for multi-tenancy
827 | # with a single Cassandra cluster.
828 | # NOTE: This is specifically for requests from the client and does
829 | # not affect inter node communication.
830 | # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
831 | # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
832 | # client requests to a node with a separate queue for each
833 | # request_scheduler_id. The scheduler is further customized by
834 | # request_scheduler_options as described below.
835 | request_scheduler: org.apache.cassandra.scheduler.NoScheduler
836 |
837 | # Scheduler Options vary based on the type of scheduler
838 | # NoScheduler - Has no options
839 | # RoundRobin
840 | # - throttle_limit -- The throttle_limit is the number of in-flight
841 | # requests per client. Requests beyond
842 | # that limit are queued up until
843 | # running requests can complete.
844 | # The value of 80 here is twice the number of
845 | # concurrent_reads + concurrent_writes.
846 | # - default_weight -- default_weight is optional and allows for
847 | # overriding the default which is 1.
848 | # - weights -- Weights are optional and will default to 1 or the
849 | # overridden default_weight. The weight translates into how
850 | # many requests are handled during each turn of the
851 | # RoundRobin, based on the scheduler id.
852 | #
853 | # request_scheduler_options:
854 | # throttle_limit: 80
855 | # default_weight: 5
856 | # weights:
857 | # Keyspace1: 1
858 | # Keyspace2: 5
859 |
860 | # request_scheduler_id -- An identifier based on which to perform
861 | # the request scheduling. Currently the only valid option is keyspace.
862 | # request_scheduler_id: keyspace
863 |
864 | # Enable or disable inter-node encryption
865 | # Default settings are TLS v1, RSA 1024-bit keys (it is imperative that
866 | # users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher
867 | # suite for authentication, key exchange and encryption of the actual data transfers.
868 | # Use the DHE/ECDHE ciphers if running in FIPS 140 compliant mode.
869 | # NOTE: No custom encryption options are enabled at the moment
870 | # The available internode options are : all, none, dc, rack
871 | #
872 | # If set to dc cassandra will encrypt the traffic between the DCs
873 | # If set to rack cassandra will encrypt the traffic between the racks
874 | #
875 | # The passwords used in these options must match the passwords used when generating
876 | # the keystore and truststore. For instructions on generating these files, see:
877 | # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
878 | #
879 | server_encryption_options:
880 | internode_encryption: none
881 | keystore: conf/.keystore
882 | keystore_password: cassandra
883 | truststore: conf/.truststore
884 | truststore_password: cassandra
885 | # More advanced defaults below:
886 | # protocol: TLS
887 | # algorithm: SunX509
888 | # store_type: JKS
889 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
890 | # require_client_auth: false
891 |
892 | # enable or disable client/server encryption.
893 | client_encryption_options:
894 | enabled: false
895 | # If enabled and optional is set to true encrypted and unencrypted connections are handled.
896 | optional: false
897 | keystore: conf/.keystore
898 | keystore_password: cassandra
899 | # require_client_auth: false
900 | # Set truststore and truststore_password if require_client_auth is true
901 | # truststore: conf/.truststore
902 | # truststore_password: cassandra
903 | # More advanced defaults below:
904 | # protocol: TLS
905 | # algorithm: SunX509
906 | # store_type: JKS
907 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
908 |
909 | # internode_compression controls whether traffic between nodes is
910 | # compressed.
911 | # can be: all - all traffic is compressed
912 | # dc - traffic between different datacenters is compressed
913 | # none - nothing is compressed.
914 | internode_compression: all
915 |
916 | # Enable or disable tcp_nodelay for inter-dc communication.
917 | # Disabling it will result in larger (but fewer) network packets being sent,
918 | # reducing overhead from the TCP protocol itself, at the cost of increasing
919 | # latency if you block for cross-datacenter responses.
920 | inter_dc_tcp_nodelay: false
921 |
922 | # TTL for different trace types used during logging of the repair process.
923 | tracetype_query_ttl: 86400
924 | tracetype_repair_ttl: 604800
925 |
926 | # GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level
927 | # Adjust the threshold based on your application throughput requirement
928 | # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
929 | gc_warn_threshold_in_ms: 1000
930 |
931 | # UDFs (user defined functions) are disabled by default.
932 | # As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code.
933 | enable_user_defined_functions: false
934 |
935 | # Enables scripted UDFs (JavaScript UDFs).
936 | # Java UDFs are always enabled, if enable_user_defined_functions is true.
937 | # Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider.
938 | # This option has no effect, if enable_user_defined_functions is false.
939 | enable_scripted_user_defined_functions: false
940 |
941 | # The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation.
942 | # Lowering this value on Windows can provide much tighter latency and better throughput, however
943 | # some virtualized environments may see a negative performance impact from changing this setting
944 | # below their system default. The sysinternals 'clockres' tool can confirm your system's default
945 | # setting.
946 | windows_timer_interval: 1
947 |
948 | # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption
949 | # early. Any value size larger than this threshold will result into marking an SSTable
950 | # as corrupted.
951 | # max_value_size_in_mb: 256
952 |
953 | #############################################################
954 | ### Lekane changes ####
955 | #############################################################
956 | #auto_bootstrap: false
957 |
--------------------------------------------------------------------------------