├── .gitignore
├── LICENSE
├── README.md
├── Vagrantfile
├── cassandra
│   ├── conf
│   │   ├── cassandra_20x.yaml
│   │   └── cassandra_21x.yaml
│   ├── init.sls
│   └── settings.sls
└── vagrant
    ├── provision-config.sh
    ├── provision-master.sh
    ├── provision-minion.sh
    └── srv
        ├── pillar
        │   ├── cassandra.sls
        │   └── top.sls
        └── salt
            ├── java.sls
            └── top.sls

/.gitignore:
--------------------------------------------------------------------------------
1 | .vagrant
2 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2014 Viktor Taranenko
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | cassandra-formula
2 | =================
3 | 
4 | Salt Formula to set up and configure Cassandra cluster
5 | 
--------------------------------------------------------------------------------
/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 | 
4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
5 | VAGRANTFILE_API_VERSION = "2"
6 | 
7 | # Require a recent version of Vagrant, otherwise some have reported errors setting host names on boxes
8 | Vagrant.require_version ">= 1.6.3"
9 | 
10 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
11 | 
12 |   # The number of minions to provision
13 |   num_minion = (ENV['NUM_MINIONS'] || 0).to_i
14 | 
15 |   # ip configuration
16 |   master_ip = "10.245.1.2"
17 |   minion_ip_base = "10.245.2."
18 |   minion_ips = num_minion.times.collect { |n| minion_ip_base + "#{n+2}" }
19 |   minion_ips_str = minion_ips.join(",")
20 | 
21 |   config.vm.box = "debian/jessie64"
22 | 
23 |   config.vm.provider :virtualbox do |v|
24 |     # On VirtualBox, the debian/jessie64 box ships without guest additions or a
25 |     # functional vboxsf, so tell Vagrant that so it can be smarter.
26 |     v.check_guest_additions = false
27 |     v.functional_vboxsf = false
28 |     v.memory = 1536
29 |     v.cpus = 1
30 |   end
31 | 
32 |   config.vm.define "master" do |config|
33 |     config.vm.provision "shell", inline: "/vagrant/vagrant/provision-master.sh #{master_ip} #{num_minion} #{minion_ips_str}"
34 |     config.vm.network "private_network", ip: master_ip
35 |     config.vm.hostname = "salt-master"
36 |     config.vm.synced_folder "vagrant/srv/salt", "/srv/salt"
37 |     config.vm.synced_folder "cassandra", "/srv/salt/cassandra"
38 |     config.vm.synced_folder "vagrant/srv/pillar", "/srv/pillar"
39 |   end
40 | 
41 |   num_minion.times do |n|
42 |     config.vm.define "minion-#{n+1}" do |minion|
43 |       minion_index = n+1
44 |       minion_ip = minion_ips[n]
45 |       minion.vm.provision "shell", inline: "/vagrant/vagrant/provision-minion.sh #{master_ip} #{num_minion} #{minion_ips_str} #{minion_ip} #{minion_index}"
46 |       minion.vm.network "private_network", ip: minion_ip
47 |       minion.vm.hostname = "salt-minion-#{minion_index}"
48 |     end
49 |   end
50 | 
51 | end
52 | 
--------------------------------------------------------------------------------
/cassandra/conf/cassandra_20x.yaml:
--------------------------------------------------------------------------------
1 | {%- from 'cassandra/settings.sls' import config as c with context %}
2 | 
3 | # Cassandra storage config YAML
4 | 
5 | # NOTE:
6 | # See http://wiki.apache.org/cassandra/StorageConfiguration for
7 | # full explanations of configuration directives
8 | # /NOTE
9 | 
10 | # The name of the cluster. This is mainly used to prevent machines in
11 | # one logical cluster from joining another.
12 | cluster_name: {{ c.cluster_name }}
13 | 
14 | # This defines the number of tokens randomly assigned to this node on the ring
15 | # The more tokens, relative to other nodes, the larger the proportion of data
16 | # that this node will store. You probably want all nodes to have the same number
17 | # of tokens assuming they have equal hardware capability.
18 | #
19 | # If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
20 | # and will use the initial_token as described below.
21 | #
22 | # Specifying initial_token will override this setting.
23 | #
24 | # If you already have a cluster with 1 token per node, and wish to migrate to
25 | # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
26 | num_tokens: 256
27 | 
28 | # initial_token allows you to specify tokens manually. While you can use it with
29 | # vnodes (num_tokens > 1, above) -- in which case you should provide a
30 | # comma-separated list -- it's primarily used when adding nodes to legacy clusters
31 | # that do not have vnodes enabled.
32 | # initial_token:
33 | 
34 | # May either be "true" or "false" to enable globally, or contain a list
35 | # of data centers to enable per-datacenter.
36 | # hinted_handoff_enabled: DC1,DC2
37 | # See http://wiki.apache.org/cassandra/HintedHandoff
38 | hinted_handoff_enabled: true
39 | # this defines the maximum amount of time a dead host will have hints
40 | # generated. After it has been dead this long, new hints for it will not be
41 | # created until it has been seen alive and gone down again.
42 | max_hint_window_in_ms: 10800000 # 3 hours
43 | # Maximum throttle in KBs per second, per delivery thread. This will be
44 | # reduced proportionally to the number of nodes in the cluster.
(If there 45 | # are two nodes in the cluster, each delivery thread will use the maximum 46 | # rate; if there are three, each will throttle to half of the maximum, 47 | # since we expect two nodes to be delivering hints simultaneously.) 48 | hinted_handoff_throttle_in_kb: 1024 49 | # Number of threads with which to deliver hints; 50 | # Consider increasing this number when you have multi-dc deployments, since 51 | # cross-dc handoff tends to be slower 52 | max_hints_delivery_threads: 2 53 | 54 | # Maximum throttle in KBs per second, total. This will be 55 | # reduced proportionally to the number of nodes in the cluster. 56 | batchlog_replay_throttle_in_kb: 1024 57 | 58 | # Authentication backend, implementing IAuthenticator; used to identify users 59 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, 60 | # PasswordAuthenticator}. 61 | # 62 | # - AllowAllAuthenticator performs no checks - set it to disable authentication. 63 | # - PasswordAuthenticator relies on username/password pairs to authenticate 64 | # users. It keeps usernames and hashed passwords in system_auth.credentials table. 65 | # Please increase system_auth keyspace replication factor if you use this authenticator. 66 | authenticator: AllowAllAuthenticator 67 | 68 | # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions 69 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, 70 | # CassandraAuthorizer}. 71 | # 72 | # - AllowAllAuthorizer allows any action to any user - set it to disable authorization. 73 | # - CassandraAuthorizer stores permissions in system_auth.permissions table. Please 74 | # increase system_auth keyspace replication factor if you use this authorizer. 75 | authorizer: AllowAllAuthorizer 76 | 77 | # Validity period for permissions cache (fetching permissions can be an 78 | # expensive operation depending on the authorizer, CassandraAuthorizer is 79 | # one example). Defaults to 2000, set to 0 to disable. 80 | # Will be disabled automatically for AllowAllAuthorizer. 81 | permissions_validity_in_ms: 2000 82 | 83 | # The partitioner is responsible for distributing groups of rows (by 84 | # partition key) across nodes in the cluster. You should leave this 85 | # alone for new clusters. The partitioner can NOT be changed without 86 | # reloading all data, so when upgrading you should set this to the 87 | # same partitioner you were already using. 88 | # 89 | # Besides Murmur3Partitioner, partitioners included for backwards 90 | # compatibility include RandomPartitioner, ByteOrderedPartitioner, and 91 | # OrderPreservingPartitioner. 92 | # 93 | partitioner: org.apache.cassandra.dht.Murmur3Partitioner 94 | 95 | # Directories where Cassandra should store data on disk. Cassandra 96 | # will spread data evenly across them, subject to the granularity of 97 | # the configured compaction strategy. 98 | data_file_directories: 99 | {%- for d in c.data_file_directories %} 100 | - {{ d }} 101 | {% endfor %} 102 | 103 | # commit log 104 | commitlog_directory: {{ c.commitlog_directory }} 105 | 106 | # policy for data disk failures: 107 | # stop_paranoid: shut down gossip and Thrift even for single-sstable errors. 108 | # stop: shut down gossip and Thrift, leaving the node effectively dead, but 109 | # can still be inspected via JMX. 110 | # best_effort: stop using the failed disk and respond to requests based on 111 | # remaining available sstables. This means you WILL see obsolete 112 | # data at CL.ONE! 
113 | # ignore: ignore fatal errors and let requests fail, as in pre-1.2 Cassandra 114 | disk_failure_policy: stop 115 | 116 | # policy for commit disk failures: 117 | # stop: shut down gossip and Thrift, leaving the node effectively dead, but 118 | # can still be inspected via JMX. 119 | # stop_commit: shutdown the commit log, letting writes collect but 120 | # continuing to service reads, as in pre-2.0.5 Cassandra 121 | # ignore: ignore fatal errors and let the batches fail 122 | commit_failure_policy: stop 123 | 124 | # Maximum size of the key cache in memory. 125 | # 126 | # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the 127 | # minimum, sometimes more. The key cache is fairly tiny for the amount of 128 | # time it saves, so it's worthwhile to use it at large numbers. 129 | # The row cache saves even more time, but must contain the entire row, 130 | # so it is extremely space-intensive. It's best to only use the 131 | # row cache if you have hot rows or static rows. 132 | # 133 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 134 | # 135 | # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. 136 | key_cache_size_in_mb: 137 | 138 | # Duration in seconds after which Cassandra should 139 | # save the key cache. Caches are saved to saved_caches_directory as 140 | # specified in this configuration file. 141 | # 142 | # Saved caches greatly improve cold-start speeds, and is relatively cheap in 143 | # terms of I/O for the key cache. Row cache saving is much more expensive and 144 | # has limited use. 145 | # 146 | # Default is 14400 or 4 hours. 147 | key_cache_save_period: 14400 148 | 149 | # Number of keys from the key cache to save 150 | # Disabled by default, meaning all keys are going to be saved 151 | # key_cache_keys_to_save: 100 152 | 153 | # Maximum size of the row cache in memory. 154 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 155 | # 156 | # Default value is 0, to disable row caching. 157 | row_cache_size_in_mb: 0 158 | 159 | # Duration in seconds after which Cassandra should 160 | # safe the row cache. Caches are saved to saved_caches_directory as specified 161 | # in this configuration file. 162 | # 163 | # Saved caches greatly improve cold-start speeds, and is relatively cheap in 164 | # terms of I/O for the key cache. Row cache saving is much more expensive and 165 | # has limited use. 166 | # 167 | # Default is 0 to disable saving the row cache. 168 | row_cache_save_period: 0 169 | 170 | # Number of keys from the row cache to save 171 | # Disabled by default, meaning all keys are going to be saved 172 | # row_cache_keys_to_save: 100 173 | 174 | # The off-heap memory allocator. Affects storage engine metadata as 175 | # well as caches. Experiments show that JEMAlloc saves some memory 176 | # than the native GCC allocator (i.e., JEMalloc is more 177 | # fragmentation-resistant). 178 | # 179 | # Supported values are: NativeAllocator, JEMallocAllocator 180 | # 181 | # If you intend to use JEMallocAllocator you have to install JEMalloc as library and 182 | # modify cassandra-env.sh as directed in the file. 183 | # 184 | # Defaults to NativeAllocator 185 | # memory_allocator: NativeAllocator 186 | 187 | # saved caches 188 | saved_caches_directory: {{ c.saved_caches_directory }} 189 | 190 | # commitlog_sync may be either "periodic" or "batch." 
191 | # When in batch mode, Cassandra won't ack writes until the commit log 192 | # has been fsynced to disk. It will wait up to 193 | # commitlog_sync_batch_window_in_ms milliseconds for other writes, before 194 | # performing the sync. 195 | # 196 | # commitlog_sync: batch 197 | # commitlog_sync_batch_window_in_ms: 50 198 | # 199 | # the other option is "periodic" where writes may be acked immediately 200 | # and the CommitLog is simply synced every commitlog_sync_period_in_ms 201 | # milliseconds. By default this allows 1024*(CPU cores) pending 202 | # entries on the commitlog queue. If you are writing very large blobs, 203 | # you should reduce that; 16*cores works reasonably well for 1MB blobs. 204 | # It should be at least as large as the concurrent_writes setting. 205 | commitlog_sync: periodic 206 | commitlog_sync_period_in_ms: 10000 207 | # commitlog_periodic_queue_size: 208 | 209 | # The size of the individual commitlog file segments. A commitlog 210 | # segment may be archived, deleted, or recycled once all the data 211 | # in it (potentially from each columnfamily in the system) has been 212 | # flushed to sstables. 213 | # 214 | # The default size is 32, which is almost always fine, but if you are 215 | # archiving commitlog segments (see commitlog_archiving.properties), 216 | # then you probably want a finer granularity of archiving; 8 or 16 MB 217 | # is reasonable. 218 | commitlog_segment_size_in_mb: 32 219 | 220 | # any class that implements the SeedProvider interface and has a 221 | # constructor that takes a Map of parameters will do. 222 | seed_provider: 223 | # Addresses of hosts that are deemed contact points. 224 | # Cassandra nodes use this list of hosts to find each other and learn 225 | # the topology of the ring. You must change this if you are running 226 | # multiple nodes! 227 | - class_name: org.apache.cassandra.locator.SimpleSeedProvider 228 | parameters: 229 | # seeds is actually a comma-delimited list of addresses. 230 | # Ex: ",," 231 | - seeds: "{{ ','.join(c.seeds) }}" 232 | 233 | # For workloads with more data than can fit in memory, Cassandra's 234 | # bottleneck will be reads that need to fetch data from 235 | # disk. "concurrent_reads" should be set to (16 * number_of_drives) in 236 | # order to allow the operations to enqueue low enough in the stack 237 | # that the OS and drives can reorder them. 238 | # 239 | # On the other hand, since writes are almost never IO bound, the ideal 240 | # number of "concurrent_writes" is dependent on the number of cores in 241 | # your system; (8 * number_of_cores) is a good rule of thumb. 242 | concurrent_reads: 32 243 | concurrent_writes: 32 244 | 245 | # Total memory to use for sstable-reading buffers. Defaults to 246 | # the smaller of 1/4 of heap or 512MB. 247 | # file_cache_size_in_mb: 512 248 | 249 | # Total memory to use for memtables. Cassandra will flush the largest 250 | # memtable when this much memory is used. 251 | # If omitted, Cassandra will set it to 1/4 of the heap. 252 | # memtable_total_space_in_mb: 2048 253 | 254 | # Total space to use for commitlogs. Since commitlog segments are 255 | # mmapped, and hence use up address space, the default size is 32 256 | # on 32-bit JVMs, and 1024 on 64-bit JVMs. 257 | # 258 | # If space gets above this value (it will round up to the next nearest 259 | # segment multiple), Cassandra will flush every dirty CF in the oldest 260 | # segment and remove it. 
So a small total commitlog space will tend 261 | # to cause more flush activity on less-active columnfamilies. 262 | # commitlog_total_space_in_mb: 4096 263 | 264 | # This sets the amount of memtable flush writer threads. These will 265 | # be blocked by disk io, and each one will hold a memtable in memory 266 | # while blocked. If you have a large heap and many data directories, 267 | # you can increase this value for better flush performance. 268 | # By default this will be set to the amount of data directories defined. 269 | #memtable_flush_writers: 1 270 | 271 | # the number of full memtables to allow pending flush, that is, 272 | # waiting for a writer thread. At a minimum, this should be set to 273 | # the maximum number of secondary indexes created on a single CF. 274 | memtable_flush_queue_size: 4 275 | 276 | # Whether to, when doing sequential writing, fsync() at intervals in 277 | # order to force the operating system to flush the dirty 278 | # buffers. Enable this to avoid sudden dirty buffer flushing from 279 | # impacting read latencies. Almost always a good idea on SSDs; not 280 | # necessarily on platters. 281 | trickle_fsync: false 282 | trickle_fsync_interval_in_kb: 10240 283 | 284 | # TCP port, for commands and data 285 | storage_port: 7000 286 | 287 | # SSL port, for encrypted communication. Unused unless enabled in 288 | # encryption_options 289 | ssl_storage_port: 7001 290 | 291 | # Address to bind to and tell other Cassandra nodes to connect to. You 292 | # _must_ change this if you want multiple nodes to be able to 293 | # communicate! 294 | # 295 | # Leaving it blank leaves it up to InetAddress.getLocalHost(). This 296 | # will always do the Right Thing _if_ the node is properly configured 297 | # (hostname, name resolution, etc), and the Right Thing is to use the 298 | # address associated with the hostname (it might not be). 299 | # 300 | # Setting this to 0.0.0.0 is always wrong. 301 | listen_address: {{ c.listen_address }} 302 | 303 | # Address to broadcast to other Cassandra nodes 304 | # Leaving this blank will set it to the same value as listen_address 305 | # broadcast_address: 1.2.3.4 306 | 307 | # Internode authentication backend, implementing IInternodeAuthenticator; 308 | # used to allow/disallow connections from peer nodes. 309 | # internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator 310 | 311 | # Whether to start the native transport server. 312 | # Please note that the address on which the native transport is bound is the 313 | # same as the rpc_address. The port however is different and specified below. 314 | start_native_transport: true 315 | # port for the CQL native transport to listen for clients on 316 | native_transport_port: 9042 317 | # The maximum threads for handling requests when the native transport is used. 318 | # This is similar to rpc_max_threads though the default differs slightly (and 319 | # there is no native_transport_min_threads, idle threads will always be stopped 320 | # after 30 seconds). 321 | # native_transport_max_threads: 128 322 | # 323 | # The maximum size of allowed frame. Frame (requests) larger than this will 324 | # be rejected as invalid. The default is 256MB. 325 | # native_transport_max_frame_size_in_mb: 256 326 | 327 | # Whether to start the thrift rpc server. 328 | start_rpc: true 329 | 330 | # The address to bind the Thrift RPC service and native transport 331 | # server -- clients connect here. 
332 | # 333 | # Leaving this blank has the same effect it does for ListenAddress, 334 | # (i.e. it will be based on the configured hostname of the node). 335 | # 336 | # Note that unlike ListenAddress above, it is allowed to specify 0.0.0.0 337 | # here if you want to listen on all interfaces, but that will break clients 338 | # that rely on node auto-discovery. 339 | rpc_address: {{ c.rpc_address }} 340 | # port for Thrift to listen for clients on 341 | rpc_port: 9160 342 | 343 | # enable or disable keepalive on rpc/native connections 344 | rpc_keepalive: true 345 | 346 | # Cassandra provides two out-of-the-box options for the RPC Server: 347 | # 348 | # sync -> One thread per thrift connection. For a very large number of clients, memory 349 | # will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size 350 | # per thread, and that will correspond to your use of virtual memory (but physical memory 351 | # may be limited depending on use of stack space). 352 | # 353 | # hsha -> Stands for "half synchronous, half asynchronous." All thrift clients are handled 354 | # asynchronously using a small number of threads that does not vary with the amount 355 | # of thrift clients (and thus scales well to many clients). The rpc requests are still 356 | # synchronous (one thread per active request). 357 | # 358 | # The default is sync because on Windows hsha is about 30% slower. On Linux, 359 | # sync/hsha performance is about the same, with hsha of course using less memory. 360 | # 361 | # Alternatively, can provide your own RPC server by providing the fully-qualified class name 362 | # of an o.a.c.t.TServerFactory that can create an instance of it. 363 | rpc_server_type: sync 364 | 365 | # Uncomment rpc_min|max_thread to set request pool size limits. 366 | # 367 | # Regardless of your choice of RPC server (see above), the number of maximum requests in the 368 | # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync 369 | # RPC server, it also dictates the number of clients that can be connected at all). 370 | # 371 | # The default is unlimited and thus provides no protection against clients overwhelming the server. You are 372 | # encouraged to set a maximum that makes sense for you in production, but do keep in mind that 373 | # rpc_max_threads represents the maximum number of client requests this server may execute concurrently. 374 | # 375 | # rpc_min_threads: 16 376 | # rpc_max_threads: 2048 377 | 378 | # uncomment to set socket buffer sizes on rpc connections 379 | # rpc_send_buff_size_in_bytes: 380 | # rpc_recv_buff_size_in_bytes: 381 | 382 | # Uncomment to set socket buffer size for internode communication 383 | # Note that when setting this, the buffer size is limited by net.core.wmem_max 384 | # and when not setting it it is defined by net.ipv4.tcp_wmem 385 | # See: 386 | # /proc/sys/net/core/wmem_max 387 | # /proc/sys/net/core/rmem_max 388 | # /proc/sys/net/ipv4/tcp_wmem 389 | # /proc/sys/net/ipv4/tcp_wmem 390 | # and: man tcp 391 | # internode_send_buff_size_in_bytes: 392 | # internode_recv_buff_size_in_bytes: 393 | 394 | # Frame size for thrift (maximum message length). 395 | thrift_framed_transport_size_in_mb: 15 396 | 397 | # Set to true to have Cassandra create a hard link to each sstable 398 | # flushed or streamed locally in a backups/ subdirectory of the 399 | # keyspace data. Removing these links is the operator's 400 | # responsibility. 
401 | incremental_backups: false 402 | 403 | # Whether or not to take a snapshot before each compaction. Be 404 | # careful using this option, since Cassandra won't clean up the 405 | # snapshots for you. Mostly useful if you're paranoid when there 406 | # is a data format change. 407 | snapshot_before_compaction: false 408 | 409 | # Whether or not a snapshot is taken of the data before keyspace truncation 410 | # or dropping of column families. The STRONGLY advised default of true 411 | # should be used to provide data safety. If you set this flag to false, you will 412 | # lose data on truncation or drop. 413 | auto_snapshot: true 414 | 415 | # When executing a scan, within or across a partition, we need to keep the 416 | # tombstones seen in memory so we can return them to the coordinator, which 417 | # will use them to make sure other replicas also know about the deleted rows. 418 | # With workloads that generate a lot of tombstones, this can cause performance 419 | # problems and even exaust the server heap. 420 | # (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) 421 | # Adjust the thresholds here if you understand the dangers and want to 422 | # scan more tombstones anyway. These thresholds may also be adjusted at runtime 423 | # using the StorageService mbean. 424 | tombstone_warn_threshold: 1000 425 | tombstone_failure_threshold: 100000 426 | 427 | # Add column indexes to a row after its contents reach this size. 428 | # Increase if your column values are large, or if you have a very large 429 | # number of columns. The competing causes are, Cassandra has to 430 | # deserialize this much of the row to read a single column, so you want 431 | # it to be small - at least if you do many partial-row reads - but all 432 | # the index data is read for each access, so you don't want to generate 433 | # that wastefully either. 434 | column_index_size_in_kb: 64 435 | 436 | 437 | # Log WARN on any batch size exceeding this value. 5kb per batch by default. 438 | # Caution should be taken on increasing the size of this threshold as it can lead to node instability. 439 | batch_size_warn_threshold_in_kb: 5 440 | 441 | # Size limit for rows being compacted in memory. Larger rows will spill 442 | # over to disk and use a slower two-pass compaction process. A message 443 | # will be logged specifying the row key. 444 | in_memory_compaction_limit_in_mb: 64 445 | 446 | # Number of simultaneous compactions to allow, NOT including 447 | # validation "compactions" for anti-entropy repair. Simultaneous 448 | # compactions can help preserve read performance in a mixed read/write 449 | # workload, by mitigating the tendency of small sstables to accumulate 450 | # during a single long running compactions. The default is usually 451 | # fine and if you experience problems with compaction running too 452 | # slowly or too fast, you should look at 453 | # compaction_throughput_mb_per_sec first. 454 | # 455 | # concurrent_compactors defaults to the number of cores. 456 | # Uncomment to make compaction mono-threaded, the pre-0.8 default. 457 | #concurrent_compactors: 1 458 | 459 | # Multi-threaded compaction. When enabled, each compaction will use 460 | # up to one thread per core, plus one thread per sstable being merged. 461 | # This is usually only useful for SSD-based hardware: otherwise, 462 | # your concern is usually to get compaction to do LESS i/o (see: 463 | # compaction_throughput_mb_per_sec), not more. 
464 | multithreaded_compaction: false 465 | 466 | # Throttles compaction to the given total throughput across the entire 467 | # system. The faster you insert data, the faster you need to compact in 468 | # order to keep the sstable count down, but in general, setting this to 469 | # 16 to 32 times the rate you are inserting data is more than sufficient. 470 | # Setting this to 0 disables throttling. Note that this account for all types 471 | # of compaction, including validation compaction. 472 | compaction_throughput_mb_per_sec: 16 473 | 474 | # Track cached row keys during compaction, and re-cache their new 475 | # positions in the compacted sstable. Disable if you use really large 476 | # key caches. 477 | compaction_preheat_key_cache: true 478 | 479 | # Throttles all outbound streaming file transfers on this node to the 480 | # given total throughput in Mbps. This is necessary because Cassandra does 481 | # mostly sequential IO when streaming data during bootstrap or repair, which 482 | # can lead to saturating the network connection and degrading rpc performance. 483 | # When unset, the default is 200 Mbps or 25 MB/s. 484 | # stream_throughput_outbound_megabits_per_sec: 200 485 | 486 | # How long the coordinator should wait for read operations to complete 487 | read_request_timeout_in_ms: 5000 488 | # How long the coordinator should wait for seq or index scans to complete 489 | range_request_timeout_in_ms: 10000 490 | # How long the coordinator should wait for writes to complete 491 | write_request_timeout_in_ms: 2000 492 | # How long a coordinator should continue to retry a CAS operation 493 | # that contends with other proposals for the same row 494 | cas_contention_timeout_in_ms: 1000 495 | # How long the coordinator should wait for truncates to complete 496 | # (This can be much longer, because unless auto_snapshot is disabled 497 | # we need to flush first so we can snapshot before removing the data.) 498 | truncate_request_timeout_in_ms: 60000 499 | # The default timeout for other, miscellaneous operations 500 | request_timeout_in_ms: 10000 501 | 502 | # Enable operation timeout information exchange between nodes to accurately 503 | # measure request timeouts. If disabled, replicas will assume that requests 504 | # were forwarded to them instantly by the coordinator, which means that 505 | # under overload conditions we will waste that much extra time processing 506 | # already-timed-out requests. 507 | # 508 | # Warning: before enabling this property make sure to ntp is installed 509 | # and the times are synchronized between the nodes. 510 | cross_node_timeout: false 511 | 512 | # Enable socket timeout for streaming operation. 513 | # When a timeout occurs during streaming, streaming is retried from the start 514 | # of the current file. This _can_ involve re-streaming an important amount of 515 | # data, so you should avoid setting the value too low. 516 | # Default value is 0, which never timeout streams. 517 | # streaming_socket_timeout_in_ms: 0 518 | 519 | # phi value that must be reached for a host to be marked down. 520 | # most users should never need to adjust this. 521 | # phi_convict_threshold: 8 522 | 523 | # endpoint_snitch -- Set this to a class that implements 524 | # IEndpointSnitch. The snitch has two functions: 525 | # - it teaches Cassandra enough about your network topology to route 526 | # requests efficiently 527 | # - it allows Cassandra to spread replicas around your cluster to avoid 528 | # correlated failures. 
It does this by grouping machines into 529 | # "datacenters" and "racks." Cassandra will do its best not to have 530 | # more than one replica on the same "rack" (which may not actually 531 | # be a physical location) 532 | # 533 | # IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER, 534 | # YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS 535 | # ARE PLACED. 536 | # 537 | # Out of the box, Cassandra provides 538 | # - SimpleSnitch: 539 | # Treats Strategy order as proximity. This can improve cache 540 | # locality when disabling read repair. Only appropriate for 541 | # single-datacenter deployments. 542 | # - GossipingPropertyFileSnitch 543 | # This should be your go-to snitch for production use. The rack 544 | # and datacenter for the local node are defined in 545 | # cassandra-rackdc.properties and propagated to other nodes via 546 | # gossip. If cassandra-topology.properties exists, it is used as a 547 | # fallback, allowing migration from the PropertyFileSnitch. 548 | # - PropertyFileSnitch: 549 | # Proximity is determined by rack and data center, which are 550 | # explicitly configured in cassandra-topology.properties. 551 | # - Ec2Snitch: 552 | # Appropriate for EC2 deployments in a single Region. Loads Region 553 | # and Availability Zone information from the EC2 API. The Region is 554 | # treated as the datacenter, and the Availability Zone as the rack. 555 | # Only private IPs are used, so this will not work across multiple 556 | # Regions. 557 | # - Ec2MultiRegionSnitch: 558 | # Uses public IPs as broadcast_address to allow cross-region 559 | # connectivity. (Thus, you should set seed addresses to the public 560 | # IP as well.) You will need to open the storage_port or 561 | # ssl_storage_port on the public IP firewall. (For intra-Region 562 | # traffic, Cassandra will switch to the private IP after 563 | # establishing a connection.) 564 | # - RackInferringSnitch: 565 | # Proximity is determined by rack and data center, which are 566 | # assumed to correspond to the 3rd and 2nd octet of each node's IP 567 | # address, respectively. Unless this happens to match your 568 | # deployment conventions, this is best used as an example of 569 | # writing a custom Snitch class and is provided in that spirit. 570 | # 571 | # You can use a custom Snitch by setting this to the full class name 572 | # of the snitch, which will be assumed to be on your classpath. 573 | endpoint_snitch: {{ c.endpoint_snitch }} 574 | 575 | # controls how often to perform the more expensive part of host score 576 | # calculation 577 | dynamic_snitch_update_interval_in_ms: 100 578 | # controls how often to reset all host scores, allowing a bad host to 579 | # possibly recover 580 | dynamic_snitch_reset_interval_in_ms: 600000 581 | # if set greater than zero and read_repair_chance is < 1.0, this will allow 582 | # 'pinning' of replicas to hosts in order to increase cache capacity. 583 | # The badness threshold will control how much worse the pinned host has to be 584 | # before the dynamic snitch will prefer other replicas over it. This is 585 | # expressed as a double which represents a percentage. Thus, a value of 586 | # 0.2 means Cassandra would continue to prefer the static snitch values 587 | # until the pinned host was 20% worse than the fastest. 588 | dynamic_snitch_badness_threshold: 0.1 589 | 590 | # request_scheduler -- Set this to a class that implements 591 | # RequestScheduler, which will schedule incoming client requests 592 | # according to the specific policy. 
This is useful for multi-tenancy 593 | # with a single Cassandra cluster. 594 | # NOTE: This is specifically for requests from the client and does 595 | # not affect inter node communication. 596 | # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place 597 | # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of 598 | # client requests to a node with a separate queue for each 599 | # request_scheduler_id. The scheduler is further customized by 600 | # request_scheduler_options as described below. 601 | request_scheduler: org.apache.cassandra.scheduler.NoScheduler 602 | 603 | # Scheduler Options vary based on the type of scheduler 604 | # NoScheduler - Has no options 605 | # RoundRobin 606 | # - throttle_limit -- The throttle_limit is the number of in-flight 607 | # requests per client. Requests beyond 608 | # that limit are queued up until 609 | # running requests can complete. 610 | # The value of 80 here is twice the number of 611 | # concurrent_reads + concurrent_writes. 612 | # - default_weight -- default_weight is optional and allows for 613 | # overriding the default which is 1. 614 | # - weights -- Weights are optional and will default to 1 or the 615 | # overridden default_weight. The weight translates into how 616 | # many requests are handled during each turn of the 617 | # RoundRobin, based on the scheduler id. 618 | # 619 | # request_scheduler_options: 620 | # throttle_limit: 80 621 | # default_weight: 5 622 | # weights: 623 | # Keyspace1: 1 624 | # Keyspace2: 5 625 | 626 | # request_scheduler_id -- An identifier based on which to perform 627 | # the request scheduling. Currently the only valid option is keyspace. 628 | # request_scheduler_id: keyspace 629 | 630 | # Enable or disable inter-node encryption 631 | # Default settings are TLS v1, RSA 1024-bit keys (it is imperative that 632 | # users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher 633 | # suite for authentication, key exchange and encryption of the actual data transfers. 634 | # Use the DHE/ECDHE ciphers if running in FIPS 140 compliant mode. 635 | # NOTE: No custom encryption options are enabled at the moment 636 | # The available internode options are : all, none, dc, rack 637 | # 638 | # If set to dc cassandra will encrypt the traffic between the DCs 639 | # If set to rack cassandra will encrypt the traffic between the racks 640 | # 641 | # The passwords used in these options must match the passwords used when generating 642 | # the keystore and truststore. For instructions on generating these files, see: 643 | # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore 644 | # 645 | server_encryption_options: 646 | internode_encryption: none 647 | keystore: conf/.keystore 648 | keystore_password: cassandra 649 | truststore: conf/.truststore 650 | truststore_password: cassandra 651 | # More advanced defaults below: 652 | # protocol: TLS 653 | # algorithm: SunX509 654 | # store_type: JKS 655 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] 656 | # require_client_auth: false 657 | 658 | # enable or disable client/server encryption. 
659 | client_encryption_options: 660 | enabled: false 661 | keystore: conf/.keystore 662 | keystore_password: cassandra 663 | # require_client_auth: false 664 | # Set trustore and truststore_password if require_client_auth is true 665 | # truststore: conf/.truststore 666 | # truststore_password: cassandra 667 | # More advanced defaults below: 668 | # protocol: TLS 669 | # algorithm: SunX509 670 | # store_type: JKS 671 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] 672 | 673 | # internode_compression controls whether traffic between nodes is 674 | # compressed. 675 | # can be: all - all traffic is compressed 676 | # dc - traffic between different datacenters is compressed 677 | # none - nothing is compressed. 678 | internode_compression: all 679 | 680 | # Enable or disable tcp_nodelay for inter-dc communication. 681 | # Disabling it will result in larger (but fewer) network packets being sent, 682 | # reducing overhead from the TCP protocol itself, at the cost of increasing 683 | # latency if you block for cross-datacenter responses. 684 | inter_dc_tcp_nodelay: false 685 | 686 | # Enable or disable kernel page cache preheating from contents of the key cache after compaction. 687 | # When enabled it would preheat only first "page" (4KB) of each row to optimize 688 | # for sequential access. Note: This could be harmful for fat rows, see CASSANDRA-4937 689 | # for further details on that topic. 690 | preheat_kernel_page_cache: false 691 | -------------------------------------------------------------------------------- /cassandra/conf/cassandra_21x.yaml: -------------------------------------------------------------------------------- 1 | {%- from 'cassandra/settings.sls' import config as c with context %} 2 | # Cassandra storage config YAML 3 | 4 | # NOTE: 5 | # See http://wiki.apache.org/cassandra/StorageConfiguration for 6 | # full explanations of configuration directives 7 | # /NOTE 8 | 9 | # The name of the cluster. This is mainly used to prevent machines in 10 | # one logical cluster from joining another. 11 | cluster_name: {{ c.cluster_name }} 12 | 13 | # This defines the number of tokens randomly assigned to this node on the ring 14 | # The more tokens, relative to other nodes, the larger the proportion of data 15 | # that this node will store. You probably want all nodes to have the same number 16 | # of tokens assuming they have equal hardware capability. 17 | # 18 | # If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, 19 | # and will use the initial_token as described below. 20 | # 21 | # Specifying initial_token will override this setting on the node's initial start, 22 | # on subsequent starts, this setting will apply even if initial token is set. 23 | # 24 | # If you already have a cluster with 1 token per node, and wish to migrate to 25 | # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations 26 | num_tokens: 256 27 | 28 | # initial_token allows you to specify tokens manually. While you can use # it with 29 | # vnodes (num_tokens > 1, above) -- in which case you should provide a 30 | # comma-separated list -- it's primarily used when adding nodes # to legacy clusters 31 | # that do not have vnodes enabled. 
32 | # initial_token: 33 | 34 | # See http://wiki.apache.org/cassandra/HintedHandoff 35 | # May either be "true" or "false" to enable globally, or contain a list 36 | # of data centers to enable per-datacenter. 37 | # hinted_handoff_enabled: DC1,DC2 38 | hinted_handoff_enabled: true 39 | # this defines the maximum amount of time a dead host will have hints 40 | # generated. After it has been dead this long, new hints for it will not be 41 | # created until it has been seen alive and gone down again. 42 | max_hint_window_in_ms: 10800000 # 3 hours 43 | # Maximum throttle in KBs per second, per delivery thread. This will be 44 | # reduced proportionally to the number of nodes in the cluster. (If there 45 | # are two nodes in the cluster, each delivery thread will use the maximum 46 | # rate; if there are three, each will throttle to half of the maximum, 47 | # since we expect two nodes to be delivering hints simultaneously.) 48 | hinted_handoff_throttle_in_kb: 1024 49 | # Number of threads with which to deliver hints; 50 | # Consider increasing this number when you have multi-dc deployments, since 51 | # cross-dc handoff tends to be slower 52 | max_hints_delivery_threads: 2 53 | 54 | # Maximum throttle in KBs per second, total. This will be 55 | # reduced proportionally to the number of nodes in the cluster. 56 | batchlog_replay_throttle_in_kb: 1024 57 | 58 | # Authentication backend, implementing IAuthenticator; used to identify users 59 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, 60 | # PasswordAuthenticator}. 61 | # 62 | # - AllowAllAuthenticator performs no checks - set it to disable authentication. 63 | # - PasswordAuthenticator relies on username/password pairs to authenticate 64 | # users. It keeps usernames and hashed passwords in system_auth.credentials table. 65 | # Please increase system_auth keyspace replication factor if you use this authenticator. 66 | authenticator: AllowAllAuthenticator 67 | 68 | # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions 69 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, 70 | # CassandraAuthorizer}. 71 | # 72 | # - AllowAllAuthorizer allows any action to any user - set it to disable authorization. 73 | # - CassandraAuthorizer stores permissions in system_auth.permissions table. Please 74 | # increase system_auth keyspace replication factor if you use this authorizer. 75 | authorizer: AllowAllAuthorizer 76 | 77 | # Validity period for permissions cache (fetching permissions can be an 78 | # expensive operation depending on the authorizer, CassandraAuthorizer is 79 | # one example). Defaults to 2000, set to 0 to disable. 80 | # Will be disabled automatically for AllowAllAuthorizer. 81 | permissions_validity_in_ms: 2000 82 | 83 | # The partitioner is responsible for distributing groups of rows (by 84 | # partition key) across nodes in the cluster. You should leave this 85 | # alone for new clusters. The partitioner can NOT be changed without 86 | # reloading all data, so when upgrading you should set this to the 87 | # same partitioner you were already using. 88 | # 89 | # Besides Murmur3Partitioner, partitioners included for backwards 90 | # compatibility include RandomPartitioner, ByteOrderedPartitioner, and 91 | # OrderPreservingPartitioner. 92 | # 93 | partitioner: org.apache.cassandra.dht.Murmur3Partitioner 94 | 95 | # Directories where Cassandra should store data on disk. 
Cassandra 96 | # will spread data evenly across them, subject to the granularity of 97 | # the configured compaction strategy. 98 | # If not set, the default directory is $CASSANDRA_HOME/data/data. 99 | data_file_directories: 100 | {%- for d in c.data_file_directories %} 101 | - {{ d }} 102 | {%- endfor %} 103 | 104 | # commit log. when running on magnetic HDD, this should be a 105 | # separate spindle than the data directories. 106 | # If not set, the default directory is $CASSANDRA_HOME/data/commitlog. 107 | commitlog_directory: {{ c.commitlog_directory }} 108 | 109 | # policy for data disk failures: 110 | # stop_paranoid: shut down gossip and Thrift even for single-sstable errors. 111 | # stop: shut down gossip and Thrift, leaving the node effectively dead, but 112 | # can still be inspected via JMX. 113 | # best_effort: stop using the failed disk and respond to requests based on 114 | # remaining available sstables. This means you WILL see obsolete 115 | # data at CL.ONE! 116 | # ignore: ignore fatal errors and let requests fail, as in pre-1.2 Cassandra 117 | disk_failure_policy: stop 118 | 119 | # policy for commit disk failures: 120 | # stop: shut down gossip and Thrift, leaving the node effectively dead, but 121 | # can still be inspected via JMX. 122 | # stop_commit: shutdown the commit log, letting writes collect but 123 | # continuing to service reads, as in pre-2.0.5 Cassandra 124 | # ignore: ignore fatal errors and let the batches fail 125 | commit_failure_policy: stop 126 | 127 | # Maximum size of the key cache in memory. 128 | # 129 | # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the 130 | # minimum, sometimes more. The key cache is fairly tiny for the amount of 131 | # time it saves, so it's worthwhile to use it at large numbers. 132 | # The row cache saves even more time, but must contain the entire row, 133 | # so it is extremely space-intensive. It's best to only use the 134 | # row cache if you have hot rows or static rows. 135 | # 136 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 137 | # 138 | # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. 139 | key_cache_size_in_mb: 140 | 141 | # Duration in seconds after which Cassandra should 142 | # save the key cache. Caches are saved to saved_caches_directory as 143 | # specified in this configuration file. 144 | # 145 | # Saved caches greatly improve cold-start speeds, and is relatively cheap in 146 | # terms of I/O for the key cache. Row cache saving is much more expensive and 147 | # has limited use. 148 | # 149 | # Default is 14400 or 4 hours. 150 | key_cache_save_period: 14400 151 | 152 | # Number of keys from the key cache to save 153 | # Disabled by default, meaning all keys are going to be saved 154 | # key_cache_keys_to_save: 100 155 | 156 | # Maximum size of the row cache in memory. 157 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 158 | # 159 | # Default value is 0, to disable row caching. 160 | row_cache_size_in_mb: 0 161 | 162 | # Duration in seconds after which Cassandra should 163 | # save the row cache. Caches are saved to saved_caches_directory as specified 164 | # in this configuration file. 165 | # 166 | # Saved caches greatly improve cold-start speeds, and is relatively cheap in 167 | # terms of I/O for the key cache. Row cache saving is much more expensive and 168 | # has limited use. 
169 | # 170 | # Default is 0 to disable saving the row cache. 171 | row_cache_save_period: 0 172 | 173 | # Number of keys from the row cache to save 174 | # Disabled by default, meaning all keys are going to be saved 175 | # row_cache_keys_to_save: 100 176 | 177 | # Maximum size of the counter cache in memory. 178 | # 179 | # Counter cache helps to reduce counter locks' contention for hot counter cells. 180 | # In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before 181 | # write entirely. With RF > 1 a counter cache hit will still help to reduce the duration 182 | # of the lock hold, helping with hot counter cell updates, but will not allow skipping 183 | # the read entirely. Only the local (clock, count) tuple of a counter cell is kept 184 | # in memory, not the whole counter, so it's relatively cheap. 185 | # 186 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 187 | # 188 | # Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. 189 | # NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. 190 | counter_cache_size_in_mb: 191 | 192 | # Duration in seconds after which Cassandra should 193 | # save the counter cache (keys only). Caches are saved to saved_caches_directory as 194 | # specified in this configuration file. 195 | # 196 | # Default is 7200 or 2 hours. 197 | counter_cache_save_period: 7200 198 | 199 | # Number of keys from the counter cache to save 200 | # Disabled by default, meaning all keys are going to be saved 201 | # counter_cache_keys_to_save: 100 202 | 203 | # The off-heap memory allocator. Affects storage engine metadata as 204 | # well as caches. Experiments show that JEMAlloc saves some memory 205 | # than the native GCC allocator (i.e., JEMalloc is more 206 | # fragmentation-resistant). 207 | # 208 | # Supported values are: NativeAllocator, JEMallocAllocator 209 | # 210 | # If you intend to use JEMallocAllocator you have to install JEMalloc as library and 211 | # modify cassandra-env.sh as directed in the file. 212 | # 213 | # Defaults to NativeAllocator 214 | # memory_allocator: NativeAllocator 215 | 216 | # saved caches 217 | # If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. 218 | saved_caches_directory: {{ c.saved_caches_directory }} 219 | 220 | # commitlog_sync may be either "periodic" or "batch." 221 | # When in batch mode, Cassandra won't ack writes until the commit log 222 | # has been fsynced to disk. It will wait up to 223 | # commitlog_sync_batch_window_in_ms milliseconds for other writes, before 224 | # performing the sync. 225 | # 226 | # commitlog_sync: batch 227 | # commitlog_sync_batch_window_in_ms: 50 228 | # 229 | # the other option is "periodic" where writes may be acked immediately 230 | # and the CommitLog is simply synced every commitlog_sync_period_in_ms 231 | # milliseconds. commitlog_periodic_queue_size allows 1024*(CPU cores) pending 232 | # entries on the commitlog queue by default. If you are writing very large 233 | # blobs, you should reduce that; 16*cores works reasonably well for 1MB blobs. 234 | # It should be at least as large as the concurrent_writes setting. 235 | commitlog_sync: periodic 236 | commitlog_sync_period_in_ms: 10000 237 | # commitlog_periodic_queue_size: 238 | 239 | # The size of the individual commitlog file segments. 
A commitlog 240 | # segment may be archived, deleted, or recycled once all the data 241 | # in it (potentially from each columnfamily in the system) has been 242 | # flushed to sstables. 243 | # 244 | # The default size is 32, which is almost always fine, but if you are 245 | # archiving commitlog segments (see commitlog_archiving.properties), 246 | # then you probably want a finer granularity of archiving; 8 or 16 MB 247 | # is reasonable. 248 | commitlog_segment_size_in_mb: 32 249 | 250 | # any class that implements the SeedProvider interface and has a 251 | # constructor that takes a Map of parameters will do. 252 | seed_provider: 253 | # Addresses of hosts that are deemed contact points. 254 | # Cassandra nodes use this list of hosts to find each other and learn 255 | # the topology of the ring. You must change this if you are running 256 | # multiple nodes! 257 | - class_name: org.apache.cassandra.locator.SimpleSeedProvider 258 | parameters: 259 | # seeds is actually a comma-delimited list of addresses. 260 | # Ex: ",," 261 | - seeds: "{{ ','.join(c.seeds) }}" 262 | 263 | # For workloads with more data than can fit in memory, Cassandra's 264 | # bottleneck will be reads that need to fetch data from 265 | # disk. "concurrent_reads" should be set to (16 * number_of_drives) in 266 | # order to allow the operations to enqueue low enough in the stack 267 | # that the OS and drives can reorder them. Same applies to 268 | # "concurrent_counter_writes", since counter writes read the current 269 | # values before incrementing and writing them back. 270 | # 271 | # On the other hand, since writes are almost never IO bound, the ideal 272 | # number of "concurrent_writes" is dependent on the number of cores in 273 | # your system; (8 * number_of_cores) is a good rule of thumb. 274 | concurrent_reads: 32 275 | concurrent_writes: 32 276 | concurrent_counter_writes: 32 277 | 278 | # Total memory to use for sstable-reading buffers. Defaults to 279 | # the smaller of 1/4 of heap or 512MB. 280 | # file_cache_size_in_mb: 512 281 | 282 | # Total permitted memory to use for memtables. Cassandra will stop 283 | # accepting writes when the limit is exceeded until a flush completes, 284 | # and will trigger a flush based on memtable_cleanup_threshold 285 | # If omitted, Cassandra will set both to 1/4 the size of the heap. 286 | # memtable_heap_space_in_mb: 2048 287 | # memtable_offheap_space_in_mb: 2048 288 | 289 | # Ratio of occupied non-flushing memtable size to total permitted size 290 | # that will trigger a flush of the largest memtable. Lager mct will 291 | # mean larger flushes and hence less compaction, but also less concurrent 292 | # flush activity which can make it difficult to keep your disks fed 293 | # under heavy write load. 294 | # 295 | # memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) 296 | # memtable_cleanup_threshold: 0.11 297 | 298 | # Specify the way Cassandra allocates and manages memtable memory. 299 | # Options are: 300 | # heap_buffers: on heap nio buffers 301 | # offheap_buffers: off heap (direct) nio buffers 302 | # offheap_objects: native memory, eliminating nio buffer heap overhead 303 | memtable_allocation_type: heap_buffers 304 | 305 | # Total space to use for commitlogs. Since commitlog segments are 306 | # mmapped, and hence use up address space, the default size is 32 307 | # on 32-bit JVMs, and 8192 on 64-bit JVMs. 
308 | # 309 | # If space gets above this value (it will round up to the next nearest 310 | # segment multiple), Cassandra will flush every dirty CF in the oldest 311 | # segment and remove it. So a small total commitlog space will tend 312 | # to cause more flush activity on less-active columnfamilies. 313 | # commitlog_total_space_in_mb: 8192 314 | 315 | # This sets the amount of memtable flush writer threads. These will 316 | # be blocked by disk io, and each one will hold a memtable in memory 317 | # while blocked. 318 | # 319 | # memtable_flush_writers defaults to the smaller of (number of disks, 320 | # number of cores), with a minimum of 2 and a maximum of 8. 321 | # 322 | # If your data directories are backed by SSD, you should increase this 323 | # to the number of cores. 324 | #memtable_flush_writers: 8 325 | 326 | # A fixed memory pool size in MB for for SSTable index summaries. If left 327 | # empty, this will default to 5% of the heap size. If the memory usage of 328 | # all index summaries exceeds this limit, SSTables with low read rates will 329 | # shrink their index summaries in order to meet this limit. However, this 330 | # is a best-effort process. In extreme conditions Cassandra may need to use 331 | # more than this amount of memory. 332 | index_summary_capacity_in_mb: 333 | 334 | # How frequently index summaries should be resampled. This is done 335 | # periodically to redistribute memory from the fixed-size pool to sstables 336 | # proportional their recent read rates. Setting to -1 will disable this 337 | # process, leaving existing index summaries at their current sampling level. 338 | index_summary_resize_interval_in_minutes: 60 339 | 340 | # Whether to, when doing sequential writing, fsync() at intervals in 341 | # order to force the operating system to flush the dirty 342 | # buffers. Enable this to avoid sudden dirty buffer flushing from 343 | # impacting read latencies. Almost always a good idea on SSDs; not 344 | # necessarily on platters. 345 | trickle_fsync: false 346 | trickle_fsync_interval_in_kb: 10240 347 | 348 | # TCP port, for commands and data 349 | storage_port: 7000 350 | 351 | # SSL port, for encrypted communication. Unused unless enabled in 352 | # encryption_options 353 | ssl_storage_port: 7001 354 | 355 | # Address or interface to bind to and tell other Cassandra nodes to connect to. 356 | # You _must_ change this if you want multiple nodes to be able to communicate! 357 | # 358 | # Set listen_address OR listen_interface, not both. Interfaces must correspond 359 | # to a single address, IP aliasing is not supported. 360 | # 361 | # Leaving it blank leaves it up to InetAddress.getLocalHost(). This 362 | # will always do the Right Thing _if_ the node is properly configured 363 | # (hostname, name resolution, etc), and the Right Thing is to use the 364 | # address associated with the hostname (it might not be). 365 | # 366 | # Setting listen_address to 0.0.0.0 is always wrong. 367 | listen_address: {{ c.listen_address }} 368 | # listen_interface: eth0 369 | 370 | # Address to broadcast to other Cassandra nodes 371 | # Leaving this blank will set it to the same value as listen_address 372 | # broadcast_address: 1.2.3.4 373 | 374 | # Internode authentication backend, implementing IInternodeAuthenticator; 375 | # used to allow/disallow connections from peer nodes. 376 | # internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator 377 | 378 | # Whether to start the native transport server. 
379 | # Please note that the address on which the native transport is bound is the 380 | # same as the rpc_address. The port however is different and specified below. 381 | start_native_transport: true 382 | # port for the CQL native transport to listen for clients on 383 | native_transport_port: 9042 384 | # The maximum threads for handling requests when the native transport is used. 385 | # This is similar to rpc_max_threads though the default differs slightly (and 386 | # there is no native_transport_min_threads, idle threads will always be stopped 387 | # after 30 seconds). 388 | # native_transport_max_threads: 128 389 | # 390 | # The maximum size of allowed frame. Frame (requests) larger than this will 391 | # be rejected as invalid. The default is 256MB. 392 | # native_transport_max_frame_size_in_mb: 256 393 | 394 | # Whether to start the thrift rpc server. 395 | start_rpc: true 396 | 397 | # The address or interface to bind the Thrift RPC service and native transport 398 | # server to. 399 | # 400 | # Set rpc_address OR rpc_interface, not both. Interfaces must correspond 401 | # to a single address, IP aliasing is not supported. 402 | # 403 | # Leaving rpc_address blank has the same effect as on listen_address 404 | # (i.e. it will be based on the configured hostname of the node). 405 | # 406 | # Note that unlike listen_address, you can specify 0.0.0.0, but you must also 407 | # set broadcast_rpc_address to a value other than 0.0.0.0. 408 | rpc_address: {{ c.rpc_address }} 409 | # rpc_interface: eth1 410 | 411 | # port for Thrift to listen for clients on 412 | rpc_port: 9160 413 | 414 | # RPC address to broadcast to drivers and other Cassandra nodes. This cannot 415 | # be set to 0.0.0.0. If left blank, this will be set to the value of 416 | # rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must 417 | # be set. 418 | # broadcast_rpc_address: 1.2.3.4 419 | 420 | # enable or disable keepalive on rpc/native connections 421 | rpc_keepalive: true 422 | 423 | # Cassandra provides two out-of-the-box options for the RPC Server: 424 | # 425 | # sync -> One thread per thrift connection. For a very large number of clients, memory 426 | # will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size 427 | # per thread, and that will correspond to your use of virtual memory (but physical memory 428 | # may be limited depending on use of stack space). 429 | # 430 | # hsha -> Stands for "half synchronous, half asynchronous." All thrift clients are handled 431 | # asynchronously using a small number of threads that does not vary with the amount 432 | # of thrift clients (and thus scales well to many clients). The rpc requests are still 433 | # synchronous (one thread per active request). If hsha is selected then it is essential 434 | # that rpc_max_threads is changed from the default value of unlimited. 435 | # 436 | # The default is sync because on Windows hsha is about 30% slower. On Linux, 437 | # sync/hsha performance is about the same, with hsha of course using less memory. 438 | # 439 | # Alternatively, can provide your own RPC server by providing the fully-qualified class name 440 | # of an o.a.c.t.TServerFactory that can create an instance of it. 441 | rpc_server_type: sync 442 | 443 | # Uncomment rpc_min|max_thread to set request pool size limits. 
444 | # 445 | # Regardless of your choice of RPC server (see above), the number of maximum requests in the 446 | # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync 447 | # RPC server, it also dictates the number of clients that can be connected at all). 448 | # 449 | # The default is unlimited and thus provides no protection against clients overwhelming the server. You are 450 | # encouraged to set a maximum that makes sense for you in production, but do keep in mind that 451 | # rpc_max_threads represents the maximum number of client requests this server may execute concurrently. 452 | # 453 | # rpc_min_threads: 16 454 | # rpc_max_threads: 2048 455 | 456 | # uncomment to set socket buffer sizes on rpc connections 457 | # rpc_send_buff_size_in_bytes: 458 | # rpc_recv_buff_size_in_bytes: 459 | 460 | # Uncomment to set socket buffer size for internode communication 461 | # Note that when setting this, the buffer size is limited by net.core.wmem_max 462 | # and when it is not set, it is defined by net.ipv4.tcp_wmem 463 | # See: 464 | # /proc/sys/net/core/wmem_max 465 | # /proc/sys/net/core/rmem_max 466 | # /proc/sys/net/ipv4/tcp_wmem 467 | # /proc/sys/net/ipv4/tcp_rmem 468 | # and: man tcp 469 | # internode_send_buff_size_in_bytes: 470 | # internode_recv_buff_size_in_bytes: 471 | 472 | # Frame size for thrift (maximum message length). 473 | thrift_framed_transport_size_in_mb: 15 474 | 475 | # Set to true to have Cassandra create a hard link to each sstable 476 | # flushed or streamed locally in a backups/ subdirectory of the 477 | # keyspace data. Removing these links is the operator's 478 | # responsibility. 479 | incremental_backups: false 480 | 481 | # Whether or not to take a snapshot before each compaction. Be 482 | # careful using this option, since Cassandra won't clean up the 483 | # snapshots for you. Mostly useful if you're paranoid when there 484 | # is a data format change. 485 | snapshot_before_compaction: false 486 | 487 | # Whether or not a snapshot is taken of the data before keyspace truncation 488 | # or dropping of column families. The STRONGLY advised default of true 489 | # should be used to provide data safety. If you set this flag to false, you will 490 | # lose data on truncation or drop. 491 | auto_snapshot: true 492 | 493 | # When executing a scan, within or across a partition, we need to keep the 494 | # tombstones seen in memory so we can return them to the coordinator, which 495 | # will use them to make sure other replicas also know about the deleted rows. 496 | # With workloads that generate a lot of tombstones, this can cause performance 497 | # problems and even exhaust the server heap. 498 | # (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) 499 | # Adjust the thresholds here if you understand the dangers and want to 500 | # scan more tombstones anyway. These thresholds may also be adjusted at runtime 501 | # using the StorageService mbean. 502 | tombstone_warn_threshold: 1000 503 | tombstone_failure_threshold: 100000 504 | 505 | # Granularity of the collation index of rows within a partition. 506 | # Increase if your rows are large, or if you have a very large 507 | # number of rows per partition.
The competing goals are these: 508 | # 1) a smaller granularity means more index entries are generated 509 | # and looking up rows within the partition by collation column 510 | # is faster 511 | # 2) but, Cassandra will keep the collation index in memory for hot 512 | # rows (as part of the key cache), so a larger granularity means 513 | # you can cache more hot rows 514 | column_index_size_in_kb: 64 515 | 516 | 517 | # Log WARN on any batch size exceeding this value. 5kb per batch by default. 518 | # Caution should be taken on increasing the size of this threshold as it can lead to node instability. 519 | batch_size_warn_threshold_in_kb: 5 520 | 521 | # Number of simultaneous compactions to allow, NOT including 522 | # validation "compactions" for anti-entropy repair. Simultaneous 523 | # compactions can help preserve read performance in a mixed read/write 524 | # workload, by mitigating the tendency of small sstables to accumulate 525 | # during a single long running compaction. The default is usually 526 | # fine and if you experience problems with compaction running too 527 | # slowly or too fast, you should look at 528 | # compaction_throughput_mb_per_sec first. 529 | # 530 | # concurrent_compactors defaults to the smaller of (number of disks, 531 | # number of cores), with a minimum of 2 and a maximum of 8. 532 | # 533 | # If your data directories are backed by SSD, you should increase this 534 | # to the number of cores. 535 | #concurrent_compactors: 1 536 | 537 | # Throttles compaction to the given total throughput across the entire 538 | # system. The faster you insert data, the faster you need to compact in 539 | # order to keep the sstable count down, but in general, setting this to 540 | # 16 to 32 times the rate you are inserting data is more than sufficient. 541 | # Setting this to 0 disables throttling. Note that this accounts for all types 542 | # of compaction, including validation compaction. 543 | compaction_throughput_mb_per_sec: 16 544 | 545 | # When compacting, the replacement sstable(s) can be opened before they 546 | # are completely written, and used in place of the prior sstables for 547 | # any range that has been written. This helps to smoothly transfer reads 548 | # between the sstables, reducing page cache churn and keeping hot rows hot. 549 | sstable_preemptive_open_interval_in_mb: 50 550 | 551 | # Throttles all outbound streaming file transfers on this node to the 552 | # given total throughput in Mbps. This is necessary because Cassandra does 553 | # mostly sequential IO when streaming data during bootstrap or repair, which 554 | # can lead to saturating the network connection and degrading rpc performance. 555 | # When unset, the default is 200 Mbps or 25 MB/s.
556 | # stream_throughput_outbound_megabits_per_sec: 200 557 | 558 | # Throttles all streaming file transfer between the datacenters; 559 | # this setting allows users to throttle inter dc stream throughput in addition 560 | # to throttling all network stream traffic as configured with 561 | # stream_throughput_outbound_megabits_per_sec 562 | # inter_dc_stream_throughput_outbound_megabits_per_sec: 563 | 564 | # How long the coordinator should wait for read operations to complete 565 | read_request_timeout_in_ms: 5000 566 | # How long the coordinator should wait for seq or index scans to complete 567 | range_request_timeout_in_ms: 10000 568 | # How long the coordinator should wait for writes to complete 569 | write_request_timeout_in_ms: 2000 570 | # How long the coordinator should wait for counter writes to complete 571 | counter_write_request_timeout_in_ms: 5000 572 | # How long a coordinator should continue to retry a CAS operation 573 | # that contends with other proposals for the same row 574 | cas_contention_timeout_in_ms: 1000 575 | # How long the coordinator should wait for truncates to complete 576 | # (This can be much longer, because unless auto_snapshot is disabled 577 | # we need to flush first so we can snapshot before removing the data.) 578 | truncate_request_timeout_in_ms: 60000 579 | # The default timeout for other, miscellaneous operations 580 | request_timeout_in_ms: 10000 581 | 582 | # Enable operation timeout information exchange between nodes to accurately 583 | # measure request timeouts. If disabled, replicas will assume that requests 584 | # were forwarded to them instantly by the coordinator, which means that 585 | # under overload conditions we will waste that much extra time processing 586 | # already-timed-out requests. 587 | # 588 | # Warning: before enabling this property make sure that ntp is installed 589 | # and the times are synchronized between the nodes. 590 | cross_node_timeout: false 591 | 592 | # Enable socket timeout for streaming operation. 593 | # When a timeout occurs during streaming, streaming is retried from the start 594 | # of the current file. This _can_ involve re-streaming a substantial amount of 595 | # data, so you should avoid setting the value too low. 596 | # The default value is 0, which means streams never time out. 597 | # streaming_socket_timeout_in_ms: 0 598 | 599 | # phi value that must be reached for a host to be marked down. 600 | # most users should never need to adjust this. 601 | # phi_convict_threshold: 8 602 | 603 | # endpoint_snitch -- Set this to a class that implements 604 | # IEndpointSnitch. The snitch has two functions: 605 | # - it teaches Cassandra enough about your network topology to route 606 | # requests efficiently 607 | # - it allows Cassandra to spread replicas around your cluster to avoid 608 | # correlated failures. It does this by grouping machines into 609 | # "datacenters" and "racks." Cassandra will do its best not to have 610 | # more than one replica on the same "rack" (which may not actually 611 | # be a physical location) 612 | # 613 | # IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER, 614 | # YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS 615 | # ARE PLACED. 616 | # 617 | # Out of the box, Cassandra provides 618 | # - SimpleSnitch: 619 | # Treats Strategy order as proximity. This can improve cache 620 | # locality when disabling read repair. Only appropriate for 621 | # single-datacenter deployments.
622 | # - GossipingPropertyFileSnitch 623 | # This should be your go-to snitch for production use. The rack 624 | # and datacenter for the local node are defined in 625 | # cassandra-rackdc.properties and propagated to other nodes via 626 | # gossip. If cassandra-topology.properties exists, it is used as a 627 | # fallback, allowing migration from the PropertyFileSnitch. 628 | # - PropertyFileSnitch: 629 | # Proximity is determined by rack and data center, which are 630 | # explicitly configured in cassandra-topology.properties. 631 | # - Ec2Snitch: 632 | # Appropriate for EC2 deployments in a single Region. Loads Region 633 | # and Availability Zone information from the EC2 API. The Region is 634 | # treated as the datacenter, and the Availability Zone as the rack. 635 | # Only private IPs are used, so this will not work across multiple 636 | # Regions. 637 | # - Ec2MultiRegionSnitch: 638 | # Uses public IPs as broadcast_address to allow cross-region 639 | # connectivity. (Thus, you should set seed addresses to the public 640 | # IP as well.) You will need to open the storage_port or 641 | # ssl_storage_port on the public IP firewall. (For intra-Region 642 | # traffic, Cassandra will switch to the private IP after 643 | # establishing a connection.) 644 | # - RackInferringSnitch: 645 | # Proximity is determined by rack and data center, which are 646 | # assumed to correspond to the 3rd and 2nd octet of each node's IP 647 | # address, respectively. Unless this happens to match your 648 | # deployment conventions, this is best used as an example of 649 | # writing a custom Snitch class and is provided in that spirit. 650 | # 651 | # You can use a custom Snitch by setting this to the full class name 652 | # of the snitch, which will be assumed to be on your classpath. 653 | endpoint_snitch: {{ c.endpoint_snitch }} 654 | 655 | # controls how often to perform the more expensive part of host score 656 | # calculation 657 | dynamic_snitch_update_interval_in_ms: 100 658 | # controls how often to reset all host scores, allowing a bad host to 659 | # possibly recover 660 | dynamic_snitch_reset_interval_in_ms: 600000 661 | # if set greater than zero and read_repair_chance is < 1.0, this will allow 662 | # 'pinning' of replicas to hosts in order to increase cache capacity. 663 | # The badness threshold will control how much worse the pinned host has to be 664 | # before the dynamic snitch will prefer other replicas over it. This is 665 | # expressed as a double which represents a percentage. Thus, a value of 666 | # 0.2 means Cassandra would continue to prefer the static snitch values 667 | # until the pinned host was 20% worse than the fastest. 668 | dynamic_snitch_badness_threshold: 0.1 669 | 670 | # request_scheduler -- Set this to a class that implements 671 | # RequestScheduler, which will schedule incoming client requests 672 | # according to the specific policy. This is useful for multi-tenancy 673 | # with a single Cassandra cluster. 674 | # NOTE: This is specifically for requests from the client and does 675 | # not affect inter node communication. 676 | # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place 677 | # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of 678 | # client requests to a node with a separate queue for each 679 | # request_scheduler_id. The scheduler is further customized by 680 | # request_scheduler_options as described below. 
681 | request_scheduler: org.apache.cassandra.scheduler.NoScheduler 682 | 683 | # Scheduler Options vary based on the type of scheduler 684 | # NoScheduler - Has no options 685 | # RoundRobin 686 | # - throttle_limit -- The throttle_limit is the number of in-flight 687 | # requests per client. Requests beyond 688 | # that limit are queued up until 689 | # running requests can complete. 690 | # The value of 80 here is twice the number of 691 | # concurrent_reads + concurrent_writes. 692 | # - default_weight -- default_weight is optional and allows for 693 | # overriding the default which is 1. 694 | # - weights -- Weights are optional and will default to 1 or the 695 | # overridden default_weight. The weight translates into how 696 | # many requests are handled during each turn of the 697 | # RoundRobin, based on the scheduler id. 698 | # 699 | # request_scheduler_options: 700 | # throttle_limit: 80 701 | # default_weight: 5 702 | # weights: 703 | # Keyspace1: 1 704 | # Keyspace2: 5 705 | 706 | # request_scheduler_id -- An identifier based on which to perform 707 | # the request scheduling. Currently the only valid option is keyspace. 708 | # request_scheduler_id: keyspace 709 | 710 | # Enable or disable inter-node encryption 711 | # Default settings are TLS v1, RSA 1024-bit keys (it is imperative that 712 | # users generate their own keys), with TLS_RSA_WITH_AES_128_CBC_SHA as the cipher 713 | # suite for authentication, key exchange and encryption of the actual data transfers. 714 | # Use the DHE/ECDHE ciphers if running in FIPS 140 compliant mode. 715 | # NOTE: No custom encryption options are enabled at the moment 716 | # The available internode options are: all, none, dc, rack 717 | # 718 | # If set to dc, cassandra will encrypt the traffic between the DCs 719 | # If set to rack, cassandra will encrypt the traffic between the racks 720 | # 721 | # The passwords used in these options must match the passwords used when generating 722 | # the keystore and truststore. For instructions on generating these files, see: 723 | # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore 724 | # 725 | server_encryption_options: 726 | internode_encryption: none 727 | keystore: conf/.keystore 728 | keystore_password: cassandra 729 | truststore: conf/.truststore 730 | truststore_password: cassandra 731 | # More advanced defaults below: 732 | # protocol: TLS 733 | # algorithm: SunX509 734 | # store_type: JKS 735 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] 736 | # require_client_auth: false 737 | 738 | # enable or disable client/server encryption. 739 | client_encryption_options: 740 | enabled: false 741 | keystore: conf/.keystore 742 | keystore_password: cassandra 743 | # require_client_auth: false 744 | # Set truststore and truststore_password if require_client_auth is true 745 | # truststore: conf/.truststore 746 | # truststore_password: cassandra 747 | # More advanced defaults below: 748 | # protocol: TLS 749 | # algorithm: SunX509 750 | # store_type: JKS 751 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] 752 | 753 | # internode_compression controls whether traffic between nodes is 754 | # compressed.
755 | # can be: all - all traffic is compressed 756 | # dc - traffic between different datacenters is compressed 757 | # none - nothing is compressed. 758 | internode_compression: all 759 | 760 | # Enable or disable tcp_nodelay for inter-dc communication. 761 | # Disabling it will result in larger (but fewer) network packets being sent, 762 | # reducing overhead from the TCP protocol itself, at the cost of increasing 763 | # latency if you block for cross-datacenter responses. 764 | inter_dc_tcp_nodelay: false 765 | -------------------------------------------------------------------------------- /cassandra/init.sls: -------------------------------------------------------------------------------- 1 | {%- from 'cassandra/settings.sls' import cassandra with context %} 2 | 3 | {% if cassandra.install_java %} 4 | openjdk-8-jre-headless: 5 | pkg.installed: 6 | - require_in: 7 | - pkg: cassandra_package 8 | {% endif %} 9 | 10 | cassandra_package: 11 | pkgrepo.managed: 12 | - humanname: Cassandra Debian Repo 13 | - name: deb http://debian.datastax.com/community stable main 14 | - file: /etc/apt/sources.list.d/cassandra.sources.list 15 | - key_url: http://debian.datastax.com/debian/repo_key 16 | pkg.installed: 17 | - name: {{ cassandra.package_name }} 18 | - version: {{ cassandra.version }} 19 | 20 | cassandra_configuration: 21 | file.managed: 22 | - name: {{ cassandra.conf_path }} 23 | - user: root 24 | - group: root 25 | - mode: 644 26 | - source: salt://cassandra/conf/cassandra_{{ cassandra.series }}.yaml 27 | - template: jinja 28 | - require: 29 | - pkg: cassandra_package 30 | 31 | {% for d in cassandra.config.data_file_directories %} 32 | data_file_directories_{{ d }}: 33 | file.directory: 34 | - name: {{ d }} 35 | - user: cassandra 36 | - group: cassandra 37 | - mode: 755 38 | - makedirs: True 39 | {% endfor %} 40 | 41 | commitlog_directory: 42 | file.directory: 43 | - name: {{ cassandra.config.commitlog_directory }} 44 | - user: cassandra 45 | - group: cassandra 46 | - mode: 755 47 | - makedirs: True 48 | 49 | saved_caches_directory: 50 | file.directory: 51 | - name: {{ cassandra.config.saved_caches_directory }} 52 | - user: cassandra 53 | - group: cassandra 54 | - mode: 755 55 | - makedirs: True 56 | 57 | cassandra_service: 58 | service.running: 59 | - name: cassandra 60 | - enable: True 61 | - watch: 62 | - pkg: cassandra_package 63 | - file: cassandra_configuration 64 | -------------------------------------------------------------------------------- /cassandra/settings.sls: -------------------------------------------------------------------------------- 1 | {% set p = salt['pillar.get']('cassandra', {}) %} 2 | {% set pc = p.get('config', {}) %} 3 | {% set g = salt['grains.get']('cassandra', {}) %} 4 | {% set gc = g.get('config', {}) %} 5 | 6 | {% set install_java = g.get('install_java', p.get('install_java', False)) %} 7 | {% set version = g.get('version', p.get('version', '2.1.8')) %} 8 | {% set series = g.get('series', p.get('series', '21x')) %} 9 | {% set package_name = g.get('package_name', p.get('package_name', 'cassandra')) %} 10 | {% set conf_path = g.get('conf_path', p.get('conf_path', '/etc/cassandra/cassandra.yaml')) %} 11 | {% set auto_discovery = g.get('auto_discovery', p.get('auto_discovery', False)) %} 12 | 13 | {% set default_config = { 14 | 'cluster_name': 'Test Cluster', 15 | 'data_file_directories': ['/var/lib/cassandra/data'], 16 | 'commitlog_directory': '/var/lib/cassandra/commitlog', 17 | 'saved_caches_directory': '/var/lib/cassandra/saved_caches', 18 | 'seeds':
["127.0.0.1"], 19 | 'listen_address': 'localhost', 20 | 'rpc_address': 'localhost', 21 | 'endpoint_snitch': 'SimpleSnitch' 22 | }%} 23 | 24 | {%- set config = default_config %} 25 | 26 | {%- do config.update(pc) %} 27 | {%- do config.update(gc) %} 28 | 29 | {%- if auto_discovery %} 30 | 31 | {%- set force_mine_update = salt['mine.send']('network.get_hostname') %} 32 | {%- set cassandra_host_dict = salt['mine.get']('cassandra_cluster_name:' + config.cluster_name, 'network.get_hostname', 'grain') %} 33 | {%- set cassandra_hosts = cassandra_host_dict.values() %} 34 | {%- do cassandra_hosts.sort() %} 35 | {%- do config.update({'seeds':cassandra_hosts[:4]}) %} 36 | {%- endif %} 37 | 38 | {%- set cassandra = {} %} 39 | 40 | {%- do cassandra.update({ 41 | 'version': version, 42 | 'series': series, 43 | 'install_java': install_java, 44 | 'package_name': package_name, 45 | 'conf_path': conf_path, 46 | 'config': config 47 | }) %} 48 | -------------------------------------------------------------------------------- /vagrant/provision-config.sh: -------------------------------------------------------------------------------- 1 | MASTER_IP=$1 2 | NUM_MINIONS=$2 3 | MINION_IPS=$3 4 | 5 | INSTANCE_PREFIX=salt 6 | MASTER_NAME="${INSTANCE_PREFIX}-master" 7 | MASTER_TAG="${INSTANCE_PREFIX}-master" 8 | MINION_TAG="${INSTANCE_PREFIX}-minion" 9 | MINION_NAMES=($(eval echo ${INSTANCE_PREFIX}-minion-{1..${NUM_MINIONS}})) 10 | -------------------------------------------------------------------------------- /vagrant/provision-master.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | source $(dirname $0)/provision-config.sh 5 | 6 | if [ ! -f "/var/salt-vagrant-setup" ]; then 7 | mkdir -p /etc/salt/minion.d 8 | echo "master: $MASTER_NAME" > /etc/salt/minion.d/master.conf 9 | 10 | cat </etc/salt/minion.d/grains.conf 11 | grains: 12 | master_ip: $MASTER_IP 13 | minion_ips: $MINION_IPS 14 | roles: 15 | - salt-master 16 | EOF 17 | 18 | # Configure the salt-master 19 | # Auto accept all keys from minions that try to join 20 | mkdir -p /etc/salt/master.d 21 | cat </etc/salt/master.d/auto-accept.conf 22 | open_mode: True 23 | auto_accept: True 24 | EOF 25 | 26 | cat </etc/salt/master.d/fileserver.conf 27 | fileserver_backend: 28 | - roots 29 | EOF 30 | 31 | cat </etc/salt/master.d/reactor.conf 32 | # React to new minions starting by running highstate on them. 33 | reactor: 34 | - 'salt/minion/*/start': 35 | - /srv/reactor/start.sls 36 | EOF 37 | 38 | curl -sS -L https://bootstrap.saltstack.com | sh -s -- -M 39 | 40 | # a file we touch to state that base-setup is done 41 | echo "Salt configured" > /var/salt-vagrant-setup 42 | salt-call state.highstate 43 | fi 44 | -------------------------------------------------------------------------------- /vagrant/provision-minion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Google Inc. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # exit on any error 18 | set -e 19 | source $(dirname $0)/provision-config.sh 20 | 21 | MINION_IP=$4 22 | MINION_INDEX=$5 23 | # provisioning runs each time we test in order to update code, so we do not want to re-install salt each time 24 | if [ ! -f "/var/salt-vagrant-setup" ]; then 25 | 26 | if [ ! "$(cat /etc/hosts | grep $MASTER_NAME)" ]; then 27 | echo "Adding host entry for $MASTER_NAME" 28 | echo "$MASTER_IP $MASTER_NAME" >> /etc/hosts 29 | fi 30 | 31 | # Prepopulate the name of the Master 32 | mkdir -p /etc/salt/minion.d 33 | echo "master: $MASTER_NAME" > /etc/salt/minion.d/master.conf 34 | 35 | # Our minions will have a pool role to distinguish them from the master. 36 | cat <<EOF >/etc/salt/minion.d/grains.conf 37 | grains: 38 | minion_ip: $MINION_IP 39 | roles: 40 | - salt-pool 41 | kafka: 42 | broker_id: $MINION_INDEX 43 | EOF 44 | 45 | curl -sS -L https://bootstrap.saltstack.com | sh -s -- -X 46 | 47 | # a file we touch to state that base-setup is done 48 | echo "Salt configured" > /var/salt-vagrant-setup 49 | 50 | fi 51 | -------------------------------------------------------------------------------- /vagrant/srv/pillar/cassandra.sls: -------------------------------------------------------------------------------- 1 | cassandra: 2 | version: 2.1.8 3 | series: 21x 4 | install_java: True 5 | config: 6 | cluster_name: test-cluster 7 | seeds: 8 | - '10.245.1.2' 9 | listen_address: {{ grains['ip_interfaces']['eth1'][0] }} 10 | rpc_address: {{ grains['ip_interfaces']['eth1'][0] }} 11 | endpoint_snitch: GossipingPropertyFileSnitch 12 | -------------------------------------------------------------------------------- /vagrant/srv/pillar/top.sls: -------------------------------------------------------------------------------- 1 | base: 2 | '*': 3 | - cassandra -------------------------------------------------------------------------------- /vagrant/srv/salt/java.sls: -------------------------------------------------------------------------------- 1 | openjdk-7-jre-headless: 2 | pkg.installed -------------------------------------------------------------------------------- /vagrant/srv/salt/top.sls: -------------------------------------------------------------------------------- 1 | base: 2 | 'salt-master': 3 | - cassandra --------------------------------------------------------------------------------
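Usage note: cassandra/settings.sls resolves every option from the cassandra grain first and the cassandra pillar second, so a cluster can be configured without editing the formula itself. When auto_discovery is enabled, the seed list is built from the Salt mine by matching minions on a cassandra_cluster_name grain (the mine.get call in settings.sls), so that grain must be present on every node that should join. Below is a minimal sketch of such a setup; the file paths, IP addresses, cluster name, and grain placement are illustrative assumptions, not part of the repository.

    # /srv/pillar/cassandra.sls -- example pillar; values are placeholders
    cassandra:
      version: 2.1.8
      series: 21x
      install_java: True
      auto_discovery: True        # let settings.sls collect seeds from the mine
      config:
        cluster_name: example-cluster
        endpoint_snitch: GossipingPropertyFileSnitch
        # listen_address/rpc_address can also be rendered per node from grains,
        # as vagrant/srv/pillar/cassandra.sls does with ip_interfaces
        listen_address: 10.0.0.11
        rpc_address: 10.0.0.11

    # /etc/salt/grains -- example static grain on each Cassandra minion; the
    # value must equal config.cluster_name for the grain-targeted mine.get
    # in settings.sls to find the node
    cassandra_cluster_name: example-cluster

With the pillar and grain in place, running `salt -G 'cassandra_cluster_name:example-cluster' state.sls cassandra` (or an equivalent top.sls entry) would render cassandra_{{ series }}.yaml with the first four discovered hostnames as seeds, instead of the static seed list the Vagrant pillar uses.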