├── .gitignore
├── LICENSE
├── README.md
├── Vagrantfile
├── cassandra
│   ├── conf
│   │   ├── cassandra_20x.yaml
│   │   └── cassandra_21x.yaml
│   ├── init.sls
│   └── settings.sls
└── vagrant
    ├── provision-config.sh
    ├── provision-master.sh
    ├── provision-minion.sh
    └── srv
        ├── pillar
        │   ├── cassandra.sls
        │   └── top.sls
        └── salt
            ├── java.sls
            └── top.sls

/.gitignore:
--------------------------------------------------------------------------------
1 | .vagrant
2 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2014 Viktor Taranenko
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | cassandra-formula
2 | =================
3 | 
4 | Salt Formula to set up and configure Cassandra cluster
5 | 
--------------------------------------------------------------------------------
/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 | 
4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
5 | VAGRANTFILE_API_VERSION = "2"
6 | 
7 | # Require a recent version of Vagrant, otherwise some have reported errors setting host names on boxes
8 | Vagrant.require_version ">= 1.6.3"
9 | 
10 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
11 | 
12 |   # The number of minions to provision
13 |   num_minion = (ENV['NUM_MINIONS'] || 0).to_i
14 | 
15 |   # ip configuration
16 |   master_ip = "10.245.1.2"
17 |   minion_ip_base = "10.245.2."
18 |   minion_ips = num_minion.times.collect { |n| minion_ip_base + "#{n+2}" }
19 |   minion_ips_str = minion_ips.join(",")
20 | 
21 |   config.vm.box = "debian/jessie64"
22 | 
23 |   config.vm.provider :virtualbox do |v|
24 |     # On VirtualBox, the debian/jessie64 box ships without guest additions or a
25 |     # functional vboxsf, so tell Vagrant that so it can be smarter.
26 |     v.check_guest_additions = false
27 |     v.functional_vboxsf = false
28 |     v.memory = 1536
29 |     v.cpus = 1
30 |   end
31 | 
32 |   config.vm.define "master" do |config|
33 |     config.vm.provision "shell", inline: "/vagrant/vagrant/provision-master.sh #{master_ip} #{num_minion} #{minion_ips_str}"
34 |     config.vm.network "private_network", ip: master_ip
35 |     config.vm.hostname = "salt-master"
36 |     config.vm.synced_folder "vagrant/srv/salt", "/srv/salt"
37 |     config.vm.synced_folder "cassandra", "/srv/salt/cassandra"
38 |     config.vm.synced_folder "vagrant/srv/pillar", "/srv/pillar"
39 |   end
40 | 
41 |   num_minion.times do |n|
42 |     config.vm.define "minion-#{n+1}" do |minion|
43 |       minion_index = n+1
44 |       minion_ip = minion_ips[n]
45 |       minion.vm.provision "shell", inline: "/vagrant/vagrant/provision-minion.sh #{master_ip} #{num_minion} #{minion_ips_str} #{minion_ip} #{minion_index}"
46 |       minion.vm.network "private_network", ip: minion_ip
47 |       minion.vm.hostname = "salt-minion-#{minion_index}"
48 |     end
49 |   end
50 | 
51 | end
52 | 
--------------------------------------------------------------------------------
/cassandra/conf/cassandra_20x.yaml:
--------------------------------------------------------------------------------
1 | {%- from 'cassandra/settings.sls' import config as c with context %}
2 | 
3 | # Cassandra storage config YAML
4 | 
5 | # NOTE:
6 | # See http://wiki.apache.org/cassandra/StorageConfiguration for
7 | # full explanations of configuration directives
8 | # /NOTE
9 | 
10 | # The name of the cluster. This is mainly used to prevent machines in
11 | # one logical cluster from joining another.
12 | cluster_name: {{ c.cluster_name }}
13 | 
14 | # This defines the number of tokens randomly assigned to this node on the ring
15 | # The more tokens, relative to other nodes, the larger the proportion of data
16 | # that this node will store. You probably want all nodes to have the same number
17 | # of tokens assuming they have equal hardware capability.
18 | #
19 | # If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
20 | # and will use the initial_token as described below.
21 | #
22 | # Specifying initial_token will override this setting.
23 | #
24 | # If you already have a cluster with 1 token per node, and wish to migrate to
25 | # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
26 | num_tokens: 256
27 | 
28 | # initial_token allows you to specify tokens manually. While you can use it with
29 | # vnodes (num_tokens > 1, above) -- in which case you should provide a
30 | # comma-separated list -- it's primarily used when adding nodes to legacy clusters
31 | # that do not have vnodes enabled.
32 | # initial_token:
33 | 
34 | # May either be "true" or "false" to enable globally, or contain a list
35 | # of data centers to enable per-datacenter.
36 | # hinted_handoff_enabled: DC1,DC2
37 | # See http://wiki.apache.org/cassandra/HintedHandoff
38 | hinted_handoff_enabled: true
39 | # this defines the maximum amount of time a dead host will have hints
40 | # generated. After it has been dead this long, new hints for it will not be
41 | # created until it has been seen alive and gone down again.
42 | max_hint_window_in_ms: 10800000 # 3 hours
43 | # Maximum throttle in KBs per second, per delivery thread. This will be
44 | # reduced proportionally to the number of nodes in the cluster.
(If there 45 | # are two nodes in the cluster, each delivery thread will use the maximum 46 | # rate; if there are three, each will throttle to half of the maximum, 47 | # since we expect two nodes to be delivering hints simultaneously.) 48 | hinted_handoff_throttle_in_kb: 1024 49 | # Number of threads with which to deliver hints; 50 | # Consider increasing this number when you have multi-dc deployments, since 51 | # cross-dc handoff tends to be slower 52 | max_hints_delivery_threads: 2 53 | 54 | # Maximum throttle in KBs per second, total. This will be 55 | # reduced proportionally to the number of nodes in the cluster. 56 | batchlog_replay_throttle_in_kb: 1024 57 | 58 | # Authentication backend, implementing IAuthenticator; used to identify users 59 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, 60 | # PasswordAuthenticator}. 61 | # 62 | # - AllowAllAuthenticator performs no checks - set it to disable authentication. 63 | # - PasswordAuthenticator relies on username/password pairs to authenticate 64 | # users. It keeps usernames and hashed passwords in system_auth.credentials table. 65 | # Please increase system_auth keyspace replication factor if you use this authenticator. 66 | authenticator: AllowAllAuthenticator 67 | 68 | # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions 69 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, 70 | # CassandraAuthorizer}. 71 | # 72 | # - AllowAllAuthorizer allows any action to any user - set it to disable authorization. 73 | # - CassandraAuthorizer stores permissions in system_auth.permissions table. Please 74 | # increase system_auth keyspace replication factor if you use this authorizer. 75 | authorizer: AllowAllAuthorizer 76 | 77 | # Validity period for permissions cache (fetching permissions can be an 78 | # expensive operation depending on the authorizer, CassandraAuthorizer is 79 | # one example). Defaults to 2000, set to 0 to disable. 80 | # Will be disabled automatically for AllowAllAuthorizer. 81 | permissions_validity_in_ms: 2000 82 | 83 | # The partitioner is responsible for distributing groups of rows (by 84 | # partition key) across nodes in the cluster. You should leave this 85 | # alone for new clusters. The partitioner can NOT be changed without 86 | # reloading all data, so when upgrading you should set this to the 87 | # same partitioner you were already using. 88 | # 89 | # Besides Murmur3Partitioner, partitioners included for backwards 90 | # compatibility include RandomPartitioner, ByteOrderedPartitioner, and 91 | # OrderPreservingPartitioner. 92 | # 93 | partitioner: org.apache.cassandra.dht.Murmur3Partitioner 94 | 95 | # Directories where Cassandra should store data on disk. Cassandra 96 | # will spread data evenly across them, subject to the granularity of 97 | # the configured compaction strategy. 98 | data_file_directories: 99 | {%- for d in c.data_file_directories %} 100 | - {{ d }} 101 | {% endfor %} 102 | 103 | # commit log 104 | commitlog_directory: {{ c.commitlog_directory }} 105 | 106 | # policy for data disk failures: 107 | # stop_paranoid: shut down gossip and Thrift even for single-sstable errors. 108 | # stop: shut down gossip and Thrift, leaving the node effectively dead, but 109 | # can still be inspected via JMX. 110 | # best_effort: stop using the failed disk and respond to requests based on 111 | # remaining available sstables. This means you WILL see obsolete 112 | # data at CL.ONE! 
113 | # ignore: ignore fatal errors and let requests fail, as in pre-1.2 Cassandra 114 | disk_failure_policy: stop 115 | 116 | # policy for commit disk failures: 117 | # stop: shut down gossip and Thrift, leaving the node effectively dead, but 118 | # can still be inspected via JMX. 119 | # stop_commit: shutdown the commit log, letting writes collect but 120 | # continuing to service reads, as in pre-2.0.5 Cassandra 121 | # ignore: ignore fatal errors and let the batches fail 122 | commit_failure_policy: stop 123 | 124 | # Maximum size of the key cache in memory. 125 | # 126 | # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the 127 | # minimum, sometimes more. The key cache is fairly tiny for the amount of 128 | # time it saves, so it's worthwhile to use it at large numbers. 129 | # The row cache saves even more time, but must contain the entire row, 130 | # so it is extremely space-intensive. It's best to only use the 131 | # row cache if you have hot rows or static rows. 132 | # 133 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 134 | # 135 | # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. 136 | key_cache_size_in_mb: 137 | 138 | # Duration in seconds after which Cassandra should 139 | # save the key cache. Caches are saved to saved_caches_directory as 140 | # specified in this configuration file. 141 | # 142 | # Saved caches greatly improve cold-start speeds, and is relatively cheap in 143 | # terms of I/O for the key cache. Row cache saving is much more expensive and 144 | # has limited use. 145 | # 146 | # Default is 14400 or 4 hours. 147 | key_cache_save_period: 14400 148 | 149 | # Number of keys from the key cache to save 150 | # Disabled by default, meaning all keys are going to be saved 151 | # key_cache_keys_to_save: 100 152 | 153 | # Maximum size of the row cache in memory. 154 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 155 | # 156 | # Default value is 0, to disable row caching. 157 | row_cache_size_in_mb: 0 158 | 159 | # Duration in seconds after which Cassandra should 160 | # safe the row cache. Caches are saved to saved_caches_directory as specified 161 | # in this configuration file. 162 | # 163 | # Saved caches greatly improve cold-start speeds, and is relatively cheap in 164 | # terms of I/O for the key cache. Row cache saving is much more expensive and 165 | # has limited use. 166 | # 167 | # Default is 0 to disable saving the row cache. 168 | row_cache_save_period: 0 169 | 170 | # Number of keys from the row cache to save 171 | # Disabled by default, meaning all keys are going to be saved 172 | # row_cache_keys_to_save: 100 173 | 174 | # The off-heap memory allocator. Affects storage engine metadata as 175 | # well as caches. Experiments show that JEMAlloc saves some memory 176 | # than the native GCC allocator (i.e., JEMalloc is more 177 | # fragmentation-resistant). 178 | # 179 | # Supported values are: NativeAllocator, JEMallocAllocator 180 | # 181 | # If you intend to use JEMallocAllocator you have to install JEMalloc as library and 182 | # modify cassandra-env.sh as directed in the file. 183 | # 184 | # Defaults to NativeAllocator 185 | # memory_allocator: NativeAllocator 186 | 187 | # saved caches 188 | saved_caches_directory: {{ c.saved_caches_directory }} 189 | 190 | # commitlog_sync may be either "periodic" or "batch." 
191 | # When in batch mode, Cassandra won't ack writes until the commit log 192 | # has been fsynced to disk. It will wait up to 193 | # commitlog_sync_batch_window_in_ms milliseconds for other writes, before 194 | # performing the sync. 195 | # 196 | # commitlog_sync: batch 197 | # commitlog_sync_batch_window_in_ms: 50 198 | # 199 | # the other option is "periodic" where writes may be acked immediately 200 | # and the CommitLog is simply synced every commitlog_sync_period_in_ms 201 | # milliseconds. By default this allows 1024*(CPU cores) pending 202 | # entries on the commitlog queue. If you are writing very large blobs, 203 | # you should reduce that; 16*cores works reasonably well for 1MB blobs. 204 | # It should be at least as large as the concurrent_writes setting. 205 | commitlog_sync: periodic 206 | commitlog_sync_period_in_ms: 10000 207 | # commitlog_periodic_queue_size: 208 | 209 | # The size of the individual commitlog file segments. A commitlog 210 | # segment may be archived, deleted, or recycled once all the data 211 | # in it (potentially from each columnfamily in the system) has been 212 | # flushed to sstables. 213 | # 214 | # The default size is 32, which is almost always fine, but if you are 215 | # archiving commitlog segments (see commitlog_archiving.properties), 216 | # then you probably want a finer granularity of archiving; 8 or 16 MB 217 | # is reasonable. 218 | commitlog_segment_size_in_mb: 32 219 | 220 | # any class that implements the SeedProvider interface and has a 221 | # constructor that takes a Map of parameters will do. 222 | seed_provider: 223 | # Addresses of hosts that are deemed contact points. 224 | # Cassandra nodes use this list of hosts to find each other and learn 225 | # the topology of the ring. You must change this if you are running 226 | # multiple nodes! 227 | - class_name: org.apache.cassandra.locator.SimpleSeedProvider 228 | parameters: 229 | # seeds is actually a comma-delimited list of addresses. 230 | # Ex: ",," 231 | - seeds: "{{ ','.join(c.seeds) }}" 232 | 233 | # For workloads with more data than can fit in memory, Cassandra's 234 | # bottleneck will be reads that need to fetch data from 235 | # disk. "concurrent_reads" should be set to (16 * number_of_drives) in 236 | # order to allow the operations to enqueue low enough in the stack 237 | # that the OS and drives can reorder them. 238 | # 239 | # On the other hand, since writes are almost never IO bound, the ideal 240 | # number of "concurrent_writes" is dependent on the number of cores in 241 | # your system; (8 * number_of_cores) is a good rule of thumb. 242 | concurrent_reads: 32 243 | concurrent_writes: 32 244 | 245 | # Total memory to use for sstable-reading buffers. Defaults to 246 | # the smaller of 1/4 of heap or 512MB. 247 | # file_cache_size_in_mb: 512 248 | 249 | # Total memory to use for memtables. Cassandra will flush the largest 250 | # memtable when this much memory is used. 251 | # If omitted, Cassandra will set it to 1/4 of the heap. 252 | # memtable_total_space_in_mb: 2048 253 | 254 | # Total space to use for commitlogs. Since commitlog segments are 255 | # mmapped, and hence use up address space, the default size is 32 256 | # on 32-bit JVMs, and 1024 on 64-bit JVMs. 257 | # 258 | # If space gets above this value (it will round up to the next nearest 259 | # segment multiple), Cassandra will flush every dirty CF in the oldest 260 | # segment and remove it. 
So a small total commitlog space will tend 261 | # to cause more flush activity on less-active columnfamilies. 262 | # commitlog_total_space_in_mb: 4096 263 | 264 | # This sets the amount of memtable flush writer threads. These will 265 | # be blocked by disk io, and each one will hold a memtable in memory 266 | # while blocked. If you have a large heap and many data directories, 267 | # you can increase this value for better flush performance. 268 | # By default this will be set to the amount of data directories defined. 269 | #memtable_flush_writers: 1 270 | 271 | # the number of full memtables to allow pending flush, that is, 272 | # waiting for a writer thread. At a minimum, this should be set to 273 | # the maximum number of secondary indexes created on a single CF. 274 | memtable_flush_queue_size: 4 275 | 276 | # Whether to, when doing sequential writing, fsync() at intervals in 277 | # order to force the operating system to flush the dirty 278 | # buffers. Enable this to avoid sudden dirty buffer flushing from 279 | # impacting read latencies. Almost always a good idea on SSDs; not 280 | # necessarily on platters. 281 | trickle_fsync: false 282 | trickle_fsync_interval_in_kb: 10240 283 | 284 | # TCP port, for commands and data 285 | storage_port: 7000 286 | 287 | # SSL port, for encrypted communication. Unused unless enabled in 288 | # encryption_options 289 | ssl_storage_port: 7001 290 | 291 | # Address to bind to and tell other Cassandra nodes to connect to. You 292 | # _must_ change this if you want multiple nodes to be able to 293 | # communicate! 294 | # 295 | # Leaving it blank leaves it up to InetAddress.getLocalHost(). This 296 | # will always do the Right Thing _if_ the node is properly configured 297 | # (hostname, name resolution, etc), and the Right Thing is to use the 298 | # address associated with the hostname (it might not be). 299 | # 300 | # Setting this to 0.0.0.0 is always wrong. 301 | listen_address: {{ c.listen_address }} 302 | 303 | # Address to broadcast to other Cassandra nodes 304 | # Leaving this blank will set it to the same value as listen_address 305 | # broadcast_address: 1.2.3.4 306 | 307 | # Internode authentication backend, implementing IInternodeAuthenticator; 308 | # used to allow/disallow connections from peer nodes. 309 | # internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator 310 | 311 | # Whether to start the native transport server. 312 | # Please note that the address on which the native transport is bound is the 313 | # same as the rpc_address. The port however is different and specified below. 314 | start_native_transport: true 315 | # port for the CQL native transport to listen for clients on 316 | native_transport_port: 9042 317 | # The maximum threads for handling requests when the native transport is used. 318 | # This is similar to rpc_max_threads though the default differs slightly (and 319 | # there is no native_transport_min_threads, idle threads will always be stopped 320 | # after 30 seconds). 321 | # native_transport_max_threads: 128 322 | # 323 | # The maximum size of allowed frame. Frame (requests) larger than this will 324 | # be rejected as invalid. The default is 256MB. 325 | # native_transport_max_frame_size_in_mb: 256 326 | 327 | # Whether to start the thrift rpc server. 328 | start_rpc: true 329 | 330 | # The address to bind the Thrift RPC service and native transport 331 | # server -- clients connect here. 
332 | # 333 | # Leaving this blank has the same effect it does for ListenAddress, 334 | # (i.e. it will be based on the configured hostname of the node). 335 | # 336 | # Note that unlike ListenAddress above, it is allowed to specify 0.0.0.0 337 | # here if you want to listen on all interfaces, but that will break clients 338 | # that rely on node auto-discovery. 339 | rpc_address: {{ c.rpc_address }} 340 | # port for Thrift to listen for clients on 341 | rpc_port: 9160 342 | 343 | # enable or disable keepalive on rpc/native connections 344 | rpc_keepalive: true 345 | 346 | # Cassandra provides two out-of-the-box options for the RPC Server: 347 | # 348 | # sync -> One thread per thrift connection. For a very large number of clients, memory 349 | # will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size 350 | # per thread, and that will correspond to your use of virtual memory (but physical memory 351 | # may be limited depending on use of stack space). 352 | # 353 | # hsha -> Stands for "half synchronous, half asynchronous." All thrift clients are handled 354 | # asynchronously using a small number of threads that does not vary with the amount 355 | # of thrift clients (and thus scales well to many clients). The rpc requests are still 356 | # synchronous (one thread per active request). 357 | # 358 | # The default is sync because on Windows hsha is about 30% slower. On Linux, 359 | # sync/hsha performance is about the same, with hsha of course using less memory. 360 | # 361 | # Alternatively, can provide your own RPC server by providing the fully-qualified class name 362 | # of an o.a.c.t.TServerFactory that can create an instance of it. 363 | rpc_server_type: sync 364 | 365 | # Uncomment rpc_min|max_thread to set request pool size limits. 366 | # 367 | # Regardless of your choice of RPC server (see above), the number of maximum requests in the 368 | # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync 369 | # RPC server, it also dictates the number of clients that can be connected at all). 370 | # 371 | # The default is unlimited and thus provides no protection against clients overwhelming the server. You are 372 | # encouraged to set a maximum that makes sense for you in production, but do keep in mind that 373 | # rpc_max_threads represents the maximum number of client requests this server may execute concurrently. 374 | # 375 | # rpc_min_threads: 16 376 | # rpc_max_threads: 2048 377 | 378 | # uncomment to set socket buffer sizes on rpc connections 379 | # rpc_send_buff_size_in_bytes: 380 | # rpc_recv_buff_size_in_bytes: 381 | 382 | # Uncomment to set socket buffer size for internode communication 383 | # Note that when setting this, the buffer size is limited by net.core.wmem_max 384 | # and when not setting it it is defined by net.ipv4.tcp_wmem 385 | # See: 386 | # /proc/sys/net/core/wmem_max 387 | # /proc/sys/net/core/rmem_max 388 | # /proc/sys/net/ipv4/tcp_wmem 389 | # /proc/sys/net/ipv4/tcp_wmem 390 | # and: man tcp 391 | # internode_send_buff_size_in_bytes: 392 | # internode_recv_buff_size_in_bytes: 393 | 394 | # Frame size for thrift (maximum message length). 395 | thrift_framed_transport_size_in_mb: 15 396 | 397 | # Set to true to have Cassandra create a hard link to each sstable 398 | # flushed or streamed locally in a backups/ subdirectory of the 399 | # keyspace data. Removing these links is the operator's 400 | # responsibility. 
401 | incremental_backups: false 402 | 403 | # Whether or not to take a snapshot before each compaction. Be 404 | # careful using this option, since Cassandra won't clean up the 405 | # snapshots for you. Mostly useful if you're paranoid when there 406 | # is a data format change. 407 | snapshot_before_compaction: false 408 | 409 | # Whether or not a snapshot is taken of the data before keyspace truncation 410 | # or dropping of column families. The STRONGLY advised default of true 411 | # should be used to provide data safety. If you set this flag to false, you will 412 | # lose data on truncation or drop. 413 | auto_snapshot: true 414 | 415 | # When executing a scan, within or across a partition, we need to keep the 416 | # tombstones seen in memory so we can return them to the coordinator, which 417 | # will use them to make sure other replicas also know about the deleted rows. 418 | # With workloads that generate a lot of tombstones, this can cause performance 419 | # problems and even exaust the server heap. 420 | # (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) 421 | # Adjust the thresholds here if you understand the dangers and want to 422 | # scan more tombstones anyway. These thresholds may also be adjusted at runtime 423 | # using the StorageService mbean. 424 | tombstone_warn_threshold: 1000 425 | tombstone_failure_threshold: 100000 426 | 427 | # Add column indexes to a row after its contents reach this size. 428 | # Increase if your column values are large, or if you have a very large 429 | # number of columns. The competing causes are, Cassandra has to 430 | # deserialize this much of the row to read a single column, so you want 431 | # it to be small - at least if you do many partial-row reads - but all 432 | # the index data is read for each access, so you don't want to generate 433 | # that wastefully either. 434 | column_index_size_in_kb: 64 435 | 436 | 437 | # Log WARN on any batch size exceeding this value. 5kb per batch by default. 438 | # Caution should be taken on increasing the size of this threshold as it can lead to node instability. 439 | batch_size_warn_threshold_in_kb: 5 440 | 441 | # Size limit for rows being compacted in memory. Larger rows will spill 442 | # over to disk and use a slower two-pass compaction process. A message 443 | # will be logged specifying the row key. 444 | in_memory_compaction_limit_in_mb: 64 445 | 446 | # Number of simultaneous compactions to allow, NOT including 447 | # validation "compactions" for anti-entropy repair. Simultaneous 448 | # compactions can help preserve read performance in a mixed read/write 449 | # workload, by mitigating the tendency of small sstables to accumulate 450 | # during a single long running compactions. The default is usually 451 | # fine and if you experience problems with compaction running too 452 | # slowly or too fast, you should look at 453 | # compaction_throughput_mb_per_sec first. 454 | # 455 | # concurrent_compactors defaults to the number of cores. 456 | # Uncomment to make compaction mono-threaded, the pre-0.8 default. 457 | #concurrent_compactors: 1 458 | 459 | # Multi-threaded compaction. When enabled, each compaction will use 460 | # up to one thread per core, plus one thread per sstable being merged. 461 | # This is usually only useful for SSD-based hardware: otherwise, 462 | # your concern is usually to get compaction to do LESS i/o (see: 463 | # compaction_throughput_mb_per_sec), not more. 
464 | multithreaded_compaction: false 465 | 466 | # Throttles compaction to the given total throughput across the entire 467 | # system. The faster you insert data, the faster you need to compact in 468 | # order to keep the sstable count down, but in general, setting this to 469 | # 16 to 32 times the rate you are inserting data is more than sufficient. 470 | # Setting this to 0 disables throttling. Note that this account for all types 471 | # of compaction, including validation compaction. 472 | compaction_throughput_mb_per_sec: 16 473 | 474 | # Track cached row keys during compaction, and re-cache their new 475 | # positions in the compacted sstable. Disable if you use really large 476 | # key caches. 477 | compaction_preheat_key_cache: true 478 | 479 | # Throttles all outbound streaming file transfers on this node to the 480 | # given total throughput in Mbps. This is necessary because Cassandra does 481 | # mostly sequential IO when streaming data during bootstrap or repair, which 482 | # can lead to saturating the network connection and degrading rpc performance. 483 | # When unset, the default is 200 Mbps or 25 MB/s. 484 | # stream_throughput_outbound_megabits_per_sec: 200 485 | 486 | # How long the coordinator should wait for read operations to complete 487 | read_request_timeout_in_ms: 5000 488 | # How long the coordinator should wait for seq or index scans to complete 489 | range_request_timeout_in_ms: 10000 490 | # How long the coordinator should wait for writes to complete 491 | write_request_timeout_in_ms: 2000 492 | # How long a coordinator should continue to retry a CAS operation 493 | # that contends with other proposals for the same row 494 | cas_contention_timeout_in_ms: 1000 495 | # How long the coordinator should wait for truncates to complete 496 | # (This can be much longer, because unless auto_snapshot is disabled 497 | # we need to flush first so we can snapshot before removing the data.) 498 | truncate_request_timeout_in_ms: 60000 499 | # The default timeout for other, miscellaneous operations 500 | request_timeout_in_ms: 10000 501 | 502 | # Enable operation timeout information exchange between nodes to accurately 503 | # measure request timeouts. If disabled, replicas will assume that requests 504 | # were forwarded to them instantly by the coordinator, which means that 505 | # under overload conditions we will waste that much extra time processing 506 | # already-timed-out requests. 507 | # 508 | # Warning: before enabling this property make sure to ntp is installed 509 | # and the times are synchronized between the nodes. 510 | cross_node_timeout: false 511 | 512 | # Enable socket timeout for streaming operation. 513 | # When a timeout occurs during streaming, streaming is retried from the start 514 | # of the current file. This _can_ involve re-streaming an important amount of 515 | # data, so you should avoid setting the value too low. 516 | # Default value is 0, which never timeout streams. 517 | # streaming_socket_timeout_in_ms: 0 518 | 519 | # phi value that must be reached for a host to be marked down. 520 | # most users should never need to adjust this. 521 | # phi_convict_threshold: 8 522 | 523 | # endpoint_snitch -- Set this to a class that implements 524 | # IEndpointSnitch. The snitch has two functions: 525 | # - it teaches Cassandra enough about your network topology to route 526 | # requests efficiently 527 | # - it allows Cassandra to spread replicas around your cluster to avoid 528 | # correlated failures. 
It does this by grouping machines into 529 | # "datacenters" and "racks." Cassandra will do its best not to have 530 | # more than one replica on the same "rack" (which may not actually 531 | # be a physical location) 532 | # 533 | # IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER, 534 | # YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS 535 | # ARE PLACED. 536 | # 537 | # Out of the box, Cassandra provides 538 | # - SimpleSnitch: 539 | # Treats Strategy order as proximity. This can improve cache 540 | # locality when disabling read repair. Only appropriate for 541 | # single-datacenter deployments. 542 | # - GossipingPropertyFileSnitch 543 | # This should be your go-to snitch for production use. The rack 544 | # and datacenter for the local node are defined in 545 | # cassandra-rackdc.properties and propagated to other nodes via 546 | # gossip. If cassandra-topology.properties exists, it is used as a 547 | # fallback, allowing migration from the PropertyFileSnitch. 548 | # - PropertyFileSnitch: 549 | # Proximity is determined by rack and data center, which are 550 | # explicitly configured in cassandra-topology.properties. 551 | # - Ec2Snitch: 552 | # Appropriate for EC2 deployments in a single Region. Loads Region 553 | # and Availability Zone information from the EC2 API. The Region is 554 | # treated as the datacenter, and the Availability Zone as the rack. 555 | # Only private IPs are used, so this will not work across multiple 556 | # Regions. 557 | # - Ec2MultiRegionSnitch: 558 | # Uses public IPs as broadcast_address to allow cross-region 559 | # connectivity. (Thus, you should set seed addresses to the public 560 | # IP as well.) You will need to open the storage_port or 561 | # ssl_storage_port on the public IP firewall. (For intra-Region 562 | # traffic, Cassandra will switch to the private IP after 563 | # establishing a connection.) 564 | # - RackInferringSnitch: 565 | # Proximity is determined by rack and data center, which are 566 | # assumed to correspond to the 3rd and 2nd octet of each node's IP 567 | # address, respectively. Unless this happens to match your 568 | # deployment conventions, this is best used as an example of 569 | # writing a custom Snitch class and is provided in that spirit. 570 | # 571 | # You can use a custom Snitch by setting this to the full class name 572 | # of the snitch, which will be assumed to be on your classpath. 573 | endpoint_snitch: {{ c.endpoint_snitch }} 574 | 575 | # controls how often to perform the more expensive part of host score 576 | # calculation 577 | dynamic_snitch_update_interval_in_ms: 100 578 | # controls how often to reset all host scores, allowing a bad host to 579 | # possibly recover 580 | dynamic_snitch_reset_interval_in_ms: 600000 581 | # if set greater than zero and read_repair_chance is < 1.0, this will allow 582 | # 'pinning' of replicas to hosts in order to increase cache capacity. 583 | # The badness threshold will control how much worse the pinned host has to be 584 | # before the dynamic snitch will prefer other replicas over it. This is 585 | # expressed as a double which represents a percentage. Thus, a value of 586 | # 0.2 means Cassandra would continue to prefer the static snitch values 587 | # until the pinned host was 20% worse than the fastest. 588 | dynamic_snitch_badness_threshold: 0.1 589 | 590 | # request_scheduler -- Set this to a class that implements 591 | # RequestScheduler, which will schedule incoming client requests 592 | # according to the specific policy. 
This is useful for multi-tenancy 593 | # with a single Cassandra cluster. 594 | # NOTE: This is specifically for requests from the client and does 595 | # not affect inter node communication. 596 | # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place 597 | # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of 598 | # client requests to a node with a separate queue for each 599 | # request_scheduler_id. The scheduler is further customized by 600 | # request_scheduler_options as described below. 601 | request_scheduler: org.apache.cassandra.scheduler.NoScheduler 602 | 603 | # Scheduler Options vary based on the type of scheduler 604 | # NoScheduler - Has no options 605 | # RoundRobin 606 | # - throttle_limit -- The throttle_limit is the number of in-flight 607 | # requests per client. Requests beyond 608 | # that limit are queued up until 609 | # running requests can complete. 610 | # The value of 80 here is twice the number of 611 | # concurrent_reads + concurrent_writes. 612 | # - default_weight -- default_weight is optional and allows for 613 | # overriding the default which is 1. 614 | # - weights -- Weights are optional and will default to 1 or the 615 | # overridden default_weight. The weight translates into how 616 | # many requests are handled during each turn of the 617 | # RoundRobin, based on the scheduler id. 618 | # 619 | # request_scheduler_options: 620 | # throttle_limit: 80 621 | # default_weight: 5 622 | # weights: 623 | # Keyspace1: 1 624 | # Keyspace2: 5 625 | 626 | # request_scheduler_id -- An identifier based on which to perform 627 | # the request scheduling. Currently the only valid option is keyspace. 628 | # request_scheduler_id: keyspace 629 | 630 | # Enable or disable inter-node encryption 631 | # Default settings are TLS v1, RSA 1024-bit keys (it is imperative that 632 | # users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher 633 | # suite for authentication, key exchange and encryption of the actual data transfers. 634 | # Use the DHE/ECDHE ciphers if running in FIPS 140 compliant mode. 635 | # NOTE: No custom encryption options are enabled at the moment 636 | # The available internode options are : all, none, dc, rack 637 | # 638 | # If set to dc cassandra will encrypt the traffic between the DCs 639 | # If set to rack cassandra will encrypt the traffic between the racks 640 | # 641 | # The passwords used in these options must match the passwords used when generating 642 | # the keystore and truststore. For instructions on generating these files, see: 643 | # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore 644 | # 645 | server_encryption_options: 646 | internode_encryption: none 647 | keystore: conf/.keystore 648 | keystore_password: cassandra 649 | truststore: conf/.truststore 650 | truststore_password: cassandra 651 | # More advanced defaults below: 652 | # protocol: TLS 653 | # algorithm: SunX509 654 | # store_type: JKS 655 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] 656 | # require_client_auth: false 657 | 658 | # enable or disable client/server encryption. 
659 | client_encryption_options: 660 | enabled: false 661 | keystore: conf/.keystore 662 | keystore_password: cassandra 663 | # require_client_auth: false 664 | # Set trustore and truststore_password if require_client_auth is true 665 | # truststore: conf/.truststore 666 | # truststore_password: cassandra 667 | # More advanced defaults below: 668 | # protocol: TLS 669 | # algorithm: SunX509 670 | # store_type: JKS 671 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] 672 | 673 | # internode_compression controls whether traffic between nodes is 674 | # compressed. 675 | # can be: all - all traffic is compressed 676 | # dc - traffic between different datacenters is compressed 677 | # none - nothing is compressed. 678 | internode_compression: all 679 | 680 | # Enable or disable tcp_nodelay for inter-dc communication. 681 | # Disabling it will result in larger (but fewer) network packets being sent, 682 | # reducing overhead from the TCP protocol itself, at the cost of increasing 683 | # latency if you block for cross-datacenter responses. 684 | inter_dc_tcp_nodelay: false 685 | 686 | # Enable or disable kernel page cache preheating from contents of the key cache after compaction. 687 | # When enabled it would preheat only first "page" (4KB) of each row to optimize 688 | # for sequential access. Note: This could be harmful for fat rows, see CASSANDRA-4937 689 | # for further details on that topic. 690 | preheat_kernel_page_cache: false 691 | -------------------------------------------------------------------------------- /cassandra/conf/cassandra_21x.yaml: -------------------------------------------------------------------------------- 1 | {%- from 'cassandra/settings.sls' import config as c with context %} 2 | # Cassandra storage config YAML 3 | 4 | # NOTE: 5 | # See http://wiki.apache.org/cassandra/StorageConfiguration for 6 | # full explanations of configuration directives 7 | # /NOTE 8 | 9 | # The name of the cluster. This is mainly used to prevent machines in 10 | # one logical cluster from joining another. 11 | cluster_name: {{ c.cluster_name }} 12 | 13 | # This defines the number of tokens randomly assigned to this node on the ring 14 | # The more tokens, relative to other nodes, the larger the proportion of data 15 | # that this node will store. You probably want all nodes to have the same number 16 | # of tokens assuming they have equal hardware capability. 17 | # 18 | # If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, 19 | # and will use the initial_token as described below. 20 | # 21 | # Specifying initial_token will override this setting on the node's initial start, 22 | # on subsequent starts, this setting will apply even if initial token is set. 23 | # 24 | # If you already have a cluster with 1 token per node, and wish to migrate to 25 | # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations 26 | num_tokens: 256 27 | 28 | # initial_token allows you to specify tokens manually. While you can use # it with 29 | # vnodes (num_tokens > 1, above) -- in which case you should provide a 30 | # comma-separated list -- it's primarily used when adding nodes # to legacy clusters 31 | # that do not have vnodes enabled. 
32 | # initial_token: 33 | 34 | # See http://wiki.apache.org/cassandra/HintedHandoff 35 | # May either be "true" or "false" to enable globally, or contain a list 36 | # of data centers to enable per-datacenter. 37 | # hinted_handoff_enabled: DC1,DC2 38 | hinted_handoff_enabled: true 39 | # this defines the maximum amount of time a dead host will have hints 40 | # generated. After it has been dead this long, new hints for it will not be 41 | # created until it has been seen alive and gone down again. 42 | max_hint_window_in_ms: 10800000 # 3 hours 43 | # Maximum throttle in KBs per second, per delivery thread. This will be 44 | # reduced proportionally to the number of nodes in the cluster. (If there 45 | # are two nodes in the cluster, each delivery thread will use the maximum 46 | # rate; if there are three, each will throttle to half of the maximum, 47 | # since we expect two nodes to be delivering hints simultaneously.) 48 | hinted_handoff_throttle_in_kb: 1024 49 | # Number of threads with which to deliver hints; 50 | # Consider increasing this number when you have multi-dc deployments, since 51 | # cross-dc handoff tends to be slower 52 | max_hints_delivery_threads: 2 53 | 54 | # Maximum throttle in KBs per second, total. This will be 55 | # reduced proportionally to the number of nodes in the cluster. 56 | batchlog_replay_throttle_in_kb: 1024 57 | 58 | # Authentication backend, implementing IAuthenticator; used to identify users 59 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, 60 | # PasswordAuthenticator}. 61 | # 62 | # - AllowAllAuthenticator performs no checks - set it to disable authentication. 63 | # - PasswordAuthenticator relies on username/password pairs to authenticate 64 | # users. It keeps usernames and hashed passwords in system_auth.credentials table. 65 | # Please increase system_auth keyspace replication factor if you use this authenticator. 66 | authenticator: AllowAllAuthenticator 67 | 68 | # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions 69 | # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, 70 | # CassandraAuthorizer}. 71 | # 72 | # - AllowAllAuthorizer allows any action to any user - set it to disable authorization. 73 | # - CassandraAuthorizer stores permissions in system_auth.permissions table. Please 74 | # increase system_auth keyspace replication factor if you use this authorizer. 75 | authorizer: AllowAllAuthorizer 76 | 77 | # Validity period for permissions cache (fetching permissions can be an 78 | # expensive operation depending on the authorizer, CassandraAuthorizer is 79 | # one example). Defaults to 2000, set to 0 to disable. 80 | # Will be disabled automatically for AllowAllAuthorizer. 81 | permissions_validity_in_ms: 2000 82 | 83 | # The partitioner is responsible for distributing groups of rows (by 84 | # partition key) across nodes in the cluster. You should leave this 85 | # alone for new clusters. The partitioner can NOT be changed without 86 | # reloading all data, so when upgrading you should set this to the 87 | # same partitioner you were already using. 88 | # 89 | # Besides Murmur3Partitioner, partitioners included for backwards 90 | # compatibility include RandomPartitioner, ByteOrderedPartitioner, and 91 | # OrderPreservingPartitioner. 92 | # 93 | partitioner: org.apache.cassandra.dht.Murmur3Partitioner 94 | 95 | # Directories where Cassandra should store data on disk. 
Cassandra 96 | # will spread data evenly across them, subject to the granularity of 97 | # the configured compaction strategy. 98 | # If not set, the default directory is $CASSANDRA_HOME/data/data. 99 | data_file_directories: 100 | {%- for d in c.data_file_directories %} 101 | - {{ d }} 102 | {%- endfor %} 103 | 104 | # commit log. when running on magnetic HDD, this should be a 105 | # separate spindle than the data directories. 106 | # If not set, the default directory is $CASSANDRA_HOME/data/commitlog. 107 | commitlog_directory: {{ c.commitlog_directory }} 108 | 109 | # policy for data disk failures: 110 | # stop_paranoid: shut down gossip and Thrift even for single-sstable errors. 111 | # stop: shut down gossip and Thrift, leaving the node effectively dead, but 112 | # can still be inspected via JMX. 113 | # best_effort: stop using the failed disk and respond to requests based on 114 | # remaining available sstables. This means you WILL see obsolete 115 | # data at CL.ONE! 116 | # ignore: ignore fatal errors and let requests fail, as in pre-1.2 Cassandra 117 | disk_failure_policy: stop 118 | 119 | # policy for commit disk failures: 120 | # stop: shut down gossip and Thrift, leaving the node effectively dead, but 121 | # can still be inspected via JMX. 122 | # stop_commit: shutdown the commit log, letting writes collect but 123 | # continuing to service reads, as in pre-2.0.5 Cassandra 124 | # ignore: ignore fatal errors and let the batches fail 125 | commit_failure_policy: stop 126 | 127 | # Maximum size of the key cache in memory. 128 | # 129 | # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the 130 | # minimum, sometimes more. The key cache is fairly tiny for the amount of 131 | # time it saves, so it's worthwhile to use it at large numbers. 132 | # The row cache saves even more time, but must contain the entire row, 133 | # so it is extremely space-intensive. It's best to only use the 134 | # row cache if you have hot rows or static rows. 135 | # 136 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 137 | # 138 | # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. 139 | key_cache_size_in_mb: 140 | 141 | # Duration in seconds after which Cassandra should 142 | # save the key cache. Caches are saved to saved_caches_directory as 143 | # specified in this configuration file. 144 | # 145 | # Saved caches greatly improve cold-start speeds, and is relatively cheap in 146 | # terms of I/O for the key cache. Row cache saving is much more expensive and 147 | # has limited use. 148 | # 149 | # Default is 14400 or 4 hours. 150 | key_cache_save_period: 14400 151 | 152 | # Number of keys from the key cache to save 153 | # Disabled by default, meaning all keys are going to be saved 154 | # key_cache_keys_to_save: 100 155 | 156 | # Maximum size of the row cache in memory. 157 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 158 | # 159 | # Default value is 0, to disable row caching. 160 | row_cache_size_in_mb: 0 161 | 162 | # Duration in seconds after which Cassandra should 163 | # save the row cache. Caches are saved to saved_caches_directory as specified 164 | # in this configuration file. 165 | # 166 | # Saved caches greatly improve cold-start speeds, and is relatively cheap in 167 | # terms of I/O for the key cache. Row cache saving is much more expensive and 168 | # has limited use. 
169 | # 170 | # Default is 0 to disable saving the row cache. 171 | row_cache_save_period: 0 172 | 173 | # Number of keys from the row cache to save 174 | # Disabled by default, meaning all keys are going to be saved 175 | # row_cache_keys_to_save: 100 176 | 177 | # Maximum size of the counter cache in memory. 178 | # 179 | # Counter cache helps to reduce counter locks' contention for hot counter cells. 180 | # In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before 181 | # write entirely. With RF > 1 a counter cache hit will still help to reduce the duration 182 | # of the lock hold, helping with hot counter cell updates, but will not allow skipping 183 | # the read entirely. Only the local (clock, count) tuple of a counter cell is kept 184 | # in memory, not the whole counter, so it's relatively cheap. 185 | # 186 | # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. 187 | # 188 | # Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. 189 | # NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. 190 | counter_cache_size_in_mb: 191 | 192 | # Duration in seconds after which Cassandra should 193 | # save the counter cache (keys only). Caches are saved to saved_caches_directory as 194 | # specified in this configuration file. 195 | # 196 | # Default is 7200 or 2 hours. 197 | counter_cache_save_period: 7200 198 | 199 | # Number of keys from the counter cache to save 200 | # Disabled by default, meaning all keys are going to be saved 201 | # counter_cache_keys_to_save: 100 202 | 203 | # The off-heap memory allocator. Affects storage engine metadata as 204 | # well as caches. Experiments show that JEMAlloc saves some memory 205 | # than the native GCC allocator (i.e., JEMalloc is more 206 | # fragmentation-resistant). 207 | # 208 | # Supported values are: NativeAllocator, JEMallocAllocator 209 | # 210 | # If you intend to use JEMallocAllocator you have to install JEMalloc as library and 211 | # modify cassandra-env.sh as directed in the file. 212 | # 213 | # Defaults to NativeAllocator 214 | # memory_allocator: NativeAllocator 215 | 216 | # saved caches 217 | # If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. 218 | saved_caches_directory: {{ c.saved_caches_directory }} 219 | 220 | # commitlog_sync may be either "periodic" or "batch." 221 | # When in batch mode, Cassandra won't ack writes until the commit log 222 | # has been fsynced to disk. It will wait up to 223 | # commitlog_sync_batch_window_in_ms milliseconds for other writes, before 224 | # performing the sync. 225 | # 226 | # commitlog_sync: batch 227 | # commitlog_sync_batch_window_in_ms: 50 228 | # 229 | # the other option is "periodic" where writes may be acked immediately 230 | # and the CommitLog is simply synced every commitlog_sync_period_in_ms 231 | # milliseconds. commitlog_periodic_queue_size allows 1024*(CPU cores) pending 232 | # entries on the commitlog queue by default. If you are writing very large 233 | # blobs, you should reduce that; 16*cores works reasonably well for 1MB blobs. 234 | # It should be at least as large as the concurrent_writes setting. 235 | commitlog_sync: periodic 236 | commitlog_sync_period_in_ms: 10000 237 | # commitlog_periodic_queue_size: 238 | 239 | # The size of the individual commitlog file segments. 
A commitlog 240 | # segment may be archived, deleted, or recycled once all the data 241 | # in it (potentially from each columnfamily in the system) has been 242 | # flushed to sstables. 243 | # 244 | # The default size is 32, which is almost always fine, but if you are 245 | # archiving commitlog segments (see commitlog_archiving.properties), 246 | # then you probably want a finer granularity of archiving; 8 or 16 MB 247 | # is reasonable. 248 | commitlog_segment_size_in_mb: 32 249 | 250 | # any class that implements the SeedProvider interface and has a 251 | # constructor that takes a Map of parameters will do. 252 | seed_provider: 253 | # Addresses of hosts that are deemed contact points. 254 | # Cassandra nodes use this list of hosts to find each other and learn 255 | # the topology of the ring. You must change this if you are running 256 | # multiple nodes! 257 | - class_name: org.apache.cassandra.locator.SimpleSeedProvider 258 | parameters: 259 | # seeds is actually a comma-delimited list of addresses. 260 | # Ex: ",," 261 | - seeds: "{{ ','.join(c.seeds) }}" 262 | 263 | # For workloads with more data than can fit in memory, Cassandra's 264 | # bottleneck will be reads that need to fetch data from 265 | # disk. "concurrent_reads" should be set to (16 * number_of_drives) in 266 | # order to allow the operations to enqueue low enough in the stack 267 | # that the OS and drives can reorder them. Same applies to 268 | # "concurrent_counter_writes", since counter writes read the current 269 | # values before incrementing and writing them back. 270 | # 271 | # On the other hand, since writes are almost never IO bound, the ideal 272 | # number of "concurrent_writes" is dependent on the number of cores in 273 | # your system; (8 * number_of_cores) is a good rule of thumb. 274 | concurrent_reads: 32 275 | concurrent_writes: 32 276 | concurrent_counter_writes: 32 277 | 278 | # Total memory to use for sstable-reading buffers. Defaults to 279 | # the smaller of 1/4 of heap or 512MB. 280 | # file_cache_size_in_mb: 512 281 | 282 | # Total permitted memory to use for memtables. Cassandra will stop 283 | # accepting writes when the limit is exceeded until a flush completes, 284 | # and will trigger a flush based on memtable_cleanup_threshold 285 | # If omitted, Cassandra will set both to 1/4 the size of the heap. 286 | # memtable_heap_space_in_mb: 2048 287 | # memtable_offheap_space_in_mb: 2048 288 | 289 | # Ratio of occupied non-flushing memtable size to total permitted size 290 | # that will trigger a flush of the largest memtable. Lager mct will 291 | # mean larger flushes and hence less compaction, but also less concurrent 292 | # flush activity which can make it difficult to keep your disks fed 293 | # under heavy write load. 294 | # 295 | # memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) 296 | # memtable_cleanup_threshold: 0.11 297 | 298 | # Specify the way Cassandra allocates and manages memtable memory. 299 | # Options are: 300 | # heap_buffers: on heap nio buffers 301 | # offheap_buffers: off heap (direct) nio buffers 302 | # offheap_objects: native memory, eliminating nio buffer heap overhead 303 | memtable_allocation_type: heap_buffers 304 | 305 | # Total space to use for commitlogs. Since commitlog segments are 306 | # mmapped, and hence use up address space, the default size is 32 307 | # on 32-bit JVMs, and 8192 on 64-bit JVMs. 
308 | # 309 | # If space gets above this value (it will round up to the next nearest 310 | # segment multiple), Cassandra will flush every dirty CF in the oldest 311 | # segment and remove it. So a small total commitlog space will tend 312 | # to cause more flush activity on less-active columnfamilies. 313 | # commitlog_total_space_in_mb: 8192 314 | 315 | # This sets the amount of memtable flush writer threads. These will 316 | # be blocked by disk io, and each one will hold a memtable in memory 317 | # while blocked. 318 | # 319 | # memtable_flush_writers defaults to the smaller of (number of disks, 320 | # number of cores), with a minimum of 2 and a maximum of 8. 321 | # 322 | # If your data directories are backed by SSD, you should increase this 323 | # to the number of cores. 324 | #memtable_flush_writers: 8 325 | 326 | # A fixed memory pool size in MB for for SSTable index summaries. If left 327 | # empty, this will default to 5% of the heap size. If the memory usage of 328 | # all index summaries exceeds this limit, SSTables with low read rates will 329 | # shrink their index summaries in order to meet this limit. However, this 330 | # is a best-effort process. In extreme conditions Cassandra may need to use 331 | # more than this amount of memory. 332 | index_summary_capacity_in_mb: 333 | 334 | # How frequently index summaries should be resampled. This is done 335 | # periodically to redistribute memory from the fixed-size pool to sstables 336 | # proportional their recent read rates. Setting to -1 will disable this 337 | # process, leaving existing index summaries at their current sampling level. 338 | index_summary_resize_interval_in_minutes: 60 339 | 340 | # Whether to, when doing sequential writing, fsync() at intervals in 341 | # order to force the operating system to flush the dirty 342 | # buffers. Enable this to avoid sudden dirty buffer flushing from 343 | # impacting read latencies. Almost always a good idea on SSDs; not 344 | # necessarily on platters. 345 | trickle_fsync: false 346 | trickle_fsync_interval_in_kb: 10240 347 | 348 | # TCP port, for commands and data 349 | storage_port: 7000 350 | 351 | # SSL port, for encrypted communication. Unused unless enabled in 352 | # encryption_options 353 | ssl_storage_port: 7001 354 | 355 | # Address or interface to bind to and tell other Cassandra nodes to connect to. 356 | # You _must_ change this if you want multiple nodes to be able to communicate! 357 | # 358 | # Set listen_address OR listen_interface, not both. Interfaces must correspond 359 | # to a single address, IP aliasing is not supported. 360 | # 361 | # Leaving it blank leaves it up to InetAddress.getLocalHost(). This 362 | # will always do the Right Thing _if_ the node is properly configured 363 | # (hostname, name resolution, etc), and the Right Thing is to use the 364 | # address associated with the hostname (it might not be). 365 | # 366 | # Setting listen_address to 0.0.0.0 is always wrong. 367 | listen_address: {{ c.listen_address }} 368 | # listen_interface: eth0 369 | 370 | # Address to broadcast to other Cassandra nodes 371 | # Leaving this blank will set it to the same value as listen_address 372 | # broadcast_address: 1.2.3.4 373 | 374 | # Internode authentication backend, implementing IInternodeAuthenticator; 375 | # used to allow/disallow connections from peer nodes. 376 | # internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator 377 | 378 | # Whether to start the native transport server. 
379 | # Please note that the address on which the native transport is bound is the 380 | # same as the rpc_address. The port however is different and specified below. 381 | start_native_transport: true 382 | # port for the CQL native transport to listen for clients on 383 | native_transport_port: 9042 384 | # The maximum threads for handling requests when the native transport is used. 385 | # This is similar to rpc_max_threads though the default differs slightly (and 386 | # there is no native_transport_min_threads, idle threads will always be stopped 387 | # after 30 seconds). 388 | # native_transport_max_threads: 128 389 | # 390 | # The maximum size of allowed frame. Frame (requests) larger than this will 391 | # be rejected as invalid. The default is 256MB. 392 | # native_transport_max_frame_size_in_mb: 256 393 | 394 | # Whether to start the thrift rpc server. 395 | start_rpc: true 396 | 397 | # The address or interface to bind the Thrift RPC service and native transport 398 | # server to. 399 | # 400 | # Set rpc_address OR rpc_interface, not both. Interfaces must correspond 401 | # to a single address, IP aliasing is not supported. 402 | # 403 | # Leaving rpc_address blank has the same effect as on listen_address 404 | # (i.e. it will be based on the configured hostname of the node). 405 | # 406 | # Note that unlike listen_address, you can specify 0.0.0.0, but you must also 407 | # set broadcast_rpc_address to a value other than 0.0.0.0. 408 | rpc_address: {{ c.rpc_address }} 409 | # rpc_interface: eth1 410 | 411 | # port for Thrift to listen for clients on 412 | rpc_port: 9160 413 | 414 | # RPC address to broadcast to drivers and other Cassandra nodes. This cannot 415 | # be set to 0.0.0.0. If left blank, this will be set to the value of 416 | # rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must 417 | # be set. 418 | # broadcast_rpc_address: 1.2.3.4 419 | 420 | # enable or disable keepalive on rpc/native connections 421 | rpc_keepalive: true 422 | 423 | # Cassandra provides two out-of-the-box options for the RPC Server: 424 | # 425 | # sync -> One thread per thrift connection. For a very large number of clients, memory 426 | # will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size 427 | # per thread, and that will correspond to your use of virtual memory (but physical memory 428 | # may be limited depending on use of stack space). 429 | # 430 | # hsha -> Stands for "half synchronous, half asynchronous." All thrift clients are handled 431 | # asynchronously using a small number of threads that does not vary with the amount 432 | # of thrift clients (and thus scales well to many clients). The rpc requests are still 433 | # synchronous (one thread per active request). If hsha is selected then it is essential 434 | # that rpc_max_threads is changed from the default value of unlimited. 435 | # 436 | # The default is sync because on Windows hsha is about 30% slower. On Linux, 437 | # sync/hsha performance is about the same, with hsha of course using less memory. 438 | # 439 | # Alternatively, can provide your own RPC server by providing the fully-qualified class name 440 | # of an o.a.c.t.TServerFactory that can create an instance of it. 441 | rpc_server_type: sync 442 | 443 | # Uncomment rpc_min|max_thread to set request pool size limits. 
444 | # 445 | # Regardless of your choice of RPC server (see above), the number of maximum requests in the 446 | # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync 447 | # RPC server, it also dictates the number of clients that can be connected at all). 448 | # 449 | # The default is unlimited and thus provides no protection against clients overwhelming the server. You are 450 | # encouraged to set a maximum that makes sense for you in production, but do keep in mind that 451 | # rpc_max_threads represents the maximum number of client requests this server may execute concurrently. 452 | # 453 | # rpc_min_threads: 16 454 | # rpc_max_threads: 2048 455 | 456 | # uncomment to set socket buffer sizes on rpc connections 457 | # rpc_send_buff_size_in_bytes: 458 | # rpc_recv_buff_size_in_bytes: 459 | 460 | # Uncomment to set socket buffer size for internode communication 461 | # Note that when setting this, the buffer size is limited by net.core.wmem_max 462 | # and when it is not set, it is defined by net.ipv4.tcp_wmem 463 | # See: 464 | # /proc/sys/net/core/wmem_max 465 | # /proc/sys/net/core/rmem_max 466 | # /proc/sys/net/ipv4/tcp_wmem 467 | # /proc/sys/net/ipv4/tcp_rmem 468 | # and: man tcp 469 | # internode_send_buff_size_in_bytes: 470 | # internode_recv_buff_size_in_bytes: 471 | 472 | # Frame size for thrift (maximum message length). 473 | thrift_framed_transport_size_in_mb: 15 474 | 475 | # Set to true to have Cassandra create a hard link to each sstable 476 | # flushed or streamed locally in a backups/ subdirectory of the 477 | # keyspace data. Removing these links is the operator's 478 | # responsibility. 479 | incremental_backups: false 480 | 481 | # Whether or not to take a snapshot before each compaction. Be 482 | # careful using this option, since Cassandra won't clean up the 483 | # snapshots for you. Mostly useful if you're paranoid when there 484 | # is a data format change. 485 | snapshot_before_compaction: false 486 | 487 | # Whether or not a snapshot is taken of the data before keyspace truncation 488 | # or dropping of column families. The STRONGLY advised default of true 489 | # should be used to provide data safety. If you set this flag to false, you will 490 | # lose data on truncation or drop. 491 | auto_snapshot: true 492 | 493 | # When executing a scan, within or across a partition, we need to keep the 494 | # tombstones seen in memory so we can return them to the coordinator, which 495 | # will use them to make sure other replicas also know about the deleted rows. 496 | # With workloads that generate a lot of tombstones, this can cause performance 497 | # problems and even exhaust the server heap. 498 | # (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) 499 | # Adjust the thresholds here if you understand the dangers and want to 500 | # scan more tombstones anyway. These thresholds may also be adjusted at runtime 501 | # using the StorageService mbean. 502 | tombstone_warn_threshold: 1000 503 | tombstone_failure_threshold: 100000 504 | 505 | # Granularity of the collation index of rows within a partition. 506 | # Increase if your rows are large, or if you have a very large 507 | # number of rows per partition.
The competing goals are these: 508 | # 1) a smaller granularity means more index entries are generated 509 | # and looking up rows within the partition by collation column 510 | # is faster 511 | # 2) but, Cassandra will keep the collation index in memory for hot 512 | # rows (as part of the key cache), so a larger granularity means 513 | # you can cache more hot rows 514 | column_index_size_in_kb: 64 515 | 516 | 517 | # Log WARN on any batch size exceeding this value. 5kb per batch by default. 518 | # Caution should be taken on increasing the size of this threshold as it can lead to node instability. 519 | batch_size_warn_threshold_in_kb: 5 520 | 521 | # Number of simultaneous compactions to allow, NOT including 522 | # validation "compactions" for anti-entropy repair. Simultaneous 523 | # compactions can help preserve read performance in a mixed read/write 524 | # workload, by mitigating the tendency of small sstables to accumulate 525 | # during a single long running compaction. The default is usually 526 | # fine and if you experience problems with compaction running too 527 | # slowly or too fast, you should look at 528 | # compaction_throughput_mb_per_sec first. 529 | # 530 | # concurrent_compactors defaults to the smaller of (number of disks, 531 | # number of cores), with a minimum of 2 and a maximum of 8. 532 | # 533 | # If your data directories are backed by SSD, you should increase this 534 | # to the number of cores. 535 | #concurrent_compactors: 1 536 | 537 | # Throttles compaction to the given total throughput across the entire 538 | # system. The faster you insert data, the faster you need to compact in 539 | # order to keep the sstable count down, but in general, setting this to 540 | # 16 to 32 times the rate you are inserting data is more than sufficient. 541 | # Setting this to 0 disables throttling. Note that this accounts for all types 542 | # of compaction, including validation compaction. 543 | compaction_throughput_mb_per_sec: 16 544 | 545 | # When compacting, the replacement sstable(s) can be opened before they 546 | # are completely written, and used in place of the prior sstables for 547 | # any range that has been written. This helps to smoothly transfer reads 548 | # between the sstables, reducing page cache churn and keeping hot rows hot. 549 | sstable_preemptive_open_interval_in_mb: 50 550 | 551 | # Throttles all outbound streaming file transfers on this node to the 552 | # given total throughput in Mbps. This is necessary because Cassandra does 553 | # mostly sequential IO when streaming data during bootstrap or repair, which 554 | # can lead to saturating the network connection and degrading rpc performance. 555 | # When unset, the default is 200 Mbps or 25 MB/s.
556 | # stream_throughput_outbound_megabits_per_sec: 200 557 | 558 | # Throttles all streaming file transfer between the datacenters; 559 | # this setting allows users to throttle inter dc stream throughput in addition 560 | # to throttling all network stream traffic as configured with 561 | # stream_throughput_outbound_megabits_per_sec 562 | # inter_dc_stream_throughput_outbound_megabits_per_sec: 563 | 564 | # How long the coordinator should wait for read operations to complete 565 | read_request_timeout_in_ms: 5000 566 | # How long the coordinator should wait for seq or index scans to complete 567 | range_request_timeout_in_ms: 10000 568 | # How long the coordinator should wait for writes to complete 569 | write_request_timeout_in_ms: 2000 570 | # How long the coordinator should wait for counter writes to complete 571 | counter_write_request_timeout_in_ms: 5000 572 | # How long a coordinator should continue to retry a CAS operation 573 | # that contends with other proposals for the same row 574 | cas_contention_timeout_in_ms: 1000 575 | # How long the coordinator should wait for truncates to complete 576 | # (This can be much longer, because unless auto_snapshot is disabled 577 | # we need to flush first so we can snapshot before removing the data.) 578 | truncate_request_timeout_in_ms: 60000 579 | # The default timeout for other, miscellaneous operations 580 | request_timeout_in_ms: 10000 581 | 582 | # Enable operation timeout information exchange between nodes to accurately 583 | # measure request timeouts. If disabled, replicas will assume that requests 584 | # were forwarded to them instantly by the coordinator, which means that 585 | # under overload conditions we will waste that much extra time processing 586 | # already-timed-out requests. 587 | # 588 | # Warning: before enabling this property make sure that ntp is installed 589 | # and the times are synchronized between the nodes. 590 | cross_node_timeout: false 591 | 592 | # Enable socket timeout for streaming operation. 593 | # When a timeout occurs during streaming, streaming is retried from the start 594 | # of the current file. This _can_ involve re-streaming a substantial amount of 595 | # data, so you should avoid setting the value too low. 596 | # The default value is 0, which means streams never time out. 597 | # streaming_socket_timeout_in_ms: 0 598 | 599 | # phi value that must be reached for a host to be marked down. 600 | # most users should never need to adjust this. 601 | # phi_convict_threshold: 8 602 | 603 | # endpoint_snitch -- Set this to a class that implements 604 | # IEndpointSnitch. The snitch has two functions: 605 | # - it teaches Cassandra enough about your network topology to route 606 | # requests efficiently 607 | # - it allows Cassandra to spread replicas around your cluster to avoid 608 | # correlated failures. It does this by grouping machines into 609 | # "datacenters" and "racks." Cassandra will do its best not to have 610 | # more than one replica on the same "rack" (which may not actually 611 | # be a physical location) 612 | # 613 | # IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER, 614 | # YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS 615 | # ARE PLACED. 616 | # 617 | # Out of the box, Cassandra provides 618 | # - SimpleSnitch: 619 | # Treats Strategy order as proximity. This can improve cache 620 | # locality when disabling read repair. Only appropriate for 621 | # single-datacenter deployments.
622 | # - GossipingPropertyFileSnitch 623 | # This should be your go-to snitch for production use. The rack 624 | # and datacenter for the local node are defined in 625 | # cassandra-rackdc.properties and propagated to other nodes via 626 | # gossip. If cassandra-topology.properties exists, it is used as a 627 | # fallback, allowing migration from the PropertyFileSnitch. 628 | # - PropertyFileSnitch: 629 | # Proximity is determined by rack and data center, which are 630 | # explicitly configured in cassandra-topology.properties. 631 | # - Ec2Snitch: 632 | # Appropriate for EC2 deployments in a single Region. Loads Region 633 | # and Availability Zone information from the EC2 API. The Region is 634 | # treated as the datacenter, and the Availability Zone as the rack. 635 | # Only private IPs are used, so this will not work across multiple 636 | # Regions. 637 | # - Ec2MultiRegionSnitch: 638 | # Uses public IPs as broadcast_address to allow cross-region 639 | # connectivity. (Thus, you should set seed addresses to the public 640 | # IP as well.) You will need to open the storage_port or 641 | # ssl_storage_port on the public IP firewall. (For intra-Region 642 | # traffic, Cassandra will switch to the private IP after 643 | # establishing a connection.) 644 | # - RackInferringSnitch: 645 | # Proximity is determined by rack and data center, which are 646 | # assumed to correspond to the 3rd and 2nd octet of each node's IP 647 | # address, respectively. Unless this happens to match your 648 | # deployment conventions, this is best used as an example of 649 | # writing a custom Snitch class and is provided in that spirit. 650 | # 651 | # You can use a custom Snitch by setting this to the full class name 652 | # of the snitch, which will be assumed to be on your classpath. 653 | endpoint_snitch: {{ c.endpoint_snitch }} 654 | 655 | # controls how often to perform the more expensive part of host score 656 | # calculation 657 | dynamic_snitch_update_interval_in_ms: 100 658 | # controls how often to reset all host scores, allowing a bad host to 659 | # possibly recover 660 | dynamic_snitch_reset_interval_in_ms: 600000 661 | # if set greater than zero and read_repair_chance is < 1.0, this will allow 662 | # 'pinning' of replicas to hosts in order to increase cache capacity. 663 | # The badness threshold will control how much worse the pinned host has to be 664 | # before the dynamic snitch will prefer other replicas over it. This is 665 | # expressed as a double which represents a percentage. Thus, a value of 666 | # 0.2 means Cassandra would continue to prefer the static snitch values 667 | # until the pinned host was 20% worse than the fastest. 668 | dynamic_snitch_badness_threshold: 0.1 669 | 670 | # request_scheduler -- Set this to a class that implements 671 | # RequestScheduler, which will schedule incoming client requests 672 | # according to the specific policy. This is useful for multi-tenancy 673 | # with a single Cassandra cluster. 674 | # NOTE: This is specifically for requests from the client and does 675 | # not affect inter node communication. 676 | # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place 677 | # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of 678 | # client requests to a node with a separate queue for each 679 | # request_scheduler_id. The scheduler is further customized by 680 | # request_scheduler_options as described below. 
681 | request_scheduler: org.apache.cassandra.scheduler.NoScheduler 682 | 683 | # Scheduler Options vary based on the type of scheduler 684 | # NoScheduler - Has no options 685 | # RoundRobin 686 | # - throttle_limit -- The throttle_limit is the number of in-flight 687 | # requests per client. Requests beyond 688 | # that limit are queued up until 689 | # running requests can complete. 690 | # The value of 80 here is twice the number of 691 | # concurrent_reads + concurrent_writes. 692 | # - default_weight -- default_weight is optional and allows for 693 | # overriding the default which is 1. 694 | # - weights -- Weights are optional and will default to 1 or the 695 | # overridden default_weight. The weight translates into how 696 | # many requests are handled during each turn of the 697 | # RoundRobin, based on the scheduler id. 698 | # 699 | # request_scheduler_options: 700 | # throttle_limit: 80 701 | # default_weight: 5 702 | # weights: 703 | # Keyspace1: 1 704 | # Keyspace2: 5 705 | 706 | # request_scheduler_id -- An identifier based on which to perform 707 | # the request scheduling. Currently the only valid option is keyspace. 708 | # request_scheduler_id: keyspace 709 | 710 | # Enable or disable inter-node encryption 711 | # Default settings are TLS v1, RSA 1024-bit keys (it is imperative that 712 | # users generate their own keys), with TLS_RSA_WITH_AES_128_CBC_SHA as the cipher 713 | # suite for authentication, key exchange and encryption of the actual data transfers. 714 | # Use the DHE/ECDHE ciphers if running in FIPS 140 compliant mode. 715 | # NOTE: No custom encryption options are enabled at the moment 716 | # The available internode options are: all, none, dc, rack 717 | # 718 | # If set to dc, cassandra will encrypt the traffic between the DCs 719 | # If set to rack, cassandra will encrypt the traffic between the racks 720 | # 721 | # The passwords used in these options must match the passwords used when generating 722 | # the keystore and truststore. For instructions on generating these files, see: 723 | # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore 724 | # 725 | server_encryption_options: 726 | internode_encryption: none 727 | keystore: conf/.keystore 728 | keystore_password: cassandra 729 | truststore: conf/.truststore 730 | truststore_password: cassandra 731 | # More advanced defaults below: 732 | # protocol: TLS 733 | # algorithm: SunX509 734 | # store_type: JKS 735 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] 736 | # require_client_auth: false 737 | 738 | # enable or disable client/server encryption. 739 | client_encryption_options: 740 | enabled: false 741 | keystore: conf/.keystore 742 | keystore_password: cassandra 743 | # require_client_auth: false 744 | # Set truststore and truststore_password if require_client_auth is true 745 | # truststore: conf/.truststore 746 | # truststore_password: cassandra 747 | # More advanced defaults below: 748 | # protocol: TLS 749 | # algorithm: SunX509 750 | # store_type: JKS 751 | # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] 752 | 753 | # internode_compression controls whether traffic between nodes is 754 | # compressed.
755 | # can be: all - all traffic is compressed 756 | # dc - traffic between different datacenters is compressed 757 | # none - nothing is compressed. 758 | internode_compression: all 759 | 760 | # Enable or disable tcp_nodelay for inter-dc communication. 761 | # Disabling it will result in larger (but fewer) network packets being sent, 762 | # reducing overhead from the TCP protocol itself, at the cost of increasing 763 | # latency if you block for cross-datacenter responses. 764 | inter_dc_tcp_nodelay: false 765 | -------------------------------------------------------------------------------- /cassandra/init.sls: -------------------------------------------------------------------------------- 1 | {%- from 'cassandra/settings.sls' import cassandra with context %} 2 | 3 | {% if cassandra.install_java %} 4 | openjdk-8-jre-headless: 5 | pkg.installed: 6 | - require_in: 7 | - pkg: cassandra_package 8 | {% endif %} 9 | 10 | cassandra_package: 11 | pkgrepo.managed: 12 | - humanname: Cassandra Debian Repo 13 | - name: deb http://debian.datastax.com/community stable main 14 | - file: /etc/apt/sources.list.d/cassandra.sources.list 15 | - key_url: http://debian.datastax.com/debian/repo_key 16 | pkg.installed: 17 | - name: {{ cassandra.package_name }} 18 | - version: {{ cassandra.version }} 19 | 20 | cassandra_configuration: 21 | file.managed: 22 | - name: {{ cassandra.conf_path }} 23 | - user: root 24 | - group: root 25 | - mode: 644 26 | - source: salt://cassandra/conf/cassandra_{{ cassandra.series }}.yaml 27 | - template: jinja 28 | - require: 29 | - pkg: cassandra_package 30 | 31 | {% for d in cassandra.config.data_file_directories %} 32 | data_file_directories_{{ d }}: 33 | file.directory: 34 | - name: {{ d }} 35 | - user: cassandra 36 | - group: cassandra 37 | - mode: 755 38 | - makedirs: True 39 | {% endfor %} 40 | 41 | commitlog_directory: 42 | file.directory: 43 | - name: {{ cassandra.config.commitlog_directory }} 44 | - user: cassandra 45 | - group: cassandra 46 | - mode: 755 47 | - makedirs: True 48 | 49 | saved_caches_directory: 50 | file.directory: 51 | - name: {{ cassandra.config.saved_caches_directory }} 52 | - user: cassandra 53 | - group: cassandra 54 | - mode: 755 55 | - makedirs: True 56 | 57 | cassandra_service: 58 | service.running: 59 | - name: cassandra 60 | - enable: True 61 | - watch: 62 | - pkg: cassandra_package 63 | - file: cassandra_configuration 64 | -------------------------------------------------------------------------------- /cassandra/settings.sls: -------------------------------------------------------------------------------- 1 | {% set p = salt['pillar.get']('cassandra', {}) %} 2 | {% set pc = p.get('config', {}) %} 3 | {% set g = salt['grains.get']('cassandra', {}) %} 4 | {% set gc = g.get('config', {}) %} 5 | 6 | {% set install_java = g.get('install_java', p.get('install_java', False)) %} 7 | {% set version = g.get('version', p.get('version', '2.1.8')) %} 8 | {% set series = g.get('series', p.get('series', '21x')) %} 9 | {% set package_name = g.get('package_name', p.get('package_name', 'cassandra')) %} 10 | {% set conf_path = g.get('conf_path', p.get('conf_path', '/etc/cassandra/cassandra.yaml')) %} 11 | {% set auto_discovery = g.get('auto_discovery', p.get('auto_discovery', False)) %} 12 | 13 | {% set default_config = { 14 | 'cluster_name': 'Test Cluster', 15 | 'data_file_directories': ['/var/lib/cassandra/data'], 16 | 'commitlog_directory': '/var/lib/cassandra/commitlog', 17 | 'saved_caches_directory': '/var/lib/cassandra/saved_caches', 18 | 'seeds':
["127.0.0.1"], 19 | 'listen_address': 'localhost', 20 | 'rpc_address': 'localhost', 21 | 'endpoint_snitch': 'SimpleSnitch' 22 | }%} 23 | 24 | {%- set config = default_config %} 25 | 26 | {%- do config.update(pc) %} 27 | {%- do config.update(gc) %} 28 | 29 | {%- if auto_discovery %} 30 | 31 | {%- set force_mine_update = salt['mine.send']('network.get_hostname') %} 32 | {%- set cassandra_host_dict = salt['mine.get']('cassandra_cluster_name:' + config.cluster_name, 'network.get_hostname', 'grain') %} 33 | {%- set cassandra_hosts = cassandra_host_dict.values() %} 34 | {%- do cassandra_hosts.sort() %} 35 | {%- do config.update({'seeds':cassandra_hosts[:4]}) %} 36 | {%- endif %} 37 | 38 | {%- set cassandra = {} %} 39 | 40 | {%- do cassandra.update({ 41 | 'version': version, 42 | 'series': series, 43 | 'install_java': install_java, 44 | 'package_name': package_name, 45 | 'conf_path': conf_path, 46 | 'config': config 47 | }) %} 48 | -------------------------------------------------------------------------------- /vagrant/provision-config.sh: -------------------------------------------------------------------------------- 1 | MASTER_IP=$1 2 | NUM_MINIONS=$2 3 | MINION_IPS=$3 4 | 5 | INSTANCE_PREFIX=salt 6 | MASTER_NAME="${INSTANCE_PREFIX}-master" 7 | MASTER_TAG="${INSTANCE_PREFIX}-master" 8 | MINION_TAG="${INSTANCE_PREFIX}-minion" 9 | MINION_NAMES=($(eval echo ${INSTANCE_PREFIX}-minion-{1..${NUM_MINIONS}})) 10 | -------------------------------------------------------------------------------- /vagrant/provision-master.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | source $(dirname $0)/provision-config.sh 5 | 6 | if [ ! -f "/var/salt-vagrant-setup" ]; then 7 | mkdir -p /etc/salt/minion.d 8 | echo "master: $MASTER_NAME" > /etc/salt/minion.d/master.conf 9 | 10 | cat </etc/salt/minion.d/grains.conf 11 | grains: 12 | master_ip: $MASTER_IP 13 | minion_ips: $MINION_IPS 14 | roles: 15 | - salt-master 16 | EOF 17 | 18 | # Configure the salt-master 19 | # Auto accept all keys from minions that try to join 20 | mkdir -p /etc/salt/master.d 21 | cat </etc/salt/master.d/auto-accept.conf 22 | open_mode: True 23 | auto_accept: True 24 | EOF 25 | 26 | cat </etc/salt/master.d/fileserver.conf 27 | fileserver_backend: 28 | - roots 29 | EOF 30 | 31 | cat </etc/salt/master.d/reactor.conf 32 | # React to new minions starting by running highstate on them. 33 | reactor: 34 | - 'salt/minion/*/start': 35 | - /srv/reactor/start.sls 36 | EOF 37 | 38 | curl -sS -L https://bootstrap.saltstack.com | sh -s -- -M 39 | 40 | # a file we touch to state that base-setup is done 41 | echo "Salt configured" > /var/salt-vagrant-setup 42 | salt-call state.highstate 43 | fi 44 | -------------------------------------------------------------------------------- /vagrant/provision-minion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Google Inc. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # exit on any error 18 | set -e 19 | source $(dirname $0)/provision-config.sh 20 | 21 | MINION_IP=$4 22 | MINION_INDEX=$5 23 | # provisioning runs each time we test in order to update code, so we do not want to re-install salt each time 24 | if [ ! -f "/var/salt-vagrant-setup" ]; then 25 | 26 | if [ ! "$(cat /etc/hosts | grep $MASTER_NAME)" ]; then 27 | echo "Adding host entry for $MASTER_NAME" 28 | echo "$MASTER_IP $MASTER_NAME" >> /etc/hosts 29 | fi 30 | 31 | # Prepopulate the name of the Master 32 | mkdir -p /etc/salt/minion.d 33 | echo "master: $MASTER_NAME" > /etc/salt/minion.d/master.conf 34 | 35 | # Our minions will have a pool role to distinguish them from the master. 36 | cat <<EOF >/etc/salt/minion.d/grains.conf 37 | grains: 38 | minion_ip: $MINION_IP 39 | roles: 40 | - salt-pool 41 | kafka: 42 | broker_id: $MINION_INDEX 43 | EOF 44 | 45 | curl -sS -L https://bootstrap.saltstack.com | sh -s -- -X 46 | 47 | # a file we touch to state that base-setup is done 48 | echo "Salt configured" > /var/salt-vagrant-setup 49 | 50 | fi 51 | -------------------------------------------------------------------------------- /vagrant/srv/pillar/cassandra.sls: -------------------------------------------------------------------------------- 1 | cassandra: 2 | version: 2.1.8 3 | series: 21x 4 | install_java: True 5 | config: 6 | cluster_name: test-cluster 7 | seeds: 8 | - '10.245.1.2' 9 | listen_address: {{ grains['ip_interfaces']['eth1'][0] }} 10 | rpc_address: {{ grains['ip_interfaces']['eth1'][0] }} 11 | endpoint_snitch: GossipingPropertyFileSnitch 12 | -------------------------------------------------------------------------------- /vagrant/srv/pillar/top.sls: -------------------------------------------------------------------------------- 1 | base: 2 | '*': 3 | - cassandra -------------------------------------------------------------------------------- /vagrant/srv/salt/java.sls: -------------------------------------------------------------------------------- 1 | openjdk-7-jre-headless: 2 | pkg.installed -------------------------------------------------------------------------------- /vagrant/srv/salt/top.sls: -------------------------------------------------------------------------------- 1 | base: 2 | 'salt-master': 3 | - cassandra --------------------------------------------------------------------------------
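Usage note: cassandra/settings.sls resolves every option from the cassandra grain first and the cassandra pillar second, so a cluster can be configured without editing the formula itself. When auto_discovery is enabled, the seed list is built from the Salt mine by matching minions on a cassandra_cluster_name grain (the mine.get call in settings.sls), so that grain must be present on every node that should join. Below is a minimal sketch of such a setup; the file paths, IP addresses, cluster name, and grain placement are illustrative assumptions, not part of the repository.

    # /srv/pillar/cassandra.sls -- example pillar; values are placeholders
    cassandra:
      version: 2.1.8
      series: 21x
      install_java: True
      auto_discovery: True        # let settings.sls collect seeds from the mine
      config:
        cluster_name: example-cluster
        endpoint_snitch: GossipingPropertyFileSnitch
        # listen_address/rpc_address can also be rendered per node from grains,
        # as vagrant/srv/pillar/cassandra.sls does with ip_interfaces
        listen_address: 10.0.0.11
        rpc_address: 10.0.0.11

    # /etc/salt/grains -- example static grain on each Cassandra minion; the
    # value must equal config.cluster_name for the grain-targeted mine.get
    # in settings.sls to find the node
    cassandra_cluster_name: example-cluster

With the pillar and grain in place, running `salt -G 'cassandra_cluster_name:example-cluster' state.sls cassandra` (or an equivalent top.sls entry) would render cassandra_{{ series }}.yaml with the first four discovered hostnames as seeds, instead of the static seed list the Vagrant pillar uses.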