├── addition_to_sys_v2.sql
├── galera_check.pl
├── path_to_your_file
├── proxy_debug_tools
│   ├── README.md
│   └── marce.pl
├── readme.md
└── variables_test.md

/addition_to_sys_v2.sql:
--------------------------------------------------------------------------------
1 | USE sys;
2 | 
3 | 
4 | DROP VIEW IF EXISTS gr_member_routing_candidate_status;
5 | 
6 | DROP FUNCTION IF EXISTS IFZERO;
7 | DROP FUNCTION IF EXISTS LOCATE2;
8 | DROP FUNCTION IF EXISTS GTID_NORMALIZE;
9 | DROP FUNCTION IF EXISTS GTID_COUNT;
10 | DROP FUNCTION IF EXISTS gr_applier_queue_length;
11 | DROP FUNCTION IF EXISTS gr_member_in_primary_partition;
12 | DROP FUNCTION IF EXISTS gr_transactions_to_cert;
13 | 
14 | DELIMITER $$
15 | 
16 | CREATE FUNCTION IFZERO(a INT, b INT)
17 | RETURNS INT
18 | DETERMINISTIC
19 | RETURN IF(a = 0, b, a)$$
20 | 
21 | CREATE FUNCTION LOCATE2(needle TEXT(10000), haystack TEXT(10000), offset INT)
22 | RETURNS INT
23 | DETERMINISTIC
24 | RETURN IFZERO(LOCATE(needle, haystack, offset), LENGTH(haystack) + 1)$$
25 | 
26 | CREATE FUNCTION GTID_NORMALIZE(g TEXT(10000))
27 | RETURNS TEXT(10000)
28 | DETERMINISTIC
29 | RETURN GTID_SUBTRACT(g, '')$$
30 | 
31 | CREATE FUNCTION GTID_COUNT(gtid_set TEXT(10000))
32 | RETURNS INT
33 | DETERMINISTIC
34 | BEGIN
35 | DECLARE result BIGINT DEFAULT 0;
36 | DECLARE colon_pos INT;
37 | DECLARE next_dash_pos INT;
38 | DECLARE next_colon_pos INT;
39 | DECLARE next_comma_pos INT;
40 | SET gtid_set = GTID_NORMALIZE(gtid_set);
41 | SET colon_pos = LOCATE2(':', gtid_set, 1);
42 | WHILE colon_pos != LENGTH(gtid_set) + 1 DO
43 | SET next_dash_pos = LOCATE2('-', gtid_set, colon_pos + 1);
44 | SET next_colon_pos = LOCATE2(':', gtid_set, colon_pos + 1);
45 | SET next_comma_pos = LOCATE2(',', gtid_set, colon_pos + 1);
46 | IF next_dash_pos < next_colon_pos AND next_dash_pos < next_comma_pos THEN
47 | SET result = result +
48 | SUBSTR(gtid_set, next_dash_pos + 1,
49 | LEAST(next_colon_pos, next_comma_pos) - (next_dash_pos + 1)) -
50 | SUBSTR(gtid_set, colon_pos + 1, next_dash_pos - (colon_pos + 1)) + 1;
51 | ELSE
52 | SET result = result + 1;
53 | END IF;
54 | SET colon_pos = next_colon_pos;
55 | END WHILE;
56 | RETURN result;
57 | END$$
58 | 
59 | CREATE FUNCTION gr_applier_queue_length()
60 | RETURNS INT
61 | DETERMINISTIC
62 | BEGIN
63 | RETURN (SELECT sys.gtid_count( GTID_SUBTRACT( (SELECT
64 | Received_transaction_set FROM performance_schema.replication_connection_status
65 | WHERE Channel_name = 'group_replication_applier' ), (SELECT
66 | @@global.GTID_EXECUTED) )));
67 | END$$
68 | 
69 | CREATE FUNCTION gr_member_in_primary_partition()
70 | RETURNS varchar(20) CHARSET utf8mb4
71 | READS SQL DATA
72 | DETERMINISTIC
73 | BEGIN
74 | DECLARE myReturn VARCHAR(20);
75 | DECLARE myError INT DEFAULT 0;
76 | 
77 | DECLARE CONTINUE HANDLER FOR 1242 SET myError = 1242;
78 | 
79 | (SELECT IF( MEMBER_STATE='ONLINE' AND ((SELECT COUNT(*) FROM
80 | performance_schema.replication_group_members WHERE MEMBER_STATE != 'ONLINE') >=
81 | ((SELECT COUNT(*) FROM performance_schema.replication_group_members)/2) = 0),
82 | 'YES', 'NO' ) into myReturn FROM performance_schema.replication_group_members JOIN
83 | performance_schema.replication_group_member_stats rgms USING(member_id) WHERE rgms.MEMBER_ID=@@SERVER_UUID ) ;
84 | 
85 | IF myError > 0 THEN
86 | GET DIAGNOSTICS CONDITION 1
87 | @p1 = RETURNED_SQLSTATE, @p2 = MESSAGE_TEXT;
88 | select @p2 into myReturn;
89 | RETURN myReturn;
90 | END IF;
91 | IF myReturn IS NULL Then
92 | RETURN 'NO';
93 | END IF;
94 | RETURN myReturn;
95 | 
96 | END$$
97 | 
98 | CREATE 
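-- Example usage of the helpers defined in this file (a sketch: the result rows and the
-- GTID set below are made-up values, not output captured from this repository). The view
-- is what a router/proxy layer is expected to poll when picking Group Replication
-- routing candidates:
--
--   SELECT * FROM sys.gr_member_routing_candidate_status;
--   +------------------+-----------+---------------------+----------------------+
--   | viable_candidate | read_only | transactions_behind | transactions_to_cert |
--   +------------------+-----------+---------------------+----------------------+
--   | YES              | NO        | 0                   | 0                    |
--   +------------------+-----------+---------------------+----------------------+
--
--   SELECT sys.gtid_count('3E11FA47-71CA-11E1-9E33-C80AA9429562:1-7');  -- returns 7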
FUNCTION `gr_transactions_to_cert`() RETURNS int 99 | DETERMINISTIC 100 | BEGIN 101 | DECLARE transactions_to_cert INT DEFAULT 0; 102 | select performance_schema.replication_group_member_stats.COUNT_TRANSACTIONS_IN_QUEUE into transactions_to_cert 103 | 104 | FROM 105 | performance_schema.replication_group_member_stats where MEMBER_ID=@@SERVER_UUID; 106 | 107 | IF transactions_to_cert IS NULL THEN 108 | RETURN 0; 109 | END IF; 110 | 111 | RETURN transactions_to_cert; 112 | 113 | 114 | END$$ 115 | 116 | CREATE VIEW gr_member_routing_candidate_status AS 117 | 118 | SELECT 119 | sys.gr_member_in_primary_partition() AS viable_candidate, 120 | IF((SELECT 121 | ((SELECT 122 | GROUP_CONCAT(performance_schema.global_variables.VARIABLE_VALUE 123 | SEPARATOR ',') 124 | FROM 125 | performance_schema.global_variables 126 | WHERE 127 | (performance_schema.global_variables.VARIABLE_NAME IN ('read_only' , 'super_read_only'))) <> 'OFF,OFF') 128 | 129 | ), 130 | 'YES', 131 | 'NO') AS read_only, 132 | sys.gr_applier_queue_length() AS transactions_behind, 133 | sys.gr_transactions_to_cert() AS transactions_to_cert;$$ 134 | 135 | DELIMITER ; 136 | 137 | -------------------------------------------------------------------------------- /galera_check.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # This tool is "fat-packed": most of its dependent modules are embedded 3 | # in this file. 4 | ####################################### 5 | # 6 | # ProxySQL galera check v1 7 | # 8 | # Author Marco Tusa 9 | # Copyright (C) (2016 - 2020) 10 | # 11 | # 12 | #THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED 13 | #WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 14 | #MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 15 | # 16 | #This program is free software; you can redistribute it and/or modify it under 17 | #the terms of the GNU General Public License as published by the Free Software 18 | #Foundation, version 2; OR the Perl Artistic License. On UNIX and similar 19 | #systems, you can issue `man perlgpl' or `man perlartistic' to read these 20 | #licenses. 21 | # 22 | #You should have received a copy of the GNU General Public License along with 23 | #this program; if not, write to the Free Software Foundation, Inc., 59 Temple 24 | #Place, Suite 330, Boston, MA 02111-1307 USA. 25 | 26 | ####################################### 27 | 28 | package galera_check ; 29 | use Time::HiRes qw(gettimeofday); 30 | use strict; 31 | use DBI; 32 | use Getopt::Long; 33 | use Pod::Usage; 34 | 35 | 36 | $Getopt::Long::ignorecase = 0; 37 | my $Param = {}; 38 | my $user = "admin"; 39 | my $pass = "admin"; 40 | my $help = ''; 41 | my $host = '' ; 42 | my $debug = 0 ; 43 | my %hostgroups; 44 | my $mysql_connect_timeout=6; 45 | 46 | my %processState; 47 | my %processCommand; 48 | my @HGIds; 49 | 50 | 51 | 52 | 53 | ###################################################################### 54 | #Local functions 55 | ###################################################################### 56 | 57 | sub URLDecode { 58 | my $theURL = $_[0]; 59 | $theURL =~ tr/+/ /; 60 | $theURL =~ s/%([a-fA-F0-9]{2,2})/chr(hex($1))/eg; 61 | $theURL =~ s///g; 62 | return $theURL; 63 | } 64 | sub URLEncode { 65 | my $theURL = $_[0]; 66 | $theURL =~ s/([\W])/"%" . 
uc(sprintf("%2.2x",ord($1)))/eg; 67 | return $theURL; 68 | } 69 | 70 | # return a proxy object 71 | sub get_proxy($$$$){ 72 | my $dns = shift; 73 | my $user = shift; 74 | my $pass = shift; 75 | my $debug = shift; 76 | my $proxynode = ProxySqlNode->new(); 77 | $proxynode->dns($dns); 78 | $proxynode->user($user); 79 | $proxynode->password($pass); 80 | $proxynode->debug($debug); 81 | 82 | return $proxynode; 83 | 84 | } 85 | 86 | sub main{ 87 | # ============================================================================ 88 | #+++++ INITIALIZATION 89 | # ============================================================================ 90 | if($#ARGV < 0){ 91 | pod2usage(-verbose => 2) ; 92 | exit 1; 93 | } 94 | 95 | if($#ARGV < 3){ 96 | #given a ProxySQL scheduler 97 | #limitation we will pass the whole set of params as one 98 | # and will split after 99 | @ARGV = split('\ ',$ARGV[0]); 100 | } 101 | 102 | $Param->{user} = ''; 103 | $Param->{log} = undef ; 104 | $Param->{password} = ''; 105 | $Param->{host} = ''; 106 | $Param->{port} = 3306; 107 | $Param->{debug} = 0; 108 | $Param->{processlist} = 0; 109 | $Param->{OS} = $^O; 110 | $Param->{main_segment} = 1; 111 | $Param->{retry_up} = 0; 112 | $Param->{retry_down} = 0; 113 | $Param->{print_execution} = 1; 114 | $Param->{development} = 0; 115 | $Param->{development_time} = 2; 116 | $Param->{active_failover} = 0; 117 | $Param->{single_writer} = 1; 118 | $Param->{writer_is_reader} = 1; 119 | $Param->{check_timeout} = 800; 120 | $Param->{ssl_certs_path} = undef; 121 | 122 | my $run_pid_dir = "/tmp" ; 123 | 124 | #if ( 125 | GetOptions( 126 | 'user|u:s' => \$Param->{user}, 127 | 'password|p:s' => \$Param->{password}, 128 | 'host|h:s' => \$host, 129 | 'port|P:i' => \$Param->{port}, 130 | 'debug|d:i' => \$Param->{debug}, 131 | 'log:s' => \$Param->{log}, 132 | 'hostgroups|H:s'=> \$Param->{hostgroups}, 133 | 'main_segment|S:s'=> \$Param->{main_segment}, 134 | 'retry_up:i' => \$Param->{retry_up}, 135 | 'retry_down:i' => \$Param->{retry_down}, 136 | 'execution_time:i' => \$Param->{print_execution}, 137 | 'development:i' => \$Param->{development}, 138 | 'development_time:i' => \$Param->{development_time}, 139 | 'single_writer:i' => \$Param->{single_writer}, 140 | 'writer_is_also_reader:i' => \$Param->{writer_is_reader}, 141 | 'active_failover:i' => \$Param->{active_failover}, 142 | 'check_timeout:i' => \$Param->{check_timeout}, 143 | 'ssl_certs_path:s' => \$Param->{ssl_certs_path}, 144 | 145 | 'help|?' => \$Param->{help} 146 | 147 | ) or pod2usage(2); 148 | pod2usage(-verbose => 2) if $Param->{help}; 149 | 150 | die print Utils->print_log(1,"Option --hostgroups not specified.\n") unless defined($Param->{hostgroups}); 151 | die print Utils->print_log(1,"Option --host not specified.\n") unless defined $Param->{host}; 152 | die print Utils->print_log(1,"Option --user not specified.\n") unless defined $Param->{user}; 153 | die print Utils->print_log(1,"Option --port not specified.\n") unless defined $Param->{port}; 154 | die print Utils->print_log(1,"Option --active_failover has an invalid value ($Param->{active_failover}).\n" 155 | ."Valid values are:\n" 156 | ." 0 [default] do not make failover\n" 157 | ." 1 make failover only if HG 8000 is specified in ProxySQL mysl_servers\n" 158 | ." 2 use PXC_CLUSTER_VIEW to identify a server in the same segment\n" 159 | ." 3 do whatever to keep service up also failover to another segment (use PXC_CLUSTER_VIEW) ") unless $Param->{active_failover} < 4; 160 | #die "Option --log not specified. 
We need a place to log what is going on, don't we?\n" unless defined $Param->{log}; 161 | print Utils->print_log(2,"Option --log not specified. We need a place to log what is going on, don't we?\n") unless defined $Param->{log}; 162 | 163 | if($Param->{debug}){ 164 | Utils::debugEnv(); 165 | } 166 | 167 | $Param->{host} = URLDecode($host); 168 | my $dsn = "DBI:mysql:host=$Param->{host};port=$Param->{port}"; 169 | 170 | if(defined $Param->{user}){ 171 | $user = "$Param->{user}"; 172 | } 173 | if(defined $Param->{password}){ 174 | $pass = "$Param->{password}"; 175 | } 176 | my $hg =$Param->{hostgroups}; 177 | $hg =~ s/[\:,\,]/_/g; 178 | my $base_path = "${run_pid_dir}/proxysql_galera_check_${hg}.pid"; 179 | 180 | 181 | 182 | #============================================================================ 183 | # Execution 184 | #============================================================================ 185 | if(defined $Param->{log}){ 186 | open(FH, '>>', $Param->{log}."_".$hg.".log") or die Utils->print_log(1,"cannot open file"); 187 | FH->autoflush if $Param->{development} < 2; 188 | select FH; 189 | } 190 | #checks for ssl cert path and identify if accessible. 191 | #If defined and not accessible exit with an error 192 | if(defined $Param->{ssl_certs_path}){ 193 | my $ssl_path = $Param->{ssl_certs_path}; 194 | if (-d $ssl_path) { 195 | # directory called cgi-bin exists 196 | if(-e $ssl_path."/client-key.pem" 197 | && -e $ssl_path."/client-cert.pem" 198 | && -e $ssl_path."/ca.pem"){ 199 | print Utils->print_log(4," SSL Directory exists and all the files are there $ssl_path") 200 | } 201 | else{ 202 | print Utils->print_log(1,"SSL Path (ssl_certs_path) declared and accessible [$ssl_path]. But certification files must have specific names:\n \t\t client-key.pem \n \t\t client-cert.pem \n \t\t ca.pem \n"); 203 | exit 1 204 | } 205 | } 206 | else{ 207 | # ssl path declared but not exists, exit with error 208 | print Utils->print_log(1,"SSL Path (ssl_certs_path) declared but not existing \n \t\t $ssl_path \n \t\t Please create directory and assign the right to access it to the ProxySQL user \n"); 209 | exit 1; 210 | } 211 | } 212 | 213 | if($Param->{development} < 2){ 214 | if(!-e $base_path){ 215 | `echo "$$" > $base_path` 216 | } 217 | else{ 218 | my $existing_pid=`cat $base_path`; 219 | my $exists = kill 0, $existing_pid; 220 | if($exists > 0){ 221 | print STDOUT "Another process is running using the same HostGroup and settings,\n Or orphan pid file. check in $base_path \n"; 222 | print Utils->print_log(1,"Another process is running using the same HostGroup and settings,\n Or orphan pid file. 
check in $base_path \n"); 223 | exit 1; 224 | } 225 | else{ 226 | `echo "$$" > $base_path`; 227 | } 228 | } 229 | } 230 | 231 | # for test only purpose comment for prod 232 | 233 | my $xx =1; 234 | my $y =0; 235 | $xx=2000000000 if($Param->{development} > 0); 236 | 237 | while($y < $xx){ 238 | ++$y ; 239 | 240 | my $start = gettimeofday(); 241 | if($Param->{debug} >= 1){ 242 | print Utils->print_log(3,"START EXECUTION\n"); 243 | } 244 | 245 | 246 | 247 | my $proxy_sql_node = get_proxy($dsn, $user, $pass ,$Param->{debug}) ; 248 | $proxy_sql_node->retry_up($Param->{retry_up}); 249 | $proxy_sql_node->retry_down($Param->{retry_down}); 250 | $proxy_sql_node->hostgroups($Param->{hostgroups}) ; 251 | $proxy_sql_node->require_failover($Param->{active_failover}); 252 | $proxy_sql_node->check_timeout($Param->{check_timeout}); 253 | 254 | $proxy_sql_node->connect(); 255 | 256 | # create basic galera cluster object and fill info 257 | $proxy_sql_node->set_galera_cluster(); 258 | my $galera_cluster = $proxy_sql_node->get_galera_cluster(); 259 | 260 | if( defined $galera_cluster){ 261 | $galera_cluster->main_segment($Param->{main_segment}); 262 | $galera_cluster->cluster_identifier($hg); 263 | $galera_cluster->get_nodes(); 264 | } 265 | 266 | # Retrive the nodes state 267 | if(defined $galera_cluster->nodes){ 268 | $galera_cluster->process_nodes(); 269 | 270 | } 271 | 272 | #Analyze nodes state from ProxySQL prospective; 273 | if(defined $galera_cluster->nodes){ 274 | my %action_node = $proxy_sql_node->evaluate_nodes($galera_cluster); 275 | 276 | } 277 | 278 | if(defined $proxy_sql_node->action_nodes){ 279 | $proxy_sql_node->push_changes; 280 | } 281 | 282 | my $end = gettimeofday(); 283 | print Utils->print_log(3,"END EXECUTION Total Time(ms):".($end - $start) * 1000 ."\n") if $Param->{print_execution} >0; 284 | if($Param->{debug} >= 1){ 285 | print Utils->print_log(3,"\n"); 286 | } 287 | FH->flush(); 288 | 289 | $proxy_sql_node->disconnect(); 290 | 291 | #debug braket 292 | sleep $Param->{development_time} if($Param->{development} > 0); 293 | 294 | } 295 | 296 | if(defined $Param->{log}){ 297 | close FH; # in the end 298 | } 299 | 300 | `rm -f $base_path`; 301 | 302 | 303 | exit(0); 304 | 305 | 306 | } 307 | 308 | # ############################################################################ 309 | # Run the program. 
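# Hostgroup layout the checker works against (a sketch with made-up ids and addresses:
# 500 = writer group and 501 = reader group from -H=500:W,501:R; the script also derives
# id+9000 maintenance groups and an id+8000 backup group used when --active_failover=1):
#
#   INSERT INTO mysql_servers (hostgroup_id,hostname,port,weight) VALUES (500,'10.0.0.21',3306,1000);
#   INSERT INTO mysql_servers (hostgroup_id,hostname,port,weight) VALUES (501,'10.0.0.22',3306,1000);
#   INSERT INTO mysql_servers (hostgroup_id,hostname,port,weight) VALUES (501,'10.0.0.23',3306,1000);
#   INSERT INTO mysql_servers (hostgroup_id,hostname,port,weight) VALUES (8500,'10.0.0.22',3306,1000);
#   LOAD MYSQL SERVERS TO RUNTIME; SAVE MYSQL SERVERS TO DISK;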
310 | # ############################################################################ 311 | exit main(@ARGV); 312 | 313 | { 314 | package Galeracluster; 315 | use threads; 316 | use threads::shared; 317 | use strict; 318 | use warnings; 319 | use Time::HiRes qw(gettimeofday usleep); 320 | 321 | sub new { 322 | my $class = shift; 323 | my $SQL_get_mysql_servers=" SELECT a.* FROM runtime_mysql_servers a join stats_mysql_connection_pool b on a.hostname=b.srv_host and a.port=b.srv_port and a.hostgroup_id=b.hostgroup WHERE b.status not in ('OFFLINE_HARD','SHUNNED') "; 324 | 325 | # Variable section for looping values 326 | #Generalize object for now I have conceptualize as: 327 | # Cluster (generic container) 328 | # Cluster->{name} This is the cluster name 329 | # Cluster->{nodes} the nodes in the cluster Map by node name 330 | # Cluster->{status} cluster status [Primary|not Primary] 331 | # Cluster->{size} cluster status [Primary|not Primary] 332 | # Cluster->{singlenode}=0; 0 if false 1 if true meaning only one ACTIVE node in the cluster 333 | # Cluster->{haswriter}=0; 0 if false 1 if true at least a node is fully active as writer 334 | # Cluster->{singlewriter}=1; 0 if false 1 if true this cluster can have ONLY one writer a time [true default] 335 | 336 | my $self = { 337 | _name => undef, 338 | _hosts => {}, 339 | _status => undef, 340 | _size => {}, 341 | _singlenode => 0, 342 | _haswriter => 0, 343 | _singlewriter => 1, 344 | _main_segment => 0, 345 | _SQL_get_mysql_servers => $SQL_get_mysql_servers, 346 | _hostgroups => undef, 347 | _dbh_proxy => undef, 348 | _debug => 0, 349 | _monitor_user => undef, 350 | _monitor_password => undef, 351 | _nodes => {}, 352 | _nodes_maint => {}, 353 | _check_timeout => 100, #timeout in ms 354 | _cluster_identifier => undef, 355 | _hg_writer_id => 0, 356 | _hg_reader_id => 0, 357 | _ssl_certificates_path => undef, 358 | _writer_is_reader => 0, 359 | _reader_nodes => [] , 360 | _writer_nodes => [] , 361 | _has_failover_node =>0, 362 | _writers =>0, 363 | #_hg => undef, 364 | }; 365 | bless $self, $class; 366 | return $self; 367 | 368 | } 369 | 370 | sub ssl_certificates_path{ 371 | my ( $self, $in ) = @_; 372 | $self->{_ssl_certificates_path} = $in if defined($in); 373 | return $self->{_ssl_certificates_path}; 374 | } 375 | 376 | sub has_failover_node{ 377 | my ( $self, $in ) = @_; 378 | $self->{_has_failover_node} = $in if defined($in); 379 | return $self->{_has_failover_node}; 380 | } 381 | 382 | sub writer_nodes{ 383 | my ( $self, $in ) = @_; 384 | $self->{_writer_nodes} = $in if defined($in); 385 | return $self->{_writer_nodes}; 386 | } 387 | 388 | sub reader_nodes{ 389 | my ( $self, $in ) = @_; 390 | $self->{_reader_nodes} = $in if defined($in); 391 | return $self->{_reader_nodes}; 392 | } 393 | 394 | sub cluster_identifier{ 395 | my ( $self, $in ) = @_; 396 | $self->{_cluster_identifier} = $in if defined($in); 397 | return $self->{_cluster_identifier}; 398 | } 399 | 400 | sub main_segment{ 401 | my ( $self, $main_segment ) = @_; 402 | $self->{_main_segment} = $main_segment if defined($main_segment); 403 | return $self->{_main_segment}; 404 | } 405 | 406 | sub check_timeout{ 407 | my ( $self, $check_timeout ) = @_; 408 | $self->{_check_timeout} = $check_timeout if defined($check_timeout); 409 | return $self->{_check_timeout}; 410 | } 411 | 412 | sub debug{ 413 | my ( $self, $debug ) = @_; 414 | $self->{_debug} = $debug if defined($debug); 415 | return $self->{_debug}; 416 | } 417 | 418 | 419 | sub dbh_proxy{ 420 | my ( $self, $dbh_proxy ) = @_; 421 | 
$self->{_dbh_proxy} = $dbh_proxy if defined($dbh_proxy); 422 | return $self->{_dbh_proxy}; 423 | } 424 | 425 | sub name { 426 | my ( $self, $name ) = @_; 427 | $self->{_name} = $name if defined($name); 428 | return $self->{_name}; 429 | } 430 | 431 | sub nodes { 432 | my ( $self, $nodes ) = @_; 433 | $self->{_nodes} = $nodes if defined($nodes); 434 | return $self->{_nodes}; 435 | } 436 | 437 | sub nodes_maint { 438 | my ( $self, $nodes ) = @_; 439 | $self->{_nodes_maint} = $nodes if defined($nodes); 440 | return $self->{_nodes_maint}; 441 | } 442 | 443 | sub status { 444 | my ( $self, $status ) = @_; 445 | $self->{_status} = $status if defined($status); 446 | return $self->{_status}; 447 | } 448 | 449 | sub size { 450 | my ( $self, $size ) = @_; 451 | $self->{_size} = $size if defined($size); 452 | return $size->{_size}; 453 | } 454 | 455 | sub singlenode { 456 | my ( $self, $singlenode ) = @_; 457 | $self->{_singlenode} = $singlenode if defined($singlenode); 458 | return $self->{_singlenode}; 459 | } 460 | 461 | sub haswriter { 462 | my ( $self, $haswriter ) = @_; 463 | $self->{_haswriter} = $haswriter if defined($haswriter); 464 | return $self->{_haswriter}; 465 | } 466 | 467 | sub singlewriter { 468 | my ( $self, $singlewriter ) = @_; 469 | $self->{_singlewriter} = $singlewriter if defined($singlewriter); 470 | return $self->{_singlewriter}; 471 | } 472 | 473 | sub writer_is_reader { 474 | my ( $self, $writer_is_reader ) = @_; 475 | $self->{_writer_is_reader} = $writer_is_reader if defined($writer_is_reader); 476 | return $self->{_writer_is_reader}; 477 | } 478 | sub writers { 479 | my ( $self, $in ) = @_; 480 | $self->{_writers} = $in if defined($in); 481 | return $self->{_writers}; 482 | } 483 | 484 | sub hostgroups { 485 | my ( $self, $hostgroups ) = @_; 486 | $self->{_hostgroups} = $hostgroups if defined($hostgroups); 487 | return $self->{_hostgroups}; 488 | } 489 | sub hg_writer_id { 490 | my ( $self, $hostgroups ) = @_; 491 | $self->{_hg_writer_id} = $hostgroups if defined($hostgroups); 492 | return $self->{_hg_writer_id}; 493 | } 494 | 495 | sub hg_reader_id { 496 | my ( $self, $hostgroups ) = @_; 497 | $self->{_hg_reader_id} = $hostgroups if defined($hostgroups); 498 | return $self->{_hg_reader_id}; 499 | } 500 | 501 | 502 | sub monitor_user{ 503 | my ( $self, $monitor_user ) = @_; 504 | $self->{_monitor_user} = $monitor_user if defined($monitor_user); 505 | return $self->{_monitor_user}; 506 | } 507 | sub monitor_password { 508 | my ( $self, $monitor_password ) = @_; 509 | $self->{_monitor_password} = $monitor_password if defined($monitor_password); 510 | return $self->{_monitor_password}; 511 | } 512 | # this function is used to identify the nodes in the cluster 513 | # using the HG as reference 514 | sub get_nodes{ 515 | my ( $self) = @_; 516 | 517 | my $dbh = $self->{_dbh_proxy}; 518 | my $cmd =$self->{_SQL_get_mysql_servers}." 
AND hostgroup_id IN (".join(",",sort keys(%{$self->hostgroups})).") order by hostgroup_id, hostname"; 519 | my $sth = $dbh->prepare($cmd); 520 | $sth->execute(); 521 | my $i = 1; 522 | my $locHg = $self->{_hostgroups}; 523 | my $ssl_certificates = ""; 524 | #if a ssl certificate path is defined, will create the path for each certificate and add to the dns string 525 | if(defined $self->{_ssl_certificates_path}){ 526 | $ssl_certificates = ";mysql_ssl_client_key=".$self->{_ssl_certificates_path}."/client-key.pem" 527 | .";mysql_ssl_client_cert=".$self->{_ssl_certificates_path}."/client-cert.pem" 528 | .";mysql_ssl_ca_file=".$self->{_ssl_certificates_path}."/ca.pem" 529 | } 530 | 531 | while (my $ref = $sth->fetchrow_hashref()) { 532 | my $ssl_options="" ; 533 | my $node = GaleraNode->new(); 534 | $node->debug($self->debug); 535 | $node->use_ssl($ref->{use_ssl}); 536 | $node->hostgroups($ref->{hostgroup_id}); 537 | if($node->{_hostgroups} > 8000 538 | && exists $locHg->{$node->{_hostgroups}}){ 539 | $self->{_has_failover_node} = 1; 540 | 541 | } 542 | $node->ip($ref->{hostname}); 543 | $node->port($ref->{port}); 544 | 545 | if($node->use_ssl gt 0 ){ 546 | $ssl_options = ";mysql_ssl=1"; 547 | if($self->debug){print Utils->print_log(4," Galera cluster node " . $node->ip.":". $node->port.":HG=".$node->hostgroups." Using SSL ($ssl_options)\n" ) } 548 | if(defined $self->{_ssl_certificates_path}){ 549 | $ssl_options = $ssl_options . $ssl_certificates; 550 | if($self->debug){print Utils->print_log(4," Certificates also in use ($self->{_ssl_certificates_path})\n")} 551 | } 552 | } 553 | 554 | $node->dns("DBI:mysql:host=".$node->ip.";port=".$node->port.";mysql_connect_timeout=$mysql_connect_timeout".$ssl_options); 555 | 556 | $node->weight($ref->{weight}); 557 | $node->connections($ref->{max_connections}); 558 | $node->user($self->{_monitor_user}); 559 | $node->password($self->{_monitor_password}); 560 | $node->proxy_status($ref->{status}); 561 | $node->comment($ref->{comment}); 562 | $node->set_retry_up_down($self->{_cluster_identifier}); 563 | 564 | $node->gtid_port($ref->{gtid_port}); 565 | $node->compression($ref->{compression}); 566 | $node->max_latency($ref->{max_latency_ms}); 567 | $node->max_replication_lag($ref->{max_replication_lag}); 568 | 569 | 570 | $self->{_nodes}->{$i++}=$node; 571 | $node->debug($self->debug); 572 | 573 | if($self->debug){print Utils->print_log(3," Galera cluster node " . $node->ip.":". $node->port.":HG=".$node->hostgroups."\n" ) } 574 | } 575 | 576 | if($self->debug){print Utils->print_log(3," Galera cluster nodes loaded \n") ; } 577 | } 578 | #Processing the nodes in the cluster and identify which node is active and which is to remove 579 | 580 | sub process_nodes{ 581 | my ( $self ) = @_; 582 | 583 | my $nodes = $self->{_nodes} ; 584 | my $start = gettimeofday(); 585 | my $run_milliseconds=0; 586 | my $init =0; 587 | my $irun = 1; 588 | my %Threads; 589 | my $new_nodes ={} ; 590 | my $processed_nodes ={} ; 591 | 592 | #using multiple threads to connect if a node is present in more than one HG it will have 2 threads 593 | while($irun){ 594 | $irun = 0; 595 | foreach my $key (sort keys %{$self->{_nodes}}){ 596 | if(!exists $Threads{$key}){ 597 | if($self->debug){print Utils->print_log(3, " Creating new thread to manage server check:". 598 | $self->{_nodes}->{$key}->ip.":". 
599 | $self->{_nodes}->{$key}->port.":HG".$self->{_nodes}->{$key}->hostgroups."\n" ) } 600 | $new_nodes->{$key} = $self->{_nodes}->{$key}; 601 | $new_nodes->{$key}->{_process_status} = -1; 602 | $new_nodes->{$key}->{_ssl_certificates_path} = $self->ssl_certificates_path; 603 | 604 | # debug senza threads comment next line 605 | $Threads{$key}=threads->create(sub {return get_node_info($self,$key)}); 606 | 607 | #DEBUG Without threads uncomment from here 608 | #next unless $new_nodes->{$key} = get_node_info($self,$key); 609 | #evaluate_joined_node($self, $key, $new_nodes, $processed_nodes) ; 610 | 611 | # to here 612 | 613 | } 614 | } 615 | ##DEBUG SENZA THREADS commenta da qui 616 | foreach my $thr (sort keys %Threads) { 617 | if($new_nodes->{$thr}->{_process_status} eq -100){ 618 | next; 619 | } 620 | 621 | if ($Threads{$thr}->is_running()) { 622 | my $tid = $Threads{$thr}->tid; 623 | #print " - Thread $tid running\n"; 624 | 625 | if($run_milliseconds > $self->{_check_timeout} ){ 626 | if($self->debug >=0){ 627 | my $timeout = ($run_milliseconds - $self->{_check_timeout}); 628 | print print Utils->print_log(2,"Check timeout Node ip : $new_nodes->{$thr}->{_ip} , THID " . $tid." (taken: $run_milliseconds max_allowed: $self->{_check_timeout} over for ms: $timeout \n") 629 | } 630 | $irun = 0 ; 631 | } 632 | else{ 633 | $irun = 1; 634 | } 635 | } 636 | elsif ( $Threads{$thr}->is_joinable()) { 637 | 638 | my $tid = $Threads{$thr}->tid; 639 | ( $new_nodes->{$thr} ) = $Threads{$thr}->join; 640 | #$processed_nodes = 641 | evaluate_joined_node($self, $thr, $new_nodes, $processed_nodes) ; 642 | 643 | if($self->debug){print Utils->print_log(3," Thread joined : " . $tid."\n" ) } 644 | #print " - Results for thread $tid:\n"; 645 | #print " - Thread $tid has been joined\n"; 646 | } 647 | #print "."; 648 | } 649 | ## a qui 650 | if($self->debug){$run_milliseconds = (gettimeofday() -$start ) *1000}; 651 | #sleep for a time equal to the half of the timeout to save cpu cicle 652 | #usleep(($self->{_check_timeout} * 1000)/2); 653 | } 654 | 655 | $self->{_nodes} = $new_nodes; 656 | if($self->debug){$run_milliseconds = (gettimeofday() -$start ) *1000}; 657 | 658 | if($debug>=3){ 659 | foreach my $key (sort keys %{$new_nodes}){ 660 | if($new_nodes->{$key}->{_process_status} == 1){ 661 | print Utils->print_log(4,$new_nodes->{$key}->{_ip}.":".$new_nodes->{$key}->{_hostgroups}." Processed \n"); 662 | } 663 | else{ 664 | print Utils->print_log(4,$new_nodes->{$key}->{_ip}.":".$new_nodes->{$key}->{_hostgroups}." NOT Processed\n"); 665 | } 666 | } 667 | } 668 | if($self->debug){print Utils->print_log(3," Multi Thread execution done in : " . $run_milliseconds. 
"(ms) \n" )} 669 | 670 | } 671 | 672 | sub evaluate_joined_node($$$$){ 673 | my $self = shift; 674 | my $thr = shift; 675 | my $new_nodes = shift; 676 | my $processed_nodes = shift; 677 | 678 | #count the number of nodes by segment 679 | if($new_nodes->{$thr}->{_proxy_status} ne "OFFLINE_SOFT" 680 | && $new_nodes->{$thr}->{_proxy_status} ne "SHUNNED" 681 | && ($new_nodes->{$thr}->{_process_status} < 0 || 682 | !exists $processed_nodes->{$new_nodes->{$thr}->{_ip}}) 683 | && defined $new_nodes->{$thr}->{_wsrep_segment} 684 | ){ 685 | $self->{_size}->{$new_nodes->{$thr}->{_wsrep_segment}} = (($self->{_size}->{$new_nodes->{$thr}->{_wsrep_segment}}|| 0) +1); 686 | $processed_nodes->{$new_nodes->{$thr}->{_ip}}=$self->{_size}->{$new_nodes->{$thr}->{_wsrep_segment}}; 687 | } 688 | 689 | #assign size to HG 690 | if($new_nodes->{$thr}->{_proxy_status} ne "OFFLINE_SOFT" 691 | && defined $new_nodes->{$thr}->{_wsrep_segment} 692 | ){ 693 | $self->{_hostgroups}->{$new_nodes->{$thr}->{_hostgroups}}->{_size} = ($self->{_hostgroups}->{$new_nodes->{$thr}->{_hostgroups}}->{_size}) + 1; 694 | } 695 | 696 | #checks for ONLINE writer(s) 697 | 698 | if(defined $new_nodes->{$thr}->{_read_only} 699 | && $new_nodes->{$thr}->{_read_only} eq "OFF" 700 | && ($new_nodes->{$thr}->{_proxy_status} eq "ONLINE" || $new_nodes->{$thr}->{_proxy_status} eq "OFFLINE_SOFT") 701 | && ($new_nodes->{$thr}->{_hostgroups} == $self->hg_writer_id || $new_nodes->{$thr}->{_hostgroups} == ($self->hg_writer_id +9000)) 702 | ){ 703 | if($new_nodes->{$thr}->{_hostgroups} == $self->hg_writer_id 704 | && $new_nodes->{$thr}->{_proxy_status} eq "ONLINE" 705 | ){ 706 | $self->{_haswriter} = 1 ; 707 | $self->{_writers} = $self->{_writers} +1; 708 | } 709 | push (@{$self->{_writer_nodes}}, "$new_nodes->{$thr}->{_ip}:$new_nodes->{$thr}->{_port}"); 710 | } 711 | elsif(($new_nodes->{$thr}->{_proxy_status} eq "ONLINE" || $new_nodes->{$thr}->{_proxy_status} eq "OFFLINE_SOFT") 712 | && ($new_nodes->{$thr}->{_hostgroups} == $self->hg_reader_id || $new_nodes->{$thr}->{_hostgroups} == ($self->hg_reader_id +9000)) 713 | ){ 714 | push (@{$self->{_reader_nodes}}, "$new_nodes->{$thr}->{_ip}:$new_nodes->{$thr}->{_port}"); 715 | } 716 | else{ 717 | if($self->debug 718 | && $new_nodes->{$thr}->{_hostgroups} == $self->hg_writer_id){ 719 | print Utils->print_log(3," Not a writer :" .$new_nodes->{$thr}->{_ip} . 
" HG: $new_nodes->{$thr}->{_hostgroups} \n" ) 720 | } 721 | } 722 | # check if under maintenance 723 | if($new_nodes->{$thr}->{_proxy_status} eq "OFFLINE_SOFT" 724 | && $new_nodes->{$thr}->{_pxc_maint_mode} eq "MAINTENANCE"){ 725 | $self->{_nodes_maint}->{$thr} = $new_nodes->{$thr}; 726 | } 727 | #return $processed_nodes; 728 | 729 | } 730 | 731 | sub get_node_info($$){ 732 | my $self = shift; 733 | my $key = shift; 734 | my $nodes =shift; 735 | my ( $node ) = $self->{_nodes}->{$key}; 736 | if(!defined $node->get_node_info()){ 737 | $node->{_process_status}=-100; 738 | } 739 | 740 | return $node; 741 | 742 | } 743 | 744 | } 745 | 746 | { 747 | package GaleraNode; 748 | #Node Proxy States 749 | sub new { 750 | my $class = shift; 751 | my $SQL_get_variables="SHOW GLOBAL VARIABLES LIKE 'wsrep%"; 752 | my $SQL_get_status="SHOW GLOBAL STATUS LIKE 'wsrep%"; 753 | my $SQL_get_read_only="SHOW GLOBAL VARIABLES LIKE 'read_only'"; 754 | 755 | # Variable section for looping values 756 | #Generalize object for now I have conceptualize as: 757 | # Node (generic container) 758 | # Node->{name} This is the cluster name 759 | # Node->{IP} 760 | # Node->{hostgroups} 761 | # Node->{clustername} This is the cluster name 762 | # Node->{read_only} Read only node 763 | # Node->{wsrep_status} node status (OPEN 0,Primary 1,Joiner 2,Joined 3,Synced 4,Donor 5) 764 | # Node->{wsrep_rejectqueries} (NON, ALL,ALL_KILL) 765 | # Node->{wsrep_donorrejectqueries} If true the node when donor 766 | # Node->{wsrep_connected}=0; if false 1 if true meaning only one ACTIVE node in the cluster 767 | # Node->{wsrep_desinccount}=0; 0 if false 1 if true at least a node is fully active as writer 768 | # Node->{wsrep_ready} ON -OFF 769 | 770 | my $self = { 771 | _name => undef, 772 | _ip => undef, 773 | _port => 3306, 774 | _hostgroups => undef, 775 | _clustername => undef, 776 | _read_only => undef, 777 | _wsrep_status => -1, 778 | _wsrep_rejectqueries => undef, 779 | _wsrep_donorrejectqueries => undef, 780 | _wsrep_connected => undef, 781 | _wsrep_desinccount => undef, 782 | _wsrep_ready => undef, 783 | _wsrep_provider => [], 784 | _wsrep_segment => 1000, 785 | _wsrep_pc_weight => 1, 786 | _SQL_get_variables => $SQL_get_variables, 787 | _SQL_get_status=> $SQL_get_status, 788 | _SQL_get_read_only=> $SQL_get_read_only, 789 | _dns => undef, 790 | _user => undef, 791 | _password => undef, 792 | _debug => 0, 793 | _port => undef, 794 | _proxy_status => undef, 795 | _weight => 1, 796 | _connections => 2000, 797 | _cluster_status => undef, 798 | _cluster_size => 0, 799 | _process_status => -1, 800 | _MOVE_UP_OFFLINE => 1000, #move a node from OFFLINE_SOFT 801 | _MOVE_UP_HG_CHANGE => 1010, #move a node from HG 9000 (plus hg id) to reader HG 802 | _MOVE_DOWN_HG_CHANGE => 3001, #move a node from original HG to maintenance HG (HG 9000 (plus hg id) ) kill all existing connections 803 | _MOVE_DOWN_OFFLINE => 3010 , # move node to OFFLINE_soft keep existing connections, no new connections. 804 | _MOVE_TO_MAINTENANCE => 3020 , # move node to OFFLINE_soft keep existing connections, no new connections because maintenance. 805 | _MOVE_OUT_MAINTENANCE => 3030 , # move node to OFFLINE_soft keep existing connections, no new connections because maintenance. 
806 | _INSERT_READ => 4010, # Insert a node in the reader host group 807 | _INSERT_WRITE => 4020, # Insert a node in the writer host group 808 | _DELETE_NODE => 5000, # this remove the node from the hostgroup 809 | _SAVE_RETRY => 9999, # this reset the retry counter in the comment 810 | #_MOVE_SWAP_READER_TO_WRITER => 5001, #Future use 811 | #_MOVE_SWAP_WRITER_TO_READER => 5010, #Future use 812 | _retry_down_saved => 0, # number of retry on a node before declaring it as failed. 813 | _retry_up_saved => 0, # number of retry on a node before declaring it OK. 814 | _comment => undef, 815 | _gtid_port => 0, 816 | _compression => 0, 817 | _use_ssl => 0, 818 | _ssl_certificates_path => undef, 819 | _max_latency => 0, 820 | _max_replication_lag => 0, 821 | _wsrep_gcomm_uuid => undef, 822 | _wsrep_local_index => 0, 823 | _pxc_maint_mode => undef, 824 | 825 | }; 826 | bless $self, $class; 827 | return $self; 828 | 829 | } 830 | 831 | sub ssl_certificates_path{ 832 | my ( $self, $in ) = @_; 833 | $self->{_ssl_certificates_path} = $in if defined($in); 834 | return $self->{_ssl_certificates_path}; 835 | } 836 | 837 | sub max_replication_lag{ 838 | my ( $self, $in ) = @_; 839 | $self->{_max_replication_lag} = $in if defined($in); 840 | return $self->{_max_replication_lag}; 841 | } 842 | 843 | sub max_latency{ 844 | my ( $self, $in ) = @_; 845 | $self->{_max_latency} = $in if defined($in); 846 | return $self->{_max_latency}; 847 | } 848 | 849 | sub use_ssl{ 850 | my ( $self, $in ) = @_; 851 | $self->{_use_ssl} = $in if defined($in); 852 | return $self->{_use_ssl}; 853 | } 854 | 855 | sub compression{ 856 | my ( $self, $in ) = @_; 857 | $self->{_compression} = $in if defined($in); 858 | return $self->{_compression}; 859 | } 860 | 861 | sub gtid_port{ 862 | my ( $self, $in ) = @_; 863 | $self->{_gtid_port} = $in if defined($in); 864 | return $self->{_gtid_port}; 865 | } 866 | 867 | sub pxc_maint_mode{ 868 | my ( $self, $in ) = @_; 869 | $self->{_pxc_maint_mode} = $in if defined($in); 870 | return $self->{_pxc_maint_mode}; 871 | } 872 | sub wsrep_local_index{ 873 | my ( $self, $in ) = @_; 874 | $self->{_wsrep_local_index} = $in if defined($in); 875 | return $self->{_wsrep_local_index}; 876 | } 877 | 878 | sub comment{ 879 | my ( $self, $in ) = @_; 880 | $self->{_comment} = $in if defined($in); 881 | return $self->{_comment}; 882 | } 883 | 884 | sub wsrep_gcomm_uuid{ 885 | my ( $self, $in ) = @_; 886 | $self->{_wsrep_gcomm_uuid} = $in if defined($in); 887 | return $self->{_wsrep_gcomm_uuid}; 888 | } 889 | 890 | sub retry_down_saved{ 891 | my ( $self, $in ) = @_; 892 | $self->{_retry_down_saved} = $in if defined($in); 893 | return $self->{_retry_down_saved}; 894 | } 895 | 896 | sub retry_up_saved{ 897 | my ( $self, $in ) = @_; 898 | $self->{_retry_up_saved} = $in if defined($in); 899 | return $self->{_retry_up_saved}; 900 | } 901 | 902 | sub process_status { 903 | my ( $self, $process_status ) = @_; 904 | $self->{_process_status} = $process_status if defined($process_status); 905 | return $self->{_process_status}; 906 | } 907 | 908 | sub debug{ 909 | my ( $self, $debug ) = @_; 910 | $self->{_debug} = $debug if defined($debug); 911 | return $self->{_debug}; 912 | } 913 | 914 | sub SAVE_RETRY { 915 | my ( $self) = @_; 916 | return $self->{_SAVE_RETRY}; 917 | } 918 | 919 | sub MOVE_UP_OFFLINE { 920 | my ( $self) = @_; 921 | return $self->{_MOVE_UP_OFFLINE}; 922 | } 923 | 924 | sub MOVE_UP_HG_CHANGE { 925 | my ( $self) = @_; 926 | return $self->{_MOVE_UP_HG_CHANGE}; 927 | } 928 | 929 | sub MOVE_DOWN_OFFLINE { 
930 | my ( $self) = @_; 931 | return $self->{_MOVE_DOWN_OFFLINE}; 932 | } 933 | 934 | 935 | sub MOVE_TO_MAINTENANCE { 936 | my ( $self) = @_; 937 | return $self->{_MOVE_TO_MAINTENANCE}; 938 | } 939 | 940 | sub MOVE_OUT_MAINTENANCE { 941 | my ( $self) = @_; 942 | return $self->{_MOVE_OUT_MAINTENANCE}; 943 | } 944 | 945 | sub MOVE_DOWN_HG_CHANGE { 946 | my ( $self) = @_; 947 | return $self->{_MOVE_DOWN_HG_CHANGE}; 948 | } 949 | sub DELETE_NODE { 950 | my ( $self) = @_; 951 | return $self->{_DELETE_NODE}; 952 | } 953 | sub INSERT_READ { 954 | my ( $self) = @_; 955 | return $self->{_INSERT_READ}; 956 | } 957 | sub INSERT_WRITE { 958 | my ( $self) = @_; 959 | return $self->{_INSERT_WRITE}; 960 | } 961 | 962 | sub cluster_status { 963 | my ( $self, $status ) = @_; 964 | $self->{_cluster_status} = $status if defined($status); 965 | return $self->{_cluster_status}; 966 | } 967 | 968 | sub cluster_size { 969 | my ( $self, $size ) = @_; 970 | $self->{_cluster_size} = $size if defined($size); 971 | return $size->{_cluster_size}; 972 | } 973 | 974 | sub weight { 975 | my ( $self, $weight ) = @_; 976 | $self->{_weight} = $weight if defined($weight); 977 | return $self->{_weight}; 978 | } 979 | 980 | sub connections { 981 | my ( $self, $connections ) = @_; 982 | $self->{_connections} = $connections if defined($connections); 983 | return $self->{_connections}; 984 | } 985 | 986 | sub proxy_status { 987 | my ( $self, $status ) = @_; 988 | $self->{_proxy_status} = $status if defined($status); 989 | return $self->{_proxy_status}; 990 | } 991 | 992 | sub dns { 993 | my ( $self, $dns ) = @_; 994 | $self->{_dns} = $dns if defined($dns); 995 | return $self->{_dns}; 996 | } 997 | 998 | sub user{ 999 | my ( $self, $user ) = @_; 1000 | $self->{_user} = $user if defined($user); 1001 | return $self->{_user}; 1002 | } 1003 | sub password { 1004 | my ( $self, $password ) = @_; 1005 | $self->{_password} = $password if defined($password); 1006 | return $self->{_password}; 1007 | } 1008 | sub name { 1009 | my ( $self, $name ) = @_; 1010 | $self->{_name} = $name if defined($name); 1011 | return $self->{_name}; 1012 | } 1013 | 1014 | sub ip { 1015 | my ( $self, $ip ) = @_; 1016 | $self->{_ip} = $ip if defined($ip); 1017 | return $self->{_ip}; 1018 | } 1019 | sub port { 1020 | my ( $self, $port ) = @_; 1021 | $self->{_port} = $port if defined($port); 1022 | return $self->{_port}; 1023 | } 1024 | 1025 | 1026 | sub hostgroups { 1027 | my ( $self, $hostgroups ) = @_; 1028 | $self->{_hostgroups} = $hostgroups if defined($hostgroups); 1029 | return $self->{_hostgroups}; 1030 | } 1031 | 1032 | sub clustername { 1033 | my ( $self, $clustername ) = @_; 1034 | $self->{_clustername} = $clustername if defined($clustername); 1035 | return $self->{_clustername}; 1036 | } 1037 | 1038 | sub read_only { 1039 | my ( $self, $read_only ) = @_; 1040 | $self->{_read_only} = $read_only if defined($read_only); 1041 | return $self->{_read_only}; 1042 | } 1043 | 1044 | sub wsrep_status { 1045 | my ( $self, $wsrep_status ) = @_; 1046 | $self->{_wsrep_status} = $wsrep_status if defined($wsrep_status); 1047 | return $self->{_wsrep_status}; 1048 | } 1049 | 1050 | sub wsrep_rejectqueries { 1051 | my ( $self, $wsrep_rejectqueries ) = @_; 1052 | $self->{_wsrep_rejectqueries} = $wsrep_rejectqueries if defined($wsrep_rejectqueries); 1053 | return $self->{_wsrep_rejectqueries}; 1054 | } 1055 | 1056 | sub wsrep_donorrejectqueries { 1057 | my ( $self, $wsrep_donorrejectqueries ) = @_; 1058 | $self->{_wsrep_donorrejectqueries} = $wsrep_donorrejectqueries if 
defined($wsrep_donorrejectqueries); 1059 | return $self->{_wsrep_donorrejectqueries}; 1060 | } 1061 | 1062 | sub wsrep_connected { 1063 | my ( $self, $wsrep_connected ) = @_; 1064 | $self->{_wsrep_connected} = $wsrep_connected if defined($wsrep_connected); 1065 | return $self->{_wsrep_connected}; 1066 | } 1067 | 1068 | sub wsrep_desinccount { 1069 | my ( $self, $wsrep_desinccount ) = @_; 1070 | $self->{_wsrep_desinccount} = $wsrep_desinccount if defined($wsrep_desinccount); 1071 | return $self->{_wsrep_desinccount}; 1072 | } 1073 | 1074 | 1075 | sub wsrep_ready { 1076 | my ( $self, $wsrep_ready ) = @_; 1077 | $self->{_wsrep_ready} = $wsrep_ready if defined($wsrep_ready); 1078 | return $self->{_wsrep_ready}; 1079 | } 1080 | 1081 | sub wsrep_segment { 1082 | my ( $self, $wsrep_segment ) = @_; 1083 | $self->{_wsrep_segment} = $wsrep_segment if defined($wsrep_segment); 1084 | return $self->{_wsrep_segment}; 1085 | } 1086 | 1087 | sub wsrep_pc_weight { 1088 | my ( $self, $wsrep_pc_weight ) = @_; 1089 | $self->{_wsrep_pc_weight} = $wsrep_pc_weight if defined($wsrep_pc_weight); 1090 | return $self->{_wsrep_pc_weight}; 1091 | } 1092 | 1093 | sub wsrep_provider { 1094 | my ( $self, $wsrep_provider ) = @_; 1095 | my ( @array)= @{$wsrep_provider} ; 1096 | my %provider_map ; 1097 | foreach my $item (@array){ 1098 | my @items = split('\=', $item); 1099 | $provider_map{Utils::trim($items[0])}=$items[1]; 1100 | } 1101 | ($self->{_wsrep_provider}) = {%provider_map} ; 1102 | $self->wsrep_segment($provider_map{"gmcast.segment"}); 1103 | $self->wsrep_pc_weight($provider_map{"pc.weight"}); 1104 | return $self->{_wsrep_provider}; 1105 | } 1106 | 1107 | sub get_node_info($$){ 1108 | my ( $self ) = @_; 1109 | 1110 | if($self->debug >=1){ 1111 | print Utils->print_log(4," Node check START " 1112 | .$self->{_ip} 1113 | .":".$self->{_port} 1114 | .":HG".$self->{_hostgroups} 1115 | ."\n" ); 1116 | } 1117 | if($self->debug >=1){ 1118 | print Utils->print_log(4," Getting connection START " 1119 | .$self->{_ip} 1120 | .":".$self->{_port} 1121 | .":HG".$self->{_hostgroups}." \n" ); 1122 | } 1123 | my $dbh = Utils::get_connection($self->{_dns},$self->{_user},$self->{_password},' '); 1124 | if(!defined $dbh){ 1125 | print Utils->print_log(1," Node is not responding setting it as SHUNNED (internally) (ProxySQL bug - #2658)" 1126 | .$self->{_ip} 1127 | .":".$self->{_port} 1128 | .":HG".$self->{_hostgroups}." \n" ); 1129 | $self->{_proxy_status} = "SHUNNED"; 1130 | return $self ; 1131 | 1132 | } 1133 | if($self->debug >=1){ 1134 | print Utils->print_log(4," Getting connection END " 1135 | .$self->{_ip} 1136 | .":".$self->{_port} 1137 | .":HG".$self->{_hostgroups}." \n" ); 1138 | 1139 | } 1140 | 1141 | if($self->debug >=1){ 1142 | print Utils->print_log(4," Getting NODE info START " 1143 | .$self->{_ip} 1144 | .":".$self->{_port} 1145 | .":HG".$self->{_hostgroups}." \n" ); 1146 | } 1147 | 1148 | my $variables = Utils::get_variables($dbh,0); 1149 | my $status = Utils::get_status_by_name($dbh,0,"wsrep_%"); 1150 | my $pxc_view = Utils::get_pxc_clusterview($dbh, $status->{wsrep_gcomm_uuid} ); 1151 | 1152 | if($self->debug >=1){ 1153 | print Utils->print_log(4," Getting NODE info END " 1154 | .$self->{_ip} 1155 | .":".$self->{_port} 1156 | .":HG".$self->{_hostgroups}." 
\n" ); 1157 | } 1158 | 1159 | $self->{_name} = $variables->{wsrep_node_name}; 1160 | $self->{_clustername} = $variables->{wsrep_cluster_name}; 1161 | $self->{_read_only} = $variables->{read_only}; 1162 | $self->{_wsrep_rejectqueries} = $variables->{wsrep_reject_queries}; 1163 | #print "AAAAAAAAAAAAAAAAAAAAA $self->{_ip} $self->{_wsrep_rejectqueries} \n"; 1164 | $self->{_wsrep_donorrejectqueries} = $variables->{wsrep_sst_donor_rejects_queries}; 1165 | my ( @provider ) = split('\;', $variables->{wsrep_provider_options}); 1166 | $self->{_pxc_maint_mode} = $variables->{pxc_maint_mode}; 1167 | 1168 | $self->wsrep_provider( [ @provider]) ; 1169 | $self->{_wsrep_status} = $status->{wsrep_local_state}; 1170 | $self->{_wsrep_connected} = $status->{wsrep_connected}; 1171 | $self->{_wsrep_desinccount} = $status->{wsrep_desync_count}; 1172 | $self->{_wsrep_ready} = $status->{wsrep_ready}; 1173 | $self->{_cluster_status} = $status->{wsrep_cluster_status}; 1174 | $self->{_cluster_size} = $status->{wsrep_cluster_size}; 1175 | $self->{_wsrep_gcomm_uuid} = $status->{wsrep_gcomm_uuid}; 1176 | $self->{wsrep_segment} = ($self->{_wsrep_provider}->{"gmcast.segment"} ); 1177 | $self->{wsrep_segment} =~ s/^\s+|\s+$//g; 1178 | $self->{_wsrep_local_index} = $pxc_view->{local_index}; 1179 | if($self->{wsrep_segment} == 0){ 1180 | $self->{_wsrep_segment} = $pxc_view->{segment}; 1181 | } 1182 | 1183 | $dbh->disconnect if (defined $dbh); 1184 | #sleep 5; 1185 | 1186 | $self->{_process_status} = 1; 1187 | if($self->debug>=1){ 1188 | print Utils->print_log(4," Node check END " 1189 | .$self->{_ip} 1190 | .":".$self->{_port} 1191 | .":HG".$self->{_hostgroups} 1192 | ."\n" );} 1193 | 1194 | return $self; 1195 | 1196 | } 1197 | 1198 | sub set_retry_up_down(){ 1199 | my ( $self, $hg ) = @_; 1200 | if($self->debug >=1){print Utils->print_log(4,"Calculate retry from comment Node:".$self->ip." port:".$self->port . " hg:".$self->hostgroups ." Time IN \n");} 1201 | 1202 | my %comments = split /[;=]/, $self->{_comment}; 1203 | if(exists $comments{$hg."_retry_up"}){ 1204 | $self->{_retry_up_saved} = $comments{$hg."_retry_up"}; 1205 | } 1206 | else{ 1207 | $self->{_retry_up_saved} = 0; 1208 | } 1209 | if(exists $comments{$hg."_retry_down"}){ 1210 | $self->{_retry_down_saved} = $comments{$hg."_retry_down"}; 1211 | } 1212 | else{ 1213 | $self->{_retry_down_saved} = 0; 1214 | } 1215 | my $removeUp=$hg."_retry_up=".$self->{_retry_up_saved}.";"; 1216 | my $removeDown=$hg."_retry_down=".$self->{_retry_down_saved}.";"; 1217 | $self->{_comment} =~ s/$removeDown//ig ; 1218 | $self->{_comment} =~ s/$removeUp//ig ; 1219 | 1220 | if($self->debug >=1){print Utils->print_log(4,"Calculate retry from comment Node:".$self->ip." port:".$self->port . " hg:".$self->hostgroups ." Time OUT \n");} 1221 | } 1222 | 1223 | sub get_retry_up(){ 1224 | my ( $self,$in) = @_; 1225 | $self->{_retry_up_saved} = $in if defined($in); 1226 | return $self->{_retry_up_saved}; 1227 | } 1228 | 1229 | sub get_retry_down(){ 1230 | my ( $self,$in) = @_; 1231 | $self->{_retry_down_saved} = $in if defined($in); 1232 | return $self->{_retry_down_saved}; 1233 | } 1234 | 1235 | sub promote_writer(){ 1236 | my ( $self,$proxynode,$Galera_cluster,$exclude_delete ) = @_; 1237 | if($self->{_hostgroups} > 8000){ 1238 | print Utils->print_log(3,"Special Backup - Group found! I am electing a node to writer following the indications\n This Node Try to become the new" 1239 | ." 
WRITER for HG $proxynode->{_hg_writer_id} Server details: " 1240 | .$self->{_ip} 1241 | .":".$self->{_port} 1242 | .":HG".$self->{_hostgroups} 1243 | ."\n" ); 1244 | 1245 | } 1246 | print Utils->print_log(3,"This Node Try to become a WRITER promoting to HG $proxynode->{_hg_writer_id} " 1247 | .$self->{_ip} 1248 | .":".$self->{_port} 1249 | .":HG ".$self->{_hostgroups} 1250 | ."\n" ); 1251 | 1252 | #my $dbh = Utils::get_connection($self->{_dns},$self->{_user},$self->{_password},' '); 1253 | #if(!defined $dbh){ 1254 | # return undef; 1255 | #} 1256 | #(9000 + $proxynode->{_hg_writer_id}) 1257 | 1258 | my $proxy_sql_command= "INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections,use_ssl,compression,max_latency_ms) VALUES ('$self->{_ip}',$proxynode->{_hg_writer_id},$self->{_port},$self->{_weight},$self->{_connections},$self->{_use_ssl},$self->{_compression},$self->{_max_latency});"; 1259 | if($Galera_cluster->{_singlewriter} > 0){ 1260 | my $delete = "DELETE from mysql_servers where hostgroup_id in ($proxynode->{_hg_writer_id},".(9000 + $proxynode->{_hg_writer_id}).") AND STATUS = 'ONLINE'".$exclude_delete; 1261 | print Utils->print_log(2," DELETE from writer group as: " 1262 | ." SQL:" .$delete 1263 | ."\n" ); 1264 | 1265 | $proxynode->{_dbh_proxy}->do($delete) or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 1266 | } 1267 | #if the writer is NOT a reader by default remove it from reader groups also the 1268 | if($Galera_cluster->{_writer_is_reader} < 1 ){ 1269 | my $delete = "DELETE from mysql_servers where hostgroup_id in ($proxynode->{_hg_reader_id},".(9000 + $proxynode->{_hg_reader_id}).") and hostname = '$self->{_ip}' and port=$self->{_port} "; 1270 | $proxynode->{_dbh_proxy}->do($delete) or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 1271 | 1272 | } 1273 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 1274 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 1275 | $proxynode->{_dbh_proxy}->do("SAVE MYSQL SERVERS TO DISK") or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 1276 | print Utils->print_log(2," Move node:" 1277 | .$self->{_ip}.":" 1278 | .$self->{_port} 1279 | .$self->{_weight} 1280 | .$self->{_connections} 1281 | .$proxynode->{hg_writer_id} 1282 | ." SQL:" .$proxy_sql_command 1283 | ."\n" ); 1284 | 1285 | 1286 | 1287 | return 1; 1288 | } 1289 | 1290 | sub move_to_writer_hg(){ 1291 | my ( $self ) = @_; 1292 | 1293 | print Utils->print_log(3,"This Node Try to become a WRITER set READ_ONLY to 0 " 1294 | .$self->{_ip} 1295 | .":".$self->{_port} 1296 | .":HG".$self->{_hostgroups} 1297 | ."\n" ); 1298 | 1299 | my $dbh = Utils::get_connection($self->{_dns},$self->{_user},$self->{_password},' '); 1300 | if(!defined $dbh){ 1301 | return undef; 1302 | } 1303 | if($dbh->do("SET GLOBAL READ_ONLY=0")){ 1304 | print Utils->print_log(3,"This Node NOW HAS READ_ONLY = 0 " 1305 | .$self->{_ip} 1306 | .":".$self->{_port} 1307 | .":HG".$self->{_hostgroups} 1308 | ."\n" ); 1309 | } 1310 | else{ 1311 | die "Couldn't execute statement: " . $dbh->errstr; 1312 | } 1313 | $dbh->disconnect if (defined $dbh); 1314 | #or die "Couldn't execute statement: " . 
$dbh->errstr; 1315 | return 1; 1316 | } 1317 | } 1318 | 1319 | { 1320 | package ProxySqlNode; 1321 | sub new { 1322 | my $class = shift; 1323 | 1324 | my $SQL_get_monitor = "select variable_name name,variable_value value from global_variables where variable_name in( 'mysql-monitor_username','mysql-monitor_password','mysql-monitor_read_only_timeout' ) order by 1"; 1325 | my $SQL_get_hostgroups = "select distinct hostgroup_id hg_isd from runtime_mysql_servers order by 1;"; 1326 | my $SQL_get_rep_hg = "select writer_hostgroup,reader_hostgroup from mysql_replication_hostgroups order by 1;"; 1327 | my $SQL_get_pxc_cluster_view = "select * from performance_schema.pxc_cluster_view order by SEGMENT, LOCAL_INDEX;"; 1328 | 1329 | # Variable section for looping values 1330 | #Generalize object for now I have conceptualize as: 1331 | # Proxy (generic container) 1332 | # Proxy->{DNS} conenction reference 1333 | # Proxy->{PID} processes pid (angel and real) 1334 | # Proxy->{hostgroups} 1335 | # Proxy->{user} This is the user name 1336 | # Proxy->{password} 1337 | # Proxy->{port} node status (OPEN 0,Primary 1,Joiner 2,Joined 3,Synced 4,Donor 5) 1338 | 1339 | my $self = { 1340 | _dns => undef, 1341 | _pid => undef, 1342 | _hostgroups => undef, 1343 | _hg_writer_id => 0, 1344 | _hg_reader_id => 0, 1345 | _user => undef, 1346 | _password => undef, 1347 | _port => undef, 1348 | _monitor_user => undef, 1349 | _monitor_password => undef, 1350 | _SQL_get_monitor => $SQL_get_monitor, 1351 | _SQL_get_hg=> $SQL_get_hostgroups, 1352 | _SQL_get_replication_hg=> $SQL_get_rep_hg, 1353 | _dbh_proxy => undef, 1354 | _check_timeout => 800, #timeout in ms 1355 | _action_nodes => {}, 1356 | _retry_down => 0, # number of retry on a node before declaring it as failed. 1357 | _retry_up => 0, # number of retry on a node before declaring it OK. 
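# retry_up/retry_down state is persisted in the mysql_servers.comment field, keyed by the
# cluster identifier (the -H string with ':' and ',' replaced by '_') and parsed back by
# GaleraNode::set_retry_up_down(). With -H=500:W,501:R the comment carries entries such as
# (illustrative values):
#
#   500_W_501_R_retry_up=0;500_W_501_R_retry_down=2;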
1358 | _status_changed => 0, #if 1 something had happen and a node had be modify 1359 | _require_failover => 0, # Valid values are: 1360 | # 0 [default] do not make failover 1361 | # 1 make failover only if HG 8000 is specified in ProxySQL mysl_servers 1362 | # 2 use PXC_CLUSTER_VIEW to identify a server in the same segment 1363 | # 3 do whatever to keep service up also failover to another segment (use PXC_CLUSTER_VIEW) 1364 | 1365 | 1366 | }; 1367 | bless $self, $class; 1368 | return $self; 1369 | 1370 | } 1371 | sub require_failover{ 1372 | my ( $self, $in ) = @_; 1373 | $self->{_require_failover} = $in if defined($in); 1374 | return $self->{_require_failover}; 1375 | } 1376 | 1377 | sub hg_reader_id{ 1378 | my ( $self, $in ) = @_; 1379 | $self->{_hg_reader_id} = $in if defined($in); 1380 | return $self->{_hg_reader_id}; 1381 | } 1382 | 1383 | sub status_changed{ 1384 | my ( $self, $in ) = @_; 1385 | $self->{_status_changed} = $in if defined($in); 1386 | return $self->{_status_changed}; 1387 | } 1388 | 1389 | sub retry_down{ 1390 | my ( $self, $in ) = @_; 1391 | $self->{_retry_down} = $in if defined($in); 1392 | return $self->{_retry_down}; 1393 | } 1394 | 1395 | sub retry_up{ 1396 | my ( $self, $in ) = @_; 1397 | $self->{_retry_up} = $in if defined($in); 1398 | return $self->{_retry_up}; 1399 | } 1400 | 1401 | 1402 | sub debug{ 1403 | my ( $self, $debug ) = @_; 1404 | $self->{_debug} = $debug if defined($debug); 1405 | return $self->{_debug}; 1406 | } 1407 | 1408 | sub action_nodes { 1409 | my ( $self, $action_nodes ) = @_; 1410 | $self->{_action_nodes} = $action_nodes if defined($action_nodes); 1411 | return $self->{_action_nodes}; 1412 | } 1413 | 1414 | sub dns { 1415 | my ( $self, $dns ) = @_; 1416 | $self->{_dns} = $dns if defined($dns); 1417 | return $self->{_dns}; 1418 | } 1419 | 1420 | sub dbh_proxy{ 1421 | my ( $self, $dbh_proxy ) = @_; 1422 | $self->{_dbh_proxy} = $dbh_proxy if defined($dbh_proxy); 1423 | return $self->{_dbh_proxy}; 1424 | } 1425 | 1426 | sub pid { 1427 | my ( $self, $pid ) = @_; 1428 | $self->{_pid} = $pid if defined($pid); 1429 | return $self->{_pid}; 1430 | } 1431 | 1432 | sub hg_writer_id { 1433 | my ( $self, $pid ) = @_; 1434 | $self->{_hg_writer_id} = $pid if defined($pid); 1435 | return $self->{_hg_writer_id}; 1436 | } 1437 | 1438 | sub hostgroups { 1439 | my ( $self, $hostgroups ) = @_; 1440 | if (defined $hostgroups){ 1441 | my @HGIds=split('\,', $Param->{hostgroups}); 1442 | 1443 | foreach my $hg (@HGIds){ 1444 | my $proxy_hg = ProxySqlHG->new(); 1445 | my $proxy_hgM = ProxySqlHG->new(); 1446 | my $proxy_hgB = ProxySqlHG->new(); 1447 | my ($id,$type) = split /:/, $hg; 1448 | $proxy_hg->id($id); 1449 | $proxy_hg->type(lc($type)); 1450 | if(lc($type) eq 'w'){ 1451 | $self->hg_writer_id($id); 1452 | } 1453 | if(lc($type) eq 'r'){ 1454 | $self->hg_reader_id($id); 1455 | } 1456 | 1457 | $self->{_hostgroups}->{$id}=($proxy_hg); 1458 | $proxy_hgM->id(($id + 9000)); 1459 | $proxy_hgM->type("m".lc($type)); 1460 | $self->{_hostgroups}->{$proxy_hgM->id(($id + 9000))}=($proxy_hgM); 1461 | #add a special group in case of back server for failover 1462 | if(lc($type) eq "w"){ 1463 | $proxy_hgM->id(($id + 8000)); 1464 | $proxy_hgM->type("b".lc($type)); 1465 | $self->{_hostgroups}->{$proxy_hgM->id(($id + 8000))}=($proxy_hgM); 1466 | } 1467 | if(lc($type) eq "r"){ 1468 | $proxy_hgM->id(($id + 8000)); 1469 | $proxy_hgM->type("b".lc($type)); 1470 | $self->{_hostgroups}->{$proxy_hgM->id(($id + 8000))}=($proxy_hgM); 1471 | } 1472 | 1473 | if($self->debug >=1){print 
Utils->print_log(3," Inizializing hostgroup " . $proxy_hg->id ." ".$proxy_hg->type . "with maintenance HG ". $proxy_hgM->id ." ".$proxy_hgM->type."\n") ; } 1474 | } 1475 | } 1476 | return $self->{_hostgroups}; 1477 | } 1478 | 1479 | sub user{ 1480 | my ( $self, $user ) = @_; 1481 | $self->{_user} = $user if defined($user); 1482 | return $self->{_user}; 1483 | } 1484 | sub password { 1485 | my ( $self, $password ) = @_; 1486 | $self->{_password} = $password if defined($password); 1487 | return $self->{_password}; 1488 | } 1489 | 1490 | sub monitor_user{ 1491 | my ( $self, $monitor_user ) = @_; 1492 | $self->{_monitor_user} = $monitor_user if defined($monitor_user); 1493 | return $self->{_monitor_user}; 1494 | } 1495 | sub monitor_password { 1496 | my ( $self, $monitor_password ) = @_; 1497 | $self->{_monitor_password} = $monitor_password if defined($monitor_password); 1498 | return $self->{_monitor_password}; 1499 | } 1500 | 1501 | sub port { 1502 | my ( $self, $port ) = @_; 1503 | $self->{_port} = $port if defined($port); 1504 | return $self->{_port}; 1505 | } 1506 | 1507 | sub check_timeout{ 1508 | my ( $self, $check_timeout ) = @_; 1509 | $self->{_check_timeout} = $check_timeout if defined($check_timeout); 1510 | return $self->{_check_timeout}; 1511 | } 1512 | 1513 | #Connect method connect an populate the cluster returns the Galera cluster 1514 | sub connect{ 1515 | my ( $self, $port ) = @_; 1516 | my $dbh = Utils::get_connection($self->{_dns}, $self->{_user}, $self->{_password},' '); 1517 | $self->{_dbh_proxy} = $dbh; 1518 | 1519 | # get monitor user/pw 1520 | my $cmd = $self->{_SQL_get_monitor}; 1521 | 1522 | 1523 | my $sth = $dbh->prepare($cmd); 1524 | $sth->execute(); 1525 | while (my $ref = $sth->fetchrow_hashref()) { 1526 | if($ref->{'name'} eq 'mysql-monitor_password' ){$self->{_monitor_password} = $ref->{'value'};} 1527 | if($ref->{'name'} eq 'mysql-monitor_username' ) {$self->{_monitor_user} = $ref->{'value'};} 1528 | #This is for now comment out. 1529 | # this is related to issue #10, where the node is not answering in time to the check. 1530 | # The timeout cannot be the same of the the ProxySQL read_only check 1531 | #if($ref->{'name'} eq 'mysql-monitor_read_only_timeout' ) {$self->{_check_timeout} = $ref->{'value'};} 1532 | 1533 | } 1534 | if($self->debug >=1){print Utils->print_log(3," Connecting to ProxySQL " . $self->{_dns}. 
"\n" ); } 1535 | 1536 | } 1537 | 1538 | sub disconnect{ 1539 | my ( $self, $port ) = @_; 1540 | $self->{_dbh_proxy}->disconnect; 1541 | 1542 | 1543 | } 1544 | sub get_galera_cluster{ 1545 | my ( $self, $in ) = @_; 1546 | $self->{_galera_cluster} = $in if defined($in); 1547 | return $self->{_galera_cluster}; 1548 | } 1549 | 1550 | sub set_galera_cluster(){ 1551 | my ( $self, $port ) = @_; 1552 | my $galera_cluster = Galeracluster->new(); 1553 | 1554 | $galera_cluster->hostgroups($self->hostgroups); 1555 | $galera_cluster->dbh_proxy($self->dbh_proxy); 1556 | $galera_cluster->check_timeout($self->check_timeout); 1557 | $galera_cluster->monitor_user($self->monitor_user); 1558 | $galera_cluster->monitor_password($self->monitor_password); 1559 | $galera_cluster->debug($self->debug); 1560 | $galera_cluster->hg_writer_id($self->hg_writer_id); 1561 | $galera_cluster->hg_reader_id($self->hg_reader_id); 1562 | $galera_cluster->singlewriter($Param->{single_writer}); 1563 | $galera_cluster->writer_is_reader($Param->{writer_is_reader}); 1564 | $galera_cluster->ssl_certificates_path($Param->{ssl_certs_path}); 1565 | 1566 | $self->get_galera_cluster($galera_cluster); 1567 | if($self->debug >=1){print Utils->print_log(3," Galera cluster object created " . caller(3). "\n" ); } 1568 | } 1569 | 1570 | sub evaluate_nodes{ 1571 | my ($proxynode,$GGalera_cluster) = @_ ; 1572 | my ( $nodes ) = $GGalera_cluster->{_nodes}; 1573 | my $action_nodes = undef; 1574 | 1575 | 1576 | #Rules: 1577 | #see rules in the doc 1578 | 1579 | #do the checks 1580 | if($proxynode->debug >=1){print Utils->print_log(3," Evaluate nodes state \n" ) } 1581 | foreach my $key (sort keys %{$nodes}){ 1582 | if(defined $nodes->{$key} ){ 1583 | 1584 | #only if node has HG that is not maintenance it can evaluate to be put down in some way 1585 | if($nodes->{$key}->{_hostgroups} < 8000 1586 | && $nodes->{$key}->{_process_status} > 0){ 1587 | #Check major exclusions 1588 | # 1) wsrep state 1589 | # 2) Node is not read only 1590 | # 3) at least another node in the HG 1591 | 1592 | if( $nodes->{$key}->wsrep_status == 2 1593 | && $nodes->{$key}->read_only eq "OFF" 1594 | #&& $GGalera_cluster->{_main_segment} != $nodes->{$key}->wsrep_segment 1595 | && $nodes->{$key}->proxy_status ne "OFFLINE_SOFT" 1596 | ){ 1597 | if($GGalera_cluster->{_hostgroups}->{$nodes->{$key}->{_hostgroups}}->{_size} <= 1){ 1598 | print Utils->print_log(3," Node ".$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups."; Is in state ".$nodes->{$key}->wsrep_status 1599 | .". But I will not move to OFFLINE_SOFT given last node left in the Host group \n"); 1600 | next; 1601 | } 1602 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_OFFLINE}}= $nodes->{$key}; 1603 | #if retry is > 0 then it's managed 1604 | if($proxynode->retry_down > 0){ 1605 | $nodes->{$key}->get_retry_down($nodes->{$key}->get_retry_down + 1); 1606 | } 1607 | if($proxynode->debug >=1){print Utils->print_log(3," Evaluate nodes state " 1608 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_OFFLINE} 1609 | ." 
Retry #".$nodes->{$key}->get_retry_down."\n" ) } 1610 | next; 1611 | } 1612 | 1613 | if( $nodes->{$key}->wsrep_status ne 4 1614 | && $nodes->{$key}->wsrep_status ne 2){ 1615 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_HG_CHANGE}}= $nodes->{$key}; 1616 | 1617 | #if retry is > 0 then it's managed 1618 | if($proxynode->retry_down > 0){ 1619 | $nodes->{$key}->get_retry_down($nodes->{$key}->get_retry_down + 1); 1620 | } 1621 | 1622 | if($proxynode->debug >=1){print Utils->print_log(3," Evaluate nodes state " 1623 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_HG_CHANGE} 1624 | ." Retry #".$nodes->{$key}->get_retry_down."\n" ) } 1625 | 1626 | next; 1627 | } 1628 | 1629 | #3) Node/cluster in non primary 1630 | if($nodes->{$key}->cluster_status ne "Primary"){ 1631 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_HG_CHANGE}}= $nodes->{$key}; 1632 | 1633 | #if retry is > 0 then it's managed 1634 | if($proxynode->retry_down > 0){ 1635 | $nodes->{$key}->get_retry_down($nodes->{$key}->get_retry_down + 1); 1636 | } 1637 | 1638 | if($proxynode->debug >=1){print Utils->print_log(3," Evaluate nodes state " 1639 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_HG_CHANGE} 1640 | ." Retry #".$nodes->{$key}->get_retry_down."\n" ) } 1641 | next; 1642 | } 1643 | 1644 | # 4) wsrep_reject_queries=NONE 1645 | if($nodes->{$key}->wsrep_rejectqueries ne "NONE" && $nodes->{$key}->proxy_status ne "OFFLINE_SOFT"){ 1646 | my $inc =0; 1647 | if($nodes->{$key}->wsrep_rejectqueries eq "ALL"){ 1648 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_HG_CHANGE}}= $nodes->{$key}; 1649 | $inc=1; 1650 | }else{ 1651 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_HG_CHANGE}}= $nodes->{$key}; 1652 | $inc=1; 1653 | } 1654 | #if retry is > 0 then it's managed 1655 | if($proxynode->retry_down > 0 && $inc > 0){ 1656 | $nodes->{$key}->get_retry_down($nodes->{$key}->get_retry_down + 1); 1657 | } 1658 | 1659 | if($proxynode->debug >=1){ 1660 | print Utils->print_log(3," Evaluate nodes state " 1661 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_HG_CHANGE} 1662 | ." Retry #".$nodes->{$key}->get_retry_down."\n" ) } 1663 | next; 1664 | } 1665 | 1666 | #5) Donor, node reject queries =1 size of cluster > 2 of nodes in the same segments 1667 | if($nodes->{$key}->wsrep_status eq 2 1668 | && $nodes->{$key}->wsrep_donorrejectqueries eq "ON"){ 1669 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_HG_CHANGE}}= $nodes->{$key}; 1670 | #if retry is > 0 then it's managed 1671 | if($proxynode->retry_down > 0){ 1672 | $nodes->{$key}->get_retry_down($nodes->{$key}->get_retry_down + 1); 1673 | } 1674 | 1675 | if($proxynode->debug >=1){ 1676 | print Utils->print_log(3," Evaluate nodes state " 1677 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_HG_CHANGE} 1678 | ." 
Retry #".$nodes->{$key}->get_retry_down."\n" ) } 1679 | 1680 | next; 1681 | } 1682 | 1683 | #Set OFFLINE_SOFT a writer: 1684 | #1) donor node reject queries - 0 1685 | #2)size of cluster > 2 of nodes in the same segments 1686 | #3) more then one writer in the same HG 1687 | #4) Node had pxc_maint_mode set to anything except DISABLED, not matter what it will go in OFFLINE_SOFT 1688 | 1689 | if( 1690 | $nodes->{$key}->read_only eq "ON" 1691 | && $nodes->{$key}->{_hostgroups} == $GGalera_cluster->{_hg_writer_id} 1692 | && $nodes->{$key}->wsrep_donorrejectqueries eq "OFF" 1693 | && $nodes->{$key}->proxy_status ne "OFFLINE_SOFT" 1694 | ){ 1695 | ## In case READ_ONLY is OFF and we have only a node left but desync do not put it down 1696 | #if( $GGalera_cluster->{_size}->{$nodes->{$key}->{_wsrep_segment}} == 1 1697 | # &&$nodes->{$key}->read_only eq "OFF"){ 1698 | # next; 1699 | #} 1700 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_OFFLINE}}= $nodes->{$key}; 1701 | #if retry is > 0 then it's managed 1702 | if($proxynode->retry_down > 0){ 1703 | $nodes->{$key}->get_retry_down($nodes->{$key}->get_retry_down + 1); 1704 | } 1705 | 1706 | if($proxynode->debug >=1){print Utils->print_log(3," Evaluate nodes state " 1707 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_DOWN_OFFLINE} 1708 | ." Retry #".$nodes->{$key}->get_retry_down."\n" ) } 1709 | 1710 | next; 1711 | } 1712 | 1713 | #4) Node had pxc_maint_mode set to anything except DISABLED, not matter what it will go in OFFLINE_SOFT 1714 | if( defined $nodes->{$key}->pxc_maint_mode 1715 | && $nodes->{$key}->pxc_maint_mode ne "DISABLED" 1716 | && $nodes->{$key}->proxy_status ne "OFFLINE_SOFT"){ 1717 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_TO_MAINTENANCE}}= $nodes->{$key}; 1718 | 1719 | #if retry is > 0 then it's managed 1720 | if($proxynode->retry_down > 0){ 1721 | $nodes->{$key}->get_retry_down($proxynode->retry_down ); # this is a known state and we do not want any delay set the retry to his max 1722 | } 1723 | 1724 | if( 1725 | $nodes->{$key}->{_hostgroups} == $GGalera_cluster->{_hg_writer_id} 1726 | && $GGalera_cluster->{_singlewriter} > 0 1727 | ){ 1728 | $GGalera_cluster->{_haswriter} = $GGalera_cluster->{_haswriter} -1; 1729 | } 1730 | 1731 | 1732 | if($proxynode->debug >=1){print Utils->print_log(3," Evaluate nodes state " 1733 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_TO_MAINTENANCE} 1734 | ." 
Retry #".$nodes->{$key}->get_retry_down."\n" ) } 1735 | 1736 | next; 1737 | } 1738 | 1739 | # Node must be removed if writer is reader is disable and node is in writer group 1740 | if($nodes->{$key}->wsrep_status eq 4 1741 | && $nodes->{$key}->wsrep_rejectqueries eq "NONE" 1742 | && $nodes->{$key}->read_only eq "OFF" 1743 | && $nodes->{$key}->cluster_status eq "Primary" 1744 | && $nodes->{$key}->hostgroups == $proxynode->{_hg_reader_id} 1745 | && $GGalera_cluster->{_writer_is_reader} < 1 1746 | && $nodes->{$key}->proxy_status eq "ONLINE" 1747 | ){ 1748 | #my $nodes_read_ips = join(',', @{$GGalera_cluster->{_reader_nodes}}); 1749 | my $nodes_write_ips = join(',', @{$GGalera_cluster->{_writer_nodes}}); 1750 | 1751 | my $ip = "$nodes->{$key}->{_ip}:$nodes->{$key}->{_port}"; 1752 | 1753 | if($nodes_write_ips =~ m/$ip/ ){ 1754 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_DELETE_NODE}}= $nodes->{$key}; 1755 | #if retry is > 0 then it's managed 1756 | if($proxynode->retry_up > 0){ 1757 | $nodes->{$key}->get_retry_up($nodes->{$key}->get_retry_up +1); 1758 | } 1759 | print Utils->print_log(3," Writer is also reader disabled removing node from reader Hostgroup " 1760 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_DELETE_NODE} 1761 | ." Retry #".$nodes->{$key}->get_retry_up."\n" ); 1762 | 1763 | next; 1764 | } 1765 | 1766 | } 1767 | 1768 | 1769 | } 1770 | #Node comes back from offline_soft when (all of them): 1771 | # 1) Node state is 4 1772 | # 3) wsrep_reject_queries = none 1773 | # 4) Primary state 1774 | # 5) pxc_maint_mode is DISABLED or undef 1775 | 1776 | if($nodes->{$key}->wsrep_status eq 4 1777 | && $nodes->{$key}->proxy_status eq "OFFLINE_SOFT" 1778 | && $nodes->{$key}->wsrep_rejectqueries eq "NONE" 1779 | && $nodes->{$key}->read_only eq "OFF" 1780 | &&$nodes->{$key}->cluster_status eq "Primary" 1781 | &&(!defined $nodes->{$key}->pxc_maint_mode || $nodes->{$key}->pxc_maint_mode eq "DISABLED") 1782 | && $nodes->{$key}->hostgroups < 8000 1783 | ){ 1784 | if($GGalera_cluster->haswriter > 0 1785 | && $GGalera_cluster->singlewriter > 0 1786 | && $nodes->{$key}->hostgroups == $GGalera_cluster->hg_writer_id 1787 | ){ 1788 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_DELETE_NODE}}= $nodes->{$key}; 1789 | #if retry is > 0 then it's managed 1790 | if($proxynode->retry_up > 0){ 1791 | $nodes->{$key}->get_retry_up($nodes->{$key}->get_retry_up +1); 1792 | } 1793 | if($proxynode->debug <=1){ 1794 | print Utils->print_log(3, " Evaluate nodes state " 1795 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_DELETE_NODE} 1796 | ." Retry #".$nodes->{$key}->get_retry_up."\n" ) } 1797 | next; 1798 | } 1799 | else{ 1800 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_UP_OFFLINE}}= $nodes->{$key}; 1801 | #if retry is > 0 then it's managed 1802 | if($proxynode->retry_up > 0){ 1803 | $nodes->{$key}->get_retry_up($nodes->{$key}->get_retry_up +1); 1804 | } 1805 | if($proxynode->debug <=1){ 1806 | print Utils->print_log(3, " Evaluate nodes state " 1807 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_UP_OFFLINE} 1808 | ." 
Retry #".$nodes->{$key}->get_retry_up."\n" ) } 1809 | next; 1810 | } 1811 | } 1812 | 1813 | # Node comes back from maintenance HG when (all of them): 1814 | # 1) node state is 4 1815 | # 3) wsrep_reject_queries = none 1816 | # 4) Primary state 1817 | if($nodes->{$key}->wsrep_status eq 4 1818 | && $nodes->{$key}->wsrep_rejectqueries eq "NONE" 1819 | && $nodes->{$key}->cluster_status eq "Primary" 1820 | && $nodes->{$key}->hostgroups >= 9000 1821 | ){ 1822 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_UP_HG_CHANGE}}= $nodes->{$key}; 1823 | #if retry is > 0 then it's managed 1824 | if($proxynode->retry_up > 0){ 1825 | $nodes->{$key}->get_retry_up($nodes->{$key}->get_retry_up +1); 1826 | } 1827 | if($proxynode->debug >=1){ 1828 | print Utils->print_log(3," Evaluate nodes state " 1829 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_UP_HG_CHANGE} 1830 | ." Retry #".$nodes->{$key}->get_retry_up."\n" ) } 1831 | next; 1832 | } 1833 | 1834 | #Special case when a node goes down it goes through several state and the check disable it moving form original group 1835 | #This is to remove it to his original HG when is not reachable 1836 | if($nodes->{$key}->{_process_status} < 0 1837 | && $nodes->{$key}->hostgroups >= 9000 1838 | ){ 1839 | #if retry is > 0 then it's managed 1840 | if($proxynode->retry_up > 0){ 1841 | $nodes->{$key}->get_retry_up($nodes->{$key}->get_retry_up +1); 1842 | } 1843 | 1844 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_UP_HG_CHANGE}}= $nodes->{$key}; 1845 | if($proxynode->debug >=1){ 1846 | print Utils->print_log(3," Evaluate nodes state " 1847 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_MOVE_UP_HG_CHANGE} 1848 | ." Retry #".$nodes->{$key}->get_retry_up."\n" ) } 1849 | next; 1850 | } 1851 | 1852 | #Check if any node that is in the read backup host group is not present in the readhostgroup while it should. 1853 | #If identify it will add to the read HG 1854 | 1855 | if($nodes->{$key}->wsrep_status eq 4 1856 | && $nodes->{$key}->wsrep_rejectqueries eq "NONE" 1857 | && $nodes->{$key}->cluster_status eq "Primary" 1858 | && $nodes->{$key}->hostgroups == (8000 + $proxynode->{_hg_reader_id}) 1859 | ){ 1860 | my $nodes_read_ips = join(',', @{$GGalera_cluster->{_reader_nodes}}); 1861 | my $nodes_write_ips = join(',', @{$GGalera_cluster->{_writer_nodes}}); 1862 | 1863 | my $ip = "$nodes->{$key}->{_ip}:$nodes->{$key}->{_port}"; 1864 | 1865 | if($nodes_read_ips =~ m/$ip/ 1866 | || ( $nodes_write_ips =~ m/$ip/ 1867 | && $GGalera_cluster->{_writer_is_reader} < 1)){ 1868 | if($proxynode->debug >=1){ 1869 | print Utils->print_log(3," Node already ONLINE in read hg " 1870 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";\n" ) } 1871 | }else{ 1872 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_INSERT_READ}}= $nodes->{$key}; 1873 | #if retry is > 0 then it's managed 1874 | if($proxynode->retry_up > 0){ 1875 | $nodes->{$key}->get_retry_up($nodes->{$key}->get_retry_up +1); 1876 | } 1877 | if($proxynode->debug >=1){ 1878 | print Utils->print_log(3," Evaluate nodes state " 1879 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_INSERT_READ} 1880 | ." 
Retry #".$nodes->{$key}->get_retry_up."\n" ) 1881 | } 1882 | } 1883 | next; 1884 | } 1885 | 1886 | #Check if any node that is in the write backup host group is not present in the WRITE hostgroup while it should WHEN MULTIPLE WRITERS. 1887 | #If identify it will add to the read HG 1888 | 1889 | if($nodes->{$key}->wsrep_status eq 4 1890 | && $nodes->{$key}->wsrep_rejectqueries eq "NONE" 1891 | && $nodes->{$key}->read_only eq "OFF" 1892 | && $nodes->{$key}->cluster_status eq "Primary" 1893 | && $nodes->{$key}->hostgroups == (8000 + $proxynode->{_hg_writer_id}) 1894 | && $GGalera_cluster->{_singlewriter} < 1 1895 | ){ 1896 | #my $nodes_read_ips = join(',', @{$GGalera_cluster->{_reader_nodes}}); 1897 | my $nodes_write_ips = join(',', @{$GGalera_cluster->{_writer_nodes}}); 1898 | 1899 | my $ip = "$nodes->{$key}->{_ip}:$nodes->{$key}->{_port}"; 1900 | 1901 | if($nodes_write_ips =~ m/$ip/ 1902 | && $GGalera_cluster->{_single_writer} < 1){ 1903 | if($proxynode->debug >=1){ 1904 | print Utils->print_log(3," Node already ONLINE in write hg " 1905 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";\n" ) } 1906 | }else{ 1907 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_INSERT_WRITE}}= $nodes->{$key}; 1908 | #if retry is > 0 then it's managed 1909 | if($proxynode->retry_up > 0){ 1910 | $nodes->{$key}->get_retry_up($nodes->{$key}->get_retry_up +1); 1911 | } 1912 | if($proxynode->debug >=1){ 1913 | print Utils->print_log(3," Evaluate nodes state " 1914 | .$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_INSERT_WRITE} 1915 | ." Retry #".$nodes->{$key}->get_retry_up."\n" ) 1916 | } 1917 | } 1918 | next; 1919 | } 1920 | 1921 | 1922 | # in the case node is not in one of the declared state 1923 | # BUT it has the counter retry set THEN I reset it to 0 whatever it was because 1924 | # I assume it is ok now 1925 | if($proxynode->retry_up > 0 1926 | && $nodes->{$key}->get_retry_up > 0){ 1927 | $nodes->{$key}->get_retry_up(0); 1928 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_SAVE_RETRY}}= $nodes->{$key}; 1929 | } 1930 | if($proxynode->retry_down > 0 1931 | && $nodes->{$key}->get_retry_down > 0){ 1932 | $nodes->{$key}->get_retry_down(0); 1933 | $action_nodes->{$nodes->{$key}->ip.";".$nodes->{$key}->port.";".$nodes->{$key}->hostgroups.";".$nodes->{$key}->{_SAVE_RETRY}}= $nodes->{$key}; 1934 | } 1935 | 1936 | } 1937 | } 1938 | $proxynode->action_nodes($action_nodes); 1939 | 1940 | #failover has higher priority BUT if it happens before other action will interfere with recovery and move nodes that may prevent the failover to happen 1941 | # and it will ends the script work (if successful) 1942 | #if($GGalera_cluster->{_writers} > 1 1943 | # && $GGalera_cluster->{_singlewriter} > 0) 1944 | #{ 1945 | # $proxynode->{_require_failover} = 1; 1946 | #} 1947 | 1948 | 1949 | 1950 | if($proxynode->require_failover > 0 1951 | && !defined $action_nodes 1952 | ){ 1953 | if($GGalera_cluster->haswriter < 1 1954 | || ( $GGalera_cluster->{_writers} > 1 && $GGalera_cluster->{_singlewriter} > 0) 1955 | ){ 1956 | print Utils->print_log(2,"Fail-over in action Using Method = $proxynode->{_require_failover}\n" ); 1957 | if($proxynode->initiate_failover($GGalera_cluster) >0){ 1958 | #if($proxynode->debug >=1){ 1959 | print Utils->print_log(2,"!!!! FAILOVER !!!!! 
\n Cluster was without WRITER I have try to restore service promoting a node\n" ); 1960 | #exit 0; 1961 | #} 1962 | } 1963 | } 1964 | } 1965 | elsif($proxynode->require_failover > 0 1966 | && $GGalera_cluster->haswriter < 1){ 1967 | print Utils->print_log(2,"PXC maintenance on single writer, is asking for failover. Fail-over in action Using Method = $proxynode->{_require_failover}\n" ); 1968 | $proxynode->push_changes; 1969 | if($proxynode->initiate_failover($GGalera_cluster) >0){ 1970 | #if($proxynode->debug >=1){ 1971 | print Utils->print_log(2,"!!!! FAILOVER !!!!! \n Cluster was without WRITER I have try to restore service promoting a node\n" ); 1972 | #exit 0; 1973 | #} 1974 | } 1975 | } 1976 | 1977 | } 1978 | 1979 | sub push_changes{ 1980 | my ($proxynode) = @_ ; 1981 | my $node = GaleraNode->new(); 1982 | my $SQL_command=""; 1983 | 1984 | 1985 | foreach my $key (sort keys %{$proxynode->{_action_nodes}}){ 1986 | my ($host, $port, $hg, $action) = split /s*;\s*/, $key; 1987 | 1988 | SWITCH: { 1989 | if ($action == $node->MOVE_DOWN_OFFLINE) { if($proxynode->{_action_nodes}->{$key}->get_retry_down >= $proxynode->retry_down){$proxynode->move_node_offline($key,$proxynode->{_action_nodes}->{$key})}; last SWITCH; } 1990 | if ($action == $node->MOVE_DOWN_HG_CHANGE) { if($proxynode->{_action_nodes}->{$key}->get_retry_down >= $proxynode->retry_down){ $proxynode->move_node_down_hg_change($key,$proxynode->{_action_nodes}->{$key})}; last SWITCH; } 1991 | if ($action == $node->MOVE_UP_OFFLINE) { if($proxynode->{_action_nodes}->{$key}->get_retry_up >= $proxynode->retry_up){ $proxynode->move_node_up_from_offline($key,$proxynode->{_action_nodes}->{$key})}; last SWITCH; } 1992 | if ($action == $node->MOVE_UP_HG_CHANGE) { if($proxynode->{_action_nodes}->{$key}->get_retry_up >= $proxynode->retry_up){$proxynode->move_node_up_from_hg_change($key,$proxynode->{_action_nodes}->{$key})}; last SWITCH; } 1993 | if ($action == $node->MOVE_TO_MAINTENANCE) { if($proxynode->{_action_nodes}->{$key}->get_retry_down >= $proxynode->retry_down){$proxynode->move_node_to_maintenance($key,$proxynode->{_action_nodes}->{$key})}; last SWITCH; } 1994 | if ($action == $node->DELETE_NODE) { 1995 | if($proxynode->{_action_nodes}->{$key}->get_retry_up >= $proxynode->retry_up){ 1996 | $proxynode->delete_node_from_hostgroup($key,$proxynode->{_action_nodes}->{$key})}; last SWITCH; } 1997 | if($action == $node->INSERT_READ){if($proxynode->{_action_nodes}->{$key}->get_retry_up >= $proxynode->retry_up){ 1998 | $proxynode->insert_reader($key,$proxynode->{_action_nodes}->{$key}) 1999 | }; last SWITCH; 2000 | } 2001 | 2002 | if($action == $node->INSERT_WRITE){if($proxynode->{_action_nodes}->{$key}->get_retry_up >= $proxynode->retry_up){ 2003 | $proxynode->insert_writer($key,$proxynode->{_action_nodes}->{$key}) 2004 | }; last SWITCH; 2005 | } 2006 | 2007 | } 2008 | if($proxynode->retry_up > 0 || $proxynode->retry_down > 0){ 2009 | save_retry($proxynode,$key,$proxynode->{_action_nodes}->{$key}); 2010 | } 2011 | } 2012 | $proxynode->{_action_nodes} = undef; 2013 | } 2014 | 2015 | sub save_retry{ 2016 | #this action will take place only if retry is active 2017 | my ($self,$key,$node) = @_; 2018 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2019 | 2020 | if($self->debug >=1){print Utils->print_log(4,"Check retry Node:".$host." port:".$port . " hg:".$hg ." 
Time IN \n");} 2021 | 2022 | my $sql_string = "UPDATE mysql_servers SET comment='" 2023 | .$node->{_comment} 2024 | .$self->get_galera_cluster->cluster_identifier."_retry_up=".$node->get_retry_up 2025 | .";".$self->get_galera_cluster->cluster_identifier."_retry_down=".$node->get_retry_down 2026 | .";' WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2027 | 2028 | $self->{_dbh_proxy}->do($sql_string) or die "Couldn't execute statement: " . $self->{_dbh_proxy}->errstr; 2029 | $self->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or die "Couldn't execute statement: " . $self->{_dbh_proxy}->errstr; 2030 | 2031 | if($self->debug >=1){print Utils->print_log(2," Reset retry to UP:".$node->get_retry_up." Down:".$node->get_retry_down."for node:" .$key 2032 | ." SQL:" .$sql_string 2033 | ."\n")} ; 2034 | if($self->debug >=1){print Utils->print_log(4,"Check retry Node:".$host." port:".$port . " hg:".$hg ." Time OUT \n");} 2035 | } 2036 | 2037 | sub move_node_offline{ 2038 | #this action involve only the proxy so we will 2039 | my ($proxynode, $key,$node) = @_; 2040 | 2041 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2042 | my $proxy_sql_command= " UPDATE mysql_servers SET status='OFFLINE_SOFT' WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2043 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2044 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2045 | 2046 | print Utils->print_log(2," Move node:" .$key 2047 | ." SQL:" .$proxy_sql_command 2048 | ."\n") ; 2049 | } 2050 | sub move_node_to_maintenance{ 2051 | #this action involve only the proxy so we will 2052 | my ($proxynode, $key,$node) = @_; 2053 | 2054 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2055 | my $proxy_sql_command= " UPDATE mysql_servers SET status='OFFLINE_SOFT' WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2056 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2057 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2058 | 2059 | print Utils->print_log(2," Move node:" .$key 2060 | ." SQL:" .$proxy_sql_command 2061 | ."\n") ; 2062 | } 2063 | 2064 | #remove a node from an hostgroup 2065 | sub delete_node_from_hostgroup{ 2066 | #this action involve only the proxy so we will 2067 | my ($proxynode, $key,$node) = @_; 2068 | 2069 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2070 | my $proxy_sql_command= " DELETE from mysql_servers WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2071 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2072 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2073 | 2074 | print Utils->print_log(2," DELETE node:" .$key 2075 | ." 
SQL:" .$proxy_sql_command 2076 | ."\n") ; 2077 | } 2078 | 2079 | 2080 | 2081 | 2082 | #move a node to a maintenance HG ((9000 + HG id)) 2083 | sub move_node_down_hg_change{ 2084 | my ($proxynode, $key,$node) = @_; 2085 | 2086 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2087 | if($hg > 9000) {return 1;} 2088 | 2089 | my $node_sql_command = "SET GLOBAL READ_ONLY=1;"; 2090 | my $proxy_sql_command =" UPDATE mysql_servers SET hostgroup_id=".(9000 + $hg)." WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2091 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2092 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2093 | print Utils->print_log(2," Move node:" .$key 2094 | ." SQL:" .$proxy_sql_command 2095 | ."\n" ); 2096 | } 2097 | 2098 | #Bring back a node that is just offline 2099 | sub move_node_up_from_offline{ 2100 | my ($proxynode, $key,$node) = @_; 2101 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2102 | my $proxy_sql_command= " UPDATE mysql_servers SET status='ONLINE' WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2103 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2104 | my $error_code = $proxynode->{_dbh_proxy}->err(); 2105 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2106 | print Utils->print_log(2," Move node:" .$key 2107 | ." SQL:" .$proxy_sql_command 2108 | ."\n" ); 2109 | } 2110 | 2111 | #move a node back to his original HG ((HG id - 9000)) 2112 | sub move_node_up_from_hg_change{ 2113 | my ($proxynode, $key,$node) = @_; 2114 | 2115 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2116 | #my $node_sql_command = "SET GLOBAL READ_ONLY=1;"; 2117 | my $proxy_sql_command =" UPDATE mysql_servers SET hostgroup_id=".($hg - 9000)." WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2118 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or warn "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2119 | my $error_code = $proxynode->{_dbh_proxy}->err(); 2120 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2121 | print Utils->print_log(2," Move node:" .$key 2122 | ." SQL:" .$proxy_sql_command 2123 | ."\n" ) ; 2124 | } 2125 | 2126 | #move a node back to his original HG ((HG id - 9000)) 2127 | sub add_node_to_readers{ 2128 | my ($proxynode, $key,$node) = @_; 2129 | 2130 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2131 | my $node_sql_command = "SET GLOBAL READ_ONLY=1;"; 2132 | my $proxy_sql_command =" UPDATE mysql_servers SET hostgroup_id=".($hg - 9000)." WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2133 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2134 | my $error_code = $proxynode->{_dbh_proxy}->err(); 2135 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or die "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2136 | print Utils->print_log(2," Move node:" .$key 2137 | ." 
SQL:" .$proxy_sql_command 2138 | ."\n" ) ; 2139 | } 2140 | 2141 | sub insert_reader{ 2142 | my ($proxynode, $key,$node) = @_; 2143 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2144 | my $proxy_sql_command ="INSERT INTO mysql_servers (hostgroup_id, hostname,port,gtid_port,status,weight,compression,max_connections,max_replication_lag,use_ssl,max_latency_ms,comment) ". 2145 | " VALUES($proxynode->{_hg_reader_id}" . 2146 | ",'$node->{_ip}'" . 2147 | ",$node->{_port} " . 2148 | ",$node->{_gtid_port} " . 2149 | ",'$node->{_proxy_status}' " . 2150 | ",$node->{_weight}" . 2151 | ",$node->{_compression}" . 2152 | ",$node->{_connections}" . 2153 | ",$node->{_max_replication_lag}" . 2154 | ",$node->{_use_ssl}" . 2155 | ",$node->{_max_latency}" . 2156 | ",'$node->{_comments}')" ; 2157 | 2158 | #my $proxy_sql_command =" UPDATE mysql_servers SET hostgroup_id=".($hg - 9000)." WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2159 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or warn "Couldn't execute statement: $proxy_sql_command" . $proxynode->{_dbh_proxy}->errstr; 2160 | my $error_code = $proxynode->{_dbh_proxy}->err(); 2161 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or warn "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2162 | print Utils->print_log(2," Move node:" .$key 2163 | ." SQL:" .$proxy_sql_command 2164 | ."\n" ) ; 2165 | } 2166 | 2167 | sub insert_writer{ 2168 | my ($proxynode, $key,$node) = @_; 2169 | my ($host, $port, $hg,$action) = split /s*;\s*/, $key; 2170 | my $proxy_sql_command ="INSERT INTO mysql_servers (hostgroup_id, hostname,port,gtid_port,status,weight,compression,max_connections,max_replication_lag,use_ssl,max_latency_ms,comment) ". 2171 | " VALUES($proxynode->{_hg_writer_id}" . 2172 | ",'$node->{_ip}'" . 2173 | ",$node->{_port} " . 2174 | ",$node->{_gtid_port} " . 2175 | ",'$node->{_proxy_status}' " . 2176 | ",$node->{_weight}" . 2177 | ",$node->{_compression}" . 2178 | ",$node->{_connections}" . 2179 | ",$node->{_max_replication_lag}" . 2180 | ",$node->{_use_ssl}" . 2181 | ",$node->{_max_latency}" . 2182 | ",'$node->{_comments}')" ; 2183 | 2184 | #my $proxy_sql_command =" UPDATE mysql_servers SET hostgroup_id=".($hg - 9000)." WHERE hostgroup_id=$hg AND hostname='$host' AND port='$port'"; 2185 | $proxynode->{_dbh_proxy}->do($proxy_sql_command) or warn "Couldn't execute statement: $proxy_sql_command" . $proxynode->{_dbh_proxy}->errstr; 2186 | my $error_code = $proxynode->{_dbh_proxy}->err(); 2187 | $proxynode->{_dbh_proxy}->do("LOAD MYSQL SERVERS TO RUNTIME") or warn "Couldn't execute statement: " . $proxynode->{_dbh_proxy}->errstr; 2188 | print Utils->print_log(2," Move node:" .$key 2189 | ." 
SQL:" .$proxy_sql_command 2190 | ."\n" ) ; 2191 | } 2192 | sub initiate_failover{ 2193 | my ($proxynode,$Galera_cluster) = @_ ; 2194 | my ( $nodes ) = $Galera_cluster->{_nodes}; 2195 | my ( $nodes_maint ) = $Galera_cluster->{_nodes_maint}; 2196 | my $failover_node; 2197 | my $candidate_failover_node; 2198 | my $min_index = 100; 2199 | my $max_weight=0; 2200 | my $cand_min_index = 100; 2201 | my $cand_max_weight=0; 2202 | 2203 | my $local_node; 2204 | my $hg_writer_id=0; 2205 | my $exclude_delete=""; 2206 | 2207 | #Valid values are: 2208 | # 0 [default] do not make failover 2209 | # 1 make failover only if HG 8000 is specified in ProxySQL mysl_servers 2210 | # 2 use PXC_CLUSTER_VIEW to identify a server in the same segment 2211 | # 3 do whatever to keep service up also failover to another segment (use PXC_CLUSTER_VIEW) 2212 | # 2213 | foreach my $key (sort keys %{$nodes}){ 2214 | if(defined $nodes->{$key} ){ 2215 | 2216 | #only if node has HG that is not maintenance it can be evaluated to be put down in some way 2217 | #Look for the node with the lowest weight in the same segment 2218 | if($nodes->{$key}->{_hostgroups} < 9000 2219 | && $nodes->{$key}->{_proxy_status} eq "ONLINE" 2220 | && $nodes->{$key}->{_process_status} > 0 2221 | && $nodes->{$key}->{_wsrep_status} == 4 2222 | && $nodes->{$key}->{_wsrep_rejectqueries} eq "NONE" 2223 | && $nodes->{$key}->{_wsrep_donorrejectqueries} eq "OFF" 2224 | && $nodes->{$key}->{_pxc_maint_mode} eq "DISABLED" 2225 | && $nodes->{$key}->{_read_only} eq "OFF" 2226 | ){ 2227 | 2228 | #IN case failover option is 1 we need to have: 2229 | #The failover group defined in Proxysql (8xxx + id of the HG) 2230 | #Node must be of HG 8XXXX id 2231 | #And must be in the same segment of the writer 2232 | if( 2233 | $proxynode->{_require_failover} == 1 2234 | # && $nodes->{$key}->{_wsrep_segment} == $Galera_cluster->{_main_segment} 2235 | && $Galera_cluster->{_has_failover_node} >0 2236 | && $nodes->{$key}->{_hostgroups} == (8000 + $Galera_cluster->{_hg_writer_id}) 2237 | ){ 2238 | 2239 | if($nodes->{$key}->{_weight} > $max_weight 2240 | && $nodes->{$key}->{_wsrep_segment} == $Galera_cluster->{_main_segment} 2241 | ){ 2242 | $max_weight= $nodes->{$key}->{_weight}; 2243 | #$min_index = $nodes->{$key}->{_wsrep_local_index}; 2244 | $failover_node = $nodes->{$key}; 2245 | } 2246 | elsif($nodes->{$key}->{_wsrep_segment} != $Galera_cluster->{_main_segment} 2247 | && !defined $failover_node 2248 | && $nodes->{$key}->{_weight} > $cand_max_weight){ 2249 | $cand_max_weight= $nodes->{$key}->{_weight}; 2250 | # $cand_min_index = $nodes->{$key}->{_wsrep_local_index}; 2251 | $candidate_failover_node = $nodes->{$key}; 2252 | } 2253 | 2254 | #if($nodes->{$key}->{_weight} > $max_weight){ 2255 | # $max_weight= $nodes->{$key}->{_weight}; 2256 | # $failover_node = $nodes->{$key}; 2257 | #} 2258 | } 2259 | #IN case failover option is 2 we need to have: 2260 | # must be in the same segment of the writer 2261 | #and be in the PXC_CLUSTER_VIEW 2262 | 2263 | elsif( 2264 | $proxynode->{_require_failover} == 2 2265 | && $nodes->{$key}->{_wsrep_segment} == $Galera_cluster->{_main_segment} 2266 | ){ 2267 | if($nodes->{$key}->{_wsrep_local_index} < $min_index 2268 | ){ 2269 | $min_index = $nodes->{$key}->{_wsrep_local_index}; 2270 | $failover_node = $nodes->{$key}; 2271 | } 2272 | } 2273 | elsif($proxynode->{_require_failover} == 3){ 2274 | if($nodes->{$key}->{_wsrep_segment} == $Galera_cluster->{_main_segment} 2275 | && $nodes->{$key}->{_wsrep_local_index} < $min_index){ 2276 | $min_index = 
$nodes->{$key}->{_wsrep_local_index}; 2277 | $failover_node = $nodes->{$key}; 2278 | } 2279 | elsif($nodes->{$key}->{_wsrep_segment} != $Galera_cluster->{_main_segment} 2280 | && !defined $failover_node 2281 | && $nodes->{$key}->{_wsrep_local_index} < $cand_min_index){ 2282 | $cand_min_index = $nodes->{$key}->{_wsrep_local_index}; 2283 | $candidate_failover_node = $nodes->{$key}; 2284 | } 2285 | } 2286 | } 2287 | } 2288 | } 2289 | if(defined $nodes_maint ){ 2290 | my $exclude_id = ""; 2291 | my $exclude_port = ""; 2292 | foreach my $key (sort keys %{$nodes_maint}){ 2293 | if(defined $nodes_maint->{$key}){ 2294 | if(length($exclude_id) > 1){ 2295 | $exclude_id = $exclude_id . ","; 2296 | $exclude_port = $exclude_port . ","; 2297 | } 2298 | 2299 | $exclude_id = $exclude_id ."'". $nodes_maint->{$key}->{_ip} ."'"; 2300 | $exclude_port = $exclude_port.$nodes_maint->{$key}->{_port} ; 2301 | } 2302 | } 2303 | if(length($exclude_id) > 1){ 2304 | $exclude_delete = $exclude_delete . " AND (hostname not in (".$exclude_id.") AND port not in (".$exclude_port."))" ; 2305 | } 2306 | } 2307 | 2308 | #if a node was found, try to do the failover removing the READ_ONLY 2309 | if(defined $candidate_failover_node && $failover_node){ 2310 | return $failover_node->promote_writer($proxynode,$Galera_cluster,$exclude_delete); 2311 | } 2312 | elsif(defined $candidate_failover_node && !defined $failover_node){ 2313 | return $candidate_failover_node->promote_writer($proxynode,$Galera_cluster,$exclude_delete); 2314 | } 2315 | else{ 2316 | if(!defined $failover_node){ 2317 | SWITCH: { 2318 | if ($proxynode->{_require_failover} == 1) { print Utils->print_log(1,"!!!! No node for failover found , try to use active_failover=2 OR add a valid node to the 8000 HG pool \n" ) ; last SWITCH; } 2319 | if ($proxynode->{_require_failover} == 2) { print Utils->print_log(1,"!!!! No node for failover found , try to use active_failover=3 But that may move production to the other segment.\n" ); last SWITCH; } 2320 | if ($proxynode->{_require_failover} == 3) { print Utils->print_log(1,"!!!! No node for failover found also in the other segments, I cannot continue you need to act manually \n" ); last SWITCH; } 2321 | } 2322 | } 2323 | 2324 | if(defined $failover_node){ 2325 | return $failover_node->promote_writer($proxynode,$Galera_cluster,$exclude_delete); 2326 | } 2327 | } 2328 | 2329 | 2330 | return 0; 2331 | } 2332 | 2333 | } 2334 | 2335 | { 2336 | package ProxySqlHG; 2337 | sub new { 2338 | my $class = shift; 2339 | 2340 | my $self = { 2341 | _id => undef, # 2342 | _type => undef, # available types: w writer; r reader ; mw maintance writer; mr maintenance reader 2343 | _size => 0, 2344 | }; 2345 | bless $self, $class; 2346 | return $self; 2347 | } 2348 | 2349 | sub id { 2350 | my ( $self, $id ) = @_; 2351 | $self->{_id} = $id if defined($id); 2352 | return $self->{_id}; 2353 | } 2354 | 2355 | sub type { 2356 | my ( $self, $type ) = @_; 2357 | $self->{_type} = $type if defined($type); 2358 | return $self->{_type}; 2359 | } 2360 | sub size { 2361 | my ( $self, $size ) = @_; 2362 | $self->{_size} = $size if defined($size); 2363 | return $self->{_size}; 2364 | } 2365 | 2366 | } 2367 | 2368 | { 2369 | package Utils; 2370 | use Time::HiRes qw(gettimeofday); 2371 | #============================================================================ 2372 | ## get_connection -- return a valid database connection handle (or die) 2373 | ## $dsn -- a perl DSN, e.g. "DBI:mysql:host=ltsdbwm1;port=3311" 2374 | ## $user -- a valid username, e.g. 
"check" 2375 | ## $pass -- a matching password, e.g. "g33k!" 2376 | 2377 | sub get_connection($$$$) { 2378 | my $dsn = shift; 2379 | my $user = shift; 2380 | my $pass = shift; 2381 | my $SPACER = shift; 2382 | my $dbh = DBI->connect($dsn, $user, $pass , { 2383 | PrintError => 0, 2384 | PrintWarn => 0, 2385 | RaiseError => 0}); 2386 | 2387 | if (!defined($dbh)) { 2388 | #die 2389 | print Utils->print_log(1, "Cannot connect to $dsn as $user\n"); 2390 | # Should not die and instead return undef so we can handle this shit 2391 | #die(); 2392 | return undef; 2393 | } 2394 | 2395 | return $dbh; 2396 | } 2397 | 2398 | 2399 | ###################################################################### 2400 | ## collection functions -- fetch status data from db 2401 | ## get_status -- return a hash ref to SHOW GLOBAL STATUS output 2402 | ## $dbh -- a non-null database handle, as returned from get_connection() 2403 | ## 2404 | 2405 | 2406 | sub get_status($$) { 2407 | my $dbh = shift; 2408 | my $debug = shift; 2409 | my %v; 2410 | my $cmd = "show /*!50000 global */ status"; 2411 | 2412 | my $sth = $dbh->prepare($cmd); 2413 | $sth->execute() or warn "Couldn't execute statement: $cmd" . $dbh->errstr ." \n"; 2414 | while (my $ref = $sth->fetchrow_hashref()) { 2415 | my $n = $ref->{'Variable_name'}; 2416 | $v{"\L$n\E"} = $ref->{'Value'}; 2417 | if ($debug>0){print "MySQL status = ".$n."\n";} 2418 | } 2419 | 2420 | return \%v; 2421 | } 2422 | ###################################################################### 2423 | ## collection functions -- fetch status data from db 2424 | ## get_status -- return a hash ref to SHOW GLOBAL STATUS output 2425 | ## $dbh -- a non-null database handle, as returned from get_connection() 2426 | ## 2427 | 2428 | sub get_status_by_name($$) { 2429 | my $dbh = shift; 2430 | my $debug = shift; 2431 | my $name = shift ; 2432 | my %v; 2433 | my $cmd = "show /*!50000 global */ status like '$name'"; 2434 | 2435 | my $sth = $dbh->prepare($cmd); 2436 | $sth->execute(); 2437 | while (my $ref = $sth->fetchrow_hashref()) { 2438 | my $n = $ref->{'Variable_name'}; 2439 | $v{"\L$n\E"} = $ref->{'Value'}; 2440 | if ($debug>0){print "MySQL status = ".$n."\n";} 2441 | } 2442 | 2443 | return \%v; 2444 | } 2445 | ## 2446 | ## get_variables -- return a hash ref to SHOW GLOBAL VARIABLES output 2447 | ## 2448 | ## $dbh -- a non-null database handle, as returned from get_connection() 2449 | ## 2450 | sub get_variables($$) { 2451 | my $dbh = shift; 2452 | my $debug = shift; 2453 | my %v; 2454 | my $cmd = "select * from performance_schema.global_variables"; 2455 | $dbh->{LongReadLen} = 0; 2456 | $dbh->{LongTruncOk} = 0; 2457 | 2458 | my $sth = $dbh->prepare($cmd); 2459 | $sth->execute() or warn "Couldn't execute statement: $cmd" . $dbh->errstr ." \n"; 2460 | while (my $ref = $sth->fetchrow_hashref()) { 2461 | my $n = $ref->{'VARIABLE_NAME'}; 2462 | $v{"\L$n\E"} = $ref->{'VARIABLE_VALUE'}; 2463 | # print STDERR "$n : ".$v{$n}. " ZZZZZZZZZZZZZZZZZZ ". $ref->{'Value'} ."\n"; 2464 | } 2465 | 2466 | 2467 | return \%v; 2468 | } 2469 | ## 2470 | ## get_variables -- return a hash ref to SHOW GLOBAL VARIABLES output 2471 | ## 2472 | ## $dbh -- a non-null database handle, as returned from get_connection() 2473 | ## 2474 | sub get_variablesByName($$) { 2475 | my $dbh = shift; 2476 | my $variableName = shift; 2477 | #my $debug = shift; 2478 | my %v; 2479 | my $cmd = "show variables like '$variableName'"; 2480 | 2481 | my $sth = $dbh->prepare($cmd); 2482 | $sth->execute() or warn "Couldn't execute statement: $cmd" . 
$dbh->errstr ." \n"; 2483 | while (my $ref = $sth->fetchrow_hashref()) { 2484 | my $n = $ref->{'Variable_name'}; 2485 | $v{"\L$n\E"} = $ref->{'Value'}; 2486 | } 2487 | return \%v; 2488 | } 2489 | ## 2490 | ## get_variables -- return a hash ref to SHOW GLOBAL VARIABLES output 2491 | ## 2492 | ## $dbh -- a non-null database handle, as returned from get_connection() 2493 | ## 2494 | sub get_pxc_clusterview($$) { 2495 | my $dbh = shift; 2496 | my $variableName = shift; 2497 | #my $debug = shift; 2498 | 2499 | my %v; 2500 | my $cmd = "select * from performance_schema.pxc_cluster_view where UUID = '$variableName'"; 2501 | 2502 | my $sth = $dbh->prepare($cmd); 2503 | $sth->execute() or warn "Couldn't execute statement: $cmd" . $dbh->errstr ." \n"; 2504 | my $ref; 2505 | while ( $ref = $sth->fetchrow_hashref()) { 2506 | foreach my $name ('HOST_NAME', 'UUID','STATUS','LOCAL_INDEX','SEGMENT'){ 2507 | my $n = lc $name; 2508 | $v{$n} = $ref->{$name}; 2509 | } 2510 | } 2511 | return \%v; 2512 | } 2513 | #Print time from invocation with milliseconds 2514 | sub get_current_time{ 2515 | use POSIX qw(strftime); 2516 | my $t = gettimeofday(); 2517 | my $date = strftime "%Y/%m/%d %H:%M:%S", localtime $t; 2518 | $date .= sprintf ".%03d", ($t-int($t))*1000; # without rounding 2519 | 2520 | return $date; 2521 | } 2522 | 2523 | #prit all environmnt variables 2524 | sub debugEnv{ 2525 | my $key = keys %ENV; 2526 | foreach $key (sort(keys %ENV)) { 2527 | print $key, '=', $ENV{$key}, "\n"; 2528 | } 2529 | 2530 | } 2531 | 2532 | 2533 | #Print a log entry 2534 | sub print_log($$){ 2535 | my $log_level = $_[1]; 2536 | my $text = $_[2]; 2537 | my $log_text = "[ - ] "; 2538 | 2539 | SWITCH: { 2540 | if ($log_level == 1) { $log_text= "[ERROR] "; last SWITCH; } 2541 | if ($log_level == 2) { $log_text= "[WARN] "; last SWITCH; } 2542 | if ($log_level == 3) { $log_text= "[INFO] "; last SWITCH; } 2543 | if ($log_level == 4) { $log_text= "[DEBUG] "; last SWITCH; } 2544 | } 2545 | return Utils::get_current_time.":".$log_text.$text; 2546 | 2547 | } 2548 | 2549 | 2550 | #trim a string 2551 | sub trim { 2552 | my $s = shift; 2553 | $s =~ s/^\s+|\s+$//g; 2554 | return $s 2555 | }; 2556 | 2557 | 2558 | } 2559 | 2560 | 2561 | # ############################################################################ 2562 | # Documentation 2563 | # ################# 2564 | =pod 2565 | 2566 | =head1 NAME 2567 | galera_check.pl 2568 | 2569 | =head1 OPTIONS 2570 | 2571 | =over 2572 | 2573 | galera_check.pl -u=admin -p=admin -h=192.168.1.50 -H=500:W,501:R -P=3310 --main_segment=1 --debug=0 --log --help 2574 | sample [options] [file ...] 2575 | Options: 2576 | -u|user user to connect to the proxy 2577 | -p|password Password for the proxy 2578 | -h|host Proxy host 2579 | -H Hostgroups with role definition. List comma separated. 2580 | Definition R = reader; W = writer [500:W,501:R] 2581 | --main_segment If segments are in use which one is the leading at the moment 2582 | --retry_up The number of loop/test the check has to do before moving a node up (default 0) 2583 | --retry_down The number of loop/test the check has to do before moving a node Down (default 0) 2584 | --log Full path to the log file ie (/var/log/proxysql/galera_check_) the check will add 2585 | the identifier for the specific HG. 2586 | --active_failover A value from 0 to 3, indicating what level/kind of fail-over the script must perform. 
2587 | active_failover
2588 | Valid values are:
2589 | 0 [default] do not perform failover
2590 | 1 perform failover only if HG 8000 is specified in ProxySQL mysql_servers
2591 | 2 use PXC_CLUSTER_VIEW to identify a server in the same segment
2592 | 3 do whatever is needed to keep the service up, also failing over to another segment (uses PXC_CLUSTER_VIEW)
2593 | --single_writer Active by default [single_writer = 1]; if disabled, multiple writers are allowed
2594 |
2595 |
2596 | Performance parameters
2597 | --check_timeout This parameter, set in ms, is the time the script allows a thread connecting to a MySQL node to wait before forcing a return.
2598 | In short, if a node takes longer than check_timeout its entry will not be filled and it will eventually be ignored in the evaluation.
2599 | Set the debug option to 1 and look for "[WARN] Check timeout Node ip :" entries to see how much your nodes exceed the allowed limit.
2600 | You can use the difference to set check_timeout correctly.
2601 | Default is 800 ms
2602 |
2603 | --help Help message
2604 | --debug When active the log will contain detailed information about the execution. Parse it for ERRORS if you have problems
2605 | --print_execution Active by default; it prints in the log the execution time the check is taking. This can be used to properly tune the scheduler interval, and also --check_timeout
2606 |
2607 | --development When set to 1 you can run the script in a loop directly from bash and test what is going to happen
2608 | --development_time Time in seconds the loop waits between executions when in development mode (default 2 seconds)
2609 |
2610 | SSL support
2611 | The script identifies whether a node entry in the ProxySQL table mysql_servers has use_ssl = 1 and will use SSL for that specific entry
2612 | (a small, purely illustrative example is shown further below). This means that SSL is enabled per ProxySQL mysql_servers entry, NOT per IP:port combination.
2613 |
2614 | --ssl_certs_path This parameter allows you to specify a DIRECTORY containing the certificates to use.
2615 | At the moment it is NOT possible to change the file names, and ALL of these 3 files must be present and named as follows:
2616 | - client-key.pem
2617 | - client-cert.pem
2618 | - ca.pem
2619 | The script will exit with an error if ssl_certs_path is declared but not populated properly,
2620 | OR if the user running the script doesn't have access to it.
2621 | !!NOTE!! SSL connections take longer to establish. This script is a check that needs to run very fast and constantly;
2622 | forcing it to use SSL WILL impact the performance of the check. Tune the check_timeout parameter accordingly.
2623 |
2624 | =back
2625 |
2626 | =head1 DESCRIPTION
2627 |
2628 | Galera check is a script to manage the integration between ProxySQL and Galera (from Codership).
2629 | Galera and its implementations, such as Percona XtraDB Cluster (PXC), use a data-centric concept; as such the status of a node is relevant only in relation to the cluster.
2630 |
2631 | In ProxySQL it is possible to represent a cluster and its segments using HostGroups.
2632 | Galera check is designed to manage an arbitrary number of nodes belonging to a given Hostgroup (HG).
2633 | In galera_check it is also important to qualify the HG when Replication HGs are in use.
2634 |
2635 | galera_check works by HG and as such it performs isolated actions/checks per HG.
2636 | It is not possible to have more than one check running on the same HG. The check creates a lock file {proxysql_galera_check_${hg}.pid} to prevent duplicates.
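For illustration only, enabling SSL for a specific entry is just a matter of flagging it in mysql_servers on the ProxySQL admin interface; the host and port below are made-up values:

  UPDATE mysql_servers SET use_ssl=1 WHERE hostname='192.168.1.5' AND port='3306';
  LOAD MYSQL SERVERS TO RUNTIME; SAVE MYSQL SERVERS TO DISK;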
2637 |
2638 | Galera_check will connect to the ProxySQL node and retrieve all the information regarding the nodes/ProxySQL configuration.
2639 | It will then check each node in parallel and retrieve its status and configuration.
2640 |
2641 | At the moment galera_check analyzes and manages the following:
2642 |
2643 | Node states:
2644 | read_only
2645 | wsrep_status
2646 | wsrep_rejectqueries
2647 | wsrep_donorrejectqueries
2648 | wsrep_connected
2649 | wsrep_desinccount
2650 | wsrep_ready
2651 | wsrep_provider
2652 | wsrep_segment
2653 | Number of nodes by segment
2654 | Retry loop
2655 |
2656 | - Number of nodes by segment
2657 | If a node is the only one in a segment, the check will behave accordingly.
2658 | I.e. if a node is the only one in the MAIN segment, it will not be put in OFFLINE_SOFT when it becomes a donor, to prevent the cluster from becoming unavailable to the applications.
2659 | As mentioned, it is possible to declare a segment as MAIN, which is quite useful when managing production and DR sites.
2660 |
2661 | - The check can be configured to perform retries at an interval of X,
2662 | where X is the interval defined in the ProxySQL scheduler.
2663 | So if the check is set to 2 retries for UP and 4 for DOWN, it will loop that many times before doing anything, given that Galera performs some actions under the hood.
2664 | This feature is very useful in some less well known cases where Galera behaves in unexpected ways.
2665 | I.e. whenever a node is set to READ_ONLY=1, Galera desyncs and resyncs the node.
2666 | A check not taking this into account would set the node OFFLINE and back for no reason. A sketch of how the retry counters are persisted in ProxySQL is shown further below.
2667 |
2668 | Another important characteristic of this check is that it uses special HGs for maintenance, all in the 9000 range.
2669 | So if a node belongs to HG 10 and the check needs to put it in maintenance mode, the node will be moved to HG 9010.
2670 | Once everything is back to normal, the node will be put back in its original HG.
2671 |
2672 | This check does NOT modify any state of the nodes themselves.
2673 | Meaning it will NOT modify any variables or settings on the original node. It will ONLY change states in ProxySQL.
2674 |
2675 | The check is still a prototype and is not supposed to go to production (yet).
2676 |
2677 |
2678 | =over
2679 |
2680 | =item 1
2681 |
2682 | Note that galera_check is also segment aware; as such, the checks on the presence of writers/readers are done by segment, respecting the MainSegment as primary.
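As a purely illustrative sketch (host, port, hostgroup and counter values are made up), the retry counters mentioned above are persisted by appending them, keyed by the cluster identifier and preserving any existing comment, to the comment column of mysql_servers; this is roughly the statement save_retry issues:

  UPDATE mysql_servers SET comment='<existing comment><cluster_identifier>_retry_up=1;<cluster_identifier>_retry_down=0;'
    WHERE hostgroup_id=500 AND hostname='192.168.1.5' AND port='3306';
  LOAD MYSQL SERVERS TO RUNTIME;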
2683 |
2684 | =back
2685 |
2686 | =head1 Configure in ProxySQL
2687 |
2688 |
2689 | INSERT INTO scheduler (id,active,interval_ms,filename,arg1) values (10,0,2000,"/var/lib/proxysql/galera_check.pl","-u=remoteUser -p=remotePW -h=192.168.1.50 -H=500:W,501:R -P=6032 --retry_down=2 --retry_up=1 --main_segment=1 --debug=0 --active_failover=1 --single_writer=1 --log=/var/lib/proxysql/galeraLog");
2690 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK;
2691 |
2692 | To activate it:
2693 | update scheduler set active=1 where id=10;
2694 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK;
2695 |
2696 | To update the parameters you must pass all of them, not only the ones you want to change (e.g. enabling debug):
2697 | update scheduler set arg1="-u=remoteUser -p=remotePW -h=192.168.1.50 -H=500:W,501:R -P=6032 --retry_down=2 --retry_up=1 --main_segment=1 --debug=1 --active_failover=1 --single_writer=1 --log=/var/lib/proxysql/galeraLog" where id=10;
2698 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK;
2699 |
2700 | To remove the check from the scheduler:
2701 | delete from scheduler where id=10;
2702 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK;
2703 |
2704 |
2705 |
2706 | =head1 Rules:
2707 |
2708 | =over
2709 |
2710 | =item 1
2711 |
2712 | Set to OFFLINE_SOFT:
2713 |
2714 | any state other than 4 or 2, read_only = ON
2715 | donor node with reject queries = 0, cluster size > 2 nodes in the same segment, more than one writer, node is NOT read_only.
2716 |
2717 | Changes of pxc_maint_mode to anything other than DISABLED.
2718 |
2719 | =item 2
2720 |
2721 | Change HG to maintenance HG (a sketch of the corresponding ProxySQL statements is shown after this list):
2722 |
2723 | Node/cluster in non-Primary state
2724 | wsrep_reject_queries different from NONE
2725 | Donor, node reject queries =1 size of cluster
2726 |
2727 | =item 3
2728 |
2729 | Node comes back from offline_soft when (all of them):
2730 |
2731 | 1) Node state is 4
2732 | 2) wsrep_reject_queries = NONE
2733 | 3) Primary state
2734 |
2735 |
2736 | =item 4
2737 |
2738 | Node comes back from maintenance HG when (all of them):
2739 |
2740 | 1) Node state is 4
2741 | 2) wsrep_reject_queries = NONE
2742 | 3) Primary state
2743 |
2744 |
2745 | =item 5
2746 |
2747 | active_failover
2748 | Valid values are:
2749 | 0 [default] do not perform failover
2750 | 1 perform failover only if HG 8000 is specified in ProxySQL mysql_servers
2751 | 2 use PXC_CLUSTER_VIEW to identify a server in the same segment
2752 | 3 do whatever is needed to keep the service up, also failing over to another segment (uses PXC_CLUSTER_VIEW)
2753 |
2754 | =item 6
2755 | pxc_maint_mode is fully supported.
2756 | Any node with pxc_maint_mode different from DISABLED will be set to OFFLINE_SOFT in all the HostGroups.
2757 |
2758 | =item 7
2759 | Internally shunning a node.
2760 | While I am trying to rely as much as possible on ProxySQL, given a few inefficiencies there are cases where I have to set a node to SHUNNED because ProxySQL doesn't recognize its state correctly.
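To give a concrete, purely illustrative idea of how the rules above are applied, these are the kinds of statements the check issues against the ProxySQL admin interface, as built in move_node_offline, move_node_down_hg_change and move_node_up_from_hg_change; host, port and hostgroup ids are made-up values:

  -- set a node OFFLINE_SOFT
  UPDATE mysql_servers SET status='OFFLINE_SOFT' WHERE hostgroup_id=500 AND hostname='192.168.1.5' AND port='3306';
  -- move a node to its maintenance HG (9000 + original HG id)
  UPDATE mysql_servers SET hostgroup_id=9500 WHERE hostgroup_id=500 AND hostname='192.168.1.5' AND port='3306';
  -- bring it back to the original HG once it is healthy again
  UPDATE mysql_servers SET hostgroup_id=500 WHERE hostgroup_id=9500 AND hostname='192.168.1.5' AND port='3306';
  LOAD MYSQL SERVERS TO RUNTIME;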
2761 | 2762 | =back 2763 | =cut 2764 | 2765 | -------------------------------------------------------------------------------- /path_to_your_file: -------------------------------------------------------------------------------- 1 | 2016/08/16 20:43:52.695 Inizializing hostgroup 500 wwith maintenance HG 9500 mw 2 | 2016/08/16 20:43:52.695 Inizializing hostgroup 501 rwith maintenance HG 9501 mr 3 | 2016/08/16 20:43:52.725 Connecting to ProxySQL DBI:mysql:host=192.168.1.50;port=3310 4 | 2016/08/16 20:43:52.725 Galera cluster object created 5 | 2016/08/16 20:43:52.728 Galera cluster node 192.168.1.5:3306:HG=500 6 | 2016/08/16 20:43:52.729 Galera cluster node 192.168.1.6:3306:HG=501 7 | 2016/08/16 20:43:52.729 Galera cluster node 192.168.1.7:3306:HG=501 8 | 2016/08/16 20:43:52.729 Galera cluster node 192.168.1.8:3306:HG=501 9 | 2016/08/16 20:43:52.729 Galera cluster node 192.168.1.9:3306:HG=501 10 | 2016/08/16 20:43:52.729 Galera cluster nodes loaded 11 | 2016/08/16 20:43:52.729 Creating new thread to manage server check:192.168.1.5:3306:HG500 12 | 2016/08/16 20:43:52.757 Creating new thread to manage server check:192.168.1.6:3306:HG501 13 | 2016/08/16 20:43:52.781 Creating new thread to manage server check:192.168.1.7:3306:HG501 14 | 2016/08/16 20:43:52.757 Node check START 192.168.1.5:3306:HG500 15 | 2016/08/16 20:43:52.800 Node check END 192.168.1.5:3306:HG500 16 | 2016/08/16 20:43:52.803 Creating new thread to manage server check:192.168.1.8:3306:HG501 17 | 2016/08/16 20:43:52.781 Node check START 192.168.1.6:3306:HG501 18 | 2016/08/16 20:43:52.821 Node check END 192.168.1.6:3306:HG501 19 | 2016/08/16 20:43:52.826 Creating new thread to manage server check:192.168.1.9:3306:HG501 20 | 2016/08/16 20:43:52.804 Node check START 192.168.1.7:3306:HG501 21 | 2016/08/16 20:43:52.844 Node check END 192.168.1.7:3306:HG501 22 | 2016/08/16 20:43:52.826 Node check START 192.168.1.8:3306:HG501 23 | 2016/08/16 20:43:52.870 Node check END 192.168.1.8:3306:HG501 24 | 2016/08/16 20:43:52.849 Node check START 192.168.1.9:3306:HG501 25 | 2016/08/16 20:43:52.890 Node check END 192.168.1.9:3306:HG501 26 | 2016/08/16 20:43:52.866 Thread joined : 1 27 | 2016/08/16 20:43:52.881 Thread joined : 2 28 | 2016/08/16 20:43:52.896 Thread joined : 3 29 | 2016/08/16 20:43:52.915 Thread joined : 4 30 | 2016/08/16 20:43:52.931 Thread joined : 5 31 | 2016/08/16 20:43:52.931 Multi Thread execution done in : 202.030897140503(ms) 32 | 2016/08/16 20:43:52.931 Evaluate nodes state 33 | END EXECUTION Total Time:237.498044967651 34 | 2016/08/16 20:43:54.935 Inizializing hostgroup 500 wwith maintenance HG 9500 mw 35 | 2016/08/16 20:43:54.935 Inizializing hostgroup 501 rwith maintenance HG 9501 mr 36 | 2016/08/16 20:43:54.946 Connecting to ProxySQL DBI:mysql:host=192.168.1.50;port=3310 37 | 2016/08/16 20:43:54.946 Galera cluster object created 38 | 2016/08/16 20:43:54.949 Galera cluster node 192.168.1.5:3306:HG=500 39 | 2016/08/16 20:43:54.949 Galera cluster node 192.168.1.6:3306:HG=501 40 | 2016/08/16 20:43:54.949 Galera cluster node 192.168.1.7:3306:HG=501 41 | 2016/08/16 20:43:54.949 Galera cluster node 192.168.1.8:3306:HG=501 42 | 2016/08/16 20:43:54.950 Galera cluster node 192.168.1.9:3306:HG=501 43 | 2016/08/16 20:43:54.950 Galera cluster nodes loaded 44 | 2016/08/16 20:43:54.950 Creating new thread to manage server check:192.168.1.5:3306:HG500 45 | 2016/08/16 20:43:54.978 Creating new thread to manage server check:192.168.1.6:3306:HG501 46 | 2016/08/16 20:43:55.006 Creating new thread to manage server 
check:192.168.1.7:3306:HG501 47 | 2016/08/16 20:43:54.978 Node check START 192.168.1.5:3306:HG500 48 | 2016/08/16 20:43:55.013 Node check END 192.168.1.5:3306:HG500 49 | 2016/08/16 20:43:55.029 Creating new thread to manage server check:192.168.1.8:3306:HG501 50 | 2016/08/16 20:43:55.006 Node check START 192.168.1.6:3306:HG501 51 | 2016/08/16 20:43:55.042 Node check END 192.168.1.6:3306:HG501 52 | 2016/08/16 20:43:55.053 Creating new thread to manage server check:192.168.1.9:3306:HG501 53 | 2016/08/16 20:43:55.029 Node check START 192.168.1.7:3306:HG501 54 | 2016/08/16 20:43:55.066 Node check END 192.168.1.7:3306:HG501 55 | 2016/08/16 20:43:55.053 Node check START 192.168.1.8:3306:HG501 56 | 2016/08/16 20:43:55.094 Node check END 192.168.1.8:3306:HG501 57 | 2016/08/16 20:43:55.077 Node check START 192.168.1.9:3306:HG501 58 | 2016/08/16 20:43:55.114 Node check END 192.168.1.9:3306:HG501 59 | 2016/08/16 20:43:55.095 Thread joined : 6 60 | 2016/08/16 20:43:55.110 Thread joined : 7 61 | 2016/08/16 20:43:55.124 Thread joined : 8 62 | 2016/08/16 20:43:55.144 Thread joined : 9 63 | 2016/08/16 20:43:55.163 Thread joined : 10 64 | 2016/08/16 20:43:55.164 Multi Thread execution done in : 213.661909103394(ms) 65 | 2016/08/16 20:43:55.164 Evaluate nodes state 66 | END EXECUTION Total Time:229.423046112061 67 | 2016/08/16 20:43:57.169 Inizializing hostgroup 500 wwith maintenance HG 9500 mw 68 | 2016/08/16 20:43:57.169 Inizializing hostgroup 501 rwith maintenance HG 9501 mr 69 | 2016/08/16 20:43:57.177 Connecting to ProxySQL DBI:mysql:host=192.168.1.50;port=3310 70 | 2016/08/16 20:43:57.178 Galera cluster object created 71 | 2016/08/16 20:43:57.180 Galera cluster node 192.168.1.5:3306:HG=500 72 | 2016/08/16 20:43:57.180 Galera cluster node 192.168.1.6:3306:HG=501 73 | 2016/08/16 20:43:57.181 Galera cluster node 192.168.1.7:3306:HG=501 74 | 2016/08/16 20:43:57.181 Galera cluster node 192.168.1.8:3306:HG=501 75 | 2016/08/16 20:43:57.181 Galera cluster node 192.168.1.9:3306:HG=501 76 | 2016/08/16 20:43:57.181 Galera cluster nodes loaded 77 | 2016/08/16 20:43:57.181 Creating new thread to manage server check:192.168.1.5:3306:HG500 78 | 2016/08/16 20:43:57.207 Creating new thread to manage server check:192.168.1.6:3306:HG501 79 | 2016/08/16 20:43:57.231 Creating new thread to manage server check:192.168.1.7:3306:HG501 80 | 2016/08/16 20:43:57.207 Node check START 192.168.1.5:3306:HG500 81 | 2016/08/16 20:43:57.245 Node check END 192.168.1.5:3306:HG500 82 | 2016/08/16 20:43:57.257 Creating new thread to manage server check:192.168.1.8:3306:HG501 83 | 2016/08/16 20:43:57.231 Node check START 192.168.1.6:3306:HG501 84 | 2016/08/16 20:43:57.267 Node check END 192.168.1.6:3306:HG501 85 | 2016/08/16 20:43:57.281 Creating new thread to manage server check:192.168.1.9:3306:HG501 86 | 2016/08/16 20:43:57.257 Node check START 192.168.1.7:3306:HG501 87 | 2016/08/16 20:43:57.293 Node check END 192.168.1.7:3306:HG501 88 | 2016/08/16 20:43:57.281 Node check START 192.168.1.8:3306:HG501 89 | 2016/08/16 20:43:57.324 Node check END 192.168.1.8:3306:HG501 90 | 2016/08/16 20:43:57.304 Node check START 192.168.1.9:3306:HG501 91 | 2016/08/16 20:43:57.346 Node check END 192.168.1.9:3306:HG501 92 | 2016/08/16 20:43:57.322 Thread joined : 11 93 | 2016/08/16 20:43:57.339 Thread joined : 12 94 | 2016/08/16 20:43:57.359 Thread joined : 13 95 | 2016/08/16 20:43:57.380 Thread joined : 14 96 | 2016/08/16 20:43:57.403 Thread joined : 15 97 | 2016/08/16 20:43:57.403 Multi Thread execution done in : 221.658945083618(ms) 98 | 2016/08/16 
20:43:57.403 Evaluate nodes state 99 | END EXECUTION Total Time:234.990119934082 100 | 2016/08/16 20:43:59.405 Inizializing hostgroup 500 wwith maintenance HG 9500 mw 101 | 2016/08/16 20:43:59.405 Inizializing hostgroup 501 rwith maintenance HG 9501 mr 102 | 2016/08/16 20:43:59.413 Connecting to ProxySQL DBI:mysql:host=192.168.1.50;port=3310 103 | 2016/08/16 20:43:59.414 Galera cluster object created 104 | 2016/08/16 20:43:59.416 Galera cluster node 192.168.1.5:3306:HG=500 105 | 2016/08/16 20:43:59.416 Galera cluster node 192.168.1.6:3306:HG=501 106 | 2016/08/16 20:43:59.417 Galera cluster node 192.168.1.7:3306:HG=501 107 | 2016/08/16 20:43:59.417 Galera cluster node 192.168.1.8:3306:HG=501 108 | 2016/08/16 20:43:59.417 Galera cluster node 192.168.1.9:3306:HG=501 109 | 2016/08/16 20:43:59.417 Galera cluster nodes loaded 110 | 2016/08/16 20:43:59.417 Creating new thread to manage server check:192.168.1.5:3306:HG500 111 | 2016/08/16 20:43:59.443 Creating new thread to manage server check:192.168.1.6:3306:HG501 112 | 2016/08/16 20:43:59.467 Creating new thread to manage server check:192.168.1.7:3306:HG501 113 | 2016/08/16 20:43:59.443 Node check START 192.168.1.5:3306:HG500 114 | 2016/08/16 20:43:59.482 Node check END 192.168.1.5:3306:HG500 115 | 2016/08/16 20:43:59.491 Creating new thread to manage server check:192.168.1.8:3306:HG501 116 | 2016/08/16 20:43:59.467 Node check START 192.168.1.6:3306:HG501 117 | 2016/08/16 20:43:59.504 Node check END 192.168.1.6:3306:HG501 118 | 2016/08/16 20:43:59.515 Creating new thread to manage server check:192.168.1.9:3306:HG501 119 | 2016/08/16 20:43:59.491 Node check START 192.168.1.7:3306:HG501 120 | 2016/08/16 20:43:59.532 Node check END 192.168.1.7:3306:HG501 121 | 2016/08/16 20:43:59.515 Node check START 192.168.1.8:3306:HG501 122 | 2016/08/16 20:43:59.557 Node check END 192.168.1.8:3306:HG501 123 | 2016/08/16 20:43:59.539 Node check START 192.168.1.9:3306:HG501 124 | 2016/08/16 20:43:59.581 Node check END 192.168.1.9:3306:HG501 125 | 2016/08/16 20:43:59.558 Thread joined : 16 126 | 2016/08/16 20:43:59.574 Thread joined : 17 127 | 2016/08/16 20:43:59.592 Thread joined : 18 128 | 2016/08/16 20:43:59.609 Thread joined : 19 129 | 2016/08/16 20:43:59.625 Thread joined : 20 130 | 2016/08/16 20:43:59.625 Multi Thread execution done in : 207.617044448853(ms) 131 | 2016/08/16 20:43:59.625 Evaluate nodes state 132 | END EXECUTION Total Time:220.714092254639 133 | 2016/08/16 20:44:01.630 Inizializing hostgroup 500 wwith maintenance HG 9500 mw 134 | 2016/08/16 20:44:01.630 Inizializing hostgroup 501 rwith maintenance HG 9501 mr 135 | 2016/08/16 20:44:01.643 Connecting to ProxySQL DBI:mysql:host=192.168.1.50;port=3310 136 | 2016/08/16 20:44:01.643 Galera cluster object created 137 | 2016/08/16 20:44:01.646 Galera cluster node 192.168.1.5:3306:HG=500 138 | 2016/08/16 20:44:01.646 Galera cluster node 192.168.1.6:3306:HG=501 139 | 2016/08/16 20:44:01.646 Galera cluster node 192.168.1.7:3306:HG=501 140 | 2016/08/16 20:44:01.647 Galera cluster node 192.168.1.8:3306:HG=501 141 | 2016/08/16 20:44:01.647 Galera cluster node 192.168.1.9:3306:HG=501 142 | 2016/08/16 20:44:01.647 Galera cluster nodes loaded 143 | 2016/08/16 20:44:01.647 Creating new thread to manage server check:192.168.1.5:3306:HG500 144 | 2016/08/16 20:44:01.672 Creating new thread to manage server check:192.168.1.6:3306:HG501 145 | 2016/08/16 20:44:01.696 Creating new thread to manage server check:192.168.1.7:3306:HG501 146 | 2016/08/16 20:44:01.672 Node check START 192.168.1.5:3306:HG500 147 | 
2016/08/16 20:44:01.709 Node check END 192.168.1.5:3306:HG500 148 | 2016/08/16 20:44:01.722 Creating new thread to manage server check:192.168.1.8:3306:HG501 149 | 2016/08/16 20:44:01.696 Node check START 192.168.1.6:3306:HG501 150 | 2016/08/16 20:44:01.737 Node check END 192.168.1.6:3306:HG501 151 | 2016/08/16 20:44:01.746 Creating new thread to manage server check:192.168.1.9:3306:HG501 152 | 2016/08/16 20:44:01.722 Node check START 192.168.1.7:3306:HG501 153 | 2016/08/16 20:44:01.761 Node check END 192.168.1.7:3306:HG501 154 | 2016/08/16 20:44:01.746 Node check START 192.168.1.8:3306:HG501 155 | 2016/08/16 20:44:01.787 Node check END 192.168.1.8:3306:HG501 156 | 2016/08/16 20:44:01.770 Node check START 192.168.1.9:3306:HG501 157 | 2016/08/16 20:44:01.809 Node check END 192.168.1.9:3306:HG501 158 | 2016/08/16 20:44:01.789 Thread joined : 21 159 | 2016/08/16 20:44:01.807 Thread joined : 22 160 | 2016/08/16 20:44:01.824 Thread joined : 23 161 | 2016/08/16 20:44:01.842 Thread joined : 24 162 | 2016/08/16 20:44:01.862 Thread joined : 25 163 | 2016/08/16 20:44:01.862 Multi Thread execution done in : 214.992046356201(ms) 164 | 2016/08/16 20:44:01.862 Evaluate nodes state 165 | END EXECUTION Total Time:233.632802963257 166 | 2016/08/16 20:44:03.868 Inizializing hostgroup 500 wwith maintenance HG 9500 mw 167 | 2016/08/16 20:44:03.868 Inizializing hostgroup 501 rwith maintenance HG 9501 mr 168 | 2016/08/16 20:44:03.877 Connecting to ProxySQL DBI:mysql:host=192.168.1.50;port=3310 169 | 2016/08/16 20:44:03.878 Galera cluster object created 170 | 2016/08/16 20:44:03.880 Galera cluster node 192.168.1.5:3306:HG=500 171 | 2016/08/16 20:44:03.881 Galera cluster node 192.168.1.6:3306:HG=501 172 | 2016/08/16 20:44:03.881 Galera cluster node 192.168.1.7:3306:HG=501 173 | 2016/08/16 20:44:03.881 Galera cluster node 192.168.1.8:3306:HG=501 174 | 2016/08/16 20:44:03.881 Galera cluster node 192.168.1.9:3306:HG=501 175 | 2016/08/16 20:44:03.882 Galera cluster nodes loaded 176 | 2016/08/16 20:44:03.882 Creating new thread to manage server check:192.168.1.5:3306:HG500 177 | 2016/08/16 20:44:03.909 Creating new thread to manage server check:192.168.1.6:3306:HG501 178 | 2016/08/16 20:44:03.931 Creating new thread to manage server check:192.168.1.7:3306:HG501 179 | 2016/08/16 20:44:03.909 Node check START 192.168.1.5:3306:HG500 180 | 2016/08/16 20:44:03.945 Node check END 192.168.1.5:3306:HG500 181 | 2016/08/16 20:44:03.955 Creating new thread to manage server check:192.168.1.8:3306:HG501 182 | 2016/08/16 20:44:03.931 Node check START 192.168.1.6:3306:HG501 183 | 2016/08/16 20:44:03.970 Node check END 192.168.1.6:3306:HG501 184 | 2016/08/16 20:44:03.979 Creating new thread to manage server check:192.168.1.9:3306:HG501 185 | 2016/08/16 20:44:03.955 Node check START 192.168.1.7:3306:HG501 186 | 2016/08/16 20:44:03.994 Node check END 192.168.1.7:3306:HG501 187 | 2016/08/16 20:44:03.979 Node check START 192.168.1.8:3306:HG501 188 | 2016/08/16 20:44:04.022 Node check END 192.168.1.8:3306:HG501 189 | 2016/08/16 20:44:04.003 Node check START 192.168.1.9:3306:HG501 190 | 2016/08/16 20:44:04.045 Node check END 192.168.1.9:3306:HG501 191 | 2016/08/16 20:44:04.022 Thread joined : 26 192 | 2016/08/16 20:44:04.041 Thread joined : 27 193 | 2016/08/16 20:44:04.060 Thread joined : 28 194 | 2016/08/16 20:44:04.078 Thread joined : 29 195 | 2016/08/16 20:44:04.098 Thread joined : 30 196 | 2016/08/16 20:44:04.098 Multi Thread execution done in : 216.578960418701(ms) 197 | 2016/08/16 20:44:04.099 Evaluate nodes state 198 | END EXECUTION 
Total Time:231.374979019165 199 | 2016/08/16 20:44:06.103 Inizializing hostgroup 500 wwith maintenance HG 9500 mw 200 | 2016/08/16 20:44:06.103 Inizializing hostgroup 501 rwith maintenance HG 9501 mr 201 | 2016/08/16 20:44:06.114 Connecting to ProxySQL DBI:mysql:host=192.168.1.50;port=3310 202 | 2016/08/16 20:44:06.114 Galera cluster object created 203 | 2016/08/16 20:44:06.116 Galera cluster node 192.168.1.5:3306:HG=500 204 | 2016/08/16 20:44:06.117 Galera cluster node 192.168.1.6:3306:HG=501 205 | 2016/08/16 20:44:06.117 Galera cluster node 192.168.1.7:3306:HG=501 206 | 2016/08/16 20:44:06.117 Galera cluster node 192.168.1.8:3306:HG=501 207 | 2016/08/16 20:44:06.117 Galera cluster node 192.168.1.9:3306:HG=501 208 | 2016/08/16 20:44:06.117 Galera cluster nodes loaded 209 | 2016/08/16 20:44:06.117 Creating new thread to manage server check:192.168.1.5:3306:HG500 210 | 2016/08/16 20:44:06.139 Creating new thread to manage server check:192.168.1.6:3306:HG501 211 | 2016/08/16 20:44:06.164 Creating new thread to manage server check:192.168.1.7:3306:HG501 212 | 2016/08/16 20:44:06.139 Node check START 192.168.1.5:3306:HG500 213 | 2016/08/16 20:44:06.177 Node check END 192.168.1.5:3306:HG500 214 | 2016/08/16 20:44:06.188 Creating new thread to manage server check:192.168.1.8:3306:HG501 215 | 2016/08/16 20:44:06.164 Node check START 192.168.1.6:3306:HG501 216 | 2016/08/16 20:44:06.202 Node check END 192.168.1.6:3306:HG501 217 | 2016/08/16 20:44:06.212 Creating new thread to manage server check:192.168.1.9:3306:HG501 218 | 2016/08/16 20:44:06.188 Node check START 192.168.1.7:3306:HG501 219 | 2016/08/16 20:44:06.225 Node check END 192.168.1.7:3306:HG501 220 | 2016/08/16 20:44:06.212 Node check START 192.168.1.8:3306:HG501 221 | 2016/08/16 20:44:06.252 Node check END 192.168.1.8:3306:HG501 222 | 2016/08/16 20:44:06.237 Node check START 192.168.1.9:3306:HG501 223 | 2016/08/16 20:44:06.283 Node check END 192.168.1.9:3306:HG501 224 | 2016/08/16 20:44:06.255 Thread joined : 31 225 | 2016/08/16 20:44:06.272 Thread joined : 32 226 | 2016/08/16 20:44:06.294 Thread joined : 33 227 | 2016/08/16 20:44:06.319 Thread joined : 34 228 | 2016/08/16 20:44:06.343 Thread joined : 35 229 | 2016/08/16 20:44:06.343 Multi Thread execution done in : 225.722789764404(ms) 230 | 2016/08/16 20:44:06.343 Evaluate nodes state 231 | END EXECUTION Total Time:241.1048412323 232 | 2016/08/16 20:44:08.348 Inizializing hostgroup 500 wwith maintenance HG 9500 mw 233 | 2016/08/16 20:44:08.348 Inizializing hostgroup 501 rwith maintenance HG 9501 mr 234 | 2016/08/16 20:44:08.357 Connecting to ProxySQL DBI:mysql:host=192.168.1.50;port=3310 235 | 2016/08/16 20:44:08.357 Galera cluster object created 236 | 2016/08/16 20:44:08.359 Galera cluster node 192.168.1.5:3306:HG=500 237 | 2016/08/16 20:44:08.360 Galera cluster node 192.168.1.6:3306:HG=501 238 | 2016/08/16 20:44:08.360 Galera cluster node 192.168.1.7:3306:HG=501 239 | 2016/08/16 20:44:08.360 Galera cluster node 192.168.1.8:3306:HG=501 240 | 2016/08/16 20:44:08.361 Galera cluster node 192.168.1.9:3306:HG=501 241 | 2016/08/16 20:44:08.361 Galera cluster nodes loaded 242 | 2016/08/16 20:44:08.361 Creating new thread to manage server check:192.168.1.5:3306:HG500 243 | 2016/08/16 20:44:08.389 Creating new thread to manage server check:192.168.1.6:3306:HG501 244 | 2016/08/16 20:44:08.418 Creating new thread to manage server check:192.168.1.7:3306:HG501 245 | 2016/08/16 20:44:08.389 Node check START 192.168.1.5:3306:HG500 246 | 2016/08/16 20:44:08.438 Node check END 192.168.1.5:3306:HG500 
247 | 2016/08/16 20:44:08.452 Creating new thread to manage server check:192.168.1.8:3306:HG501 248 | 2016/08/16 20:44:08.418 Node check START 192.168.1.6:3306:HG501 249 | 2016/08/16 20:44:08.475 Node check END 192.168.1.6:3306:HG501 250 | 2016/08/16 20:44:08.485 Creating new thread to manage server check:192.168.1.9:3306:HG501 251 | 2016/08/16 20:44:08.452 Node check START 192.168.1.7:3306:HG501 252 | 2016/08/16 20:44:08.513 Node check END 192.168.1.7:3306:HG501 253 | 2016/08/16 20:44:08.485 Node check START 192.168.1.8:3306:HG501 254 | 2016/08/16 20:44:08.546 Node check END 192.168.1.8:3306:HG501 255 | 2016/08/16 20:44:08.524 Node check START 192.168.1.9:3306:HG501 256 | 2016/08/16 20:44:08.582 Node check END 192.168.1.9:3306:HG501 257 | 2016/08/16 20:44:08.552 Thread joined : 36 258 | 2016/08/16 20:44:08.580 Thread joined : 37 259 | 2016/08/16 20:44:08.601 Thread joined : 38 260 | 2016/08/16 20:44:08.622 Thread joined : 39 261 | 2016/08/16 20:44:08.643 Thread joined : 40 262 | 2016/08/16 20:44:08.643 Multi Thread execution done in : 282.09400177002(ms) 263 | 2016/08/16 20:44:08.644 Evaluate nodes state 264 | END EXECUTION Total Time:296.465158462524 265 | 2016/08/16 20:44:10.647 Inizializing hostgroup 500 wwith maintenance HG 9500 mw 266 | 2016/08/16 20:44:10.647 Inizializing hostgroup 501 rwith maintenance HG 9501 mr 267 | 2016/08/16 20:44:10.660 Connecting to ProxySQL DBI:mysql:host=192.168.1.50;port=3310 268 | 2016/08/16 20:44:10.660 Galera cluster object created 269 | 2016/08/16 20:44:10.663 Galera cluster node 192.168.1.5:3306:HG=500 270 | 2016/08/16 20:44:10.663 Galera cluster node 192.168.1.6:3306:HG=501 271 | 2016/08/16 20:44:10.664 Galera cluster node 192.168.1.7:3306:HG=501 272 | 2016/08/16 20:44:10.664 Galera cluster node 192.168.1.8:3306:HG=501 273 | 2016/08/16 20:44:10.664 Galera cluster node 192.168.1.9:3306:HG=501 274 | 2016/08/16 20:44:10.665 Galera cluster nodes loaded 275 | 2016/08/16 20:44:10.665 Creating new thread to manage server check:192.168.1.5:3306:HG500 276 | 2016/08/16 20:44:10.690 Creating new thread to manage server check:192.168.1.6:3306:HG501 277 | 2016/08/16 20:44:10.714 Creating new thread to manage server check:192.168.1.7:3306:HG501 278 | 2016/08/16 20:44:10.690 Node check START 192.168.1.5:3306:HG500 279 | 2016/08/16 20:44:10.731 Node check END 192.168.1.5:3306:HG500 280 | 2016/08/16 20:44:10.738 Creating new thread to manage server check:192.168.1.8:3306:HG501 281 | 2016/08/16 20:44:10.714 Node check START 192.168.1.6:3306:HG501 282 | 2016/08/16 20:44:10.754 Node check END 192.168.1.6:3306:HG501 283 | 2016/08/16 20:44:10.762 Creating new thread to manage server check:192.168.1.9:3306:HG501 284 | 2016/08/16 20:44:10.738 Node check START 192.168.1.7:3306:HG501 285 | 2016/08/16 20:44:10.777 Node check END 192.168.1.7:3306:HG501 286 | 2016/08/16 20:44:10.762 Node check START 192.168.1.8:3306:HG501 287 | 2016/08/16 20:44:10.801 Node check END 192.168.1.8:3306:HG501 288 | 2016/08/16 20:44:10.787 Node check START 192.168.1.9:3306:HG501 289 | 2016/08/16 20:44:10.827 Node check END 192.168.1.9:3306:HG501 290 | -------------------------------------------------------------------------------- /proxy_debug_tools/README.md: -------------------------------------------------------------------------------- 1 | The file marce.pl is the evolution of the initial test file writen by Marcelo Altman here https://github.com/sysown/proxysql/issues/2022 2 | I just take his idea of a simple and emphiric test, and add some more info. 
3 | Thanks @altmanmarcelo 4 | -------------------------------------------------------------------------------- /proxy_debug_tools/marce.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use Time::HiRes qw(gettimeofday nanosleep); 4 | use POSIX qw(tzset strftime); 5 | use strict; 6 | use DBI; 7 | 8 | #Print time from invocation with milliseconds 9 | sub get_time($){ 10 | my $timeType = shift; 11 | $ENV{TZ} = 'UTC'; 12 | tzset; 13 | my $t = gettimeofday(); 14 | my $date = strftime "%Y/%m/%d %H:%M:%S", localtime $t; 15 | 16 | if($timeType == 1){return ($date= sprintf "%06d", ($t-int($t))*1000000); } 17 | elsif ($timeType == 2){return $date;} 18 | elsif ($timeType == 3){return ($t);} 19 | elsif ($timeType == 4){ 20 | return $t;} 21 | else {return ($date .= sprintf ".%03d", ($t-int($t))*1000);} 22 | 23 | } 24 | 25 | 26 | sub get_connection($$$$) { 27 | 28 | my $dsn = shift; 29 | my $user = shift; 30 | my $pass = shift; 31 | my $SPACER = shift; 32 | my $dbh = DBI->connect($dsn 33 | , $user,$pass 34 | , { RaiseError => 0 35 | , PrintError => 0 36 | , AutoCommit => 0 37 | } 38 | ); 39 | 40 | if (!defined($dbh)) { 41 | print("Cannot connect to $dsn as $user\n"); 42 | die(); 43 | return undef; 44 | } 45 | 46 | return $dbh; 47 | } 48 | my $user = 'app_ndb'; 49 | my $schema = 'test'; 50 | my $ip = '192.168.4.11'; 51 | my $port = 6033; 52 | my $engine='innodb'; 53 | my $dbh = get_connection('DBI:mysql:'.$schema.':'.$ip.':'.$port,$user,'test',' '); 54 | if(!defined $dbh){ 55 | return undef; 56 | } 57 | 58 | my $dbh2 = get_connection('DBI:mysql:'.$schema.':'.$ip.':'.$port,$user,'test',' '); 59 | if(!defined $dbh){ 60 | return undef; 61 | } 62 | my $maxReachedLag=0; 63 | my $numberOfcallWithLag=0; 64 | 65 | #Setup */ 66 | print get_time(2); 67 | $dbh->do('DROP TABLE IF EXISTS '.$schema.'.joinit'); 68 | $dbh->do('CREATE TABLE IF NOT EXISTS `'.$schema.'`.`joinit` ( 69 | `i` bigint(11) NOT NULL AUTO_INCREMENT, 70 | `s` char(255) DEFAULT NULL, 71 | `t` datetime NOT NULL, 72 | `g` bigint(11) NOT NULL, 73 | KEY(`i`, `t`), 74 | PRIMARY KEY(`i`) 75 | ) ENGINE=$engine DEFAULT CHARSET=utf8;'); 76 | my $date1=get_time(2) ; 77 | my $starTest=get_time(3); 78 | 79 | # Populate the test Table 80 | $dbh->do("INSERT INTO $schema.joinit VALUES (NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )));"); 81 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 82 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 83 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 84 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 85 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 86 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 87 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 88 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 89 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 90 | 
$dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 91 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 92 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 93 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 94 | $dbh->do("INSERT INTO $schema.joinit SELECT NULL, uuid(), time('$date1'), (FLOOR( 1 + RAND( ) *60 )) FROM $schema.joinit;"); 95 | $dbh->commit(); 96 | #$dbh->prepare("set SESSION wsrep_sync_wait=0")->execute(); 97 | sleep(3); 98 | 99 | 100 | my $sth = $dbh->prepare("SELECT COUNT(i) FROM $schema.joinit"); 101 | $sth->execute(); 102 | my $rows; 103 | while (my $ref = $sth->fetchrow_hashref()) { 104 | $rows = $ref->{'COUNT(i)'}; 105 | } 106 | 107 | print get_time(2) . "\n Starting to RUN the test with $rows number of rows\n"; 108 | 109 | 110 | my $date2=get_time(2); 111 | 112 | my $sth2 = $dbh->prepare("SELECT i FROM joinit order by i"); 113 | $sth2->execute(); 114 | 115 | #Cycle for all EXSISTING IDs 116 | my $iCountOk = 0 ; 117 | my $loops = 0 ; 118 | my $totlagtime = 0; 119 | while (my $ref = $sth2->fetchrow_hashref()) { 120 | my $currrentID = $ref->{'i'}; 121 | my $result; 122 | $iCountOk++ ; 123 | $loops++; 124 | #Update dates to make the record different 125 | 126 | $sth = $dbh->prepare("UPDATE $schema.joinit SET t = '$date2' WHERE i = $currrentID")->execute(); 127 | #Forcing a commit (for galera or GR this will become a writeset) 128 | 129 | 130 | #starting to calculate the lag/latency after the commit this will include the query time 131 | $dbh->commit(); 132 | my $firstReadTime= get_time(4); 133 | my $secondReadTime=0; 134 | 135 | #nanosleep(15613000); 136 | 137 | #Read the updated record, when using ProxySQL this call SHOULD go to another server 138 | $result = $dbh->prepare("SELECT i FROM $schema.joinit WHERE t = '$date2' AND i = $currrentID")->execute(); 139 | 140 | #If we get no row, this means that the write has not be committed yet on the serving node 141 | if($result == 0) 142 | { 143 | $result = 0; 144 | $numberOfcallWithLag++; 145 | 146 | print get_time(2) . " Dirty Read Detected on i $currrentID . . ."; 147 | 148 | while($result != 1){ 149 | #crazy looping until we get the record. 150 | # !!! NOTE this do not guarantee we run the selct against the same server, but that we get the record back 151 | #print("SELECT i FROM $schema.joinit WHERE t = '$date2' AND i = $currrentID \n"); 152 | $result = $dbh->prepare("SELECT i FROM $schema.joinit WHERE t = '$date2' AND i = $currrentID")->execute(); 153 | $secondReadTime=get_time(4); 154 | } 155 | print " After ".int(($secondReadTime-$firstReadTime)*1000000)." microseconds, Loop $loops rows found $result \n"; 156 | } else { 157 | $secondReadTime=get_time(4); 158 | #print get_current_time(). " i $currrentID is ok in ".($secondReadTime - $firstReadTime)." microseconds rows= $result\n"; 159 | } 160 | if(int(($secondReadTime-$firstReadTime)*1000000) > $maxReachedLag){ 161 | $maxReachedLag = int(($secondReadTime-$firstReadTime)*1000000); 162 | } 163 | 164 | 165 | #convert difference in microseconds 166 | my $diff = int(($secondReadTime-$firstReadTime)*1000000); 167 | $totlagtime = $diff + $totlagtime; 168 | if($iCountOk == 200){ 169 | print "Status report at id ${currrentID} current lag average ".int($totlagtime/$loops)." 
total lag $totlagtime \n"; 170 | $iCountOk =0; 171 | } 172 | } 173 | $dbh->disconnect if (defined $dbh); 174 | my $endTime = get_time(3); 175 | print "Test run for #loops: $rows \nLag exceptions: $numberOfcallWithLag \nMax Lag time reached: $maxReachedLag microseconds (us)\n"; 176 | print "Average lag ".int($totlagtime/$loops) ." microseconds\n"; 177 | print "Total test time ". ($endTime - $starTest) ." seconds"; 178 | 179 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | WARNING 2 | ========== 3 | This project is deprecated; a better tool to perform the same action is now available here 4 | https://github.com/Tusamarco/proxysql_scheduler 5 | 6 | Please try it and open issues in that project if needed 7 | 8 | Galera Check tool 9 | ==================== 10 | 11 | Galera check is a script to manage integration between ProxySQL and Galera (from Codership), including its different implementations like PXC. 12 | Galera, and its implementations like Percona XtraDB Cluster (PXC), uses a data-centric concept, as such the status of a node is relevant only in relation to the cluster. 13 | 14 | In ProxySQL it is possible to represent a cluster and its segments using HostGroups. 15 | Galera check is designed to manage an arbitrary number of nodes that belong to a given Hostgroup (HG). 16 | In galera_check it is also important to qualify the HG. This is true in the case of multi-node setups like PXC or when Replication HGs are used. 17 | 18 | Galera_check can now also manage a single writer in PXC and fail over when needed. 19 | This is implemented using two new features: 20 | * Backup Node definition (using hostgroup) 21 | * Use of the PXC_CLUSTER_VIEW table (in PXC only) 22 | 23 | Galera_check works by HG and as such it will perform isolated actions/checks by HG. 24 | It is not possible to have more than one check running on the same HG. 25 | 26 | To prevent multiple checks from running, the check creates a lock file {proxysql_galera_check_${hg}.pid} that is used to prevent duplicates. 27 | Galera_check will connect to the ProxySQL node and retrieve all the information regarding the nodes/ProxySQL configuration. 28 | It will then check each node in *parallel* and retrieve its status and configuration. 29 | 30 | At the moment galera_check analyzes and manages the following: 31 | 32 | Node states: 33 | * pxc_maint_mode 34 | * read_only 35 | * wsrep_status 36 | * wsrep_rejectqueries 37 | * wsrep_donorrejectqueries 38 | * wsrep_connected 39 | * wsrep_desinccount 40 | * wsrep_ready 41 | * wsrep_provider 42 | * wsrep_segment 43 | * Number of nodes by segment 44 | * Retry loop 45 | 46 | PXC cluster state: 47 | * PXC_CLUSTER_VIEW 48 | 49 | Fail-over options: 50 | * Presence of an active node in the special backup Hostgroup (8000 + original HG id). 51 | 52 | Special HostGroup 8000 is now used also for READERS, to define which ones should be checked and eventually added to the pool if missing 53 | 54 | If a node is the only one in a segment, the check will behave accordingly. 55 | I.e. if a node is the only one in the MAIN segment, the check will not put the node in OFFLINE_SOFT when the node becomes a donor, to prevent the cluster from becoming unavailable for the applications. 56 | As mentioned, it is possible to declare a segment as MAIN, which is quite useful when managing prod and DR sites. 57 | 58 | The check can be configured to perform retries at an X interval, 59 | where X is the time defined in the ProxySQL scheduler.
60 | As such, if the check is set to have 2 retries for UP and 4 for DOWN, it will loop that number of times before doing anything, given that Galera performs some actions under the hood. 61 | 62 | This feature is very useful in some not well known cases where Galera behaves in unexpected ways. 63 | E.g. whenever a node is set to READ_ONLY=1, Galera desyncs and resyncs the node. 64 | A check not taking this into account would cause a node to be set OFFLINE and back for no reason. 65 | 66 | Another important differentiation of this check is that it uses special HGs for maintenance, all in the 9000 range. 67 | So if a node belongs to HG 10 and the check needs to put it in maintenance mode, the node will be moved to HG 9010. 68 | Once all is normal again, the node will be put back in its original HG. 69 | 70 | The special group 8000 is instead used for __configuration__; this is where you will need to insert the 8XXX entries referring to your WRITER HG and READER HG, as the configuration the script needs to refer to. 71 | To be clear: 8XXX, where XXX are the digits of your Hostgroup id, i.e. 20 -> 8020, 1 -> 8001, etc. 72 | 73 | This check does NOT modify any state of the Nodes. 74 | Meaning it will NOT modify any variables or settings on the original node. It will ONLY change states in ProxySQL. 75 | 76 | Multiple PXC nodes and a Single writer. 77 | ProxySQL can easily move read or write traffic from one node to another in case of a node failure. Normally, playing with the weight will give us a (stable enough) scenario. 78 | But this will not guarantee FULL 100% isolation when a single writer is needed. 79 | When there is that need, using only the weight will not be enough, given ProxySQL will direct some writes also to the other nodes; few indeed, but still some, so it is not 100% isolated. 80 | Unless you use the single_writer option (ON by default); in that case your PXC setup will rely on one writer at a time. 81 | 82 | To manage that, and also to provide a good way to set/define what and how to fail over in case of need, I have implemented a new feature: 83 | *active_failover* 84 | Valid values are: 85 | 0 [default] do not make fail-over 86 | 1 make fail-over only if HG 8000 is specified in ProxySQL mysql_servers 87 | 2 use PXC_CLUSTER_VIEW to identify a server in the same segment 88 | 3 do whatever to keep service up, also fail-over to another segment (use PXC_CLUSTER_VIEW) 89 | 90 | Active fail-over works using three main features: the Backup Host Group, the information present in wsrep_provider_options, and the table performance_schema.pxc_cluster_view (PXC 5.7.19 and later). 91 | Because of this, you need the ProxySQL monitor user to be able to SELECT from the performance_schema tables if you want to use pxc_cluster_view; a sketch of the grant follows.
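A minimal sketch of that grant, to be run on the PXC nodes. The user name `'monitor'@'%'` here is only an assumption; use whatever you configured as `mysql-monitor_username` in ProxySQL, and note the account is assumed to exist already:

```SQL
-- Hypothetical grant for the ProxySQL monitor user (assumed to exist already):
-- it only needs to read the PXC view used when active_failover is 2 or 3.
GRANT SELECT ON performance_schema.pxc_cluster_view TO 'monitor'@'%';
```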
92 | For example, to set up the host groups: 93 | ```SQL 94 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.205',50,3306,1000000,2000); 95 | 96 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.205',8050,3306,1000,2000); 97 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.231',8050,3306,999,2000); 98 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.22',8050,3306,998,2000); 99 | 100 | 101 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.205',8052,3306,10000,2000); 102 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.231',8052,3306,10000,2000); 103 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.22',8052,3306,10000,2000); 104 | 105 | 106 | 107 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.205',52,3306,1,2000); 108 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.21',52,3306,1000000,2000); 109 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.231',52,3306,1,2000); 110 | 111 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.22',50,3306,1,2000); 112 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.23',50,3306,1,2000); 113 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.233',50,3306,1,2000); 114 | 115 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.22',52,3306,1,2000); 116 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.23',52,3306,1,2000); 117 | INSERT INTO mysql_servers (hostname,hostgroup_id,port,weight,max_connections) VALUES ('192.168.1.233',52,3306,10,2000); 118 | 119 | ``` 120 | This will create entries in ProxySQL for the 2 main HGs (50 for writes and 52 for reads). 121 | It will also create 3 entries for the SPECIAL group 8050. This group will be used by the script to manage the fail-over of HG 50. 122 | It will also create 3 entries for the SPECIAL group 8052. This group will be used by the script to manage the read nodes in HG 52. 123 | 124 | In the above example, if you have set `active_failover=1`, the script will check the nodes and, if node `'192.168.1.205',50` is not up, 125 | it will try to identify another node within the same segment that has the **highest weight** IN THE 8050 HG. In this case it will elect as new writer the node `'192.168.1.231',8050`. 126 | So in this case you must give the nodes different weights in HG 8050. 127 | 128 | **Please note** that active_failover=1 is the only deterministic method to fail over, based on what **YOU** define. 129 | If set correctly across a ProxySQL cluster, all nodes will act the same. Yes, a possible delay given the check interval may exist, but that cannot be avoided.
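Before enabling `active_failover=1`, it can help to double-check the election order the script will see. This is only a sketch against the ProxySQL admin interface, using the 8050 backup group from the example above:

```SQL
-- List the failover candidates for writer HG 50 (backup HG 8050),
-- highest weight first: the top row is the node that would be elected as new writer.
SELECT hostname, port, weight, status
FROM mysql_servers
WHERE hostgroup_id = 8050
ORDER BY weight DESC;
```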
130 | 131 | If instead `*active_failover*`=2, the script will use the pxc_cluster_view: 132 | ```SQL 133 | (pxc_test@192.168.1.205) [performance_schema]>select * from pxc_cluster_view order by SEGMENT,LOCAL_INDEX; 134 | +-----------+--------------------------------------+--------+-------------+---------+ 135 | | HOST_NAME | UUID | STATUS | LOCAL_INDEX | SEGMENT | 136 | +-----------+--------------------------------------+--------+-------------+---------+ 137 | | node2 | 63870cc3-af5d-11e7-a0db-463be8426737 | SYNCED | 0 | 1 | 138 | | node3 | 666ad5a0-af5d-11e7-9b39-2726e5de8eb1 | SYNCED | 1 | 1 | 139 | | node1 | f3e45392-af5b-11e7-854e-9b29cd1909da | SYNCED | 5 | 1 | 140 | | node6 | 7540bdca-b267-11e7-bae9-464c3d263470 | SYNCED | 2 | 2 | 141 | | node5 | ab551483-b267-11e7-a1a1-9be1826f877f | SYNCED | 3 | 2 | 142 | | node4 | e24ebd01-af5d-11e7-86f0-42c8ceb6886c | SYNCED | 4 | 2 | 143 | +-----------+--------------------------------------+--------+-------------+---------+ 144 | 145 | ``` 146 | This is an objective reference to the current active view as defined by Galera. The local index is the `wsrep_local_index` and it is used when selecting the new writer. 147 | In this case, assuming `node2` was the current writer, the next one will be `node3`, given it is the one with the lowest index. This election will happen ONLY if the node is in the same segment. 148 | 149 | Finally, if `*active_failover*`=3, the script will use the pxc_cluster_view but will not limit its selection to the main segment: if NO node is found in the main segment, 150 | it will check and eventually elect as writer a node in the remote segment. This is a very extreme situation, and if you decide to go for it, you need to be sure your production will work correctly. 151 | 152 | ```SQL 153 | (pxc_test@192.168.1.205) [performance_schema]>select * from pxc_cluster_view order by SEGMENT,LOCAL_INDEX; 154 | +-----------+--------------------------------------+--------+-------------+---------+ 155 | | HOST_NAME | UUID | STATUS | LOCAL_INDEX | SEGMENT | 156 | +-----------+--------------------------------------+--------+-------------+---------+ 157 | | node2 | 63870cc3-af5d-11e7-a0db-463be8426737 | DONOR | 0 | 1 | 158 | | node3 | 666ad5a0-af5d-11e7-9b39-2726e5de8eb1 | DONOR | 1 | 1 | 159 | | node1 | f3e45392-af5b-11e7-854e-9b29cd1909da | DONOR | 5 | 1 | 160 | | node6 | 7540bdca-b267-11e7-bae9-464c3d263470 | SYNCED | 2 | 2 | 161 | | node5 | ab551483-b267-11e7-a1a1-9be1826f877f | SYNCED | 3 | 2 | 162 | | node4 | e24ebd01-af5d-11e7-86f0-42c8ceb6886c | SYNCED | 4 | 2 | 163 | +-----------+--------------------------------------+--------+-------------+---------+ 164 | ``` 165 | In this case `node6` will become the new WRITER. 166 | 167 | 168 | ## More details 169 | 170 | ### How to use it 171 | ``` 172 | galera_check.pl -u=admin -p=admin -h=127.0.0.1 -H=500:W,501:R -P=6032 --main_segment=1 --debug=0 --log --help 173 | galera_check.pl -u=cluster1 -p=clusterpass -h=192.168.4.191 -H=200:W,201:R -P=6032 --main_segment=1 --debug=1 --log /tmp/test --active_failover=1 --retry_down=2 --retry_up=1 --single_writer=0 --writer_is_also_reader=1 174 | sample [options] [file ...] 175 | Options: 176 | -u|user user to connect to the proxy 177 | -p|password Password for the proxy 178 | -h|host Proxy host 179 | -H Hostgroups with role definition. List comma separated.
180 | Definition R = reader; W = writer [500:W,501:R] 181 | --main_segment If segments are in use, which one is the leading one at the moment 182 | --retry_up The number of loops/tests the check has to do before moving a node up (default 0) 183 | --retry_down The number of loops/tests the check has to do before moving a node down (default 0) 184 | --log Full path to the log file, i.e. (/var/log/proxysql/galera_check_); the check will add 185 | the identifier for the specific HG. 186 | --active_failover A value from 0 to 3, indicating what level/kind of fail-over the script must perform. 187 | active_failover 188 | Valid values are: 189 | 0 [default] do not make failover 190 | 1 make failover only if HG 8000 is specified in ProxySQL mysql_servers 191 | 2 use PXC_CLUSTER_VIEW to identify a server in the same segment 192 | 3 do whatever to keep service up, also failover to another segment (use PXC_CLUSTER_VIEW) 193 | --single_writer Active by default [single_writer = 1]; if disabled it will allow multiple writers 194 | --writer_is_also_reader Active by default [writer_is_also_reader = 1]. If disabled, the writer will be removed from the Reader Host group and will serve only the reads inside the same transaction as the writes. 195 | 196 | 197 | Performance parameters 198 | --check_timeout This parameter, set in ms, is the time the script allows a thread connecting to a MySQL node to wait before forcing a return. 199 | In short, if a node takes longer than check_timeout, its entry will not be filled and it will eventually be ignored in the evaluation. 200 | Setting the debug option =1 and looking for [WARN] Check timeout Node ip : entries will tell you by how much your nodes are exceeding the allowed limit. 201 | You can use the difference to correctly set the check_timeout. 202 | Default is 800 ms 203 | 204 | --help help message 205 | --debug When active, the log will have a lot of information about the execution. Parse it for ERRORS if you have problems 206 | --print_execution Active by default; it will print in the log the execution time the check is taking. This can be used to properly tune the scheduler time and also the --check_timeout 207 | 208 | --development When set to 1 you can run the script in a loop directly from bash and test what is going to happen 209 | --development_time Time in seconds that the loop waits between executions when in development mode (default 2 seconds) 210 | 211 | SSL support 212 | Now the script identifies if the node in the ProxySQL table mysql_servers has use_ssl = 1 and will set SSL to be used for that specific entry (see the sketch after this usage section). 213 | This means that the SSL connection is per ProxySQL mysql_servers entry, NOT per IP:port combination. 214 | 215 | --ssl_cert_paths This parameter allows you to specify a DIRECTORY used to assign specific certificates. 216 | At the moment it is NOT possible to change the file names and ALL these 3 files must be there, named as follows: 217 | - client-key.pem 218 | - client-cert.pem 219 | - ca.pem 220 | The script will exit with an error if ssl_cert_paths is declared but not filled properly, 221 | OR if the user running the script doesn't have access. 222 | !!NOTE!! SSL connections require more time to be established. This script is a check that needs to run very fast and constantly; 223 | forcing it to use SSL WILL impact the performance of the check. Tune the check_timeout parameter properly. 224 | 225 | ``` 226 | 227 | Note that galera_check is also segment aware, as such the checks on the presence of Writer/Reader are done by segment, respecting the MainSegment as primary.
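The use_ssl flag mentioned above lives in the ProxySQL admin interface. A minimal sketch of how a single backend entry is flagged (the host and port values are assumptions; adjust them to your own servers):

```SQL
-- Hypothetical example: mark one backend entry for SSL, so galera_check
-- (and ProxySQL itself) will use SSL for that specific mysql_servers entry.
UPDATE mysql_servers SET use_ssl = 1 WHERE hostname = '192.168.1.205' AND port = 3306;
LOAD MYSQL SERVERS TO RUNTIME;
SAVE MYSQL SERVERS TO DISK;
```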
228 | 229 | 230 | ### Examples of configurations in ProxySQL 231 | 232 | Simple check without retry, no failover mode 233 | ``` 234 | INSERT INTO scheduler (id,active,interval_ms,filename,arg1) values (10,0,2000,"/var/lib/proxysql/galera_check.pl","-u=admin -p=admin -h=192.168.1.50 -H=500:W,501:R -P=3310 --main_segment=1 --debug=0 --log=/var/lib/proxysql/galeraLog"); 235 | ``` 236 | 237 | Simple check with retry options, no failover mode 238 | ``` 239 | INSERT INTO scheduler (id,active,interval_ms,filename,arg1) values (10,0,2000,"/var/lib/proxysql/galera_check.pl","-u=admin -p=admin -h=192.168.1.50 -H=500:W,501:R -P=3310 --retry_down=2 --retry_up=1 --main_segment=1 --debug=0 --log=/var/lib/proxysql/galeraLog"); 240 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK; 241 | ``` 242 | 243 | Script supporting a SINGLE writer HG and Backup nodes 244 | ``` 245 | INSERT INTO scheduler (id,active,interval_ms,filename,arg1) values (10,0,2000,"/var/lib/proxysql/galera_check.pl","-u=admin -p=admin -h=192.168.1.50 -H=500:W,501:R -P=3310 --main_segment=1 --debug=0 --active_failover=1 --log=/var/lib/proxysql/galeraLog"); 246 | ``` 247 | Full mode with active_failover, single writer and retry 248 | ``` 249 | INSERT INTO scheduler (id,active,interval_ms,filename,arg1) values (10,0,2000,"/var/lib/proxysql/galera_check.pl","-u=remoteUser -p=remotePW -h=192.168.1.50 -H=500:W,501:R -P=6032 --retry_down=2 --retry_up=1 --main_segment=1 --debug=0 --active_failover=1 --single_writer=1 --log=/var/lib/proxysql/galeraLog"); 250 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK; 251 | ``` 252 | 253 | To activate it 254 | ```update scheduler set active=1 where id=10; 255 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK; 256 | ``` 257 | 258 | To update the parameters you must pass all of them, not only the ones you want to change (e.g. enabling debug) 259 | ``` 260 | update scheduler set arg1="-u=remoteUser -p=remotePW -h=192.168.1.50 -H=500:W,501:R -P=6032 --retry_down=2 --retry_up=1 --main_segment=1 --debug=1 --active_failover=1 --single_writer=1 --log=/var/lib/proxysql/galeraLog" where id =10; 261 | 262 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK; 263 | ``` 264 | To remove it 265 | ``` 266 | delete from scheduler where id=10; 267 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK; 268 | ``` 269 | 270 | 271 | In all cases, to update the scheduler and make the script active: 272 | ``` 273 | update scheduler set active=1 where id=10; 274 | LOAD SCHEDULER TO RUNTIME; 275 | ``` 276 | 277 | Remove a rule from the scheduler: 278 | ``` 279 | delete from scheduler where id=10; 280 | LOAD SCHEDULER TO RUNTIME;SAVE SCHEDULER TO DISK; 281 | ``` 282 | ## Logic Rules used in the check: 283 | 284 | * Set to offline_soft : 285 | 286 | any state other than 4 or 2, read_only = ON 287 | donor node rejecting queries, cluster size 0, more than 2 nodes in the same segment with more than one writer and the node is NOT read_only 288 | 289 | 290 | * change HG to maintenance HG: 291 | 292 | Node/cluster in non primary 293 | wsrep_reject_queries different from NONE 294 | Donor node rejecting queries, cluster size 1 295 | 296 | 297 | * Node comes back from offline_soft when (all of them): 298 | 299 | 1. Node state is 4 300 | 2. wsrep_reject_queries = none 301 | 3. Primary state 302 | 303 | 304 | * Node comes back from maintenance HG when (all of them): 305 | 306 | 1. node state is 4 307 | 2. wsrep_reject_queries = none 308 | 3. Primary state 309 | 310 | * PXC (pxc_maint_mode). 311 | 312 | pxc_maint_mode is fully supported.
313 | Any node in a state different from pxc_maint_mode=disabled will be set to OFFLINE_SOFT in all the HostGroups. 314 | 315 | * Internally shunning a node. 316 | 317 | While I am trying to rely as much as possible on ProxySQL, given a few inefficiencies there are cases where I have to consider a node SHUNNED because ProxySQL doesn't recognize its state correctly. 318 | Mainly, the script will identify the nodes that are not up (but still not SHUNNED) and will internally treat them as SHUNNED. NO CHANGE TO ProxySQL is done, so you may not see it there, but an ERROR entry will be pushed to the log. 319 | 320 | * Single Writer. 321 | 322 | You can define IF you want to have multiple writers. The default is 1 writer only (**I strongly recommend you do not use multiple writers unless you know very well what you are doing**), but you can now have multiple writers at the same time. 323 | 324 | 325 | 326 | WHY I added addition_to_sys_v2.sql 327 | ============================= 328 | Adding addition_to_sys_v2.sql 329 | 330 | This file is the updated and corrected version of the file created by lefred https://gist.github.com/lefred/77ddbde301c72535381ae7af9f968322 which is not working correctly. 331 | Also, I have tested this solution and compared it with https://gist.github.com/lefred/6f79fd02d333851b8d18f52716f04d91#file-addition_to_sys_gr-sql 332 | and the queries cost MORE in the file-addition_to_sys_gr-sql 333 | version than in this one. 334 | 335 | All the credit goes to @lefred who did the first version 336 | --------------------------------------------------------------------------------