├── MANIFEST ├── Makefile.PL ├── t ├── network_zones.t ├── hosts-per-zone-replpol.t └── multiple-networks-replpol.t └── lib └── MogileFS ├── Network.pm ├── Plugin └── ZoneLocal.pm └── ReplicationPolicy ├── HostsPerNetwork.pm └── MultipleNetworks.pm /MANIFEST: -------------------------------------------------------------------------------- 1 | lib/MogileFS/Network.pm 2 | lib/MogileFS/Plugin/ZoneLocal.pm 3 | lib/MogileFS/ReplicationPolicy/HostsPerNetwork.pm 4 | lib/MogileFS/ReplicationPolicy/MultipleNetworks.pm 5 | Makefile.PL 6 | MANIFEST This list of files 7 | t/hosts-per-zone-replpol.t 8 | t/multiple-networks-replpol.t 9 | t/network_zones.t 10 | -------------------------------------------------------------------------------- /Makefile.PL: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use ExtUtils::MakeMaker; 4 | 5 | WriteMakefile( 6 | NAME => 'MogileFS-Network', 7 | VERSION_FROM => 'lib/MogileFS/Network.pm', 8 | AUTHOR => 'Jonathan Steinert ', 9 | ABSTRACT => 'MogileFS Server class for representing networks based on host IPs.', 10 | PREREQ_PM => { 11 | 'Net::Netmask' => 0, 12 | 'Net::Patricia' => 0, 13 | }, 14 | ); 15 | 16 | -------------------------------------------------------------------------------- /t/network_zones.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use Test::More tests => 5; 6 | use FindBin qw($Bin); 7 | 8 | use MogileFS::Network; 9 | 10 | MogileFS::Network->test_config( 11 | zone_one => '127.0.0.0/16', 12 | zone_two => '10.0.0.0/8, 172.16.0.0/16', 13 | zone_three => '10.1.0.0/16', 14 | network_zones => 'one, two, three', 15 | ); 16 | 17 | 18 | is(lookup('127.0.0.1'), 'one', "Standard match"); 19 | is(lookup('10.0.0.1'), 'two', "Outer netblock match"); 20 | is(lookup('10.1.0.1'), 'three', "Inner netblock match"); 21 | is(lookup('172.16.0.1'), 'two', "Zone with multiple netblocks"); 22 | is(lookup('192.168.0.1'), undef, "Unknown zone"); 23 | 24 | sub lookup { 25 | return MogileFS::Network->zone_for_ip(@_); 26 | } 27 | -------------------------------------------------------------------------------- /lib/MogileFS/Network.pm: -------------------------------------------------------------------------------- 1 | package MogileFS::Network; 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Net::Netmask; 7 | use Net::Patricia; 8 | use MogileFS::Config; 9 | 10 | our $VERSION = "0.02"; 11 | 12 | use constant DEFAULT_RELOAD_INTERVAL => 60; 13 | 14 | my $trie = Net::Patricia->new(); # Net::Patricia object used for cache and lookup. 15 | my $next_reload = 0; # Epoch time at or after which the trie expires and must be regenerated. 16 | 17 | sub zone_for_ip { 18 | my $class = shift; 19 | my $ip = shift; 20 | 21 | return unless $ip; 22 | 23 | check_cache(); 24 | 25 | return $trie->match_string($ip); 26 | } 27 | 28 | sub check_cache { 29 | # Reload the trie if it's expired 30 | return unless (time() >= $next_reload); 31 | 32 | $trie = Net::Patricia->new(); 33 | 34 | my @zones = split(/\s*,\s*/, get_setting("network_zones")); 35 | 36 | my @netmasks; # [ $bits, $netmask, $zone ], ... 37 | 38 | foreach my $zone (@zones) { 39 | my $zone_masks = get_setting("zone_$zone"); 40 | 41 | if (not $zone_masks) { 42 | warn "couldn't find network_zone <> check your server settings"; 43 | next; 44 | } 45 | 46 | foreach my $network_string (split /[,\s]+/, $zone_masks) { 47 | my $netmask = Net::Netmask->new2($network_string); 48 | 49 | if (Net::Netmask::errstr()) { 50 | warn "couldn't parse <$zone> as a netmask. error was <" . Net::Netmask::errstr(). 51 | ">. check your server settings"; 52 | next; 53 | } 54 | 55 | push @netmasks, [$netmask->bits, $netmask, $zone]; 56 | } 57 | } 58 | 59 | # Sort these by mask bit count, because Net::Patricia doesn't say in its docs whether add order 60 | # or bit length is the overriding factor. 61 | foreach my $set (sort { $a->[0] <=> $b->[0] } @netmasks) { 62 | my ($bits, $netmask, $zone) = @$set; 63 | 64 | if (my $other_zone = $trie->match_exact_string("$netmask")) { 65 | warn "duplicate netmask <$netmask> in network zones '$zone' and '$other_zone'. check your server settings"; 66 | } 67 | 68 | $trie->add_string("$netmask", $zone); 69 | } 70 | 71 | my $interval = get_setting("network_reload_interval") || DEFAULT_RELOAD_INTERVAL; 72 | 73 | $next_reload = time() + $interval; 74 | 75 | return 1; 76 | } 77 | 78 | # This is a separate subroutine so I can redefine it at test time. 79 | sub get_setting { 80 | my $key = shift; 81 | return MogileFS::Config->server_setting($key); 82 | } 83 | 84 | sub test_config { 85 | my $class = shift; 86 | 87 | my %config = @_; 88 | 89 | no warnings 'redefine'; 90 | 91 | *get_setting = sub { 92 | my $key = shift; 93 | return $config{$key}; 94 | }; 95 | 96 | $next_reload = 0; 97 | } 98 | 99 | 1; 100 | -------------------------------------------------------------------------------- /lib/MogileFS/Plugin/ZoneLocal.pm: -------------------------------------------------------------------------------- 1 | # ZoneLocal plugin for MogileFS, by hachi 2 | 3 | package MogileFS::Plugin::ZoneLocal; 4 | 5 | use strict; 6 | use warnings; 7 | 8 | use MogileFS::Worker::Query; 9 | use MogileFS::Network; 10 | use MogileFS::Util qw/error/; 11 | 12 | sub prioritize_devs_current_zone; 13 | 14 | sub load { 15 | my $local_network = MogileFS::Config->config('local_network'); 16 | die "must define 'local_network' (ie: 10.5.0.0/16) in your mogilefsd.conf" 17 | unless $local_network; 18 | my $local_zone_test = MogileFS::Network->zone_for_ip($local_network); 19 | die "Could not resolve a local zone for $local_network. Please ensure this IP is within a configured zone" 20 | unless $local_zone_test; 21 | 22 | MogileFS::register_global_hook( 'cmd_get_paths_order_devices', sub { 23 | my $devices = shift; 24 | my $sorted_devs = shift; 25 | 26 | @$sorted_devs = prioritize_devs_current_zone( 27 | $MogileFS::REQ_client_ip, 28 | MogileFS::Worker::Query::sort_devs_by_utilization(@$devices) 29 | ); 30 | 31 | return 1; 32 | }); 33 | 34 | MogileFS::register_global_hook( 'cmd_create_open_order_devices', sub { 35 | my $devices = shift; 36 | my $sorted_devs = shift; 37 | 38 | @$sorted_devs = prioritize_devs_current_zone( 39 | $MogileFS::REQ_client_ip, 40 | MogileFS::Worker::Query::sort_devs_by_freespace(@$devices) 41 | ); 42 | 43 | return 1; 44 | }); 45 | 46 | MogileFS::register_global_hook( 'replicate_order_final_choices', sub { 47 | my $devs = shift; 48 | my $choices = shift; 49 | 50 | my @sorted = prioritize_devs_current_zone( 51 | MogileFS::Config->config('local_network'), 52 | map { $devs->{$_} } @$choices); 53 | @$choices = map { $_->id } @sorted; 54 | 55 | return 1; 56 | }); 57 | 58 | return 1; 59 | } 60 | 61 | sub unload { 62 | # remove our hooks 63 | MogileFS::unregister_global_hook( 'cmd_get_paths_order_devices' ); 64 | MogileFS::unregister_global_hook( 'cmd_create_open_order_devices' ); 65 | MogileFS::unregister_global_hook( 'replicate_order_final_choices' ); 66 | 67 | return 1; 68 | } 69 | 70 | sub prioritize_devs_current_zone { 71 | my $local_ip = shift; 72 | my $current_zone = MogileFS::Network->zone_for_ip($local_ip); 73 | error("Cannot find current zone for local ip $local_ip") 74 | unless defined $current_zone; 75 | 76 | my (@this_zone, @other_zone); 77 | 78 | foreach my $dev (@_) { 79 | my $ip = $dev->host->ip; 80 | my $host_id = $dev->host->id; 81 | my $zone = MogileFS::Network->zone_for_ip($ip); 82 | error("Cannot find zone for remote IP $ip") 83 | unless defined $zone; 84 | 85 | if ($current_zone eq $zone) { 86 | push @this_zone, $dev; 87 | } else { 88 | push @other_zone, $dev; 89 | } 90 | } 91 | 92 | return @this_zone, @other_zone; 93 | } 94 | 95 | 1; 96 | -------------------------------------------------------------------------------- /t/hosts-per-zone-replpol.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use Test::More; 6 | use FindBin qw($Bin); 7 | 8 | use MogileFS::Server; 9 | use MogileFS::Util qw(error_code); 10 | use MogileFS::ReplicationPolicy::HostsPerNetwork; 11 | use MogileFS::Test; 12 | 13 | plan tests => 13; 14 | 15 | # already good. 16 | is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_]"), 17 | "all_good", "all good"); 18 | 19 | # need to get it onto host2... 20 | is(rr("min=2 h1[d1=X d2=_] h2[d3=_ d4=_]"), 21 | "ideal(3,4)", "need host2"); 22 | 23 | # still needs to be on host2, even though 2 copies on host1 24 | is(rr("min=2 h1[d1=X d2=X] h2[d3=_ d4=_]"), 25 | "ideal(3,4)", "need host2, even though 2 on host1"); 26 | 27 | # anywhere will do. (can happen on, say, rebalance) 28 | is(rr("min=2 h1[d1=_ d2=_] h2[d3=_ d4=_]"), 29 | "ideal(1,2,3,4)", "anywhere"); 30 | 31 | # should desperately try d2, since host2 is down 32 | is(rr("min=2 h1[d1=X d2=_] h2=down[d3=_ d4=_]"), 33 | "desperate(2)"); 34 | 35 | # should try host3, since host2 is down 36 | is(rr("min=2 h1[d1=X d2=_] h2=down[d3=_ d4=_] h3[d5=_ d6=_]"), 37 | "ideal(5,6)"); 38 | 39 | # need a copy on a non-dead disk on host1 40 | is(rr("min=2 h1[d1=_ d2=X,dead] h2=alive[d3=X d4=_]"), 41 | "ideal(1)"); 42 | 43 | # minimum hosts is 3, only 2 available hosts. This test differs from 44 | # the one in multiplehosts because elevating these results to be 'ideal' 45 | # adds complexity that is unnecessary in my eyes. 46 | is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_]"), 47 | "desperate(1,4)"); 48 | 49 | # ... but if we have a 3rd host, it's gotta be there 50 | is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_] h3[d5=_]"), 51 | "ideal(5)"); 52 | 53 | # ... unless that host is down, in which case it's back to 1/4, 54 | # but desperately 55 | is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_] h3=down[d5=_]"), 56 | "desperate(1,4)"); 57 | 58 | # too good, uniq hosts > min 59 | is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_] h3[d5=X]"), 60 | "too_good"); 61 | 62 | # too good, but but with uniq hosts == min 63 | is(rr("min=2 h1[d1=X d2=X] h2[d3=X d4=_]"), 64 | "too_good"); 65 | 66 | # be happy with 3 copies, even though two are on same host (that's our max unique hosts) 67 | is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=X]"), 68 | "all_good"); 69 | 70 | sub rr { 71 | my ($state) = @_; 72 | my $ostate = $state; # original 73 | 74 | MogileFS::Host->t_wipe_singletons; 75 | MogileFS::Device->t_wipe_singletons; 76 | MogileFS::Config->set_config_no_broadcast("min_free_space", 100); 77 | 78 | my $min = 2; 79 | if ($state =~ s/^\bmin=(\d+)\b//) { 80 | $min = $1; 81 | } 82 | 83 | my $hosts = {}; 84 | my $devs = {}; 85 | my $on_devs = []; 86 | 87 | my $parse_error = sub { 88 | die "Can't parse:\n $ostate\n" 89 | }; 90 | while ($state =~ s/\bh(\d+)(?:=(.+?))?\[(.+?)\]//) { 91 | my ($n, $opts, $devstr) = ($1, $2, $3); 92 | $opts ||= ""; 93 | die "dup host $n" if $hosts->{$n}; 94 | 95 | my $h = $hosts->{$n} = MogileFS::Host->of_hostid($n); 96 | $h->t_init($opts || "alive"); 97 | $h->{hostip} = "127.0.0.1"; 98 | 99 | foreach my $ddecl (split(/\s+/, $devstr)) { 100 | $ddecl =~ /^d(\d+)=([_X])(?:,(\w+))?$/ 101 | or $parse_error->(); 102 | my ($dn, $on_not, $status) = ($1, $2, $3); 103 | die "dup device $dn" if $devs->{$dn}; 104 | my $d = $devs->{$dn} = MogileFS::Device->of_devid($dn); 105 | $status ||= "alive"; 106 | $d->t_init($h->id, $status); 107 | if ($on_not eq "X" && $d->dstate->should_have_files) { 108 | push @$on_devs, $d; 109 | } 110 | } 111 | } 112 | $parse_error->() if $state =~ /\S/; 113 | 114 | my $polclass = "MogileFS::ReplicationPolicy::HostsPerNetwork"; 115 | 116 | my $pol = $polclass->new(hosts_per_zone => { one => $min }); 117 | 118 | MogileFS::Network->test_config( 119 | zone_one => '127.0.0.0/16', 120 | network_zones => 'one', 121 | ); 122 | 123 | my $rr = $pol->replicate_to( 124 | fid => 1, 125 | on_devs => $on_devs, 126 | all_devs => $devs, 127 | failed => {}, 128 | min => $min, 129 | ); 130 | return $rr->t_as_string; 131 | } 132 | 133 | -------------------------------------------------------------------------------- /lib/MogileFS/ReplicationPolicy/HostsPerNetwork.pm: -------------------------------------------------------------------------------- 1 | package MogileFS::ReplicationPolicy::HostsPerNetwork; 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use base 'MogileFS::ReplicationPolicy'; 7 | 8 | use MogileFS::Network; 9 | use MogileFS::Util qw(weighted_list); 10 | use MogileFS::ReplicationRequest qw(ALL_GOOD TOO_GOOD TEMP_NO_ANSWER); 11 | 12 | sub new { 13 | my $class = shift; 14 | my %args = @_; 15 | 16 | my $self = bless {}, $class; 17 | 18 | $self->{hosts_per_zone} = delete $args{hosts_per_zone} 19 | if $args{hosts_per_zone}; 20 | 21 | return $self; 22 | } 23 | 24 | sub new_from_policy_args { 25 | my ($class, $argref) = @_; 26 | # Note: "MultipleNetworks()" is okay, in which case the 'mindevcount' 27 | # on the class is used. (see below) 28 | $$argref =~ s/^\s* \( \s* ( [^)]*?) \s* \) \s*//x 29 | or die "$class failed to parse args: $$argref"; 30 | 31 | my @args = split /\s*,\s*/, $1; 32 | my %hosts_per_zone; 33 | 34 | foreach my $arg (@args) { 35 | my ($zone, $count) = split /\s*=\s*/, $arg; 36 | $hosts_per_zone{$zone} = $count; 37 | } 38 | 39 | return $class->new(hosts_per_zone => \%hosts_per_zone); 40 | } 41 | 42 | sub replicate_to { 43 | my ($self, %args) = @_; 44 | 45 | my $hosts_per_zone = $self->{hosts_per_zone}; 46 | 47 | my $fid = delete $args{fid}; # fid scalar to copy 48 | my $on_devs = delete $args{on_devs}; # arrayref of device objects 49 | my $all_devs = delete $args{all_devs}; # hashref of { devid => MogileFS::Device } 50 | my $failed = delete $args{failed}; # hashref of { devid => 1 } of failed attempts this round 51 | 52 | delete $args{min}; # We don't use this. 53 | 54 | warn "Unknown parameters: " . join(", ", sort keys %args) if %args; 55 | die "Missing parameters" unless $on_devs && $all_devs && $failed && $fid; 56 | 57 | # see which and how many unique hosts/networks we're already on. 58 | my %on_dev; 59 | my %on_host; 60 | 61 | my %on_host_per_zone; 62 | my %on_dev_per_zone; 63 | 64 | foreach my $dev (@$on_devs) { 65 | my $on_ip = $dev->host->ip; 66 | my $hostid = $dev->host->id; 67 | 68 | if ($on_ip) { 69 | my $zone = MogileFS::Network->zone_for_ip($on_ip); 70 | 71 | $on_dev_per_zone{$zone}++; 72 | 73 | # If we've already counted this host, then don't increment it for this zone 74 | $on_host_per_zone{$zone}++ unless $on_host{$dev->hostid}; 75 | } 76 | 77 | $on_dev{$dev->id}++; 78 | $on_host{$dev->hostid}++; 79 | } 80 | 81 | my %available_hosts_per_zone; 82 | my %available_hosts; 83 | 84 | foreach my $dev (values %$all_devs) { 85 | next unless $dev->dstate->should_have_files; 86 | my $ip = $dev->host->ip; 87 | my $hostid = $dev->host->id; 88 | my $zone = MogileFS::Network->zone_for_ip($ip); 89 | $available_hosts_per_zone{$zone}++ unless $available_hosts{$hostid}; 90 | $available_hosts{$hostid}++; 91 | } 92 | 93 | my %needed_network; 94 | my $too_good = 0; 95 | 96 | while (my ($zone, $needed) = each %$hosts_per_zone) { 97 | # If we already on all hosts in the target zone, and we're still not happy, then 98 | # we need to start doubling up on devices, but now devs is not to exceed the requested 99 | # number of hosts. 100 | my $on = ($needed <= $available_hosts_per_zone{$zone}) ? $on_host_per_zone{$zone} : $on_dev_per_zone{$zone}; 101 | $on ||= 0; 102 | 103 | if ($on < $needed) { 104 | $needed_network{$zone} = 1; 105 | } elsif ($on_dev_per_zone{$zone} > $needed) { 106 | $too_good++; 107 | } 108 | } 109 | 110 | unless (keys %needed_network) { 111 | return TOO_GOOD if $too_good; 112 | return ALL_GOOD; 113 | } 114 | 115 | my @all_dests = sort { 116 | $b->percent_free <=> $a->percent_free 117 | } grep { 118 | ! $on_dev{$_->devid} && 119 | ! $failed->{$_->devid} && 120 | $_->should_get_replicated_files 121 | } MogileFS::Device->devices; 122 | 123 | return TEMP_NO_ANSWER unless @all_dests; 124 | 125 | my @ideal; 126 | my @desp; 127 | 128 | foreach my $dev (@all_dests) { 129 | my $ip = $dev->host->ip; 130 | my $host_id = $dev->host->id; 131 | my $zone = MogileFS::Network->zone_for_ip($ip); 132 | 133 | # If we don't need more devices in this current network 134 | # zone, then don't include the current device. 135 | next unless $needed_network{$zone}; 136 | 137 | if ($on_host{$host_id}) { 138 | push @desp, $dev; 139 | } else { 140 | push @ideal, $dev; 141 | } 142 | } 143 | 144 | return TEMP_NO_ANSWER unless @desp or @ideal; 145 | 146 | @ideal = weighted_list(map { [$_, 100 * $_->percent_free] } 147 | splice(@ideal, 0, 20)); 148 | @desp = weighted_list(map { [$_, 100 * $_->percent_free] } 149 | splice(@desp, 0, 20)); 150 | 151 | return MogileFS::ReplicationRequest->new( 152 | ideal => \@ideal, 153 | desperate => \@desp, 154 | ); 155 | } 156 | 157 | 1; 158 | 159 | # Local Variables: 160 | # mode: perl 161 | # c-basic-indent: 4 162 | # indent-tabs-mode: nil 163 | # End: 164 | # vim: filetype=perl softtabstop=4 expandtab 165 | -------------------------------------------------------------------------------- /t/multiple-networks-replpol.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use Test::More; 6 | use FindBin qw($Bin); 7 | use Net::Netmask; 8 | 9 | use MogileFS::Server; 10 | use MogileFS::Util qw(error_code); 11 | use MogileFS::ReplicationPolicy::MultipleNetworks; 12 | use MogileFS::Test; 13 | 14 | plan tests => 25; 15 | 16 | # need just the one, so we only have to stuff the cache once 17 | my $polclass = "MogileFS::ReplicationPolicy::MultipleNetworks"; 18 | my $pol = $polclass->new; 19 | 20 | # test that the MultipleHosts stuff still works 21 | # we cope when there are no ips 22 | 23 | # already good. 24 | is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_]"), 25 | "all_good", "all good"); 26 | 27 | # need to get it onto host2... 28 | is(rr("min=2 h1[d1=X d2=_] h2[d3=_ d4=_]"), 29 | "ideal(3,4)", "need host2"); 30 | 31 | # still needs to be on host2, even though 2 copies on host1 32 | is(rr("min=2 h1[d1=X d2=X] h2[d3=_ d4=_]"), 33 | "ideal(3,4)", "need host2, even though 2 on host1"); 34 | 35 | # anywhere will do. (can happen on, say, rebalance) 36 | is(rr("min=2 h1[d1=_ d2=_] h2[d3=_ d4=_]"), 37 | "ideal(1,2,3,4)", "anywhere"); 38 | 39 | # should desperately try d2, since host2 is down 40 | is(rr("min=2 h1[d1=X d2=_] h2=down[d3=_ d4=_]"), 41 | "desperate(2)"); 42 | 43 | # should try host3, since host2 is down 44 | is(rr("min=2 h1[d1=X d2=_] h2=down[d3=_ d4=_] h3[d5=_ d6=_]"), 45 | "ideal(5,6)"); 46 | 47 | # need a copy on a non-dead disk on host1 48 | is(rr("min=2 h1[d1=_ d2=X,dead] h2=alive[d3=X d4=_]"), 49 | "ideal(1)"); 50 | 51 | # this is an ideal move, since we only have 2 unique hosts: 52 | is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_]"), 53 | "ideal(1,4)"); 54 | 55 | # ... but if we have a 3rd host, it's gotta be there 56 | is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_] h3[d5=_]"), 57 | "ideal(5)"); 58 | 59 | # ... unless that host is down, in which case it's back to 1/4, 60 | # but desperately 61 | is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=_] h3=down[d5=_]"), 62 | "desperate(1,4)"); 63 | 64 | # too good, uniq hosts > min 65 | is(rr("min=2 h1[d1=X d2=_] h2[d3=X d4=_] h3[d5=X]"), 66 | "too_good"); 67 | 68 | # too good, but but with uniq hosts == min 69 | is(rr("min=2 h1[d1=X d2=X] h2[d3=X d4=_]"), 70 | "too_good"); 71 | 72 | # be happy with 3 copies, even though two are on same host (that's our max unique hosts) 73 | is(rr("min=3 h1[d1=_ d2=X] h2[d3=X d4=X]"), 74 | "all_good"); 75 | 76 | ## 77 | ## 78 | # actual network policy tests 79 | my ($ad1, $ad2) = ("#192.168.0.2#" ,"#192.168.0.3#" ); 80 | my ($ad3, $ad4) = ("#10.0.0.2#" ,"#10.0.0.3#" ); 81 | my ($ad5, $ad6) = ("#146.101.246.2#","#146.101.142.130#"); 82 | 83 | # stuff the cache with the default, otherwise it'll go to the db 84 | $pol->stuff_cache('192.168.0.2' , Net::Netmask->new('192.168.0.0/16')); 85 | $pol->stuff_cache('192.168.0.3' , Net::Netmask->new('192.168.0.0/16')); 86 | $pol->stuff_cache('10.0.0.2' , Net::Netmask->new('10.0.0.0/16')); 87 | $pol->stuff_cache('10.0.0.3' , Net::Netmask->new('10.0.0.0/16')); 88 | $pol->stuff_cache('146.101.246.2' , Net::Netmask->new('146.101.0.0/16')); 89 | $pol->stuff_cache('146.101.142.130', Net::Netmask->new('146.101.0.0/16')); 90 | 91 | # retest some multiple Host logic all on the same network 92 | # already good. (there's only one network) 93 | is(rr("min=2 h1[d1=X d2=_]$ad1 h2[d3=X d4=_]$ad2"), 94 | "all_good", "all good"); 95 | 96 | # need to get it onto host2... 97 | is(rr("min=2 h1[d1=X d2=_]$ad1 h2[d3=_ d4=_]$ad2"), 98 | "desperate(2,3,4)", "need host2"); 99 | 100 | # still needs to be on host2, even though 2 copies on host1 101 | is(rr("min=2 h1[d1=X d2=X]$ad1 h2[d3=_ d4=_]$ad2"), 102 | "desperate(3,4)", "need host2, even though 2 on host1"); 103 | 104 | # target another network 105 | is(rr("min=2 h1[d1=_ d2=X]$ad1 h2[d3=_ d4=_]$ad2 h3[d5=_ d6=_]$ad3 h4[d7=_ d8=_]$ad4"), 106 | "ideal(5,6,7,8)","target other network"); # no device 3 or 4 (or 1) in the ideal 107 | 108 | # other network down 109 | is(rr("min=2 h1[d1=_ d2=X]$ad1 h2[d3=_ d4=_]$ad2 h3=down[d5=_ d6=_]$ad3 h4=down[d7=_ d8=_]$ad4"), 110 | "desperate(1,3,4)", "desperate this network"); 111 | 112 | is(rr("min=2 h1[d1=_ d2=X]$ad1 h2[d3=_ d4=_]$ad2 h3[d5=_ d6=_]$ad3 h4[d7=_ d8=_]$ad5"), 113 | "ideal(5,6,7,8)","include both other networks with three networks"); 114 | 115 | is(rr("min=2 h1[d1=_ d2=X]$ad1 h2[d3=_ d4=_]$ad2 h3=down[d5=_ d6=_]$ad3 h4[d7=_ d8=_]$ad5"), 116 | "ideal(7,8)","one of three networks down"); 117 | 118 | is(rr("min=2 h1[d1=_ d2=X,dead]$ad1 h2=alive[d3=_ d4=_]$ad2 h3=alive[d5=X d6=_]$ad3"), 119 | "ideal(1,3,4)","dead copies don't exclude a network"); 120 | 121 | is(rr("min=2 h1[d1=_ d2=X]$ad1 h2[d3=_ d4=_]$ad2 h3[d5=X d6=_]$ad3"), 122 | "all_good","enough copies on different networks"); 123 | 124 | is(rr("min=2 h1[d1=_ d2=X]$ad1 h2[d3=X d4=X]$ad2"), 125 | "too_good","3 copies on 2 networks with a min of 2 is too good"); 126 | 127 | # too many copies on one network, not enough on another, want to over-replicate 128 | is(rr("min=2 h1[d1=X d2=X]$ad1 h2[d3=X d4=X]$ad2 h3[d5=_ d6=_]$ad3 h4[d7=_ d8=_]$ad4"), 129 | "ideal(5,6,7,8)", "more than min hosts, but all on one network"); 130 | 131 | # mess with netmasks 132 | $pol->stuff_cache('146.101.246.2' , Net::Netmask->new('146.101.246.0/24')); 133 | $pol->stuff_cache('146.101.142.130', Net::Netmask->new('146.101.142.0/24')); 134 | 135 | is(rr("min=2 h1[d1=_ d2=X]$ad6 h2[d3=_ d4=_]$ad5 h3[d5=_ d6=_]$ad4 h4[d7=_ d8=_]$ad3"), 136 | "ideal(3,4,5,6,7,8)","target other network"); # ad5 and ad6 are no longer the same network 137 | 138 | sub rr { 139 | my ($state) = @_; 140 | my $ostate = $state; # original 141 | 142 | MogileFS::Host->t_wipe_singletons; 143 | MogileFS::Device->t_wipe_singletons; 144 | MogileFS::Config->set_config_no_broadcast("min_free_space", 100); 145 | 146 | my $min = 2; 147 | if ($state =~ s/^\bmin=(\d+)\b//) { 148 | $min = $1; 149 | } 150 | 151 | my $hosts = {}; 152 | my $devs = {}; 153 | my $on_devs = []; 154 | 155 | my $parse_error = sub { 156 | die "Can't parse:\n $ostate\n" 157 | }; 158 | while ($state =~ s/\bh(\d+)(?:=(.+?))?\[(.+?)\](#\d+\.\d+\.\d+\.\d+\.?#)?//) { 159 | my ($n, $opts, $devstr, $ip) = ($1, $2, $3, $4); 160 | $opts ||= ""; 161 | die "dup host $n" if $hosts->{$n}; 162 | 163 | # print "1 2 3 4 : <<$1>> <<$2>> <<$3>> <<$4>>\n"; 164 | # print "$state\n"; 165 | 166 | my $h = $hosts->{$n} = MogileFS::Host->of_hostid($n); 167 | $h->t_init($opts || "alive"); 168 | if ($ip) { 169 | $ip =~ s/#//g; 170 | # $h->set_ip($ip); # can't do, is persistent 171 | $h->{hostip} = $ip; 172 | } 173 | 174 | foreach my $ddecl (split(/\s+/, $devstr)) { 175 | $ddecl =~ /^d(\d+)=([_X])(?:,(\w+))?$/ 176 | or $parse_error->(); 177 | my ($dn, $on_not, $status) = ($1, $2, $3); 178 | die "dup device $dn" if $devs->{$dn}; 179 | my $d = $devs->{$dn} = MogileFS::Device->of_devid($dn); 180 | $status ||= "alive"; 181 | $d->t_init($h->id, $status); 182 | if ($on_not eq "X" && $d->dstate->should_have_files) { 183 | push @$on_devs, $d; 184 | } 185 | } 186 | } 187 | $parse_error->() if $state =~ /\S/; 188 | 189 | my $rr = $pol->replicate_to( 190 | fid => 1, 191 | on_devs => $on_devs, 192 | all_devs => $devs, 193 | failed => {}, 194 | min => $min, 195 | ); 196 | return $rr->t_as_string; 197 | } 198 | 199 | -------------------------------------------------------------------------------- /lib/MogileFS/ReplicationPolicy/MultipleNetworks.pm: -------------------------------------------------------------------------------- 1 | package MogileFS::ReplicationPolicy::MultipleNetworks; 2 | 3 | use strict; 4 | use base 'MogileFS::ReplicationPolicy'; 5 | use MogileFS::Util qw(weighted_list); 6 | use MogileFS::ReplicationRequest qw(ALL_GOOD TOO_GOOD TEMP_NO_ANSWER); 7 | 8 | my %cache; 9 | my $age; 10 | 11 | sub AVOIDNETWORK { return "AVOIDNETWORK"; } 12 | 13 | sub new { 14 | my ($class, $mindevcount) = @_; 15 | return bless { 16 | mindevcount => $mindevcount, 17 | }, $class; 18 | } 19 | 20 | sub new_from_policy_args { 21 | my ($class, $argref) = @_; 22 | # Note: "MultipleNetworks()" is okay, in which case the 'mindevcount' 23 | # on the class is used. (see below) 24 | $$argref =~ s/^\s* \( \s* (\d*) \s* \) \s*//x 25 | or die "$class failed to parse args: $$argref"; 26 | return $class->new($1) 27 | } 28 | 29 | sub mindevcount { $_[0]{mindevcount} } 30 | 31 | sub replicate_to { 32 | my ($self, %args) = @_; 33 | 34 | my $fid = delete $args{fid}; # fid scalar to copy 35 | my $on_devs = delete $args{on_devs}; # arrayref of device objects 36 | my $all_devs = delete $args{all_devs}; # hashref of { devid => MogileFS::Device } 37 | my $failed = delete $args{failed}; # hashref of { devid => 1 } of failed attempts this round 38 | 39 | # old-style 40 | my $min = delete $args{min}; 41 | $min = $self->{mindevcount} || $min; 42 | 43 | warn "Unknown parameters: " . join(", ", sort keys %args) if %args; 44 | die "Missing parameters" unless $on_devs && $all_devs && $failed && $fid; 45 | 46 | # number of devices we currently live on 47 | my $already_on = @$on_devs; 48 | 49 | # a silly special case, bail out early. 50 | return ALL_GOOD if $min == 1 && $already_on; 51 | 52 | # total disks available which are candidates for having files on them 53 | my $total_disks = scalar grep { $_->dstate->should_have_files } values %$all_devs; 54 | 55 | # if we have two copies and that's all the disks there are 56 | # anywhere, be happy enough 57 | return ALL_GOOD if $already_on >= 2 && $already_on == $total_disks; 58 | 59 | # see which and how many unique hosts/networks we're already on. 60 | my %on_dev; 61 | my %on_host; 62 | my %on_network; 63 | foreach my $dev (@$on_devs) { 64 | $on_host{$dev->hostid} = 1; 65 | $on_dev{$dev->id} = 1; 66 | 67 | my $on_ip = $dev->host->ip; 68 | if ($on_ip) { 69 | my $network = network_for_ip($on_ip); 70 | $on_network{$network->desc} = $network; 71 | } 72 | } 73 | 74 | my $uniq_hosts_on = scalar keys %on_host; 75 | my $uniq_networks_on = scalar keys %on_network || 1; 76 | 77 | my ($total_uniq_hosts, $total_uniq_networks) = unique_hosts_and_networks($all_devs); 78 | 79 | # target as many networks as we can, but not more than min 80 | my $target_networks = ($min < $total_uniq_networks) ? $min : $total_uniq_networks; 81 | 82 | # we're never good if our copies aren't on as many networks as possible 83 | if (($target_networks / $uniq_networks_on) <= 1) { 84 | return TOO_GOOD if $uniq_hosts_on > $min; 85 | return TOO_GOOD if $uniq_hosts_on == $min && $already_on > $min; 86 | 87 | return ALL_GOOD if $uniq_hosts_on == $min; 88 | return ALL_GOOD if $uniq_hosts_on >= $total_uniq_hosts && $already_on >= $min; 89 | } 90 | 91 | # if there are more hosts we're not on yet, we want to exclude devices we're already 92 | # on from our applicable host search. 93 | # also exclude hosts on networks we're already on 94 | my @skip_network = values %on_network; 95 | my %skip_host; # hostid => 1 96 | if ($uniq_hosts_on < $total_uniq_hosts) { 97 | %skip_host = %on_host; 98 | 99 | if (@skip_network) { 100 | # work out hosts from the devs passed to us 101 | my %seen_host; 102 | foreach my $device (values %$all_devs) { 103 | next if ($seen_host{$device->host->id}++); 104 | 105 | foreach my $disliked_network (@skip_network) { 106 | if (($disliked_network->match($device->host->ip)) and 107 | (not $skip_host{$device->host->id})) { 108 | $skip_host{$device->host->id} = AVOIDNETWORK; 109 | } 110 | } 111 | } 112 | } 113 | } 114 | 115 | my @all_dests = sort { 116 | $b->percent_free <=> $a->percent_free 117 | } grep { 118 | ! $on_dev{$_->devid} && 119 | ! $failed->{$_->devid} && 120 | $_->should_get_replicated_files 121 | } MogileFS::Device->devices; 122 | 123 | return TEMP_NO_ANSWER unless @all_dests; 124 | 125 | my @ideal = grep { ! $skip_host{$_->hostid} } @all_dests; 126 | # wrong network is less desperate than wrong host 127 | my @network_desp = grep { $skip_host{$_->hostid} && 128 | $skip_host{$_->hostid} eq AVOIDNETWORK } @all_dests; 129 | my @host_desp = grep { $skip_host{$_->hostid} && 130 | $skip_host{$_->hostid} ne AVOIDNETWORK } @all_dests; 131 | 132 | @ideal = weighted_list(map { [$_, 100 * $_->percent_free] } 133 | splice(@ideal, 0, 20)); 134 | @network_desp = weighted_list(map { [$_, 100 * $_->percent_free] } 135 | splice(@network_desp, 0, 20)); 136 | @host_desp = weighted_list(map { [$_, 100 * $_->percent_free] } 137 | splice(@host_desp, 0, 20)); 138 | 139 | my @desp = (@network_desp, @host_desp); 140 | 141 | return MogileFS::ReplicationRequest->new( 142 | ideal => \@ideal, 143 | desperate => \@desp, 144 | ); 145 | } 146 | 147 | # can't just scalar keys %cache to count networks 148 | # might include networks for which we have no hosts yet 149 | sub unique_hosts_and_networks { 150 | my ($devs) = @_; 151 | 152 | my %host; 153 | my %netmask; 154 | foreach my $devid (keys %$devs) { 155 | my $dev = $devs->{$devid}; 156 | next unless $dev->dstate->should_get_repl_files; 157 | 158 | $host{$dev->hostid}++; 159 | 160 | my $ip = $dev->host->ip; 161 | $netmask{network_for_ip($ip)->desc}++; 162 | } 163 | 164 | return (scalar keys %host, scalar keys %netmask || 1); 165 | } 166 | 167 | 168 | { 169 | my %cache; # '192.168.0.0/24' => Net::Netmask->new2('192.168.0.0/24'); 170 | my $age; # increments every time we look 171 | 172 | # turn a server ip into a network 173 | # defaults to /16 ranges 174 | # this can be overridden with a "zone_$location" setting per network "zone" and 175 | # a lookup field listing all "zones" 176 | # e.g. 177 | # mogadm settings set network_zones location1,location2 178 | # mogadm settings set zone_location1 192.168.0.0/24 179 | # mogadm settings set zone_location2 10.0.0.0/24 180 | # zone names and netmasks must be unique 181 | sub network_for_ip { 182 | my ($ip) = @_; 183 | 184 | if (not $ip) { # can happen in testing 185 | return Net::Netmask->new('default'); 186 | } 187 | 188 | # clear the cache occasionally 189 | if (($age == 0) or ($age++ > 500)) { 190 | clear_and_build_cache(); 191 | $age = 1; 192 | } 193 | 194 | my $network; 195 | foreach my $zone (keys %cache) { 196 | if ($cache{$zone}->match($ip)) { 197 | $network = $cache{$zone}; 198 | } 199 | } 200 | 201 | if (not $network) { 202 | ($network) = ($ip =~ m/(\d+\.\d+)./); 203 | $network .= '/16'; # default 204 | $network = Net::Netmask->new2($network); 205 | } 206 | 207 | return $network; 208 | } 209 | 210 | sub clear_and_build_cache { 211 | undef %cache; 212 | 213 | my @zones = split(/\s*,\s*/,MogileFS::Config->server_setting("network_zones")); 214 | 215 | foreach my $zone (@zones) { 216 | my $netmask = MogileFS::Config->server_setting("zone_".$zone); 217 | 218 | if (not $netmask) { 219 | warn "couldn't find network_zone <> check your server settings"; 220 | next; 221 | } 222 | 223 | if ($cache{$netmask}) { 224 | warn "duplicate netmask <$netmask> in network zones. check your server settings"; 225 | } 226 | 227 | $cache{$netmask} = Net::Netmask->new2($netmask); 228 | 229 | if (Net::Netmask::errstr()) { 230 | warn "couldn't parse <$zone> as a netmask. error was <".Net::Netmask::errstr(). 231 | ">. check your server settings"; 232 | } 233 | } 234 | } 235 | 236 | sub stuff_cache { # for testing, or it'll try the db 237 | my ($self, $ip, $netmask) = @_; 238 | 239 | $cache{$ip} = $netmask; 240 | $age = 1; 241 | } 242 | } 243 | 244 | 1; 245 | 246 | # Local Variables: 247 | # mode: perl 248 | # c-basic-indent: 4 249 | # indent-tabs-mode: nil 250 | # End: 251 | 252 | __END__ 253 | 254 | =head1 NAME 255 | 256 | MogileFS::ReplicationPolicy::MultipleNetworks 257 | 258 | =head1 RULES 259 | 260 | This policy tries to put files onto devices which are on different networks, if that isn't possible then devices on the same network are returned as "desperate" options. 261 | 262 | We aim to have as many copies as we can on unique networks, if there are 2 copies on one network and none on another, with a min of 2, we will still over-replicate to the other network. When called from the rebalancer we will therefore rebalance across networks and reduce the correct copy. 263 | 264 | By default we class 2 hosts as being on 2 different networks if they're are on different /16 networks (255.255.0.0). This can be controlled using server settings, with a list of network "zones", and then a definition of a netmask for each "zone". 265 | 266 | mogadm settings set network_zones location1,location2 267 | mogadm settings set zone_location1 192.168.0.0/24 268 | mogadm settings set zone_location2 10.0.0.0/24 269 | 270 | Zone names and netmasks must each be unique. 271 | 272 | =head1 SEE ALSO 273 | 274 | L 275 | 276 | L 277 | 278 | l 279 | --------------------------------------------------------------------------------