Unverified Commit f4cc0072 authored by Francesc Guasch's avatar Francesc Guasch Committed by GitHub
Browse files

#984 nodes (#1001)

* wip(cli): use faster machine listing

issue #984

* feature(backend): set process priorities

issue #984

* wip(backend): cache connections and check free memory

Now Virtual Manager connections are cached.
When balancing VMs, check if machine memory fits

issue #984

* wip(backend): ignores disabled nodes, disable when one fails

When a node is disabled, try not to use it.
When a node fails, set disabled.
Use process priorities.
Improve MAC address generation on KVM

* fix(KVM): don't reset the password if machine up

issue #955

* wip(backend): remove forkmanager

We handle forks ourselves so we can tweak this more.
i.e. we can allow more processes with more nodes.

issue #984

* test(nodes): check remote remove base

issue #984

* test(nodes): migrate without CD and fix volume names

issue #984

* test(download): give the process time to start

* test(nodes): check base and volatile clones

issue #984

* wip(nodes): cached nodes and allow clone when preparing base

Check the cached nodes are there and not renamed
When preparing a remote base allow to clone the machine

issue #984

* refactor(KVM): disable nodes and better UUID

creating random uuids sometimes failed

issue #984

* wip(requests): kill request if pid not found

issue #984

* test(volumes): create mock volumes with different names

issue #984

* refactor(nodes): clean remote bases

issue #984

* test(nodes): check volatile clones on remote

* test(backend): properly create mock domains

issue #984

* refactor(KVM): cope with duplicated uuids

issue #984

* refactor(nodes): disable failed remote domains

also cope with duplicated uuids on migration

issue #984

* refactor(nodes): improve balancing failover and volatile

issue #984

* test(pools): check unique uuid on creation

issue #984

* refactor(nodes): fixed remove base on remote down node

issue #984

* refactor(nodes): do not check active if node down

issue #984

* refactor(requests): allow shutdown timeout on refreshing vms

May come in useful one day; handy for testing for now

issue #984

* refactor(tests): removed debug

* refactor(test): check down domain on disabled/down nodes

issue #984

* refactor(backend): return all known remote ips

In some cases there may be more than one remote IP, probably
when accessing from localhost

issue #984

* refactor(test): wait more for remote machines down

issue #984

* refactor(backend): use timeout shutdown only if defined

issue #984

* refactor(nodes): deal with down node on refresh

issue #984

* refactor(nodes): open machine on requested node

issue #984

* refactor(test): properly test down node

issue #984

* refactor(test): new request ordering makes this test obsolete

issue #984

* refactor(test): make sure test node starts ok

issue #984

* refactor(backend): deal with is_active failing

issue #984

* refactor(nodes): fallback on local machine if remote fails

issue #984

* fix(requests): create instead of create_domain

issue #984

* refactor(nodes): connect on is_alive could deep recurse

Also private method to clean cache

issue #984

* refactor(kvm): removed annoying debug message

issue #984
parent d98268a3
......@@ -1602,12 +1602,20 @@ sub search_domain($self, $name, $import = 0) {
return if !$id;
if ($id_vm) {
my $vm;
eval { $vm = Ravada::VM->open($id_vm) };
my $vm_is_active;
eval {
$vm = Ravada::VM->open($id_vm);
$vm_is_active = $vm->is_active if $vm;
};
warn $@ if $@;
if ( $vm && !$vm->is_active) {
$vm->disconnect();
if ( $vm && !$vm_is_active) {
eval {
$vm->disconnect();
$vm->connect;
};
warn $@ if $@;
}
if ($vm && $vm->is_active ) {
if ($vm && $vm_is_active ) {
my $domain;
eval { $domain = $vm->search_domain($name)};
warn $@ if $@;
......@@ -2378,7 +2386,6 @@ sub _can_fork {
$req->status('waiting') if $req->status() !~ 'waiting';
return 0;
}
sub _wait_pids {
my $self = shift;
......@@ -2538,8 +2545,14 @@ sub _cmd_start_clones {
$sth->execute($id_domain);
while ( my ($id, $name, $is_base) = $sth->fetchrow) {
if ($is_base == 0) {
my $domain2 = $self->search_domain_by_id($id);
if (!$domain2->is_active) {
my $domain2;
my $is_active;
eval {
$domain2 = $self->search_domain_by_id($id);
$is_active = $domain2->is_active;
};
warn $@ if $@;
if (!$is_active) {
my $req = Ravada::Request->start_domain(
uid => $uid
,name => $name
......@@ -2959,7 +2972,10 @@ sub _refresh_active_domains($self, $request=undef) {
my $domain = $vm->search_domain_by_id($id_domain);
$self->_refresh_active_domain($vm, $domain, \%active_domain) if $domain;
} else {
for my $domain ($vm->list_domains( )) {
my @domains;
eval { @domains = $vm->list_domains };
warn $@ if $@;
for my $domain (@domains) {
next if $active_domain{$domain->id};
next if $domain->is_hibernated;
$self->_refresh_active_domain($vm, $domain, \%active_domain);
......@@ -2976,7 +2992,8 @@ sub _refresh_down_nodes($self, $request = undef ) {
$sth->execute();
while ( my ($id) = $sth->fetchrow()) {
my $vm;
$vm = Ravada::VM->open($id);
eval { $vm = Ravada::VM->open($id) };
warn $@ if $@;
}
}
......
......@@ -900,11 +900,12 @@ sub open($class, @args) {
die "ERROR: Domain not found id=$id\n"
if !keys %$row;
if (!$vm && ( $id_vm || ( $self->_data('id_vm') && !$self->is_base) ) ) {
if (!$vm && ( $id_vm || defined $row->{id_vm} ) ) {
eval {
$vm = Ravada::VM->open(id => ( $id_vm or $self->_data('id_vm') )
$vm = Ravada::VM->open(id => ( $id_vm or $row->{id_vm} )
, readonly => $readonly);
};
warn $@ if $@;
if ($@ && $@ =~ /I can't find VM id=/) {
$vm = Ravada::VM->open( type => $self->type );
}
......@@ -917,18 +918,18 @@ sub open($class, @args) {
$vm = $vm_local->new( );
}
my $domain = $vm->search_domain($row->{name}, $force);
my $domain;
eval { $domain = $vm->search_domain($row->{name}, $force) };
if ( !$domain ) {
return if $vm->is_local;
if (!$vm_local) {
$vm_local = {};
my $vm_class = "Ravada::VM::".$row->{vm};
bless $vm_local, $vm_class;
}
$vm_local = {};
my $vm_class = "Ravada::VM::".$row->{vm};
bless $vm_local, $vm_class;
$vm = $vm_local->new();
$domain = $vm->search_domain($row->{name}, $force) or return;
$domain->_data(id_vm => $vm->id);
}
if (!$id_vm) {
$domain->_search_already_started() if !$domain->is_base;
......@@ -2846,7 +2847,9 @@ sub rsync($self, @args) {
}
if ($rsync->err) {
$request->status("done",join(" ",@{$rsync->err})) if $request;
confess $rsync->err;
confess "error syncing to ".$node->host."\n"
.Dumper($files)."\n"
.join(' ',@{$rsync->err});
}
$node->refresh_storage_pools();
}
......
......@@ -123,7 +123,7 @@ our %COMMAND = (
,important=> {
limit => 20
,priority => 1
,commands => ['clone','start','start_clones','create_domain','open_iptables']
,commands => ['clone','start','start_clones','create','open_iptables']
}
,secondary => {
limit => 50
......
......@@ -190,6 +190,10 @@ sub open {
}
# Private helper: empties the package-level %VM hash.
# NOTE(review): %VM is presumably the cache of already opened VM
# connections consulted by open() -- confirm against its declaration,
# which is outside this view.
sub _clean_cache {
%VM = ();
}
sub BUILD {
my $self = shift;
......@@ -984,9 +988,6 @@ sub _do_is_active($self) {
} else {
if ( $self->is_alive ) {
$ret = 1;
} else {
$self->connect();
$ret = 1 if $self->is_alive;
}
}
}
......@@ -1214,7 +1215,7 @@ sub balance_vm($self, $base=undef) {
eval { $active = $vm->is_active() };
my $error = $@;
if ($error && !$vm->is_local) {
warn "disabling ".$vm->name." $error";
warn "[balance] disabling ".$vm->name." ".$vm->enabled()." $error";
$vm->enabled(0);
}
......@@ -1230,10 +1231,6 @@ sub balance_vm($self, $base=undef) {
next;
}
if ( $free_memory < $min_memory ) {
push @status, ($vm->name." low free memory : $free_memory");
}
my $n_active = $vm->count_domains(status => 'active')
+ $vm->count_domains(status => 'starting');
......
......@@ -836,9 +836,7 @@ sub _domain_create_common {
last if !$@;
if ($@ =~ /libvirt error code: 9, .*already defined with uuid/) {
warn $@;
my $new_uuid = $self->_xml_modify_uuid($xml);
$self->_xml_modify_uuid($xml);
} elsif ($@ =~ /libvirt error code: 1, .* pool .* asynchronous/) {
sleep 1;
} else {
......
......@@ -249,10 +249,6 @@ sub test_requests_by_domain {
is($req1->status , 'done');
is($req2->status , 'done');
is($@,'');
like($req_clone->error,qr(has \d req)) or exit;
is($req_clone->status , 'retry');
is($req4->status , 'done');
is($domain->is_base,1) or exit;
......
......@@ -944,12 +944,15 @@ sub start_node($node) {
for ( 1 .. 60 ) {
my $is_active;
eval { $is_active = $node->_do_is_active };
eval {
$node->connect();
$is_active = $node->is_active(1)
};
warn $@ if $@;
last if $is_active;
sleep 1;
diag("Waiting for active node ".$node->name." $_") if !($_ % 10);
}
is($node->_do_is_active,1,"Expecting active node ".$node->name) or exit;
my $connect;
......
......@@ -59,6 +59,15 @@ sub test_down_node($vm, $node) {
sub test_disabled_node($vm, $node) {
diag("[".$vm->type."] Test clones should shutdown on disabled nodes");
start_node($node);
Ravada::VM::_clean_cache();
my $node2 = Ravada::VM->open($node->id);
$node2->_cached_active_time(0);
for ( 1 .. 60 ) {
last if $node2->is_active;
sleep 1;
}
$node->enabled(1);
is($node->enabled, 1) or exit;
my $domain = create_domain($vm);
$domain->prepare_base(user_admin);
......@@ -74,22 +83,28 @@ sub test_disabled_node($vm, $node) {
$node->enabled(0);
is($node->enabled, 0);
my $timeout = 3;
my $clone2 = Ravada::Domain->open( id => $clone->id, id_vm => $clone->_vm->id );
is($clone2->_vm->name, $clone->_vm->name) or exit;
my $timeout = 4;
my $req = Ravada::Request->refresh_vms( timeout_shutdown => $timeout );
rvd_back->_process_requests_dont_fork(1);
rvd_back->_process_requests_dont_fork();
is($req->status, 'done');
is($req->error, '',"Expecting no error after refresh vms");
my @reqs = $clone->list_requests();
ok(@reqs,"Expecting requests for clone to shutdown") or exit;
delete $clone->{_data};
if ( !$clone->is_active ) {
ok(@reqs,"Expecting requests for clone to shutdown") or exit;
}
for ( 1 .. $timeout * 2 ) {
delete $clone->{_data};
rvd_back->_process_requests_dont_fork(1);
rvd_back->_process_requests_dont_fork();
is($clone->_vm->id, $node->id ) or exit;
last if !$clone->is_active;
sleep 1;
diag("Waiting for clone ".$clone->name." down");
diag("Waiting for ".$clone->name." to shutdown on disabled node");
}
is($clone->is_active, 0, "Expecting clone ".$clone->name." not active in ".$clone->_vm->name
." after node disabled") or exit;
......@@ -140,6 +155,7 @@ for my $vm_name ( 'KVM', 'Void') {
next;
};
is($node->is_local,0,"Expecting ".$node->name." ".$node->ip." is remote" ) or BAIL_OUT();
test_down_node($vm, $node);
test_disabled_node($vm, $node);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment