From 7fd190aaebdc1ce13f3023a41d2191d1e4ad9fd2 Mon Sep 17 00:00:00 2001 From: Pavel Boldyrev <627562+bpg@users.noreply.github.com> Date: Wed, 16 Apr 2025 21:04:35 -0400 Subject: [PATCH] fix(vm): race condition on reboot causing inconsistent VM state (#1911) Signed-off-by: Pavel Boldyrev <627562+bpg@users.noreply.github.com> --- proxmox/nodes/vms/vms.go | 19 +++++++++++++++++++ proxmoxtf/resource/vm/vm.go | 37 ++++++++++++++----------------------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/proxmox/nodes/vms/vms.go b/proxmox/nodes/vms/vms.go index 1d2624c9..41f2d9e1 100644 --- a/proxmox/nodes/vms/vms.go +++ b/proxmox/nodes/vms/vms.go @@ -300,6 +300,25 @@ func (c *Client) RebuildCloudInitDisk(ctx context.Context) error { return nil } +// RebootVMAndWaitForRunning reboots a virtual machine and waits for it to be running. +func (c *Client) RebootVMAndWaitForRunning(ctx context.Context, rebootTimeoutSec int) error { + // We add 3 seconds padding to the timeout to account for retries and delays down the callstack. + ctx, cancel := context.WithTimeout(ctx, time.Duration(rebootTimeoutSec+3)*time.Second) + defer cancel() + + err := c.RebootVM( + ctx, + &RebootRequestBody{ + Timeout: &rebootTimeoutSec, + }, + ) + if err != nil { + return err + } + + return c.WaitForVMStatus(ctx, "running") +} + // RebootVM reboots a virtual machine. func (c *Client) RebootVM(ctx context.Context, d *RebootRequestBody) error { taskID, err := c.RebootVMAsync(ctx, d) diff --git a/proxmoxtf/resource/vm/vm.go b/proxmoxtf/resource/vm/vm.go index 100acc3c..9b9c2b57 100644 --- a/proxmoxtf/resource/vm/vm.go +++ b/proxmoxtf/resource/vm/vm.go @@ -2865,14 +2865,8 @@ func vmCreateStart(ctx context.Context, d *schema.ResourceData, m interface{}) d if reboot { rebootTimeoutSec := d.Get(mkTimeoutReboot).(int) - err := vmAPI.RebootVM( - ctx, - &vms.RebootRequestBody{ - Timeout: &rebootTimeoutSec, - }, - ) - if err != nil { - return diag.FromErr(err) + if e := vmAPI.RebootVMAndWaitForRunning(ctx, rebootTimeoutSec); e != nil { + return diag.FromErr(e) } } @@ -3544,7 +3538,16 @@ func vmReadCustom( return diag.FromErr(e) } - diags := vmReadPrimitiveValues(d, vmConfig, vmStatus) + var diags diag.Diagnostics + + if !d.Get(mkTemplate).(bool) { + // we shouldn't be updating this attribute at read. Instead, the current status should be captured + // in a separate computed attribute. To be addressed in v1.0 + err := d.Set(mkStarted, vmStatus.Status == "running") + diags = append(diags, diag.FromErr(err)...) + } + + diags = append(diags, vmReadPrimitiveValues(d, vmConfig)...) if diags.HasError() { return diags } @@ -4695,7 +4698,6 @@ func vmReadCustom( func vmReadPrimitiveValues( d *schema.ResourceData, vmConfig *vms.GetResponseData, - vmStatus *vms.GetStatusResponseData, ) diag.Diagnostics { var diags diag.Diagnostics @@ -4829,11 +4831,6 @@ func vmReadPrimitiveValues( diags = append(diags, diag.FromErr(err)...) } - if !d.Get(mkTemplate).(bool) { - err = d.Set(mkStarted, vmStatus.Status == "running") - diags = append(diags, diag.FromErr(err)...) - } - currentTabletDevice := d.Get(mkTabletDevice).(bool) if len(clone) == 0 || !currentTabletDevice { @@ -5804,14 +5801,8 @@ func vmUpdateDiskLocationAndSize( if vmStatus.Status != "stopped" { rebootTimeoutSec := d.Get(mkTimeoutReboot).(int) - err := vmAPI.RebootVM( - ctx, - &vms.RebootRequestBody{ - Timeout: &rebootTimeoutSec, - }, - ) - if err != nil { - return diag.FromErr(err) + if e := vmAPI.RebootVMAndWaitForRunning(ctx, rebootTimeoutSec); e != nil { + return diag.FromErr(e) } } }