From 0016b49d9bf3d76777094ca6bb2cf6c283b650d1 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 27 Jun 2024 07:32:40 -0400 Subject: [PATCH] KVM: Treat the device list as an rculist JIRA: https://issues.redhat.com/browse/RHEL-43288 A subsequent change to KVM/arm64 will necessitate walking the device list outside of the kvm->lock. Prepare by converting to an rculist. This has zero effect on the VM destruction path, as it is expected every reader is backed by a reference on the kvm struct. On the other hand, ensure a given device is completely destroyed before dropping the kvm->lock in the release() path, as certain devices expect to be a singleton (e.g. the vfio-kvm device). Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Oliver Upton Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20240422200158.2606761-2-oliver.upton@linux.dev Signed-off-by: Marc Zyngier (cherry picked from commit ea54dd374232cc3b6d0ac0a89d715d61ebb04bf6) Signed-off-by: Sebastian Ott --- virt/kvm/kvm_main.c | 14 +++++++++++--- virt/kvm/vfio.c | 2 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3607362a5f2a..abaa41218ac7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1328,6 +1328,12 @@ static void kvm_destroy_devices(struct kvm *kvm) * We do not need to take the kvm->lock here, because nobody else * has a reference to the struct kvm at this point and therefore * cannot access the devices list anyhow. + * + * The device list is generally managed as an rculist, but list_del() + * is used intentionally here. If a bug in KVM introduced a reader that + * was not backed by a reference on the kvm struct, the hope is that + * it'd consume the poisoned forward pointer instead of suffering a + * use-after-free, even though this cannot be guaranteed. */ list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { list_del(&dev->vm_node); @@ -4710,7 +4716,8 @@ static int kvm_device_release(struct inode *inode, struct file *filp) if (dev->ops->release) { mutex_lock(&kvm->lock); - list_del(&dev->vm_node); + list_del_rcu(&dev->vm_node); + synchronize_rcu(); dev->ops->release(dev); mutex_unlock(&kvm->lock); } @@ -4793,7 +4800,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, kfree(dev); return ret; } - list_add(&dev->vm_node, &kvm->devices); + list_add_rcu(&dev->vm_node, &kvm->devices); mutex_unlock(&kvm->lock); if (ops->init) @@ -4804,7 +4811,8 @@ static int kvm_ioctl_create_device(struct kvm *kvm, if (ret < 0) { kvm_put_kvm_no_destroy(kvm); mutex_lock(&kvm->lock); - list_del(&dev->vm_node); + list_del_rcu(&dev->vm_node); + synchronize_rcu(); if (ops->release) ops->release(dev); mutex_unlock(&kvm->lock); diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index ca24ce120906..76b7f6085dcd 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -366,6 +366,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) struct kvm_device *tmp; struct kvm_vfio *kv; + lockdep_assert_held(&dev->kvm->lock); + /* Only one VFIO "device" per VM */ list_for_each_entry(tmp, &dev->kvm->devices, vm_node) if (tmp->ops == &kvm_vfio_ops)