From c5b6ababd21ab6bb251e5a2b4db110e76fb00a26 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:02 -0400 Subject: [PATCH 1/7] locking/mutex: implement mutex_trylock_nested Despite the fact that several lockdep-related checks are skipped when calling trylock* versions of the locking primitives, for example mutex_trylock, each time the mutex is acquired, a held_lock is still placed onto the lockdep stack by __lock_acquire() which is called regardless of whether the trylock* or regular locking API was used. This means that if the caller successfully acquires more than MAX_LOCK_DEPTH locks of the same class, even when using mutex_trylock, lockdep will still complain that the maximum depth of the held lock stack has been reached and disable itself. For example, the following error currently occurs in the ARM version of KVM, once the code tries to lock all vCPUs of a VM configured with more than MAX_LOCK_DEPTH vCPUs, a situation that can easily happen on modern systems, where having more than 48 CPUs is common, and it's also common to run VMs that have vCPU counts approaching that number: [ 328.171264] BUG: MAX_LOCK_DEPTH too low! [ 328.175227] turning off the locking correctness validator. [ 328.180726] Please attach the output of /proc/lock_stat to the bug report [ 328.187531] depth: 48 max: 48! [ 328.190678] 48 locks held by qemu-kvm/11664: [ 328.194957] #0: ffff800086de5ba0 (&kvm->lock){+.+.}-{3:3}, at: kvm_ioctl_create_device+0x174/0x5b0 [ 328.204048] #1: ffff0800e78800b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.212521] #2: ffff07ffeee51e98 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.220991] #3: ffff0800dc7d80b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.229463] #4: ffff07ffe0c980b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.237934] #5: ffff0800a3883c78 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.246405] #6: ffff07fffbe480b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 Luckily, in all instances that require locking all vCPUs, the 'kvm->lock' is taken a priori, and that fact makes it possible to use the little known feature of lockdep, called a 'nest_lock', to avoid this warning and subsequent lockdep self-disablement. The action of 'nested lock' being provided to lockdep's lock_acquire(), causes the lockdep to detect that the top of the held lock stack contains a lock of the same class and then increment its reference counter instead of pushing a new held_lock item onto that stack. See __lock_acquire for more information. Signed-off-by: Maxim Levitsky Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/mutex.h | 15 +++++++++++++++ kernel/locking/mutex.c | 14 +++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2143d05116be..da4518cfd59c 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -193,7 +193,22 @@ extern void mutex_lock_io(struct mutex *lock); * * Returns 1 if the mutex has been acquired successfully, and 0 on contention. */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern int _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); + +#define mutex_trylock_nest_lock(lock, nest_lock) \ +( \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map), \ + _mutex_trylock_nest_lock(lock, &(nest_lock)->dep_map) \ +) + +#define mutex_trylock(lock) _mutex_trylock_nest_lock(lock, NULL) +#else extern int mutex_trylock(struct mutex *lock); +#define mutex_trylock_nest_lock(lock, nest_lock) mutex_trylock(lock) +#endif + extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 555e2b3a665a..c75a838d3bae 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1062,6 +1062,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, #endif +#ifndef CONFIG_DEBUG_LOCK_ALLOC /** * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired @@ -1077,18 +1078,25 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, * mutex must be released by the same task that acquired it. */ int __sched mutex_trylock(struct mutex *lock) +{ + MUTEX_WARN_ON(lock->magic != lock); + return __mutex_trylock(lock); +} +EXPORT_SYMBOL(mutex_trylock); +#else +int __sched _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock) { bool locked; MUTEX_WARN_ON(lock->magic != lock); - locked = __mutex_trylock(lock); if (locked) - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + mutex_acquire_nest(&lock->dep_map, 0, 1, nest_lock, _RET_IP_); return locked; } -EXPORT_SYMBOL(mutex_trylock); +EXPORT_SYMBOL(_mutex_trylock_nest_lock); +#endif #ifndef CONFIG_DEBUG_LOCK_ALLOC int __sched From fb49f07ba1d9d8c9bd8854878ae6b5b21ff9ac45 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:03 -0400 Subject: [PATCH 2/7] locking/mutex: implement mutex_lock_killable_nest_lock KVM's SEV intra-host migration code needs to lock all vCPUs of the source and the target VM, before it proceeds with the migration. The number of vCPUs that belong to each VM is not bounded by anything except a self-imposed KVM limit of CONFIG_KVM_MAX_NR_VCPUS vCPUs which is significantly larger than the depth of lockdep's lock stack. Luckily, the locks in both of the cases mentioned above, are held under the 'kvm->lock' of each VM, which means that we can use the little known lockdep feature called a "nest_lock" to support this use case in a cleaner way, compared to the way it's currently done. Implement and expose 'mutex_lock_killable_nest_lock' for this purpose. Signed-off-by: Maxim Levitsky Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/mutex.h | 17 +++++++++++++---- kernel/locking/mutex.c | 7 ++++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index da4518cfd59c..a039fa8c1780 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -156,16 +156,15 @@ static inline int __devm_mutex_init(struct device *dev, struct mutex *lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); - extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); -extern int __must_check mutex_lock_killable_nested(struct mutex *lock, - unsigned int subclass); +extern int __must_check _mutex_lock_killable(struct mutex *lock, + unsigned int subclass, struct lockdep_map *nest_lock); extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass); #define mutex_lock(lock) mutex_lock_nested(lock, 0) #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) -#define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0) +#define mutex_lock_killable(lock) _mutex_lock_killable(lock, 0, NULL) #define mutex_lock_io(lock) mutex_lock_io_nested(lock, 0) #define mutex_lock_nest_lock(lock, nest_lock) \ @@ -174,6 +173,15 @@ do { \ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) +#define mutex_lock_killable_nest_lock(lock, nest_lock) \ +( \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map), \ + _mutex_lock_killable(lock, 0, &(nest_lock)->dep_map) \ +) + +#define mutex_lock_killable_nested(lock, subclass) \ + _mutex_lock_killable(lock, subclass, NULL) + #else extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); @@ -183,6 +191,7 @@ extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) +# define mutex_lock_killable_nest_lock(lock, nest_lock) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) # define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock) #endif diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index c75a838d3bae..234923121ff0 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -808,11 +808,12 @@ _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); int __sched -mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) +_mutex_lock_killable(struct mutex *lock, unsigned int subclass, + struct lockdep_map *nest) { - return __mutex_lock(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); + return __mutex_lock(lock, TASK_KILLABLE, subclass, nest, _RET_IP_); } -EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); +EXPORT_SYMBOL_GPL(_mutex_lock_killable); int __sched mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) From e4a454ced74c0ac97c8bd32f086ee3ad74528780 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:04 -0400 Subject: [PATCH 3/7] KVM: add kvm_lock_all_vcpus and kvm_trylock_all_vcpus In a few cases, usually in the initialization code, KVM locks all vCPUs of a VM to ensure that userspace doesn't do funny things while KVM performs an operation that affects the whole VM. Until now, all these operations were implemented using custom code, and all of them share the same problem: Lockdep can't cope with simultaneous locking of a large number of locks of the same class. However if these locks are taken while another lock is already held, which is luckily the case, it is possible to take advantage of little known _nest_lock feature of lockdep which allows in this case to have an unlimited number of locks of same class to be taken. To implement this, create two functions: kvm_lock_all_vcpus() and kvm_trylock_all_vcpus() Both functions are needed because some code that will be replaced in the subsequent patches, uses mutex_trylock, instead of regular mutex_lock. Suggested-by: Paolo Bonzini Signed-off-by: Maxim Levitsky Acked-by: Marc Zyngier Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 +++ virt/kvm/kvm_main.c | 59 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1dedc421b3e3..a6140415c693 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1015,6 +1015,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) void kvm_destroy_vcpus(struct kvm *kvm); +int kvm_trylock_all_vcpus(struct kvm *kvm); +int kvm_lock_all_vcpus(struct kvm *kvm); +void kvm_unlock_all_vcpus(struct kvm *kvm); + void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 69782df3617f..d660a7da3baa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1368,6 +1368,65 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) return 0; } +int kvm_trylock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long i, j; + + lockdep_assert_held(&kvm->lock); + + kvm_for_each_vcpu(i, vcpu, kvm) + if (!mutex_trylock_nest_lock(&vcpu->mutex, &kvm->lock)) + goto out_unlock; + return 0; + +out_unlock: + kvm_for_each_vcpu(j, vcpu, kvm) { + if (i == j) + break; + mutex_unlock(&vcpu->mutex); + } + return -EINTR; +} +EXPORT_SYMBOL_GPL(kvm_trylock_all_vcpus); + +int kvm_lock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long i, j; + int r; + + lockdep_assert_held(&kvm->lock); + + kvm_for_each_vcpu(i, vcpu, kvm) { + r = mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock); + if (r) + goto out_unlock; + } + return 0; + +out_unlock: + kvm_for_each_vcpu(j, vcpu, kvm) { + if (i == j) + break; + mutex_unlock(&vcpu->mutex); + } + return r; +} +EXPORT_SYMBOL_GPL(kvm_lock_all_vcpus); + +void kvm_unlock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long i; + + lockdep_assert_held(&kvm->lock); + + kvm_for_each_vcpu(i, vcpu, kvm) + mutex_unlock(&vcpu->mutex); +} +EXPORT_SYMBOL_GPL(kvm_unlock_all_vcpus); + /* * Allocation size is twice as large as the actual dirty bitmap size. * See kvm_vm_ioctl_get_dirty_log() why this is needed. From c560bc9286e66713819c2f30b9ff32ee79d7f75e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:05 -0400 Subject: [PATCH 4/7] x86: KVM: SVM: use kvm_lock_all_vcpus instead of a custom implementation Use kvm_lock_all_vcpus instead of sev's own implementation. Because kvm_lock_all_vcpus uses the _nest_lock feature of lockdep, which ignores subclasses, there is no longer a need to use separate subclasses for source and target VMs. No functional change intended. Suggested-by: Paolo Bonzini Signed-off-by: Maxim Levitsky Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 72 +++--------------------------------------- 1 file changed, 4 insertions(+), 68 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a7a7dc507336..710ca9810a17 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1882,70 +1882,6 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) atomic_set_release(&src_sev->migration_in_progress, 0); } -/* vCPU mutex subclasses. */ -enum sev_migration_role { - SEV_MIGRATION_SOURCE = 0, - SEV_MIGRATION_TARGET, - SEV_NR_MIGRATION_ROLES, -}; - -static int sev_lock_vcpus_for_migration(struct kvm *kvm, - enum sev_migration_role role) -{ - struct kvm_vcpu *vcpu; - unsigned long i, j; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (mutex_lock_killable_nested(&vcpu->mutex, role)) - goto out_unlock; - -#ifdef CONFIG_PROVE_LOCKING - if (!i) - /* - * Reset the role to one that avoids colliding with - * the role used for the first vcpu mutex. - */ - role = SEV_NR_MIGRATION_ROLES; - else - mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); -#endif - } - - return 0; - -out_unlock: - - kvm_for_each_vcpu(j, vcpu, kvm) { - if (i == j) - break; - -#ifdef CONFIG_PROVE_LOCKING - if (j) - mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); -#endif - - mutex_unlock(&vcpu->mutex); - } - return -EINTR; -} - -static void sev_unlock_vcpus_for_migration(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - unsigned long i; - bool first = true; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (first) - first = false; - else - mutex_acquire(&vcpu->mutex.dep_map, - SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_); - - mutex_unlock(&vcpu->mutex); - } -} - static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) { struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm); @@ -2083,10 +2019,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) charged = true; } - ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE); + ret = kvm_lock_all_vcpus(kvm); if (ret) goto out_dst_cgroup; - ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET); + ret = kvm_lock_all_vcpus(source_kvm); if (ret) goto out_dst_vcpu; @@ -2100,9 +2036,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) ret = 0; out_source_vcpu: - sev_unlock_vcpus_for_migration(source_kvm); + kvm_unlock_all_vcpus(source_kvm); out_dst_vcpu: - sev_unlock_vcpus_for_migration(kvm); + kvm_unlock_all_vcpus(kvm); out_dst_cgroup: /* Operates on the source on success, on the destination on failure. */ if (charged) From b586c5d21954f203ba40c6120307d2e370c668d7 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:06 -0400 Subject: [PATCH 5/7] KVM: arm64: use kvm_trylock_all_vcpus when locking all vCPUs Use kvm_trylock_all_vcpus instead of a custom implementation when locking all vCPUs of a VM, to avoid triggering a lockdep warning, in the case in which the VM is configured to have more than MAX_LOCK_DEPTH vCPUs. This fixes the following false lockdep warning: [ 328.171264] BUG: MAX_LOCK_DEPTH too low! [ 328.175227] turning off the locking correctness validator. [ 328.180726] Please attach the output of /proc/lock_stat to the bug report [ 328.187531] depth: 48 max: 48! [ 328.190678] 48 locks held by qemu-kvm/11664: [ 328.194957] #0: ffff800086de5ba0 (&kvm->lock){+.+.}-{3:3}, at: kvm_ioctl_create_device+0x174/0x5b0 [ 328.204048] #1: ffff0800e78800b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.212521] #2: ffff07ffeee51e98 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.220991] #3: ffff0800dc7d80b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.229463] #4: ffff07ffe0c980b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.237934] #5: ffff0800a3883c78 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.246405] #6: ffff07fffbe480b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 Suggested-by: Paolo Bonzini Signed-off-by: Maxim Levitsky Acked-by: Marc Zyngier Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-6-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_host.h | 3 -- arch/arm64/kvm/arch_timer.c | 4 +-- arch/arm64/kvm/arm.c | 43 --------------------------- arch/arm64/kvm/vgic/vgic-init.c | 4 +-- arch/arm64/kvm/vgic/vgic-its.c | 8 ++--- arch/arm64/kvm/vgic/vgic-kvm-device.c | 12 ++++---- 6 files changed, 14 insertions(+), 60 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d941abc6b5ee..6ce2c5173482 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1320,9 +1320,6 @@ int __init populate_sysreg_config(const struct sys_reg_desc *sr, unsigned int idx); int __init populate_nv_trap_config(void); -bool lock_all_vcpus(struct kvm *kvm); -void unlock_all_vcpus(struct kvm *kvm); - void kvm_calculate_traps(struct kvm_vcpu *vcpu); /* MMIO helpers */ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 5133dcbfe9f7..fdbc8beec930 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1766,7 +1766,7 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, mutex_lock(&kvm->lock); - if (lock_all_vcpus(kvm)) { + if (!kvm_trylock_all_vcpus(kvm)) { set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags); /* @@ -1778,7 +1778,7 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, kvm->arch.timer_data.voffset = offset->counter_offset; kvm->arch.timer_data.poffset = offset->counter_offset; - unlock_all_vcpus(kvm); + kvm_unlock_all_vcpus(kvm); } else { ret = -EBUSY; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 36cfcffb40d8..248e257e988b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1924,49 +1924,6 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) } } -/* unlocks vcpus from @vcpu_lock_idx and smaller */ -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -void unlock_all_vcpus(struct kvm *kvm) -{ - lockdep_assert_held(&kvm->lock); - - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -/* Returns true if all vcpus were locked, false otherwise */ -bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - unsigned long c; - - lockdep_assert_held(&kvm->lock); - - /* - * Any time a vcpu is in an ioctl (including running), the - * core KVM code tries to grab the vcpu->mutex. - * - * By grabbing the vcpu->mutex of all VCPUs we ensure that no - * other VCPUs can fiddle with the state while we access it. - */ - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} - static unsigned long nvhe_percpu_size(void) { return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) - diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 1f33e71c2a73..6a426d403a6b 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -88,7 +88,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) lockdep_assert_held(&kvm->lock); ret = -EBUSY; - if (!lock_all_vcpus(kvm)) + if (kvm_trylock_all_vcpus(kvm)) return ret; mutex_lock(&kvm->arch.config_lock); @@ -142,7 +142,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) out_unlock: mutex_unlock(&kvm->arch.config_lock); - unlock_all_vcpus(kvm); + kvm_unlock_all_vcpus(kvm); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 569f9da9049f..2eb3e023f66a 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1971,7 +1971,7 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - if (!lock_all_vcpus(dev->kvm)) { + if (kvm_trylock_all_vcpus(dev->kvm)) { mutex_unlock(&dev->kvm->lock); return -EBUSY; } @@ -2006,7 +2006,7 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, } out: mutex_unlock(&dev->kvm->arch.config_lock); - unlock_all_vcpus(dev->kvm); + kvm_unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return ret; } @@ -2676,7 +2676,7 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) mutex_lock(&kvm->lock); - if (!lock_all_vcpus(kvm)) { + if (kvm_trylock_all_vcpus(kvm)) { mutex_unlock(&kvm->lock); return -EBUSY; } @@ -2698,7 +2698,7 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) mutex_unlock(&its->its_lock); mutex_unlock(&kvm->arch.config_lock); - unlock_all_vcpus(kvm); + kvm_unlock_all_vcpus(kvm); mutex_unlock(&kvm->lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 359094f68c23..f9ae790163fb 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -268,7 +268,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, return -ENXIO; mutex_lock(&dev->kvm->lock); - if (!lock_all_vcpus(dev->kvm)) { + if (kvm_trylock_all_vcpus(dev->kvm)) { mutex_unlock(&dev->kvm->lock); return -EBUSY; } @@ -276,7 +276,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, mutex_lock(&dev->kvm->arch.config_lock); r = vgic_v3_save_pending_tables(dev->kvm); mutex_unlock(&dev->kvm->arch.config_lock); - unlock_all_vcpus(dev->kvm); + kvm_unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return r; } @@ -390,7 +390,7 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - if (!lock_all_vcpus(dev->kvm)) { + if (kvm_trylock_all_vcpus(dev->kvm)) { mutex_unlock(&dev->kvm->lock); return -EBUSY; } @@ -415,7 +415,7 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, out: mutex_unlock(&dev->kvm->arch.config_lock); - unlock_all_vcpus(dev->kvm); + kvm_unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && !is_write) @@ -554,7 +554,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - if (!lock_all_vcpus(dev->kvm)) { + if (kvm_trylock_all_vcpus(dev->kvm)) { mutex_unlock(&dev->kvm->lock); return -EBUSY; } @@ -611,7 +611,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, out: mutex_unlock(&dev->kvm->arch.config_lock); - unlock_all_vcpus(dev->kvm); + kvm_unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && uaccess && !is_write) { From 8f56770d114b0261bc8d69bbc7ce8ce0c196d630 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:07 -0400 Subject: [PATCH 6/7] RISC-V: KVM: use kvm_trylock_all_vcpus when locking all vCPUs Use kvm_trylock_all_vcpus instead of a custom implementation when locking all vCPUs of a VM. Compile tested only. Suggested-by: Paolo Bonzini Signed-off-by: Maxim Levitsky Acked-by: Peter Zijlstra (Intel) Reviewed-by: Anup Patel Tested-by: Anup Patel Message-ID: <20250512180407.659015-7-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/riscv/kvm/aia_device.c | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c index 43e472ff3e1a..806c41931cde 100644 --- a/arch/riscv/kvm/aia_device.c +++ b/arch/riscv/kvm/aia_device.c @@ -12,36 +12,6 @@ #include #include -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -static void unlock_all_vcpus(struct kvm *kvm) -{ - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -static bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - unsigned long c; - - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} - static int aia_create(struct kvm_device *dev, u32 type) { int ret; @@ -53,7 +23,7 @@ static int aia_create(struct kvm_device *dev, u32 type) return -EEXIST; ret = -EBUSY; - if (!lock_all_vcpus(kvm)) + if (kvm_trylock_all_vcpus(kvm)) return ret; kvm_for_each_vcpu(i, vcpu, kvm) { @@ -65,7 +35,7 @@ static int aia_create(struct kvm_device *dev, u32 type) kvm->arch.aia.in_kernel = true; out_unlock: - unlock_all_vcpus(kvm); + kvm_unlock_all_vcpus(kvm); return ret; } From 4dbe28c0fabd69842890ba38f185b96664cba26a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 May 2025 10:34:30 +0200 Subject: [PATCH 7/7] rust: add helper for mutex_trylock After commit c5b6ababd21a ("locking/mutex: implement mutex_trylock_nested", currently in the KVM tree) mutex_trylock() will be a macro when lockdep is enabled. Rust therefore needs the corresponding helper. Just add it and the rust/bindings/bindings_helpers_generated.rs Makefile rules will do their thing. Reported-by: Stephen Rothwell Signed-off-by: Paolo Bonzini Message-ID: <20250528083431.1875345-1-pbonzini@redhat.com> Acked-by: Miguel Ojeda Reviewed-by: Alice Ryhl Signed-off-by: Paolo Bonzini --- rust/helpers/mutex.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rust/helpers/mutex.c b/rust/helpers/mutex.c index 06575553eda5..9ab29104bee1 100644 --- a/rust/helpers/mutex.c +++ b/rust/helpers/mutex.c @@ -7,6 +7,11 @@ void rust_helper_mutex_lock(struct mutex *lock) mutex_lock(lock); } +int rust_helper_mutex_trylock(struct mutex *lock) +{ + return mutex_trylock(lock); +} + void rust_helper___mutex_init(struct mutex *mutex, const char *name, struct lock_class_key *key) {