userfaultfd: move userfaultfd_ctx struct to header file

JIRA: https://issues.redhat.com/browse/RHEL-84184
JIRA: https://issues.redhat.com/browse/RHEL-80529

This patch is a backport of the following upstream commit:
commit f91e6b41dd11daffb138e3afdb4804aefc3d4e1b
Author: Lokesh Gidra <lokeshgidra@google.com>
Date:   Thu Feb 15 10:27:53 2024 -0800

    userfaultfd: move userfaultfd_ctx struct to header file

    Patch series "per-vma locks in userfaultfd", v7.

    Performing userfaultfd operations (like copy/move etc.) in critical
    section of mmap_lock (read-mode) causes significant contention on the lock
    when operations requiring the lock in write-mode are taking place
    concurrently.  We can use per-vma locks instead to significantly reduce
    the contention issue.

    Android runtime's Garbage Collector uses userfaultfd for concurrent
    compaction.  mmap-lock contention during compaction potentially causes
    jittery experience for the user.  During one such reproducible scenario,
    we observed the following improvements with this patch-set:

    - Wall clock time of compaction phase came down from ~3s to <500ms
    - Uninterruptible sleep time (across all threads in the process) was
      ~10ms (none in mmap_lock) during compaction, instead of >20s

    This patch (of 4):

    Move the struct to userfaultfd_k.h to be accessible from mm/userfaultfd.c.
    There are no other changes in the struct.

    This is required to prepare for using per-vma locks in userfaultfd
    operations.

    Link: https://lkml.kernel.org/r/20240215182756.3448972-1-lokeshgidra@google.com
    Link: https://lkml.kernel.org/r/20240215182756.3448972-2-lokeshgidra@google.com
    Signed-off-by: Lokesh Gidra <lokeshgidra@google.com>
    Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
    Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
    Cc: Andrea Arcangeli <aarcange@redhat.com>
    Cc: Axel Rasmussen <axelrasmussen@google.com>
    Cc: Brian Geffon <bgeffon@google.com>
    Cc: David Hildenbrand <david@redhat.com>
    Cc: Jann Horn <jannh@google.com>
    Cc: Kalesh Singh <kaleshsingh@google.com>
    Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
    Cc: Nicolas Geoffray <ngeoffray@google.com>
    Cc: Peter Xu <peterx@redhat.com>
    Cc: Ryan Roberts <ryan.roberts@arm.com>
    Cc: Suren Baghdasaryan <surenb@google.com>
    Cc: Tim Murray <timmurray@google.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Signed-off-by: Rafael Aquini <raquini@redhat.com>
This commit is contained in:
Rafael Aquini 2025-03-31 09:32:24 -04:00
parent 4480132866
commit d70c93702f
2 changed files with 39 additions and 39 deletions

View File

@@ -37,45 +37,6 @@ int sysctl_unprivileged_userfaultfd __read_mostly;
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
/*
* Start with fault_pending_wqh and fault_wqh so they're more likely
* to be in the same cacheline.
*
* Locking order:
* fd_wqh.lock
* fault_pending_wqh.lock
* fault_wqh.lock
* event_wqh.lock
*
* To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
* since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
* also taken in IRQ context.
*/
struct userfaultfd_ctx {
/* waitqueue head for the pending (i.e. not read) userfaults */
wait_queue_head_t fault_pending_wqh;
/* waitqueue head for the userfaults */
wait_queue_head_t fault_wqh;
/* waitqueue head for the pseudo fd to wakeup poll/read */
wait_queue_head_t fd_wqh;
/* waitqueue head for events */
wait_queue_head_t event_wqh;
/* a refile sequence protected by fault_pending_wqh lock */
seqcount_spinlock_t refile_seq;
/* pseudo fd refcounting */
refcount_t refcount;
/* userfaultfd syscall flags */
unsigned int flags;
/* features requested from the userspace */
unsigned int features;
/* released */
bool released;
/* memory mappings are changing because of non-cooperative event */
atomic_t mmap_changing;
/* mm with one or more vmas attached to this userfaultfd_ctx */
struct mm_struct *mm;
};
struct userfaultfd_fork_ctx {
struct userfaultfd_ctx *orig;
struct userfaultfd_ctx *new;

View File

@@ -36,6 +36,45 @@
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
/*
* Start with fault_pending_wqh and fault_wqh so they're more likely
* to be in the same cacheline.
*
* Locking order:
* fd_wqh.lock
* fault_pending_wqh.lock
* fault_wqh.lock
* event_wqh.lock
*
* To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
* since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
* also taken in IRQ context.
*/
struct userfaultfd_ctx {
/* waitqueue head for the pending (i.e. not read) userfaults */
wait_queue_head_t fault_pending_wqh;
/* waitqueue head for the userfaults */
wait_queue_head_t fault_wqh;
/* waitqueue head for the pseudo fd to wakeup poll/read */
wait_queue_head_t fd_wqh;
/* waitqueue head for events */
wait_queue_head_t event_wqh;
/* a refile sequence protected by fault_pending_wqh lock */
seqcount_spinlock_t refile_seq;
/* pseudo fd refcounting */
refcount_t refcount;
/* userfaultfd syscall flags */
unsigned int flags;
/* features requested from the userspace */
unsigned int features;
/* released */
bool released;
/* memory mappings are changing because of non-cooperative event */
atomic_t mmap_changing;
/* mm with one or more vmas attached to this userfaultfd_ctx */
struct mm_struct *mm;
};
extern int sysctl_unprivileged_userfaultfd;
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);