Merge: mm/gup, udmabuf: Complete memfd_pin_folios() for pinning memfd folios

MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6826

JIRA: https://issues.redhat.com/browse/RHEL-89519
Depends: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6414
Tested: The udmabuf selftest ran successfully on the patched kernel
        with the appropriate hugetlb settings.

This MR completes the backport of the patch series "mm/gup: Introduce
memfd_pin_folios() for pinning memfd folios", along with the associated
fix commits and other patches pulled in to reduce merge conflicts. It
also backports the updated udmabuf selftest patches.

This is needed to support AMD's proposed solution for letting the memory
cgroup controller limit the memory consumption of GPU-heavy applications.
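
For reference, the calling pattern of the new API, which the udmabuf rework
below follows, is roughly the sketch here. The helper name pin_memfd_range()
is made up for illustration; error handling and the per-page offset
bookkeeping done by the real udmabuf_pin_folios() are left out.

/*
 * Illustrative sketch only (not code from this series): pin the folios
 * backing [start, start + size) of a memfd, use them, then drop the
 * longterm pins again with unpin_folio().
 */
static long pin_memfd_range(struct file *memfd, loff_t start, loff_t size)
{
        pgoff_t nr_pages = size >> PAGE_SHIFT;
        struct folio **folios;
        pgoff_t first_offset;
        long nr_folios, i;

        folios = kvmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
        if (!folios)
                return -ENOMEM;

        /* Longterm-pin the folios backing [start, end]; unpinnable (movable)
         * folios are migrated first by the core API.
         */
        nr_folios = memfd_pin_folios(memfd, start, start + size - 1,
                                     folios, nr_pages, &first_offset);
        if (nr_folios <= 0) {
                kvfree(folios);
                return nr_folios ? nr_folios : -EINVAL;
        }

        /* ... hand the folios to the device ... */

        for (i = 0; i < nr_folios; i++)
                unpin_folio(folios[i]);
        kvfree(folios);
        return 0;
}

Unlike pin_user_pages(), the folios are looked up through the memfd itself,
so no userspace mapping of the buffer is required, and both shmem and
hugetlbfs backed memfds are handled.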

Signed-off-by: Waiman Long <longman@redhat.com>

Approved-by: Donald Dutile <ddutile@redhat.com>
Approved-by: Rafael Aquini <raquini@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>

Merged-by: Jan Stancek <jstancek@redhat.com>
commit 808c350e13 (Jan Stancek, 2025-05-26 10:33:39 +02:00)
13 changed files with 421 additions and 148 deletions


@@ -378,6 +378,13 @@ Note that the number of overcommit and reserve pages remain global quantities,
as we don't know until fault time, when the faulting task's mempolicy is
applied, from which node the huge page allocation will be attempted.
A hugetlb page may be migrated between the per-node hugepages pools in the
following scenarios: memory offline, memory failure, longterm pinning,
syscalls (mbind, migrate_pages and move_pages), alloc_contig_range() and
alloc_contig_pages(). Currently only memory offline, memory failure and the
syscalls are allowed to fall back to allocating a new hugetlb page on a
different node if the current node is unable to allocate one during hugetlb
migration, which means these three cases can break the per-node hugepages pool.
.. _using_huge_pages:
Using Huge Pages
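
The include/linux/hugetlb.h and mm/hugetlb.c hunks further down implement the
policy described in the hugetlbpage.rst text above. Condensed, and with a
made-up helper name, the decision amounts to the following sketch (the real
helper added by the series is htlb_allow_alloc_fallback()):

/*
 * Condensed sketch of the fallback policy described above; not the code
 * added by the series.
 */
static inline bool hugetlb_migration_may_fallback(int reason)
{
        switch (reason) {
        case MR_MEMORY_HOTPLUG:         /* memory offline */
        case MR_MEMORY_FAILURE:
        case MR_SYSCALL:                /* migrate_pages()/move_pages() */
        case MR_MEMPOLICY_MBIND:        /* mbind() */
                return true;            /* may allocate on another node */
        default:
                return false;           /* __GFP_THISNODE is added instead */
        }
}

For the other reasons (e.g. longterm pinning, alloc_contig_range()),
alloc_hugetlb_folio_nodemask() adds __GFP_THISNODE before attempting a new
allocation, so the per-node pool is not broken.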


@@ -30,6 +30,12 @@ struct udmabuf {
struct sg_table *sg;
struct miscdevice *device;
pgoff_t *offsets;
struct list_head unpin_list;
};
struct udmabuf_folio {
struct folio *folio;
struct list_head list;
};
static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
@@ -37,7 +43,8 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct udmabuf *ubuf = vma->vm_private_data;
pgoff_t pgoff = vmf->pgoff;
unsigned long pfn;
unsigned long addr, pfn;
vm_fault_t ret;
if (pgoff >= ubuf->pagecount)
return VM_FAULT_SIGBUS;
@@ -45,7 +52,35 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
pfn = folio_pfn(ubuf->folios[pgoff]);
pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
return vmf_insert_pfn(vma, vmf->address, pfn);
ret = vmf_insert_pfn(vma, vmf->address, pfn);
if (ret & VM_FAULT_ERROR)
return ret;
/* pre fault */
pgoff = vma->vm_pgoff;
addr = vma->vm_start;
for (; addr < vma->vm_end; pgoff++, addr += PAGE_SIZE) {
if (addr == vmf->address)
continue;
if (WARN_ON(pgoff >= ubuf->pagecount))
break;
pfn = folio_pfn(ubuf->folios[pgoff]);
pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
/**
* If the below vmf_insert_pfn() fails, we do not return an
* error here during this pre-fault step. However, an error
* will be returned if the failure occurs when the addr is
* truly accessed.
*/
if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
break;
}
return ret;
}
static const struct vm_operations_struct udmabuf_vm_ops = {
@@ -161,17 +196,43 @@ static void unmap_udmabuf(struct dma_buf_attachment *at,
return put_sg_table(at->dev, sg, direction);
}
static void unpin_all_folios(struct list_head *unpin_list)
{
struct udmabuf_folio *ubuf_folio;
while (!list_empty(unpin_list)) {
ubuf_folio = list_first_entry(unpin_list,
struct udmabuf_folio, list);
unpin_folio(ubuf_folio->folio);
list_del(&ubuf_folio->list);
kfree(ubuf_folio);
}
}
static int add_to_unpin_list(struct list_head *unpin_list,
struct folio *folio)
{
struct udmabuf_folio *ubuf_folio;
ubuf_folio = kzalloc(sizeof(*ubuf_folio), GFP_KERNEL);
if (!ubuf_folio)
return -ENOMEM;
ubuf_folio->folio = folio;
list_add_tail(&ubuf_folio->list, unpin_list);
return 0;
}
static void release_udmabuf(struct dma_buf *buf)
{
struct udmabuf *ubuf = buf->priv;
struct device *dev = ubuf->device->this_device;
pgoff_t pg;
if (ubuf->sg)
put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
for (pg = 0; pg < ubuf->pagecount; pg++)
folio_put(ubuf->folios[pg]);
unpin_all_folios(&ubuf->unpin_list);
kvfree(ubuf->offsets);
kvfree(ubuf->folios);
kfree(ubuf);
@@ -226,71 +287,10 @@ static const struct dma_buf_ops udmabuf_ops = {
#define SEALS_WANTED (F_SEAL_SHRINK)
#define SEALS_DENIED (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
static int handle_hugetlb_pages(struct udmabuf *ubuf, struct file *memfd,
pgoff_t offset, pgoff_t pgcnt,
pgoff_t *pgbuf)
{
struct hstate *hpstate = hstate_file(memfd);
pgoff_t mapidx = offset >> huge_page_shift(hpstate);
pgoff_t subpgoff = (offset & ~huge_page_mask(hpstate)) >> PAGE_SHIFT;
pgoff_t maxsubpgs = huge_page_size(hpstate) >> PAGE_SHIFT;
struct folio *folio = NULL;
pgoff_t pgidx;
mapidx <<= huge_page_order(hpstate);
for (pgidx = 0; pgidx < pgcnt; pgidx++) {
if (!folio) {
folio = __filemap_get_folio(memfd->f_mapping,
mapidx,
FGP_ACCESSED, 0);
if (IS_ERR(folio))
return PTR_ERR(folio);
}
folio_get(folio);
ubuf->folios[*pgbuf] = folio;
ubuf->offsets[*pgbuf] = subpgoff << PAGE_SHIFT;
(*pgbuf)++;
if (++subpgoff == maxsubpgs) {
folio_put(folio);
folio = NULL;
subpgoff = 0;
mapidx += pages_per_huge_page(hpstate);
}
}
if (folio)
folio_put(folio);
return 0;
}
static int handle_shmem_pages(struct udmabuf *ubuf, struct file *memfd,
pgoff_t offset, pgoff_t pgcnt,
pgoff_t *pgbuf)
{
pgoff_t pgidx, pgoff = offset >> PAGE_SHIFT;
struct folio *folio = NULL;
for (pgidx = 0; pgidx < pgcnt; pgidx++) {
folio = shmem_read_folio(memfd->f_mapping, pgoff + pgidx);
if (IS_ERR(folio))
return PTR_ERR(folio);
ubuf->folios[*pgbuf] = folio;
(*pgbuf)++;
}
return 0;
}
static int check_memfd_seals(struct file *memfd)
{
int seals;
if (!memfd)
return -EBADFD;
if (!shmem_file(memfd) && !is_file_hugepages(memfd))
return -EBADFD;
@@ -319,69 +319,126 @@ static struct dma_buf *export_udmabuf(struct udmabuf *ubuf,
return dma_buf_export(&exp_info);
}
static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd,
loff_t start, loff_t size)
{
pgoff_t pgoff, pgcnt, upgcnt = ubuf->pagecount;
struct folio **folios = NULL;
u32 cur_folio, cur_pgcnt;
long nr_folios;
long ret = 0;
loff_t end;
pgcnt = size >> PAGE_SHIFT;
folios = kvmalloc_array(pgcnt, sizeof(*folios), GFP_KERNEL);
if (!folios)
return -ENOMEM;
end = start + (pgcnt << PAGE_SHIFT) - 1;
nr_folios = memfd_pin_folios(memfd, start, end, folios, pgcnt, &pgoff);
if (nr_folios <= 0) {
ret = nr_folios ? nr_folios : -EINVAL;
goto end;
}
cur_pgcnt = 0;
for (cur_folio = 0; cur_folio < nr_folios; ++cur_folio) {
pgoff_t subpgoff = pgoff;
size_t fsize = folio_size(folios[cur_folio]);
ret = add_to_unpin_list(&ubuf->unpin_list, folios[cur_folio]);
if (ret < 0)
goto end;
for (; subpgoff < fsize; subpgoff += PAGE_SIZE) {
ubuf->folios[upgcnt] = folios[cur_folio];
ubuf->offsets[upgcnt] = subpgoff;
++upgcnt;
if (++cur_pgcnt >= pgcnt)
goto end;
}
/**
* In a given range, only the first subpage of the first folio
* has an offset, which is returned by memfd_pin_folios().
* The first subpages of other folios (in the range) have an
* offset of 0.
*/
pgoff = 0;
}
end:
ubuf->pagecount = upgcnt;
kvfree(folios);
return ret;
}
static long udmabuf_create(struct miscdevice *device,
struct udmabuf_create_list *head,
struct udmabuf_create_item *list)
{
pgoff_t pgcnt, pgbuf = 0, pglimit;
struct file *memfd = NULL;
pgoff_t pgcnt = 0, pglimit;
struct udmabuf *ubuf;
struct dma_buf *dmabuf;
int ret = -EINVAL;
long ret = -EINVAL;
u32 i, flags;
ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
if (!ubuf)
return -ENOMEM;
INIT_LIST_HEAD(&ubuf->unpin_list);
pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
for (i = 0; i < head->count; i++) {
if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
if (!PAGE_ALIGNED(list[i].offset))
goto err;
if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
if (!PAGE_ALIGNED(list[i].size))
goto err;
ubuf->pagecount += list[i].size >> PAGE_SHIFT;
if (ubuf->pagecount > pglimit)
pgcnt += list[i].size >> PAGE_SHIFT;
if (pgcnt > pglimit)
goto err;
}
if (!ubuf->pagecount)
if (!pgcnt)
goto err;
ubuf->folios = kvmalloc_array(ubuf->pagecount, sizeof(*ubuf->folios),
GFP_KERNEL);
ubuf->folios = kvmalloc_array(pgcnt, sizeof(*ubuf->folios), GFP_KERNEL);
if (!ubuf->folios) {
ret = -ENOMEM;
goto err;
}
ubuf->offsets = kvcalloc(ubuf->pagecount, sizeof(*ubuf->offsets),
GFP_KERNEL);
ubuf->offsets = kvcalloc(pgcnt, sizeof(*ubuf->offsets), GFP_KERNEL);
if (!ubuf->offsets) {
ret = -ENOMEM;
goto err;
}
pgbuf = 0;
for (i = 0; i < head->count; i++) {
memfd = fget(list[i].memfd);
struct file *memfd = fget(list[i].memfd);
if (!memfd) {
ret = -EBADFD;
goto err;
}
/*
* Take the inode lock to protect against concurrent
* memfd_add_seals(), which takes this lock in write mode.
*/
inode_lock_shared(file_inode(memfd));
ret = check_memfd_seals(memfd);
if (ret < 0)
goto err;
pgcnt = list[i].size >> PAGE_SHIFT;
if (is_file_hugepages(memfd))
ret = handle_hugetlb_pages(ubuf, memfd,
list[i].offset,
pgcnt, &pgbuf);
else
ret = handle_shmem_pages(ubuf, memfd,
list[i].offset,
pgcnt, &pgbuf);
if (ret < 0)
goto err;
if (ret)
goto out_unlock;
ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
list[i].size);
out_unlock:
inode_unlock_shared(file_inode(memfd));
fput(memfd);
memfd = NULL;
if (ret)
goto err;
}
flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
@@ -403,10 +460,7 @@ static long udmabuf_create(struct miscdevice *device,
return ret;
err:
while (pgbuf > 0)
folio_put(ubuf->folios[--pgbuf]);
if (memfd)
fput(memfd);
unpin_all_folios(&ubuf->unpin_list);
kvfree(ubuf->offsets);
kvfree(ubuf->folios);
kfree(ubuf);


@@ -746,7 +746,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
nodemask_t *nmask, gfp_t gfp_mask,
bool allow_alloc_fallback);
struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
@@ -974,6 +975,30 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
return modified_mask;
}
static inline bool htlb_allow_alloc_fallback(int reason)
{
bool allowed_fallback = false;
/*
* Note: the memory offline, memory failure and migration syscalls will
* be allowed to fall back to other nodes due to the lack of a better choice,
* which might break the per-node hugetlb pool. Other cases will set
* __GFP_THISNODE to avoid breaking the per-node hugetlb pool.
*/
switch (reason) {
case MR_MEMORY_HOTPLUG:
case MR_MEMORY_FAILURE:
case MR_SYSCALL:
case MR_MEMPOLICY_MBIND:
allowed_fallback = true;
break;
default:
break;
}
return allowed_fallback;
}
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
@@ -1076,7 +1101,8 @@ alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask)
nodemask_t *nmask, gfp_t gfp_mask,
bool allow_alloc_fallback)
{
return NULL;
}
@@ -1199,6 +1225,11 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
return 0;
}
static inline bool htlb_allow_alloc_fallback(int reason)
{
return false;
}
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{


@@ -2069,13 +2069,13 @@ static void pofs_unpin(struct pages_or_folios *pofs)
/*
* Returns the number of collected folios. Return value is always >= 0.
*/
static unsigned long collect_longterm_unpinnable_folios(
static void collect_longterm_unpinnable_folios(
struct list_head *movable_folio_list,
struct pages_or_folios *pofs)
{
unsigned long i, collected = 0;
struct folio *prev_folio = NULL;
bool drain_allow = true;
unsigned long i;
for (i = 0; i < pofs->nr_entries; i++) {
struct folio *folio = pofs_get_folio(pofs, i);
@@ -2087,8 +2087,6 @@ static unsigned long collect_longterm_unpinnable_folios(
if (folio_is_longterm_pinnable(folio))
continue;
collected++;
if (folio_is_device_coherent(folio))
continue;
@@ -2110,8 +2108,6 @@ static unsigned long collect_longterm_unpinnable_folios(
NR_ISOLATED_ANON + folio_is_file_lru(folio),
folio_nr_pages(folio));
}
return collected;
}
/*
@@ -2162,6 +2158,7 @@ migrate_longterm_unpinnable_folios(struct list_head *movable_folio_list,
struct migration_target_control mtc = {
.nid = NUMA_NO_NODE,
.gfp_mask = GFP_USER | __GFP_NOWARN,
.reason = MR_LONGTERM_PIN,
};
if (migrate_pages(movable_folio_list, alloc_migration_target,
@@ -2187,11 +2184,9 @@ static long
check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
{
LIST_HEAD(movable_folio_list);
unsigned long collected;
collected = collect_longterm_unpinnable_folios(&movable_folio_list,
pofs);
if (!collected)
collect_longterm_unpinnable_folios(&movable_folio_list, pofs);
if (list_empty(&movable_folio_list))
return 0;
return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);


@@ -2494,7 +2494,7 @@ struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
/* folio migration callback function */
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask)
nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback)
{
spin_lock_irq(&hugetlb_lock);
if (available_huge_pages(h)) {
@@ -2509,6 +2509,10 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
}
spin_unlock_irq(&hugetlb_lock);
/* We cannot fall back to other nodes, as we could break the per-node pool. */
if (!allow_alloc_fallback)
gfp_mask |= __GFP_THISNODE;
return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid, nmask);
}
@@ -2524,7 +2528,13 @@ struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *v
gfp_mask = htlb_alloc_mask(h);
node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
folio = alloc_hugetlb_folio_nodemask(h, node, nodemask, gfp_mask);
/*
* This is used to allocate a temporary hugetlb to hold the copied
* content, which will then be copied again to the final hugetlb
* consuming a reservation. Set the alloc_fallback to false to indicate
* that breaking the per-node hugetlb pool is not allowed in this case.
*/
folio = alloc_hugetlb_folio_nodemask(h, node, nodemask, gfp_mask, false);
mpol_cond_put(mpol);
return folio;


@@ -961,6 +961,7 @@ struct migration_target_control {
int nid; /* preferred node id */
nodemask_t *nmask;
gfp_t gfp_mask;
enum migrate_reason reason;
};
/*


@@ -2691,6 +2691,7 @@ static int soft_offline_in_use_page(struct page *page)
struct migration_target_control mtc = {
.nid = NUMA_NO_NODE,
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
.reason = MR_MEMORY_FAILURE,
};
if (!huge && folio_test_large(folio)) {


@@ -1831,6 +1831,7 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
struct migration_target_control mtc = {
.nmask = &nmask,
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
.reason = MR_MEMORY_HOTPLUG,
};
int ret;


@@ -1015,6 +1015,7 @@ static long migrate_to_node(struct mm_struct *mm, int source, int dest,
struct migration_target_control mtc = {
.nid = dest,
.gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
.reason = MR_SYSCALL,
};
nodes_clear(nmask);
@@ -1182,8 +1183,20 @@ static struct folio *new_folio(struct folio *src, unsigned long start)
return NULL;
if (folio_test_hugetlb(src)) {
return alloc_hugetlb_folio_vma(folio_hstate(src),
vma, address);
struct mempolicy *mpol;
nodemask_t *nodemask;
struct folio *folio;
struct hstate *h;
gfp_t gfp;
int nid;
h = folio_hstate(src);
gfp = htlb_alloc_mask(h);
nid = huge_node(vma, address, gfp, &mpol, &nodemask);
folio = alloc_hugetlb_folio_nodemask(h, nid, nodemask, gfp,
htlb_allow_alloc_fallback(MR_MEMPOLICY_MBIND));
mpol_cond_put(mpol);
return folio;
}
if (folio_test_large(src))


@@ -2036,7 +2036,8 @@ struct folio *alloc_migration_target(struct folio *src, unsigned long private)
gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
return alloc_hugetlb_folio_nodemask(h, nid,
mtc->nmask, gfp_mask);
mtc->nmask, gfp_mask,
htlb_allow_alloc_fallback(mtc->reason));
}
if (folio_test_large(src)) {
@@ -2075,6 +2076,7 @@ static int do_move_pages_to_node(struct mm_struct *mm,
struct migration_target_control mtc = {
.nid = node,
.gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
.reason = MR_SYSCALL,
};
err = migrate_pages(pagelist, alloc_migration_target, NULL,


@@ -6248,6 +6248,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
struct migration_target_control mtc = {
.nid = zone_to_nid(cc->zone),
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
.reason = MR_CONTIG_RANGE,
};
lru_cache_disable();


@@ -969,7 +969,8 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | __GFP_NOWARN |
__GFP_NOMEMALLOC | GFP_NOWAIT,
.nid = target_nid,
.nmask = &allowed_mask
.nmask = &allowed_mask,
.reason = MR_DEMOTION,
};
if (list_empty(demote_folios))


@@ -1,56 +1,170 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#define __EXPORTED_HEADERS__
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <linux/fcntl.h>
#include <fcntl.h>
#include <malloc.h>
#include <stdbool.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <linux/memfd.h>
#include <linux/udmabuf.h>
#include "../../kselftest.h"
#define TEST_PREFIX "drivers/dma-buf/udmabuf"
#define NUM_PAGES 4
#define NUM_ENTRIES 4
#define MEMFD_SIZE 1024 /* in pages */
static int memfd_create(const char *name, unsigned int flags)
static unsigned int page_size;
static int create_memfd_with_seals(off64_t size, bool hpage)
{
return syscall(__NR_memfd_create, name, flags);
int memfd, ret;
unsigned int flags = MFD_ALLOW_SEALING;
if (hpage)
flags |= MFD_HUGETLB;
memfd = memfd_create("udmabuf-test", flags);
if (memfd < 0) {
ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX);
exit(KSFT_SKIP);
}
ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
if (ret < 0) {
ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
exit(KSFT_SKIP);
}
ret = ftruncate(memfd, size);
if (ret == -1) {
ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
exit(KSFT_FAIL);
}
return memfd;
}
static int create_udmabuf_list(int devfd, int memfd, off64_t memfd_size)
{
struct udmabuf_create_list *list;
int ubuf_fd, i;
list = malloc(sizeof(struct udmabuf_create_list) +
sizeof(struct udmabuf_create_item) * NUM_ENTRIES);
if (!list) {
ksft_print_msg("%s: [FAIL, udmabuf-malloc]\n", TEST_PREFIX);
exit(KSFT_FAIL);
}
for (i = 0; i < NUM_ENTRIES; i++) {
list->list[i].memfd = memfd;
list->list[i].offset = i * (memfd_size / NUM_ENTRIES);
list->list[i].size = getpagesize() * NUM_PAGES;
}
list->count = NUM_ENTRIES;
list->flags = UDMABUF_FLAGS_CLOEXEC;
ubuf_fd = ioctl(devfd, UDMABUF_CREATE_LIST, list);
free(list);
if (ubuf_fd < 0) {
ksft_print_msg("%s: [FAIL, udmabuf-create]\n", TEST_PREFIX);
exit(KSFT_FAIL);
}
return ubuf_fd;
}
static void write_to_memfd(void *addr, off64_t size, char chr)
{
int i;
for (i = 0; i < size / page_size; i++) {
*((char *)addr + (i * page_size)) = chr;
}
}
static void *mmap_fd(int fd, off64_t size)
{
void *addr;
addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (addr == MAP_FAILED) {
ksft_print_msg("%s: ubuf_fd mmap fail\n", TEST_PREFIX);
exit(KSFT_FAIL);
}
return addr;
}
static int compare_chunks(void *addr1, void *addr2, off64_t memfd_size)
{
off64_t off;
int i = 0, j, k = 0, ret = 0;
char char1, char2;
while (i < NUM_ENTRIES) {
off = i * (memfd_size / NUM_ENTRIES);
for (j = 0; j < NUM_PAGES; j++, k++) {
char1 = *((char *)addr1 + off + (j * getpagesize()));
char2 = *((char *)addr2 + (k * getpagesize()));
if (char1 != char2) {
ret = -1;
goto err;
}
}
i++;
}
err:
munmap(addr1, memfd_size);
munmap(addr2, NUM_ENTRIES * NUM_PAGES * getpagesize());
return ret;
}
int main(int argc, char *argv[])
{
struct udmabuf_create create;
int devfd, memfd, buf, ret;
off_t size;
void *mem;
off64_t size;
void *addr1, *addr2;
ksft_print_header();
ksft_set_plan(6);
devfd = open("/dev/udmabuf", O_RDWR);
if (devfd < 0) {
printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
exit(77);
ksft_print_msg(
"%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
TEST_PREFIX);
exit(KSFT_SKIP);
}
memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
if (memfd < 0) {
printf("%s: [skip,no-memfd]\n", TEST_PREFIX);
exit(77);
ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX);
exit(KSFT_SKIP);
}
ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
if (ret < 0) {
printf("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
exit(77);
ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
exit(KSFT_SKIP);
}
size = getpagesize() * NUM_PAGES;
ret = ftruncate(memfd, size);
if (ret == -1) {
printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
exit(1);
ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
exit(KSFT_FAIL);
}
memset(&create, 0, sizeof(create));
@@ -60,44 +174,86 @@ int main(int argc, char *argv[])
create.offset = getpagesize()/2;
create.size = getpagesize();
buf = ioctl(devfd, UDMABUF_CREATE, &create);
if (buf >= 0) {
printf("%s: [FAIL,test-1]\n", TEST_PREFIX);
exit(1);
}
if (buf >= 0)
ksft_test_result_fail("%s: [FAIL,test-1]\n", TEST_PREFIX);
else
ksft_test_result_pass("%s: [PASS,test-1]\n", TEST_PREFIX);
/* should fail (size not multiple of page) */
create.memfd = memfd;
create.offset = 0;
create.size = getpagesize()/2;
buf = ioctl(devfd, UDMABUF_CREATE, &create);
if (buf >= 0) {
printf("%s: [FAIL,test-2]\n", TEST_PREFIX);
exit(1);
}
if (buf >= 0)
ksft_test_result_fail("%s: [FAIL,test-2]\n", TEST_PREFIX);
else
ksft_test_result_pass("%s: [PASS,test-2]\n", TEST_PREFIX);
/* should fail (not memfd) */
create.memfd = 0; /* stdin */
create.offset = 0;
create.size = size;
buf = ioctl(devfd, UDMABUF_CREATE, &create);
if (buf >= 0) {
printf("%s: [FAIL,test-3]\n", TEST_PREFIX);
exit(1);
}
if (buf >= 0)
ksft_test_result_fail("%s: [FAIL,test-3]\n", TEST_PREFIX);
else
ksft_test_result_pass("%s: [PASS,test-3]\n", TEST_PREFIX);
/* should work */
page_size = getpagesize();
addr1 = mmap_fd(memfd, size);
write_to_memfd(addr1, size, 'a');
create.memfd = memfd;
create.offset = 0;
create.size = size;
buf = ioctl(devfd, UDMABUF_CREATE, &create);
if (buf < 0) {
printf("%s: [FAIL,test-4]\n", TEST_PREFIX);
exit(1);
}
if (buf < 0)
ksft_test_result_fail("%s: [FAIL,test-4]\n", TEST_PREFIX);
else
ksft_test_result_pass("%s: [PASS,test-4]\n", TEST_PREFIX);
munmap(addr1, size);
close(buf);
close(memfd);
/* should work (migration of 4k size pages)*/
size = MEMFD_SIZE * page_size;
memfd = create_memfd_with_seals(size, false);
addr1 = mmap_fd(memfd, size);
write_to_memfd(addr1, size, 'a');
buf = create_udmabuf_list(devfd, memfd, size);
addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize());
write_to_memfd(addr1, size, 'b');
ret = compare_chunks(addr1, addr2, size);
if (ret < 0)
ksft_test_result_fail("%s: [FAIL,test-5]\n", TEST_PREFIX);
else
ksft_test_result_pass("%s: [PASS,test-5]\n", TEST_PREFIX);
close(buf);
close(memfd);
/* should work (migration of 2MB size huge pages)*/
page_size = getpagesize() * 512; /* 2 MB */
size = MEMFD_SIZE * page_size;
memfd = create_memfd_with_seals(size, true);
addr1 = mmap_fd(memfd, size);
write_to_memfd(addr1, size, 'a');
buf = create_udmabuf_list(devfd, memfd, size);
addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize());
write_to_memfd(addr1, size, 'b');
ret = compare_chunks(addr1, addr2, size);
if (ret < 0)
ksft_test_result_fail("%s: [FAIL,test-6]\n", TEST_PREFIX);
else
ksft_test_result_pass("%s: [PASS,test-6]\n", TEST_PREFIX);
fprintf(stderr, "%s: ok\n", TEST_PREFIX);
close(buf);
close(memfd);
close(devfd);
ksft_print_msg("%s: ok\n", TEST_PREFIX);
ksft_print_cnts();
return 0;
}