vfs-6.17-rc6.fixes

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaL6SyQAKCRCRxhvAZXjc
 ouTGAQDGiTnaENiOzRhzNl1XONTRv8a1uV0pxg4W3fNdiRlxgQEA/O90/+nM48KC
 pdV3WHz5eGfcnMTpqgHxK6HYgwklJAY=
 =oKnm
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.17-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
 "fuse:

   - Prevent opening of non-regular backing files.

     Fuse doesn't support non-regular files anyway.

   - Check whether copy_file_range() returns a larger size than
     requested.

   - Prevent overflow in copy_file_range() as fuse currently only
     supports 32-bit sized copies.

   - Cache the blocksize value if the server returned a new value as
     inode->i_blkbits isn't modified directly anymore.

   - Fix i_blkbits handling for iomap partial writes.

     By default i_blkbits is set to PAGE_SIZE which causes iomap to mark
     the whole folio as uptodate even on a partial write. But fuseblk
     filesystems support choosing a blocksize smaller than PAGE_SIZE
     risking data corruption. Simply enforce PAGE_SIZE as blocksize for
     fuseblk's internal inode for now.

   - Prevent out-of-bounds acces in fuse_dev_write() when the number of
     bytes to be retrieved is truncated to the fc->max_pages limit.

  virtiofs:

   - Fix page faults for DAX page addresses.

  Misc:

   - Tighten file handle decoding from userns.

     Check that the decoded dentry itself has a valid idmapping in the
     user namespace.

   - Fix mount-notify selftests.

   - Fix some indentation errors.

   - Add an FMODE_ flag to indicate IOCB_HAS_METADATA availability.

     This will be moved to an FOP_* flag with a bit more rework needed
     for that to happen not suitable for a fix.

   - Don't silently ignore metadata for sync read/write.

   - Don't pointlessly log warning when reading coredump sysctls"

* tag 'vfs-6.17-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fuse: virtio_fs: fix page fault for DAX page address
  selftests/fs/mount-notify: Fix compilation failure.
  fhandle: use more consistent rules for decoding file handle from userns
  fuse: Block access to folio overlimit
  fuse: fix fuseblk i_blkbits for iomap partial writes
  fuse: reflect cached blocksize if blocksize was changed
  fuse: prevent overflow in copy_file_range return value
  fuse: check if copy_file_range() returns larger than requested size
  fuse: do not allow mapping a non-regular backing file
  coredump: don't pointlessly check and spew warnings
  fs: fix indentation style
  block: don't silently ignore metadata for sync read/write
  fs: add a FMODE_ flag to indicate IOCB_HAS_METADATA availability
  Please enter a commit message to explain why this merge is necessary,
  especially if it merges an updated upstream into a topic branch.
This commit is contained in:
Linus Torvalds 2025-09-08 07:53:01 -07:00
commit f777d1112e
16 changed files with 86 additions and 31 deletions

View File

@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/uio.h>
@ -54,7 +55,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
struct bio bio;
ssize_t ret;
WARN_ON_ONCE(iocb->ki_flags & IOCB_HAS_METADATA);
if (nr_pages <= DIO_INLINE_BIO_VECS)
vecs = inline_vecs;
else {
@ -131,7 +131,7 @@ static void blkdev_bio_end_io(struct bio *bio)
if (bio->bi_status && !dio->bio.bi_status)
dio->bio.bi_status = bio->bi_status;
if (!is_sync && (dio->iocb->ki_flags & IOCB_HAS_METADATA))
if (bio_integrity(bio))
bio_integrity_unmap_user(bio);
if (atomic_dec_and_test(&dio->ref)) {
@ -233,7 +233,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
}
bio->bi_opf |= REQ_NOWAIT;
}
if (!is_sync && (iocb->ki_flags & IOCB_HAS_METADATA)) {
if (iocb->ki_flags & IOCB_HAS_METADATA) {
ret = bio_integrity_map_iter(bio, iocb->private);
if (unlikely(ret))
goto fail;
@ -301,7 +301,7 @@ static void blkdev_bio_end_io_async(struct bio *bio)
ret = blk_status_to_errno(bio->bi_status);
}
if (iocb->ki_flags & IOCB_HAS_METADATA)
if (bio_integrity(bio))
bio_integrity_unmap_user(bio);
iocb->ki_complete(iocb, ret);
@ -422,7 +422,8 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
}
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
if (likely(nr_pages <= BIO_MAX_VECS)) {
if (likely(nr_pages <= BIO_MAX_VECS &&
!(iocb->ki_flags & IOCB_HAS_METADATA))) {
if (is_sync_kiocb(iocb))
return __blkdev_direct_IO_simple(iocb, iter, bdev,
nr_pages);
@ -687,6 +688,8 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (bdev_can_atomic_write(bdev))
filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;
if (blk_get_integrity(bdev->bd_disk))
filp->f_mode |= FMODE_HAS_METADATA;
ret = bdev_open(bdev, mode, filp->private_data, NULL, filp);
if (ret)

View File

@ -1466,11 +1466,15 @@ static int proc_dostring_coredump(const struct ctl_table *table, int write,
ssize_t retval;
char old_core_pattern[CORENAME_MAX_SIZE];
if (write)
return proc_dostring(table, write, buffer, lenp, ppos);
retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE);
error = proc_dostring(table, write, buffer, lenp, ppos);
if (error)
return error;
if (!check_coredump_socket()) {
strscpy(core_pattern, old_core_pattern, retval + 1);
return -EINVAL;

View File

@ -2048,7 +2048,7 @@ static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int writ
{
int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!error)
if (!error && !write)
validate_coredump_safety();
return error;
}

View File

@ -207,6 +207,14 @@ static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
if (!ctx->flags)
return 1;
/*
* Verify that the decoded dentry itself has a valid id mapping.
* In case the decoded dentry is the mountfd root itself, this
* verifies that the mountfd inode itself has a valid id mapping.
*/
if (!privileged_wrt_inode_uidgid(user_ns, idmap, d_inode(dentry)))
return 0;
/*
* It's racy as we're not taking rename_lock but we're able to ignore
* permissions and we just need an approximation whether we were able

View File

@ -1893,7 +1893,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
index = outarg->offset >> PAGE_SHIFT;
while (num) {
while (num && ap->num_folios < num_pages) {
struct folio *folio;
unsigned int folio_offset;
unsigned int nr_bytes;

View File

@ -1199,7 +1199,7 @@ static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
if (attr->blksize != 0)
blkbits = ilog2(attr->blksize);
else
blkbits = inode->i_sb->s_blocksize_bits;
blkbits = fc->blkbits;
stat->blksize = 1 << blkbits;
}
@ -1377,6 +1377,7 @@ retry:
generic_fillattr(idmap, request_mask, inode, stat);
stat->mode = fi->orig_i_mode;
stat->ino = fi->orig_ino;
stat->blksize = 1 << fi->cached_i_blkbits;
if (test_bit(FUSE_I_BTIME, &fi->state)) {
stat->btime = fi->i_btime;
stat->result_mask |= STATX_BTIME;

View File

@ -2960,7 +2960,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
.nodeid_out = ff_out->nodeid,
.fh_out = ff_out->fh,
.off_out = pos_out,
.len = len,
.len = min_t(size_t, len, UINT_MAX & PAGE_MASK),
.flags = flags
};
struct fuse_write_out outarg;
@ -3026,6 +3026,9 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
fc->no_copy_file_range = 1;
err = -EOPNOTSUPP;
}
if (!err && outarg.size > len)
err = -EIO;
if (err)
goto out;

View File

@ -210,6 +210,12 @@ struct fuse_inode {
/** Reference to backing file in passthrough mode */
struct fuse_backing *fb;
#endif
/*
* The underlying inode->i_blkbits value will not be modified,
* so preserve the blocksize specified by the server.
*/
u8 cached_i_blkbits;
};
/** FUSE inode state bits */
@ -969,6 +975,14 @@ struct fuse_conn {
/* Request timeout (in jiffies). 0 = no timeout */
unsigned int req_timeout;
} timeout;
/*
* This is a workaround until fuse uses iomap for reads.
* For fuseblk servers, this represents the blocksize passed in at
* mount time and for regular fuse servers, this is equivalent to
* inode->i_blkbits.
*/
u8 blkbits;
};
/*

View File

@ -289,6 +289,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
}
}
if (attr->blksize)
fi->cached_i_blkbits = ilog2(attr->blksize);
else
fi->cached_i_blkbits = fc->blkbits;
/*
* Don't set the sticky bit in i_mode, unless we want the VFS
* to check permissions. This prevents failures due to the
@ -1805,10 +1810,21 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
err = -EINVAL;
if (!sb_set_blocksize(sb, ctx->blksize))
goto err;
/*
* This is a workaround until fuse hooks into iomap for reads.
* Use PAGE_SIZE for the blocksize else if the writeback cache
* is enabled, buffered writes go through iomap and a read may
* overwrite partially written data if blocksize < PAGE_SIZE
*/
fc->blkbits = sb->s_blocksize_bits;
if (ctx->blksize != PAGE_SIZE &&
!sb_set_blocksize(sb, PAGE_SIZE))
goto err;
#endif
} else {
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
fc->blkbits = sb->s_blocksize_bits;
}
sb->s_subtype = ctx->subtype;

View File

@ -237,6 +237,11 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
if (!file)
goto out;
/* read/write/splice/mmap passthrough only relevant for regular files */
res = d_is_dir(file->f_path.dentry) ? -EISDIR : -EINVAL;
if (!d_is_reg(file->f_path.dentry))
goto out_fput;
backing_sb = file_inode(file)->i_sb;
res = -ELOOP;
if (backing_sb->s_stack_depth >= fc->max_stack_depth)

View File

@ -1016,7 +1016,7 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
if (kaddr)
*kaddr = fs->window_kaddr + offset;
if (pfn)
*pfn = fs->window_phys_addr + offset;
*pfn = PHYS_PFN(fs->window_phys_addr + offset);
return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}

View File

@ -2455,7 +2455,7 @@ struct vfsmount *clone_private_mount(const struct path *path)
return ERR_PTR(-EINVAL);
}
if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
if (__has_locked_children(old_mnt, path->dentry))

View File

@ -149,7 +149,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* Expect random access pattern */
#define FMODE_RANDOM ((__force fmode_t)(1 << 12))
/* FMODE_* bit 13 */
/* Supports IOCB_HAS_METADATA */
#define FMODE_HAS_METADATA ((__force fmode_t)(1 << 13))
/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)(1 << 14))

View File

@ -886,6 +886,9 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
if (req->flags & REQ_F_HAS_METADATA) {
struct io_async_rw *io = req->async_data;
if (!(file->f_mode & FMODE_HAS_METADATA))
return -EINVAL;
/*
* We have a union of meta fields with wpq used for buffered-io
* in io_async_rw, so fail it here.

View File

@ -2,6 +2,13 @@
// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
#define _GNU_SOURCE
// Needed for linux/fanotify.h
typedef struct {
int val[2];
} __kernel_fsid_t;
#define __kernel_fsid_t __kernel_fsid_t
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
@ -10,20 +17,12 @@
#include <sys/mount.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/fanotify.h>
#include "../../kselftest_harness.h"
#include "../statmount/statmount.h"
#include "../utils.h"
// Needed for linux/fanotify.h
#ifndef __kernel_fsid_t
typedef struct {
int val[2];
} __kernel_fsid_t;
#endif
#include <sys/fanotify.h>
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
static const int mark_cmds[] = {

View File

@ -2,6 +2,13 @@
// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
#define _GNU_SOURCE
// Needed for linux/fanotify.h
typedef struct {
int val[2];
} __kernel_fsid_t;
#define __kernel_fsid_t __kernel_fsid_t
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
@ -10,21 +17,12 @@
#include <sys/mount.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/fanotify.h>
#include "../../kselftest_harness.h"
#include "../../pidfd/pidfd.h"
#include "../statmount/statmount.h"
#include "../utils.h"
// Needed for linux/fanotify.h
#ifndef __kernel_fsid_t
typedef struct {
int val[2];
} __kernel_fsid_t;
#endif
#include <sys/fanotify.h>
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
static const int mark_types[] = {