nfs/localio: add direct IO enablement with sync and async IO support

[ Upstream commit 3feec68563 ]

This commit simply adds the required O_DIRECT plumbing.  It doesn't
address the fact that NFS doesn't ensure all writes are page aligned
(nor device logical block size aligned as required by O_DIRECT).

Because NFS will read-modify-write for IO that isn't aligned, LOCALIO
will not use O_DIRECT semantics by default if/when an application
requests the use of O_DIRECT.  Allow the use of O_DIRECT semantics by:
1: Adding a flag to the nfs_pgio_header struct to allow the NFS
   O_DIRECT layer to signal that O_DIRECT was used by the application
2: Adding a 'localio_O_DIRECT_semantics' NFS module parameter that
   when enabled will cause LOCALIO to use O_DIRECT semantics (this may
   cause IO to fail if applications do not properly align their IO).

This commit is derived from code developed by Weston Andros Adamson.

Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
Stable-dep-of: 992203a1fb ("nfs/localio: restore creds before releasing pageio data")
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Mike Snitzer 2024-11-15 20:40:53 -05:00 committed by Greg Kroah-Hartman
parent b0bf81e05b
commit a707c9a838
4 changed files with 98 additions and 10 deletions

View File

@ -306,6 +306,19 @@ is issuing IO to the underlying local filesystem that it is sharing with
the NFS server. See: fs/nfs/localio.c:nfs_local_doio() and
fs/nfs/localio.c:nfs_local_commit().
With normal NFS that makes use of RPC to issue IO to the server, if an
application uses O_DIRECT the NFS client will bypass the pagecache but
the NFS server will not. Because the NFS server's use of buffered IO
affords applications to be less precise with their alignment when
issuing IO to the NFS client. LOCALIO can be configured to use O_DIRECT
semantics by setting the 'localio_O_DIRECT_semantics' nfs module
parameter to Y, e.g.:
echo Y > /sys/module/nfs/parameters/localio_O_DIRECT_semantics
Once enabled, it will cause LOCALIO to use O_DIRECT semantics (this may
cause IO to fail if applications do not properly align their IO).
Security
========

View File

@ -320,6 +320,7 @@ static void nfs_read_sync_pgio_error(struct list_head *head, int error)
static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
{
get_dreq(hdr->dreq);
set_bit(NFS_IOHDR_ODIRECT, &hdr->flags);
}
static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {

View File

@ -35,6 +35,7 @@ struct nfs_local_kiocb {
struct bio_vec *bvec;
struct nfs_pgio_header *hdr;
struct work_struct work;
void (*aio_complete_work)(struct work_struct *);
struct nfsd_file *localio;
};
@ -50,6 +51,11 @@ static void nfs_local_fsync_work(struct work_struct *work);
static bool localio_enabled __read_mostly = true;
module_param(localio_enabled, bool, 0644);
static bool localio_O_DIRECT_semantics __read_mostly = false;
module_param(localio_O_DIRECT_semantics, bool, 0644);
MODULE_PARM_DESC(localio_O_DIRECT_semantics,
"LOCALIO will use O_DIRECT semantics to filesystem.");
static inline bool nfs_client_is_local(const struct nfs_client *clp)
{
return !!test_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
@ -287,10 +293,19 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
kfree(iocb);
return NULL;
}
init_sync_kiocb(&iocb->kiocb, file);
if (localio_O_DIRECT_semantics &&
test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
iocb->kiocb.ki_filp = file;
iocb->kiocb.ki_flags = IOCB_DIRECT;
} else
init_sync_kiocb(&iocb->kiocb, file);
iocb->kiocb.ki_pos = hdr->args.offset;
iocb->hdr = hdr;
iocb->kiocb.ki_flags &= ~IOCB_APPEND;
iocb->aio_complete_work = NULL;
return iocb;
}
@ -345,6 +360,18 @@ nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
nfs_local_hdr_release(hdr, hdr->task.tk_ops);
}
/*
* Complete the I/O from iocb->kiocb.ki_complete()
*
* Note that this function can be called from a bottom half context,
* hence we need to queue the rpc_call_done() etc to a workqueue
*/
static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
{
INIT_WORK(&iocb->work, iocb->aio_complete_work);
queue_work(nfsiod_workqueue, &iocb->work);
}
static void
nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
{
@ -367,6 +394,23 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
status > 0 ? status : 0, hdr->res.eof);
}
static void nfs_local_read_aio_complete_work(struct work_struct *work)
{
struct nfs_local_kiocb *iocb =
container_of(work, struct nfs_local_kiocb, work);
nfs_local_pgio_release(iocb);
}
static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
{
struct nfs_local_kiocb *iocb =
container_of(kiocb, struct nfs_local_kiocb, kiocb);
nfs_local_read_done(iocb, ret);
nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
}
static void nfs_local_call_read(struct work_struct *work)
{
struct nfs_local_kiocb *iocb =
@ -381,10 +425,10 @@ static void nfs_local_call_read(struct work_struct *work)
nfs_local_iter_init(&iter, iocb, READ);
status = filp->f_op->read_iter(&iocb->kiocb, &iter);
WARN_ON_ONCE(status == -EIOCBQUEUED);
nfs_local_read_done(iocb, status);
nfs_local_pgio_release(iocb);
if (status != -EIOCBQUEUED) {
nfs_local_read_done(iocb, status);
nfs_local_pgio_release(iocb);
}
revert_creds(save_cred);
}
@ -412,6 +456,11 @@ nfs_do_local_read(struct nfs_pgio_header *hdr,
nfs_local_pgio_init(hdr, call_ops);
hdr->res.eof = false;
if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
iocb->aio_complete_work = nfs_local_read_aio_complete_work;
}
INIT_WORK(&iocb->work, nfs_local_call_read);
queue_work(nfslocaliod_workqueue, &iocb->work);
@ -541,6 +590,24 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
nfs_local_pgio_done(hdr, status);
}
static void nfs_local_write_aio_complete_work(struct work_struct *work)
{
struct nfs_local_kiocb *iocb =
container_of(work, struct nfs_local_kiocb, work);
nfs_local_vfs_getattr(iocb);
nfs_local_pgio_release(iocb);
}
static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
{
struct nfs_local_kiocb *iocb =
container_of(kiocb, struct nfs_local_kiocb, kiocb);
nfs_local_write_done(iocb, ret);
nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
}
static void nfs_local_call_write(struct work_struct *work)
{
struct nfs_local_kiocb *iocb =
@ -559,11 +626,11 @@ static void nfs_local_call_write(struct work_struct *work)
file_start_write(filp);
status = filp->f_op->write_iter(&iocb->kiocb, &iter);
file_end_write(filp);
WARN_ON_ONCE(status == -EIOCBQUEUED);
nfs_local_write_done(iocb, status);
nfs_local_vfs_getattr(iocb);
nfs_local_pgio_release(iocb);
if (status != -EIOCBQUEUED) {
nfs_local_write_done(iocb, status);
nfs_local_vfs_getattr(iocb);
nfs_local_pgio_release(iocb);
}
revert_creds(save_cred);
current->flags = old_flags;
@ -599,10 +666,16 @@ nfs_do_local_write(struct nfs_pgio_header *hdr,
case NFS_FILE_SYNC:
iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
}
nfs_local_pgio_init(hdr, call_ops);
nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
iocb->aio_complete_work = nfs_local_write_aio_complete_work;
}
INIT_WORK(&iocb->work, nfs_local_call_write);
queue_work(nfslocaliod_workqueue, &iocb->work);

View File

@ -1637,6 +1637,7 @@ enum {
NFS_IOHDR_RESEND_PNFS,
NFS_IOHDR_RESEND_MDS,
NFS_IOHDR_UNSTABLE_WRITES,
NFS_IOHDR_ODIRECT,
};
struct nfs_io_completion;