Merge: io_uring: update to upstream v6.6
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/3318

Update io_uring and its dependencies to upstream kernel version 6.6.

JIRA: https://issues.redhat.com/browse/RHEL-12076
JIRA: https://issues.redhat.com/browse/RHEL-14998
JIRA: https://issues.redhat.com/browse/RHEL-4447
CVE: CVE-2023-46862
Omitted-Fix: ab69838e7c75 ("io_uring/kbuf: Fix check of BID wrapping in provided buffers")
Omitted-Fix: f74c746e476b ("io_uring/kbuf: Allow the full buffer id space for provided buffers")

This is the list of new features available (includes upstream kernel versions 6.3-6.6):
- User-specified ring buffer
- Provided Buffers allocated by the kernel
- Ability to register the ring fd
- Multi-shot timeouts
- Ability to pass custom flags to the completion queue entry for ring messages

All of these features are covered by the liburing tests. In my testing,
no-mmap-inval.t failed because of a broken test. socket-uring-cmd.t also
failed because of a missing selinux policy rule. Try running audit2allow
if you see a failure in that test.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Approved-by: Wander Lairson Costa <wander@redhat.com>
Approved-by: Donald Dutile <ddutile@redhat.com>
Approved-by: Chris von Recklinghausen <crecklin@redhat.com>
Approved-by: Jiri Benc <jbenc@redhat.com>
Approved-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Scott Weaver <scweaver@redhat.com>
commit 8d95883db0
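As a quick, illustrative check of the items listed in the merge description (not part of the patch itself), the sketch below creates a ring with the raw io_uring_setup(2) syscall and inspects the advertised feature bits; it assumes uapi headers new enough to define IORING_FEAT_REG_REG_RING, the bit behind the registered-ring-fd support. liburing's test suite remains the authoritative coverage, as noted above.

/* Illustrative only; not part of this merge request. */
#include <linux/io_uring.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_io_uring_setup(unsigned int entries, struct io_uring_params *p)
{
	return syscall(__NR_io_uring_setup, entries, p);
}

int main(void)
{
	struct io_uring_params p;
	int fd;

	memset(&p, 0, sizeof(p));
	fd = sys_io_uring_setup(8, &p);
	if (fd < 0) {
		/* errno is EPERM when the io_uring_disabled sysctl forbids creation */
		perror("io_uring_setup");
		return 1;
	}
	printf("registered ring fd support (IORING_FEAT_REG_REG_RING): %s\n",
	       p.features & IORING_FEAT_REG_REG_RING ? "yes" : "no");
	close(fd);
	return 0;
}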
@@ -448,17 +448,26 @@ io_uring_disabled
 Prevents all processes from creating new io_uring instances. Enabling this
 shrinks the kernel's attack surface.
 
-= ==================================================================
-0  All processes can create io_uring instances as normal. This is the
-   default setting.
-1  io_uring creation is disabled for unprivileged processes.
-   io_uring_setup fails with -EPERM unless the calling process is
-   privileged (CAP_SYS_ADMIN). Existing io_uring instances can
-   still be used.
-2  io_uring creation is disabled for all processes. io_uring_setup
+= ======================================================================
+0  All processes can create io_uring instances as normal.
+1  io_uring creation is disabled (io_uring_setup() will fail with
+   -EPERM) for unprivileged processes not in the io_uring_group group.
+   Existing io_uring instances can still be used. See the
+   documentation for io_uring_group for more information.
+2  io_uring creation is disabled for all processes. io_uring_setup()
    always fails with -EPERM. Existing io_uring instances can still be
-   used.
-= ==================================================================
+   used. This is the default setting.
+= ======================================================================
+
+
+io_uring_group
+==============
+
+When io_uring_disabled is set to 1, a process must either be
+privileged (CAP_SYS_ADMIN) or be in the io_uring_group group in order
+to create an io_uring instance. If io_uring_group is set to -1 (the
+default), only processes with the CAP_SYS_ADMIN capability may create
+io_uring instances.
 
 
 kexec_load_disabled
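For reference, the two knobs documented above appear under /proc/sys/kernel/ through the usual sysctl-to-procfs mapping. A minimal sketch (file paths assumed from that mapping, not taken from this patch) that reports the active policy:

#include <stdio.h>

/* Report the io_uring creation policy described above. */
static int read_sysctl(const char *path, int *val)
{
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;
	if (fscanf(f, "%d", val) != 1)
		*val = -1;
	fclose(f);
	return 0;
}

int main(void)
{
	int disabled = -1, group = -1;

	if (read_sysctl("/proc/sys/kernel/io_uring_disabled", &disabled) < 0) {
		printf("io_uring_disabled sysctl not present on this kernel\n");
		return 0;
	}
	read_sysctl("/proc/sys/kernel/io_uring_group", &group);

	if (disabled == 0)
		printf("io_uring creation allowed for all processes\n");
	else if (disabled == 1)
		printf("io_uring limited to CAP_SYS_ADMIN or members of gid %d\n", group);
	else
		printf("io_uring creation disabled system-wide (the default documented above)\n");
	return 0;
}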
@@ -10086,7 +10086,6 @@ F:	io_uring/
 F:	include/linux/io_uring.h
 F:	include/linux/io_uring_types.h
 F:	include/uapi/linux/io_uring.h
-F:	tools/io_uring/
 
 IPMI SUBSYSTEM
 M:	Corey Minyard <minyard@acm.org>
@@ -29,9 +29,11 @@ static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
 	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
 	if (!bmd)
 		return NULL;
-	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
 	bmd->iter = *data;
-	bmd->iter.iov = bmd->iov;
+	if (iter_is_iovec(data)) {
+		memcpy(bmd->iov, iter_iov(data), sizeof(struct iovec) * data->nr_segs);
+		bmd->iter.__iov = bmd->iov;
+	}
 	return bmd;
 }
 
@@ -636,7 +638,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 			copy = true;
 	else if (iov_iter_is_bvec(iter))
 		map_bvec = true;
-	else if (!iter_is_iovec(iter))
+	else if (!user_backed_iter(iter))
 		copy = true;
 	else if (queue_virt_boundary(q))
 		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);
@@ -677,9 +679,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 		struct rq_map_data *map_data, void __user *ubuf,
 		unsigned long len, gfp_t gfp_mask)
 {
-	struct iovec iov;
 	struct iov_iter i;
-	int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);
+	int ret = import_ubuf(rq_data_dir(rq), ubuf, len, &i);
 
 	if (unlikely(ret < 0))
 		return ret;
@ -506,7 +506,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
|
|||
* during an unstable branch.
|
||||
*/
|
||||
filp->f_flags |= O_LARGEFILE;
|
||||
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
|
||||
filp->f_mode |= FMODE_BUF_RASYNC;
|
||||
|
||||
/*
|
||||
* Use the file private data to store the holder for exclusive openes.
|
||||
|
@ -520,6 +520,9 @@ static int blkdev_open(struct inode *inode, struct file *filp)
|
|||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
|
||||
if (bdev_nowait(bdev))
|
||||
filp->f_mode |= FMODE_NOWAIT;
|
||||
|
||||
filp->f_mapping = bdev->bd_inode->i_mapping;
|
||||
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
|
||||
return 0;
|
||||
|
|
|
@@ -362,7 +362,7 @@ static unsigned zero_mmap_capabilities(struct file *file)
 /* can't do an in-place private mapping if there's no MMU */
 static inline int private_mapping_ok(struct vm_area_struct *vma)
 {
-	return vma->vm_flags & VM_MAYSHARE;
+	return is_nommu_shared_mapping(vma->vm_flags);
 }
 #else
 
|
@ -284,11 +284,12 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
|
|||
}
|
||||
|
||||
while (dim) {
|
||||
const struct iovec *iov = iter_iov(from);
|
||||
int ret;
|
||||
unsigned long count = 0;
|
||||
|
||||
ret = hfi1_user_sdma_process_request(
|
||||
fd, (struct iovec *)(from->iov + done),
|
||||
fd, (struct iovec *)(iov + done),
|
||||
dim, &count);
|
||||
if (ret) {
|
||||
reqs = ret;
|
||||
|
|
|
@ -2246,10 +2246,10 @@ static ssize_t qib_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
|||
struct qib_ctxtdata *rcd = ctxt_fp(iocb->ki_filp);
|
||||
struct qib_user_sdma_queue *pq = fp->pq;
|
||||
|
||||
if (!iter_is_iovec(from) || !from->nr_segs || !pq)
|
||||
if (!from->user_backed || !from->nr_segs || !pq)
|
||||
return -EINVAL;
|
||||
|
||||
return qib_user_sdma_writev(rcd, pq, from->iov, from->nr_segs);
|
||||
return qib_user_sdma_writev(rcd, pq, iter_iov(from), from->nr_segs);
|
||||
}
|
||||
|
||||
static struct class *qib_class;
|
||||
|
|
|
@ -1473,7 +1473,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
|
|||
skb->truesize += skb->data_len;
|
||||
|
||||
for (i = 1; i < it->nr_segs; i++) {
|
||||
size_t fragsz = it->iov[i].iov_len;
|
||||
const struct iovec *iov = iter_iov(it);
|
||||
size_t fragsz = iov->iov_len;
|
||||
struct page *page;
|
||||
void *frag;
|
||||
|
||||
|
|
|
@ -551,7 +551,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
|||
struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
|
||||
{
|
||||
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
|
||||
const struct nvme_uring_cmd *cmd = ioucmd->cmd;
|
||||
const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
|
||||
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
|
||||
struct nvme_uring_data d;
|
||||
struct nvme_command c;
|
||||
|
|
|
@ -1246,7 +1246,7 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)
|
|||
p->kiocb = kiocb;
|
||||
if (p->aio) {
|
||||
p->to_free = dup_iter(&p->data, to, GFP_KERNEL);
|
||||
if (!p->to_free) {
|
||||
if (!iter_is_ubuf(&p->data) && !p->to_free) {
|
||||
kfree(p);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
|
|
@ -613,7 +613,7 @@ ep_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
|||
if (!priv)
|
||||
goto fail;
|
||||
priv->to_free = dup_iter(&priv->to, to, GFP_KERNEL);
|
||||
if (!priv->to_free) {
|
||||
if (!iter_is_ubuf(&priv->to) && !priv->to_free) {
|
||||
kfree(priv);
|
||||
goto fail;
|
||||
}
|
||||
|
|
|
@ -641,7 +641,7 @@ vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls)
|
|||
{
|
||||
int sgl_count = 0;
|
||||
|
||||
if (!iter || !iter->iov) {
|
||||
if (!iter || !iter_iov(iter)) {
|
||||
pr_err("%s: iter->iov is NULL, but expected bytes: %zu"
|
||||
" present\n", __func__, bytes);
|
||||
return -EINVAL;
|
||||
|
|
|
@ -3621,10 +3621,15 @@ static int check_direct_read(struct btrfs_fs_info *fs_info,
|
|||
if (!iter_is_iovec(iter))
|
||||
return 0;
|
||||
|
||||
for (seg = 0; seg < iter->nr_segs; seg++)
|
||||
for (i = seg + 1; i < iter->nr_segs; i++)
|
||||
if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
|
||||
for (seg = 0; seg < iter->nr_segs; seg++) {
|
||||
for (i = seg + 1; i < iter->nr_segs; i++) {
|
||||
const struct iovec *iov1 = iter_iov(iter) + seg;
|
||||
const struct iovec *iov2 = iter_iov(iter) + i;
|
||||
|
||||
if (iov1->iov_base == iov2->iov_base)
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -446,7 +446,7 @@ bailout:
|
|||
|
||||
static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS;
|
||||
return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS;
|
||||
}
|
||||
|
||||
static unsigned long cramfs_physmem_get_unmapped_area(struct file *file,
|
||||
|
|
|
@ -153,8 +153,6 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
|
|||
path_noexec(&file->f_path)))
|
||||
goto exit;
|
||||
|
||||
fsnotify_open(file);
|
||||
|
||||
error = -ENOEXEC;
|
||||
|
||||
read_lock(&binfmt_lock);
|
||||
|
@ -939,9 +937,6 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
|
|||
if (err)
|
||||
goto exit;
|
||||
|
||||
if (name->name[0] != '\0')
|
||||
fsnotify_open(file);
|
||||
|
||||
out:
|
||||
return file;
|
||||
|
||||
|
|
|
@ -902,7 +902,8 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
|
|||
return ret;
|
||||
}
|
||||
|
||||
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
|
||||
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC |
|
||||
FMODE_DIO_PARALLEL_WRITE;
|
||||
return dquot_file_open(inode, filp);
|
||||
}
|
||||
|
||||
|
|
|
@ -236,7 +236,6 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
|
|||
retval = PTR_ERR(file);
|
||||
} else {
|
||||
retval = fd;
|
||||
fsnotify_open(file);
|
||||
fd_install(fd, file);
|
||||
}
|
||||
path_put(&path);
|
||||
|
|
|
@ -1370,7 +1370,7 @@ out:
|
|||
|
||||
static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
|
||||
{
|
||||
return (unsigned long)ii->iov->iov_base + ii->iov_offset;
|
||||
return (unsigned long)iter_iov(ii)->iov_base + ii->iov_offset;
|
||||
}
|
||||
|
||||
static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
|
||||
|
|
|
@ -19,10 +19,12 @@
|
|||
* Private flags for iomap_dio, must not overlap with the public ones in
|
||||
* iomap.h:
|
||||
*/
|
||||
#define IOMAP_DIO_WRITE_FUA (1 << 28)
|
||||
#define IOMAP_DIO_NEED_SYNC (1 << 29)
|
||||
#define IOMAP_DIO_WRITE (1 << 30)
|
||||
#define IOMAP_DIO_DIRTY (1 << 31)
|
||||
#define IOMAP_DIO_CALLER_COMP (1U << 26)
|
||||
#define IOMAP_DIO_INLINE_COMP (1U << 27)
|
||||
#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
|
||||
#define IOMAP_DIO_NEED_SYNC (1U << 29)
|
||||
#define IOMAP_DIO_WRITE (1U << 30)
|
||||
#define IOMAP_DIO_DIRTY (1U << 31)
|
||||
|
||||
struct iomap_dio {
|
||||
struct kiocb *iocb;
|
||||
|
@ -40,7 +42,6 @@ struct iomap_dio {
|
|||
struct {
|
||||
struct iov_iter *iter;
|
||||
struct task_struct *waiter;
|
||||
struct bio *poll_bio;
|
||||
} submit;
|
||||
|
||||
/* used for aio completion: */
|
||||
|
@ -53,12 +54,14 @@ struct iomap_dio {
|
|||
static void iomap_dio_submit_bio(const struct iomap_iter *iter,
|
||||
struct iomap_dio *dio, struct bio *bio, loff_t pos)
|
||||
{
|
||||
struct kiocb *iocb = dio->iocb;
|
||||
|
||||
atomic_inc(&dio->ref);
|
||||
|
||||
/* Sync dio can't be polled reliably */
|
||||
if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) {
|
||||
bio_set_polled(bio, dio->iocb);
|
||||
dio->submit.poll_bio = bio;
|
||||
if ((iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(iocb)) {
|
||||
bio_set_polled(bio, iocb);
|
||||
WRITE_ONCE(iocb->private, bio);
|
||||
}
|
||||
|
||||
if (dio->dops && dio->dops->submit_io)
|
||||
|
@ -126,6 +129,11 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_dio_complete);
|
||||
|
||||
static ssize_t iomap_dio_deferred_complete(void *data)
|
||||
{
|
||||
return iomap_dio_complete(data);
|
||||
}
|
||||
|
||||
static void iomap_dio_complete_work(struct work_struct *work)
|
||||
{
|
||||
struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
|
||||
|
@ -148,27 +156,69 @@ static void iomap_dio_bio_end_io(struct bio *bio)
|
|||
{
|
||||
struct iomap_dio *dio = bio->bi_private;
|
||||
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
|
||||
struct kiocb *iocb = dio->iocb;
|
||||
|
||||
if (bio->bi_status)
|
||||
iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
|
||||
if (!atomic_dec_and_test(&dio->ref))
|
||||
goto release_bio;
|
||||
|
||||
if (atomic_dec_and_test(&dio->ref)) {
|
||||
if (dio->wait_for_completion) {
|
||||
struct task_struct *waiter = dio->submit.waiter;
|
||||
WRITE_ONCE(dio->submit.waiter, NULL);
|
||||
blk_wake_io_task(waiter);
|
||||
} else if (dio->flags & IOMAP_DIO_WRITE) {
|
||||
struct inode *inode = file_inode(dio->iocb->ki_filp);
|
||||
/*
|
||||
* Synchronous dio, task itself will handle any completion work
|
||||
* that needs after IO. All we need to do is wake the task.
|
||||
*/
|
||||
if (dio->wait_for_completion) {
|
||||
struct task_struct *waiter = dio->submit.waiter;
|
||||
|
||||
WRITE_ONCE(dio->iocb->private, NULL);
|
||||
INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
|
||||
queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
|
||||
} else {
|
||||
WRITE_ONCE(dio->iocb->private, NULL);
|
||||
iomap_dio_complete_work(&dio->aio.work);
|
||||
}
|
||||
WRITE_ONCE(dio->submit.waiter, NULL);
|
||||
blk_wake_io_task(waiter);
|
||||
goto release_bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flagged with IOMAP_DIO_INLINE_COMP, we can complete it inline
|
||||
*/
|
||||
if (dio->flags & IOMAP_DIO_INLINE_COMP) {
|
||||
WRITE_ONCE(iocb->private, NULL);
|
||||
iomap_dio_complete_work(&dio->aio.work);
|
||||
goto release_bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this dio is flagged with IOMAP_DIO_CALLER_COMP, then schedule
|
||||
* our completion that way to avoid an async punt to a workqueue.
|
||||
*/
|
||||
if (dio->flags & IOMAP_DIO_CALLER_COMP) {
|
||||
/* only polled IO cares about private cleared */
|
||||
iocb->private = dio;
|
||||
iocb->dio_complete = iomap_dio_deferred_complete;
|
||||
|
||||
/*
|
||||
* Invoke ->ki_complete() directly. We've assigned our
|
||||
* dio_complete callback handler, and since the issuer set
|
||||
* IOCB_DIO_CALLER_COMP, we know their ki_complete handler will
|
||||
* notice ->dio_complete being set and will defer calling that
|
||||
* handler until it can be done from a safe task context.
|
||||
*
|
||||
* Note that the 'res' being passed in here is not important
|
||||
* for this case. The actual completion value of the request
|
||||
* will be gotten from dio_complete when that is run by the
|
||||
* issuer.
|
||||
*/
|
||||
iocb->ki_complete(iocb, 0);
|
||||
goto release_bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* Async DIO completion that requires filesystem level completion work
|
||||
* gets punted to a work queue to complete as the operation may require
|
||||
* more IO to be issued to finalise filesystem metadata changes or
|
||||
* guarantee data integrity.
|
||||
*/
|
||||
INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
|
||||
queue_work(file_inode(iocb->ki_filp)->i_sb->s_dio_done_wq,
|
||||
&dio->aio.work);
|
||||
release_bio:
|
||||
if (should_dirty) {
|
||||
bio_check_pages_dirty(bio);
|
||||
} else {
|
||||
|
@ -197,7 +247,7 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
|
|||
/*
|
||||
* Figure out the bio's operation flags from the dio request, the
|
||||
* mapping, and whether or not we want FUA. Note that we can end up
|
||||
* clearing the WRITE_FUA flag in the dio request.
|
||||
* clearing the WRITE_THROUGH flag in the dio request.
|
||||
*/
|
||||
static inline unsigned int iomap_dio_bio_opflags(struct iomap_dio *dio,
|
||||
const struct iomap *iomap, bool use_fua)
|
||||
|
@ -217,7 +267,7 @@ static inline unsigned int iomap_dio_bio_opflags(struct iomap_dio *dio,
|
|||
if (use_fua)
|
||||
opflags |= REQ_FUA;
|
||||
else
|
||||
dio->flags &= ~IOMAP_DIO_WRITE_FUA;
|
||||
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
|
||||
|
||||
return opflags;
|
||||
}
|
||||
|
@ -258,12 +308,19 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
|
|||
* Use a FUA write if we need datasync semantics, this is a pure
|
||||
* data IO that doesn't require any metadata updates (including
|
||||
* after IO completion such as unwritten extent conversion) and
|
||||
* the underlying device supports FUA. This allows us to avoid
|
||||
* cache flushes on IO completion.
|
||||
* the underlying device either supports FUA or doesn't have
|
||||
* a volatile write cache. This allows us to avoid cache flushes
|
||||
* on IO completion. If we can't use writethrough and need to
|
||||
* sync, disable in-task completions as dio completion will
|
||||
* need to call generic_write_sync() which will do a blocking
|
||||
* fsync / cache flush call.
|
||||
*/
|
||||
if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
|
||||
(dio->flags & IOMAP_DIO_WRITE_FUA) && bdev_fua(iomap->bdev))
|
||||
(dio->flags & IOMAP_DIO_WRITE_THROUGH) &&
|
||||
(bdev_fua(iomap->bdev) || !bdev_write_cache(iomap->bdev)))
|
||||
use_fua = true;
|
||||
else if (dio->flags & IOMAP_DIO_NEED_SYNC)
|
||||
dio->flags &= ~IOMAP_DIO_CALLER_COMP;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -278,10 +335,23 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
|
|||
goto out;
|
||||
|
||||
/*
|
||||
* We can only poll for single bio I/Os.
|
||||
* We can only do deferred completion for pure overwrites that
|
||||
* don't require additional IO at completion. This rules out
|
||||
* writes that need zeroing or extent conversion, extend
|
||||
* the file size, or issue journal IO or cache flushes
|
||||
* during completion processing.
|
||||
*/
|
||||
if (need_zeroout ||
|
||||
((dio->flags & IOMAP_DIO_NEED_SYNC) && !use_fua) ||
|
||||
((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode)))
|
||||
dio->flags &= ~IOMAP_DIO_CALLER_COMP;
|
||||
|
||||
/*
|
||||
* The rules for polled IO completions follow the guidelines as the
|
||||
* ones we set for inline and deferred completions. If none of those
|
||||
* are available for this IO, clear the polled flag.
|
||||
*/
|
||||
if (!(dio->flags & (IOMAP_DIO_INLINE_COMP|IOMAP_DIO_CALLER_COMP)))
|
||||
dio->iocb->ki_flags &= ~IOCB_HIPRI;
|
||||
|
||||
if (need_zeroout) {
|
||||
|
@ -502,9 +572,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
|||
|
||||
dio->submit.iter = iter;
|
||||
dio->submit.waiter = current;
|
||||
dio->submit.poll_bio = NULL;
|
||||
|
||||
if (iov_iter_rw(iter) == READ) {
|
||||
/* reads can always complete inline */
|
||||
dio->flags |= IOMAP_DIO_INLINE_COMP;
|
||||
|
||||
if (iomi.pos >= dio->i_size)
|
||||
goto out_free_dio;
|
||||
|
||||
|
@ -523,6 +595,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
|||
iomi.flags |= IOMAP_WRITE;
|
||||
dio->flags |= IOMAP_DIO_WRITE;
|
||||
|
||||
/*
|
||||
* Flag as supporting deferred completions, if the issuer
|
||||
* groks it. This can avoid a workqueue punt for writes.
|
||||
* We may later clear this flag if we need to do other IO
|
||||
* as part of this IO completion.
|
||||
*/
|
||||
if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
|
||||
dio->flags |= IOMAP_DIO_CALLER_COMP;
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (filemap_range_has_page(mapping, iomi.pos, end)) {
|
||||
ret = -EAGAIN;
|
||||
|
@ -536,13 +617,16 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
|||
dio->flags |= IOMAP_DIO_NEED_SYNC;
|
||||
|
||||
/*
|
||||
* For datasync only writes, we optimistically try using FUA for
|
||||
* this IO. Any non-FUA write that occurs will clear this flag,
|
||||
* hence we know before completion whether a cache flush is
|
||||
* necessary.
|
||||
* For datasync only writes, we optimistically try using
|
||||
* WRITE_THROUGH for this IO. This flag requires either
|
||||
* FUA writes through the device's write cache, or a
|
||||
* normal write to a device without a volatile write
|
||||
* cache. For the former, Any non-FUA write that occurs
|
||||
* will clear this flag, hence we know before completion
|
||||
* whether a cache flush is necessary.
|
||||
*/
|
||||
if ((iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) == IOCB_DSYNC)
|
||||
dio->flags |= IOMAP_DIO_WRITE_FUA;
|
||||
dio->flags |= IOMAP_DIO_WRITE_THROUGH;
|
||||
}
|
||||
|
||||
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
|
||||
|
@ -615,14 +699,13 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
|||
iomap_dio_set_error(dio, ret);
|
||||
|
||||
/*
|
||||
* If all the writes we issued were FUA, we don't need to flush the
|
||||
* cache on IO completion. Clear the sync flag for this case.
|
||||
* If all the writes we issued were already written through to the
|
||||
* media, we don't need to flush the cache on IO completion. Clear the
|
||||
* sync flag for this case.
|
||||
*/
|
||||
if (dio->flags & IOMAP_DIO_WRITE_FUA)
|
||||
if (dio->flags & IOMAP_DIO_WRITE_THROUGH)
|
||||
dio->flags &= ~IOMAP_DIO_NEED_SYNC;
|
||||
|
||||
WRITE_ONCE(iocb->private, dio->submit.poll_bio);
|
||||
|
||||
/*
|
||||
* We are about to drop our additional submission reference, which
|
||||
* might be the last reference to the dio. There are three different
|
||||
|
|
|
@ -886,6 +886,11 @@ static int do_dentry_open(struct file *f,
|
|||
truncate_pagecache(inode, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Once we return a file with FMODE_OPENED, __fput() will call
|
||||
* fsnotify_close(), so we need fsnotify_open() here for symmetry.
|
||||
*/
|
||||
fsnotify_open(f);
|
||||
return 0;
|
||||
|
||||
cleanup_all:
|
||||
|
@ -1270,7 +1275,6 @@ static long do_sys_openat2(int dfd, const char __user *filename,
|
|||
put_unused_fd(fd);
|
||||
fd = PTR_ERR(f);
|
||||
} else {
|
||||
fsnotify_open(f);
|
||||
fd_install(fd, f);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -390,6 +390,12 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
|||
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
|
||||
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
|
||||
|
||||
/*
|
||||
* Overlayfs doesn't support deferred completions, don't copy
|
||||
* this property in case it is set by the issuer.
|
||||
*/
|
||||
ifl &= ~IOCB_DIO_CALLER_COMP;
|
||||
|
||||
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
if (is_sync_kiocb(iocb)) {
|
||||
file_start_write(real.file);
|
||||
|
|
|
@ -40,7 +40,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
|
|||
}
|
||||
|
||||
if (atomic_read(&mm->mm_count) > 1 ||
|
||||
vma->vm_flags & VM_MAYSHARE) {
|
||||
is_nommu_shared_mapping(vma->vm_flags)) {
|
||||
sbytes += size;
|
||||
} else {
|
||||
bytes += size;
|
||||
|
|
|
@ -264,7 +264,7 @@ out:
|
|||
*/
|
||||
static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
if (!(vma->vm_flags & (VM_SHARED | VM_MAYSHARE)))
|
||||
if (!is_nommu_shared_mapping(vma->vm_flags))
|
||||
return -ENOSYS;
|
||||
|
||||
file_accessed(file);
|
||||
|
|
|
@ -749,15 +749,14 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
|
|||
return -EOPNOTSUPP;
|
||||
|
||||
while (iov_iter_count(iter)) {
|
||||
struct iovec iovec = iov_iter_iovec(iter);
|
||||
ssize_t nr;
|
||||
|
||||
if (type == READ) {
|
||||
nr = filp->f_op->read(filp, iovec.iov_base,
|
||||
iovec.iov_len, ppos);
|
||||
nr = filp->f_op->read(filp, iter_iov_addr(iter),
|
||||
iter_iov_len(iter), ppos);
|
||||
} else {
|
||||
nr = filp->f_op->write(filp, iovec.iov_base,
|
||||
iovec.iov_len, ppos);
|
||||
nr = filp->f_op->write(filp, iter_iov_addr(iter),
|
||||
iter_iov_len(iter), ppos);
|
||||
}
|
||||
|
||||
if (nr < 0) {
|
||||
|
@ -766,7 +765,7 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
|
|||
break;
|
||||
}
|
||||
ret += nr;
|
||||
if (nr != iovec.iov_len)
|
||||
if (nr != iter_iov_len(iter))
|
||||
break;
|
||||
iov_iter_advance(iter, nr);
|
||||
}
|
||||
|
|
|
@ -63,7 +63,7 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
|
|||
*/
|
||||
static int romfs_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS;
|
||||
return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS;
|
||||
}
|
||||
|
||||
static unsigned romfs_mmap_capabilities(struct file *file)
|
||||
|
|
|
@ -1171,7 +1171,8 @@ xfs_file_open(
|
|||
{
|
||||
if (xfs_is_shutdown(XFS_M(inode->i_sb)))
|
||||
return -EIO;
|
||||
file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC;
|
||||
file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
|
||||
FMODE_DIO_PARALLEL_WRITE;
|
||||
return generic_file_open(inode, file);
|
||||
}
|
||||
|
||||
|
|
|
@ -159,6 +159,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
|
|||
/* File supports DIRECT IO */
|
||||
#define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000)
|
||||
|
||||
/* File supports non-exclusive O_DIRECT writes from multiple threads */
|
||||
#define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000)
|
||||
|
||||
/* File was opened by fanotify and shouldn't generate fanotify events */
|
||||
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
|
||||
|
||||
|
@ -312,20 +315,60 @@ enum rw_hint {
|
|||
#define IOCB_NOIO (1 << 20)
|
||||
/* can use bio alloc cache */
|
||||
#define IOCB_ALLOC_CACHE (1 << 21)
|
||||
/*
|
||||
* IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
|
||||
* iocb completion can be passed back to the owner for execution from a safe
|
||||
* context rather than needing to be punted through a workqueue. If this
|
||||
* flag is set, the bio completion handling may set iocb->dio_complete to a
|
||||
* handler function and iocb->private to context information for that handler.
|
||||
* The issuer should call the handler with that context information from task
|
||||
* context to complete the processing of the iocb. Note that while this
|
||||
* provides a task context for the dio_complete() callback, it should only be
|
||||
* used on the completion side for non-IO generating completions. It's fine to
|
||||
* call blocking functions from this callback, but they should not wait for
|
||||
* unrelated IO (like cache flushing, new IO generation, etc).
|
||||
*/
|
||||
#define IOCB_DIO_CALLER_COMP (1 << 22)
|
||||
|
||||
/* for use in trace events */
|
||||
#define TRACE_IOCB_STRINGS \
|
||||
{ IOCB_HIPRI, "HIPRI" }, \
|
||||
{ IOCB_DSYNC, "DSYNC" }, \
|
||||
{ IOCB_SYNC, "SYNC" }, \
|
||||
{ IOCB_NOWAIT, "NOWAIT" }, \
|
||||
{ IOCB_APPEND, "APPEND" }, \
|
||||
{ IOCB_EVENTFD, "EVENTFD"}, \
|
||||
{ IOCB_DIRECT, "DIRECT" }, \
|
||||
{ IOCB_WRITE, "WRITE" }, \
|
||||
{ IOCB_WAITQ, "WAITQ" }, \
|
||||
{ IOCB_NOIO, "NOIO" }, \
|
||||
{ IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
|
||||
{ IOCB_DIO_CALLER_COMP, "CALLER_COMP" }
|
||||
|
||||
struct kiocb {
|
||||
struct file *ki_filp;
|
||||
|
||||
/* The 'ki_filp' pointer is shared in a union for aio */
|
||||
randomized_struct_fields_start
|
||||
|
||||
loff_t ki_pos;
|
||||
void (*ki_complete)(struct kiocb *iocb, long ret);
|
||||
void *private;
|
||||
int ki_flags;
|
||||
u16 ki_ioprio; /* See linux/ioprio.h */
|
||||
struct wait_page_queue *ki_waitq; /* for async buffered IO */
|
||||
randomized_struct_fields_end
|
||||
union {
|
||||
/*
|
||||
* Only used for async buffered reads, where it denotes the
|
||||
* page waitqueue associated with completing the read. Valid
|
||||
* IFF IOCB_WAITQ is set.
|
||||
*/
|
||||
struct wait_page_queue *ki_waitq;
|
||||
/*
|
||||
* Can be used for O_DIRECT IO, where the completion handling
|
||||
* is punted back to the issuer of the IO. May only be set
|
||||
* if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
|
||||
* must then check for presence of this handler when ki_complete
|
||||
* is invoked. The data passed in to this handler must be
|
||||
* assigned to ->private when dio_complete is assigned.
|
||||
*/
|
||||
ssize_t (*dio_complete)(void *data);
|
||||
};
|
||||
};
|
||||
|
||||
static inline bool is_sync_kiocb(struct kiocb *kiocb)
|
||||
|
|
|
@ -24,7 +24,7 @@ enum io_uring_cmd_flags {
|
|||
|
||||
struct io_uring_cmd {
|
||||
struct file *file;
|
||||
const void *cmd;
|
||||
const struct io_uring_sqe *sqe;
|
||||
union {
|
||||
/* callback to defer completions to task context */
|
||||
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
|
||||
|
@ -36,18 +36,33 @@ struct io_uring_cmd {
|
|||
u8 pdu[32]; /* available inline for free use */
|
||||
};
|
||||
|
||||
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return sqe->cmd;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
|
||||
unsigned issue_flags);
|
||||
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned));
|
||||
struct sock *io_uring_get_socket(struct file *file);
|
||||
void __io_uring_cancel(bool cancel_all);
|
||||
void __io_uring_free(struct task_struct *tsk);
|
||||
void io_uring_unreg_ringfd(void);
|
||||
const char *io_uring_get_opcode(u8 opcode);
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags);
|
||||
/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */
|
||||
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned));
|
||||
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
|
||||
}
|
||||
|
||||
static inline void io_uring_files_cancel(void)
|
||||
{
|
||||
|
@ -66,6 +81,7 @@ static inline void io_uring_free(struct task_struct *tsk)
|
|||
if (tsk->io_uring)
|
||||
__io_uring_free(tsk);
|
||||
}
|
||||
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
#else
|
||||
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
|
@ -80,6 +96,10 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
|||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline struct sock *io_uring_get_socket(struct file *file)
|
||||
{
|
||||
return NULL;
|
||||
|
@ -97,6 +117,11 @@ static inline const char *io_uring_get_opcode(u8 opcode)
|
|||
{
|
||||
return "";
|
||||
}
|
||||
static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -58,7 +58,7 @@ struct io_uring_task {
|
|||
|
||||
struct xarray xa;
|
||||
struct wait_queue_head wait;
|
||||
atomic_t in_idle;
|
||||
atomic_t in_cancel;
|
||||
atomic_t inflight_tracked;
|
||||
struct percpu_counter inflight;
|
||||
|
||||
|
@ -69,8 +69,8 @@ struct io_uring_task {
|
|||
};
|
||||
|
||||
struct io_uring {
|
||||
u32 head ____cacheline_aligned_in_smp;
|
||||
u32 tail ____cacheline_aligned_in_smp;
|
||||
u32 head;
|
||||
u32 tail;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -176,7 +176,6 @@ struct io_submit_state {
|
|||
unsigned short submit_nr;
|
||||
unsigned int cqes_count;
|
||||
struct blk_plug plug;
|
||||
struct io_uring_cqe cqes[16];
|
||||
};
|
||||
|
||||
struct io_ev_fd {
|
||||
|
@ -188,28 +187,34 @@ struct io_ev_fd {
|
|||
};
|
||||
|
||||
struct io_alloc_cache {
|
||||
struct hlist_head list;
|
||||
struct io_wq_work_node list;
|
||||
unsigned int nr_cached;
|
||||
unsigned int max_cached;
|
||||
size_t elem_size;
|
||||
};
|
||||
|
||||
struct io_ring_ctx {
|
||||
/* const or read-mostly hot data */
|
||||
struct {
|
||||
struct percpu_ref refs;
|
||||
|
||||
struct io_rings *rings;
|
||||
unsigned int flags;
|
||||
enum task_work_notify_mode notify_method;
|
||||
unsigned int compat: 1;
|
||||
unsigned int drain_next: 1;
|
||||
unsigned int restricted: 1;
|
||||
unsigned int off_timeout_used: 1;
|
||||
unsigned int drain_active: 1;
|
||||
unsigned int drain_disabled: 1;
|
||||
unsigned int has_evfd: 1;
|
||||
unsigned int syscall_iopoll: 1;
|
||||
/* all CQEs should be posted only by the submitter task */
|
||||
unsigned int task_complete: 1;
|
||||
unsigned int lockless_cq: 1;
|
||||
unsigned int syscall_iopoll: 1;
|
||||
unsigned int poll_activated: 1;
|
||||
unsigned int drain_disabled: 1;
|
||||
unsigned int compat: 1;
|
||||
|
||||
struct task_struct *submitter_task;
|
||||
struct io_rings *rings;
|
||||
struct percpu_ref refs;
|
||||
|
||||
enum task_work_notify_mode notify_method;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/* submission data */
|
||||
|
@ -237,7 +242,6 @@ struct io_ring_ctx {
|
|||
* uring_lock, and updated through io_uring_register(2)
|
||||
*/
|
||||
struct io_rsrc_node *rsrc_node;
|
||||
int rsrc_cached_refs;
|
||||
atomic_t cancel_seq;
|
||||
struct io_file_table file_table;
|
||||
unsigned nr_user_files;
|
||||
|
@ -248,32 +252,21 @@ struct io_ring_ctx {
|
|||
|
||||
struct io_buffer_list *io_bl;
|
||||
struct xarray io_bl_xa;
|
||||
struct list_head io_buffers_cache;
|
||||
|
||||
struct io_hash_table cancel_table_locked;
|
||||
struct list_head cq_overflow_list;
|
||||
struct io_alloc_cache apoll_cache;
|
||||
struct io_alloc_cache netmsg_cache;
|
||||
|
||||
/*
|
||||
* ->iopoll_list is protected by the ctx->uring_lock for
|
||||
* io_uring instances that don't use IORING_SETUP_SQPOLL.
|
||||
* For SQPOLL, only the single threaded io_sq_thread() will
|
||||
* manipulate the list, hence no extra locking is needed there.
|
||||
*/
|
||||
struct io_wq_work_list iopoll_list;
|
||||
bool poll_multi_queue;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/* IRQ completion list, under ->completion_lock */
|
||||
struct io_wq_work_list locked_free_list;
|
||||
unsigned int locked_free_nr;
|
||||
|
||||
const struct cred *sq_creds; /* cred used for __io_sq_thread() */
|
||||
struct io_sq_data *sq_data; /* if using sq thread polling */
|
||||
|
||||
struct wait_queue_head sqo_sq_wait;
|
||||
struct list_head sqd_list;
|
||||
|
||||
unsigned long check_cq;
|
||||
|
||||
unsigned int file_alloc_start;
|
||||
unsigned int file_alloc_end;
|
||||
|
||||
struct xarray personalities;
|
||||
u32 pers_next;
|
||||
|
||||
struct {
|
||||
/*
|
||||
* We cache a range of free CQEs we can use, once exhausted it
|
||||
|
@ -285,54 +278,69 @@ struct io_ring_ctx {
|
|||
unsigned cached_cq_tail;
|
||||
unsigned cq_entries;
|
||||
struct io_ev_fd __rcu *io_ev_fd;
|
||||
struct wait_queue_head cq_wait;
|
||||
unsigned cq_extra;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/*
|
||||
* task_work and async notification delivery cacheline. Expected to
|
||||
* regularly bounce b/w CPUs.
|
||||
*/
|
||||
struct {
|
||||
spinlock_t completion_lock;
|
||||
|
||||
bool poll_multi_queue;
|
||||
|
||||
/*
|
||||
* ->iopoll_list is protected by the ctx->uring_lock for
|
||||
* io_uring instances that don't use IORING_SETUP_SQPOLL.
|
||||
* For SQPOLL, only the single threaded io_sq_thread() will
|
||||
* manipulate the list, hence no extra locking is needed there.
|
||||
*/
|
||||
struct io_wq_work_list iopoll_list;
|
||||
struct io_hash_table cancel_table;
|
||||
|
||||
struct llist_head work_llist;
|
||||
|
||||
struct list_head io_buffers_comp;
|
||||
unsigned long check_cq;
|
||||
atomic_t cq_wait_nr;
|
||||
atomic_t cq_timeouts;
|
||||
struct wait_queue_head cq_wait;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/* timeouts */
|
||||
struct {
|
||||
spinlock_t timeout_lock;
|
||||
atomic_t cq_timeouts;
|
||||
struct list_head timeout_list;
|
||||
struct list_head ltimeout_list;
|
||||
unsigned cq_last_tm_flush;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/* Keep this last, we don't need it for the fast path */
|
||||
struct io_uring_cqe completion_cqes[16];
|
||||
|
||||
spinlock_t completion_lock;
|
||||
|
||||
/* IRQ completion list, under ->completion_lock */
|
||||
struct io_wq_work_list locked_free_list;
|
||||
unsigned int locked_free_nr;
|
||||
|
||||
struct list_head io_buffers_comp;
|
||||
struct list_head cq_overflow_list;
|
||||
struct io_hash_table cancel_table;
|
||||
|
||||
const struct cred *sq_creds; /* cred used for __io_sq_thread() */
|
||||
struct io_sq_data *sq_data; /* if using sq thread polling */
|
||||
|
||||
struct wait_queue_head sqo_sq_wait;
|
||||
struct list_head sqd_list;
|
||||
|
||||
unsigned int file_alloc_start;
|
||||
unsigned int file_alloc_end;
|
||||
|
||||
struct xarray personalities;
|
||||
u32 pers_next;
|
||||
|
||||
struct list_head io_buffers_cache;
|
||||
|
||||
/* Keep this last, we don't need it for the fast path */
|
||||
struct wait_queue_head poll_wq;
|
||||
struct io_restriction restrictions;
|
||||
struct task_struct *submitter_task;
|
||||
|
||||
/* slow path rsrc auxilary data, used by update/register */
|
||||
struct io_rsrc_node *rsrc_backup_node;
|
||||
struct io_mapped_ubuf *dummy_ubuf;
|
||||
struct io_rsrc_data *file_data;
|
||||
struct io_rsrc_data *buf_data;
|
||||
|
||||
struct delayed_work rsrc_put_work;
|
||||
struct callback_head rsrc_put_tw;
|
||||
struct llist_head rsrc_put_llist;
|
||||
/* protected by ->uring_lock */
|
||||
struct list_head rsrc_ref_list;
|
||||
spinlock_t rsrc_ref_lock;
|
||||
struct io_alloc_cache rsrc_node_cache;
|
||||
struct wait_queue_head rsrc_quiesce_wq;
|
||||
unsigned rsrc_quiesce;
|
||||
|
||||
struct list_head io_buffers_pages;
|
||||
|
||||
|
@ -357,10 +365,25 @@ struct io_ring_ctx {
|
|||
u32 iowq_limits[2];
|
||||
bool iowq_limits_set;
|
||||
|
||||
struct callback_head poll_wq_task_work;
|
||||
struct list_head defer_list;
|
||||
unsigned sq_thread_idle;
|
||||
/* protected by ->completion_lock */
|
||||
unsigned evfd_last_cq_tail;
|
||||
|
||||
/*
|
||||
* If IORING_SETUP_NO_MMAP is used, then the below holds
|
||||
* the gup'ed pages for the two rings, and the sqes.
|
||||
*/
|
||||
unsigned short n_ring_pages;
|
||||
unsigned short n_sqe_pages;
|
||||
struct page **ring_pages;
|
||||
struct page **sqe_pages;
|
||||
};
|
||||
|
||||
struct io_tw_state {
|
||||
/* ->uring_lock is taken, callbacks can use io_tw_lock to lock it */
|
||||
bool locked;
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@ -391,7 +414,6 @@ enum {
|
|||
REQ_F_SINGLE_POLL_BIT,
|
||||
REQ_F_DOUBLE_POLL_BIT,
|
||||
REQ_F_PARTIAL_IO_BIT,
|
||||
REQ_F_CQE32_INIT_BIT,
|
||||
REQ_F_APOLL_MULTISHOT_BIT,
|
||||
REQ_F_CLEAR_POLLIN_BIT,
|
||||
REQ_F_HASH_LOCKED_BIT,
|
||||
|
@ -461,15 +483,13 @@ enum {
|
|||
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
|
||||
/* fast poll multishot mode */
|
||||
REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
|
||||
/* ->extra1 and ->extra2 are initialised */
|
||||
REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT),
|
||||
/* recvmsg special flag, clear EPOLLIN */
|
||||
REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT),
|
||||
/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
|
||||
REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT),
|
||||
};
|
||||
|
||||
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
|
||||
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
|
||||
struct io_task_work {
|
||||
struct llist_node node;
|
||||
|
@ -559,14 +579,9 @@ struct io_kiocb {
|
|||
atomic_t refs;
|
||||
atomic_t poll_refs;
|
||||
struct io_task_work io_task_work;
|
||||
unsigned nr_tw;
|
||||
/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
|
||||
union {
|
||||
struct hlist_node hash_node;
|
||||
struct {
|
||||
u64 extra1;
|
||||
u64 extra2;
|
||||
};
|
||||
};
|
||||
struct hlist_node hash_node;
|
||||
/* internal polling, see IORING_FEAT_FAST_POLL */
|
||||
struct async_poll *apoll;
|
||||
/* opcode allocated if it needs to store data for async defer */
|
||||
|
@ -576,6 +591,11 @@ struct io_kiocb {
|
|||
/* custom credentials, valid IFF REQ_F_CREDS is set */
|
||||
const struct cred *creds;
|
||||
struct io_wq_work work;
|
||||
|
||||
struct {
|
||||
u64 extra1;
|
||||
u64 extra2;
|
||||
} big_cqe;
|
||||
};
|
||||
|
||||
struct io_overflow_cqe {
|
||||
|
|
|
@ -1279,6 +1279,21 @@ static inline bool is_cow_mapping(vm_flags_t flags)
|
|||
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_MMU
|
||||
static inline bool is_nommu_shared_mapping(vm_flags_t flags)
|
||||
{
|
||||
/*
|
||||
* NOMMU shared mappings are ordinary MAP_SHARED mappings and selected
|
||||
* R/O MAP_PRIVATE file mappings that are an effective R/O overlay of
|
||||
* a file mapping. R/O MAP_PRIVATE mappings might still modify
|
||||
* underlying memory if ptrace is active, so this is only possible if
|
||||
* ptrace does not apply. Note that there is no mprotect() to upgrade
|
||||
* write permissions later.
|
||||
*/
|
||||
return flags & VM_MAYSHARE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
|
||||
#define SECTION_IN_PAGE_FLAGS
|
||||
#endif
|
||||
|
|
|
@ -18,10 +18,11 @@ static inline int ip_mroute_opt(int opt)
|
|||
|
||||
int ip_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int);
|
||||
int ip_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t);
|
||||
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
|
||||
int ipmr_ioctl(struct sock *sk, int cmd, void *arg);
|
||||
int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
|
||||
int ip_mr_init(void);
|
||||
bool ipmr_rule_default(const struct fib_rule *rule);
|
||||
int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
|
||||
#else
|
||||
static inline int ip_mroute_setsockopt(struct sock *sock, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
|
@ -35,7 +36,7 @@ static inline int ip_mroute_getsockopt(struct sock *sk, int optname,
|
|||
return -ENOPROTOOPT;
|
||||
}
|
||||
|
||||
static inline int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
|
||||
static inline int ipmr_ioctl(struct sock *sk, int cmd, void *arg)
|
||||
{
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
@ -54,6 +55,12 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
|
|||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd,
|
||||
void __user *arg)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define VIFF_STATIC 0x8000
|
||||
|
|
|
@ -29,10 +29,10 @@ struct sock;
|
|||
extern int ip6_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int);
|
||||
extern int ip6_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t);
|
||||
extern int ip6_mr_input(struct sk_buff *skb);
|
||||
extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg);
|
||||
extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
|
||||
extern int ip6_mr_init(void);
|
||||
extern void ip6_mr_cleanup(void);
|
||||
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg);
|
||||
#else
|
||||
static inline int ip6_mroute_setsockopt(struct sock *sock, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
|
@ -48,7 +48,7 @@ int ip6_mroute_getsockopt(struct sock *sock,
|
|||
}
|
||||
|
||||
static inline
|
||||
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
|
||||
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
|
||||
{
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
@ -100,6 +100,27 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
|
|||
#ifdef CONFIG_IPV6_MROUTE
|
||||
bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
|
||||
extern int ip6mr_sk_done(struct sock *sk);
|
||||
static inline int ip6mr_sk_ioctl(struct sock *sk, unsigned int cmd,
|
||||
void __user *arg)
|
||||
{
|
||||
switch (cmd) {
|
||||
/* These userspace buffers will be consumed by ip6mr_ioctl() */
|
||||
case SIOCGETMIFCNT_IN6: {
|
||||
struct sioc_mif_req6 buffer;
|
||||
|
||||
return sock_ioctl_inout(sk, cmd, arg, &buffer,
|
||||
sizeof(buffer));
|
||||
}
|
||||
case SIOCGETSGCNT_IN6: {
|
||||
struct sioc_sg_req6 buffer;
|
||||
|
||||
return sock_ioctl_inout(sk, cmd, arg, &buffer,
|
||||
sizeof(buffer));
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
|
||||
{
|
||||
|
@ -109,5 +130,11 @@ static inline int ip6mr_sk_done(struct sock *sk)
|
|||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int ip6mr_sk_ioctl(struct sock *sk, unsigned int cmd,
|
||||
void __user *arg)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -320,6 +320,7 @@ struct ucred {
|
|||
*/
|
||||
|
||||
#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
|
||||
#define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */
|
||||
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
|
||||
#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file
|
||||
descriptor received through
|
||||
|
@ -330,6 +331,8 @@ struct ucred {
|
|||
#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */
|
||||
#endif
|
||||
|
||||
/* Flags to be cleared on entry by sendmsg and sendmmsg syscalls */
|
||||
#define MSG_INTERNAL_SENDMSG_FLAGS (MSG_SPLICE_PAGES)
|
||||
|
||||
/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
|
||||
#define SOL_IP 0
|
||||
|
|
|
@ -49,7 +49,8 @@ struct iov_iter {
|
|||
};
|
||||
size_t count;
|
||||
union {
|
||||
const struct iovec *iov;
|
||||
/* use iter_iov() to get the current vec */
|
||||
const struct iovec *__iov;
|
||||
const struct kvec *kvec;
|
||||
const struct bio_vec *bvec;
|
||||
struct xarray *xarray;
|
||||
|
@ -66,6 +67,10 @@ struct iov_iter {
|
|||
};
|
||||
};
|
||||
|
||||
#define iter_iov(iter) (iter)->__iov
|
||||
#define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset)
|
||||
#define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset)
|
||||
|
||||
static inline enum iter_type iov_iter_type(const struct iov_iter *i)
|
||||
{
|
||||
return i->iter_type;
|
||||
|
@ -141,15 +146,6 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
|
||||
{
|
||||
return (struct iovec) {
|
||||
.iov_base = iter->iov->iov_base + iter->iov_offset,
|
||||
.iov_len = min(iter->count,
|
||||
iter->iov->iov_len - iter->iov_offset),
|
||||
};
|
||||
}
|
||||
|
||||
size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
|
||||
size_t bytes, struct iov_iter *i);
|
||||
void iov_iter_advance(struct iov_iter *i, size_t bytes);
|
||||
|
@ -343,6 +339,7 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
|
|||
struct iov_iter *i, bool compat);
|
||||
int import_single_range(int type, void __user *buf, size_t len,
|
||||
struct iovec *iov, struct iov_iter *i);
|
||||
int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i);
|
||||
|
||||
static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
|
||||
void __user *buf, size_t count)
|
||||
|
|
|
@ -109,4 +109,25 @@ void phonet_sysctl_exit(void);
|
|||
int isi_register(void);
|
||||
void isi_unregister(void);
|
||||
|
||||
static inline bool sk_is_phonet(struct sock *sk)
|
||||
{
|
||||
return sk->sk_family == PF_PHONET;
|
||||
}
|
||||
|
||||
static inline int phonet_sk_ioctl(struct sock *sk, unsigned int cmd,
|
||||
void __user *arg)
|
||||
{
|
||||
int karg;
|
||||
|
||||
switch (cmd) {
|
||||
case SIOCPNADDRESOURCE:
|
||||
case SIOCPNDELRESOURCE:
|
||||
if (get_user(karg, (int __user *)arg))
|
||||
return -EFAULT;
|
||||
|
||||
return sk->sk_prot->ioctl(sk, cmd, &karg);
|
||||
}
|
||||
/* A positive return value means that the ioctl was not processed */
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1228,7 +1228,7 @@ struct proto {
|
|||
bool kern);
|
||||
|
||||
int (*ioctl)(struct sock *sk, int cmd,
|
||||
unsigned long arg);
|
||||
int *karg);
|
||||
int (*init)(struct sock *sk);
|
||||
void (*destroy)(struct sock *sk);
|
||||
void (*shutdown)(struct sock *sk, int how);
|
||||
|
@ -2972,6 +2972,9 @@ int sock_get_timeout(long timeo, void *optval, bool old_timeval);
|
|||
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
|
||||
sockptr_t optval, int optlen, bool old_timeval);
|
||||
|
||||
int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
|
||||
void __user *arg, void *karg, size_t size);
|
||||
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
|
||||
static inline bool sk_is_readable(struct sock *sk)
|
||||
{
|
||||
if (sk->sk_prot->sock_is_readable)
|
||||
|
|
|
@ -342,7 +342,7 @@ void tcp_release_cb(struct sock *sk);
|
|||
void tcp_wfree(struct sk_buff *skb);
|
||||
void tcp_write_timer_handler(struct sock *sk);
|
||||
void tcp_delack_timer_handler(struct sock *sk);
|
||||
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
|
||||
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
|
||||
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
|
||||
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
|
||||
void tcp_rcv_space_adjust(struct sock *sk);
|
||||
|
|
|
@ -284,7 +284,7 @@ void udp_flush_pending_frames(struct sock *sk);
|
|||
int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
|
||||
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
|
||||
int udp_rcv(struct sk_buff *skb);
|
||||
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
|
||||
int udp_ioctl(struct sock *sk, int cmd, int *karg);
|
||||
int udp_init_sock(struct sock *sk);
|
||||
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
|
||||
int __udp_disconnect(struct sock *sk, int flags);
|
||||
|
|
|
@ -360,19 +360,18 @@ TRACE_EVENT(io_uring_complete,
|
|||
);
|
||||
|
||||
/**
|
||||
* io_uring_submit_sqe - called before submitting one SQE
|
||||
* io_uring_submit_req - called before submitting a request
|
||||
*
|
||||
* @req: pointer to a submitted request
|
||||
* @force_nonblock: whether a context blocking or not
|
||||
*
|
||||
* Allows to track SQE submitting, to understand what was the source of it, SQ
|
||||
* thread or io_uring_enter call.
|
||||
*/
|
||||
TRACE_EVENT(io_uring_submit_sqe,
|
||||
TRACE_EVENT(io_uring_submit_req,
|
||||
|
||||
TP_PROTO(struct io_kiocb *req, bool force_nonblock),
|
||||
TP_PROTO(struct io_kiocb *req),
|
||||
|
||||
TP_ARGS(req, force_nonblock),
|
||||
TP_ARGS(req),
|
||||
|
||||
TP_STRUCT__entry (
|
||||
__field( void *, ctx )
|
||||
|
@ -380,7 +379,6 @@ TRACE_EVENT(io_uring_submit_sqe,
|
|||
__field( unsigned long long, user_data )
|
||||
__field( u8, opcode )
|
||||
__field( u32, flags )
|
||||
__field( bool, force_nonblock )
|
||||
__field( bool, sq_thread )
|
||||
|
||||
__string( op_str, io_uring_get_opcode(req->opcode) )
|
||||
|
@ -392,16 +390,15 @@ TRACE_EVENT(io_uring_submit_sqe,
|
|||
__entry->user_data = req->cqe.user_data;
|
||||
__entry->opcode = req->opcode;
|
||||
__entry->flags = req->flags;
|
||||
__entry->force_nonblock = force_nonblock;
|
||||
__entry->sq_thread = req->ctx->flags & IORING_SETUP_SQPOLL;
|
||||
|
||||
__assign_str(op_str, io_uring_get_opcode(req->opcode));
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
|
||||
"non block %d, sq_thread %d", __entry->ctx, __entry->req,
|
||||
"sq_thread %d", __entry->ctx, __entry->req,
|
||||
__entry->user_data, __get_str(op_str),
|
||||
__entry->flags, __entry->force_nonblock, __entry->sq_thread)
|
||||
__entry->flags, __entry->sq_thread)
|
||||
);
|
||||
|
||||
/*
|
||||
|
|
|
@ -173,6 +173,23 @@ enum {
|
|||
*/
|
||||
#define IORING_SETUP_DEFER_TASKRUN (1U << 13)
|
||||
|
||||
/*
|
||||
* Application provides the memory for the rings
|
||||
*/
|
||||
#define IORING_SETUP_NO_MMAP (1U << 14)
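IORING_SETUP_NO_MMAP is the "user-specified ring buffer" item from the merge description: the application supplies the memory for the rings and SQEs instead of mmap()ing them from the kernel. A hedged sketch relying on liburing's convenience wrapper io_uring_queue_init_mem(), which is assumed to be available in liburing 2.5+ and to set this flag and the user_addr offsets internally:

#include <liburing.h>
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	struct io_uring_params p = { };
	struct io_uring ring;
	size_t len = 64 * 1024;		/* generous; init_mem reports if it is too small */
	void *mem;
	int ret;

	/* Memory handed to the kernel for the SQ/CQ rings and the SQE array. */
	mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED)
		return 1;

	ret = io_uring_queue_init_mem(8, &ring, &p, mem, len);
	if (ret < 0) {
		fprintf(stderr, "init_mem: %d (kernel or liburing too old?)\n", ret);
		return 1;
	}
	printf("ring created, setup flags 0x%x\n", p.flags);
	io_uring_queue_exit(&ring);
	return 0;
}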
|
||||
|
||||
/*
|
||||
* Register the ring fd in itself for use with
|
||||
* IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
|
||||
* than an fd.
|
||||
*/
|
||||
#define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15)
|
||||
|
||||
/*
|
||||
* Removes indirection through the SQ index array.
|
||||
*/
|
||||
#define IORING_SETUP_NO_SQARRAY (1U << 16)
|
||||
|
||||
enum io_uring_op {
|
||||
IORING_OP_NOP,
|
||||
IORING_OP_READV,
|
||||
|
@ -252,6 +269,7 @@ enum io_uring_op {
|
|||
#define IORING_TIMEOUT_REALTIME (1U << 3)
|
||||
#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
|
||||
#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
|
||||
#define IORING_TIMEOUT_MULTISHOT (1U << 6)
|
||||
#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
|
||||
#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
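IORING_TIMEOUT_MULTISHOT above is the multi-shot timeout feature from the merge description: a single timeout SQE keeps posting a CQE every time the interval elapses until it is cancelled. A hedged liburing sketch (treating a count of 0 as "repeat until cancelled" is a reading of the upstream change, not something stated in this diff):

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* One SQE, repeated 1-second expirations. */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_MULTISHOT);
	io_uring_sqe_set_data64(sqe, 1);
	io_uring_submit(&ring);

	for (i = 0; i < 3; i++) {
		if (io_uring_wait_cqe(&ring, &cqe) < 0)
			break;
		/* Each expiration posts a CQE (res is normally -ETIME, with
		 * IORING_CQE_F_MORE set while more completions will follow). */
		printf("tick %d: res=%d flags=0x%x\n", i, cqe->res, cqe->flags);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}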
|
||||
/*
|
||||
|
@ -286,11 +304,15 @@ enum io_uring_op {
|
|||
* request 'user_data'
|
||||
* IORING_ASYNC_CANCEL_ANY Match any request
|
||||
* IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor
|
||||
* IORING_ASYNC_CANCEL_USERDATA Match on user_data, default for no other key
|
||||
* IORING_ASYNC_CANCEL_OP Match request based on opcode
|
||||
*/
|
||||
#define IORING_ASYNC_CANCEL_ALL (1U << 0)
|
||||
#define IORING_ASYNC_CANCEL_FD (1U << 1)
|
||||
#define IORING_ASYNC_CANCEL_ANY (1U << 2)
|
||||
#define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3)
|
||||
#define IORING_ASYNC_CANCEL_USERDATA (1U << 4)
|
||||
#define IORING_ASYNC_CANCEL_OP (1U << 5)
|
||||
|
||||
/*
|
||||
* send/sendmsg and recv/recvmsg flags (sqe->ioprio)
|
||||
|
@ -349,6 +371,8 @@ enum {
|
|||
* applicable for IORING_MSG_DATA, obviously.
|
||||
*/
|
||||
#define IORING_MSG_RING_CQE_SKIP (1U << 0)
|
||||
/* Pass through the flags from sqe->file_index to cqe->flags */
|
||||
#define IORING_MSG_RING_FLAGS_PASS (1U << 1)
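IORING_MSG_RING_FLAGS_PASS implements the "custom flags on ring-message CQEs" item from the merge description: the sender's sqe->file_index value is copied into cqe->flags on the target ring. A hedged sketch using liburing's io_uring_prep_msg_ring_cqe_flags() helper (assumed available in liburing 2.4+); the ring messages itself purely to keep the example self-contained, and the flag value 1U << 16 is arbitrary:

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* Post a CQE on the target ring (here: ourselves) with res=0,
	 * user_data=0x42, and cqe->flags taken from the last argument. */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_msg_ring_cqe_flags(sqe, ring.ring_fd, 0, 0x42,
					 IORING_MSG_RING_FLAGS_PASS, 1U << 16);
	io_uring_submit(&ring);

	/* Expect two CQEs: the delivered message and the sender's own completion. */
	for (i = 0; i < 2; i++) {
		if (io_uring_wait_cqe(&ring, &cqe) < 0)
			break;
		printf("user_data=%llu res=%d flags=0x%x\n",
		       (unsigned long long)cqe->user_data, cqe->res, cqe->flags);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}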
|
||||
|
||||
/*
|
||||
* IO completion data structure (Completion Queue Entry)
|
||||
|
@ -389,6 +413,9 @@ enum {
|
|||
#define IORING_OFF_SQ_RING 0ULL
|
||||
#define IORING_OFF_CQ_RING 0x8000000ULL
|
||||
#define IORING_OFF_SQES 0x10000000ULL
|
||||
#define IORING_OFF_PBUF_RING 0x80000000ULL
|
||||
#define IORING_OFF_PBUF_SHIFT 16
|
||||
#define IORING_OFF_MMAP_MASK 0xf8000000ULL
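The IORING_OFF_PBUF_RING / IORING_OFF_PBUF_SHIFT offsets above are what an application uses to map a provided-buffer ring whose memory the kernel allocated ("Provided Buffers allocated by the kernel" in the merge description). A hedged raw-syscall sketch; IORING_REGISTER_PBUF_RING, struct io_uring_buf_reg and IOU_PBUF_RING_MMAP are defined elsewhere in this header, and error handling is minimal:

#include <linux/io_uring.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#define BUF_RING_ENTRIES	8	/* must be a power of two */
#define BUF_GROUP_ID		7	/* arbitrary buffer group id for the example */

int main(void)
{
	struct io_uring_params p;
	struct io_uring_buf_reg reg;
	struct io_uring_buf_ring *br;
	size_t ring_size = BUF_RING_ENTRIES * sizeof(struct io_uring_buf);
	int fd, ret;

	memset(&p, 0, sizeof(p));
	fd = syscall(__NR_io_uring_setup, 8, &p);
	if (fd < 0)
		return 1;

	/* ring_addr stays 0: the kernel allocates the buffer ring for us. */
	memset(&reg, 0, sizeof(reg));
	reg.ring_entries = BUF_RING_ENTRIES;
	reg.bgid = BUF_GROUP_ID;
	reg.flags = IOU_PBUF_RING_MMAP;
	ret = syscall(__NR_io_uring_register, fd, IORING_REGISTER_PBUF_RING, &reg, 1);
	if (ret < 0) {
		perror("IORING_REGISTER_PBUF_RING");
		return 1;
	}

	/* Map the kernel-allocated ring at the offset encoding shown above. */
	br = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		  fd, IORING_OFF_PBUF_RING |
		      ((__u64)BUF_GROUP_ID << IORING_OFF_PBUF_SHIFT));
	if (br == MAP_FAILED)
		return 1;
	printf("provided-buffer ring mapped at %p, tail=%u\n", (void *)br, br->tail);

	munmap(br, ring_size);
	close(fd);
	return 0;
}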
|
||||
|
||||
/*
|
||||
* Filled with the offset for mmap(2)
|
||||
|
@ -402,7 +429,7 @@ struct io_sqring_offsets {
|
|||
__u32 dropped;
|
||||
__u32 array;
|
||||
__u32 resv1;
|
||||
__u64 resv2;
|
||||
__u64 user_addr;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -421,7 +448,7 @@ struct io_cqring_offsets {
|
|||
__u32 cqes;
|
||||
__u32 flags;
|
||||
__u32 resv1;
|
||||
__u64 resv2;
|
||||
__u64 user_addr;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -472,6 +499,7 @@ struct io_uring_params {
|
|||
#define IORING_FEAT_RSRC_TAGS (1U << 10)
|
||||
#define IORING_FEAT_CQE_SKIP (1U << 11)
|
||||
#define IORING_FEAT_LINKED_FILE (1U << 12)
|
||||
#define IORING_FEAT_REG_REG_RING (1U << 13)
|
||||
|
||||
/*
|
||||
* io_uring_register(2) opcodes and arguments
|
||||
|
@ -519,7 +547,10 @@ enum {
|
|||
IORING_REGISTER_FILE_ALLOC_RANGE = 25,
|
||||
|
||||
/* this goes last */
|
||||
IORING_REGISTER_LAST
|
||||
IORING_REGISTER_LAST,
|
||||
|
||||
/* flag added to the opcode to use a registered ring fd */
|
||||
IORING_REGISTER_USE_REGISTERED_RING = 1U << 31
|
||||
};
|
||||
|
||||
/* io-wq worker categories */
|
||||
|
@ -564,19 +595,6 @@ struct io_uring_rsrc_update2 {
|
|||
__u32 resv2;
|
||||
};
|
||||
|
||||
struct io_uring_notification_slot {
|
||||
__u64 tag;
|
||||
__u64 resv[3];
|
||||
};
|
||||
|
||||
struct io_uring_notification_register {
|
||||
__u32 nr_slots;
|
||||
__u32 resv;
|
||||
__u64 resv2;
|
||||
__u64 data;
|
||||
__u64 resv3;
|
||||
};
|
||||
|
||||
/* Skip updating fd indexes set to this value in the fd table */
|
||||
#define IORING_REGISTER_FILES_SKIP (-2)
|
||||
|
||||
|
@ -631,12 +649,26 @@ struct io_uring_buf_ring {
|
|||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* Flags for IORING_REGISTER_PBUF_RING.
|
||||
*
|
||||
* IOU_PBUF_RING_MMAP: If set, kernel will allocate the memory for the ring.
|
||||
* The application must not set a ring_addr in struct
|
||||
* io_uring_buf_reg, instead it must subsequently call
|
||||
* mmap(2) with the offset set as:
|
||||
* IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
|
||||
* to get a virtual mapping for the ring.
|
||||
*/
|
||||
enum {
|
||||
IOU_PBUF_RING_MMAP = 1,
|
||||
};
|
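A minimal userspace sketch of the mapping step described in the comment above (not part of this patch): it assumes `ring_fd` is an io_uring fd whose buffer group `bgid` was registered with IOU_PBUF_RING_MMAP, that the updated uapi header is available, and that off_t is 64 bits wide.

#include <stdint.h>
#include <sys/mman.h>
#include <linux/io_uring.h>

/* Hypothetical helper: map a kernel-allocated provided-buffer ring. */
static struct io_uring_buf_ring *map_pbuf_ring(int ring_fd, uint16_t bgid,
					       uint32_t ring_entries)
{
	/* one struct io_uring_buf per entry; the ring header overlays bufs[0] */
	size_t len = ring_entries * sizeof(struct io_uring_buf);
	uint64_t off = IORING_OFF_PBUF_RING |
		       ((uint64_t)bgid << IORING_OFF_PBUF_SHIFT);
	void *ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
			 ring_fd, (off_t)off);

	return ptr == MAP_FAILED ? NULL : ptr;
}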
||||
|
||||
/* argument for IORING_(UN)REGISTER_PBUF_RING */
|
||||
struct io_uring_buf_reg {
|
||||
__u64 ring_addr;
|
||||
__u32 ring_entries;
|
||||
__u16 bgid;
|
||||
__u16 pad;
|
||||
__u16 flags;
|
||||
__u64 resv[3];
|
||||
};
|
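A hedged sketch of the registration itself (also not from the patch): only the fields shown in the struct above are populated, the opcode follows the IORING_(UN)REGISTER_PBUF_RING comment above, and the nr_args value of 1 is an assumption about how that opcode is driven.

#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

/* Hypothetical: register a kernel-allocated buffer ring for group 'bgid'. */
static int register_mmap_pbuf_ring(int ring_fd, uint16_t bgid,
				   uint32_t ring_entries)
{
	struct io_uring_buf_reg reg;

	memset(&reg, 0, sizeof(reg));
	reg.ring_entries = ring_entries;	/* must be a power of 2 */
	reg.bgid = bgid;
	reg.flags = IOU_PBUF_RING_MMAP;		/* ring_addr stays 0 */

	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_PBUF_RING, &reg, 1);
}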
||||
|
||||
|
@ -674,7 +706,9 @@ struct io_uring_sync_cancel_reg {
|
|||
__s32 fd;
|
||||
__u32 flags;
|
||||
struct __kernel_timespec timeout;
|
||||
__u64 pad[4];
|
||||
__u8 opcode;
|
||||
__u8 pad[7];
|
||||
__u64 pad2[3];
|
||||
};
|
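With the new opcode field and IORING_ASYNC_CANCEL_OP, a synchronous cancel can target every pending request of a given opcode. A hedged sketch (not from the patch): IORING_REGISTER_SYNC_CANCEL, the nr_args of 1, and the -1/-1 timeout meaning "wait without a deadline" are assumptions about the existing interface; IORING_OP_POLL_ADD is only an example opcode.

#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

/* Hypothetical: synchronously cancel every pending poll-add request. */
static int cancel_all_poll_requests(int ring_fd)
{
	struct io_uring_sync_cancel_reg sc;

	memset(&sc, 0, sizeof(sc));
	sc.flags = IORING_ASYNC_CANCEL_OP | IORING_ASYNC_CANCEL_ALL;
	sc.opcode = IORING_OP_POLL_ADD;
	sc.timeout.tv_sec = -1;			/* assumed: no deadline */
	sc.timeout.tv_nsec = -1;

	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_SYNC_CANCEL, &sc, 1);
}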
||||
|
||||
/*
|
||||
|
@ -694,6 +728,14 @@ struct io_uring_recvmsg_out {
|
|||
__u32 flags;
|
||||
};
|
||||
|
||||
/*
|
||||
* Argument for IORING_OP_URING_CMD when file is a socket
|
||||
*/
|
||||
enum {
|
||||
SOCKET_URING_OP_SIOCINQ = 0,
|
||||
SOCKET_URING_OP_SIOCOUTQ,
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -39,6 +39,7 @@ int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
ma->addr = READ_ONCE(sqe->addr);
|
||||
ma->len = READ_ONCE(sqe->len);
|
||||
ma->advice = READ_ONCE(sqe->fadvise_advice);
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
#else
|
||||
return -EOPNOTSUPP;
|
||||
|
@ -51,8 +52,7 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_madvise *ma = io_kiocb_to_cmd(req, struct io_madvise);
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
|
||||
io_req_set_res(req, ret, 0);
|
||||
|
@ -62,6 +62,18 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
|
|||
#endif
|
||||
}
|
||||
|
||||
static bool io_fadvise_force_async(struct io_fadvise *fa)
|
||||
{
|
||||
switch (fa->advice) {
|
||||
case POSIX_FADV_NORMAL:
|
||||
case POSIX_FADV_RANDOM:
|
||||
case POSIX_FADV_SEQUENTIAL:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_fadvise *fa = io_kiocb_to_cmd(req, struct io_fadvise);
|
||||
|
@ -72,6 +84,8 @@ int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
fa->offset = READ_ONCE(sqe->off);
|
||||
fa->len = READ_ONCE(sqe->len);
|
||||
fa->advice = READ_ONCE(sqe->fadvise_advice);
|
||||
if (io_fadvise_force_async(fa))
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -80,16 +94,7 @@ int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_fadvise *fa = io_kiocb_to_cmd(req, struct io_fadvise);
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK) {
|
||||
switch (fa->advice) {
|
||||
case POSIX_FADV_NORMAL:
|
||||
case POSIX_FADV_RANDOM:
|
||||
case POSIX_FADV_SEQUENTIAL:
|
||||
break;
|
||||
default:
|
||||
return -EAGAIN;
|
||||
}
|
||||
}
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK && io_fadvise_force_async(fa));
|
||||
|
||||
ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
|
||||
if (ret < 0)
|
||||
|
|
|
@ -7,47 +7,60 @@
|
|||
#define IO_ALLOC_CACHE_MAX 512
|
||||
|
||||
struct io_cache_entry {
|
||||
struct hlist_node node;
|
||||
struct io_wq_work_node node;
|
||||
};
|
||||
|
||||
static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
|
||||
struct io_cache_entry *entry)
|
||||
{
|
||||
if (cache->nr_cached < IO_ALLOC_CACHE_MAX) {
|
||||
if (cache->nr_cached < cache->max_cached) {
|
||||
cache->nr_cached++;
|
||||
hlist_add_head(&entry->node, &cache->list);
|
||||
wq_stack_add_head(&entry->node, &cache->list);
|
||||
/* KASAN poisons object */
|
||||
kasan_slab_free_mempool(entry);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool io_alloc_cache_empty(struct io_alloc_cache *cache)
|
||||
{
|
||||
return !cache->list.next;
|
||||
}
|
||||
|
||||
static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache)
|
||||
{
|
||||
if (!hlist_empty(&cache->list)) {
|
||||
struct hlist_node *node = cache->list.first;
|
||||
if (cache->list.next) {
|
||||
struct io_cache_entry *entry;
|
||||
|
||||
hlist_del(node);
|
||||
entry = container_of(cache->list.next, struct io_cache_entry, node);
|
||||
kasan_unpoison_range(entry, cache->elem_size);
|
||||
cache->list.next = cache->list.next->next;
|
||||
cache->nr_cached--;
|
||||
return container_of(node, struct io_cache_entry, node);
|
||||
return entry;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void io_alloc_cache_init(struct io_alloc_cache *cache)
|
||||
static inline void io_alloc_cache_init(struct io_alloc_cache *cache,
|
||||
unsigned max_nr, size_t size)
|
||||
{
|
||||
INIT_HLIST_HEAD(&cache->list);
|
||||
cache->list.next = NULL;
|
||||
cache->nr_cached = 0;
|
||||
cache->max_cached = max_nr;
|
||||
cache->elem_size = size;
|
||||
}
|
||||
|
||||
static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
|
||||
void (*free)(struct io_cache_entry *))
|
||||
{
|
||||
while (!hlist_empty(&cache->list)) {
|
||||
struct hlist_node *node = cache->list.first;
|
||||
while (1) {
|
||||
struct io_cache_entry *entry = io_alloc_cache_get(cache);
|
||||
|
||||
hlist_del(node);
|
||||
free(container_of(node, struct io_cache_entry, node));
|
||||
if (!entry)
|
||||
break;
|
||||
free(entry);
|
||||
}
|
||||
cache->nr_cached = 0;
|
||||
}
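For orientation, a hedged sketch of how a caller uses this cache API after the switch from an hlist to a wq_stack (the wrapper type and the kfree()-based free routine are illustrative, not taken from the patch):

/* Illustrative: an object recycled through the cache embeds the entry. */
struct my_cached_obj {
	struct io_cache_entry cache;
	/* ... cached per-request state ... */
};

static void my_cached_obj_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct my_cached_obj, cache));
}

/*
 * Setup:    io_alloc_cache_init(&cache, IO_ALLOC_CACHE_MAX, sizeof(struct my_cached_obj));
 * Recycle:  if (!io_alloc_cache_put(&cache, &obj->cache)) kfree(obj);
 * Reuse:    entry = io_alloc_cache_get(&cache);
 *           obj = entry ? container_of(entry, struct my_cached_obj, cache) : NULL;
 * Teardown: io_alloc_cache_free(&cache, my_cached_obj_free);
 */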
|
||||
|
|
|
@ -22,33 +22,54 @@ struct io_cancel {
|
|||
u64 addr;
|
||||
u32 flags;
|
||||
s32 fd;
|
||||
u8 opcode;
|
||||
};
|
||||
|
||||
#define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
|
||||
IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED)
|
||||
IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
|
||||
IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)
|
||||
|
||||
/*
|
||||
* Returns true if the request matches the criteria outlined by 'cd'.
|
||||
*/
|
||||
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
|
||||
{
|
||||
bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;
|
||||
|
||||
if (req->ctx != cd->ctx)
|
||||
return false;
|
||||
|
||||
if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
|
||||
match_user_data = true;
|
||||
|
||||
if (cd->flags & IORING_ASYNC_CANCEL_ANY)
|
||||
goto check_seq;
|
||||
if (cd->flags & IORING_ASYNC_CANCEL_FD) {
|
||||
if (req->file != cd->file)
|
||||
return false;
|
||||
}
|
||||
if (cd->flags & IORING_ASYNC_CANCEL_OP) {
|
||||
if (req->opcode != cd->opcode)
|
||||
return false;
|
||||
}
|
||||
if (match_user_data && req->cqe.user_data != cd->data)
|
||||
return false;
|
||||
if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
|
||||
check_seq:
|
||||
if (cd->seq == req->work.cancel_seq)
|
||||
return false;
|
||||
req->work.cancel_seq = cd->seq;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool io_cancel_cb(struct io_wq_work *work, void *data)
|
||||
{
|
||||
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
|
||||
struct io_cancel_data *cd = data;
|
||||
|
||||
if (req->ctx != cd->ctx)
|
||||
return false;
|
||||
if (cd->flags & IORING_ASYNC_CANCEL_ANY) {
|
||||
;
|
||||
} else if (cd->flags & IORING_ASYNC_CANCEL_FD) {
|
||||
if (req->file != cd->file)
|
||||
return false;
|
||||
} else {
|
||||
if (req->cqe.user_data != cd->data)
|
||||
return false;
|
||||
}
|
||||
if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
|
||||
if (cd->seq == req->work.cancel_seq)
|
||||
return false;
|
||||
req->work.cancel_seq = cd->seq;
|
||||
}
|
||||
return true;
|
||||
return io_cancel_req_match(req, cd);
|
||||
}
|
||||
|
||||
static int io_async_cancel_one(struct io_uring_task *tctx,
|
||||
|
@ -111,7 +132,7 @@ int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
|
||||
if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
|
||||
return -EINVAL;
|
||||
if (sqe->off || sqe->len || sqe->splice_fd_in)
|
||||
if (sqe->off || sqe->splice_fd_in)
|
||||
return -EINVAL;
|
||||
|
||||
cancel->addr = READ_ONCE(sqe->addr);
|
||||
|
@ -123,6 +144,11 @@ int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
return -EINVAL;
|
||||
cancel->fd = READ_ONCE(sqe->fd);
|
||||
}
|
||||
if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
|
||||
if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
|
||||
return -EINVAL;
|
||||
cancel->opcode = READ_ONCE(sqe->len);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -169,6 +195,7 @@ int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
|
|||
.ctx = req->ctx,
|
||||
.data = cancel->addr,
|
||||
.flags = cancel->flags,
|
||||
.opcode = cancel->opcode,
|
||||
.seq = atomic_inc_return(&req->ctx->cancel_seq),
|
||||
};
|
||||
struct io_uring_task *tctx = req->task->io_uring;
|
||||
|
@ -216,13 +243,10 @@ static int __io_sync_cancel(struct io_uring_task *tctx,
|
|||
/* fixed must be grabbed every time since we drop the uring_lock */
|
||||
if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
|
||||
(cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
|
||||
unsigned long file_ptr;
|
||||
|
||||
if (unlikely(fd >= ctx->nr_user_files))
|
||||
return -EBADF;
|
||||
fd = array_index_nospec(fd, ctx->nr_user_files);
|
||||
file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
|
||||
cd->file = (struct file *) (file_ptr & FFS_MASK);
|
||||
cd->file = io_file_from_index(&ctx->file_table, fd);
|
||||
if (!cd->file)
|
||||
return -EBADF;
|
||||
}
|
||||
|
@ -241,17 +265,22 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
|
|||
struct io_uring_sync_cancel_reg sc;
|
||||
struct fd f = { };
|
||||
DEFINE_WAIT(wait);
|
||||
int ret;
|
||||
int ret, i;
|
||||
|
||||
if (copy_from_user(&sc, arg, sizeof(sc)))
|
||||
return -EFAULT;
|
||||
if (sc.flags & ~CANCEL_FLAGS)
|
||||
return -EINVAL;
|
||||
if (sc.pad[0] || sc.pad[1] || sc.pad[2] || sc.pad[3])
|
||||
return -EINVAL;
|
||||
for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
|
||||
if (sc.pad[i])
|
||||
return -EINVAL;
|
||||
for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
|
||||
if (sc.pad2[i])
|
||||
return -EINVAL;
|
||||
|
||||
cd.data = sc.addr;
|
||||
cd.flags = sc.flags;
|
||||
cd.opcode = sc.opcode;
|
||||
|
||||
/* we can grab a normal file descriptor upfront */
|
||||
if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
|
||||
|
|
|
@ -8,11 +8,11 @@ struct io_cancel_data {
|
|||
u64 data;
|
||||
struct file *file;
|
||||
};
|
||||
u8 opcode;
|
||||
u32 flags;
|
||||
int seq;
|
||||
};
|
||||
|
||||
|
||||
int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
||||
|
@ -21,3 +21,4 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
|
|||
void init_hash_table(struct io_hash_table *table, unsigned size);
|
||||
|
||||
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg);
|
||||
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd);
|
||||
|
|
|
@ -48,10 +48,13 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
|
||||
struct seq_file *m)
|
||||
/*
|
||||
* Caller holds a reference to the file already, we don't need to do
|
||||
* anything else to get an extra reference.
|
||||
*/
|
||||
__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
|
||||
{
|
||||
struct io_sq_data *sq = NULL;
|
||||
struct io_ring_ctx *ctx = f->private_data;
|
||||
struct io_overflow_cqe *ocqe;
|
||||
struct io_rings *r = ctx->rings;
|
||||
unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
|
||||
|
@ -62,6 +65,7 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
|
|||
unsigned int cq_shift = 0;
|
||||
unsigned int sq_shift = 0;
|
||||
unsigned int sq_entries, cq_entries;
|
||||
int sq_pid = -1, sq_cpu = -1;
|
||||
bool has_lock;
|
||||
unsigned int i;
|
||||
|
||||
|
@ -91,6 +95,8 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
|
|||
struct io_uring_sqe *sqe;
|
||||
unsigned int sq_idx;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_NO_SQARRAY)
|
||||
break;
|
||||
sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]);
|
||||
if (sq_idx > sq_mask)
|
||||
continue;
|
||||
|
@ -139,13 +145,19 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
|
|||
has_lock = mutex_trylock(&ctx->uring_lock);
|
||||
|
||||
if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
|
||||
sq = ctx->sq_data;
|
||||
if (!sq->thread)
|
||||
sq = NULL;
|
||||
struct io_sq_data *sq = ctx->sq_data;
|
||||
|
||||
if (mutex_trylock(&sq->lock)) {
|
||||
if (sq->thread) {
|
||||
sq_pid = task_pid_nr(sq->thread);
|
||||
sq_cpu = task_cpu(sq->thread);
|
||||
}
|
||||
mutex_unlock(&sq->lock);
|
||||
}
|
||||
}
|
||||
|
||||
seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
|
||||
seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
|
||||
seq_printf(m, "SqThread:\t%d\n", sq_pid);
|
||||
seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
|
||||
seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
|
||||
for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
|
||||
struct file *f = io_file_from_index(&ctx->file_table, i);
|
||||
|
@ -205,14 +217,4 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
|
|||
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
|
||||
{
|
||||
struct io_ring_ctx *ctx = f->private_data;
|
||||
|
||||
if (percpu_ref_tryget(&ctx->refs)) {
|
||||
__io_uring_show_fdinfo(ctx, m);
|
||||
percpu_ref_put(&ctx->refs);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -64,7 +64,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
|
|||
u32 slot_index)
|
||||
__must_hold(&req->ctx->uring_lock)
|
||||
{
|
||||
bool needs_switch = false;
|
||||
struct io_fixed_file *file_slot;
|
||||
int ret;
|
||||
|
||||
|
@ -79,20 +78,13 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
|
|||
file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
|
||||
|
||||
if (file_slot->file_ptr) {
|
||||
struct file *old_file;
|
||||
|
||||
ret = io_rsrc_node_switch_start(ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
|
||||
ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
|
||||
ctx->rsrc_node, old_file);
|
||||
io_slot_file(file_slot));
|
||||
if (ret)
|
||||
goto err;
|
||||
return ret;
|
||||
|
||||
file_slot->file_ptr = 0;
|
||||
io_file_bitmap_clear(&ctx->file_table, slot_index);
|
||||
needs_switch = true;
|
||||
}
|
||||
|
||||
ret = io_scm_file_account(ctx, file);
|
||||
|
@ -101,9 +93,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
|
|||
io_fixed_file_set(file_slot, file);
|
||||
io_file_bitmap_set(&ctx->file_table, slot_index);
|
||||
}
|
||||
err:
|
||||
if (needs_switch)
|
||||
io_rsrc_node_switch(ctx, ctx->file_data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -149,30 +138,25 @@ int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
|
|||
int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
|
||||
{
|
||||
struct io_fixed_file *file_slot;
|
||||
struct file *file;
|
||||
int ret;
|
||||
|
||||
if (unlikely(!ctx->file_data))
|
||||
return -ENXIO;
|
||||
if (offset >= ctx->nr_user_files)
|
||||
return -EINVAL;
|
||||
ret = io_rsrc_node_switch_start(ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
offset = array_index_nospec(offset, ctx->nr_user_files);
|
||||
file_slot = io_fixed_file_slot(&ctx->file_table, offset);
|
||||
if (!file_slot->file_ptr)
|
||||
return -EBADF;
|
||||
|
||||
file = (struct file *)(file_slot->file_ptr & FFS_MASK);
|
||||
ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
|
||||
ret = io_queue_rsrc_removal(ctx->file_data, offset,
|
||||
io_slot_file(file_slot));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
file_slot->file_ptr = 0;
|
||||
io_file_bitmap_clear(&ctx->file_table, offset);
|
||||
io_rsrc_node_switch(ctx, ctx->file_data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,10 +5,6 @@
|
|||
#include <linux/file.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
#define FFS_NOWAIT 0x1UL
|
||||
#define FFS_ISREG 0x2UL
|
||||
#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG)
|
||||
|
||||
bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files);
|
||||
void io_free_file_tables(struct io_file_table *table);
|
||||
|
||||
|
@ -43,21 +39,31 @@ io_fixed_file_slot(struct io_file_table *table, unsigned i)
|
|||
return &table->files[i];
|
||||
}
|
||||
|
||||
#define FFS_NOWAIT 0x1UL
|
||||
#define FFS_ISREG 0x2UL
|
||||
#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG)
|
||||
|
||||
static inline unsigned int io_slot_flags(struct io_fixed_file *slot)
|
||||
{
|
||||
return (slot->file_ptr & ~FFS_MASK) << REQ_F_SUPPORT_NOWAIT_BIT;
|
||||
}
|
||||
|
||||
static inline struct file *io_slot_file(struct io_fixed_file *slot)
|
||||
{
|
||||
return (struct file *)(slot->file_ptr & FFS_MASK);
|
||||
}
|
||||
|
||||
static inline struct file *io_file_from_index(struct io_file_table *table,
|
||||
int index)
|
||||
{
|
||||
struct io_fixed_file *slot = io_fixed_file_slot(table, index);
|
||||
|
||||
return (struct file *) (slot->file_ptr & FFS_MASK);
|
||||
return io_slot_file(io_fixed_file_slot(table, index));
|
||||
}
|
||||
|
||||
static inline void io_fixed_file_set(struct io_fixed_file *file_slot,
|
||||
struct file *file)
|
||||
{
|
||||
unsigned long file_ptr = (unsigned long) file;
|
||||
|
||||
file_ptr |= io_file_get_flags(file);
|
||||
file_slot->file_ptr = file_ptr;
|
||||
file_slot->file_ptr = (unsigned long)file |
|
||||
(io_file_get_flags(file) >> REQ_F_SUPPORT_NOWAIT_BIT);
|
||||
}
|
||||
|
||||
static inline void io_reset_alloc_hint(struct io_ring_ctx *ctx)
|
||||
|
|
|
@ -74,6 +74,7 @@ int io_renameat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
}
|
||||
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -82,8 +83,7 @@ int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename);
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd,
|
||||
ren->newpath, ren->flags);
|
||||
|
@ -123,6 +123,7 @@ int io_unlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
return PTR_ERR(un->filename);
|
||||
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -131,8 +132,7 @@ int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink);
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
if (un->flags & AT_REMOVEDIR)
|
||||
ret = do_rmdir(un->dfd, un->filename);
|
||||
|
@ -170,6 +170,7 @@ int io_mkdirat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
return PTR_ERR(mkd->filename);
|
||||
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -178,8 +179,7 @@ int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir);
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
|
||||
|
||||
|
@ -220,6 +220,7 @@ int io_symlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
}
|
||||
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -228,8 +229,7 @@ int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_link *sl = io_kiocb_to_cmd(req, struct io_link);
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
|
||||
|
||||
|
@ -243,7 +243,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link);
|
||||
const char __user *oldf, *newf;
|
||||
|
||||
if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
|
||||
if (sqe->buf_index || sqe->splice_fd_in)
|
||||
return -EINVAL;
|
||||
if (unlikely(req->flags & REQ_F_FIXED_FILE))
|
||||
return -EBADF;
|
||||
|
@ -265,6 +265,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
}
|
||||
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -273,8 +274,7 @@ int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link);
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd,
|
||||
lnk->newpath, lnk->flags);
|
||||
|
|
io_uring/io-wq.c: 602 lines changed (diff suppressed because it is too large)
|
@ -50,8 +50,9 @@ void io_wq_put_and_exit(struct io_wq *wq);
|
|||
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
|
||||
void io_wq_hash_work(struct io_wq_work *work, void *val);
|
||||
|
||||
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
|
||||
int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
|
||||
int io_wq_max_workers(struct io_wq *wq, int *new_count);
|
||||
bool io_wq_worker_stopped(void);
|
||||
|
||||
static inline bool io_wq_is_hashed(struct io_wq_work *work)
|
||||
{
|
||||
|
|
io_uring/io_uring.c: 1502 lines changed (diff suppressed because it is too large)
|
@ -4,6 +4,7 @@
|
|||
#include <linux/errno.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/resume_user_mode.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
#include <uapi/linux/eventpoll.h>
|
||||
#include "io-wq.h"
|
||||
|
@ -14,6 +15,17 @@
|
|||
#include <trace/events/io_uring.h>
|
||||
#endif
|
||||
|
||||
enum {
|
||||
/*
|
||||
* A hint to not wake right away but delay until there are enough of
|
||||
* tw's queued to match the number of CQEs the task is waiting for.
|
||||
*
|
||||
* Must not be used with requests generating more than one CQE.
|
||||
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
|
||||
*/
|
||||
IOU_F_TWQ_LAZY_WAKE = 1,
|
||||
};
|
||||
|
||||
enum {
|
||||
IOU_OK = 0,
|
||||
IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED,
|
||||
|
@ -26,16 +38,13 @@ enum {
|
|||
IOU_STOP_MULTISHOT = -ECANCELED,
|
||||
};
|
||||
|
||||
struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow);
|
||||
bool io_req_cqe_overflow(struct io_kiocb *req);
|
||||
bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow);
|
||||
void io_req_cqe_overflow(struct io_kiocb *req);
|
||||
int io_run_task_work_sig(struct io_ring_ctx *ctx);
|
||||
int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked);
|
||||
int io_run_local_work(struct io_ring_ctx *ctx);
|
||||
void io_req_defer_failed(struct io_kiocb *req, s32 res);
|
||||
void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags);
|
||||
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
|
||||
bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
|
||||
bool allow_overflow);
|
||||
bool io_fill_cqe_req_aux(struct io_kiocb *req, bool defer, s32 res, u32 cflags);
|
||||
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
|
||||
|
||||
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
|
||||
|
@ -44,28 +53,26 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd);
|
|||
struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
|
||||
unsigned issue_flags);
|
||||
|
||||
static inline bool io_req_ffs_set(struct io_kiocb *req)
|
||||
{
|
||||
return req->flags & REQ_F_FIXED_FILE;
|
||||
}
|
||||
|
||||
void __io_req_task_work_add(struct io_kiocb *req, bool allow_local);
|
||||
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
|
||||
bool io_is_uring_fops(struct file *file);
|
||||
bool io_alloc_async_data(struct io_kiocb *req);
|
||||
void io_req_task_queue(struct io_kiocb *req);
|
||||
void io_queue_iowq(struct io_kiocb *req, bool *dont_use);
|
||||
void io_req_task_complete(struct io_kiocb *req, bool *locked);
|
||||
void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use);
|
||||
void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
void io_req_task_queue_fail(struct io_kiocb *req, int ret);
|
||||
void io_req_task_submit(struct io_kiocb *req, bool *locked);
|
||||
void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
void tctx_task_work(struct callback_head *cb);
|
||||
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
|
||||
int io_uring_alloc_task_context(struct task_struct *task,
|
||||
struct io_ring_ctx *ctx);
|
||||
|
||||
int io_poll_issue(struct io_kiocb *req, bool *locked);
|
||||
int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
|
||||
int start, int end);
|
||||
|
||||
int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
|
||||
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
|
||||
void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node);
|
||||
void __io_submit_flush_completions(struct io_ring_ctx *ctx);
|
||||
int io_req_prep_async(struct io_kiocb *req);
|
||||
|
||||
struct io_wq_work *io_wq_free_work(struct io_wq_work *work);
|
||||
|
@ -73,61 +80,73 @@ void io_wq_submit_work(struct io_wq_work *work);
|
|||
|
||||
void io_free_req(struct io_kiocb *req);
|
||||
void io_queue_next(struct io_kiocb *req);
|
||||
void __io_put_task(struct task_struct *task, int nr);
|
||||
void io_task_refs_refill(struct io_uring_task *tctx);
|
||||
bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
|
||||
|
||||
bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
|
||||
bool cancel_all);
|
||||
|
||||
#define io_lockdep_assert_cq_locked(ctx) \
|
||||
do { \
|
||||
if (ctx->flags & IORING_SETUP_IOPOLL) { \
|
||||
lockdep_assert_held(&ctx->uring_lock); \
|
||||
} else if (!ctx->task_complete) { \
|
||||
lockdep_assert_held(&ctx->completion_lock); \
|
||||
} else if (ctx->submitter_task->flags & PF_EXITING) { \
|
||||
lockdep_assert(current_work()); \
|
||||
} else { \
|
||||
lockdep_assert(current == ctx->submitter_task); \
|
||||
} \
|
||||
} while (0)
|
||||
#if defined(CONFIG_PROVE_LOCKING)
|
||||
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
|
||||
{
|
||||
lockdep_assert(in_task());
|
||||
|
||||
if (ctx->flags & IORING_SETUP_IOPOLL) {
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
} else if (!ctx->task_complete) {
|
||||
lockdep_assert_held(&ctx->completion_lock);
|
||||
} else if (ctx->submitter_task) {
|
||||
/*
|
||||
* ->submitter_task may be NULL and we can still post a CQE,
|
||||
* if the ring has been setup with IORING_SETUP_R_DISABLED.
|
||||
* Not from an SQE, as those cannot be submitted, but via
|
||||
* updating tagged resources.
|
||||
*/
|
||||
if (ctx->submitter_task->flags & PF_EXITING)
|
||||
lockdep_assert(current_work());
|
||||
else
|
||||
lockdep_assert(current == ctx->submitter_task);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void io_req_task_work_add(struct io_kiocb *req)
|
||||
{
|
||||
__io_req_task_work_add(req, true);
|
||||
__io_req_task_work_add(req, 0);
|
||||
}
|
||||
|
||||
#define io_for_each_link(pos, head) \
|
||||
for (pos = (head); pos; pos = pos->link)
|
||||
|
||||
void io_cq_unlock_post(struct io_ring_ctx *ctx);
|
||||
|
||||
static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
|
||||
bool overflow)
|
||||
static inline bool io_get_cqe_overflow(struct io_ring_ctx *ctx,
|
||||
struct io_uring_cqe **ret,
|
||||
bool overflow)
|
||||
{
|
||||
io_lockdep_assert_cq_locked(ctx);
|
||||
|
||||
if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
|
||||
struct io_uring_cqe *cqe = ctx->cqe_cached;
|
||||
|
||||
ctx->cached_cq_tail++;
|
||||
ctx->cqe_cached++;
|
||||
if (ctx->flags & IORING_SETUP_CQE32)
|
||||
ctx->cqe_cached++;
|
||||
return cqe;
|
||||
if (unlikely(ctx->cqe_cached >= ctx->cqe_sentinel)) {
|
||||
if (unlikely(!io_cqe_cache_refill(ctx, overflow)))
|
||||
return false;
|
||||
}
|
||||
|
||||
return __io_get_cqe(ctx, overflow);
|
||||
*ret = ctx->cqe_cached;
|
||||
ctx->cached_cq_tail++;
|
||||
ctx->cqe_cached++;
|
||||
if (ctx->flags & IORING_SETUP_CQE32)
|
||||
ctx->cqe_cached++;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
|
||||
static inline bool io_get_cqe(struct io_ring_ctx *ctx, struct io_uring_cqe **ret)
|
||||
{
|
||||
return io_get_cqe_overflow(ctx, false);
|
||||
return io_get_cqe_overflow(ctx, ret, false);
|
||||
}
|
||||
|
||||
static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
|
||||
struct io_kiocb *req)
|
||||
static __always_inline bool io_fill_cqe_req(struct io_ring_ctx *ctx,
|
||||
struct io_kiocb *req)
|
||||
{
|
||||
struct io_uring_cqe *cqe;
|
||||
|
||||
|
@ -136,39 +155,22 @@ static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
|
|||
* submission (by quite a lot). Increment the overflow count in
|
||||
* the ring.
|
||||
*/
|
||||
cqe = io_get_cqe(ctx);
|
||||
if (unlikely(!cqe))
|
||||
if (unlikely(!io_get_cqe(ctx, &cqe)))
|
||||
return false;
|
||||
|
||||
trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
|
||||
req->cqe.res, req->cqe.flags,
|
||||
(req->flags & REQ_F_CQE32_INIT) ? req->extra1 : 0,
|
||||
(req->flags & REQ_F_CQE32_INIT) ? req->extra2 : 0);
|
||||
if (trace_io_uring_complete_enabled())
|
||||
trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
|
||||
req->cqe.res, req->cqe.flags,
|
||||
req->big_cqe.extra1, req->big_cqe.extra2);
|
||||
|
||||
memcpy(cqe, &req->cqe, sizeof(*cqe));
|
||||
|
||||
if (ctx->flags & IORING_SETUP_CQE32) {
|
||||
u64 extra1 = 0, extra2 = 0;
|
||||
|
||||
if (req->flags & REQ_F_CQE32_INIT) {
|
||||
extra1 = req->extra1;
|
||||
extra2 = req->extra2;
|
||||
}
|
||||
|
||||
WRITE_ONCE(cqe->big_cqe[0], extra1);
|
||||
WRITE_ONCE(cqe->big_cqe[1], extra2);
|
||||
memcpy(cqe->big_cqe, &req->big_cqe, sizeof(*cqe));
|
||||
memset(&req->big_cqe, 0, sizeof(req->big_cqe));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool io_fill_cqe_req(struct io_ring_ctx *ctx,
|
||||
struct io_kiocb *req)
|
||||
{
|
||||
if (likely(__io_fill_cqe_req(ctx, req)))
|
||||
return true;
|
||||
return io_req_cqe_overflow(req);
|
||||
}
|
||||
|
||||
static inline void req_set_fail(struct io_kiocb *req)
|
||||
{
|
||||
req->flags |= REQ_F_FAIL;
|
||||
|
@ -189,10 +191,10 @@ static inline bool req_has_async_data(struct io_kiocb *req)
|
|||
return req->flags & REQ_F_ASYNC_DATA;
|
||||
}
|
||||
|
||||
static inline void io_put_file(struct file *file)
|
||||
static inline void io_put_file(struct io_kiocb *req)
|
||||
{
|
||||
if (file)
|
||||
fput(file);
|
||||
if (!(req->flags & REQ_F_FIXED_FILE) && req->file)
|
||||
fput(req->file);
|
||||
}
|
||||
|
||||
static inline void io_ring_submit_unlock(struct io_ring_ctx *ctx,
|
||||
|
@ -223,8 +225,14 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx)
|
|||
smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
|
||||
}
|
||||
|
||||
/* requires smp_mb() prior, see wq_has_sleeper() */
|
||||
static inline void __io_cqring_wake(struct io_ring_ctx *ctx)
|
||||
static inline void io_poll_wq_wake(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (wq_has_sleeper(&ctx->poll_wq))
|
||||
__wake_up(&ctx->poll_wq, TASK_NORMAL, 0,
|
||||
poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
|
||||
}
|
||||
|
||||
static inline void io_cqring_wake(struct io_ring_ctx *ctx)
|
||||
{
|
||||
/*
|
||||
* Trigger waitqueue handler on all waiters on our waitqueue. This
|
||||
|
@ -236,17 +244,11 @@ static inline void __io_cqring_wake(struct io_ring_ctx *ctx)
|
|||
* waitqueue handlers, we know we have a dependency between eventfd or
|
||||
* epoll and should terminate multishot poll at that point.
|
||||
*/
|
||||
if (waitqueue_active(&ctx->cq_wait))
|
||||
if (wq_has_sleeper(&ctx->cq_wait))
|
||||
__wake_up(&ctx->cq_wait, TASK_NORMAL, 0,
|
||||
poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
|
||||
}
|
||||
|
||||
static inline void io_cqring_wake(struct io_ring_ctx *ctx)
|
||||
{
|
||||
smp_mb();
|
||||
__io_cqring_wake(ctx);
|
||||
}
|
||||
|
||||
static inline bool io_sqring_full(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_rings *r = ctx->rings;
|
||||
|
@ -257,9 +259,11 @@ static inline bool io_sqring_full(struct io_ring_ctx *ctx)
|
|||
static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_rings *rings = ctx->rings;
|
||||
unsigned int entries;
|
||||
|
||||
/* make sure SQ entry isn't read before tail */
|
||||
return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
|
||||
entries = smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
|
||||
return min(entries, ctx->sq_entries);
|
||||
}
|
||||
|
||||
static inline int io_run_task_work(void)
|
||||
|
@ -294,47 +298,11 @@ static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
|
|||
return task_work_pending(current) || !wq_list_empty(&ctx->work_llist);
|
||||
}
|
||||
|
||||
static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx)
|
||||
static inline void io_tw_lock(struct io_ring_ctx *ctx, struct io_tw_state *ts)
|
||||
{
|
||||
int ret = 0;
|
||||
int ret2;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
|
||||
ret = io_run_local_work(ctx);
|
||||
|
||||
/* want to run this after in case more is added */
|
||||
ret2 = io_run_task_work();
|
||||
|
||||
/* Try propagate error in favour of if tasks were run,
|
||||
* but still make sure to run them if requested
|
||||
*/
|
||||
if (ret >= 0)
|
||||
ret += ret2;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int io_run_local_work_locked(struct io_ring_ctx *ctx)
|
||||
{
|
||||
bool locked;
|
||||
int ret;
|
||||
|
||||
if (llist_empty(&ctx->work_llist))
|
||||
return 0;
|
||||
|
||||
locked = true;
|
||||
ret = __io_run_local_work(ctx, &locked);
|
||||
/* shouldn't happen! */
|
||||
if (WARN_ON_ONCE(!locked))
|
||||
if (!ts->locked) {
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
|
||||
{
|
||||
if (!*locked) {
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
*locked = true;
|
||||
ts->locked = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -355,19 +323,11 @@ static inline void io_req_complete_defer(struct io_kiocb *req)
|
|||
|
||||
static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (unlikely(ctx->off_timeout_used || ctx->drain_active || ctx->has_evfd))
|
||||
if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
|
||||
ctx->has_evfd || ctx->poll_activated))
|
||||
__io_commit_cqring_flush(ctx);
|
||||
}
|
||||
|
||||
/* must be called somewhat shortly after putting a request */
|
||||
static inline void io_put_task(struct task_struct *task, int nr)
|
||||
{
|
||||
if (likely(task == current))
|
||||
task->io_uring->cached_refs += nr;
|
||||
else
|
||||
__io_put_task(task, nr);
|
||||
}
|
||||
|
||||
static inline void io_get_task_refs(int nr)
|
||||
{
|
||||
struct io_uring_task *tctx = current->io_uring;
|
||||
|
@ -382,19 +342,30 @@ static inline bool io_req_cache_empty(struct io_ring_ctx *ctx)
|
|||
return !ctx->submit_state.free_list.next;
|
||||
}
|
||||
|
||||
static inline bool io_alloc_req_refill(struct io_ring_ctx *ctx)
|
||||
extern struct kmem_cache *req_cachep;
|
||||
|
||||
static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (unlikely(io_req_cache_empty(ctx)))
|
||||
return __io_alloc_req_refill(ctx);
|
||||
struct io_kiocb *req;
|
||||
|
||||
req = container_of(ctx->submit_state.free_list.next, struct io_kiocb, comp_list);
|
||||
wq_stack_extract(&ctx->submit_state.free_list);
|
||||
return req;
|
||||
}
|
||||
|
||||
static inline bool io_alloc_req(struct io_ring_ctx *ctx, struct io_kiocb **req)
|
||||
{
|
||||
if (unlikely(io_req_cache_empty(ctx))) {
|
||||
if (!__io_alloc_req_refill(ctx))
|
||||
return false;
|
||||
}
|
||||
*req = io_extract_req(ctx);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
|
||||
static inline bool io_allowed_defer_tw_run(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_wq_work_node *node;
|
||||
|
||||
node = wq_stack_extract(&ctx->submit_state.free_list);
|
||||
return container_of(node, struct io_kiocb, comp_list);
|
||||
return likely(ctx->submitter_task == current);
|
||||
}
|
||||
|
||||
static inline bool io_allowed_run_tw(struct io_ring_ctx *ctx)
|
||||
|
@ -410,4 +381,14 @@ static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
|
|||
io_req_task_work_add(req);
|
||||
}
|
||||
|
||||
/*
|
||||
* IORING_SETUP_SQE128 contexts allocate twice the normal SQE size for each
|
||||
* slot.
|
||||
*/
|
||||
static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (ctx->flags & IORING_SETUP_SQE128)
|
||||
return 2 * sizeof(struct io_uring_sqe);
|
||||
return sizeof(struct io_uring_sqe);
|
||||
}
|
||||
#endif
|
||||
|
|
io_uring/kbuf.c: 167 lines changed
|
@ -137,7 +137,8 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
|
|||
return NULL;
|
||||
|
||||
head &= bl->mask;
|
||||
if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
|
||||
/* mmaped buffers are always contig */
|
||||
if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
|
||||
buf = &br->bufs[head];
|
||||
} else {
|
||||
int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
|
||||
|
@ -179,7 +180,7 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
|
|||
|
||||
bl = io_buffer_get_list(ctx, req->buf_index);
|
||||
if (likely(bl)) {
|
||||
if (bl->buf_nr_pages)
|
||||
if (bl->is_mapped)
|
||||
ret = io_ring_buffer_select(req, len, bl, issue_flags);
|
||||
else
|
||||
ret = io_provided_buffer_select(req, len, bl);
|
||||
|
@ -214,17 +215,24 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
|
|||
if (!nbufs)
|
||||
return 0;
|
||||
|
||||
if (bl->buf_nr_pages) {
|
||||
int j;
|
||||
|
||||
if (bl->is_mapped) {
|
||||
i = bl->buf_ring->tail - bl->head;
|
||||
for (j = 0; j < bl->buf_nr_pages; j++)
|
||||
unpin_user_page(bl->buf_pages[j]);
|
||||
kvfree(bl->buf_pages);
|
||||
bl->buf_pages = NULL;
|
||||
bl->buf_nr_pages = 0;
|
||||
if (bl->is_mmap) {
|
||||
folio_put(virt_to_folio(bl->buf_ring));
|
||||
bl->buf_ring = NULL;
|
||||
bl->is_mmap = 0;
|
||||
} else if (bl->buf_nr_pages) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < bl->buf_nr_pages; j++)
|
||||
unpin_user_page(bl->buf_pages[j]);
|
||||
kvfree(bl->buf_pages);
|
||||
bl->buf_pages = NULL;
|
||||
bl->buf_nr_pages = 0;
|
||||
}
|
||||
/* make sure it's seen as empty */
|
||||
INIT_LIST_HEAD(&bl->buf_list);
|
||||
bl->is_mapped = 0;
|
||||
return i;
|
||||
}
|
||||
|
||||
|
@ -304,7 +312,7 @@ int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
|
|||
if (bl) {
|
||||
ret = -EINVAL;
|
||||
/* can't use provide/remove buffers command on mapped buffers */
|
||||
if (!bl->buf_nr_pages)
|
||||
if (!bl->is_mapped)
|
||||
ret = __io_remove_buffers(ctx, bl, p->nbufs);
|
||||
}
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
|
@ -449,7 +457,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
|
|||
}
|
||||
}
|
||||
/* can't add buffers via this command for a mapped buffer ring */
|
||||
if (bl->buf_nr_pages) {
|
||||
if (bl->is_mapped) {
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
@ -464,23 +472,98 @@ err:
|
|||
return IOU_OK;
|
||||
}
|
||||
|
||||
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
||||
static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
|
||||
struct io_buffer_list *bl)
|
||||
{
|
||||
struct io_uring_buf_ring *br;
|
||||
struct page **pages;
|
||||
int i, nr_pages;
|
||||
|
||||
pages = io_pin_pages(reg->ring_addr,
|
||||
flex_array_size(br, bufs, reg->ring_entries),
|
||||
&nr_pages);
|
||||
if (IS_ERR(pages))
|
||||
return PTR_ERR(pages);
|
||||
|
||||
/*
|
||||
* Apparently some 32-bit boxes (ARM) will return highmem pages,
|
||||
* which then need to be mapped. We could support that, but it'd
|
||||
* complicate the code and slow down the common cases quite a bit.
|
||||
* So just error out, returning -EINVAL just like we did on kernels
|
||||
* that didn't support mapped buffer rings.
|
||||
*/
|
||||
for (i = 0; i < nr_pages; i++)
|
||||
if (PageHighMem(pages[i]))
|
||||
goto error_unpin;
|
||||
|
||||
br = page_address(pages[0]);
|
||||
#ifdef SHM_COLOUR
|
||||
/*
|
||||
* On platforms that have specific aliasing requirements, SHM_COLOUR
|
||||
* is set and we must guarantee that the kernel and user side align
|
||||
* nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
|
||||
* the application mmap's the provided ring buffer. Fail the request
|
||||
* if we, by chance, don't end up with aligned addresses. The app
|
||||
* should use IOU_PBUF_RING_MMAP instead, and liburing will handle
|
||||
* this transparently.
|
||||
*/
|
||||
if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
|
||||
goto error_unpin;
|
||||
#endif
|
||||
bl->buf_pages = pages;
|
||||
bl->buf_nr_pages = nr_pages;
|
||||
bl->buf_ring = br;
|
||||
bl->is_mapped = 1;
|
||||
bl->is_mmap = 0;
|
||||
return 0;
|
||||
error_unpin:
|
||||
for (i = 0; i < nr_pages; i++)
|
||||
unpin_user_page(pages[i]);
|
||||
kvfree(pages);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
|
||||
struct io_buffer_list *bl)
|
||||
{
|
||||
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
|
||||
size_t ring_size;
|
||||
void *ptr;
|
||||
|
||||
ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
|
||||
ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
|
||||
if (!ptr)
|
||||
return -ENOMEM;
|
||||
|
||||
bl->buf_ring = ptr;
|
||||
bl->is_mapped = 1;
|
||||
bl->is_mmap = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
||||
{
|
||||
struct io_uring_buf_reg reg;
|
||||
struct io_buffer_list *bl, *free_bl = NULL;
|
||||
struct page **pages;
|
||||
int nr_pages;
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&reg, arg, sizeof(reg)))
|
||||
return -EFAULT;
|
||||
|
||||
if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
|
||||
if (reg.resv[0] || reg.resv[1] || reg.resv[2])
|
||||
return -EINVAL;
|
||||
if (!reg.ring_addr)
|
||||
return -EFAULT;
|
||||
if (reg.ring_addr & ~PAGE_MASK)
|
||||
if (reg.flags & ~IOU_PBUF_RING_MMAP)
|
||||
return -EINVAL;
|
||||
if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
|
||||
if (!reg.ring_addr)
|
||||
return -EFAULT;
|
||||
if (reg.ring_addr & ~PAGE_MASK)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
if (reg.ring_addr)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!is_power_of_2(reg.ring_entries))
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -497,7 +580,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
|||
bl = io_buffer_get_list(ctx, reg.bgid);
|
||||
if (bl) {
|
||||
/* if mapped buffer ring OR classic exists, don't allow */
|
||||
if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
|
||||
if (bl->is_mapped || !list_empty(&bl->buf_list))
|
||||
return -EEXIST;
|
||||
} else {
|
||||
free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
|
||||
|
@ -505,22 +588,21 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pages = io_pin_pages(reg.ring_addr,
|
||||
flex_array_size(br, bufs, reg.ring_entries),
|
||||
&nr_pages);
|
||||
if (IS_ERR(pages)) {
|
||||
kfree(free_bl);
|
||||
return PTR_ERR(pages);
|
||||
if (!(reg.flags & IOU_PBUF_RING_MMAP))
|
||||
ret = io_pin_pbuf_ring(&reg, bl);
|
||||
else
|
||||
ret = io_alloc_pbuf_ring(&reg, bl);
|
||||
|
||||
if (!ret) {
|
||||
bl->nr_entries = reg.ring_entries;
|
||||
bl->mask = reg.ring_entries - 1;
|
||||
|
||||
io_buffer_add_list(ctx, bl, reg.bgid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
br = page_address(pages[0]);
|
||||
bl->buf_pages = pages;
|
||||
bl->buf_nr_pages = nr_pages;
|
||||
bl->nr_entries = reg.ring_entries;
|
||||
bl->buf_ring = br;
|
||||
bl->mask = reg.ring_entries - 1;
|
||||
io_buffer_add_list(ctx, bl, reg.bgid);
|
||||
return 0;
|
||||
kfree(free_bl);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
||||
|
@ -530,13 +612,15 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
|||
|
||||
if (copy_from_user(&reg, arg, sizeof(reg)))
|
||||
return -EFAULT;
|
||||
if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
|
||||
if (reg.resv[0] || reg.resv[1] || reg.resv[2])
|
||||
return -EINVAL;
|
||||
if (reg.flags)
|
||||
return -EINVAL;
|
||||
|
||||
bl = io_buffer_get_list(ctx, reg.bgid);
|
||||
if (!bl)
|
||||
return -ENOENT;
|
||||
if (!bl->buf_nr_pages)
|
||||
if (!bl->is_mapped)
|
||||
return -EINVAL;
|
||||
|
||||
__io_remove_buffers(ctx, bl, -1U);
|
||||
|
@ -546,3 +630,14 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
|
||||
{
|
||||
struct io_buffer_list *bl;
|
||||
|
||||
bl = io_buffer_get_list(ctx, bgid);
|
||||
if (!bl || !bl->is_mmap)
|
||||
return NULL;
|
||||
|
||||
return bl->buf_ring;
|
||||
}
|
||||
|
|
|
@ -23,6 +23,11 @@ struct io_buffer_list {
|
|||
__u16 nr_entries;
|
||||
__u16 head;
|
||||
__u16 mask;
|
||||
|
||||
/* ring mapped provided buffers */
|
||||
__u8 is_mapped;
|
||||
/* ring mapped provided buffers, but mmap'ed by application */
|
||||
__u8 is_mmap;
|
||||
};
|
||||
|
||||
struct io_buffer {
|
||||
|
@ -50,6 +55,8 @@ unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
|
|||
|
||||
void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
|
||||
|
||||
void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid);
|
||||
|
||||
static inline void io_kbuf_recycle_ring(struct io_kiocb *req)
|
||||
{
|
||||
/*
|
||||
|
|
|
@ -13,6 +13,11 @@
|
|||
#include "filetable.h"
|
||||
#include "msg_ring.h"
|
||||
|
||||
|
||||
/* All valid masks for MSG_RING */
|
||||
#define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \
|
||||
IORING_MSG_RING_FLAGS_PASS)
|
||||
|
||||
struct io_msg {
|
||||
struct file *file;
|
||||
struct file *src_file;
|
||||
|
@ -21,7 +26,10 @@ struct io_msg {
|
|||
u32 len;
|
||||
u32 cmd;
|
||||
u32 src_fd;
|
||||
u32 dst_fd;
|
||||
union {
|
||||
u32 dst_fd;
|
||||
u32 cqe_flags;
|
||||
};
|
||||
u32 flags;
|
||||
};
|
||||
|
||||
|
@ -91,6 +99,11 @@ static void io_msg_tw_complete(struct callback_head *head)
|
|||
if (current->flags & PF_EXITING) {
|
||||
ret = -EOWNERDEAD;
|
||||
} else {
|
||||
u32 flags = 0;
|
||||
|
||||
if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
|
||||
flags = msg->cqe_flags;
|
||||
|
||||
/*
|
||||
* If the target ring is using IOPOLL mode, then we need to be
|
||||
* holding the uring_lock for posting completions. Other ring
|
||||
|
@ -99,7 +112,7 @@ static void io_msg_tw_complete(struct callback_head *head)
|
|||
*/
|
||||
if (target_ctx->flags & IORING_SETUP_IOPOLL)
|
||||
mutex_lock(&target_ctx->uring_lock);
|
||||
if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
|
||||
if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
|
||||
ret = -EOVERFLOW;
|
||||
if (target_ctx->flags & IORING_SETUP_IOPOLL)
|
||||
mutex_unlock(&target_ctx->uring_lock);
|
||||
|
@ -114,9 +127,12 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
|
|||
{
|
||||
struct io_ring_ctx *target_ctx = req->file->private_data;
|
||||
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
|
||||
u32 flags = 0;
|
||||
int ret;
|
||||
|
||||
if (msg->src_fd || msg->dst_fd || msg->flags)
|
||||
if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
|
||||
return -EINVAL;
|
||||
if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
|
||||
return -EINVAL;
|
||||
if (target_ctx->flags & IORING_SETUP_R_DISABLED)
|
||||
return -EBADFD;
|
||||
|
@ -124,15 +140,18 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
|
|||
if (io_msg_need_remote(target_ctx))
|
||||
return io_msg_exec_remote(req, io_msg_tw_complete);
|
||||
|
||||
if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
|
||||
flags = msg->cqe_flags;
|
||||
|
||||
ret = -EOVERFLOW;
|
||||
if (target_ctx->flags & IORING_SETUP_IOPOLL) {
|
||||
if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
|
||||
return -EAGAIN;
|
||||
if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
|
||||
if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
|
||||
ret = 0;
|
||||
io_double_unlock_ctx(target_ctx);
|
||||
} else {
|
||||
if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
|
||||
if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
|
||||
ret = 0;
|
||||
}
|
||||
return ret;
|
||||
|
@ -143,14 +162,12 @@ static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_fl
|
|||
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct file *file = NULL;
|
||||
unsigned long file_ptr;
|
||||
int idx = msg->src_fd;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
if (likely(idx < ctx->nr_user_files)) {
|
||||
idx = array_index_nospec(idx, ctx->nr_user_files);
|
||||
file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr;
|
||||
file = (struct file *) (file_ptr & FFS_MASK);
|
||||
file = io_file_from_index(&ctx->file_table, idx);
|
||||
if (file)
|
||||
get_file(file);
|
||||
}
|
||||
|
@ -243,7 +260,7 @@ int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
msg->src_fd = READ_ONCE(sqe->addr3);
|
||||
msg->dst_fd = READ_ONCE(sqe->file_index);
|
||||
msg->flags = READ_ONCE(sqe->msg_ring_flags);
|
||||
if (msg->flags & ~IORING_MSG_RING_CQE_SKIP)
|
||||
if (msg->flags & ~IORING_MSG_RING_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
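Taken together with the dst_fd/cqe_flags union above, IORING_MSG_RING_FLAGS_PASS lets the sender choose the CQE flags posted in the target ring. A hedged userspace sketch of preparing such an SQE (not from the patch; the use of sqe->off for the delivered user_data and sqe->len for the res value follows the existing MSG_RING layout and is an assumption here):

#include <string.h>
#include <linux/io_uring.h>

/* Hypothetical: send a CQE to another ring, passing custom cqe->flags. */
static void prep_msg_ring_pass_flags(struct io_uring_sqe *sqe, int target_fd,
				     __u64 user_data, __s32 res, __u32 cqe_flags)
{
	memset(sqe, 0, sizeof(*sqe));		/* sqe->addr == 0 selects IORING_MSG_DATA */
	sqe->opcode = IORING_OP_MSG_RING;
	sqe->fd = target_fd;			/* fd of the target ring */
	sqe->off = user_data;			/* cqe->user_data in the target */
	sqe->len = (__u32)res;			/* cqe->res in the target */
	sqe->file_index = cqe_flags;		/* forwarded when FLAGS_PASS is set */
	sqe->msg_ring_flags = IORING_MSG_RING_FLAGS_PASS;
}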
|
io_uring/net.c: 101 lines changed
|
@ -92,6 +92,7 @@ int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
return -EINVAL;
|
||||
|
||||
shutdown->how = READ_ONCE(sqe->len);
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -101,8 +102,7 @@ int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct socket *sock;
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
sock = sock_from_file(req->file);
|
||||
if (unlikely(!sock))
|
||||
|
@ -183,10 +183,14 @@ static int io_setup_async_msg(struct io_kiocb *req,
|
|||
memcpy(async_msg, kmsg, sizeof(*kmsg));
|
||||
if (async_msg->msg.msg_name)
|
||||
async_msg->msg.msg_name = &async_msg->addr;
|
||||
|
||||
if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs)
|
||||
return -EAGAIN;
|
||||
|
||||
/* if we're using fast_iov, set it to the new one */
|
||||
if (!kmsg->free_iov) {
|
||||
size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
|
||||
async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
|
||||
if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) {
|
||||
size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov;
|
||||
async_msg->msg.msg_iter.__iov = &async_msg->fast_iov[fast_idx];
|
||||
}
|
||||
|
||||
return -EAGAIN;
|
||||
|
@ -354,7 +358,6 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct sockaddr_storage __address;
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct msghdr msg;
|
||||
struct iovec iov;
|
||||
struct socket *sock;
|
||||
unsigned flags;
|
||||
int min_ret = 0;
|
||||
|
@ -388,7 +391,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
|
|||
if (unlikely(!sock))
|
||||
return -ENOTSOCK;
|
||||
|
||||
ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
|
||||
ret = import_ubuf(WRITE, sr->buf, sr->len, &msg.msg_iter);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
|
@ -398,6 +401,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
|
|||
if (flags & MSG_WAITALL)
|
||||
min_ret = iov_iter_count(&msg.msg_iter);
|
||||
|
||||
flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
|
||||
msg.msg_flags = flags;
|
||||
ret = sock_sendmsg(sock, &msg);
|
||||
if (ret < min_ret) {
|
||||
|
@ -542,6 +546,7 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
|
|||
struct io_async_msghdr *iomsg)
|
||||
{
|
||||
iomsg->msg.msg_name = &iomsg->addr;
|
||||
iomsg->msg.msg_iter.nr_segs = 0;
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (req->ctx->compat)
|
||||
|
@ -625,9 +630,15 @@ static inline void io_recv_prep_retry(struct io_kiocb *req)
|
|||
* again (for multishot).
|
||||
*/
|
||||
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
|
||||
unsigned int cflags, bool mshot_finished,
|
||||
struct msghdr *msg, bool mshot_finished,
|
||||
unsigned issue_flags)
|
||||
{
|
||||
unsigned int cflags;
|
||||
|
||||
cflags = io_put_kbuf(req, issue_flags);
|
||||
if (msg->msg_inq && msg->msg_inq != -1)
|
||||
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
|
||||
|
||||
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
|
||||
io_req_set_res(req, *ret, cflags);
|
||||
*ret = IOU_OK;
|
||||
|
@ -635,10 +646,18 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
|
|||
}
|
||||
|
||||
if (!mshot_finished) {
|
||||
if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
|
||||
req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) {
|
||||
if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
|
||||
*ret, cflags | IORING_CQE_F_MORE)) {
|
||||
io_recv_prep_retry(req);
|
||||
return false;
|
||||
/* Known not-empty or unknown state, retry */
|
||||
if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
|
||||
msg->msg_inq == -1)
|
||||
return false;
|
||||
if (issue_flags & IO_URING_F_MULTISHOT)
|
||||
*ret = IOU_ISSUE_SKIP_COMPLETE;
|
||||
else
|
||||
*ret = -EAGAIN;
|
||||
return true;
|
||||
}
|
||||
/* Otherwise stop multishot but use the current result. */
|
||||
}
|
||||
|
@ -741,7 +760,6 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct io_async_msghdr iomsg, *kmsg;
|
||||
struct socket *sock;
|
||||
unsigned int cflags;
|
||||
unsigned flags;
|
||||
int ret, min_ret = 0;
|
||||
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
|
||||
|
@ -784,25 +802,26 @@ retry_multishot:
|
|||
}
|
||||
}
|
||||
|
||||
kmsg->fast_iov[0].iov_base = buf;
|
||||
kmsg->fast_iov[0].iov_len = len;
|
||||
iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
|
||||
len);
|
||||
iov_iter_ubuf(&kmsg->msg.msg_iter, READ, buf, len);
|
||||
}
|
||||
|
||||
flags = sr->msg_flags;
|
||||
if (force_nonblock)
|
||||
flags |= MSG_DONTWAIT;
|
||||
if (flags & MSG_WAITALL)
|
||||
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
|
||||
|
||||
kmsg->msg.msg_get_inq = 1;
|
||||
if (req->flags & REQ_F_APOLL_MULTISHOT)
|
||||
kmsg->msg.msg_inq = -1;
|
||||
if (req->flags & REQ_F_APOLL_MULTISHOT) {
|
||||
ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
|
||||
&mshot_finished);
|
||||
else
|
||||
} else {
|
||||
/* disable partial retry for recvmsg with cmsg attached */
|
||||
if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
|
||||
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
|
||||
|
||||
ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
|
||||
kmsg->uaddr, flags);
|
||||
}
|
||||
|
||||
if (ret < min_ret) {
|
||||
if (ret == -EAGAIN && force_nonblock) {
|
||||
|
@ -832,11 +851,7 @@ retry_multishot:
|
|||
else
|
||||
io_kbuf_recycle(req, issue_flags);
|
||||
|
||||
cflags = io_put_kbuf(req, issue_flags);
|
||||
if (kmsg->msg.msg_inq)
|
||||
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
|
||||
|
||||
if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags))
|
||||
if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
|
||||
goto retry_multishot;
|
||||
|
||||
if (mshot_finished) {
|
||||
|
@ -855,8 +870,6 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct msghdr msg;
|
||||
struct socket *sock;
|
||||
struct iovec iov;
|
||||
unsigned int cflags;
|
||||
unsigned flags;
|
||||
int ret, min_ret = 0;
|
||||
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
|
||||
|
@ -873,6 +886,14 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
|
|||
if (unlikely(!sock))
|
||||
return -ENOTSOCK;
|
||||
|
||||
msg.msg_name = NULL;
|
||||
msg.msg_namelen = 0;
|
||||
msg.msg_control = NULL;
|
||||
msg.msg_get_inq = 1;
|
||||
msg.msg_controllen = 0;
|
||||
msg.msg_iocb = NULL;
|
||||
msg.msg_ubuf = NULL;
|
||||
|
||||
retry_multishot:
|
||||
if (io_do_buffer_select(req)) {
|
||||
void __user *buf;
|
||||
|
@ -883,18 +904,12 @@ retry_multishot:
|
|||
sr->buf = buf;
|
||||
}
|
||||
|
||||
ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter);
|
||||
ret = import_ubuf(READ, sr->buf, len, &msg.msg_iter);
|
||||
if (unlikely(ret))
|
||||
goto out_free;
|
||||
|
||||
msg.msg_name = NULL;
|
||||
msg.msg_namelen = 0;
|
||||
msg.msg_control = NULL;
|
||||
msg.msg_get_inq = 1;
|
||||
msg.msg_inq = -1;
|
||||
msg.msg_flags = 0;
|
||||
msg.msg_controllen = 0;
|
||||
msg.msg_iocb = NULL;
|
||||
msg.msg_ubuf = NULL;
|
||||
|
||||
flags = sr->msg_flags;
|
||||
if (force_nonblock)
|
||||
|
@ -934,11 +949,7 @@ out_free:
|
|||
else
|
||||
io_kbuf_recycle(req, issue_flags);
|
||||
|
||||
cflags = io_put_kbuf(req, issue_flags);
|
||||
if (msg.msg_inq)
|
||||
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
|
||||
|
||||
if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags))
|
||||
if (!io_recv_finish(req, &ret, &msg, ret <= 0, issue_flags))
|
||||
goto retry_multishot;
|
||||
|
||||
return ret;
|
||||
|
@ -1094,7 +1105,6 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct sockaddr_storage __address;
|
||||
struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct msghdr msg;
|
||||
struct iovec iov;
|
||||
struct socket *sock;
|
||||
unsigned msg_flags;
|
||||
int ret, min_ret = 0;
|
||||
|
@ -1136,8 +1146,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
|
|||
msg.sg_from_iter = io_sg_from_iter;
|
||||
} else {
|
||||
io_notif_set_extended(zc->notif);
|
||||
ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
|
||||
&msg.msg_iter);
|
||||
ret = import_ubuf(WRITE, zc->buf, zc->len, &msg.msg_iter);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
ret = io_notif_account_mem(zc->notif, zc->len);
|
||||
|
@ -1151,6 +1160,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
|
|||
msg_flags |= MSG_DONTWAIT;
|
||||
if (msg_flags & MSG_WAITALL)
|
||||
min_ret = iov_iter_count(&msg.msg_iter);
|
||||
msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
|
||||
|
||||
msg.msg_flags = msg_flags;
|
||||
msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
|
||||
|
@ -1312,7 +1322,6 @@ int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
|
||||
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
|
||||
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
|
||||
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
|
||||
|
@ -1362,8 +1371,8 @@ retry:
|
|||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
|
||||
req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
|
||||
if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
|
||||
ret, IORING_CQE_F_MORE))
|
||||
goto retry;
|
||||
|
||||
return -ECANCELED;
|
||||
|
|
|
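The io_recv_finish() rework above passes the whole msghdr down so that IORING_CQE_F_SOCK_NONEMPTY is only set when msg_inq reports queued data. As an aside, a minimal userspace sketch of acting on that flag is included below; it is not part of this merge request, assumes liburing is installed, and treats sockfd as an already-connected socket.

/*
 * Sketch only (not from this merge request): read from "sockfd" and keep
 * issuing recvs for as long as the completion says the socket still has
 * data queued (IORING_CQE_F_SOCK_NONEMPTY). Assumes a connected socket.
 */
#include <errno.h>
#include <liburing.h>

static int drain_socket(int sockfd)
{
	struct io_uring ring;
	char buf[4096];
	int ret;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0)
		return ret;

	do {
		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
		struct io_uring_cqe *cqe;

		if (!sqe) {
			ret = -EBUSY;
			break;
		}
		io_uring_prep_recv(sqe, sockfd, buf, sizeof(buf), 0);
		io_uring_submit(&ring);

		ret = io_uring_wait_cqe(&ring, &cqe);
		if (ret < 0)
			break;
		ret = cqe->res;
		/* only loop again if the kernel reported more queued data */
		if (ret > 0 && !(cqe->flags & IORING_CQE_F_SOCK_NONEMPTY))
			ret = 0;
		io_uring_cqe_seen(&ring, cqe);
	} while (ret > 0);

	io_uring_queue_exit(&ring);
	return ret;
}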
@ -5,8 +5,8 @@

#include "alloc_cache.h"

#if defined(CONFIG_NET)
struct io_async_msghdr {
#if defined(CONFIG_NET)
	union {
		struct iovec fast_iov[UIO_FASTIOV];
		struct {

@ -22,8 +22,11 @@ struct io_async_msghdr {
	struct sockaddr __user *uaddr;
	struct msghdr msg;
	struct sockaddr_storage addr;
#endif
};

#if defined(CONFIG_NET)

struct io_async_connect {
	struct sockaddr_storage address;
};
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include "notif.h"
|
||||
#include "rsrc.h"
|
||||
|
||||
static void io_notif_complete_tw_ext(struct io_kiocb *notif, bool *locked)
|
||||
static void io_notif_complete_tw_ext(struct io_kiocb *notif, struct io_tw_state *ts)
|
||||
{
|
||||
struct io_notif_data *nd = io_notif_to_data(notif);
|
||||
struct io_ring_ctx *ctx = notif->ctx;
|
||||
|
@ -21,7 +21,7 @@ static void io_notif_complete_tw_ext(struct io_kiocb *notif, bool *locked)
|
|||
__io_unaccount_mem(ctx->user, nd->account_pages);
|
||||
nd->account_pages = 0;
|
||||
}
|
||||
io_req_task_complete(notif, locked);
|
||||
io_req_task_complete(notif, ts);
|
||||
}
|
||||
|
||||
static void io_tx_ubuf_callback(struct sk_buff *skb, struct ubuf_info *uarg,
|
||||
|
@ -31,7 +31,7 @@ static void io_tx_ubuf_callback(struct sk_buff *skb, struct ubuf_info *uarg,
|
|||
struct io_kiocb *notif = cmd_to_io_kiocb(nd);
|
||||
|
||||
if (refcount_dec_and_test(&uarg->refcnt))
|
||||
io_req_task_work_add(notif);
|
||||
__io_req_task_work_add(notif, IOU_F_TWQ_LAZY_WAKE);
|
||||
}
|
||||
|
||||
static void io_tx_ubuf_callback_ext(struct sk_buff *skb, struct ubuf_info *uarg,
|
||||
|
@ -68,9 +68,8 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
|
|||
struct io_kiocb *notif;
|
||||
struct io_notif_data *nd;
|
||||
|
||||
if (unlikely(!io_alloc_req_refill(ctx)))
|
||||
if (unlikely(!io_alloc_req(ctx, ¬if)))
|
||||
return NULL;
|
||||
notif = io_alloc_req(ctx);
|
||||
notif->opcode = IORING_OP_NOP;
|
||||
notif->flags = 0;
|
||||
notif->file = NULL;
|
||||
|
@ -80,7 +79,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
|
|||
notif->io_task_work.func = io_req_task_complete;
|
||||
|
||||
nd = io_notif_to_data(notif);
|
||||
nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
|
||||
nd->uarg.flags = IO_NOTIF_UBUF_FLAGS;
|
||||
nd->uarg.callback = io_tx_ubuf_callback;
|
||||
refcount_set(&nd->uarg.refcnt, 1);
|
||||
return notif;
|
||||
|
|
|
@ -7,6 +7,7 @@

#include "rsrc.h"

#define IO_NOTIF_UBUF_FLAGS	(SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN)
#define IO_NOTIF_SPLICE_BATCH	32

struct io_notif_data {

@ -33,7 +34,7 @@ static inline void io_notif_flush(struct io_kiocb *notif)

	/* drop slot's master ref */
	if (refcount_dec_and_test(&nd->uarg.refcnt))
		io_req_task_work_add(notif);
		__io_req_task_work_add(notif, IOU_F_TWQ_LAZY_WAKE);
}

static inline int io_notif_account_mem(struct io_kiocb *notif, unsigned len)
|
||||
|
|
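The notification changes above (IO_NOTIF_UBUF_FLAGS, lazy task-work wake) surface to userspace only as the extra notification CQE posted by zero-copy sends. A hedged sketch of reaping both CQEs with liburing follows; liburing >= 2.3 and a connected sockfd are assumptions, not something this diff provides.

/*
 * Sketch only: send a buffer with IORING_OP_SEND_ZC and reap both CQEs --
 * the immediate send result (flagged IORING_CQE_F_MORE) and the later
 * IORING_CQE_F_NOTIF completion that signals the kernel no longer needs
 * the buffer. Assumes "sockfd" is a connected socket and liburing >= 2.3.
 */
#include <stddef.h>
#include <liburing.h>

static int send_zc_once(int sockfd, const void *buf, size_t len)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret, cqes = 0, res = 0;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0)
		return ret;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
	io_uring_submit(&ring);

	/* first CQE: send result; second CQE: buffer-release notification */
	while (cqes < 2) {
		ret = io_uring_wait_cqe(&ring, &cqe);
		if (ret < 0)
			break;
		if (cqes == 0)
			res = cqe->res;		/* bytes sent (or error) */
		if (!(cqe->flags & IORING_CQE_F_MORE))
			cqes = 2;		/* no notification pending */
		else
			cqes++;
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return ret < 0 ? ret : res;
}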
339
io_uring/opdef.c
|
@ -46,11 +46,10 @@ static __maybe_unused int io_eopnotsupp_prep(struct io_kiocb *kiocb,
|
|||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
const struct io_op_def io_op_defs[] = {
|
||||
const struct io_issue_def io_issue_defs[] = {
|
||||
[IORING_OP_NOP] = {
|
||||
.audit_skip = 1,
|
||||
.iopoll = 1,
|
||||
.name = "NOP",
|
||||
.prep = io_nop_prep,
|
||||
.issue = io_nop,
|
||||
},
|
||||
|
@ -64,13 +63,8 @@ const struct io_op_def io_op_defs[] = {
|
|||
.ioprio = 1,
|
||||
.iopoll = 1,
|
||||
.iopoll_queue = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "READV",
|
||||
.prep = io_prep_rw,
|
||||
.issue = io_read,
|
||||
.prep_async = io_readv_prep_async,
|
||||
.cleanup = io_readv_writev_cleanup,
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_WRITEV] = {
|
||||
.needs_file = 1,
|
||||
|
@ -82,18 +76,12 @@ const struct io_op_def io_op_defs[] = {
|
|||
.ioprio = 1,
|
||||
.iopoll = 1,
|
||||
.iopoll_queue = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "WRITEV",
|
||||
.prep = io_prep_rw,
|
||||
.issue = io_write,
|
||||
.prep_async = io_writev_prep_async,
|
||||
.cleanup = io_readv_writev_cleanup,
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_FSYNC] = {
|
||||
.needs_file = 1,
|
||||
.audit_skip = 1,
|
||||
.name = "FSYNC",
|
||||
.prep = io_fsync_prep,
|
||||
.issue = io_fsync,
|
||||
},
|
||||
|
@ -106,11 +94,8 @@ const struct io_op_def io_op_defs[] = {
|
|||
.ioprio = 1,
|
||||
.iopoll = 1,
|
||||
.iopoll_queue = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "READ_FIXED",
|
||||
.prep = io_prep_rw,
|
||||
.issue = io_read,
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_WRITE_FIXED] = {
|
||||
.needs_file = 1,
|
||||
|
@ -122,30 +107,24 @@ const struct io_op_def io_op_defs[] = {
|
|||
.ioprio = 1,
|
||||
.iopoll = 1,
|
||||
.iopoll_queue = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "WRITE_FIXED",
|
||||
.prep = io_prep_rw,
|
||||
.issue = io_write,
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_POLL_ADD] = {
|
||||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.audit_skip = 1,
|
||||
.name = "POLL_ADD",
|
||||
.prep = io_poll_add_prep,
|
||||
.issue = io_poll_add,
|
||||
},
|
||||
[IORING_OP_POLL_REMOVE] = {
|
||||
.audit_skip = 1,
|
||||
.name = "POLL_REMOVE",
|
||||
.prep = io_poll_remove_prep,
|
||||
.issue = io_poll_remove,
|
||||
},
|
||||
[IORING_OP_SYNC_FILE_RANGE] = {
|
||||
.needs_file = 1,
|
||||
.audit_skip = 1,
|
||||
.name = "SYNC_FILE_RANGE",
|
||||
.prep = io_sfr_prep,
|
||||
.issue = io_sync_file_range,
|
||||
},
|
||||
|
@ -155,14 +134,9 @@ const struct io_op_def io_op_defs[] = {
|
|||
.pollout = 1,
|
||||
.ioprio = 1,
|
||||
.manual_alloc = 1,
|
||||
.name = "SENDMSG",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.prep = io_sendmsg_prep,
|
||||
.issue = io_sendmsg,
|
||||
.prep_async = io_sendmsg_prep_async,
|
||||
.cleanup = io_sendmsg_recvmsg_cleanup,
|
||||
.fail = io_sendrecv_fail,
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
|
@ -174,29 +148,21 @@ const struct io_op_def io_op_defs[] = {
|
|||
.buffer_select = 1,
|
||||
.ioprio = 1,
|
||||
.manual_alloc = 1,
|
||||
.name = "RECVMSG",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.prep = io_recvmsg_prep,
|
||||
.issue = io_recvmsg,
|
||||
.prep_async = io_recvmsg_prep_async,
|
||||
.cleanup = io_sendmsg_recvmsg_cleanup,
|
||||
.fail = io_sendrecv_fail,
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_TIMEOUT] = {
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_timeout_data),
|
||||
.name = "TIMEOUT",
|
||||
.prep = io_timeout_prep,
|
||||
.issue = io_timeout,
|
||||
},
|
||||
[IORING_OP_TIMEOUT_REMOVE] = {
|
||||
/* used by timeout updates' prep() */
|
||||
.audit_skip = 1,
|
||||
.name = "TIMEOUT_REMOVE",
|
||||
.prep = io_timeout_remove_prep,
|
||||
.issue = io_timeout_remove,
|
||||
},
|
||||
|
@ -206,7 +172,6 @@ const struct io_op_def io_op_defs[] = {
|
|||
.pollin = 1,
|
||||
.poll_exclusive = 1,
|
||||
.ioprio = 1, /* used for flags */
|
||||
.name = "ACCEPT",
|
||||
#if defined(CONFIG_NET)
|
||||
.prep = io_accept_prep,
|
||||
.issue = io_accept,
|
||||
|
@ -216,14 +181,11 @@ const struct io_op_def io_op_defs[] = {
|
|||
},
|
||||
[IORING_OP_ASYNC_CANCEL] = {
|
||||
.audit_skip = 1,
|
||||
.name = "ASYNC_CANCEL",
|
||||
.prep = io_async_cancel_prep,
|
||||
.issue = io_async_cancel,
|
||||
},
|
||||
[IORING_OP_LINK_TIMEOUT] = {
|
||||
.audit_skip = 1,
|
||||
.async_size = sizeof(struct io_timeout_data),
|
||||
.name = "LINK_TIMEOUT",
|
||||
.prep = io_link_timeout_prep,
|
||||
.issue = io_no_issue,
|
||||
},
|
||||
|
@ -231,46 +193,36 @@ const struct io_op_def io_op_defs[] = {
|
|||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
.name = "CONNECT",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_connect),
|
||||
.prep = io_connect_prep,
|
||||
.issue = io_connect,
|
||||
.prep_async = io_connect_prep_async,
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_FALLOCATE] = {
|
||||
.needs_file = 1,
|
||||
.name = "FALLOCATE",
|
||||
.prep = io_fallocate_prep,
|
||||
.issue = io_fallocate,
|
||||
},
|
||||
[IORING_OP_OPENAT] = {
|
||||
.name = "OPENAT",
|
||||
.prep = io_openat_prep,
|
||||
.issue = io_openat,
|
||||
.cleanup = io_open_cleanup,
|
||||
},
|
||||
[IORING_OP_CLOSE] = {
|
||||
.name = "CLOSE",
|
||||
.prep = io_close_prep,
|
||||
.issue = io_close,
|
||||
},
|
||||
[IORING_OP_FILES_UPDATE] = {
|
||||
.audit_skip = 1,
|
||||
.iopoll = 1,
|
||||
.name = "FILES_UPDATE",
|
||||
.prep = io_files_update_prep,
|
||||
.issue = io_files_update,
|
||||
},
|
||||
[IORING_OP_STATX] = {
|
||||
.audit_skip = 1,
|
||||
.name = "STATX",
|
||||
.prep = io_statx_prep,
|
||||
.issue = io_statx,
|
||||
.cleanup = io_statx_cleanup,
|
||||
},
|
||||
[IORING_OP_READ] = {
|
||||
.needs_file = 1,
|
||||
|
@ -282,11 +234,8 @@ const struct io_op_def io_op_defs[] = {
|
|||
.ioprio = 1,
|
||||
.iopoll = 1,
|
||||
.iopoll_queue = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "READ",
|
||||
.prep = io_prep_rw,
|
||||
.issue = io_read,
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_WRITE] = {
|
||||
.needs_file = 1,
|
||||
|
@ -298,22 +247,17 @@ const struct io_op_def io_op_defs[] = {
|
|||
.ioprio = 1,
|
||||
.iopoll = 1,
|
||||
.iopoll_queue = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "WRITE",
|
||||
.prep = io_prep_rw,
|
||||
.issue = io_write,
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_FADVISE] = {
|
||||
.needs_file = 1,
|
||||
.audit_skip = 1,
|
||||
.name = "FADVISE",
|
||||
.prep = io_fadvise_prep,
|
||||
.issue = io_fadvise,
|
||||
},
|
||||
[IORING_OP_MADVISE] = {
|
||||
.audit_skip = 1,
|
||||
.name = "MADVISE",
|
||||
.prep = io_madvise_prep,
|
||||
.issue = io_madvise,
|
||||
},
|
||||
|
@ -324,13 +268,9 @@ const struct io_op_def io_op_defs[] = {
|
|||
.audit_skip = 1,
|
||||
.ioprio = 1,
|
||||
.manual_alloc = 1,
|
||||
.name = "SEND",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.prep = io_sendmsg_prep,
|
||||
.issue = io_send,
|
||||
.fail = io_sendrecv_fail,
|
||||
.prep_async = io_send_prep_async,
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
|
@ -342,25 +282,20 @@ const struct io_op_def io_op_defs[] = {
|
|||
.buffer_select = 1,
|
||||
.audit_skip = 1,
|
||||
.ioprio = 1,
|
||||
.name = "RECV",
|
||||
#if defined(CONFIG_NET)
|
||||
.prep = io_recvmsg_prep,
|
||||
.issue = io_recv,
|
||||
.fail = io_sendrecv_fail,
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_OPENAT2] = {
|
||||
.name = "OPENAT2",
|
||||
.prep = io_openat2_prep,
|
||||
.issue = io_openat2,
|
||||
.cleanup = io_open_cleanup,
|
||||
},
|
||||
[IORING_OP_EPOLL_CTL] = {
|
||||
.unbound_nonreg_file = 1,
|
||||
.audit_skip = 1,
|
||||
.name = "EPOLL",
|
||||
#if defined(CONFIG_EPOLL)
|
||||
.prep = io_epoll_ctl_prep,
|
||||
.issue = io_epoll_ctl,
|
||||
|
@ -373,21 +308,18 @@ const struct io_op_def io_op_defs[] = {
|
|||
.hash_reg_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.audit_skip = 1,
|
||||
.name = "SPLICE",
|
||||
.prep = io_splice_prep,
|
||||
.issue = io_splice,
|
||||
},
|
||||
[IORING_OP_PROVIDE_BUFFERS] = {
|
||||
.audit_skip = 1,
|
||||
.iopoll = 1,
|
||||
.name = "PROVIDE_BUFFERS",
|
||||
.prep = io_provide_buffers_prep,
|
||||
.issue = io_provide_buffers,
|
||||
},
|
||||
[IORING_OP_REMOVE_BUFFERS] = {
|
||||
.audit_skip = 1,
|
||||
.iopoll = 1,
|
||||
.name = "REMOVE_BUFFERS",
|
||||
.prep = io_remove_buffers_prep,
|
||||
.issue = io_remove_buffers,
|
||||
},
|
||||
|
@ -396,13 +328,11 @@ const struct io_op_def io_op_defs[] = {
|
|||
.hash_reg_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.audit_skip = 1,
|
||||
.name = "TEE",
|
||||
.prep = io_tee_prep,
|
||||
.issue = io_tee,
|
||||
},
|
||||
[IORING_OP_SHUTDOWN] = {
|
||||
.needs_file = 1,
|
||||
.name = "SHUTDOWN",
|
||||
#if defined(CONFIG_NET)
|
||||
.prep = io_shutdown_prep,
|
||||
.issue = io_shutdown,
|
||||
|
@ -411,72 +341,51 @@ const struct io_op_def io_op_defs[] = {
|
|||
#endif
|
||||
},
|
||||
[IORING_OP_RENAMEAT] = {
|
||||
.name = "RENAMEAT",
|
||||
.prep = io_renameat_prep,
|
||||
.issue = io_renameat,
|
||||
.cleanup = io_renameat_cleanup,
|
||||
},
|
||||
[IORING_OP_UNLINKAT] = {
|
||||
.name = "UNLINKAT",
|
||||
.prep = io_unlinkat_prep,
|
||||
.issue = io_unlinkat,
|
||||
.cleanup = io_unlinkat_cleanup,
|
||||
},
|
||||
[IORING_OP_MKDIRAT] = {
|
||||
.name = "MKDIRAT",
|
||||
.prep = io_mkdirat_prep,
|
||||
.issue = io_mkdirat,
|
||||
.cleanup = io_mkdirat_cleanup,
|
||||
},
|
||||
[IORING_OP_SYMLINKAT] = {
|
||||
.name = "SYMLINKAT",
|
||||
.prep = io_symlinkat_prep,
|
||||
.issue = io_symlinkat,
|
||||
.cleanup = io_link_cleanup,
|
||||
},
|
||||
[IORING_OP_LINKAT] = {
|
||||
.name = "LINKAT",
|
||||
.prep = io_linkat_prep,
|
||||
.issue = io_linkat,
|
||||
.cleanup = io_link_cleanup,
|
||||
},
|
||||
[IORING_OP_MSG_RING] = {
|
||||
.needs_file = 1,
|
||||
.iopoll = 1,
|
||||
.name = "MSG_RING",
|
||||
.prep = io_msg_ring_prep,
|
||||
.issue = io_msg_ring,
|
||||
.cleanup = io_msg_ring_cleanup,
|
||||
},
|
||||
[IORING_OP_FSETXATTR] = {
|
||||
.needs_file = 1,
|
||||
.name = "FSETXATTR",
|
||||
.prep = io_fsetxattr_prep,
|
||||
.issue = io_fsetxattr,
|
||||
.cleanup = io_xattr_cleanup,
|
||||
},
|
||||
[IORING_OP_SETXATTR] = {
|
||||
.name = "SETXATTR",
|
||||
.prep = io_setxattr_prep,
|
||||
.issue = io_setxattr,
|
||||
.cleanup = io_xattr_cleanup,
|
||||
},
|
||||
[IORING_OP_FGETXATTR] = {
|
||||
.needs_file = 1,
|
||||
.name = "FGETXATTR",
|
||||
.prep = io_fgetxattr_prep,
|
||||
.issue = io_fgetxattr,
|
||||
.cleanup = io_xattr_cleanup,
|
||||
},
|
||||
[IORING_OP_GETXATTR] = {
|
||||
.name = "GETXATTR",
|
||||
.prep = io_getxattr_prep,
|
||||
.issue = io_getxattr,
|
||||
.cleanup = io_xattr_cleanup,
|
||||
},
|
||||
[IORING_OP_SOCKET] = {
|
||||
.audit_skip = 1,
|
||||
.name = "SOCKET",
|
||||
#if defined(CONFIG_NET)
|
||||
.prep = io_socket_prep,
|
||||
.issue = io_socket,
|
||||
|
@ -487,16 +396,12 @@ const struct io_op_def io_op_defs[] = {
|
|||
[IORING_OP_URING_CMD] = {
|
||||
.needs_file = 1,
|
||||
.plug = 1,
|
||||
.name = "URING_CMD",
|
||||
.iopoll = 1,
|
||||
.iopoll_queue = 1,
|
||||
.async_size = uring_cmd_pdu_size(1),
|
||||
.prep = io_uring_cmd_prep,
|
||||
.issue = io_uring_cmd,
|
||||
.prep_async = io_uring_cmd_prep_async,
|
||||
},
|
||||
[IORING_OP_SEND_ZC] = {
|
||||
.name = "SEND_ZC",
|
||||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
|
@ -504,32 +409,243 @@ const struct io_op_def io_op_defs[] = {
|
|||
.ioprio = 1,
|
||||
.manual_alloc = 1,
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.prep = io_send_zc_prep,
|
||||
.issue = io_send_zc,
|
||||
.prep_async = io_send_prep_async,
|
||||
.cleanup = io_send_zc_cleanup,
|
||||
.fail = io_sendrecv_fail,
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_SENDMSG_ZC] = {
|
||||
.name = "SENDMSG_ZC",
|
||||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
.ioprio = 1,
|
||||
.manual_alloc = 1,
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.prep = io_send_zc_prep,
|
||||
.issue = io_sendmsg_zc,
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
const struct io_cold_def io_cold_defs[] = {
|
||||
[IORING_OP_NOP] = {
|
||||
.name = "NOP",
|
||||
},
|
||||
[IORING_OP_READV] = {
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "READV",
|
||||
.prep_async = io_readv_prep_async,
|
||||
.cleanup = io_readv_writev_cleanup,
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_WRITEV] = {
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "WRITEV",
|
||||
.prep_async = io_writev_prep_async,
|
||||
.cleanup = io_readv_writev_cleanup,
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_FSYNC] = {
|
||||
.name = "FSYNC",
|
||||
},
|
||||
[IORING_OP_READ_FIXED] = {
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "READ_FIXED",
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_WRITE_FIXED] = {
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "WRITE_FIXED",
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_POLL_ADD] = {
|
||||
.name = "POLL_ADD",
|
||||
},
|
||||
[IORING_OP_POLL_REMOVE] = {
|
||||
.name = "POLL_REMOVE",
|
||||
},
|
||||
[IORING_OP_SYNC_FILE_RANGE] = {
|
||||
.name = "SYNC_FILE_RANGE",
|
||||
},
|
||||
[IORING_OP_SENDMSG] = {
|
||||
.name = "SENDMSG",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.prep_async = io_sendmsg_prep_async,
|
||||
.cleanup = io_sendmsg_recvmsg_cleanup,
|
||||
.fail = io_sendrecv_fail,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_RECVMSG] = {
|
||||
.name = "RECVMSG",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.prep_async = io_recvmsg_prep_async,
|
||||
.cleanup = io_sendmsg_recvmsg_cleanup,
|
||||
.fail = io_sendrecv_fail,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_TIMEOUT] = {
|
||||
.async_size = sizeof(struct io_timeout_data),
|
||||
.name = "TIMEOUT",
|
||||
},
|
||||
[IORING_OP_TIMEOUT_REMOVE] = {
|
||||
.name = "TIMEOUT_REMOVE",
|
||||
},
|
||||
[IORING_OP_ACCEPT] = {
|
||||
.name = "ACCEPT",
|
||||
},
|
||||
[IORING_OP_ASYNC_CANCEL] = {
|
||||
.name = "ASYNC_CANCEL",
|
||||
},
|
||||
[IORING_OP_LINK_TIMEOUT] = {
|
||||
.async_size = sizeof(struct io_timeout_data),
|
||||
.name = "LINK_TIMEOUT",
|
||||
},
|
||||
[IORING_OP_CONNECT] = {
|
||||
.name = "CONNECT",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_connect),
|
||||
.prep_async = io_connect_prep_async,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_FALLOCATE] = {
|
||||
.name = "FALLOCATE",
|
||||
},
|
||||
[IORING_OP_OPENAT] = {
|
||||
.name = "OPENAT",
|
||||
.cleanup = io_open_cleanup,
|
||||
},
|
||||
[IORING_OP_CLOSE] = {
|
||||
.name = "CLOSE",
|
||||
},
|
||||
[IORING_OP_FILES_UPDATE] = {
|
||||
.name = "FILES_UPDATE",
|
||||
},
|
||||
[IORING_OP_STATX] = {
|
||||
.name = "STATX",
|
||||
.cleanup = io_statx_cleanup,
|
||||
},
|
||||
[IORING_OP_READ] = {
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "READ",
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_WRITE] = {
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.name = "WRITE",
|
||||
.fail = io_rw_fail,
|
||||
},
|
||||
[IORING_OP_FADVISE] = {
|
||||
.name = "FADVISE",
|
||||
},
|
||||
[IORING_OP_MADVISE] = {
|
||||
.name = "MADVISE",
|
||||
},
|
||||
[IORING_OP_SEND] = {
|
||||
.name = "SEND",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.fail = io_sendrecv_fail,
|
||||
.prep_async = io_send_prep_async,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_RECV] = {
|
||||
.name = "RECV",
|
||||
#if defined(CONFIG_NET)
|
||||
.fail = io_sendrecv_fail,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_OPENAT2] = {
|
||||
.name = "OPENAT2",
|
||||
.cleanup = io_open_cleanup,
|
||||
},
|
||||
[IORING_OP_EPOLL_CTL] = {
|
||||
.name = "EPOLL",
|
||||
},
|
||||
[IORING_OP_SPLICE] = {
|
||||
.name = "SPLICE",
|
||||
},
|
||||
[IORING_OP_PROVIDE_BUFFERS] = {
|
||||
.name = "PROVIDE_BUFFERS",
|
||||
},
|
||||
[IORING_OP_REMOVE_BUFFERS] = {
|
||||
.name = "REMOVE_BUFFERS",
|
||||
},
|
||||
[IORING_OP_TEE] = {
|
||||
.name = "TEE",
|
||||
},
|
||||
[IORING_OP_SHUTDOWN] = {
|
||||
.name = "SHUTDOWN",
|
||||
},
|
||||
[IORING_OP_RENAMEAT] = {
|
||||
.name = "RENAMEAT",
|
||||
.cleanup = io_renameat_cleanup,
|
||||
},
|
||||
[IORING_OP_UNLINKAT] = {
|
||||
.name = "UNLINKAT",
|
||||
.cleanup = io_unlinkat_cleanup,
|
||||
},
|
||||
[IORING_OP_MKDIRAT] = {
|
||||
.name = "MKDIRAT",
|
||||
.cleanup = io_mkdirat_cleanup,
|
||||
},
|
||||
[IORING_OP_SYMLINKAT] = {
|
||||
.name = "SYMLINKAT",
|
||||
.cleanup = io_link_cleanup,
|
||||
},
|
||||
[IORING_OP_LINKAT] = {
|
||||
.name = "LINKAT",
|
||||
.cleanup = io_link_cleanup,
|
||||
},
|
||||
[IORING_OP_MSG_RING] = {
|
||||
.name = "MSG_RING",
|
||||
.cleanup = io_msg_ring_cleanup,
|
||||
},
|
||||
[IORING_OP_FSETXATTR] = {
|
||||
.name = "FSETXATTR",
|
||||
.cleanup = io_xattr_cleanup,
|
||||
},
|
||||
[IORING_OP_SETXATTR] = {
|
||||
.name = "SETXATTR",
|
||||
.cleanup = io_xattr_cleanup,
|
||||
},
|
||||
[IORING_OP_FGETXATTR] = {
|
||||
.name = "FGETXATTR",
|
||||
.cleanup = io_xattr_cleanup,
|
||||
},
|
||||
[IORING_OP_GETXATTR] = {
|
||||
.name = "GETXATTR",
|
||||
.cleanup = io_xattr_cleanup,
|
||||
},
|
||||
[IORING_OP_SOCKET] = {
|
||||
.name = "SOCKET",
|
||||
},
|
||||
[IORING_OP_URING_CMD] = {
|
||||
.name = "URING_CMD",
|
||||
.async_size = 2 * sizeof(struct io_uring_sqe),
|
||||
.prep_async = io_uring_cmd_prep_async,
|
||||
},
|
||||
[IORING_OP_SEND_ZC] = {
|
||||
.name = "SEND_ZC",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.prep_async = io_send_prep_async,
|
||||
.cleanup = io_send_zc_cleanup,
|
||||
.fail = io_sendrecv_fail,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_SENDMSG_ZC] = {
|
||||
.name = "SENDMSG_ZC",
|
||||
#if defined(CONFIG_NET)
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
.prep_async = io_sendmsg_prep_async,
|
||||
.cleanup = io_send_zc_cleanup,
|
||||
.fail = io_sendrecv_fail,
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
};
|
||||
|
@ -537,7 +653,7 @@ const struct io_op_def io_op_defs[] = {
const char *io_uring_get_opcode(u8 opcode)
{
	if (opcode < IORING_OP_LAST)
		return io_op_defs[opcode].name;
		return io_cold_defs[opcode].name;
	return "INVALID";
}

@ -545,12 +661,13 @@ void __init io_uring_optable_init(void)
{
	int i;

	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
	BUILD_BUG_ON(ARRAY_SIZE(io_cold_defs) != IORING_OP_LAST);
	BUILD_BUG_ON(ARRAY_SIZE(io_issue_defs) != IORING_OP_LAST);

	for (i = 0; i < ARRAY_SIZE(io_op_defs); i++) {
		BUG_ON(!io_op_defs[i].prep);
		if (io_op_defs[i].prep != io_eopnotsupp_prep)
			BUG_ON(!io_op_defs[i].issue);
		WARN_ON_ONCE(!io_op_defs[i].name);
	for (i = 0; i < ARRAY_SIZE(io_issue_defs); i++) {
		BUG_ON(!io_issue_defs[i].prep);
		if (io_issue_defs[i].prep != io_eopnotsupp_prep)
			BUG_ON(!io_issue_defs[i].issue);
		WARN_ON_ONCE(!io_cold_defs[i].name);
	}
|
@ -2,7 +2,7 @@
#ifndef IOU_OP_DEF_H
#define IOU_OP_DEF_H

struct io_op_def {
struct io_issue_def {
	/* needs req->file assigned */
	unsigned needs_file : 1;
	/* should block plug */

@ -29,19 +29,24 @@ struct io_op_def {
	unsigned iopoll_queue : 1;
	/* opcode specific path will handle ->async_data allocation if needed */
	unsigned manual_alloc : 1;

	int (*issue)(struct io_kiocb *, unsigned int);
	int (*prep)(struct io_kiocb *, const struct io_uring_sqe *);
};

struct io_cold_def {
	/* size of async data needed, if any */
	unsigned short async_size;

	const char *name;

	int (*prep)(struct io_kiocb *, const struct io_uring_sqe *);
	int (*issue)(struct io_kiocb *, unsigned int);
	int (*prep_async)(struct io_kiocb *);
	void (*cleanup)(struct io_kiocb *);
	void (*fail)(struct io_kiocb *);
};

extern const struct io_op_def io_op_defs[];
extern const struct io_issue_def io_issue_defs[];
extern const struct io_cold_def io_cold_defs[];

void io_uring_optable_init(void);
#endif
|
|
@ -31,6 +31,17 @@ struct io_close {
	u32 file_slot;
};

static bool io_openat_force_async(struct io_open *open)
{
	/*
	 * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
	 * it'll always -EAGAIN. Note that we test for __O_TMPFILE because
	 * O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force
	 * async for.
	 */
	return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE);
}

static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_open *open = io_kiocb_to_cmd(req, struct io_open);

@ -61,6 +72,8 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe

	open->nofile = rlimit(RLIMIT_NOFILE);
	req->flags |= REQ_F_NEED_CLEANUP;
	if (io_openat_force_async(open))
		req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

@ -108,12 +121,7 @@ int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
	nonblock_set = op.open_flag & O_NONBLOCK;
	resolve_nonblock = open->how.resolve & RESOLVE_CACHED;
	if (issue_flags & IO_URING_F_NONBLOCK) {
		/*
		 * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
		 * it'll always -EAGAIN
		 */
		if (open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE))
			return -EAGAIN;
		WARN_ON_ONCE(io_openat_force_async(open));
		op.lookup_flags |= LOOKUP_CACHED;
		op.open_flag |= O_NONBLOCK;
	}

@ -144,7 +152,6 @@ int io_openat2(struct io_kiocb *req, unsigned int issue_flags)

	if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
		file->f_flags &= ~O_NONBLOCK;
	fsnotify_open(file);

	if (!fixed)
		fd_install(ret, file);
|
||||
|
|
|
@ -51,6 +51,9 @@ struct io_poll_table {
|
|||
|
||||
#define IO_WQE_F_DOUBLE 1
|
||||
|
||||
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
|
||||
void *key);
|
||||
|
||||
static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
|
||||
{
|
||||
unsigned long priv = (unsigned long)wqe->private;
|
||||
|
@ -145,7 +148,7 @@ static void io_poll_req_insert_locked(struct io_kiocb *req)
|
|||
hlist_add_head(&req->hash_node, &table->hbs[index].list);
|
||||
}
|
||||
|
||||
static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
|
||||
static void io_poll_tw_hash_eject(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
|
@ -156,7 +159,7 @@ static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
|
|||
* already grabbed the mutex for us, but there is a chance it
|
||||
* failed.
|
||||
*/
|
||||
io_tw_lock(ctx, locked);
|
||||
io_tw_lock(ctx, ts);
|
||||
hash_del(&req->hash_node);
|
||||
req->flags &= ~REQ_F_HASH_LOCKED;
|
||||
} else {
|
||||
|
@ -164,15 +167,14 @@ static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
|
|||
}
|
||||
}
|
||||
|
||||
static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
|
||||
wait_queue_func_t wake_func)
|
||||
static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
|
||||
{
|
||||
poll->head = NULL;
|
||||
#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
|
||||
/* mask in events that we always want/need */
|
||||
poll->events = events | IO_POLL_UNMASK;
|
||||
INIT_LIST_HEAD(&poll->wait.entry);
|
||||
init_waitqueue_func_entry(&poll->wait, wake_func);
|
||||
init_waitqueue_func_entry(&poll->wait, io_poll_wake);
|
||||
}
|
||||
|
||||
static inline void io_poll_remove_entry(struct io_poll *poll)
|
||||
|
@ -236,7 +238,7 @@ enum {
|
|||
* req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot
|
||||
* poll and that the result is stored in req->cqe.
|
||||
*/
|
||||
static int io_poll_check_events(struct io_kiocb *req, bool *locked)
|
||||
static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
int v;
|
||||
|
||||
|
@ -298,13 +300,13 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
|
|||
__poll_t mask = mangle_poll(req->cqe.res &
|
||||
req->apoll_events);
|
||||
|
||||
if (!io_aux_cqe(req->ctx, *locked, req->cqe.user_data,
|
||||
mask, IORING_CQE_F_MORE, false)) {
|
||||
if (!io_fill_cqe_req_aux(req, ts->locked, mask,
|
||||
IORING_CQE_F_MORE)) {
|
||||
io_req_set_res(req, mask, 0);
|
||||
return IOU_POLL_REMOVE_POLL_USE_RES;
|
||||
}
|
||||
} else {
|
||||
int ret = io_poll_issue(req, locked);
|
||||
int ret = io_poll_issue(req, ts);
|
||||
if (ret == IOU_STOP_MULTISHOT)
|
||||
return IOU_POLL_REMOVE_POLL_USE_RES;
|
||||
if (ret < 0)
|
||||
|
@ -324,15 +326,15 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
|
|||
return IOU_POLL_NO_ACTION;
|
||||
}
|
||||
|
||||
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
|
||||
void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = io_poll_check_events(req, locked);
|
||||
ret = io_poll_check_events(req, ts);
|
||||
if (ret == IOU_POLL_NO_ACTION)
|
||||
return;
|
||||
io_poll_remove_entries(req);
|
||||
io_poll_tw_hash_eject(req, locked);
|
||||
io_poll_tw_hash_eject(req, ts);
|
||||
|
||||
if (req->opcode == IORING_OP_POLL_ADD) {
|
||||
if (ret == IOU_POLL_DONE) {
|
||||
|
@ -341,7 +343,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
|
|||
poll = io_kiocb_to_cmd(req, struct io_poll);
|
||||
req->cqe.res = mangle_poll(req->cqe.res & poll->events);
|
||||
} else if (ret == IOU_POLL_REISSUE) {
|
||||
io_req_task_submit(req, locked);
|
||||
io_req_task_submit(req, ts);
|
||||
return;
|
||||
} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
|
||||
req->cqe.res = ret;
|
||||
|
@ -349,14 +351,14 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
|
|||
}
|
||||
|
||||
io_req_set_res(req, req->cqe.res, 0);
|
||||
io_req_task_complete(req, locked);
|
||||
io_req_task_complete(req, ts);
|
||||
} else {
|
||||
io_tw_lock(req->ctx, locked);
|
||||
io_tw_lock(req->ctx, ts);
|
||||
|
||||
if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
|
||||
io_req_task_complete(req, locked);
|
||||
io_req_task_complete(req, ts);
|
||||
else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE)
|
||||
io_req_task_submit(req, locked);
|
||||
io_req_task_submit(req, ts);
|
||||
else
|
||||
io_req_defer_failed(req, ret);
|
||||
}
|
||||
|
@ -508,7 +510,7 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
|
|||
|
||||
/* mark as double wq entry */
|
||||
wqe_private |= IO_WQE_F_DOUBLE;
|
||||
io_init_poll_iocb(poll, first->events, first->wait.func);
|
||||
io_init_poll_iocb(poll, first->events);
|
||||
if (!io_poll_double_prepare(req)) {
|
||||
/* the request is completing, just back off */
|
||||
kfree(poll);
|
||||
|
@ -569,7 +571,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
|
|||
|
||||
INIT_HLIST_NODE(&req->hash_node);
|
||||
req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
|
||||
io_init_poll_iocb(poll, mask, io_poll_wake);
|
||||
io_init_poll_iocb(poll, mask);
|
||||
poll->file = req->file;
|
||||
req->apoll_events = poll->events;
|
||||
|
||||
|
@ -690,7 +692,7 @@ alloc_apoll:
|
|||
|
||||
int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
|
||||
{
|
||||
const struct io_op_def *def = &io_op_defs[req->opcode];
|
||||
const struct io_issue_def *def = &io_issue_defs[req->opcode];
|
||||
struct async_poll *apoll;
|
||||
struct io_poll_table ipt;
|
||||
__poll_t mask = POLLPRI | POLLERR | EPOLLET;
|
||||
|
@ -822,14 +824,10 @@ static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
|
|||
|
||||
spin_lock(&hb->lock);
|
||||
hlist_for_each_entry(req, &hb->list, hash_node) {
|
||||
if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
|
||||
req->file != cd->file)
|
||||
continue;
|
||||
if (cd->seq == req->work.cancel_seq)
|
||||
continue;
|
||||
req->work.cancel_seq = cd->seq;
|
||||
*out_bucket = hb;
|
||||
return req;
|
||||
if (io_cancel_req_match(req, cd)) {
|
||||
*out_bucket = hb;
|
||||
return req;
|
||||
}
|
||||
}
|
||||
spin_unlock(&hb->lock);
|
||||
}
|
||||
|
@ -853,7 +851,8 @@ static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
|
|||
struct io_hash_bucket *bucket;
|
||||
struct io_kiocb *req;
|
||||
|
||||
if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
|
||||
if (cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP |
|
||||
IORING_ASYNC_CANCEL_ANY))
|
||||
req = io_poll_file_find(ctx, cd, table, &bucket);
|
||||
else
|
||||
req = io_poll_find(ctx, false, cd, table, &bucket);
|
||||
|
@ -970,12 +969,12 @@ int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
|
|||
int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update);
|
||||
struct io_cancel_data cd = { .data = poll_update->old_user_data, };
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_cancel_data cd = { .ctx = ctx, .data = poll_update->old_user_data, };
|
||||
struct io_hash_bucket *bucket;
|
||||
struct io_kiocb *preq;
|
||||
int ret2, ret = 0;
|
||||
bool locked = true;
|
||||
struct io_tw_state ts = { .locked = true };
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
|
||||
|
@ -1024,7 +1023,7 @@ found:
|
|||
|
||||
req_set_fail(preq);
|
||||
io_req_set_res(preq, -ECANCELED, 0);
|
||||
io_req_task_complete(preq, &locked);
|
||||
io_req_task_complete(preq, &ts);
|
||||
out:
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
if (ret < 0) {
|
||||
|
|
|
@ -38,3 +38,5 @@ bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			bool cancel_all);

void io_apoll_cache_free(struct io_cache_entry *entry);

void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
|
|
419
io_uring/rsrc.c
|
@ -23,24 +23,21 @@ struct io_rsrc_update {
|
|||
u32 offset;
|
||||
};
|
||||
|
||||
static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
|
||||
static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
|
||||
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
|
||||
struct io_mapped_ubuf **pimu,
|
||||
struct page **last_hpage);
|
||||
|
||||
#define IO_RSRC_REF_BATCH 100
|
||||
|
||||
/* only define max */
|
||||
#define IORING_MAX_FIXED_FILES (1U << 20)
|
||||
#define IORING_MAX_REG_BUFFERS (1U << 14)
|
||||
|
||||
void io_rsrc_refs_drop(struct io_ring_ctx *ctx)
|
||||
__must_hold(&ctx->uring_lock)
|
||||
{
|
||||
if (ctx->rsrc_cached_refs) {
|
||||
io_rsrc_put_node(ctx->rsrc_node, ctx->rsrc_cached_refs);
|
||||
ctx->rsrc_cached_refs = 0;
|
||||
}
|
||||
}
|
||||
static const struct io_mapped_ubuf dummy_ubuf = {
|
||||
/* set invalid range, so io_import_fixed() fails meeting it */
|
||||
.ubuf = -1UL,
|
||||
.ubuf_end = 0,
|
||||
};
|
||||
|
||||
int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
|
||||
{
|
||||
|
@ -141,7 +138,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
|
|||
struct io_mapped_ubuf *imu = *slot;
|
||||
unsigned int i;
|
||||
|
||||
if (imu != ctx->dummy_ubuf) {
|
||||
if (imu != &dummy_ubuf) {
|
||||
for (i = 0; i < imu->nr_bvecs; i++)
|
||||
unpin_user_page(imu->bvec[i].bv_page);
|
||||
if (imu->acct_pages)
|
||||
|
@ -151,216 +148,129 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
|
|||
*slot = NULL;
|
||||
}
|
||||
|
||||
void io_rsrc_refs_refill(struct io_ring_ctx *ctx)
|
||||
__must_hold(&ctx->uring_lock)
|
||||
static void io_rsrc_put_work(struct io_rsrc_node *node)
|
||||
{
|
||||
ctx->rsrc_cached_refs += IO_RSRC_REF_BATCH;
|
||||
percpu_ref_get_many(&ctx->rsrc_node->refs, IO_RSRC_REF_BATCH);
|
||||
}
|
||||
struct io_rsrc_put *prsrc = &node->item;
|
||||
|
||||
static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
|
||||
{
|
||||
struct io_rsrc_data *rsrc_data = ref_node->rsrc_data;
|
||||
struct io_ring_ctx *ctx = rsrc_data->ctx;
|
||||
struct io_rsrc_put *prsrc, *tmp;
|
||||
if (prsrc->tag)
|
||||
io_post_aux_cqe(node->ctx, prsrc->tag, 0, 0);
|
||||
|
||||
list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) {
|
||||
list_del(&prsrc->list);
|
||||
|
||||
if (prsrc->tag) {
|
||||
if (ctx->flags & IORING_SETUP_IOPOLL) {
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
} else {
|
||||
io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
rsrc_data->do_put(ctx, prsrc);
|
||||
kfree(prsrc);
|
||||
}
|
||||
|
||||
io_rsrc_node_destroy(ref_node);
|
||||
if (atomic_dec_and_test(&rsrc_data->refs))
|
||||
complete(&rsrc_data->done);
|
||||
}
|
||||
|
||||
void io_rsrc_put_work(struct work_struct *work)
|
||||
{
|
||||
struct io_ring_ctx *ctx;
|
||||
struct llist_node *node;
|
||||
|
||||
ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work);
|
||||
node = llist_del_all(&ctx->rsrc_put_llist);
|
||||
|
||||
while (node) {
|
||||
struct io_rsrc_node *ref_node;
|
||||
struct llist_node *next = node->next;
|
||||
|
||||
ref_node = llist_entry(node, struct io_rsrc_node, llist);
|
||||
__io_rsrc_put_work(ref_node);
|
||||
node = next;
|
||||
switch (node->type) {
|
||||
case IORING_RSRC_FILE:
|
||||
io_rsrc_file_put(node->ctx, prsrc);
|
||||
break;
|
||||
case IORING_RSRC_BUFFER:
|
||||
io_rsrc_buf_put(node->ctx, prsrc);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void io_rsrc_put_tw(struct callback_head *cb)
|
||||
void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
|
||||
{
|
||||
struct io_ring_ctx *ctx = container_of(cb, struct io_ring_ctx,
|
||||
rsrc_put_tw);
|
||||
|
||||
io_rsrc_put_work(&ctx->rsrc_put_work.work);
|
||||
if (!io_alloc_cache_put(&ctx->rsrc_node_cache, &node->cache))
|
||||
kfree(node);
|
||||
}
|
||||
|
||||
void io_wait_rsrc_data(struct io_rsrc_data *data)
|
||||
void io_rsrc_node_ref_zero(struct io_rsrc_node *node)
|
||||
__must_hold(&node->ctx->uring_lock)
|
||||
{
|
||||
if (data && !atomic_dec_and_test(&data->refs))
|
||||
wait_for_completion(&data->done);
|
||||
}
|
||||
|
||||
void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
|
||||
{
|
||||
percpu_ref_exit(&ref_node->refs);
|
||||
kfree(ref_node);
|
||||
}
|
||||
|
||||
static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
|
||||
{
|
||||
struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
|
||||
struct io_ring_ctx *ctx = node->rsrc_data->ctx;
|
||||
unsigned long flags;
|
||||
bool first_add = false;
|
||||
unsigned long delay = HZ;
|
||||
|
||||
spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
|
||||
node->done = true;
|
||||
|
||||
/* if we are mid-quiesce then do not delay */
|
||||
if (node->rsrc_data->quiesce)
|
||||
delay = 0;
|
||||
struct io_ring_ctx *ctx = node->ctx;
|
||||
|
||||
while (!list_empty(&ctx->rsrc_ref_list)) {
|
||||
node = list_first_entry(&ctx->rsrc_ref_list,
|
||||
struct io_rsrc_node, node);
|
||||
/* recycle ref nodes in order */
|
||||
if (!node->done)
|
||||
if (node->refs)
|
||||
break;
|
||||
list_del(&node->node);
|
||||
first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
|
||||
}
|
||||
spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
|
||||
|
||||
if (!first_add)
|
||||
return;
|
||||
|
||||
if (ctx->submitter_task) {
|
||||
if (!task_work_add(ctx->submitter_task, &ctx->rsrc_put_tw,
|
||||
ctx->notify_method))
|
||||
return;
|
||||
if (likely(!node->empty))
|
||||
io_rsrc_put_work(node);
|
||||
io_rsrc_node_destroy(ctx, node);
|
||||
}
|
||||
mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
|
||||
if (list_empty(&ctx->rsrc_ref_list) && unlikely(ctx->rsrc_quiesce))
|
||||
wake_up_all(&ctx->rsrc_quiesce_wq);
|
||||
}
|
||||
|
||||
static struct io_rsrc_node *io_rsrc_node_alloc(void)
|
||||
struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_rsrc_node *ref_node;
|
||||
struct io_cache_entry *entry;
|
||||
|
||||
ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
|
||||
if (!ref_node)
|
||||
return NULL;
|
||||
|
||||
if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
|
||||
0, GFP_KERNEL)) {
|
||||
kfree(ref_node);
|
||||
return NULL;
|
||||
entry = io_alloc_cache_get(&ctx->rsrc_node_cache);
|
||||
if (entry) {
|
||||
ref_node = container_of(entry, struct io_rsrc_node, cache);
|
||||
} else {
|
||||
ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
|
||||
if (!ref_node)
|
||||
return NULL;
|
||||
}
|
||||
INIT_LIST_HEAD(&ref_node->node);
|
||||
INIT_LIST_HEAD(&ref_node->rsrc_list);
|
||||
ref_node->done = false;
|
||||
|
||||
ref_node->ctx = ctx;
|
||||
ref_node->empty = 0;
|
||||
ref_node->refs = 1;
|
||||
return ref_node;
|
||||
}
|
||||
|
||||
void io_rsrc_node_switch(struct io_ring_ctx *ctx,
|
||||
struct io_rsrc_data *data_to_kill)
|
||||
__must_hold(&ctx->uring_lock)
|
||||
{
|
||||
WARN_ON_ONCE(!ctx->rsrc_backup_node);
|
||||
WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node);
|
||||
|
||||
io_rsrc_refs_drop(ctx);
|
||||
|
||||
if (data_to_kill) {
|
||||
struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
|
||||
|
||||
rsrc_node->rsrc_data = data_to_kill;
|
||||
spin_lock_irq(&ctx->rsrc_ref_lock);
|
||||
list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
|
||||
spin_unlock_irq(&ctx->rsrc_ref_lock);
|
||||
|
||||
atomic_inc(&data_to_kill->refs);
|
||||
percpu_ref_kill(&rsrc_node->refs);
|
||||
ctx->rsrc_node = NULL;
|
||||
}
|
||||
|
||||
if (!ctx->rsrc_node) {
|
||||
ctx->rsrc_node = ctx->rsrc_backup_node;
|
||||
ctx->rsrc_backup_node = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (ctx->rsrc_backup_node)
|
||||
return 0;
|
||||
ctx->rsrc_backup_node = io_rsrc_node_alloc();
|
||||
return ctx->rsrc_backup_node ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
__cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
|
||||
struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_rsrc_node *backup;
|
||||
DEFINE_WAIT(we);
|
||||
int ret;
|
||||
|
||||
/* As we may drop ->uring_lock, other task may have started quiesce */
|
||||
/* As We may drop ->uring_lock, other task may have started quiesce */
|
||||
if (data->quiesce)
|
||||
return -ENXIO;
|
||||
ret = io_rsrc_node_switch_start(ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
io_rsrc_node_switch(ctx, data);
|
||||
|
||||
/* kill initial ref, already quiesced if zero */
|
||||
if (atomic_dec_and_test(&data->refs))
|
||||
backup = io_rsrc_node_alloc(ctx);
|
||||
if (!backup)
|
||||
return -ENOMEM;
|
||||
ctx->rsrc_node->empty = true;
|
||||
ctx->rsrc_node->type = -1;
|
||||
list_add_tail(&ctx->rsrc_node->node, &ctx->rsrc_ref_list);
|
||||
io_put_rsrc_node(ctx, ctx->rsrc_node);
|
||||
ctx->rsrc_node = backup;
|
||||
|
||||
if (list_empty(&ctx->rsrc_ref_list))
|
||||
return 0;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
|
||||
atomic_set(&ctx->cq_wait_nr, 1);
|
||||
smp_mb();
|
||||
}
|
||||
|
||||
ctx->rsrc_quiesce++;
|
||||
data->quiesce = true;
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
do {
|
||||
prepare_to_wait(&ctx->rsrc_quiesce_wq, &we, TASK_INTERRUPTIBLE);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
|
||||
ret = io_run_task_work_sig(ctx);
|
||||
if (ret < 0) {
|
||||
atomic_inc(&data->refs);
|
||||
/* wait for all works potentially completing data->done */
|
||||
flush_delayed_work(&ctx->rsrc_put_work);
|
||||
reinit_completion(&data->done);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
if (list_empty(&ctx->rsrc_ref_list))
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
flush_delayed_work(&ctx->rsrc_put_work);
|
||||
ret = wait_for_completion_interruptible(&data->done);
|
||||
if (!ret) {
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
if (atomic_read(&data->refs) <= 0)
|
||||
break;
|
||||
/*
|
||||
* it has been revived by another thread while
|
||||
* we were unlocked
|
||||
*/
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
}
|
||||
} while (1);
|
||||
data->quiesce = false;
|
||||
schedule();
|
||||
__set_current_state(TASK_RUNNING);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
ret = 0;
|
||||
} while (!list_empty(&ctx->rsrc_ref_list));
|
||||
|
||||
finish_wait(&ctx->rsrc_quiesce_wq, &we);
|
||||
data->quiesce = false;
|
||||
ctx->rsrc_quiesce--;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
|
||||
atomic_set(&ctx->cq_wait_nr, 0);
|
||||
smp_mb();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -405,12 +315,12 @@ static __cold void **io_alloc_page_table(size_t size)
|
|||
return table;
|
||||
}
|
||||
|
||||
__cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
|
||||
rsrc_put_fn *do_put, u64 __user *utags,
|
||||
__cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, int type,
|
||||
u64 __user *utags,
|
||||
unsigned nr, struct io_rsrc_data **pdata)
|
||||
{
|
||||
struct io_rsrc_data *data;
|
||||
int ret = -ENOMEM;
|
||||
int ret = 0;
|
||||
unsigned i;
|
||||
|
||||
data = kzalloc(sizeof(*data), GFP_KERNEL);
|
||||
|
@ -424,7 +334,7 @@ __cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
|
|||
|
||||
data->nr = nr;
|
||||
data->ctx = ctx;
|
||||
data->do_put = do_put;
|
||||
data->rsrc_type = type;
|
||||
if (utags) {
|
||||
ret = -EFAULT;
|
||||
for (i = 0; i < nr; i++) {
|
||||
|
@ -435,9 +345,6 @@ __cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
|
|||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
atomic_set(&data->refs, 1);
|
||||
init_completion(&data->done);
|
||||
*pdata = data;
|
||||
return 0;
|
||||
fail:
|
||||
|
@ -453,10 +360,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
|||
__s32 __user *fds = u64_to_user_ptr(up->data);
|
||||
struct io_rsrc_data *data = ctx->file_data;
|
||||
struct io_fixed_file *file_slot;
|
||||
struct file *file;
|
||||
int fd, i, err = 0;
|
||||
unsigned int done;
|
||||
bool needs_switch = false;
|
||||
|
||||
if (!ctx->file_data)
|
||||
return -ENXIO;
|
||||
|
@ -482,16 +387,16 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
|||
file_slot = io_fixed_file_slot(&ctx->file_table, i);
|
||||
|
||||
if (file_slot->file_ptr) {
|
||||
file = (struct file *)(file_slot->file_ptr & FFS_MASK);
|
||||
err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file);
|
||||
err = io_queue_rsrc_removal(data, i,
|
||||
io_slot_file(file_slot));
|
||||
if (err)
|
||||
break;
|
||||
file_slot->file_ptr = 0;
|
||||
io_file_bitmap_clear(&ctx->file_table, i);
|
||||
needs_switch = true;
|
||||
}
|
||||
if (fd != -1) {
|
||||
file = fget(fd);
|
||||
struct file *file = fget(fd);
|
||||
|
||||
if (!file) {
|
||||
err = -EBADF;
|
||||
break;
|
||||
|
@ -519,9 +424,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
|||
io_file_bitmap_set(&ctx->file_table, i);
|
||||
}
|
||||
}
|
||||
|
||||
if (needs_switch)
|
||||
io_rsrc_node_switch(ctx, data);
|
||||
return done ? done : err;
|
||||
}
|
||||
|
||||
|
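With the rsrc-node switching gone from __io_sqe_files_update() above, the userspace contract is unchanged: fixed-file tables are still registered and patched through the same register calls. A minimal liburing sketch follows; it assumes liburing >= 2.2 for io_uring_register_files_sparse(), and the slot number and table size are arbitrary examples.

/*
 * Sketch (assumes liburing >= 2.2): register a sparse fixed-file table and
 * later drop one descriptor into it with a files-update call.
 */
#include <liburing.h>

static int register_and_update(struct io_uring *ring, int fd)
{
	int update_fd = fd;
	int ret;

	/* reserve 16 empty fixed-file slots up front */
	ret = io_uring_register_files_sparse(ring, 16);
	if (ret < 0)
		return ret;

	/* install "fd" into slot 3; an entry of -1 would clear a slot */
	ret = io_uring_register_files_update(ring, 3, &update_fd, 1);
	if (ret < 0)
		return ret;

	/* requests can now set IOSQE_FIXED_FILE and use ->fd = 3 */
	return 0;
}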
@ -532,7 +434,6 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
|
|||
u64 __user *tags = u64_to_user_ptr(up->tags);
|
||||
struct iovec iov, __user *iovs = u64_to_user_ptr(up->data);
|
||||
struct page *last_hpage = NULL;
|
||||
bool needs_switch = false;
|
||||
__u32 done;
|
||||
int i, err;
|
||||
|
||||
|
@ -543,7 +444,6 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
|
|||
|
||||
for (done = 0; done < nr_args; done++) {
|
||||
struct io_mapped_ubuf *imu;
|
||||
int offset = up->offset + done;
|
||||
u64 tag = 0;
|
||||
|
||||
err = io_copy_iov(ctx, &iov, iovs, done);
|
||||
|
@ -564,24 +464,20 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
|
|||
if (err)
|
||||
break;
|
||||
|
||||
i = array_index_nospec(offset, ctx->nr_user_bufs);
|
||||
if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
|
||||
i = array_index_nospec(up->offset + done, ctx->nr_user_bufs);
|
||||
if (ctx->user_bufs[i] != &dummy_ubuf) {
|
||||
err = io_queue_rsrc_removal(ctx->buf_data, i,
|
||||
ctx->rsrc_node, ctx->user_bufs[i]);
|
||||
ctx->user_bufs[i]);
|
||||
if (unlikely(err)) {
|
||||
io_buffer_unmap(ctx, &imu);
|
||||
break;
|
||||
}
|
||||
ctx->user_bufs[i] = ctx->dummy_ubuf;
|
||||
needs_switch = true;
|
||||
ctx->user_bufs[i] = (struct io_mapped_ubuf *)&dummy_ubuf;
|
||||
}
|
||||
|
||||
ctx->user_bufs[i] = imu;
|
||||
*io_get_tag_slot(ctx->buf_data, i) = tag;
|
||||
}
|
||||
|
||||
if (needs_switch)
|
||||
io_rsrc_node_switch(ctx, ctx->buf_data);
|
||||
return done ? done : err;
|
||||
}
|
||||
|
||||
|
@ -590,13 +486,11 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
|
|||
unsigned nr_args)
|
||||
{
|
||||
__u32 tmp;
|
||||
int err;
|
||||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
|
||||
if (check_add_overflow(up->offset, nr_args, &tmp))
|
||||
return -EOVERFLOW;
|
||||
err = io_rsrc_node_switch_start(ctx);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
switch (type) {
|
||||
case IORING_RSRC_FILE:
|
||||
|
@ -753,20 +647,24 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
|
|||
return IOU_OK;
|
||||
}
|
||||
|
||||
int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
|
||||
struct io_rsrc_node *node, void *rsrc)
|
||||
int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, void *rsrc)
|
||||
{
|
||||
struct io_ring_ctx *ctx = data->ctx;
|
||||
struct io_rsrc_node *node = ctx->rsrc_node;
|
||||
u64 *tag_slot = io_get_tag_slot(data, idx);
|
||||
struct io_rsrc_put *prsrc;
|
||||
|
||||
prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
|
||||
if (!prsrc)
|
||||
ctx->rsrc_node = io_rsrc_node_alloc(ctx);
|
||||
if (unlikely(!ctx->rsrc_node)) {
|
||||
ctx->rsrc_node = node;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
prsrc->tag = *tag_slot;
|
||||
node->item.rsrc = rsrc;
|
||||
node->type = data->rsrc_type;
|
||||
node->item.tag = *tag_slot;
|
||||
*tag_slot = 0;
|
||||
prsrc->rsrc = rsrc;
|
||||
list_add(&prsrc->list, &node->rsrc_list);
|
||||
list_add_tail(&node->node, &ctx->rsrc_ref_list);
|
||||
io_put_rsrc_node(ctx, node);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -881,20 +779,14 @@ int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
return 0;
}

static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
static __cold void io_rsrc_file_scm_put(struct io_ring_ctx *ctx, struct file *file)
{
struct file *file = prsrc->file;
#if defined(CONFIG_UNIX)
struct sock *sock = ctx->ring_sock->sk;
struct sk_buff_head list, *head = &sock->sk_receive_queue;
struct sk_buff *skb;
int i;

if (!io_file_need_scm(file)) {
fput(file);
return;
}

__skb_queue_head_init(&list);

/*
@@ -944,11 +836,19 @@ static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
__skb_queue_tail(head, skb);
spin_unlock_irq(&head->lock);
}
#else
fput(file);
#endif
}
static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
{
struct file *file = prsrc->file;

if (likely(!io_file_need_scm(file)))
fput(file);
else
io_rsrc_file_scm_put(ctx, file);
}

int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args, u64 __user *tags)
{
@@ -965,10 +865,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return -EMFILE;
if (nr_args > rlimit(RLIMIT_NOFILE))
return -EMFILE;
ret = io_rsrc_node_switch_start(ctx);
if (ret)
return ret;
ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args,
ret = io_rsrc_data_alloc(ctx, IORING_RSRC_FILE, tags, nr_args,
&ctx->file_data);
if (ret)
return ret;
@@ -1022,7 +919,6 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,

/* default it to the whole table */
io_file_table_set_alloc_range(ctx, 0, ctx->nr_user_files);
io_rsrc_node_switch(ctx, NULL);
return 0;
fail:
__io_sqe_files_unregister(ctx);
@@ -1207,8 +1103,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
unsigned long off;
size_t size;
int ret, nr_pages, i;
struct folio *folio = NULL;

*pimu = ctx->dummy_ubuf;
*pimu = (struct io_mapped_ubuf *)&dummy_ubuf;
if (!iov->iov_base)
return 0;

@@ -1221,6 +1118,32 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
goto done;
}

/* If it's a huge page, try to coalesce them into a single bvec entry */
if (nr_pages > 1) {
folio = page_folio(pages[0]);
for (i = 1; i < nr_pages; i++) {
/*
* Pages must be consecutive and on the same folio for
* this to work
*/
if (page_folio(pages[i]) != folio ||
pages[i] != pages[i - 1] + 1) {
folio = NULL;
break;
}
}
if (folio) {
/*
* The pages are bound to the folio, it doesn't
* actually unpin them but drops all but one reference,
* which is usually put down by io_buffer_unmap().
* Note, needs a better helper.
*/
unpin_user_pages(&pages[1], nr_pages - 1);
nr_pages = 1;
}
}

imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
if (!imu)
goto done;
@@ -1233,22 +1156,25 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,

off = (unsigned long) iov->iov_base & ~PAGE_MASK;
size = iov->iov_len;
for (i = 0; i < nr_pages; i++) {
size_t vec_len;

vec_len = min_t(size_t, size, PAGE_SIZE - off);
imu->bvec[i].bv_page = pages[i];
imu->bvec[i].bv_len = vec_len;
imu->bvec[i].bv_offset = off;
off = 0;
size -= vec_len;
}
/* store original address for later verification */
imu->ubuf = (unsigned long) iov->iov_base;
imu->ubuf_end = imu->ubuf + iov->iov_len;
imu->nr_bvecs = nr_pages;
*pimu = imu;
ret = 0;

if (folio) {
bvec_set_page(&imu->bvec[0], pages[0], size, off);
goto done;
}
for (i = 0; i < nr_pages; i++) {
size_t vec_len;

vec_len = min_t(size_t, size, PAGE_SIZE - off);
bvec_set_page(&imu->bvec[i], pages[i], vec_len, off);
off = 0;
size -= vec_len;
}
done:
if (ret)
kvfree(imu);
@@ -1276,10 +1202,7 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
return -EBUSY;
if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
return -EINVAL;
ret = io_rsrc_node_switch_start(ctx);
if (ret)
return ret;
ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data);
ret = io_rsrc_data_alloc(ctx, IORING_RSRC_BUFFER, tags, nr_args, &data);
if (ret)
return ret;
ret = io_buffers_map_alloc(ctx, nr_args);
@@ -1316,8 +1239,6 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
ctx->buf_data = data;
if (ret)
__io_sqe_buffers_unregister(ctx);
else
io_rsrc_node_switch(ctx, NULL);
return ret;
}

@@ -1337,7 +1258,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
return -EFAULT;

/*
* May not be a start of buffer, set size appropriately
* Might not be a start of buffer, set size appropriately
* and advance us to the beginning.
*/
offset = buf_addr - imu->ubuf;
@@ -1363,7 +1284,15 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
const struct bio_vec *bvec = imu->bvec;

if (offset <= bvec->bv_len) {
iov_iter_advance(iter, offset);
/*
* Note, huge pages buffers consists of one large
* bvec entry and should always go this way. The other
* branch doesn't expect non PAGE_SIZE'd chunks.
*/
iter->bvec = bvec;
iter->nr_segs = bvec->bv_len;
iter->count -= offset;
iter->iov_offset = offset;
} else {
unsigned long seg_skip;
@@ -4,6 +4,10 @@

#include <net/af_unix.h>

#include "alloc_cache.h"

#define IO_NODE_ALLOC_CACHE_MAX 32

#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3)
#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT)
#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1)
@@ -14,7 +18,6 @@ enum {
};

struct io_rsrc_put {
struct list_head list;
u64 tag;
union {
void *rsrc;
@@ -30,19 +33,20 @@ struct io_rsrc_data {

u64 **tags;
unsigned int nr;
rsrc_put_fn *do_put;
atomic_t refs;
struct completion done;
u16 rsrc_type;
bool quiesce;
};

struct io_rsrc_node {
struct percpu_ref refs;
union {
struct io_cache_entry cache;
struct io_ring_ctx *ctx;
};
int refs;
bool empty;
u16 type;
struct list_head node;
struct list_head rsrc_list;
struct io_rsrc_data *rsrc_data;
struct llist_node llist;
bool done;
struct io_rsrc_put item;
};
struct io_mapped_ubuf {
@@ -53,17 +57,10 @@ struct io_mapped_ubuf {
struct bio_vec bvec[];
};

void io_rsrc_put_tw(struct callback_head *cb);
void io_rsrc_put_work(struct work_struct *work);
void io_rsrc_refs_refill(struct io_ring_ctx *ctx);
void io_wait_rsrc_data(struct io_rsrc_data *data);
void io_rsrc_node_destroy(struct io_rsrc_node *ref_node);
void io_rsrc_refs_drop(struct io_ring_ctx *ctx);
int io_rsrc_node_switch_start(struct io_ring_ctx *ctx);
int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
struct io_rsrc_node *node, void *rsrc);
void io_rsrc_node_switch(struct io_ring_ctx *ctx,
struct io_rsrc_data *data_to_kill);
void io_rsrc_node_ref_zero(struct io_rsrc_node *node);
void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *ref_node);
struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, void *rsrc);

int io_import_fixed(int ddir, struct iov_iter *iter,
struct io_mapped_ubuf *imu,
@@ -107,36 +104,24 @@ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
unsigned int size, unsigned int type);

static inline void io_rsrc_put_node(struct io_rsrc_node *node, int nr)
static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
{
percpu_ref_put_many(&node->refs, nr);
}
lockdep_assert_held(&ctx->uring_lock);

static inline void io_req_put_rsrc(struct io_kiocb *req)
{
if (req->rsrc_node)
io_rsrc_put_node(req->rsrc_node, 1);
if (node && !--node->refs)
io_rsrc_node_ref_zero(node);
}

static inline void io_req_put_rsrc_locked(struct io_kiocb *req,
struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
struct io_rsrc_node *node = req->rsrc_node;

if (node) {
if (node == ctx->rsrc_node)
ctx->rsrc_cached_refs++;
else
io_rsrc_put_node(node, 1);
}
io_put_rsrc_node(ctx, req->rsrc_node);
}

static inline void io_charge_rsrc_node(struct io_ring_ctx *ctx)
static inline void io_charge_rsrc_node(struct io_ring_ctx *ctx,
struct io_rsrc_node *node)
{
ctx->rsrc_cached_refs--;
if (unlikely(ctx->rsrc_cached_refs < 0))
io_rsrc_refs_refill(ctx);
node->refs++;
}

static inline void io_req_set_rsrc_node(struct io_kiocb *req,
@@ -149,7 +134,7 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
lockdep_assert_held(&ctx->uring_lock);

req->rsrc_node = ctx->rsrc_node;
io_charge_rsrc_node(ctx);
io_charge_rsrc_node(ctx, ctx->rsrc_node);
io_ring_submit_unlock(ctx, issue_flags);
}
}
@@ -162,6 +147,12 @@ static inline u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx)
return &data->tags[table_idx][off];
}

static inline int io_rsrc_init(struct io_ring_ctx *ctx)
{
ctx->rsrc_node = io_rsrc_node_alloc(ctx);
return ctx->rsrc_node ? 0 : -ENOMEM;
}

int io_files_update(struct io_kiocb *req, unsigned int issue_flags);
int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
io_uring/rw.c
@ -105,6 +105,7 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
} else {
|
||||
rw->kiocb.ki_ioprio = get_current_ioprio();
|
||||
}
|
||||
rw->kiocb.dio_complete = NULL;
|
||||
|
||||
rw->addr = READ_ONCE(sqe->addr);
|
||||
rw->len = READ_ONCE(sqe->len);
|
||||
|
@ -283,16 +284,25 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
|
|||
return res;
|
||||
}
|
||||
|
||||
static void io_req_rw_complete(struct io_kiocb *req, bool *locked)
|
||||
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
|
||||
struct kiocb *kiocb = &rw->kiocb;
|
||||
|
||||
if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
|
||||
long res = kiocb->dio_complete(rw->kiocb.private);
|
||||
|
||||
io_req_set_res(req, io_fixup_rw_res(req, res), 0);
|
||||
}
|
||||
|
||||
io_req_io_end(req);
|
||||
|
||||
if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
|
||||
unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
|
||||
unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
|
||||
|
||||
req->cqe.flags |= io_put_kbuf(req, issue_flags);
|
||||
}
|
||||
io_req_task_complete(req, locked);
|
||||
io_req_task_complete(req, ts);
|
||||
}
|
||||
|
||||
static void io_complete_rw(struct kiocb *kiocb, long res)
|
||||
|
@ -300,11 +310,13 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
|
|||
struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
|
||||
struct io_kiocb *req = cmd_to_io_kiocb(rw);
|
||||
|
||||
if (__io_complete_rw_common(req, res))
|
||||
return;
|
||||
io_req_set_res(req, io_fixup_rw_res(req, res), 0);
|
||||
if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
|
||||
if (__io_complete_rw_common(req, res))
|
||||
return;
|
||||
io_req_set_res(req, io_fixup_rw_res(req, res), 0);
|
||||
}
|
||||
req->io_task_work.func = io_req_rw_complete;
|
||||
io_req_task_work_add(req);
|
||||
__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
|
||||
}
|
||||
|
||||
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
|
||||
|
@ -332,7 +344,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
|
|||
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
|
||||
unsigned final_ret = io_fixup_rw_res(req, ret);
|
||||
|
||||
if (req->flags & REQ_F_CUR_POS)
|
||||
if (ret >= 0 && req->flags & REQ_F_CUR_POS)
|
||||
req->file->f_pos = rw->kiocb.ki_pos;
|
||||
if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) {
|
||||
if (!__io_complete_rw_common(req, ret)) {
|
||||
|
@ -391,7 +403,7 @@ static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req,
|
|||
rw->len = sqe_len;
|
||||
}
|
||||
|
||||
ret = import_single_range(ddir, buf, sqe_len, s->fast_iov, iter);
|
||||
ret = import_ubuf(ddir, buf, sqe_len, iter);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
return NULL;
|
||||
|
@ -410,7 +422,7 @@ static inline int io_import_iovec(int rw, struct io_kiocb *req,
|
|||
unsigned int issue_flags)
|
||||
{
|
||||
*iovec = __io_import_iovec(rw, req, s, issue_flags);
|
||||
if (unlikely(IS_ERR(*iovec)))
|
||||
if (IS_ERR(*iovec))
|
||||
return PTR_ERR(*iovec);
|
||||
|
||||
iov_iter_save_state(&s->iter, &s->iter_state);
|
||||
|
@ -447,23 +459,25 @@ static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter)
|
|||
ppos = io_kiocb_ppos(kiocb);
|
||||
|
||||
while (iov_iter_count(iter)) {
|
||||
struct iovec iovec;
|
||||
void __user *addr;
|
||||
size_t len;
|
||||
ssize_t nr;
|
||||
|
||||
if (!iov_iter_is_bvec(iter)) {
|
||||
iovec = iov_iter_iovec(iter);
|
||||
if (iter_is_ubuf(iter)) {
|
||||
addr = iter->ubuf + iter->iov_offset;
|
||||
len = iov_iter_count(iter);
|
||||
} else if (!iov_iter_is_bvec(iter)) {
|
||||
addr = iter_iov_addr(iter);
|
||||
len = iter_iov_len(iter);
|
||||
} else {
|
||||
iovec.iov_base = u64_to_user_ptr(rw->addr);
|
||||
iovec.iov_len = rw->len;
|
||||
addr = u64_to_user_ptr(rw->addr);
|
||||
len = rw->len;
|
||||
}
|
||||
|
||||
if (ddir == READ) {
|
||||
nr = file->f_op->read(file, iovec.iov_base,
|
||||
iovec.iov_len, ppos);
|
||||
} else {
|
||||
nr = file->f_op->write(file, iovec.iov_base,
|
||||
iovec.iov_len, ppos);
|
||||
}
|
||||
if (ddir == READ)
|
||||
nr = file->f_op->read(file, addr, len, ppos);
|
||||
else
|
||||
nr = file->f_op->write(file, addr, len, ppos);
|
||||
|
||||
if (nr < 0) {
|
||||
if (!ret)
|
||||
|
@ -479,7 +493,7 @@ static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter)
|
|||
if (!rw->len)
|
||||
break;
|
||||
}
|
||||
if (nr != iovec.iov_len)
|
||||
if (nr != len)
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -495,15 +509,15 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
|
|||
io->free_iovec = iovec;
|
||||
io->bytes_done = 0;
|
||||
/* can only be fixed buffers, no need to do anything */
|
||||
if (iov_iter_is_bvec(iter))
|
||||
if (iov_iter_is_bvec(iter) || iter_is_ubuf(iter))
|
||||
return;
|
||||
if (!iovec) {
|
||||
unsigned iov_off = 0;
|
||||
|
||||
io->s.iter.iov = io->s.fast_iov;
|
||||
if (iter->iov != fast_iov) {
|
||||
iov_off = iter->iov - fast_iov;
|
||||
io->s.iter.iov += iov_off;
|
||||
io->s.iter.__iov = io->s.fast_iov;
|
||||
if (iter->__iov != fast_iov) {
|
||||
iov_off = iter_iov(iter) - fast_iov;
|
||||
io->s.iter.__iov += iov_off;
|
||||
}
|
||||
if (io->s.fast_iov != fast_iov)
|
||||
memcpy(io->s.fast_iov + iov_off, fast_iov + iov_off,
|
||||
|
@ -516,7 +530,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
|
|||
static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
|
||||
struct io_rw_state *s, bool force)
|
||||
{
|
||||
if (!force && !io_op_defs[req->opcode].prep_async)
|
||||
if (!force && !io_cold_defs[req->opcode].prep_async)
|
||||
return 0;
|
||||
if (!req_has_async_data(req)) {
|
||||
struct io_async_rw *iorw;
|
||||
|
@ -664,8 +678,8 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
|
|||
if (unlikely(!file || !(file->f_mode & mode)))
|
||||
return -EBADF;
|
||||
|
||||
if (!io_req_ffs_set(req))
|
||||
req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
|
||||
if (!(req->flags & REQ_F_FIXED_FILE))
|
||||
req->flags |= io_file_get_flags(file);
|
||||
|
||||
kiocb->ki_flags = iocb_flags(file);
|
||||
ret = kiocb_set_rw_flags(kiocb, rw->flags);
|
||||
|
@ -981,13 +995,6 @@ copy_iov:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
|
||||
{
|
||||
io_commit_cqring_flush(ctx);
|
||||
if (ctx->flags & IORING_SETUP_SQPOLL)
|
||||
io_cqring_wake(ctx);
|
||||
}
|
||||
|
||||
void io_rw_fail(struct io_kiocb *req)
|
||||
{
|
||||
int res;
|
||||
|
@ -1058,24 +1065,17 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
|
|||
if (!smp_load_acquire(&req->iopoll_completed))
|
||||
break;
|
||||
nr_events++;
|
||||
if (unlikely(req->flags & REQ_F_CQE_SKIP))
|
||||
continue;
|
||||
|
||||
req->cqe.flags = io_put_kbuf(req, 0);
|
||||
if (unlikely(!__io_fill_cqe_req(ctx, req))) {
|
||||
spin_lock(&ctx->completion_lock);
|
||||
io_req_cqe_overflow(req);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(!nr_events))
|
||||
return 0;
|
||||
|
||||
io_commit_cqring(ctx);
|
||||
io_cqring_ev_posted_iopoll(ctx);
|
||||
pos = start ? start->next : ctx->iopoll_list.first;
|
||||
wq_list_cut(&ctx->iopoll_list, prev, start);
|
||||
io_free_batch_list(ctx, pos);
|
||||
|
||||
if (WARN_ON_ONCE(!wq_list_empty(&ctx->submit_state.compl_reqs)))
|
||||
return 0;
|
||||
ctx->submit_state.compl_reqs.first = pos;
|
||||
__io_submit_flush_completions(ctx);
|
||||
return nr_events;
|
||||
}
|
||||
|
|
|
@ -22,3 +22,4 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags);
|
|||
int io_writev_prep_async(struct io_kiocb *req);
|
||||
void io_readv_writev_cleanup(struct io_kiocb *req);
|
||||
void io_rw_fail(struct io_kiocb *req);
|
||||
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
#define __wq_list_for_each(pos, head) \
|
||||
for (pos = (head)->first; pos; pos = (pos)->next)
|
||||
|
||||
#define wq_list_for_each(pos, prv, head) \
|
||||
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
|
||||
|
||||
|
@ -27,28 +30,6 @@ static inline void wq_list_add_after(struct io_wq_work_node *node,
|
|||
list->last = node;
|
||||
}
|
||||
|
||||
/**
|
||||
* wq_list_merge - merge the second list to the first one.
|
||||
* @list0: the first list
|
||||
* @list1: the second list
|
||||
* Return the first node after mergence.
|
||||
*/
|
||||
static inline struct io_wq_work_node *wq_list_merge(struct io_wq_work_list *list0,
|
||||
struct io_wq_work_list *list1)
|
||||
{
|
||||
struct io_wq_work_node *ret;
|
||||
|
||||
if (!list0->first) {
|
||||
ret = list1->first;
|
||||
} else {
|
||||
ret = list0->first;
|
||||
list0->last->next = list1->first;
|
||||
}
|
||||
INIT_WQ_LIST(list0);
|
||||
INIT_WQ_LIST(list1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void wq_list_add_tail(struct io_wq_work_node *node,
|
||||
struct io_wq_work_list *list)
|
||||
{
|
||||
|
@ -135,4 +116,4 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
|
|||
return container_of(work->list.next, struct io_wq_work, list);
|
||||
}
|
||||
|
||||
#endif // INTERNAL_IO_SLIST_H
|
||||
#endif // INTERNAL_IO_SLIST_H
|
||||
|
|
|
@ -34,6 +34,7 @@ static int __io_splice_prep(struct io_kiocb *req,
|
|||
if (unlikely(sp->flags & ~valid_flags))
|
||||
return -EINVAL;
|
||||
sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in);
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -52,8 +53,7 @@ int io_tee(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct file *in;
|
||||
long ret = 0;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
if (sp->flags & SPLICE_F_FD_IN_FIXED)
|
||||
in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags);
|
||||
|
@ -68,7 +68,7 @@ int io_tee(struct io_kiocb *req, unsigned int issue_flags)
|
|||
ret = do_tee(in, out, sp->len, flags);
|
||||
|
||||
if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
|
||||
io_put_file(in);
|
||||
fput(in);
|
||||
done:
|
||||
if (ret != sp->len)
|
||||
req_set_fail(req);
|
||||
|
@ -94,8 +94,7 @@ int io_splice(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct file *in;
|
||||
long ret = 0;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
if (sp->flags & SPLICE_F_FD_IN_FIXED)
|
||||
in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags);
|
||||
|
@ -113,7 +112,7 @@ int io_splice(struct io_kiocb *req, unsigned int issue_flags)
|
|||
ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
|
||||
|
||||
if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
|
||||
io_put_file(in);
|
||||
fput(in);
|
||||
done:
|
||||
if (ret != sp->len)
|
||||
req_set_fail(req);
|
||||
|
|
|
@ -255,9 +255,13 @@ static int io_sq_thread(void *data)
|
|||
sqt_spin = true;
|
||||
|
||||
if (sqt_spin || !time_after(jiffies, timeout)) {
|
||||
cond_resched();
|
||||
if (sqt_spin)
|
||||
timeout = jiffies + sqd->sq_thread_idle;
|
||||
if (unlikely(need_resched())) {
|
||||
mutex_unlock(&sqd->lock);
|
||||
cond_resched();
|
||||
mutex_lock(&sqd->lock);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -311,7 +315,7 @@ static int io_sq_thread(void *data)
|
|||
do_exit(0);
|
||||
}
|
||||
|
||||
int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
|
||||
void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
|
||||
{
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
|
@ -326,7 +330,6 @@ int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
|
|||
} while (!signal_pending(current));
|
||||
|
||||
finish_wait(&ctx->sqo_sq_wait, &wait);
|
||||
return 0;
|
||||
}
|
||||
|
||||
__cold int io_sq_offload_create(struct io_ring_ctx *ctx,
|
||||
|
@ -418,3 +421,20 @@ err:
|
|||
io_sq_thread_finish(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx,
|
||||
cpumask_var_t mask)
|
||||
{
|
||||
struct io_sq_data *sqd = ctx->sq_data;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (sqd) {
|
||||
io_sq_thread_park(sqd);
|
||||
/* Don't set affinity for a dying thread */
|
||||
if (sqd->thread)
|
||||
ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
|
||||
io_sq_thread_unpark(sqd);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -26,4 +26,5 @@ void io_sq_thread_stop(struct io_sq_data *sqd);
|
|||
void io_sq_thread_park(struct io_sq_data *sqd);
|
||||
void io_sq_thread_unpark(struct io_sq_data *sqd);
|
||||
void io_put_sq_data(struct io_sq_data *sqd);
|
||||
int io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
|
||||
void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
|
||||
int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
|
||||
|
|
|
@ -48,6 +48,7 @@ int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
}
|
||||
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -56,8 +57,7 @@ int io_statx(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_statx *sx = io_kiocb_to_cmd(req, struct io_statx);
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = do_statx(sx->dfd, sx->filename, sx->flags, sx->mask, sx->buffer);
|
||||
io_req_set_res(req, ret, 0);
|
||||
|
|
|
@ -32,6 +32,8 @@ int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
sync->off = READ_ONCE(sqe->off);
|
||||
sync->len = READ_ONCE(sqe->len);
|
||||
sync->flags = READ_ONCE(sqe->sync_range_flags);
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -41,8 +43,7 @@ int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
|
|||
int ret;
|
||||
|
||||
/* sync_file_range always requires a blocking context */
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = sync_file_range(req->file, sync->off, sync->len, sync->flags);
|
||||
io_req_set_res(req, ret, 0);
|
||||
|
@ -62,6 +63,7 @@ int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
|
||||
sync->off = READ_ONCE(sqe->off);
|
||||
sync->len = READ_ONCE(sqe->len);
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -72,8 +74,7 @@ int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
|
|||
int ret;
|
||||
|
||||
/* fsync always requires a blocking context */
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = vfs_fsync_range(req->file, sync->off, end > 0 ? end : LLONG_MAX,
|
||||
sync->flags & IORING_FSYNC_DATASYNC);
|
||||
|
@ -91,6 +92,7 @@ int io_fallocate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
sync->off = READ_ONCE(sqe->off);
|
||||
sync->len = READ_ONCE(sqe->addr);
|
||||
sync->mode = READ_ONCE(sqe->len);
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -100,8 +102,8 @@ int io_fallocate(struct io_kiocb *req, unsigned int issue_flags)
|
|||
int ret;
|
||||
|
||||
/* fallocate always requiring blocking context */
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = vfs_fallocate(req->file, sync->mode, sync->off, sync->len);
|
||||
if (ret >= 0)
|
||||
fsnotify_modify(req->file);
|
||||
|
|
|
@ -83,7 +83,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
|
|||
|
||||
xa_init(&tctx->xa);
|
||||
init_waitqueue_head(&tctx->wait);
|
||||
atomic_set(&tctx->in_idle, 0);
|
||||
atomic_set(&tctx->in_cancel, 0);
|
||||
atomic_set(&tctx->inflight_tracked, 0);
|
||||
task->io_uring = tctx;
|
||||
init_llist_head(&tctx->task_list);
|
||||
|
@ -208,29 +208,38 @@ void io_uring_unreg_ringfd(void)
|
|||
}
|
||||
}
|
||||
|
||||
int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
|
||||
int start, int end)
|
||||
{
|
||||
int offset;
|
||||
for (offset = start; offset < end; offset++) {
|
||||
offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
|
||||
if (tctx->registered_rings[offset])
|
||||
continue;
|
||||
|
||||
tctx->registered_rings[offset] = file;
|
||||
return offset;
|
||||
}
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
|
||||
int start, int end)
|
||||
{
|
||||
struct file *file;
|
||||
int offset;
|
||||
|
||||
for (offset = start; offset < end; offset++) {
|
||||
offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
|
||||
if (tctx->registered_rings[offset])
|
||||
continue;
|
||||
|
||||
file = fget(fd);
|
||||
if (!file) {
|
||||
return -EBADF;
|
||||
} else if (!io_is_uring_fops(file)) {
|
||||
fput(file);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
tctx->registered_rings[offset] = file;
|
||||
return offset;
|
||||
file = fget(fd);
|
||||
if (!file) {
|
||||
return -EBADF;
|
||||
} else if (!io_is_uring_fops(file)) {
|
||||
fput(file);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
return -EBUSY;
|
||||
offset = io_ring_add_registered_file(tctx, file, start, end);
|
||||
if (offset < 0)
|
||||
fput(file);
|
||||
return offset;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -17,6 +17,7 @@ struct io_timeout {
|
|||
struct file *file;
|
||||
u32 off;
|
||||
u32 target_seq;
|
||||
u32 repeats;
|
||||
struct list_head list;
|
||||
/* head of the link, used by linked timeouts only */
|
||||
struct io_kiocb *head;
|
||||
|
@ -37,8 +38,9 @@ struct io_timeout_rem {
|
|||
static inline bool io_is_timeout_noseq(struct io_kiocb *req)
|
||||
{
|
||||
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
|
||||
struct io_timeout_data *data = req->async_data;
|
||||
|
||||
return !timeout->off;
|
||||
return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT;
|
||||
}
|
||||
|
||||
static inline void io_put_req(struct io_kiocb *req)
|
||||
|
@ -49,6 +51,44 @@ static inline void io_put_req(struct io_kiocb *req)
|
|||
}
|
||||
}
|
||||
|
||||
static inline bool io_timeout_finish(struct io_timeout *timeout,
struct io_timeout_data *data)
{
if (!(data->flags & IORING_TIMEOUT_MULTISHOT))
return true;

if (!timeout->off || (timeout->repeats && --timeout->repeats))
return false;

return true;
}

static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);

static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
struct io_timeout_data *data = req->async_data;
struct io_ring_ctx *ctx = req->ctx;

if (!io_timeout_finish(timeout, data)) {
bool filled;
filled = io_fill_cqe_req_aux(req, ts->locked, -ETIME,
IORING_CQE_F_MORE);
if (filled) {
/* re-arm timer */
spin_lock_irq(&ctx->timeout_lock);
list_add(&timeout->list, ctx->timeout_list.prev);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
spin_unlock_irq(&ctx->timeout_lock);
return;
}
}

io_req_task_complete(req, ts);
}
static bool io_kill_timeout(struct io_kiocb *req, int status)
|
||||
__must_hold(&req->ctx->timeout_lock)
|
||||
{
|
||||
|
@ -101,9 +141,9 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
|
|||
spin_unlock_irq(&ctx->timeout_lock);
|
||||
}
|
||||
|
||||
static void io_req_tw_fail_links(struct io_kiocb *link, bool *locked)
|
||||
static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
|
||||
{
|
||||
io_tw_lock(link->ctx, locked);
|
||||
io_tw_lock(link->ctx, ts);
|
||||
while (link) {
|
||||
struct io_kiocb *nxt = link->link;
|
||||
long res = -ECANCELED;
|
||||
|
@ -112,7 +152,7 @@ static void io_req_tw_fail_links(struct io_kiocb *link, bool *locked)
|
|||
res = link->cqe.res;
|
||||
link->link = NULL;
|
||||
io_req_set_res(link, res, 0);
|
||||
io_req_task_complete(link, locked);
|
||||
io_req_task_complete(link, ts);
|
||||
link = nxt;
|
||||
}
|
||||
}
|
||||
|
@ -212,7 +252,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
|
|||
req_set_fail(req);
|
||||
|
||||
io_req_set_res(req, -ETIME, 0);
|
||||
req->io_task_work.func = io_req_task_complete;
|
||||
req->io_task_work.func = io_timeout_complete;
|
||||
io_req_task_work_add(req);
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
@ -228,16 +268,10 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
|
|||
list_for_each_entry(timeout, &ctx->timeout_list, list) {
|
||||
struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
|
||||
|
||||
if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
|
||||
cd->data != tmp->cqe.user_data)
|
||||
continue;
|
||||
if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
|
||||
if (cd->seq == tmp->work.cancel_seq)
|
||||
continue;
|
||||
tmp->work.cancel_seq = cd->seq;
|
||||
if (io_cancel_req_match(tmp, cd)) {
|
||||
req = tmp;
|
||||
break;
|
||||
}
|
||||
req = tmp;
|
||||
break;
|
||||
}
|
||||
if (!req)
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
@ -265,9 +299,9 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
|
||||
static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
|
||||
unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
|
||||
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
|
||||
struct io_kiocb *prev = timeout->prev;
|
||||
int ret = -ENOENT;
|
||||
|
@ -282,11 +316,11 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
|
|||
ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);
|
||||
}
|
||||
io_req_set_res(req, ret ?: -ETIME, 0);
|
||||
io_req_task_complete(req, locked);
|
||||
io_req_task_complete(req, ts);
|
||||
io_put_req(prev);
|
||||
} else {
|
||||
io_req_set_res(req, -ETIME, 0);
|
||||
io_req_task_complete(req, locked);
|
||||
io_req_task_complete(req, ts);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -369,7 +403,7 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
|
|||
struct timespec64 *ts, enum hrtimer_mode mode)
|
||||
__must_hold(&ctx->timeout_lock)
|
||||
{
|
||||
struct io_cancel_data cd = { .data = user_data, };
|
||||
struct io_cancel_data cd = { .ctx = ctx, .data = user_data, };
|
||||
struct io_kiocb *req = io_timeout_extract(ctx, &cd);
|
||||
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
|
||||
struct io_timeout_data *data;
|
||||
|
@ -433,7 +467,7 @@ int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
|
|||
int ret;
|
||||
|
||||
if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
|
||||
struct io_cancel_data cd = { .data = tr->addr, };
|
||||
struct io_cancel_data cd = { .ctx = ctx, .data = tr->addr, };
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
ret = io_timeout_cancel(ctx, &cd);
|
||||
|
@ -470,16 +504,27 @@ static int __io_timeout_prep(struct io_kiocb *req,
|
|||
return -EINVAL;
|
||||
flags = READ_ONCE(sqe->timeout_flags);
|
||||
if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
|
||||
IORING_TIMEOUT_ETIME_SUCCESS))
|
||||
IORING_TIMEOUT_ETIME_SUCCESS |
|
||||
IORING_TIMEOUT_MULTISHOT))
|
||||
return -EINVAL;
|
||||
/* more than one clock specified is invalid, obviously */
|
||||
if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
|
||||
return -EINVAL;
|
||||
/* multishot requests only make sense with rel values */
|
||||
if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
|
||||
return -EINVAL;
|
||||
|
||||
INIT_LIST_HEAD(&timeout->list);
|
||||
timeout->off = off;
|
||||
if (unlikely(off && !req->ctx->off_timeout_used))
|
||||
req->ctx->off_timeout_used = true;
|
||||
/*
|
||||
* for multishot reqs w/ fixed nr of repeats, repeats tracks the
|
||||
* remaining nr
|
||||
*/
|
||||
timeout->repeats = 0;
|
||||
if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
|
||||
timeout->repeats = off;
|
||||
|
||||
if (WARN_ON_ONCE(req_has_async_data(req)))
|
||||
return -EFAULT;
|
||||
|
@ -543,7 +588,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
|
|||
goto add;
|
||||
}
|
||||
|
||||
tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
|
||||
tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
|
||||
timeout->target_seq = tail + off;
|
||||
|
||||
/* Update the last seq here in case io_flush_timeouts() hasn't.
|
||||
|
|
|
@ -7,36 +7,44 @@
|
|||
#include <linux/nospec.h>
|
||||
|
||||
#include <uapi/linux/io_uring.h>
|
||||
#include <uapi/asm-generic/ioctls.h>
|
||||
|
||||
#include "io_uring.h"
|
||||
#include "rsrc.h"
|
||||
#include "uring_cmd.h"
|
||||
|
||||
static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
|
||||
static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
|
||||
unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
|
||||
unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
|
||||
|
||||
ioucmd->task_work_cb(ioucmd, issue_flags);
|
||||
}
|
||||
|
||||
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags)
|
||||
{
|
||||
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
|
||||
|
||||
ioucmd->task_work_cb = task_work_cb;
|
||||
req->io_task_work.func = io_uring_cmd_work;
|
||||
io_req_task_work_add(req);
|
||||
__io_req_task_work_add(req, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
|
||||
EXPORT_SYMBOL_GPL(__io_uring_cmd_do_in_task);
|
||||
|
||||
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_do_in_task_lazy);
|
||||
|
||||
static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
|
||||
u64 extra1, u64 extra2)
|
||||
{
|
||||
req->extra1 = extra1;
|
||||
req->extra2 = extra2;
|
||||
req->flags |= REQ_F_CQE32_INIT;
|
||||
req->big_cqe.extra1 = extra1;
|
||||
req->big_cqe.extra2 = extra2;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -54,25 +62,24 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
|
|||
io_req_set_res(req, ret, 0);
|
||||
if (req->ctx->flags & IORING_SETUP_CQE32)
|
||||
io_req_set_cqe32_extra(req, res2, 0);
|
||||
if (req->ctx->flags & IORING_SETUP_IOPOLL)
|
||||
if (req->ctx->flags & IORING_SETUP_IOPOLL) {
|
||||
/* order with io_iopoll_req_issued() checking ->iopoll_complete */
|
||||
smp_store_release(&req->iopoll_completed, 1);
|
||||
else
|
||||
io_req_complete_post(req, issue_flags);
|
||||
} else {
|
||||
struct io_tw_state ts = {
|
||||
.locked = !(issue_flags & IO_URING_F_UNLOCKED),
|
||||
};
|
||||
io_req_task_complete(req, &ts);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
|
||||
|
||||
int io_uring_cmd_prep_async(struct io_kiocb *req)
|
||||
{
|
||||
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
|
||||
size_t cmd_size;
|
||||
|
||||
BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16);
|
||||
BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80);
|
||||
|
||||
cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
|
||||
|
||||
memcpy(req->async_data, ioucmd->cmd, cmd_size);
|
||||
memcpy(req->async_data, ioucmd->sqe, uring_sqe_size(req->ctx));
|
||||
ioucmd->sqe = req->async_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -98,7 +105,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
req->imu = ctx->user_bufs[index];
|
||||
io_req_set_rsrc_node(req, ctx, 0);
|
||||
}
|
||||
ioucmd->cmd = sqe->cmd;
|
||||
ioucmd->sqe = sqe;
|
||||
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
|
||||
return 0;
|
||||
}
|
||||
|
@ -129,9 +136,6 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
|
|||
WRITE_ONCE(ioucmd->cookie, NULL);
|
||||
}
|
||||
|
||||
if (req_has_async_data(req))
|
||||
ioucmd->cmd = req->async_data;
|
||||
|
||||
ret = file->f_op->uring_cmd(ioucmd, issue_flags);
|
||||
if (ret == -EAGAIN) {
|
||||
if (!req_has_async_data(req)) {
|
||||
|
@@ -160,3 +164,30 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
return io_import_fixed(rw, iter, req->imu, ubuf, len);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);

int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct socket *sock = cmd->file->private_data;
struct sock *sk = sock->sk;
struct proto *prot = READ_ONCE(sk->sk_prot);
int ret, arg = 0;

if (!prot || !prot->ioctl)
return -EOPNOTSUPP;

switch (cmd->sqe->cmd_op) {
case SOCKET_URING_OP_SIOCINQ:
ret = prot->ioctl(sk, SIOCINQ, &arg);
if (ret)
return ret;
return arg;
case SOCKET_URING_OP_SIOCOUTQ:
ret = prot->ioctl(sk, SIOCOUTQ, &arg);
if (ret)
return ret;
return arg;
default:
return -EOPNOTSUPP;
}
}
EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
@ -3,11 +3,3 @@
|
|||
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
|
||||
int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_uring_cmd_prep_async(struct io_kiocb *req);
|
||||
|
||||
/*
|
||||
* The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
|
||||
* the following sqe if SQE128 is used.
|
||||
*/
|
||||
#define uring_cmd_pdu_size(is_sqe128) \
|
||||
((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) - \
|
||||
offsetof(struct io_uring_sqe, cmd))
|
||||
|
|
|
@ -75,6 +75,7 @@ static int __io_getxattr_prep(struct io_kiocb *req,
|
|||
}
|
||||
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -109,8 +110,7 @@ int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct io_xattr *ix = io_kiocb_to_cmd(req, struct io_xattr);
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt),
|
||||
req->file->f_path.dentry,
|
||||
|
@ -127,8 +127,7 @@ int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct path path;
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
retry:
|
||||
ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
|
||||
|
@ -176,6 +175,7 @@ static int __io_setxattr_prep(struct io_kiocb *req,
|
|||
}
|
||||
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -224,8 +224,7 @@ int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags)
|
|||
{
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
ret = __io_setxattr(req, issue_flags, &req->file->f_path);
|
||||
io_xattr_finish(req, ret);
|
||||
|
@ -239,8 +238,7 @@ int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
|
|||
struct path path;
|
||||
int ret;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
|
||||
|
||||
retry:
|
||||
ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
|
||||
|
|
|
@ -126,13 +126,13 @@ __out: \
|
|||
iterate_buf(i, n, base, len, off, \
|
||||
i->ubuf, (I)) \
|
||||
} else if (likely(iter_is_iovec(i))) { \
|
||||
const struct iovec *iov = i->iov; \
|
||||
const struct iovec *iov = iter_iov(i); \
|
||||
void __user *base; \
|
||||
size_t len; \
|
||||
iterate_iovec(i, n, base, len, off, \
|
||||
iov, (I)) \
|
||||
i->nr_segs -= iov - i->iov; \
|
||||
i->iov = iov; \
|
||||
i->nr_segs -= iov - iter_iov(i); \
|
||||
i->__iov = iov; \
|
||||
} else if (iov_iter_is_bvec(i)) { \
|
||||
const struct bio_vec *bvec = i->bvec; \
|
||||
void *base; \
|
||||
|
@ -361,7 +361,7 @@ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
|
|||
size_t skip;
|
||||
|
||||
size -= count;
|
||||
for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
|
||||
for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) {
|
||||
size_t len = min(count, p->iov_len - skip);
|
||||
size_t ret;
|
||||
|
||||
|
@ -404,7 +404,7 @@ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
|
|||
size_t skip;
|
||||
|
||||
size -= count;
|
||||
for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
|
||||
for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) {
|
||||
size_t len = min(count, p->iov_len - skip);
|
||||
size_t ret;
|
||||
|
||||
|
@ -431,7 +431,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
|
|||
.nofault = false,
|
||||
.user_backed = true,
|
||||
.data_source = direction,
|
||||
.iov = iov,
|
||||
.__iov = iov,
|
||||
.nr_segs = nr_segs,
|
||||
.iov_offset = 0,
|
||||
.count = count
|
||||
|
@ -881,14 +881,14 @@ static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
|
|||
i->count -= size;
|
||||
|
||||
size += i->iov_offset; // from beginning of current segment
|
||||
for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
|
||||
for (iov = iter_iov(i), end = iov + i->nr_segs; iov < end; iov++) {
|
||||
if (likely(size < iov->iov_len))
|
||||
break;
|
||||
size -= iov->iov_len;
|
||||
}
|
||||
i->iov_offset = size;
|
||||
i->nr_segs -= iov - i->iov;
|
||||
i->iov = iov;
|
||||
i->nr_segs -= iov - iter_iov(i);
|
||||
i->__iov = iov;
|
||||
}
|
||||
|
||||
void iov_iter_advance(struct iov_iter *i, size_t size)
|
||||
|
@ -963,12 +963,12 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
|
|||
unroll -= n;
|
||||
}
|
||||
} else { /* same logics for iovec and kvec */
|
||||
const struct iovec *iov = i->iov;
|
||||
const struct iovec *iov = iter_iov(i);
|
||||
while (1) {
|
||||
size_t n = (--iov)->iov_len;
|
||||
i->nr_segs++;
|
||||
if (unroll <= n) {
|
||||
i->iov = iov;
|
||||
i->__iov = iov;
|
||||
i->iov_offset = n - unroll;
|
||||
return;
|
||||
}
|
||||
|
@ -985,7 +985,7 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
|
|||
{
|
||||
if (i->nr_segs > 1) {
|
||||
if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
|
||||
return min(i->count, i->iov->iov_len - i->iov_offset);
|
||||
return min(i->count, iter_iov(i)->iov_len - i->iov_offset);
|
||||
if (iov_iter_is_bvec(i))
|
||||
return min(i->count, i->bvec->bv_len - i->iov_offset);
|
||||
}
|
||||
|
@ -1100,13 +1100,14 @@ static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
|
|||
unsigned k;
|
||||
|
||||
for (k = 0; k < i->nr_segs; k++, skip = 0) {
|
||||
size_t len = i->iov[k].iov_len - skip;
|
||||
const struct iovec *iov = iter_iov(i) + k;
|
||||
size_t len = iov->iov_len - skip;
|
||||
|
||||
if (len > size)
|
||||
len = size;
|
||||
if (len & len_mask)
|
||||
return false;
|
||||
if ((unsigned long)(i->iov[k].iov_base + skip) & addr_mask)
|
||||
if ((unsigned long)(iov->iov_base + skip) & addr_mask)
|
||||
return false;
|
||||
|
||||
size -= len;
|
||||
|
@ -1199,9 +1200,10 @@ static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
|
|||
unsigned k;
|
||||
|
||||
for (k = 0; k < i->nr_segs; k++, skip = 0) {
|
||||
size_t len = i->iov[k].iov_len - skip;
|
||||
const struct iovec *iov = iter_iov(i) + k;
|
||||
size_t len = iov->iov_len - skip;
|
||||
if (len) {
|
||||
res |= (unsigned long)i->iov[k].iov_base + skip;
|
||||
res |= (unsigned long)iov->iov_base + skip;
|
||||
if (len > size)
|
||||
len = size;
|
||||
res |= len;
|
||||
|
@ -1278,14 +1280,15 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
|
|||
return ~0U;
|
||||
|
||||
for (k = 0; k < i->nr_segs; k++) {
|
||||
if (i->iov[k].iov_len) {
|
||||
unsigned long base = (unsigned long)i->iov[k].iov_base;
|
||||
const struct iovec *iov = iter_iov(i) + k;
|
||||
if (iov->iov_len) {
|
||||
unsigned long base = (unsigned long)iov->iov_base;
|
||||
if (v) // if not the first one
|
||||
res |= base | v; // this start | previous end
|
||||
v = base + i->iov[k].iov_len;
|
||||
if (size <= i->iov[k].iov_len)
|
||||
v = base + iov->iov_len;
|
||||
if (size <= iov->iov_len)
|
||||
break;
|
||||
size -= i->iov[k].iov_len;
|
||||
size -= iov->iov_len;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
|
@ -1401,13 +1404,14 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size)
|
|||
return (unsigned long)i->ubuf + i->iov_offset;
|
||||
|
||||
for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
|
||||
size_t len = i->iov[k].iov_len - skip;
|
||||
const struct iovec *iov = iter_iov(i) + k;
|
||||
size_t len = iov->iov_len - skip;
|
||||
|
||||
if (unlikely(!len))
|
||||
continue;
|
||||
if (*size > len)
|
||||
*size = len;
|
||||
return (unsigned long)i->iov[k].iov_base + skip;
|
||||
return (unsigned long)iov->iov_base + skip;
|
||||
}
|
||||
BUG(); // if it had been empty, we wouldn't get called
|
||||
}
|
||||
|
@ -1596,7 +1600,7 @@ static int iov_npages(const struct iov_iter *i, int maxpages)
|
|||
const struct iovec *p;
|
||||
int npages = 0;
|
||||
|
||||
for (p = i->iov; size; skip = 0, p++) {
|
||||
for (p = iter_iov(i); size; skip = 0, p++) {
|
||||
unsigned offs = offset_in_page(p->iov_base + skip);
|
||||
size_t len = min(p->iov_len - skip, size);
|
||||
|
||||
|
@ -1673,7 +1677,7 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
|
|||
flags);
|
||||
else if (iov_iter_is_kvec(new) || iter_is_iovec(new))
|
||||
/* iovec and kvec have identical layout */
|
||||
return new->iov = kmemdup(new->iov,
|
||||
return new->__iov = kmemdup(new->__iov,
|
||||
new->nr_segs * sizeof(struct iovec),
|
||||
flags);
|
||||
return NULL;
|
||||
|
@ -1855,6 +1859,17 @@ int import_single_range(int rw, void __user *buf, size_t len,
|
|||
}
|
||||
EXPORT_SYMBOL(import_single_range);
|
||||
|
||||
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
|
||||
{
|
||||
if (len > MAX_RW_COUNT)
|
||||
len = MAX_RW_COUNT;
|
||||
if (unlikely(!access_ok(buf, len)))
|
||||
return -EFAULT;
|
||||
|
||||
iov_iter_ubuf(i, rw, buf, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* iov_iter_restore() - Restore a &struct iov_iter to the same state as when
|
||||
* iov_iter_save_state() was called.
|
||||
|
@ -1869,8 +1884,8 @@ EXPORT_SYMBOL(import_single_range);
|
|||
*/
|
||||
void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
|
||||
{
|
||||
if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
|
||||
!iov_iter_is_kvec(i) && !iter_is_ubuf(i))
|
||||
if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
|
||||
!iter_is_ubuf(i)) && !iov_iter_is_kvec(i))
|
||||
return;
|
||||
i->iov_offset = state->iov_offset;
|
||||
i->count = state->count;
|
||||
|
@ -1889,6 +1904,6 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
|
|||
if (iov_iter_is_bvec(i))
|
||||
i->bvec -= state->nr_segs - i->nr_segs;
|
||||
else
|
||||
i->iov -= state->nr_segs - i->nr_segs;
|
||||
i->__iov -= state->nr_segs - i->nr_segs;
|
||||
i->nr_segs = state->nr_segs;
|
||||
}
|
||||
|
|
|
@ -1478,7 +1478,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
|||
size_t, vlen, int, behavior, unsigned int, flags)
|
||||
{
|
||||
ssize_t ret;
|
||||
struct iovec iovstack[UIO_FASTIOV], iovec;
|
||||
struct iovec iovstack[UIO_FASTIOV];
|
||||
struct iovec *iov = iovstack;
|
||||
struct iov_iter iter;
|
||||
struct task_struct *task;
|
||||
|
@ -1525,12 +1525,11 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
|||
total_len = iov_iter_count(&iter);
|
||||
|
||||
while (iov_iter_count(&iter)) {
|
||||
iovec = iov_iter_iovec(&iter);
|
||||
ret = do_madvise(mm, (unsigned long)iovec.iov_base,
|
||||
iovec.iov_len, behavior);
|
||||
ret = do_madvise(mm, (unsigned long)iter_iov_addr(&iter),
|
||||
iter_iov_len(&iter), behavior);
|
||||
if (ret < 0)
|
||||
break;
|
||||
iov_iter_advance(&iter, iovec.iov_len);
|
||||
iov_iter_advance(&iter, iter_iov_len(&iter));
|
||||
}
|
||||
|
||||
ret = (total_len - iov_iter_count(&iter)) ? : ret;
|
||||
|
|
mm/nommu.c
|
@ -975,9 +975,10 @@ static int do_mmap_private(struct vm_area_struct *vma,
|
|||
*/
|
||||
if (capabilities & NOMMU_MAP_DIRECT) {
|
||||
ret = call_mmap(vma->vm_file, vma);
|
||||
/* shouldn't return success if we're not sharing */
|
||||
if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags)))
|
||||
ret = -ENOSYS;
|
||||
if (ret == 0) {
|
||||
/* shouldn't return success if we're not sharing */
|
||||
BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
|
||||
vma->vm_region->vm_top = vma->vm_region->vm_end;
|
||||
return 0;
|
||||
}
|
||||
|
@ -1118,7 +1119,7 @@ unsigned long do_mmap(struct file *file,
|
|||
* these cases, sharing is handled in the driver or filesystem rather
|
||||
* than here
|
||||
*/
|
||||
if (vm_flags & VM_MAYSHARE) {
|
||||
if (is_nommu_shared_mapping(vm_flags)) {
|
||||
struct vm_region *pregion;
|
||||
unsigned long pglen, rpglen, pgend, rpgend, start;
|
||||
|
||||
|
@ -1128,7 +1129,7 @@ unsigned long do_mmap(struct file *file,
|
|||
for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
|
||||
pregion = rb_entry(rb, struct vm_region, vm_rb);
|
||||
|
||||
if (!(pregion->vm_flags & VM_MAYSHARE))
|
||||
if (!is_nommu_shared_mapping(pregion->vm_flags))
|
||||
continue;
|
||||
|
||||
/* search for overlapping mappings on the same file */
|
||||
|
@ -1575,7 +1576,7 @@ static unsigned long do_mremap(unsigned long addr,
|
|||
if (vma->vm_end != vma->vm_start + old_len)
|
||||
return (unsigned long) -EFAULT;
|
||||
|
||||
if (vma->vm_flags & VM_MAYSHARE)
|
||||
if (is_nommu_shared_mapping(vma->vm_flags))
|
||||
return (unsigned long) -EPERM;
|
||||
|
||||
if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
|
||||
|
|
|
@ -114,6 +114,9 @@
|
|||
#include <linux/memcontrol.h>
|
||||
#include <linux/prefetch.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/mroute.h>
|
||||
#include <linux/mroute6.h>
|
||||
#include <linux/icmpv6.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
|
@ -138,6 +141,7 @@
|
|||
|
||||
#include <net/tcp.h>
|
||||
#include <net/busy_poll.h>
|
||||
#include <net/phonet/phonet.h>
|
||||
|
||||
#include <linux/ethtool.h>
|
||||
|
||||
|
@ -4028,3 +4032,63 @@ int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
|
|||
return sk->sk_prot->bind_add(sk, addr, addr_len);
|
||||
}
|
||||
EXPORT_SYMBOL(sock_bind_add);
|
||||
|
||||
/* Copy 'size' bytes from userspace and return `size` back to userspace */
|
||||
int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
|
||||
void __user *arg, void *karg, size_t size)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(karg, arg, size))
|
||||
return -EFAULT;
|
||||
|
||||
ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (copy_to_user(arg, karg, size))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(sock_ioctl_inout);
|
||||
|
||||
/* This is the most common ioctl prep function, where the result (4 bytes) is
|
||||
* copied back to userspace if the ioctl() returns successfully. No input is
|
||||
* copied from userspace as input argument.
|
||||
*/
|
||||
static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
|
||||
{
|
||||
int ret, karg = 0;
|
||||
|
||||
ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return put_user(karg, (int __user *)arg);
|
||||
}
|
||||
|
||||
/* A wrapper around sock ioctls, which copies the data from userspace
|
||||
* (depending on the protocol/ioctl), and copies back the result to userspace.
|
||||
* The main motivation for this function is to pass kernel memory to the
|
||||
* protocol ioctl callbacks, instead of userspace memory.
|
||||
*/
|
||||
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
|
||||
{
|
||||
int rc = 1;
|
||||
|
||||
if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET)
|
||||
rc = ipmr_sk_ioctl(sk, cmd, arg);
|
||||
else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6)
|
||||
rc = ip6mr_sk_ioctl(sk, cmd, arg);
|
||||
else if (sk_is_phonet(sk))
|
||||
rc = phonet_sk_ioctl(sk, cmd, arg);
|
||||
|
||||
/* If ioctl was processed, returns its value */
|
||||
if (rc <= 0)
|
||||
return rc;
|
||||
|
||||
/* Otherwise call the default handler */
|
||||
return sock_ioctl_out(sk, cmd, arg);
|
||||
}
|
||||
EXPORT_SYMBOL(sk_ioctl);
|
||||
|
|
|
@@ -297,7 +297,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen);
int dccp_setsockopt(struct sock *sk, int level, int optname,
                    sockptr_t optval, unsigned int optlen);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int dccp_ioctl(struct sock *sk, int cmd, int *karg);
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
                 int flags, int *addr_len);
@@ -371,7 +371,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
int dccp_ioctl(struct sock *sk, int cmd, int *karg)
{
        int rc = -ENOTCONN;

@@ -382,17 +382,17 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)

        switch (cmd) {
        case SIOCOUTQ: {
                int amount = sk_wmem_alloc_get(sk);
                *karg = sk_wmem_alloc_get(sk);
                /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
                 * always 0, comparably to UDP.
                 */

                rc = put_user(amount, (int __user *)arg);
                rc = 0;
        }
                break;
        case SIOCINQ: {
                struct sk_buff *skb;
                unsigned long amount = 0;
                *karg = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL) {
@@ -400,9 +400,9 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                         * We will only return the amount of this packet since
                         * that is all that will be read.
                         */
                        amount = skb->len;
                        *karg = skb->len;
                }
                rc = put_user(amount, (int __user *)arg);
                rc = 0;
        }
                break;
        default:
@@ -162,7 +162,7 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
        default:
                if (!sk->sk_prot->ioctl)
                        return -ENOIOCTLCMD;
                return sk->sk_prot->ioctl(sk, cmd, arg);
                return sk_ioctl(sk, cmd, (void __user *)arg);
        }
}
@@ -524,22 +524,21 @@ out:
        return err;
}

static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
static int dgram_ioctl(struct sock *sk, int cmd, int *karg)
{
        switch (cmd) {
        case SIOCOUTQ:
        {
                int amount = sk_wmem_alloc_get(sk);
                *karg = sk_wmem_alloc_get(sk);

                return put_user(amount, (int __user *)arg);
                return 0;
        }

        case SIOCINQ:
        {
                struct sk_buff *skb;
                unsigned long amount;

                amount = 0;
                *karg = 0;
                spin_lock_bh(&sk->sk_receive_queue.lock);
                skb = skb_peek(&sk->sk_receive_queue);
                if (skb) {
@@ -547,10 +546,10 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
                         * of this packet since that is all
                         * that will be read.
                         */
                        amount = skb->len - ieee802154_hdr_length(skb);
                        *karg = skb->len - ieee802154_hdr_length(skb);
                }
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                return put_user(amount, (int __user *)arg);
                return 0;
        }
        }
@@ -1004,7 +1004,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                break;
        default:
                if (sk->sk_prot->ioctl)
                        err = sk->sk_prot->ioctl(sk, cmd, arg);
                        err = sk_ioctl(sk, cmd, (void __user *)arg);
                else
                        err = -ENOIOCTLCMD;
                break;
@@ -1540,6 +1540,28 @@ out:
        return ret;
}

/* Execute if this ioctl is a special mroute ioctl */
int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
        switch (cmd) {
        /* These userspace buffers will be consumed by ipmr_ioctl() */
        case SIOCGETVIFCNT: {
                struct sioc_vif_req buffer;

                return sock_ioctl_inout(sk, cmd, arg, &buffer,
                                        sizeof(buffer));
        }
        case SIOCGETSGCNT: {
                struct sioc_sg_req buffer;

                return sock_ioctl_inout(sk, cmd, arg, &buffer,
                                        sizeof(buffer));
        }
        }
        /* return code > 0 means that the ioctl was not executed */
        return 1;
}

/* Getsock opt support for the multicast routing system. */
int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
                         sockptr_t optlen)
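Nothing changes for userspace here: the multicast-routing counters are still fetched with the same ioctl numbers and the same request structures, and only the kernel-side plumbing now goes through sock_ioctl_inout(). An illustrative caller, assuming fd is an mrouted-style IGMP raw socket (get_vif_counters is a hypothetical helper written for this description, not part of the series):

#include <sys/ioctl.h>
#include <linux/mroute.h>

/* Fill *vr with the packet/byte counters of one virtual interface. */
static int get_vif_counters(int fd, int vifi, struct sioc_vif_req *vr)
{
        vr->vifi = vifi;
        /* the kernel copies the request in, fills icount/ocount/ibytes/obytes,
         * and copies it back, exactly as before this change */
        return ioctl(fd, SIOCGETVIFCNT, vr);
}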
@@ -1586,13 +1608,13 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
}

/* The IP multicast ioctl support routines. */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
int ipmr_ioctl(struct sock *sk, int cmd, void *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;
        struct net *net = sock_net(sk);
        struct sioc_vif_req *vr;
        struct sioc_sg_req *sr;
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
@@ -1601,40 +1623,33 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi >= mrt->maxvif)
                vr = (struct sioc_vif_req *)arg;
                if (vr->vifi >= mrt->maxvif)
                        return -EINVAL;
                vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
                vr->vifi = array_index_nospec(vr->vifi, mrt->maxvif);
                read_lock(&mrt_lock);
                vif = &mrt->vif_table[vr.vifi];
                if (VIF_EXISTS(mrt, vr.vifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                vif = &mrt->vif_table[vr->vifi];
                if (VIF_EXISTS(mrt, vr->vifi)) {
                        vr->icount = vif->pkt_in;
                        vr->ocount = vif->pkt_out;
                        vr->ibytes = vif->bytes_in;
                        vr->obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;
                sr = (struct sioc_sg_req *)arg;

                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr);
                if (c) {
                        sr.pktcnt = c->_c.mfc_un.res.pkt;
                        sr.bytecnt = c->_c.mfc_un.res.bytes;
                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
                        sr->pktcnt = c->_c.mfc_un.res.pkt;
                        sr->bytecnt = c->_c.mfc_un.res.bytes;
                        sr->wrong_if = c->_c.mfc_un.res.wrong_if;
                        rcu_read_unlock();

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                rcu_read_unlock();
@@ -855,29 +855,29 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
        return do_raw_getsockopt(sk, level, optname, optval, optlen);
}

static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
static int raw_ioctl(struct sock *sk, int cmd, int *karg)
{
        switch (cmd) {
        case SIOCOUTQ: {
                int amount = sk_wmem_alloc_get(sk);

                return put_user(amount, (int __user *)arg);
                *karg = sk_wmem_alloc_get(sk);
                return 0;
        }
        case SIOCINQ: {
                struct sk_buff *skb;
                int amount = 0;

                spin_lock_bh(&sk->sk_receive_queue.lock);
                skb = skb_peek(&sk->sk_receive_queue);
                if (skb)
                        amount = skb->len;
                        *karg = skb->len;
                else
                        *karg = 0;
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                return put_user(amount, (int __user *)arg);
                return 0;
        }

        default:
#ifdef CONFIG_IP_MROUTE
                return ipmr_ioctl(sk, cmd, (void __user *)arg);
                return ipmr_ioctl(sk, cmd, karg);
#else
                return -ENOIOCTLCMD;
#endif
@@ -596,7 +596,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
EXPORT_SYMBOL(tcp_poll);

int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
int tcp_ioctl(struct sock *sk, int cmd, int *karg)
{
        struct tcp_sock *tp = tcp_sk(sk);
        int answ;

@@ -638,7 +638,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                return -ENOIOCTLCMD;
        }

        return put_user(answ, (int __user *)arg);
        *karg = answ;
        return 0;
}
EXPORT_SYMBOL(tcp_ioctl);

@@ -1717,21 +1717,19 @@ static int first_packet_length(struct sock *sk)
 *      IOCTL requests applicable to the UDP protocol
 */

int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
int udp_ioctl(struct sock *sk, int cmd, int *karg)
{
        switch (cmd) {
        case SIOCOUTQ:
        {
                int amount = sk_wmem_alloc_get(sk);

                return put_user(amount, (int __user *)arg);
                *karg = sk_wmem_alloc_get(sk);
                return 0;
        }

        case SIOCINQ:
        {
                int amount = max_t(int, 0, first_packet_length(sk));

                return put_user(amount, (int __user *)arg);
                *karg = max_t(int, 0, first_packet_length(sk));
                return 0;
        }

        default:
@@ -594,7 +594,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                prot = READ_ONCE(sk->sk_prot);
                if (!prot->ioctl)
                        return -ENOIOCTLCMD;
                return prot->ioctl(sk, cmd, arg);
                return sk_ioctl(sk, cmd, (void __user *)arg);
        }
        /*NOTREACHED*/
        return 0;
@@ -1853,11 +1853,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
/*
 *      The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
{
        struct sioc_sg_req6 sr;
        struct sioc_mif_req6 vr;
        struct sioc_sg_req6 *sr;
        struct sioc_mif_req6 *vr;
        struct vif_device *vif;
        struct mfc6_cache *c;
        struct net *net = sock_net(sk);
@@ -1869,40 +1868,33 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)

        switch (cmd) {
        case SIOCGETMIFCNT_IN6:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.mifi >= mrt->maxvif)
                vr = (struct sioc_mif_req6 *)arg;
                if (vr->mifi >= mrt->maxvif)
                        return -EINVAL;
                vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
                vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
                read_lock(&mrt_lock);
                vif = &mrt->vif_table[vr.mifi];
                if (VIF_EXISTS(mrt, vr.mifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                vif = &mrt->vif_table[vr->mifi];
                if (VIF_EXISTS(mrt, vr->mifi)) {
                        vr->icount = vif->pkt_in;
                        vr->ocount = vif->pkt_out;
                        vr->ibytes = vif->bytes_in;
                        vr->obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT_IN6:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;
                sr = (struct sioc_sg_req6 *)arg;

                rcu_read_lock();
                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
                c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
                                     &sr->grp.sin6_addr);
                if (c) {
                        sr.pktcnt = c->_c.mfc_un.res.pkt;
                        sr.bytecnt = c->_c.mfc_un.res.bytes;
                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
                        sr->pktcnt = c->_c.mfc_un.res.pkt;
                        sr->bytecnt = c->_c.mfc_un.res.bytes;
                        sr->wrong_if = c->_c.mfc_un.res.wrong_if;
                        rcu_read_unlock();

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                rcu_read_unlock();
@@ -1116,29 +1116,29 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
        return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
}

static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
static int rawv6_ioctl(struct sock *sk, int cmd, int *karg)
{
        switch (cmd) {
        case SIOCOUTQ: {
                int amount = sk_wmem_alloc_get(sk);

                return put_user(amount, (int __user *)arg);
                *karg = sk_wmem_alloc_get(sk);
                return 0;
        }
        case SIOCINQ: {
                struct sk_buff *skb;
                int amount = 0;

                spin_lock_bh(&sk->sk_receive_queue.lock);
                skb = skb_peek(&sk->sk_receive_queue);
                if (skb)
                        amount = skb->len;
                        *karg = skb->len;
                else
                        *karg = 0;
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                return put_user(amount, (int __user *)arg);
                return 0;
        }

        default:
#ifdef CONFIG_IPV6_MROUTE
                return ip6mr_ioctl(sk, cmd, (void __user *)arg);
                return ip6mr_ioctl(sk, cmd, karg);
#else
                return -ENOIOCTLCMD;
#endif
@@ -272,7 +272,7 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops
void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);

/* IOCTL helper for IP encap modules. */
int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int l2tp_ioctl(struct sock *sk, int cmd, int *karg);

/* Extract the tunnel structure from a socket's sk_user_data pointer,
 * validating the tunnel magic feather.
@@ -563,19 +563,18 @@ out:
        return err ? err : copied;
}

int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg)
int l2tp_ioctl(struct sock *sk, int cmd, int *karg)
{
        struct sk_buff *skb;
        int amount;

        switch (cmd) {
        case SIOCOUTQ:
                amount = sk_wmem_alloc_get(sk);
                *karg = sk_wmem_alloc_get(sk);
                break;
        case SIOCINQ:
                spin_lock_bh(&sk->sk_receive_queue.lock);
                skb = skb_peek(&sk->sk_receive_queue);
                amount = skb ? skb->len : 0;
                *karg = skb ? skb->len : 0;
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                break;

@@ -583,7 +582,7 @@ int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                return -ENOIOCTLCMD;
        }

        return put_user(amount, (int __user *)arg);
        return 0;
}
EXPORT_SYMBOL_GPL(l2tp_ioctl);

@@ -3620,11 +3620,10 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
        return (int)delta;
}

static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
{
        struct mptcp_sock *msk = mptcp_sk(sk);
        bool slow;
        int answ;

        switch (cmd) {
        case SIOCINQ:
@@ -3633,24 +3632,24 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)

                lock_sock(sk);
                __mptcp_move_skbs(msk);
                answ = mptcp_inq_hint(sk);
                *karg = mptcp_inq_hint(sk);
                release_sock(sk);
                break;
        case SIOCOUTQ:
                slow = lock_sock_fast(sk);
                answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
                *karg = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
                unlock_sock_fast(sk, slow);
                break;
        case SIOCOUTQNSD:
                slow = lock_sock_fast(sk);
                answ = mptcp_ioctl_outq(msk, msk->snd_nxt);
                *karg = mptcp_ioctl_outq(msk, msk->snd_nxt);
                unlock_sock_fast(sk, slow);
                break;
        default:
                return -ENOIOCTLCMD;
        }

        return put_user(answ, (int __user *)arg);
        return 0;
}

static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
@@ -28,24 +28,21 @@ static void pn_sock_close(struct sock *sk, long timeout)
        sk_common_release(sk);
}

static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg)
static int pn_ioctl(struct sock *sk, int cmd, int *karg)
{
        struct sk_buff *skb;
        int answ;

        switch (cmd) {
        case SIOCINQ:
                lock_sock(sk);
                skb = skb_peek(&sk->sk_receive_queue);
                answ = skb ? skb->len : 0;
                *karg = skb ? skb->len : 0;
                release_sock(sk);
                return put_user(answ, (int __user *)arg);
                return 0;

        case SIOCPNADDRESOURCE:
        case SIOCPNDELRESOURCE: {
                u32 res;
                if (get_user(res, (u32 __user *)arg))
                        return -EFAULT;
                u32 res = *karg;
                if (res >= 256)
                        return -EINVAL;
                if (cmd == SIOCPNADDRESOURCE)
@@ -916,10 +916,9 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
        return 0;
}

static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
static int pep_ioctl(struct sock *sk, int cmd, int *karg)
{
        struct pep_sock *pn = pep_sk(sk);
        int answ;
        int ret = -ENOIOCTLCMD;

        switch (cmd) {
@@ -932,13 +931,13 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
                lock_sock(sk);
                if (sock_flag(sk, SOCK_URGINLINE) &&
                    !skb_queue_empty(&pn->ctrlreq_queue))
                        answ = skb_peek(&pn->ctrlreq_queue)->len;
                        *karg = skb_peek(&pn->ctrlreq_queue)->len;
                else if (!skb_queue_empty(&sk->sk_receive_queue))
                        answ = skb_peek(&sk->sk_receive_queue)->len;
                        *karg = skb_peek(&sk->sk_receive_queue)->len;
                else
                        answ = 0;
                        *karg = 0;
                release_sock(sk);
                ret = put_user(answ, (int __user *)arg);
                ret = 0;
                break;

        case SIOCPNENABLEPIPE:
@@ -387,7 +387,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
                return put_user(handle, (__u16 __user *)arg);
        }

        return sk->sk_prot->ioctl(sk, cmd, arg);
        return sk_ioctl(sk, cmd, (void __user *)arg);
}

static int pn_socket_listen(struct socket *sock, int backlog)
Some files were not shown because too many files have changed in this diff.