@@ -415,6 +415,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
 	req->ki_retry = NULL;
 	req->ki_dtor = NULL;
 	req->private = NULL;
+	req->ki_iovec = NULL;
 	INIT_LIST_HEAD(&req->ki_run_list);
 
 	/* Check if the completion queue has enough free space to
@@ -460,6 +461,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 
 	if (req->ki_dtor)
 		req->ki_dtor(req);
+	if (req->ki_iovec != &req->ki_inline_vec)
+		kfree(req->ki_iovec);
 	kmem_cache_free(kiocb_cachep, req);
 	ctx->reqs_active--;
 
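The conditional kfree() above is the release half of the inline-vector idiom: the kiocb embeds storage for the common single-segment case, so only the vectored path allocates, and teardown frees only an array it actually allocated. A minimal standalone sketch of the same ownership rule, assuming ordinary userspace allocators rather than the kiocb slab:

	#include <stdlib.h>
	#include <string.h>
	#include <sys/uio.h>

	struct req {
		struct iovec inline_vec;	/* storage for the common 1-segment case */
		struct iovec *iovec;		/* -> &inline_vec, or a malloc'd array */
	};

	static int req_set_iovec(struct req *r, const struct iovec *vec, unsigned nr)
	{
		if (nr == 1) {
			r->inline_vec = vec[0];
			r->iovec = &r->inline_vec;	/* common case: no allocation */
			return 0;
		}
		r->iovec = malloc(nr * sizeof(*vec));
		if (!r->iovec)
			return -1;
		memcpy(r->iovec, vec, nr * sizeof(*vec));
		return 0;
	}

	static void req_teardown(struct req *r)
	{
		/* mirror really_put_req(): free only what was actually allocated */
		if (r->iovec != &r->inline_vec)
			free(r->iovec);
	}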
@@ -1301,69 +1304,63 @@ asmlinkage long sys_io_destroy(aio_context_t ctx)
 	return -EINVAL;
 }
 
-/*
- * aio_p{read,write} are the default  ki_retry methods for
- * IO_CMD_P{READ,WRITE}.  They maintains kiocb retry state around potentially
- * multiple calls to f_op->aio_read().  They loop around partial progress
- * instead of returning -EIOCBRETRY because they don't have the means to call
- * kick_iocb().
- */
-static ssize_t aio_pread(struct kiocb *iocb)
+static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
 {
-	struct file *file = iocb->ki_filp;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	ssize_t ret = 0;
-
-	do {
-		iocb->ki_inline_vec.iov_base = iocb->ki_buf;
-		iocb->ki_inline_vec.iov_len = iocb->ki_left;
-
-		ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
-					   1, iocb->ki_pos);
-		/*
-		 * Can't just depend on iocb->ki_left to determine
-		 * whether we are done. This may have been a short read.
-		 */
-		if (ret > 0) {
-			iocb->ki_buf += ret;
-			iocb->ki_left -= ret;
+	struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg];
+
+	BUG_ON(ret <= 0);
+
+	while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) {
+		ssize_t this = min((ssize_t)iov->iov_len, ret);
+		iov->iov_base += this;
+		iov->iov_len -= this;
+		iocb->ki_left -= this;
+		ret -= this;
+		if (iov->iov_len == 0) {
+			iocb->ki_cur_seg++;
+			iov++;
 		}
+	}
 
-		/*
-		 * For pipes and sockets we return once we have some data; for
-		 * regular files we retry till we complete the entire read or
-		 * find that we can't read any more data (e.g short reads).
-		 */
-	} while (ret > 0 && iocb->ki_left > 0 &&
-		 !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
-
-	/* This means we must have transferred all that we could */
-	/* No need to retry anymore */
-	if ((ret == 0) || (iocb->ki_left == 0))
-		ret = iocb->ki_nbytes - iocb->ki_left;
-
-	return ret;
+	/* the caller should not have done more io than what fit in
+	 * the remaining iovecs */
+	BUG_ON(ret > 0 && iocb->ki_left == 0);
 }
 
-/* see aio_pread() */
-static ssize_t aio_pwrite(struct kiocb *iocb)
+static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
 {
 	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
+			 unsigned long, loff_t);
 	ssize_t ret = 0;
+	unsigned short opcode;
+
+	if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
+		(iocb->ki_opcode == IOCB_CMD_PREAD)) {
+		rw_op = file->f_op->aio_read;
+		opcode = IOCB_CMD_PREADV;
+	} else {
+		rw_op = file->f_op->aio_write;
+		opcode = IOCB_CMD_PWRITEV;
+	}
 
 	do {
-		iocb->ki_inline_vec.iov_base = iocb->ki_buf;
-		iocb->ki_inline_vec.iov_len = iocb->ki_left;
-
-		ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
-					    1, iocb->ki_pos);
-		if (ret > 0) {
-			iocb->ki_buf += ret;
-			iocb->ki_left -= ret;
-		}
-	} while (ret > 0 && iocb->ki_left > 0);
+		ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
+			    iocb->ki_nr_segs - iocb->ki_cur_seg,
+			    iocb->ki_pos);
+		if (ret > 0)
+			aio_advance_iovec(iocb, ret);
+
+	/* retry all partial writes.  retry partial reads as long as its a
+	 * regular file. */
+	} while (ret > 0 && iocb->ki_left > 0 &&
+		 (opcode == IOCB_CMD_PWRITEV ||
+		  (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
 
+	/* This means we must have transferred all that we could */
+	/* No need to retry anymore */
 	if ((ret == 0) || (iocb->ki_left == 0))
 		ret = iocb->ki_nbytes - iocb->ki_left;
 
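To make the bookkeeping in aio_advance_iovec() concrete, here is a standalone sketch of the same walk, a rough userspace rendering rather than the kernel code: a short read of 150 bytes across segments of 100 and 200 bytes consumes the first segment entirely and leaves 150 bytes outstanding in the second.

	#include <assert.h>
	#include <stdio.h>
	#include <sys/uio.h>

	/* Consume `ret` transferred bytes from the iovec array, advancing
	 * iov_base/iov_len in place and stepping cur_seg past any segment
	 * that is now fully consumed. */
	static void advance_iovec(struct iovec *iov, unsigned long nr_segs,
				  unsigned long *cur_seg, ssize_t ret)
	{
		while (*cur_seg < nr_segs && ret > 0) {
			struct iovec *v = &iov[*cur_seg];
			ssize_t this = (ssize_t)v->iov_len < ret ?
					(ssize_t)v->iov_len : ret;

			v->iov_base = (char *)v->iov_base + this;
			v->iov_len -= this;
			ret -= this;
			if (v->iov_len == 0)
				(*cur_seg)++;
		}
	}

	int main(void)
	{
		char a[100], b[200];
		struct iovec vec[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
		unsigned long cur = 0;

		advance_iovec(vec, 2, &cur, 150);	/* model a short read of 150 bytes */
		assert(cur == 1);			/* segment 0 fully consumed */
		assert(vec[1].iov_len == 150);		/* 50 of segment 1 consumed */
		printf("cur_seg=%lu, remaining in seg 1: %zu bytes\n",
		       cur, vec[1].iov_len);
		return 0;
	}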
@@ -1390,6 +1387,38 @@ static ssize_t aio_fsync(struct kiocb *iocb)
 	return ret;
 }
 
+static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb)
+{
+	ssize_t ret;
+
+	ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf,
+				    kiocb->ki_nbytes, 1,
+				    &kiocb->ki_inline_vec, &kiocb->ki_iovec);
+	if (ret < 0)
+		goto out;
+
+	kiocb->ki_nr_segs = kiocb->ki_nbytes;
+	kiocb->ki_cur_seg = 0;
+	/* ki_nbytes/left now reflect bytes instead of segs */
+	kiocb->ki_nbytes = ret;
+	kiocb->ki_left = ret;
+
+	ret = 0;
+out:
+	return ret;
+}
+
+static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+{
+	kiocb->ki_iovec = &kiocb->ki_inline_vec;
+	kiocb->ki_iovec->iov_base = kiocb->ki_buf;
+	kiocb->ki_iovec->iov_len = kiocb->ki_left;
+	kiocb->ki_nr_segs = 1;
+	kiocb->ki_cur_seg = 0;
+	kiocb->ki_nbytes = kiocb->ki_left;
+	return 0;
+}
+
 /*
  * aio_setup_iocb:
  *	Performs the initial checks and aio retry method
@@ -1412,9 +1441,12 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 		ret = security_file_permission(file, MAY_READ);
 		if (unlikely(ret))
 			break;
+		ret = aio_setup_single_vector(kiocb);
+		if (ret)
+			break;
 		ret = -EINVAL;
 		if (file->f_op->aio_read)
-			kiocb->ki_retry = aio_pread;
+			kiocb->ki_retry = aio_rw_vect_retry;
 		break;
 	case IOCB_CMD_PWRITE:
 		ret = -EBADF;
@@ -1427,9 +1459,40 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 		ret = security_file_permission(file, MAY_WRITE);
 		if (unlikely(ret))
 			break;
+		ret = aio_setup_single_vector(kiocb);
+		if (ret)
+			break;
+		ret = -EINVAL;
+		if (file->f_op->aio_write)
+			kiocb->ki_retry = aio_rw_vect_retry;
+		break;
+	case IOCB_CMD_PREADV:
+		ret = -EBADF;
+		if (unlikely(!(file->f_mode & FMODE_READ)))
+			break;
+		ret = security_file_permission(file, MAY_READ);
+		if (unlikely(ret))
+			break;
+		ret = aio_setup_vectored_rw(READ, kiocb);
+		if (ret)
+			break;
+		ret = -EINVAL;
+		if (file->f_op->aio_read)
+			kiocb->ki_retry = aio_rw_vect_retry;
+		break;
+	case IOCB_CMD_PWRITEV:
+		ret = -EBADF;
+		if (unlikely(!(file->f_mode & FMODE_WRITE)))
+			break;
+		ret = security_file_permission(file, MAY_WRITE);
+		if (unlikely(ret))
+			break;
+		ret = aio_setup_vectored_rw(WRITE, kiocb);
+		if (ret)
+			break;
 		ret = -EINVAL;
 		if (file->f_op->aio_write)
-			kiocb->ki_retry = aio_pwrite;
+			kiocb->ki_retry = aio_rw_vect_retry;
 		break;
 	case IOCB_CMD_FDSYNC:
 		ret = -EINVAL;
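For reference, the new opcodes reuse the existing iocb fields: as aio_setup_vectored_rw() above implies, userspace passes the iovec array pointer in aio_buf and the segment count (not a byte count) in aio_nbytes. A hedged sketch of submitting one IOCB_CMD_PREADV request through the raw syscall interface follows; the helper names are illustrative and error handling is trimmed.

	#include <linux/aio_abi.h>
	#include <stdint.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <sys/uio.h>
	#include <unistd.h>

	static long sys_io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
	{
		return syscall(__NR_io_submit, ctx, nr, iocbpp);
	}

	/* Queue one vectored read at `off`; assumes `ctx` came from io_setup(). */
	static int submit_preadv(aio_context_t ctx, int fd, struct iovec *vec,
				 unsigned nr_segs, long long off)
	{
		struct iocb cb;
		struct iocb *cbs[1] = { &cb };

		memset(&cb, 0, sizeof(cb));
		cb.aio_fildes = fd;
		cb.aio_lio_opcode = IOCB_CMD_PREADV;
		cb.aio_buf = (uintptr_t)vec;	/* pointer to the iovec array */
		cb.aio_nbytes = nr_segs;	/* segment count, not bytes */
		cb.aio_offset = off;

		return sys_io_submit(ctx, 1, cbs) == 1 ? 0 : -1;
	}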