|
@@ -628,6 +628,116 @@ out_lock:
|
|
return error;
|
|
return error;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * xfs_file_dio_aio_write - handle direct IO writes
|
|
|
|
+ *
|
|
|
|
+ * Lock the inode appropriately to prepare for and issue a direct IO write.
|
|
|
|
+ * By spearating it from the buffered write path we remove all the tricky to
|
|
|
|
+ * follow locking changes and looping.
|
|
|
|
+ *
|
|
|
|
+ * Returns with locks held indicated by @iolock and errors indicated by
|
|
|
|
+ * negative return values.
|
|
|
|
+ */
|
|
|
|
+STATIC ssize_t
|
|
|
|
+xfs_file_dio_aio_write(
|
|
|
|
+ struct kiocb *iocb,
|
|
|
|
+ const struct iovec *iovp,
|
|
|
|
+ unsigned long nr_segs,
|
|
|
|
+ loff_t pos,
|
|
|
|
+ size_t ocount,
|
|
|
|
+ int *iolock)
|
|
|
|
+{
|
|
|
|
+ struct file *file = iocb->ki_filp;
|
|
|
|
+ struct address_space *mapping = file->f_mapping;
|
|
|
|
+ struct inode *inode = mapping->host;
|
|
|
|
+ struct xfs_inode *ip = XFS_I(inode);
|
|
|
|
+ struct xfs_mount *mp = ip->i_mount;
|
|
|
|
+ ssize_t ret = 0;
|
|
|
|
+ xfs_fsize_t new_size;
|
|
|
|
+ size_t count = ocount;
|
|
|
|
+ struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
|
|
|
|
+ mp->m_rtdev_targp : mp->m_ddev_targp;
|
|
|
|
+
|
|
|
|
+ *iolock = 0;
|
|
|
|
+ if ((pos & target->bt_smask) || (count & target->bt_smask))
|
|
|
|
+ return -XFS_ERROR(EINVAL);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * For direct I/O, if there are cached pages or we're extending
|
|
|
|
+ * the file, we need IOLOCK_EXCL until we're sure the bytes at
|
|
|
|
+ * the new EOF have been zeroed and/or the cached pages are
|
|
|
|
+ * flushed out.
|
|
|
|
+ */
|
|
|
|
+ if (mapping->nrpages || pos > ip->i_size)
|
|
|
|
+ *iolock = XFS_IOLOCK_EXCL;
|
|
|
|
+ else
|
|
|
|
+ *iolock = XFS_IOLOCK_SHARED;
|
|
|
|
+ xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
|
|
|
|
+
|
|
|
|
+ ret = generic_write_checks(file, &pos, &count,
|
|
|
|
+ S_ISBLK(inode->i_mode));
|
|
|
|
+ if (ret) {
|
|
|
|
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
|
|
|
|
+ *iolock = 0;
|
|
|
|
+ return ret;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ new_size = pos + count;
|
|
|
|
+ if (new_size > ip->i_size)
|
|
|
|
+ ip->i_new_size = new_size;
|
|
|
|
+
|
|
|
|
+ if (likely(!(file->f_mode & FMODE_NOCMTIME)))
|
|
|
|
+ file_update_time(file);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * If the offset is beyond the size of the file, we have a couple of
|
|
|
|
+ * things to do. First, if there is already space allocated we need to
|
|
|
|
+ * either create holes or zero the disk or ...
|
|
|
|
+ *
|
|
|
|
+ * If there is a page where the previous size lands, we need to zero it
|
|
|
|
+ * out up to the new size.
|
|
|
|
+ */
|
|
|
|
+ if (pos > ip->i_size) {
|
|
|
|
+ ret = -xfs_zero_eof(ip, pos, ip->i_size);
|
|
|
|
+ if (ret) {
|
|
|
|
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
|
|
|
|
+ return ret;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * If we're writing the file then make sure to clear the setuid and
|
|
|
|
+ * setgid bits if the process is not being run by root. This keeps
|
|
|
|
+ * people from modifying setuid and setgid binaries.
|
|
|
|
+ */
|
|
|
|
+ ret = file_remove_suid(file);
|
|
|
|
+ if (unlikely(ret))
|
|
|
|
+ return ret;
|
|
|
|
+
|
|
|
|
+ if (mapping->nrpages) {
|
|
|
|
+ WARN_ON(*iolock != XFS_IOLOCK_EXCL);
|
|
|
|
+ ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
|
|
|
|
+ FI_REMAPF_LOCKED);
|
|
|
|
+ if (ret)
|
|
|
|
+ return ret;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (*iolock == XFS_IOLOCK_EXCL) {
|
|
|
|
+ /* demote the lock now the cached pages are gone */
|
|
|
|
+ xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
|
|
|
|
+ *iolock = XFS_IOLOCK_SHARED;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
|
|
|
|
+ ret = generic_file_direct_write(iocb, iovp,
|
|
|
|
+ &nr_segs, pos, &iocb->ki_pos, count, ocount);
|
|
|
|
+
|
|
|
|
+ /* No fallback to buffered IO on errors for XFS. */
|
|
|
|
+ ASSERT(ret < 0 || ret == count);
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
STATIC ssize_t
|
|
STATIC ssize_t
|
|
xfs_file_aio_write(
|
|
xfs_file_aio_write(
|
|
struct kiocb *iocb,
|
|
struct kiocb *iocb,
|
|
@@ -670,12 +780,12 @@ xfs_file_aio_write(
|
|
|
|
|
|
relock:
|
|
relock:
|
|
if (ioflags & IO_ISDIRECT) {
|
|
if (ioflags & IO_ISDIRECT) {
|
|
- iolock = XFS_IOLOCK_SHARED;
|
|
|
|
- } else {
|
|
|
|
- iolock = XFS_IOLOCK_EXCL;
|
|
|
|
|
|
+ ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
|
|
|
|
+ ocount, &iolock);
|
|
|
|
+ goto done_io;
|
|
}
|
|
}
|
|
|
|
+ iolock = XFS_IOLOCK_EXCL;
|
|
|
|
|
|
-start:
|
|
|
|
xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock);
|
|
xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock);
|
|
ret = generic_write_checks(file, &pos, &count,
|
|
ret = generic_write_checks(file, &pos, &count,
|
|
S_ISBLK(inode->i_mode));
|
|
S_ISBLK(inode->i_mode));
|
|
@@ -684,30 +794,6 @@ start:
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
- if (ioflags & IO_ISDIRECT) {
|
|
|
|
- xfs_buftarg_t *target =
|
|
|
|
- XFS_IS_REALTIME_INODE(ip) ?
|
|
|
|
- mp->m_rtdev_targp : mp->m_ddev_targp;
|
|
|
|
-
|
|
|
|
- if ((pos & target->bt_smask) || (count & target->bt_smask)) {
|
|
|
|
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
|
|
|
|
- return XFS_ERROR(-EINVAL);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * For direct I/O, if there are cached pages or we're extending
|
|
|
|
- * the file, we need IOLOCK_EXCL until we're sure the bytes at
|
|
|
|
- * the new EOF have been zeroed and/or the cached pages are
|
|
|
|
- * flushed out. Upgrade the I/O lock and start again.
|
|
|
|
- */
|
|
|
|
- if (iolock != XFS_IOLOCK_EXCL &&
|
|
|
|
- (mapping->nrpages || pos > ip->i_size)) {
|
|
|
|
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
|
|
|
|
- iolock = XFS_IOLOCK_EXCL;
|
|
|
|
- goto start;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
new_size = pos + count;
|
|
new_size = pos + count;
|
|
if (new_size > ip->i_size)
|
|
if (new_size > ip->i_size)
|
|
ip->i_new_size = new_size;
|
|
ip->i_new_size = new_size;
|
|
@@ -746,41 +832,7 @@ start:
|
|
/* We can write back this queue in page reclaim */
|
|
/* We can write back this queue in page reclaim */
|
|
current->backing_dev_info = mapping->backing_dev_info;
|
|
current->backing_dev_info = mapping->backing_dev_info;
|
|
|
|
|
|
- if ((ioflags & IO_ISDIRECT)) {
|
|
|
|
- if (mapping->nrpages) {
|
|
|
|
- WARN_ON(iolock != XFS_IOLOCK_EXCL);
|
|
|
|
- ret = -xfs_flushinval_pages(ip,
|
|
|
|
- (pos & PAGE_CACHE_MASK),
|
|
|
|
- -1, FI_REMAPF_LOCKED);
|
|
|
|
- if (ret)
|
|
|
|
- goto out_unlock_internal;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (iolock == XFS_IOLOCK_EXCL) {
|
|
|
|
- /* demote the lock now the cached pages are gone */
|
|
|
|
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
|
|
|
|
- iolock = XFS_IOLOCK_SHARED;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
|
|
|
|
- ret = generic_file_direct_write(iocb, iovp,
|
|
|
|
- &nr_segs, pos, &iocb->ki_pos, count, ocount);
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * direct-io write to a hole: fall through to buffered I/O
|
|
|
|
- * for completing the rest of the request.
|
|
|
|
- */
|
|
|
|
- if (ret >= 0 && ret != count) {
|
|
|
|
- XFS_STATS_ADD(xs_write_bytes, ret);
|
|
|
|
-
|
|
|
|
- pos += ret;
|
|
|
|
- count -= ret;
|
|
|
|
-
|
|
|
|
- ioflags &= ~IO_ISDIRECT;
|
|
|
|
- xfs_rw_iunlock(ip, iolock);
|
|
|
|
- goto relock;
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
|
|
+ if (!(ioflags & IO_ISDIRECT)) {
|
|
int enospc = 0;
|
|
int enospc = 0;
|
|
|
|
|
|
write_retry:
|
|
write_retry:
|
|
@@ -802,6 +854,7 @@ write_retry:
|
|
|
|
|
|
current->backing_dev_info = NULL;
|
|
current->backing_dev_info = NULL;
|
|
|
|
|
|
|
|
+done_io:
|
|
xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
|
|
xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
|
|
|
|
|
|
if (ret <= 0)
|
|
if (ret <= 0)
|