13 years ago · eb838e73dc
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5773,18 +5773,109 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
			
 
				+			      struct extent_state **cached_state, int writing)
			
 
				+{
			
 
				+	struct btrfs_ordered_extent *ordered;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	while (1) {
			
 
				+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			
 
				+				 0, cached_state);
			
 
				+		/*
			
 
				+		 * We're concerned with the entire range that we're going to be
			
 
				+		 * doing DIO to, so we need to make sure there are no ordered
			
 
				+		 * extents in this range.
			
 
				+		 */
			
 
				+		ordered = btrfs_lookup_ordered_range(inode, lockstart,
			
 
				+						     lockend - lockstart + 1);
			
 
				+
			
 
				+		/*
			
 
				+		 * We need to make sure there are no buffered pages in this
			
 
				+		 * range either; we could have raced between the invalidate in
			
 
				+		 * generic_file_direct_write and locking the extent.  The
			
 
				+		 * invalidate needs to happen so that reads after a write do not
			
 
				+		 * get stale data.
			
 
				+		 */
			
 
				+		if (!ordered && (!writing ||
			
 
				+		    !test_range_bit(&BTRFS_I(inode)->io_tree,
			
 
				+				    lockstart, lockend, EXTENT_UPTODATE, 0,
			
 
				+				    *cached_state)))
			
 
				+			break;
			
 
				+
			
 
				+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			
 
				+				     cached_state, GFP_NOFS);
			
 
				+
			
 
				+		if (ordered) {
			
 
				+			btrfs_start_ordered_extent(inode, ordered, 1);
			
 
				+			btrfs_put_ordered_extent(ordered);
			
 
				+		} else {
			
 
				+			/* Write out dirty pages in range, e.g. from mmap */
			
 
				+			ret = filemap_write_and_wait_range(inode->i_mapping,
			
 
				+							   lockstart,
			
 
				+							   lockend);
			
 
				+			if (ret)
			
 
				+				break;
			
 
				+
			
 
				+			/*
			
 
				+			 * If we found a page that couldn't be invalidated just
			
 
				+			 * fall back to buffered.
			
 
				+			 */
			
 
				+			ret = invalidate_inode_pages2_range(inode->i_mapping,
			
 
				+					lockstart >> PAGE_CACHE_SHIFT,
			
 
				+					lockend >> PAGE_CACHE_SHIFT);
			
 
				+			if (ret)
			
 
				+				break;
			
 
				+		}
			
 
				+
			
 
				+		cond_resched();
			
 
				+	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
			
 
				 				   struct buffer_head *bh_result, int create)
			
 
				 {
			
 
				 	struct extent_map *em;
			
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				+	struct extent_state *cached_state = NULL;
			
 
				 	u64 start = iblock << inode->i_blkbits;
			
 
				+	u64 lockstart, lockend;
			
 
				 	u64 len = bh_result->b_size;
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				+	int unlock_bits = EXTENT_LOCKED;
			
 
				+	int ret;
			
 
				+
			
 
				+	lockstart = start;
			
 
				+	lockend = start + len - 1;
			
 
				+	if (create) {
			
 
				+		ret = btrfs_delalloc_reserve_space(inode, len);
			
 
				+		if (ret)
			
 
				+			return ret;
			
 
				+		unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * If this errors out it's because we couldn't invalidate pagecache for
			
 
				+	 * this range and we need to fall back to buffered.
			
 
				+	 */
			
 
				+	if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
			
 
				+		return -ENOTBLK;
			
 
				+
			
 
				+	if (create) {
			
 
				+		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
			
 
				+				     lockend, EXTENT_DELALLOC, NULL,
			
 
				+				     &cached_state, GFP_NOFS);
			
 
				+		if (ret)
			
 
				+			goto unlock_err;
			
 
				+	}
			
 
				 
			
 
				 	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
			
 
				-	if (IS_ERR(em))
			
 
				-		return PTR_ERR(em);
			
 
				+	if (IS_ERR(em)) {
			
 
				+		ret = PTR_ERR(em);
			
 
				+		goto unlock_err;
			
 
				+	}
			
 
				 
			
 
				 	/*
			
 
				 	 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
			
@@ -5803,17 +5894,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 
				 	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
			
 
				 	    em->block_start == EXTENT_MAP_INLINE) {
			
 
				 		free_extent_map(em);
			
 
				-		return -ENOTBLK;
			
 
				+		ret = -ENOTBLK;
			
 
				+		goto unlock_err;
			
 
				 	}
			
 
				 
			
 
				 	/* Just a good old fashioned hole, return */
			
 
				 	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
			
 
				 			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
			
 
				 		free_extent_map(em);
			
 
				-		/* DIO will do one hole at a time, so just unlock a sector */
			
 
				-		unlock_extent(&BTRFS_I(inode)->io_tree, start,
			
 
				-			      start + root->sectorsize - 1);
			
 
				-		return 0;
			
 
				+		ret = 0;
			
 
				+		goto unlock_err;
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -5826,8 +5916,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 
				 	 *
			
 
				 	 */
			
 
				 	if (!create) {
			
 
				-		len = em->len - (start - em->start);
			
 
				-		goto map;
			
 
				+		len = min(len, em->len - (start - em->start));
			
 
				+		lockstart = start + len;
			
 
				+		goto unlock;
			
 
				 	}
			
 
				 
			
 
				 	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
			
@@ -5859,7 +5950,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 
				 			btrfs_end_transaction(trans, root);
			
 
				 			if (ret) {
			
 
				 				free_extent_map(em);
			
 
				-				return ret;
			
 
				+				goto unlock_err;
			
 
				 			}
			
 
				 			goto unlock;
			
 
				 		}
			
@@ -5872,14 +5963,12 @@ must_cow:
 
				 	 */
			
 
				 	len = bh_result->b_size;
			
 
				 	em = btrfs_new_extent_direct(inode, em, start, len);
			
 
				-	if (IS_ERR(em))
			
 
				-		return PTR_ERR(em);
			
 
				+	if (IS_ERR(em)) {
			
 
				+		ret = PTR_ERR(em);
			
 
				+		goto unlock_err;
			
 
				+	}
			
 
				 	len = min(len, em->len - (start - em->start));
			
 
				 unlock:
			
 
				-	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
			
 
				-			  EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
			
 
				-			  0, NULL, GFP_NOFS);
			
 
				-map:
			
 
				 	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
			
 
				 		inode->i_blkbits;
			
 
				 	bh_result->b_size = len;
			
@@ -5897,9 +5986,28 @@ map:
 
				 			i_size_write(inode, start + len);
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * In the case of write we need to clear and unlock the entire range,
			
 
				+	 * in the case of read we need to unlock only the end area that we
			
 
				+	 * aren't using if there is any left over space.
			
 
				+	 */
			
 
				+	if (lockstart < lockend)
			
 
				+		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			
 
				+				 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
			
 
				+	else
			
 
				+		free_extent_state(cached_state);
			
 
				+
			
 
				 	free_extent_map(em);
			
 
				 
			
 
				 	return 0;
			
 
				+
			
 
				+unlock_err:
			
 
				+	if (create)
			
 
				+		unlock_bits |= EXTENT_DO_ACCOUNTING;
			
 
				+
			
 
				+	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			
 
				+			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 struct btrfs_dio_private {
			
@@ -6340,132 +6448,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 
				 out:
			
 
				 	return retval;
			
 
				 }
			
 
				+
			
 
				 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
			
 
				 			const struct iovec *iov, loff_t offset,
			
 
				 			unsigned long nr_segs)
			
 
				 {
			
 
				 	struct file *file = iocb->ki_filp;
			
 
				 	struct inode *inode = file->f_mapping->host;
			
 
				-	struct btrfs_ordered_extent *ordered;
			
 
				-	struct extent_state *cached_state = NULL;
			
 
				-	u64 lockstart, lockend;
			
 
				-	ssize_t ret;
			
 
				-	int writing = rw & WRITE;
			
 
				-	int write_bits = 0;
			
 
				-	size_t count = iov_length(iov, nr_segs);
			
 
				 
			
 
				 	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
			
 
				-			    offset, nr_segs)) {
			
 
				+			    offset, nr_segs))
			
 
				 		return 0;
			
 
				-	}
			
 
				-
			
 
				-	lockstart = offset;
			
 
				-	lockend = offset + count - 1;
			
 
				-
			
 
				-	if (writing) {
			
 
				-		ret = btrfs_delalloc_reserve_space(inode, count);
			
 
				-		if (ret)
			
 
				-			goto out;
			
 
				-	}
			
 
				 
			
 
				-	while (1) {
			
 
				-		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			
 
				-				 0, &cached_state);
			
 
				-		/*
			
 
				-		 * We're concerned with the entire range that we're going to be
			
 
				-		 * doing DIO to, so we need to make sure theres no ordered
			
 
				-		 * extents in this range.
			
 
				-		 */
			
 
				-		ordered = btrfs_lookup_ordered_range(inode, lockstart,
			
 
				-						     lockend - lockstart + 1);
			
 
				-
			
 
				-		/*
			
 
				-		 * We need to make sure there are no buffered pages in this
			
 
				-		 * range either, we could have raced between the invalidate in
			
 
				-		 * generic_file_direct_write and locking the extent.  The
			
 
				-		 * invalidate needs to happen so that reads after a write do not
			
 
				-		 * get stale data.
			
 
				-		 */
			
 
				-		if (!ordered && (!writing ||
			
 
				-		    !test_range_bit(&BTRFS_I(inode)->io_tree,
			
 
				-				    lockstart, lockend, EXTENT_UPTODATE, 0,
			
 
				-				    cached_state)))
			
 
				-			break;
			
 
				-
			
 
				-		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			
 
				-				     &cached_state, GFP_NOFS);
			
 
				-
			
 
				-		if (ordered) {
			
 
				-			btrfs_start_ordered_extent(inode, ordered, 1);
			
 
				-			btrfs_put_ordered_extent(ordered);
			
 
				-		} else {
			
 
				-			/* Screw you mmap */
			
 
				-			ret = filemap_write_and_wait_range(file->f_mapping,
			
 
				-							   lockstart,
			
 
				-							   lockend);
			
 
				-			if (ret)
			
 
				-				goto out;
			
 
				-
			
 
				-			/*
			
 
				-			 * If we found a page that couldn't be invalidated just
			
 
				-			 * fall back to buffered.
			
 
				-			 */
			
 
				-			ret = invalidate_inode_pages2_range(file->f_mapping,
			
 
				-					lockstart >> PAGE_CACHE_SHIFT,
			
 
				-					lockend >> PAGE_CACHE_SHIFT);
			
 
				-			if (ret) {
			
 
				-				if (ret == -EBUSY)
			
 
				-					ret = 0;
			
 
				-				goto out;
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		cond_resched();
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * we don't use btrfs_set_extent_delalloc because we don't want
			
 
				-	 * the dirty or uptodate bits
			
 
				-	 */
			
 
				-	if (writing) {
			
 
				-		write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
			
 
				-		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			
 
				-				     EXTENT_DELALLOC, NULL, &cached_state,
			
 
				-				     GFP_NOFS);
			
 
				-		if (ret) {
			
 
				-			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
			
 
				-					 lockend, EXTENT_LOCKED | write_bits,
			
 
				-					 1, 0, &cached_state, GFP_NOFS);
			
 
				-			goto out;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	free_extent_state(cached_state);
			
 
				-	cached_state = NULL;
			
 
				-
			
 
				-	ret = __blockdev_direct_IO(rw, iocb, inode,
			
 
				+	return __blockdev_direct_IO(rw, iocb, inode,
			
 
				 		   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
			
 
				 		   iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
			
 
				 		   btrfs_submit_direct, 0);
			
 
				-
			
 
				-	if (ret < 0 && ret != -EIOCBQUEUED) {
			
 
				-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
			
 
				-			      offset + iov_length(iov, nr_segs) - 1,
			
 
				-			      EXTENT_LOCKED | write_bits, 1, 0,
			
 
				-			      &cached_state, GFP_NOFS);
			
 
				-	} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
			
 
				-		/*
			
 
				-		 * We're falling back to buffered, unlock the section we didn't
			
 
				-		 * do IO on.
			
 
				-		 */
			
 
				-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
			
 
				-			      offset + iov_length(iov, nr_segs) - 1,
			
 
				-			      EXTENT_LOCKED | write_bits, 1, 0,
			
 
				-			      &cached_state, GFP_NOFS);
			
 
				-	}
			
 
				-out:
			
 
				-	free_extent_state(cached_state);
			
 
				-	return ret;
			
 
				 }
			
 
				 
			
 
				 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,