14 years ago · 0e5b88cd99
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
 
				 	u64 disk_total;		/* total bytes on disk, takes mirrors into
			
 
				 				   account */
			
 
				 
			
 
				+	/*
			
 
				+	 * we bump reservation progress every time we decrement
			
 
				+	 * bytes_reserved.  This way people waiting for reservations
			
 
				+	 * know something good has happened and they can check
			
 
				+	 * for progress.  The number here isn't to be trusted; it
			
 
				+	 * just shows reclaim activity
			
 
				+	 */
			
 
				+	unsigned long reservation_progress;
			
 
				+
			
 
				 	int full;		/* indicates that we cannot allocate any more
			
 
				 				   chunks for this space */
			
 
				 	int force_alloc;	/* set if we need to force a chunk alloc for
			
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 
				 	u64 max_reclaim;
			
 
				 	u64 reclaimed = 0;
			
 
				 	long time_left;
			
 
				-	int pause = 1;
			
 
				 	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
			
 
				 	int loops = 0;
			
 
				+	unsigned long progress;
			
 
				 
			
 
				 	block_rsv = &root->fs_info->delalloc_block_rsv;
			
 
				 	space_info = block_rsv->space_info;
			
 
				 
			
 
				 	smp_mb();
			
 
				 	reserved = space_info->bytes_reserved;
			
 
				+	progress = space_info->reservation_progress;
			
 
				 
			
 
				 	if (reserved == 0)
			
 
				 		return 0;
			
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 
				 		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
			
 
				 
			
 
				 		spin_lock(&space_info->lock);
			
 
				-		if (reserved > space_info->bytes_reserved) {
			
 
				-			loops = 0;
			
 
				+		if (reserved > space_info->bytes_reserved)
			
 
				 			reclaimed += reserved - space_info->bytes_reserved;
			
 
				-		} else {
			
 
				-			loops++;
			
 
				-		}
			
 
				 		reserved = space_info->bytes_reserved;
			
 
				 		spin_unlock(&space_info->lock);
			
 
				 
			
 
				+		loops++;
			
 
				+
			
 
				 		if (reserved == 0 || reclaimed >= max_reclaim)
			
 
				 			break;
			
 
				 
			
 
				 		if (trans && trans->transaction->blocked)
			
 
				 			return -EAGAIN;
			
 
				 
			
 
				-		__set_current_state(TASK_INTERRUPTIBLE);
			
 
				-		time_left = schedule_timeout(pause);
			
 
				+		time_left = schedule_timeout_interruptible(1);
			
 
				 
			
 
				 		/* We were interrupted, exit */
			
 
				 		if (time_left)
			
 
				 			break;
			
 
				 
			
 
				-		pause <<= 1;
			
 
				-		if (pause > HZ / 10)
			
 
				-			pause = HZ / 10;
			
 
				+		/* we've kicked the IO a few times; if anything has been freed,
			
 
				+		 * exit.  There is no sense in looping here for a long time
			
 
				+		 * when we really need to commit the transaction, or there are
			
 
				+		 * just too many writers without enough free space
			
 
				+		 */
			
 
				+
			
 
				+		if (loops > 3) {
			
 
				+			smp_mb();
			
 
				+			if (progress != space_info->reservation_progress)
			
 
				+				break;
			
 
				+		}
			
 
				 
			
 
				 	}
			
 
				 	return reclaimed >= to_reclaim;
			
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
 
				 		if (num_bytes) {
			
 
				 			spin_lock(&space_info->lock);
			
 
				 			space_info->bytes_reserved -= num_bytes;
			
 
				+			space_info->reservation_progress++;
			
 
				 			spin_unlock(&space_info->lock);
			
 
				 		}
			
 
				 	}
			
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 
				 	if (block_rsv->reserved >= block_rsv->size) {
			
 
				 		num_bytes = block_rsv->reserved - block_rsv->size;
			
 
				 		sinfo->bytes_reserved -= num_bytes;
			
 
				+		sinfo->reservation_progress++;
			
 
				 		block_rsv->reserved = block_rsv->size;
			
 
				 		block_rsv->full = 1;
			
 
				 	}
			
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
				 		to_reserve = 0;
			
 
				 	}
			
 
				 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
			
 
				-
			
 
				 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
			
 
				 	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
			
 
				 	if (ret)
			
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 
				 			btrfs_set_block_group_used(&cache->item, old_val);
			
 
				 			cache->reserved -= num_bytes;
			
 
				 			cache->space_info->bytes_reserved -= num_bytes;
			
 
				+			cache->space_info->reservation_progress++;
			
 
				 			cache->space_info->bytes_used += num_bytes;
			
 
				 			cache->space_info->disk_used += num_bytes * factor;
			
 
				 			spin_unlock(&cache->lock);
			
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
 
				 	if (reserved) {
			
 
				 		cache->reserved -= num_bytes;
			
 
				 		cache->space_info->bytes_reserved -= num_bytes;
			
 
				+		cache->space_info->reservation_progress++;
			
 
				 	}
			
 
				 	spin_unlock(&cache->lock);
			
 
				 	spin_unlock(&cache->space_info->lock);
			
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
 
				 				space_info->bytes_readonly += num_bytes;
			
 
				 			cache->reserved -= num_bytes;
			
 
				 			space_info->bytes_reserved -= num_bytes;
			
 
				+			space_info->reservation_progress++;
			
 
				 		}
			
 
				 		spin_unlock(&cache->lock);
			
 
				 		spin_unlock(&space_info->lock);
			
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
				 		if (ret) {
			
 
				 			spin_lock(&cache->space_info->lock);
			
 
				 			cache->space_info->bytes_reserved -= buf->len;
			
 
				+			cache->space_info->reservation_progress++;
			
 
				 			spin_unlock(&cache->space_info->lock);
			
 
				 		}
			
 
				 		goto out;
			
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
				 	}
			
 
				 
			
 
				 	while (!end) {
			
 
				-		off = extent_map_end(em);
			
 
				-		if (off >= max)
			
 
				-			end = 1;
			
 
				+		u64 offset_in_extent;
			
 
				+
			
 
				+		/* break if the extent we found is outside the range */
			
 
				+		if (em->start >= max || extent_map_end(em) < off)
			
 
				+			break;
			
 
				+
			
 
				+		/*
			
 
				+		 * get_extent may return an extent that starts before our
			
 
				+		 * requested range.  We have to make sure the ranges
			
 
				+		 * we return to fiemap always move forward and don't
			
 
				+		 * overlap, so adjust the offsets here
			
 
				+		 */
			
 
				+		em_start = max(em->start, off);
			
 
				 
			
 
				-		em_start = em->start;
			
 
				-		em_len = em->len;
			
 
				+		/*
			
 
				+		 * record the offset from the start of the extent
			
 
				+		 * for adjusting the disk offset below
			
 
				+		 */
			
 
				+		offset_in_extent = em_start - em->start;
			
 
				 		em_end = extent_map_end(em);
			
 
				+		em_len = em_end - em_start;
			
 
				 		emflags = em->flags;
			
 
				 		disko = 0;
			
 
				 		flags = 0;
			
 
				 
			
 
				+		/*
			
 
				+		 * bump off for our next call to get_extent
			
 
				+		 */
			
 
				+		off = extent_map_end(em);
			
 
				+		if (off >= max)
			
 
				+			end = 1;
			
 
				+
			
 
				 		if (em->block_start == EXTENT_MAP_LAST_BYTE) {
			
 
				 			end = 1;
			
 
				 			flags |= FIEMAP_EXTENT_LAST;
			
@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
				 			flags |= (FIEMAP_EXTENT_DELALLOC |
			
 
				 				  FIEMAP_EXTENT_UNKNOWN);
			
 
				 		} else {
			
 
				-			disko = em->block_start;
			
 
				+			disko = em->block_start + offset_in_extent;
			
 
				 		}
			
 
				 		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
			
 
				 			flags |= FIEMAP_EXTENT_ENCODED;
			
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 
				 
			
 
				 		/* Flush processor's dcache for this page */
			
 
				 		flush_dcache_page(page);
			
 
				+
			
 
				+		/*
			
 
				+		 * if we get a partial write, we can end up with
			
 
				+		 * partially up to date pages.  These add
			
 
				+		 * a lot of complexity, so make sure they don't
			
 
				+		 * happen by forcing this copy to be retried.
			
 
				+		 *
			
 
				+		 * The rest of the btrfs_file_write code will fall
			
 
				+		 * back to page at a time copies after we return 0.
			
 
				+		 */
			
 
				+		if (!PageUptodate(page) && copied < count)
			
 
				+			copied = 0;
			
 
				+
			
 
				 		iov_iter_advance(i, copied);
			
 
				 		write_bytes -= copied;
			
 
				 		total_copied += copied;
			
@@ -762,6 +775,27 @@ out:
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Read @page in when @pos is not page aligned and the page is not yet up to date.
			
 
				+ * On error we return an unlocked page and the error value; on success we return a locked page and 0.
			
 
				+ */
			
 
				+static int prepare_uptodate_page(struct page *page, u64 pos)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {	/* unaligned pos, page not up to date */
			
 
				+		ret = btrfs_readpage(NULL, page);	/* NOTE(review): presumably leaves the page unlocked - confirm */
			
 
				+		if (ret)
			
 
				+			return ret;
			
 
				+		lock_page(page);	/* take the page lock again before re-checking its state */
			
 
				+		if (!PageUptodate(page)) {	/* still not up to date after the read */
			
 
				+			unlock_page(page);
			
 
				+			return -EIO;
			
 
				+		}
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * this gets pages into the page cache and locks them down, it also properly
			
 
				  * waits for data=ordered extents to finish before allowing the pages to be
			
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
 
				 	unsigned long index = pos >> PAGE_CACHE_SHIFT;
			
 
				 	struct inode *inode = fdentry(file)->d_inode;
			
 
				 	int err = 0;
			
 
				+	int faili = 0;
			
 
				 	u64 start_pos;
			
 
				 	u64 last_pos;
			
 
				 
			
@@ -794,15 +829,24 @@ again:
 
				 	for (i = 0; i < num_pages; i++) {
			
 
				 		pages[i] = grab_cache_page(inode->i_mapping, index + i);
			
 
				 		if (!pages[i]) {
			
 
				-			int c;
			
 
				-			for (c = i - 1; c >= 0; c--) {
			
 
				-				unlock_page(pages[c]);
			
 
				-				page_cache_release(pages[c]);
			
 
				-			}
			
 
				-			return -ENOMEM;
			
 
				+			faili = i - 1;
			
 
				+			err = -ENOMEM;
			
 
				+			goto fail;
			
 
				+		}
			
 
				+
			
 
				+		if (i == 0)
			
 
				+			err = prepare_uptodate_page(pages[i], pos);
			
 
				+		if (i == num_pages - 1)
			
 
				+			err = prepare_uptodate_page(pages[i],
			
 
				+						    pos + write_bytes);
			
 
				+		if (err) {
			
 
				+			page_cache_release(pages[i]);
			
 
				+			faili = i - 1;
			
 
				+			goto fail;
			
 
				 		}
			
 
				 		wait_on_page_writeback(pages[i]);
			
 
				 	}
			
 
				+	err = 0;
			
 
				 	if (start_pos < inode->i_size) {
			
 
				 		struct btrfs_ordered_extent *ordered;
			
 
				 		lock_extent_bits(&BTRFS_I(inode)->io_tree,
			
@@ -842,6 +886,14 @@ again:
 
				 		WARN_ON(!PageLocked(pages[i]));
			
 
				 	}
			
 
				 	return 0;
			
 
				+fail:
			
 
				+	while (faili >= 0) {
			
 
				+		unlock_page(pages[faili]);
			
 
				+		page_cache_release(pages[faili]);
			
 
				+		faili--;
			
 
				+	}
			
 
				+	return err;
			
 
				+
			
 
				 }
			
 
				 
			
 
				 static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
			
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
				 	struct file *file = iocb->ki_filp;
			
 
				 	struct inode *inode = fdentry(file)->d_inode;
			
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				-	struct page *pinned[2];
			
 
				 	struct page **pages = NULL;
			
 
				 	struct iov_iter i;
			
 
				 	loff_t *ppos = &iocb->ki_pos;
			
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
				 	will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
			
 
				 		      (file->f_flags & O_DIRECT));
			
 
				 
			
 
				-	pinned[0] = NULL;
			
 
				-	pinned[1] = NULL;
			
 
				-
			
 
				 	start_pos = pos;
			
 
				 
			
 
				 	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
			
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
				 	first_index = pos >> PAGE_CACHE_SHIFT;
			
 
				 	last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
			
 
				 
			
 
				-	/*
			
 
				-	 * there are lots of better ways to do this, but this code
			
 
				-	 * makes sure the first and last page in the file range are
			
 
				-	 * up to date and ready for cow
			
 
				-	 */
			
 
				-	if ((pos & (PAGE_CACHE_SIZE - 1))) {
			
 
				-		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
			
 
				-		if (!PageUptodate(pinned[0])) {
			
 
				-			ret = btrfs_readpage(NULL, pinned[0]);
			
 
				-			BUG_ON(ret);
			
 
				-			wait_on_page_locked(pinned[0]);
			
 
				-		} else {
			
 
				-			unlock_page(pinned[0]);
			
 
				-		}
			
 
				-	}
			
 
				-	if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
			
 
				-		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
			
 
				-		if (!PageUptodate(pinned[1])) {
			
 
				-			ret = btrfs_readpage(NULL, pinned[1]);
			
 
				-			BUG_ON(ret);
			
 
				-			wait_on_page_locked(pinned[1]);
			
 
				-		} else {
			
 
				-			unlock_page(pinned[1]);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				 	while (iov_iter_count(&i) > 0) {
			
 
				 		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
			
 
				 		size_t write_bytes = min(iov_iter_count(&i),
			
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
				 
			
 
				 		copied = btrfs_copy_from_user(pos, num_pages,
			
 
				 					   write_bytes, pages, &i);
			
 
				-		dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
			
 
				-				PAGE_CACHE_SHIFT;
			
 
				+
			
 
				+		/*
			
 
				+		 * if we have trouble faulting in the pages, fall
			
 
				+		 * back to one page at a time
			
 
				+		 */
			
 
				+		if (copied < write_bytes)
			
 
				+			nrptrs = 1;
			
 
				+
			
 
				+		if (copied == 0)
			
 
				+			dirty_pages = 0;
			
 
				+		else
			
 
				+			dirty_pages = (copied + offset +
			
 
				+				       PAGE_CACHE_SIZE - 1) >>
			
 
				+				       PAGE_CACHE_SHIFT;
			
 
				 
			
 
				 		if (num_pages > dirty_pages) {
			
 
				 			if (copied > 0)
			
@@ -1069,10 +1103,6 @@ out:
 
				 		err = ret;
			
 
				 
			
 
				 	kfree(pages);
			
 
				-	if (pinned[0])
			
 
				-		page_cache_release(pinned[0]);
			
 
				-	if (pinned[1])
			
 
				-		page_cache_release(pinned[1]);
			
 
				 	*ppos = pos;
			
 
				 
			
 
				 	/*
			
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4821,10 +4821,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 
				 		goto fail;
			
 
				 
			
 
				 	/*
			
 
				-	 * 1 item for inode ref
			
 
				+	 * 2 items for inode and inode ref
			
 
				 	 * 2 items for dir items
			
 
				+	 * 1 item for parent inode
			
 
				 	 */
			
 
				-	trans = btrfs_start_transaction(root, 3);
			
 
				+	trans = btrfs_start_transaction(root, 5);
			
 
				 	if (IS_ERR(trans)) {
			
 
				 		err = PTR_ERR(trans);
			
 
				 		goto fail;
			
@@ -6056,6 +6057,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 
				 	if (!skip_sum) {
			
 
				 		dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
			
 
				 		if (!dip->csums) {
			
 
				+			kfree(dip);
			
 
				 			ret = -ENOMEM;
			
 
				 			goto free_ordered;
			
 
				 		}