17 years ago · 323ac95bce
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1388,7 +1388,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 
				 	struct btrfs_key prealloc_block;
			
 
				 
			
 
				 	lowest_level = p->lowest_level;
			
 
				-	WARN_ON(lowest_level && ins_len);
			
 
				+	WARN_ON(lowest_level && ins_len > 0);
			
 
				 	WARN_ON(p->nodes[0] != NULL);
			
 
				 	WARN_ON(cow && root == root->fs_info->extent_root &&
			
 
				 		!mutex_is_locked(&root->fs_info->alloc_mutex));
			
@@ -3186,6 +3186,36 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * a helper function to delete the leaf pointed to by path->slots[1] and
			
 
				+ * path->nodes[1].  bytenr is the leaf's block pointer; since the callers
			
 
				+ * already know it, it is faster to have them pass it down than to
			
 
				+ * read it out of the node again.
			
 
				+ *
			
 
				+ * This deletes the pointer in path->nodes[1] and frees the leaf
			
 
				+ * block extent.  zero is returned if it all worked out, < 0 otherwise.
			
 
				+ *
			
 
				+ * The path must have already been set up for deleting the leaf, including
			
 
				+ * all the proper balancing.  path->nodes[1] must be locked.
			
 
				+ */
			
 
				+noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
			
 
				+			    struct btrfs_root *root,
			
 
				+			    struct btrfs_path *path, u64 bytenr)
			
 
				+{
			
 
				+	int ret;
			
 
				+	u64 root_gen = btrfs_header_generation(path->nodes[1]);
			
 
				+
			
 
				+	ret = del_ptr(trans, root, path, 1, path->slots[1]);
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				+	ret = btrfs_free_extent(trans, root, bytenr,
			
 
				+				btrfs_level_size(root, 0),
			
 
				+				path->nodes[1]->start,
			
 
				+				btrfs_header_owner(path->nodes[1]),
			
 
				+				root_gen, 0, 0, 1);
			
 
				+	return ret;
			
 
				+}
			
 
				 /*
			
 
				  * delete the item at the leaf level in path.  If that empties
			
 
				  * the leaf, remove it from the tree
			
@@ -3251,17 +3281,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
				 		if (leaf == root->node) {
			
 
				 			btrfs_set_header_level(leaf, 0);
			
 
				 		} else {
			
 
				-			u64 root_gen = btrfs_header_generation(path->nodes[1]);
			
 
				-			wret = del_ptr(trans, root, path, 1, path->slots[1]);
			
 
				-			if (wret)
			
 
				-				ret = wret;
			
 
				-			wret = btrfs_free_extent(trans, root,
			
 
				-					 leaf->start, leaf->len,
			
 
				-					 path->nodes[1]->start,
			
 
				-					 btrfs_header_owner(path->nodes[1]),
			
 
				-					 root_gen, 0, 0, 1);
			
 
				-			if (wret)
			
 
				-				ret = wret;
			
 
				+			ret = btrfs_del_leaf(trans, root, path, leaf->start);
			
 
				+			BUG_ON(ret);
			
 
				 		}
			
 
				 	} else {
			
 
				 		int used = leaf_space_used(leaf, 0, nritems);
			
@@ -3296,24 +3317,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
				 			}
			
 
				 
			
 
				 			if (btrfs_header_nritems(leaf) == 0) {
			
 
				-				u64 root_gen;
			
 
				-				u64 bytenr = leaf->start;
			
 
				-				u32 blocksize = leaf->len;
			
 
				-
			
 
				-				root_gen = btrfs_header_generation(
			
 
				-							   path->nodes[1]);
			
 
				-
			
 
				-				wret = del_ptr(trans, root, path, 1, slot);
			
 
				-				if (wret)
			
 
				-					ret = wret;
			
 
				-
			
 
				+				path->slots[1] = slot;
			
 
				+				ret = btrfs_del_leaf(trans, root, path, leaf->start);
			
 
				+				BUG_ON(ret);
			
 
				 				free_extent_buffer(leaf);
			
 
				-				wret = btrfs_free_extent(trans, root, bytenr,
			
 
				-					     blocksize, path->nodes[1]->start,
			
 
				-					     btrfs_header_owner(path->nodes[1]),
			
 
				-					     root_gen, 0, 0, 1);
			
 
				-				if (wret)
			
 
				-					ret = wret;
			
 
				 			} else {
			
 
				 				/* if we're still in the path, make sure
			
 
				 				 * we're dirty.  Otherwise, one of the
			
@@ -3418,8 +3425,8 @@ again:
 
				 		level = btrfs_header_level(cur);
			
 
				 		sret = bin_search(cur, min_key, level, &slot);
			
 
				 
			
 
				-		/* at level = 0, we're done, setup the path and exit */
			
 
				-		if (level == 0) {
			
 
				+		/* at the lowest level, we're done, set up the path and exit */
			
 
				+		if (level == path->lowest_level) {
			
 
				 			if (slot >= nritems)
			
 
				 				goto find_next_key;
			
 
				 			ret = 0;
			
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1649,7 +1649,9 @@ void btrfs_free_path(struct btrfs_path *p);
 
				 void btrfs_init_path(struct btrfs_path *p);
			
 
				 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
			
 
				 		   struct btrfs_path *path, int slot, int nr);
			
 
				-
			
 
				+int btrfs_del_leaf(struct btrfs_trans_handle *trans,
			
 
				+			    struct btrfs_root *root,
			
 
				+			    struct btrfs_path *path, u64 bytenr);
			
 
				 static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
			
 
				 				 struct btrfs_root *root,
			
 
				 				 struct btrfs_path *path)
			
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1389,6 +1389,154 @@ fail:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * when truncating bytes in a file, it is possible to avoid reading
			
 
				+ * the leaves that contain only checksum items.  This can be the
			
 
				+ * majority of the IO required to delete a large file, but it must
			
 
				+ * be done carefully.
			
 
				+ *
			
 
				+ * The keys in the level just above the leaves are checked to make sure
			
 
				+ * the lowest key in a given leaf is a csum key, and starts at an offset
			
 
				+ * after the new size.
			
 
				+ *
			
 
				+ * Then the key for the next leaf is checked to make sure it also has
			
 
				+ * a checksum item for the same file.  If it does, we know our target leaf
			
 
				+ * contains only checksum items, and it can be safely freed without reading
			
 
				+ * it.
			
 
				+ *
			
 
				+ * This is just an optimization targeted at large files.  It may do
			
 
				+ * nothing.  It will return 0 unless things went badly.
			
 
				+ */
			
 
				+static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
			
 
				+				     struct btrfs_root *root,
			
 
				+				     struct btrfs_path *path,
			
 
				+				     struct inode *inode, u64 new_size)
			
 
				+{
			
 
				+	struct btrfs_key key;
			
 
				+	int ret;
			
 
				+	int nritems;
			
 
				+	struct btrfs_key found_key;
			
 
				+	struct btrfs_key other_key;
			
 
				+
			
 
				+	path->lowest_level = 1;
			
 
				+	key.objectid = inode->i_ino;
			
 
				+	key.type = BTRFS_CSUM_ITEM_KEY;
			
 
				+	key.offset = new_size;
			
 
				+again:
			
 
				+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
			
 
				+	if (ret < 0)
			
 
				+		goto out;
			
 
				+
			
 
				+	if (path->nodes[1] == NULL) {
			
 
				+		ret = 0;
			
 
				+		goto out;
			
 
				+	}
			
 
				+	ret = 0;
			
 
				+	btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
			
 
				+	nritems = btrfs_header_nritems(path->nodes[1]);
			
 
				+
			
 
				+	if (!nritems)
			
 
				+		goto out;
			
 
				+
			
 
				+	if (path->slots[1] >= nritems)
			
 
				+		goto next_node;
			
 
				+
			
 
				+	/* did we find a key greater than anything we want to delete? */
			
 
				+	if (found_key.objectid > inode->i_ino ||
			
 
				+	   (found_key.objectid == inode->i_ino && found_key.type > key.type))
			
 
				+		goto out;
			
 
				+
			
 
				+	/* we check the next key in the node to make sure the leaf contains
			
 
				+	 * only checksum items.  This comparison doesn't work if our
			
 
				+	 * leaf is the last one in the node
			
 
				+	 */
			
 
				+	if (path->slots[1] + 1 >= nritems) {
			
 
				+next_node:
			
 
				+		/* search forward from the last key in the node, this
			
 
				+		 * will bring us into the next node in the tree
			
 
				+		 */
			
 
				+		btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
			
 
				+
			
 
				+		/* unlikely, but we inc below, so check to be safe */
			
 
				+		if (found_key.offset == (u64)-1)
			
 
				+			goto out;
			
 
				+
			
 
				+		/* search_forward needs a path with locks held, do the
			
 
				+		 * search again for the original key.  It is possible
			
 
				+		 * this will race with a balance and return a path that
			
 
				+		 * we could modify, but this drop is just an optimization
			
 
				+		 * and is allowed to miss some leaves.
			
 
				+		 */
			
 
				+		btrfs_release_path(root, path);
			
 
				+		found_key.offset++;
			
 
				+
			
 
				+		/* set up a max key for search_forward */
			
 
				+		other_key.offset = (u64)-1;
			
 
				+		other_key.type = key.type;
			
 
				+		other_key.objectid = key.objectid;
			
 
				+
			
 
				+		path->keep_locks = 1;
			
 
				+		ret = btrfs_search_forward(root, &found_key, &other_key,
			
 
				+					   path, 0, 0);
			
 
				+		path->keep_locks = 0;
			
 
				+		if (ret || found_key.objectid != key.objectid ||
			
 
				+		    found_key.type != key.type) {
			
 
				+			ret = 0;
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		key.offset = found_key.offset;
			
 
				+		btrfs_release_path(root, path);
			
 
				+		cond_resched();
			
 
				+		goto again;
			
 
				+	}
			
 
				+
			
 
				+	/* we know there's one more slot after us in the tree,
			
 
				+	 * read that key so we can verify it is also a checksum item
			
 
				+	 */
			
 
				+	btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
			
 
				+
			
 
				+	if (found_key.objectid < inode->i_ino)
			
 
				+		goto next_key;
			
 
				+
			
 
				+	if (found_key.type != key.type || found_key.offset < new_size)
			
 
				+		goto next_key;
			
 
				+
			
 
				+	/*
			
 
				+	 * if the key for the next leaf isn't a csum key from this objectid,
			
 
				+	 * we can't be sure there aren't good items inside this leaf.
			
 
				+	 * Bail out
			
 
				+	 */
			
 
				+	if (other_key.objectid != inode->i_ino || other_key.type != key.type)
			
 
				+		goto out;
			
 
				+
			
 
				+	/*
			
 
				+	 * it is safe to delete this leaf, it contains only
			
 
				+	 * csum items from this inode at an offset >= new_size
			
 
				+	 */
			
 
				+	ret = btrfs_del_leaf(trans, root, path,
			
 
				+			     btrfs_node_blockptr(path->nodes[1],
			
 
				+						 path->slots[1]));
			
 
				+	BUG_ON(ret);
			
 
				+
			
 
				+next_key:
			
 
				+	btrfs_release_path(root, path);
			
 
				+
			
 
				+	if (other_key.objectid == inode->i_ino &&
			
 
				+	    other_key.type == key.type && other_key.offset > key.offset) {
			
 
				+		key.offset = other_key.offset;
			
 
				+		cond_resched();
			
 
				+		goto again;
			
 
				+	}
			
 
				+	ret = 0;
			
 
				+out:
			
 
				+	/* fix up any changes we've made to the path */
			
 
				+	path->lowest_level = 0;
			
 
				+	path->keep_locks = 0;
			
 
				+	btrfs_release_path(root, path);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * this can truncate away extent items, csum items and directory items.
			
 
				  * It starts at a high offset and removes keys until it can't find
			
@@ -1436,6 +1584,10 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 
				 	key.type = (u8)-1;
			
 
				 
			
 
				 	btrfs_init_path(path);
			
 
				+
			
 
				+	ret = drop_csum_leaves(trans, root, path, inode, new_size);
			
 
				+	BUG_ON(ret);
			
 
				+
			
 
				 search_again:
			
 
				 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
			
 
				 	if (ret < 0) {