|
@@ -855,6 +855,41 @@ xfs_btree_readahead(
|
|
|
return xfs_btree_readahead_sblock(cur, lr, block);
|
|
|
}
|
|
|
|
|
|
+STATIC xfs_daddr_t
|
|
|
+xfs_btree_ptr_to_daddr(
|
|
|
+ struct xfs_btree_cur *cur,
|
|
|
+ union xfs_btree_ptr *ptr)
|
|
|
+{
|
|
|
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
|
|
|
+ ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
|
|
|
+
|
|
|
+ return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
|
|
|
+ } else {
|
|
|
+ ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
|
|
|
+ ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
|
|
|
+
|
|
|
+ return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
|
|
|
+ be32_to_cpu(ptr->s));
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Readahead @count btree blocks at the given @ptr location.
|
|
|
+ *
|
|
|
+ * We don't need to care about long or short form btrees here as we have a
|
|
|
+ * method of converting the ptr directly to a daddr available to us.
|
|
|
+ */
|
|
|
+STATIC void
|
|
|
+xfs_btree_readahead_ptr(
|
|
|
+ struct xfs_btree_cur *cur,
|
|
|
+ union xfs_btree_ptr *ptr,
|
|
|
+ xfs_extlen_t count)
|
|
|
+{
|
|
|
+ xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
|
|
|
+ xfs_btree_ptr_to_daddr(cur, ptr),
|
|
|
+ cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Set the buffer for level "lev" in the cursor to bp, releasing
|
|
|
* any previous buffer.
|
|
@@ -1073,24 +1108,6 @@ xfs_btree_buf_to_ptr(
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-STATIC xfs_daddr_t
|
|
|
-xfs_btree_ptr_to_daddr(
|
|
|
- struct xfs_btree_cur *cur,
|
|
|
- union xfs_btree_ptr *ptr)
|
|
|
-{
|
|
|
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
|
|
|
- ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
|
|
|
-
|
|
|
- return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
|
|
|
- } else {
|
|
|
- ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
|
|
|
- ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
|
|
|
-
|
|
|
- return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
|
|
|
- be32_to_cpu(ptr->s));
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
STATIC void
|
|
|
xfs_btree_set_refs(
|
|
|
struct xfs_btree_cur *cur,
|
|
@@ -3869,3 +3886,112 @@ xfs_btree_get_rec(
|
|
|
*stat = 1;
|
|
|
return 0;
|
|
|
}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Change the owner of a btree.
|
|
|
+ *
|
|
|
+ * The mechanism we use here is ordered buffer logging. Because we don't know
|
|
|
+ * how many buffers we are going to need to modify, we don't really want to
|
|
|
+ * have to make transaction reservations for the worst case of every buffer in a
|
|
|
+ * full size btree as that may be more space than we can fit in the log....
|
|
|
+ *
|
|
|
+ * We do the btree walk in the most optimal manner possible - we have sibling
|
|
|
+ * pointers so we can just walk all the blocks on each level from left to right
|
|
|
+ * in a single pass, and then move to the next level and do the same. We can
|
|
|
+ * also do readahead on the sibling pointers to get IO moving more quickly,
|
|
|
+ * though for slow disks this is unlikely to make much difference to performance
|
|
|
+ * as the amount of CPU work we have to do before moving to the next block is
|
|
|
+ * relatively small.
|
|
|
+ *
|
|
|
+ * For each btree block that we load, modify the owner appropriately, set the
|
|
|
+ * buffer as an ordered buffer and log it appropriately. We need to ensure that
|
|
|
+ * we mark the region we change dirty so that if the buffer is relogged in
|
|
|
+ * a subsequent transaction the changes we make here as an ordered buffer are
|
|
|
+ * correctly relogged in that transaction.
|
|
|
+ */
|
|
|
+static int
|
|
|
+xfs_btree_block_change_owner(
|
|
|
+ struct xfs_btree_cur *cur,
|
|
|
+ int level,
|
|
|
+ __uint64_t new_owner)
|
|
|
+{
|
|
|
+ struct xfs_btree_block *block;
|
|
|
+ struct xfs_buf *bp;
|
|
|
+ union xfs_btree_ptr rptr;
|
|
|
+
|
|
|
+ /* do right sibling readahead */
|
|
|
+ xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
|
|
|
+
|
|
|
+ /* modify the owner */
|
|
|
+ block = xfs_btree_get_block(cur, level, &bp);
|
|
|
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
|
|
|
+ block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
|
|
|
+ else
|
|
|
+ block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Log owner change as an ordered buffer. If the block is a root block
|
|
|
+ * hosted in an inode, we might not have a buffer pointer here and we
|
|
|
+ * shouldn't attempt to log the change as the information is already
|
|
|
+ * held in the inode and discarded when the root block is formatted into
|
|
|
+ * the on-disk inode fork. We still change it, though, so everything is
|
|
|
+ * consistent in memory.
|
|
|
+ */
|
|
|
+ if (bp) {
|
|
|
+ xfs_trans_ordered_buf(cur->bc_tp, bp);
|
|
|
+ xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
|
|
|
+ } else {
|
|
|
+ ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
|
|
|
+ ASSERT(level == cur->bc_nlevels - 1);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* now read rh sibling block for next iteration */
|
|
|
+ xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
|
|
|
+ if (xfs_btree_ptr_is_null(cur, &rptr))
|
|
|
+ return ENOENT;
|
|
|
+
|
|
|
+ return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
|
|
|
+}
|
|
|
+
|
|
|
+int
|
|
|
+xfs_btree_change_owner(
|
|
|
+ struct xfs_btree_cur *cur,
|
|
|
+ __uint64_t new_owner)
|
|
|
+{
|
|
|
+ union xfs_btree_ptr lptr;
|
|
|
+ int level;
|
|
|
+ struct xfs_btree_block *block = NULL;
|
|
|
+ int error = 0;
|
|
|
+
|
|
|
+ cur->bc_ops->init_ptr_from_cur(cur, &lptr);
|
|
|
+
|
|
|
+ /* for each level */
|
|
|
+ for (level = cur->bc_nlevels - 1; level >= 0; level--) {
|
|
|
+ /* grab the left hand block */
|
|
|
+ error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
|
|
|
+ if (error)
|
|
|
+ return error;
|
|
|
+
|
|
|
+ /* readahead the left most block for the next level down */
|
|
|
+ if (level > 0) {
|
|
|
+ union xfs_btree_ptr *ptr;
|
|
|
+
|
|
|
+ ptr = xfs_btree_ptr_addr(cur, 1, block);
|
|
|
+ xfs_btree_readahead_ptr(cur, ptr, 1);
|
|
|
+
|
|
|
+ /* save for the next iteration of the loop */
|
|
|
+ lptr = *ptr;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* for each buffer in the level */
|
|
|
+ do {
|
|
|
+ error = xfs_btree_block_change_owner(cur, level,
|
|
|
+ new_owner);
|
|
|
+ } while (!error);
|
|
|
+
|
|
|
+ if (error != ENOENT)
|
|
|
+ return error;
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|