@@ -188,7 +188,7 @@ _xfs_buf_initialize(
 	atomic_set(&bp->b_hold, 1);
 	init_completion(&bp->b_iowait);
 	INIT_LIST_HEAD(&bp->b_list);
-	INIT_LIST_HEAD(&bp->b_hash_list);
+	RB_CLEAR_NODE(&bp->b_rbnode);
 	init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
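The hunks in this patch rely on four fields that xfs_buf.c itself does not
declare: b_rbnode and b_pag on the buffer, and pag_buf_lock and pag_buf_tree
on the per-AG structure. A minimal sketch of the companion header changes the
patch assumes, showing only the fields named in the hunks (their placement
within the structures is illustrative, not taken from this patch):

/* fs/xfs/linux-2.6/xfs_buf.h (sketch) */
struct xfs_buf {
	/* ... */
	struct rb_node		b_rbnode;	/* replaces b_hash_list */
	struct xfs_perag	*b_pag;		/* replaces b_hash */
	/* ... */
};

/* fs/xfs/xfs_mount.h (sketch) */
struct xfs_perag {
	/* ... */
	spinlock_t		pag_buf_lock;	/* protects pag_buf_tree */
	struct rb_root		pag_buf_tree;	/* buffers ordered by b_file_offset */
	/* ... */
};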
@@ -262,8 +262,6 @@ xfs_buf_free(
 {
 	trace_xfs_buf_free(bp, _RET_IP_);
 
-	ASSERT(list_empty(&bp->b_hash_list));
-
 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
@@ -422,8 +420,10 @@ _xfs_buf_find(
 {
 	xfs_off_t		range_base;
 	size_t			range_length;
-	xfs_bufhash_t		*hash;
-	xfs_buf_t		*bp, *n;
+	struct xfs_perag	*pag;
+	struct rb_node		**rbp;
+	struct rb_node		*parent;
+	xfs_buf_t		*bp;
 
 	range_base = (ioff << BBSHIFT);
 	range_length = (isize << BBSHIFT);
@@ -432,14 +432,37 @@ _xfs_buf_find(
 	ASSERT(!(range_length < (1 << btp->bt_sshift)));
 	ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
 
-	hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
-
-	spin_lock(&hash->bh_lock);
-
-	list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-		ASSERT(btp == bp->b_target);
-		if (bp->b_file_offset == range_base &&
-		    bp->b_buffer_length == range_length) {
+	/* get tree root */
+	pag = xfs_perag_get(btp->bt_mount,
+				xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+	/* walk tree */
+	spin_lock(&pag->pag_buf_lock);
+	rbp = &pag->pag_buf_tree.rb_node;
+	parent = NULL;
+	bp = NULL;
+	while (*rbp) {
+		parent = *rbp;
+		bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+		if (range_base < bp->b_file_offset)
+			rbp = &(*rbp)->rb_left;
+		else if (range_base > bp->b_file_offset)
+			rbp = &(*rbp)->rb_right;
+		else {
+			/*
+			 * found a block offset match. If the range doesn't
+			 * match, the only way this is allowed is if the buffer
+			 * in the cache is stale and the transaction that made
+			 * it stale has not yet committed. i.e. we are
+			 * reallocating a busy extent. Skip this buffer and
+			 * continue searching to the right for an exact match.
+			 */
+			if (bp->b_buffer_length != range_length) {
+				ASSERT(bp->b_flags & XBF_STALE);
+				rbp = &(*rbp)->rb_right;
+				continue;
+			}
 			atomic_inc(&bp->b_hold);
 			goto found;
 		}
@@ -449,17 +472,21 @@ _xfs_buf_find(
 	if (new_bp) {
 		_xfs_buf_initialize(new_bp, btp, range_base,
 					range_length, flags);
-		new_bp->b_hash = hash;
-		list_add(&new_bp->b_hash_list, &hash->bh_list);
+		rb_link_node(&new_bp->b_rbnode, parent, rbp);
+		rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+		/* the buffer keeps the perag reference until it is freed */
+		new_bp->b_pag = pag;
+		spin_unlock(&pag->pag_buf_lock);
 	} else {
 		XFS_STATS_INC(xb_miss_locked);
+		spin_unlock(&pag->pag_buf_lock);
+		xfs_perag_put(pag);
 	}
-
-	spin_unlock(&hash->bh_lock);
 	return new_bp;
 
 found:
-	spin_unlock(&hash->bh_lock);
+	spin_unlock(&pag->pag_buf_lock);
+	xfs_perag_put(pag);
 
 	/* Attempt to get the semaphore without sleeping,
 	 * if this does not work then we need to drop the
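The walk above is the standard lib/rbtree lookup-or-insert idiom: remember
the last parent and link slot while descending, then hand them to
rb_link_node()/rb_insert_color() at the point the search fell off the tree,
all under one lock so lookup and insert are atomic. A stand-alone sketch of
the same pattern, reduced to a hypothetical object keyed by a single offset
(struct obj, obj_tree and obj_lock are illustrative names, not part of this
patch):

#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/slab.h>

/* Hypothetical cached object, keyed by a single offset. */
struct obj {
	struct rb_node	node;
	unsigned long	offset;
	atomic_t	refcount;
};

static struct rb_root obj_tree = RB_ROOT;
static DEFINE_SPINLOCK(obj_lock);

/*
 * Return an existing object with a reference held, or link @new into
 * the tree at the point the search fell off it and return NULL.
 */
static struct obj *obj_find_or_insert(struct obj *new)
{
	struct rb_node **rbp = &obj_tree.rb_node;
	struct rb_node *parent = NULL;
	struct obj *obj;

	spin_lock(&obj_lock);
	while (*rbp) {
		parent = *rbp;
		obj = rb_entry(parent, struct obj, node);

		if (new->offset < obj->offset)
			rbp = &(*rbp)->rb_left;
		else if (new->offset > obj->offset)
			rbp = &(*rbp)->rb_right;
		else {
			/* match: take a reference under the lock */
			atomic_inc(&obj->refcount);
			spin_unlock(&obj_lock);
			return obj;
		}
	}

	/* no match: @parent and @rbp record the insertion point */
	rb_link_node(&new->node, parent, rbp);
	rb_insert_color(&new->node, &obj_tree);
	spin_unlock(&obj_lock);
	return NULL;
}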
@@ -809,27 +836,30 @@ void
 xfs_buf_rele(
 	xfs_buf_t		*bp)
 {
-	xfs_bufhash_t		*hash = bp->b_hash;
+	struct xfs_perag	*pag = bp->b_pag;
 
 	trace_xfs_buf_rele(bp, _RET_IP_);
 
-	if (unlikely(!hash)) {
+	if (!pag) {
 		ASSERT(!bp->b_relse);
+		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
 		if (atomic_dec_and_test(&bp->b_hold))
 			xfs_buf_free(bp);
 		return;
 	}
 
+	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
 	ASSERT(atomic_read(&bp->b_hold) > 0);
-	if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
 		if (bp->b_relse) {
 			atomic_inc(&bp->b_hold);
-			spin_unlock(&hash->bh_lock);
-			(*(bp->b_relse)) (bp);
+			spin_unlock(&pag->pag_buf_lock);
+			bp->b_relse(bp);
 		} else {
 			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-			list_del_init(&bp->b_hash_list);
-			spin_unlock(&hash->bh_lock);
+			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+			spin_unlock(&pag->pag_buf_lock);
+			xfs_perag_put(pag);
 			xfs_buf_free(bp);
 		}
 	}
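xfs_buf_rele() keeps the common release path lock-free: atomic_dec_and_lock()
only takes the spinlock (and returns true) when the count actually reaches
zero. Continuing the hypothetical struct obj sketch above, the same release
pattern looks like this:

/* Drop a reference; the final put erases and frees the object. */
static void obj_put(struct obj *obj)
{
	if (!atomic_dec_and_lock(&obj->refcount, &obj_lock))
		return;		/* count still positive, lock not taken */

	/*
	 * obj_lock is now held with the count at zero. Lookups take
	 * obj_lock before atomic_inc(), so nothing can race in and
	 * grab a new reference before the erase.
	 */
	rb_erase(&obj->node, &obj_tree);
	spin_unlock(&obj_lock);
	kfree(obj);
}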
@@ -1429,56 +1459,24 @@ xfs_buf_iomove(
  */
 void
 xfs_wait_buftarg(
-	xfs_buftarg_t	*btp)
+	struct xfs_buftarg	*btp)
 {
-	xfs_bufhash_t	*hash;
-	uint		i;
+	struct xfs_perag	*pag;
+	uint			i;
 
-	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-		hash = &btp->bt_hash[i];
-		spin_lock(&hash->bh_lock);
-		while (!list_empty(&hash->bh_list)) {
-			spin_unlock(&hash->bh_lock);
+	for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
+		pag = xfs_perag_get(btp->bt_mount, i);
+		spin_lock(&pag->pag_buf_lock);
+		while (rb_first(&pag->pag_buf_tree)) {
+			spin_unlock(&pag->pag_buf_lock);
 			delay(100);
-			spin_lock(&hash->bh_lock);
+			spin_lock(&pag->pag_buf_lock);
 		}
-		spin_unlock(&hash->bh_lock);
-	}
-}
-
-/*
- * Allocate buffer hash table for a given target.
- * For devices containing metadata (i.e. not the log/realtime devices)
- * we need to allocate a much larger hash table.
- */
-STATIC void
-xfs_alloc_bufhash(
-	xfs_buftarg_t	*btp,
-	int		external)
-{
-	unsigned int	i;
-
-	if (external) {
-		btp->bt_hash = NULL;
-		return;
-	}
-	btp->bt_hashshift = 12;	/* 4096 buckets */
-	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
-					 sizeof(xfs_bufhash_t));
-	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-		spin_lock_init(&btp->bt_hash[i].bh_lock);
-		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+		spin_unlock(&pag->pag_buf_lock);
+		xfs_perag_put(pag);
 	}
 }
 
-STATIC void
-xfs_free_bufhash(
-	xfs_buftarg_t	*btp)
-{
-	kmem_free_large(btp->bt_hash);
-	btp->bt_hash = NULL;
-}
-
 /*
  *	buftarg list for delwrite queue processing
  */
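The wait loop above tests rb_first(), which returns NULL once the tree is
empty, where the old code tested list_empty(). The shape is check-under-lock,
sleep-outside-lock, so that the remaining holders can get the lock and drop
their buffers. On the hypothetical obj_tree it reduces to (delay() is the
XFS helper used in the hunk above):

static void obj_wait_empty(void)
{
	spin_lock(&obj_lock);
	while (rb_first(&obj_tree)) {
		/* drop the lock so the last holders can release */
		spin_unlock(&obj_lock);
		delay(100);
		spin_lock(&obj_lock);
	}
	spin_unlock(&obj_lock);
}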
@@ -1511,7 +1509,6 @@ xfs_free_buftarg(
 	xfs_flush_buftarg(btp, 1);
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_blkdev_issue_flush(btp);
-	xfs_free_bufhash(btp);
 	iput(btp->bt_mapping->host);
 
 	/* Unregister the buftarg first so that we don't get a
@@ -1651,7 +1648,6 @@ xfs_alloc_buftarg(
 		goto error;
 	if (xfs_alloc_delwrite_queue(btp, fsname))
 		goto error;
-	xfs_alloc_bufhash(btp, external);
 	return btp;
 
 error: