Browse Source

[XFS] Fix inode reclaim scalability regression. When a filesystem has
millions of inodes cached and has sparse cluster population, removing
inodes from the cluster hash consumes excessive amounts of CPU time.
Reduce the CPU cost by making removal O(1) via use of a doubly linked list
for the hash chains.

SGI-PV: 951551
SGI-Modid: xfs-linux-melb:xfs-kern:25683a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>

David Chinner 19 years ago
parent
commit
1fc5d959d8
2 changed files with 13 additions and 17 deletions
  1. 12 17
      fs/xfs/xfs_iget.c
  2. 1 0
      fs/xfs/xfs_inode.h

+ 12 - 17
fs/xfs/xfs_iget.c

@@ -421,7 +421,10 @@ finish_inode:
 			ip->i_chash = chlnew;
 			chlnew->chl_ip = ip;
 			chlnew->chl_blkno = ip->i_blkno;
+			if (ch->ch_list)
+				ch->ch_list->chl_prev = chlnew;
 			chlnew->chl_next = ch->ch_list;
+			chlnew->chl_prev = NULL;
 			ch->ch_list = chlnew;
 			chlnew = NULL;
 		}
@@ -723,23 +726,15 @@ xfs_iextract(
 		ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
 		ASSERT(ip->i_chash != NULL);
 		chm=NULL;
-		for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
-			if (chl->chl_blkno == ip->i_blkno) {
-				if (chm == NULL) {
-					/* first item on the list */
-					ch->ch_list = chl->chl_next;
-				} else {
-					chm->chl_next = chl->chl_next;
-				}
-				kmem_zone_free(xfs_chashlist_zone, chl);
-				break;
-			} else {
-				ASSERT(chl->chl_ip != ip);
-				chm = chl;
-			}
-		}
-		ASSERT_ALWAYS(chl != NULL);
-       } else {
+		chl = ip->i_chash;
+		if (chl->chl_prev)
+			chl->chl_prev->chl_next = chl->chl_next;
+		else
+			ch->ch_list = chl->chl_next;
+		if (chl->chl_next)
+			chl->chl_next->chl_prev = chl->chl_prev;
+		kmem_zone_free(xfs_chashlist_zone, chl);
+	} else {
 		/* delete one inode from a non-empty list */
 		iq = ip->i_cnext;
 		iq->i_cprev = ip->i_cprev;

+ 1 - 0
fs/xfs/xfs_inode.h

@@ -189,6 +189,7 @@ typedef struct xfs_ihash {
  */
 typedef struct xfs_chashlist {
 	struct xfs_chashlist	*chl_next;
+	struct xfs_chashlist	*chl_prev;
 	struct xfs_inode	*chl_ip;
 	xfs_daddr_t		chl_blkno;	/* starting block number of
 						 * the cluster */