19 年之前 · aa9588741d
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 
				 	mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
			
 
				 		   (unsigned long long)block, nr, flags, inode);
			
 
				 
			
 
				+	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
			
 
				+	       (!inode || !(flags & OCFS2_BH_CACHED)));
			
 
				+
			
 
				 	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
			
 
				 		status = -EINVAL;
			
 
				 		mlog_errno(status);
			
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 
				 		bh = bhs[i];
			
 
				 		ignore_cache = 0;
			
 
				 
			
 
				+		/* There are three read-ahead cases here which we need to
			
 
				+		 * be concerned with. All three assume a buffer has
			
 
				+		 * previously been submitted with OCFS2_BH_READAHEAD
			
 
				+		 * and it hasn't yet completed I/O.
			
 
				+		 *
			
 
				+		 * 1) The current request is sync to disk. This rarely
			
 
				+		 *    happens these days, and never when performance
			
 
				+		 *    matters - the code can just wait on the buffer
			
 
				+		 *    lock and re-submit.
			
 
				+		 *
			
 
				+		 * 2) The current request is cached, but not
			
 
				+		 *    readahead. ocfs2_buffer_uptodate() will return
			
 
				+		 *    false anyway, so we'll wind up waiting on the
			
 
				+		 *    buffer lock to do I/O. We re-check the request
			
 
				+		 *    after getting the lock to avoid a re-submit.
			
 
				+		 *
			
 
				+		 * 3) The current request is readahead (and so must
			
 
				+		 *    also be a caching one). We short circuit if the
			
 
				+		 *    buffer is locked (under I/O) and if it's in the
			
 
				+		 *    uptodate cache. The re-check from #2 catches the
			
 
				+		 *    case that the previous read-ahead completes just
			
 
				+		 *    before our is-it-in-flight check.
			
 
				+		 */
			
 
				+
			
 
				 		if (flags & OCFS2_BH_CACHED &&
			
 
				 		    !ocfs2_buffer_uptodate(inode, bh)) {
			
 
				 			mlog(ML_UPTODATE,
			
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 
				 				continue;
			
 
				 			}
			
 
				 
			
 
				+			/* A read-ahead request was made - if the
			
 
				+			 * buffer is already under read-ahead from a
			
 
				+			 * previously submitted request then we are
			
 
				+			 * done here. */
			
 
				+			if ((flags & OCFS2_BH_READAHEAD)
			
 
				+			    && ocfs2_buffer_read_ahead(inode, bh))
			
 
				+				continue;
			
 
				+
			
 
				 			lock_buffer(bh);
			
 
				 			if (buffer_jbd(bh)) {
			
 
				 #ifdef CATCH_BH_JBD_RACES
			
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 
				 				continue;
			
 
				 #endif
			
 
				 			}
			
 
				+
			
 
				+			/* Re-check ocfs2_buffer_uptodate() as a
			
 
				+			 * previously read-ahead buffer may have
			
 
				+			 * completed I/O while we were waiting for the
			
 
				+			 * buffer lock. */
			
 
				+			if ((flags & OCFS2_BH_CACHED)
			
 
				+			    && !(flags & OCFS2_BH_READAHEAD)
			
 
				+			    && ocfs2_buffer_uptodate(inode, bh)) {
			
 
				+				unlock_buffer(bh);
			
 
				+				continue;
			
 
				+			}
			
 
				+
			
 
				 			clear_buffer_uptodate(bh);
			
 
				 			get_bh(bh); /* for end_buffer_read_sync() */
			
 
				 			bh->b_end_io = end_buffer_read_sync;
			
 
				-			if (flags & OCFS2_BH_READAHEAD)
			
 
				-				submit_bh(READA, bh);
			
 
				-			else
			
 
				-				submit_bh(READ, bh);
			
 
				+			submit_bh(READ, bh);
			
 
				 			continue;
			
 
				 		}
			
 
				 	}
			
@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 
				 	for (i = (nr - 1); i >= 0; i--) {
			
 
				 		bh = bhs[i];
			
 
				 
			
 
				-		/* We know this can't have changed as we hold the
			
 
				-		 * inode sem. Avoid doing any work on the bh if the
			
 
				-		 * journal has it. */
			
 
				-		if (!buffer_jbd(bh))
			
 
				-			wait_on_buffer(bh);
			
 
				-
			
 
				-		if (!buffer_uptodate(bh)) {
			
 
				-			/* Status won't be cleared from here on out,
			
 
				-			 * so we can safely record this and loop back
			
 
				-			 * to cleanup the other buffers. Don't need to
			
 
				-			 * remove the clustered uptodate information
			
 
				-			 * for this bh as it's not marked locally
			
 
				-			 * uptodate. */
			
 
				-			status = -EIO;
			
 
				-			brelse(bh);
			
 
				-			bhs[i] = NULL;
			
 
				-			continue;
			
 
				+		if (!(flags & OCFS2_BH_READAHEAD)) {
			
 
				+			/* We know this can't have changed as we hold the
			
 
				+			 * inode sem. Avoid doing any work on the bh if the
			
 
				+			 * journal has it. */
			
 
				+			if (!buffer_jbd(bh))
			
 
				+				wait_on_buffer(bh);
			
 
				+
			
 
				+			if (!buffer_uptodate(bh)) {
			
 
				+				/* Status won't be cleared from here on out,
			
 
				+				 * so we can safely record this and loop back
			
 
				+				 * to cleanup the other buffers. Don't need to
			
 
				+				 * remove the clustered uptodate information
			
 
				+				 * for this bh as it's not marked locally
			
 
				+				 * uptodate. */
			
 
				+				status = -EIO;
			
 
				+				brelse(bh);
			
 
				+				bhs[i] = NULL;
			
 
				+				continue;
			
 
				+			}
			
 
				 		}
			
 
				 
			
 
				+		/* Always set the buffer in the cache, even if it was
			
 
				+		 * a forced read, or read-ahead which hasn't yet
			
 
				+		 * completed. */
			
 
				 		if (inode)
			
 
				 			ocfs2_set_buffer_uptodate(inode, bh);
			
 
				 	}
			
 
				 	if (inode)
			
 
				 		mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
			
 
				 
			
 
				-	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 
			
 
				+	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
			
 
				 	     (unsigned long long)block, nr,
			
 
				-	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
			
 
				+	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
			
 
				 
			
 
				 bail:
			
 
				 
			
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super          *osb,
 
				 
			
 
				 
			
 
				 #define OCFS2_BH_CACHED            1
			
 
				-#define OCFS2_BH_READAHEAD         8	/* use this to pass READA down to submit_bh */
			
 
				+#define OCFS2_BH_READAHEAD         8
			
 
				 
			
 
				 static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
			
 
				 				   struct buffer_head **bh, int flags,
			
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 
				 int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
			
 
				 {
			
 
				 	int error = 0;
			
 
				-	unsigned long offset, blk;
			
 
				-	int i, num, stored;
			
 
				+	unsigned long offset, blk, last_ra_blk = 0;
			
 
				+	int i, stored;
			
 
				 	struct buffer_head * bh, * tmp;
			
 
				 	struct ocfs2_dir_entry * de;
			
 
				 	int err;
			
 
				 	struct inode *inode = filp->f_dentry->d_inode;
			
 
				 	struct super_block * sb = inode->i_sb;
			
 
				-	int have_disk_lock = 0;
			
 
				+	unsigned int ra_sectors = 16;
			
 
				 
			
 
				 	mlog_entry("dirino=%llu\n",
			
 
				 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
			
@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
				 			mlog_errno(error);
			
 
				 		/* we haven't got any yet, so propagate the error. */
			
 
				 		stored = error;
			
 
				-		goto bail;
			
 
				+		goto bail_nolock;
			
 
				 	}
			
 
				-	have_disk_lock = 1;
			
 
				 
			
 
				 	offset = filp->f_pos & (sb->s_blocksize - 1);
			
 
				 
			
@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-		/*
			
 
				-		 * Do the readahead (8k)
			
 
				-		 */
			
 
				-		if (!offset) {
			
 
				-			for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
			
 
				+		/* The idea here is to begin with 8k read-ahead and to stay
			
 
				+		 * 4k ahead of our current position.
			
 
				+		 *
			
 
				+		 * TODO: Use the pagecache for this. We just need to
			
 
				+		 * make sure it's cluster-safe... */
			
 
				+		if (!last_ra_blk
			
 
				+		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
			
 
				+			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
			
 
				 			     i > 0; i--) {
			
 
				 				tmp = ocfs2_bread(inode, ++blk, &err, 1);
			
 
				 				if (tmp)
			
 
				 					brelse(tmp);
			
 
				 			}
			
 
				+			last_ra_blk = blk;
			
 
				+			ra_sectors = 8;
			
 
				 		}
			
 
				 
			
 
				 revalidate:
			
@@ -194,9 +198,9 @@ revalidate:
 
				 
			
 
				 	stored = 0;
			
 
				 bail:
			
 
				-	if (have_disk_lock)
			
 
				-		ocfs2_meta_unlock(inode, 0);
			
 
				+	ocfs2_meta_unlock(inode, 0);
			
 
				 
			
 
				+bail_nolock:
			
 
				 	mlog_exit(stored);
			
 
				 
			
 
				 	return stored;
			
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1050,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
 
				 	u64 p_blkno;
			
 
				 	int readflags = OCFS2_BH_CACHED;
			
 
				 
			
 
				-#if 0
			
 
				-	/* only turn this on if we know we can deal with read_block
			
 
				-	 * returning nothing */
			
 
				 	if (reada)
			
 
				 		readflags |= OCFS2_BH_READAHEAD;
			
 
				-#endif
			
 
				 
			
 
				 	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
			
 
				 	    i_size_read(inode)) {
			
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -56,6 +56,7 @@
 
				 #include "journal.h"
			
 
				 #include "namei.h"
			
 
				 #include "suballoc.h"
			
 
				+#include "super.h"
			
 
				 #include "symlink.h"
			
 
				 #include "sysfile.h"
			
 
				 #include "uptodate.h"
			
@@ -1962,13 +1963,8 @@ restart:
 
				 				}
			
 
				 				num++;
			
 
				 
			
 
				-				/* XXX: questionable readahead stuff here */
			
 
				 				bh = ocfs2_bread(dir, b++, &err, 1);
			
 
				 				bh_use[ra_max] = bh;
			
 
				-#if 0		// ???
			
 
				-				if (bh)
			
 
				-					ll_rw_block(READ, 1, &bh);
			
 
				-#endif
			
 
				 			}
			
 
				 		}
			
 
				 		if ((bh = bh_use[ra_ptr++]) == NULL)
			
@@ -1976,6 +1972,10 @@ restart:
 
				 		wait_on_buffer(bh);
			
 
				 		if (!buffer_uptodate(bh)) {
			
 
				 			/* read error, skip block & hope for the best */
			
 
				+			ocfs2_error(dir->i_sb, "reading directory %llu, "
			
 
				+				    "offset %lu\n",
			
 
				+				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
			
 
				+				    block);
			
 
				 			brelse(bh);
			
 
				 			goto next;
			
 
				 		}
			
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 
				 }
			
 
				 
			
 
				 /* Warning: even if it returns true, this does *not* guarantee that
			
 
				- * the block is stored in our inode metadata cache. */
			
 
				+ * the block is stored in our inode metadata cache. 
			
 
				+ * 
			
 
				+ * This can be called under lock_buffer()
			
 
				+ */
			
 
				 int ocfs2_buffer_uptodate(struct inode *inode,
			
 
				 			  struct buffer_head *bh)
			
 
				 {
			
@@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode,
 
				 	return ocfs2_buffer_cached(OCFS2_I(inode), bh);
			
 
				 }
			
 
				 
			
 
				+/* 
			
 
				+ * Determine whether a buffer is currently out on a read-ahead request.
			
 
				+ * ip_io_mutex should be held to serialize submitters with the logic here.
			
 
				+ */
			
 
				+int ocfs2_buffer_read_ahead(struct inode *inode,
			
 
				+			    struct buffer_head *bh)
			
 
				+{
			
 
				+	return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
			
 
				+}
			
 
				+
			
 
				 /* Requires ip_lock */
			
 
				 static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
			
 
				 				     sector_t block)
			
@@ -403,7 +416,11 @@ out_free:
 
				  *
			
 
				  * Note that this function may actually fail to insert the block if
			
 
				  * memory cannot be allocated. This is not fatal however (but may
			
 
				- * result in a performance penalty) */
			
 
				+ * result in a performance penalty)
			
 
				+ *
			
 
				+ * Readahead buffers can be passed in here before the I/O request is
			
 
				+ * completed.
			
 
				+ */
			
 
				 void ocfs2_set_buffer_uptodate(struct inode *inode,
			
 
				 			       struct buffer_head *bh)
			
 
				 {
			
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 
				 				   struct buffer_head *bh);
			
 
				 void ocfs2_remove_from_cache(struct inode *inode,
			
 
				 			     struct buffer_head *bh);
			
 
				+int ocfs2_buffer_read_ahead(struct inode *inode,
			
 
				+			    struct buffer_head *bh);
			
 
				 
			
 
				 #endif /* OCFS2_UPTODATE_H */