17 years ago · 29bd17af7d
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -35,7 +35,6 @@ Features which OCFS2 does not support yet:
 
				 	- Directory change notification (F_NOTIFY)
			
 
				 	- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
			
 
				 	- POSIX ACLs
			
 
				-	- readpages / writepages (not user visible)
			
 
				 
			
 
				 Mount options
			
 
				 =============
			
@@ -62,3 +61,18 @@ data=writeback		Data ordering is not preserved, data may be written
 
				 preferred_slot=0(*)	During mount, try to use this filesystem slot first. If
			
 
				 			it is in use by another node, the first empty one found
			
 
				 			will be chosen. Invalid values will be ignored.
			
 
				+commit=nrsec	(*)	Ocfs2 can be told to sync all its data and metadata
			
 
				+			every 'nrsec' seconds. The default value is 5 seconds.
			
 
				+			This means that if you lose your power, you will lose
			
 
				+			as much as the latest 5 seconds of work (your
			
 
				+			filesystem will not be damaged though, thanks to the
			
 
				+			journaling).  This default value (or any low value)
			
 
				+			will hurt performance, but it's good for data-safety.
			
 
				+			Setting it to 0 will have the same effect as leaving
			
 
				+			it at the default (5 seconds).
			
 
				+			Setting it to very large values will improve
			
 
				+			performance.
			
 
				+localalloc=8(*)		Allows custom localalloc size in MB. If the value is too
			
 
				+			large, the fs will silently revert it to the default.
			
 
				+			Localalloc is not enabled for local mounts.
			
 
				+localflocks		This disables cluster aware flock.
			
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -138,6 +138,7 @@ Code	Seq#	Include File		Comments
 
				 'm'	00-1F	net/irda/irmod.h	conflict!
			
 
				 'n'	00-7F	linux/ncp_fs.h
			
 
				 'n'	E0-FF	video/matrox.h          matroxfb
			
 
				+'o'	00-1F	fs/ocfs2/ocfs2_fs.h	OCFS2
			
 
				 'p'	00-0F	linux/phantom.h		conflict! (OpenHaptics needs this)
			
 
				 'p'	00-3F	linux/mc146818rtc.h	conflict!
			
 
				 'p'	40-7F	linux/nvram.h
			
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -440,14 +440,8 @@ config OCFS2_FS
 
				 	  Tools web page:      http://oss.oracle.com/projects/ocfs2-tools
			
 
				 	  OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
			
 
				 
			
 
				-	  Note: Features which OCFS2 does not support yet:
			
 
				-	          - extended attributes
			
 
				-	          - quotas
			
 
				-	          - cluster aware flock
			
 
				-	          - Directory change notification (F_NOTIFY)
			
 
				-	          - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
			
 
				-	          - POSIX ACLs
			
 
				-	          - readpages / writepages (not user visible)
			
 
				+	  For more information on OCFS2, see the file
			
 
				+	  <file:Documentation/filesystems/ocfs2.txt>.
			
 
				 
			
 
				 config OCFS2_DEBUG_MASKLOG
			
 
				 	bool "OCFS2 logging support"
			
@@ -1028,8 +1022,8 @@ config HUGETLB_PAGE
 
				 	def_bool HUGETLBFS
			
 
				 
			
 
				 config CONFIGFS_FS
			
 
				-	tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
			
 
				-	depends on SYSFS && EXPERIMENTAL
			
 
				+	tristate "Userspace-driven configuration filesystem"
			
 
				+	depends on SYSFS
			
 
				 	help
			
 
				 	  configfs is a ram-based filesystem that provides the converse
			
 
				 	  of sysfs's functionality. Where sysfs is a filesystem-based
			
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -546,7 +546,7 @@ static int populate_groups(struct config_group *group)
 
				 		 * That said, taking our i_mutex is closer to mkdir
			
 
				 		 * emulation, and shouldn't hurt.
			
 
				 		 */
			
 
				-		mutex_lock(&dentry->d_inode->i_mutex);
			
 
				+		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
			
 
				 
			
 
				 		for (i = 0; group->default_groups[i]; i++) {
			
 
				 			new_group = group->default_groups[i];
			
@@ -1405,7 +1405,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 
				 	sd = configfs_sb->s_root->d_fsdata;
			
 
				 	link_group(to_config_group(sd->s_element), group);
			
 
				 
			
 
				-	mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
			
 
				+	mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
			
 
				+			I_MUTEX_PARENT);
			
 
				 
			
 
				 	name.name = group->cg_item.ci_name;
			
 
				 	name.len = strlen(name.name);
			
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -320,7 +320,7 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att
 
				 	umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
			
 
				 	int error = 0;
			
 
				 
			
 
				-	mutex_lock(&dir->d_inode->i_mutex);
			
 
				+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL);
			
 
				 	error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
			
 
				 	mutex_unlock(&dir->d_inode->i_mutex);
			
 
				 
			
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -19,16 +19,17 @@ ocfs2-objs := \
 
				 	ioctl.o 		\
			
 
				 	journal.o 		\
			
 
				 	localalloc.o 		\
			
 
				+	locks.o			\
			
 
				 	mmap.o 			\
			
 
				 	namei.o 		\
			
 
				+	resize.o		\
			
 
				 	slot_map.o 		\
			
 
				 	suballoc.o 		\
			
 
				 	super.o 		\
			
 
				 	symlink.o 		\
			
 
				 	sysfile.o 		\
			
 
				 	uptodate.o		\
			
 
				-	ver.o 			\
			
 
				-	vote.o
			
 
				+	ver.o
			
 
				 
			
 
				 obj-$(CONFIG_OCFS2_FS) += cluster/
			
 
				 obj-$(CONFIG_OCFS2_FS) += dlm/
			
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4731,7 +4731,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 
				 
			
 
				 	mutex_lock(&data_alloc_inode->i_mutex);
			
 
				 
			
 
				-	status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1);
			
 
				+	status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto out_mutex;
			
@@ -4753,7 +4753,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 
				 
			
 
				 out_unlock:
			
 
				 	brelse(data_alloc_bh);
			
 
				-	ocfs2_meta_unlock(data_alloc_inode, 1);
			
 
				+	ocfs2_inode_unlock(data_alloc_inode, 1);
			
 
				 
			
 
				 out_mutex:
			
 
				 	mutex_unlock(&data_alloc_inode->i_mutex);
			
@@ -5077,7 +5077,7 @@ static int ocfs2_free_cached_items(struct ocfs2_super *osb,
 
				 
			
 
				 	mutex_lock(&inode->i_mutex);
			
 
				 
			
 
				-	ret = ocfs2_meta_lock(inode, &di_bh, 1);
			
 
				+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out_mutex;
			
@@ -5118,7 +5118,7 @@ out_journal:
 
				 	ocfs2_commit_trans(osb, handle);
			
 
				 
			
 
				 out_unlock:
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 	brelse(di_bh);
			
 
				 out_mutex:
			
 
				 	mutex_unlock(&inode->i_mutex);
			
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -26,6 +26,7 @@
 
				 #include <asm/byteorder.h>
			
 
				 #include <linux/swap.h>
			
 
				 #include <linux/pipe_fs_i.h>
			
 
				+#include <linux/mpage.h>
			
 
				 
			
 
				 #define MLOG_MASK_PREFIX ML_FILE_IO
			
 
				 #include <cluster/masklog.h>
			
@@ -139,7 +140,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 
				 {
			
 
				 	int err = 0;
			
 
				 	unsigned int ext_flags;
			
 
				-	u64 p_blkno, past_eof;
			
 
				+	u64 max_blocks = bh_result->b_size >> inode->i_blkbits;
			
 
				+	u64 p_blkno, count, past_eof;
			
 
				 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				 
			
 
				 	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
			
@@ -155,7 +157,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL,
			
 
				+	err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count,
			
 
				 					  &ext_flags);
			
 
				 	if (err) {
			
 
				 		mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
			
@@ -164,6 +166,9 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				+	if (max_blocks < count)
			
 
				+		count = max_blocks;
			
 
				+
			
 
				 	/*
			
 
				 	 * ocfs2 never allocates in this function - the only time we
			
 
				 	 * need to use BH_New is when we're extending i_size on a file
			
@@ -178,6 +183,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 
				 	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
			
 
				 		map_bh(bh_result, inode->i_sb, p_blkno);
			
 
				 
			
 
				+	bh_result->b_size = count << inode->i_blkbits;
			
 
				+
			
 
				 	if (!ocfs2_sparse_alloc(osb)) {
			
 
				 		if (p_blkno == 0) {
			
 
				 			err = -EIO;
			
@@ -210,7 +217,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
 
				 			   struct buffer_head *di_bh)
			
 
				 {
			
 
				 	void *kaddr;
			
 
				-	unsigned int size;
			
 
				+	loff_t size;
			
 
				 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
			
 
				 
			
 
				 	if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {
			
@@ -224,8 +231,9 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
 
				 	if (size > PAGE_CACHE_SIZE ||
			
 
				 	    size > ocfs2_max_inline_data(inode->i_sb)) {
			
 
				 		ocfs2_error(inode->i_sb,
			
 
				-			    "Inode %llu has with inline data has bad size: %u",
			
 
				-			    (unsigned long long)OCFS2_I(inode)->ip_blkno, size);
			
 
				+			    "Inode %llu has with inline data has bad size: %Lu",
			
 
				+			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
			
 
				+			    (unsigned long long)size);
			
 
				 		return -EROFS;
			
 
				 	}
			
 
				 
			
@@ -275,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 
				 
			
 
				 	mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));
			
 
				 
			
 
				-	ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page);
			
 
				+	ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);
			
 
				 	if (ret != 0) {
			
 
				 		if (ret == AOP_TRUNCATED_PAGE)
			
 
				 			unlock = 0;
			
@@ -285,7 +293,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 
				 
			
 
				 	if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
			
 
				 		ret = AOP_TRUNCATED_PAGE;
			
 
				-		goto out_meta_unlock;
			
 
				+		goto out_inode_unlock;
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -305,25 +313,16 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 
				 		goto out_alloc;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_data_lock_with_page(inode, 0, page);
			
 
				-	if (ret != 0) {
			
 
				-		if (ret == AOP_TRUNCATED_PAGE)
			
 
				-			unlock = 0;
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_alloc;
			
 
				-	}
			
 
				-
			
 
				 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
			
 
				 		ret = ocfs2_readpage_inline(inode, page);
			
 
				 	else
			
 
				 		ret = block_read_full_page(page, ocfs2_get_block);
			
 
				 	unlock = 0;
			
 
				 
			
 
				-	ocfs2_data_unlock(inode, 0);
			
 
				 out_alloc:
			
 
				 	up_read(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				-out_meta_unlock:
			
 
				-	ocfs2_meta_unlock(inode, 0);
			
 
				+out_inode_unlock:
			
 
				+	ocfs2_inode_unlock(inode, 0);
			
 
				 out:
			
 
				 	if (unlock)
			
 
				 		unlock_page(page);
			
@@ -331,6 +330,62 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * This is used only for read-ahead. Failures or difficult to handle
			
 
				+ * situations are safe to ignore.
			
 
				+ *
			
 
				+ * Right now, we don't bother with BH_Boundary - in-inode extent lists
			
 
				+ * are quite large (243 extents on 4k blocks), so most inodes don't
			
 
				+ * grow out to a tree. If need be, detecting boundary extents could
			
 
				+ * trivially be added in a future version of ocfs2_get_block().
			
 
				+ */
			
 
				+static int ocfs2_readpages(struct file *filp, struct address_space *mapping,
			
 
				+			   struct list_head *pages, unsigned nr_pages)
			
 
				+{
			
 
				+	int ret, err = -EIO;
			
 
				+	struct inode *inode = mapping->host;
			
 
				+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				+	loff_t start;
			
 
				+	struct page *last;
			
 
				+
			
 
				+	/*
			
 
				+	 * Use the nonblocking flag for the dlm code to avoid page
			
 
				+	 * lock inversion, but don't bother with retrying.
			
 
				+	 */
			
 
				+	ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK);
			
 
				+	if (ret)
			
 
				+		return err;
			
 
				+
			
 
				+	if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
			
 
				+		ocfs2_inode_unlock(inode, 0);
			
 
				+		return err;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Don't bother with inline-data. There isn't anything
			
 
				+	 * to read-ahead in that case anyway...
			
 
				+	 */
			
 
				+	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
			
 
				+		goto out_unlock;
			
 
				+
			
 
				+	/*
			
 
				+	 * Check whether a remote node truncated this file - we just
			
 
				+	 * drop out in that case as it's not worth handling here.
			
 
				+	 */
			
 
				+	last = list_entry(pages->prev, struct page, lru);
			
 
				+	start = (loff_t)last->index << PAGE_CACHE_SHIFT;
			
 
				+	if (start >= i_size_read(inode))
			
 
				+		goto out_unlock;
			
 
				+
			
 
				+	err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block);
			
 
				+
			
 
				+out_unlock:
			
 
				+	up_read(&oi->ip_alloc_sem);
			
 
				+	ocfs2_inode_unlock(inode, 0);
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				 /* Note: Because we don't support holes, our allocation has
			
 
				  * already happened (allocation writes zeros to the file data)
			
 
				  * so we don't have to worry about ordered writes in
			
@@ -452,7 +507,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
 
				 	 * accessed concurrently from multiple nodes.
			
 
				 	 */
			
 
				 	if (!INODE_JOURNAL(inode)) {
			
 
				-		err = ocfs2_meta_lock(inode, NULL, 0);
			
 
				+		err = ocfs2_inode_lock(inode, NULL, 0);
			
 
				 		if (err) {
			
 
				 			if (err != -ENOENT)
			
 
				 				mlog_errno(err);
			
@@ -467,7 +522,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
 
				 
			
 
				 	if (!INODE_JOURNAL(inode)) {
			
 
				 		up_read(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				-		ocfs2_meta_unlock(inode, 0);
			
 
				+		ocfs2_inode_unlock(inode, 0);
			
 
				 	}
			
 
				 
			
 
				 	if (err) {
			
@@ -638,34 +693,12 @@ static ssize_t ocfs2_direct_IO(int rw,
 
				 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
			
 
				 		return 0;
			
 
				 
			
 
				-	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
			
 
				-		/*
			
 
				-		 * We get PR data locks even for O_DIRECT.  This
			
 
				-		 * allows concurrent O_DIRECT I/O but doesn't let
			
 
				-		 * O_DIRECT with extending and buffered zeroing writes
			
 
				-		 * race.  If they did race then the buffered zeroing
			
 
				-		 * could be written back after the O_DIRECT I/O.  It's
			
 
				-		 * one thing to tell people not to mix buffered and
			
 
				-		 * O_DIRECT writes, but expecting them to understand
			
 
				-		 * that file extension is also an implicit buffered
			
 
				-		 * write is too much.  By getting the PR we force
			
 
				-		 * writeback of the buffered zeroing before
			
 
				-		 * proceeding.
			
 
				-		 */
			
 
				-		ret = ocfs2_data_lock(inode, 0);
			
 
				-		if (ret < 0) {
			
 
				-			mlog_errno(ret);
			
 
				-			goto out;
			
 
				-		}
			
 
				-		ocfs2_data_unlock(inode, 0);
			
 
				-	}
			
 
				-
			
 
				 	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
			
 
				 					    inode->i_sb->s_bdev, iov, offset,
			
 
				 					    nr_segs, 
			
 
				 					    ocfs2_direct_IO_get_blocks,
			
 
				 					    ocfs2_dio_end_io);
			
 
				-out:
			
 
				+
			
 
				 	mlog_exit(ret);
			
 
				 	return ret;
			
 
				 }
			
@@ -1754,7 +1787,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
 
				 	struct buffer_head *di_bh = NULL;
			
 
				 	struct inode *inode = mapping->host;
			
 
				 
			
 
				-	ret = ocfs2_meta_lock(inode, &di_bh, 1);
			
 
				+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		return ret;
			
@@ -1769,30 +1802,22 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
 
				 	 */
			
 
				 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				 
			
 
				-	ret = ocfs2_data_lock(inode, 1);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_fail;
			
 
				-	}
			
 
				-
			
 
				 	ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
			
 
				 				       fsdata, di_bh, NULL);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				-		goto out_fail_data;
			
 
				+		goto out_fail;
			
 
				 	}
			
 
				 
			
 
				 	brelse(di_bh);
			
 
				 
			
 
				 	return 0;
			
 
				 
			
 
				-out_fail_data:
			
 
				-	ocfs2_data_unlock(inode, 1);
			
 
				 out_fail:
			
 
				 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				 
			
 
				 	brelse(di_bh);
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
@@ -1908,15 +1933,15 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
 
				 
			
 
				 	ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
			
 
				 
			
 
				-	ocfs2_data_unlock(inode, 1);
			
 
				 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				 const struct address_space_operations ocfs2_aops = {
			
 
				 	.readpage	= ocfs2_readpage,
			
 
				+	.readpages	= ocfs2_readpages,
			
 
				 	.writepage	= ocfs2_writepage,
			
 
				 	.write_begin	= ocfs2_write_begin,
			
 
				 	.write_end	= ocfs2_write_end,
			
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -79,7 +79,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 
				 		 * information for this bh as it's not marked locally
			
 
				 		 * uptodate. */
			
 
				 		ret = -EIO;
			
 
				-		brelse(bh);
			
 
				+		put_bh(bh);
			
 
				 	}
			
 
				 
			
 
				 	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
			
@@ -256,7 +256,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 
				 				 * for this bh as it's not marked locally
			
 
				 				 * uptodate. */
			
 
				 				status = -EIO;
			
 
				-				brelse(bh);
			
 
				+				put_bh(bh);
			
 
				 				bhs[i] = NULL;
			
 
				 				continue;
			
 
				 			}
			
@@ -280,3 +280,64 @@ bail:
 
				 	mlog_exit(status);
			
 
				 	return status;
			
 
				 }
			
 
				+
			
 
				+/* Check whether the blkno is the super block or one of the backups. */
			
 
				+static void ocfs2_check_super_or_backup(struct super_block *sb,
			
 
				+					sector_t blkno)
			
 
				+{
			
 
				+	int i;
			
 
				+	u64 backup_blkno;
			
 
				+
			
 
				+	if (blkno == OCFS2_SUPER_BLOCK_BLKNO)
			
 
				+		return;
			
 
				+
			
 
				+	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
			
 
				+		backup_blkno = ocfs2_backup_super_blkno(sb, i);
			
 
				+		if (backup_blkno == blkno)
			
 
				+			return;
			
 
				+	}
			
 
				+
			
 
				+	BUG();
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Writing the super block or its backups need not coordinate with the journal,
			
 
				+ * so we don't need to lock ip_io_mutex and inode doesn't need to be passed
			
 
				+ * into this function.
			
 
				+ */
			
 
				+int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
			
 
				+				struct buffer_head *bh)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	mlog_entry_void();
			
 
				+
			
 
				+	BUG_ON(buffer_jbd(bh));
			
 
				+	ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);
			
 
				+
			
 
				+	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
			
 
				+		ret = -EROFS;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	lock_buffer(bh);
			
 
				+	set_buffer_uptodate(bh);
			
 
				+
			
 
				+	/* remove from dirty list before I/O. */
			
 
				+	clear_buffer_dirty(bh);
			
 
				+
			
 
				+	get_bh(bh); /* for end_buffer_write_sync() */
			
 
				+	bh->b_end_io = end_buffer_write_sync;
			
 
				+	submit_bh(WRITE, bh);
			
 
				+
			
 
				+	wait_on_buffer(bh);
			
 
				+
			
 
				+	if (!buffer_uptodate(bh)) {
			
 
				+		ret = -EIO;
			
 
				+		put_bh(bh);
			
 
				+	}
			
 
				+
			
 
				+out:
			
 
				+	mlog_exit(ret);
			
 
				+	return ret;
			
 
				+}
			
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -47,6 +47,8 @@ int ocfs2_read_blocks(struct ocfs2_super          *osb,
 
				 		      int                  flags,
			
 
				 		      struct inode        *inode);
			
 
				 
			
 
				+int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
			
 
				+				struct buffer_head *bh);
			
 
				 
			
 
				 #define OCFS2_BH_CACHED            1
			
 
				 #define OCFS2_BH_READAHEAD         8
			
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -35,7 +35,7 @@
 
				 #define O2HB_LIVE_THRESHOLD	   2
			
 
				 /* number of equal samples to be seen as dead */
			
 
				 extern unsigned int o2hb_dead_threshold;
			
 
				-#define O2HB_DEFAULT_DEAD_THRESHOLD	   7
			
 
				+#define O2HB_DEFAULT_DEAD_THRESHOLD	   31
			
 
				 /* Otherwise MAX_WRITE_TIMEOUT will be zero... */
			
 
				 #define O2HB_MIN_DEAD_THRESHOLD	  2
			
 
				 #define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (o2hb_dead_threshold - 1))
			
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -60,8 +60,8 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data,
 
				 /* same as hb delay, we're waiting for another node to recognize our hb */
			
 
				 #define O2NET_RECONNECT_DELAY_MS_DEFAULT	2000
			
 
				 
			
 
				-#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT	5000
			
 
				-#define O2NET_IDLE_TIMEOUT_MS_DEFAULT		10000
			
 
				+#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT	2000
			
 
				+#define O2NET_IDLE_TIMEOUT_MS_DEFAULT		30000
			
 
				 
			
 
				 
			
 
				 /* TODO: figure this out.... */
			
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,12 @@
 
				  * locking semantics of the file system using the protocol.  It should 
			
 
				  * be somewhere else, I'm sure, but right now it isn't.
			
 
				  *
			
 
				+ * New in version 10:
			
 
				+ * 	- Meta/data locks combined
			
 
				+ *
			
 
				+ * New in version 9:
			
 
				+ * 	- All votes removed
			
 
				+ *
			
 
				  * New in version 8:
			
 
				  * 	- Replace delete inode votes with a cluster lock
			
 
				  *
			
@@ -60,7 +66,7 @@
 
				  * 	- full 64 bit i_size in the metadata lock lvbs
			
 
				  * 	- introduction of "rw" lock and pushing meta/data locking down
			
 
				  */
			
 
				-#define O2NET_PROTOCOL_VERSION 8ULL
			
 
				+#define O2NET_PROTOCOL_VERSION 10ULL
			
 
				 struct o2net_handshake {
			
 
				 	__be64	protocol_version;
			
 
				 	__be64	connector_id;
			
--- a/fs/ocfs2/cluster/ver.c
+++ b/fs/ocfs2/cluster/ver.c
@@ -28,7 +28,7 @@
 
				 
			
 
				 #include "ver.h"
			
 
				 
			
 
				-#define CLUSTER_BUILD_VERSION "1.3.3"
			
 
				+#define CLUSTER_BUILD_VERSION "1.5.0"
			
 
				 
			
 
				 #define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION
			
 
				 
			
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -128,9 +128,9 @@ static int ocfs2_match_dentry(struct dentry *dentry,
 
				 /*
			
 
				  * Walk the inode alias list, and find a dentry which has a given
			
 
				  * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
			
 
				- * is looking for a dentry_lock reference. The vote thread is looking
			
 
				- * to unhash aliases, so we allow it to skip any that already have
			
 
				- * that property.
			
 
				+ * is looking for a dentry_lock reference. The downconvert thread is
			
 
				+ * looking to unhash aliases, so we allow it to skip any that already
			
 
				+ * have that property.
			
 
				  */
			
 
				 struct dentry *ocfs2_find_local_alias(struct inode *inode,
			
 
				 				      u64 parent_blkno,
			
@@ -266,7 +266,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
 
				 	dl->dl_count = 0;
			
 
				 	/*
			
 
				 	 * Does this have to happen below, for all attaches, in case
			
 
				-	 * the struct inode gets blown away by votes?
			
 
				+	 * the struct inode gets blown away by the downconvert thread?
			
 
				 	 */
			
 
				 	dl->dl_inode = igrab(inode);
			
 
				 	dl->dl_parent_blkno = parent_blkno;
			
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -846,14 +846,14 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
				 	mlog_entry("dirino=%llu\n",
			
 
				 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
			
 
				 
			
 
				-	error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
			
 
				+	error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
			
 
				 	if (lock_level && error >= 0) {
			
 
				 		/* We release EX lock which used to update atime
			
 
				 		 * and get PR lock again to reduce contention
			
 
				 		 * on commonly accessed directories. */
			
 
				-		ocfs2_meta_unlock(inode, 1);
			
 
				+		ocfs2_inode_unlock(inode, 1);
			
 
				 		lock_level = 0;
			
 
				-		error = ocfs2_meta_lock(inode, NULL, 0);
			
 
				+		error = ocfs2_inode_lock(inode, NULL, 0);
			
 
				 	}
			
 
				 	if (error < 0) {
			
 
				 		if (error != -ENOENT)
			
@@ -865,7 +865,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
				 	error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos,
			
 
				 				      dirent, filldir, NULL);
			
 
				 
			
 
				-	ocfs2_meta_unlock(inode, lock_level);
			
 
				+	ocfs2_inode_unlock(inode, lock_level);
			
 
				 
			
 
				 bail_nolock:
			
 
				 	mlog_exit(error);
			
--- a/fs/ocfs2/dlm/dlmfsver.c
+++ b/fs/ocfs2/dlm/dlmfsver.c
@@ -28,7 +28,7 @@
 
				 
			
 
				 #include "dlmfsver.h"
			
 
				 
			
 
				-#define DLM_BUILD_VERSION "1.3.3"
			
 
				+#define DLM_BUILD_VERSION "1.5.0"
			
 
				 
			
 
				 #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION
			
 
				 
			
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2270,6 +2270,12 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	/* Clean up join state on node death. */
			
 
				+	if (dlm->joining_node == idx) {
			
 
				+		mlog(0, "Clearing join state for node %u\n", idx);
			
 
				+		__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
			
 
				+	}
			
 
				+
			
 
				 	/* check to see if the node is already considered dead */
			
 
				 	if (!test_bit(idx, dlm->live_nodes_map)) {
			
 
				 		mlog(0, "for domain %s, node %d is already dead. "
			
@@ -2288,12 +2294,6 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
 
				 
			
 
				 	clear_bit(idx, dlm->live_nodes_map);
			
 
				 
			
 
				-	/* Clean up join state on node death. */
			
 
				-	if (dlm->joining_node == idx) {
			
 
				-		mlog(0, "Clearing join state for node %u\n", idx);
			
 
				-		__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
			
 
				-	}
			
 
				-
			
 
				 	/* make sure local cleanup occurs before the heartbeat events */
			
 
				 	if (!test_bit(idx, dlm->recovery_map))
			
 
				 		dlm_do_local_recovery_cleanup(dlm, idx);
			
@@ -2321,6 +2321,13 @@ void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data)
 
				 	if (!dlm_grab(dlm))
			
 
				 		return;
			
 
				 
			
 
				+	/*
			
 
				+	 * This will notify any dlm users that a node in our domain
			
 
				+	 * went away without notifying us first.
			
 
				+	 */
			
 
				+	if (test_bit(idx, dlm->domain_map))
			
 
				+		dlm_fire_domain_eviction_callbacks(dlm, idx);
			
 
				+
			
 
				 	spin_lock(&dlm->spinlock);
			
 
				 	__dlm_hb_node_down(dlm, idx);
			
 
				 	spin_unlock(&dlm->spinlock);
			
--- a/fs/ocfs2/dlm/dlmver.c
+++ b/fs/ocfs2/dlm/dlmver.c
@@ -28,7 +28,7 @@
 
				 
			
 
				 #include "dlmver.h"
			
 
				 
			
 
				-#define DLM_BUILD_VERSION "1.3.3"
			
 
				+#define DLM_BUILD_VERSION "1.5.0"
			
 
				 
			
 
				 #define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION
			
 
				 
			
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -55,7 +55,6 @@
 
				 #include "slot_map.h"
			
 
				 #include "super.h"
			
 
				 #include "uptodate.h"
			
 
				-#include "vote.h"
			
 
				 
			
 
				 #include "buffer_head_io.h"
			
 
				 
			
@@ -69,6 +68,7 @@ struct ocfs2_mask_waiter {
 
				 
			
 
				 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
			
 
				 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
			
 
				+static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
			
 
				 
			
 
				 /*
			
 
				  * Return value from ->downconvert_worker functions.
			
@@ -153,10 +153,10 @@ struct ocfs2_lock_res_ops {
 
				 	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
			
 
				 
			
 
				 	/*
			
 
				-	 * Optionally called in the downconvert (or "vote") thread
			
 
				-	 * after a successful downconvert. The lockres will not be
			
 
				-	 * referenced after this callback is called, so it is safe to
			
 
				-	 * free memory, etc.
			
 
				+	 * Optionally called in the downconvert thread after a
			
 
				+	 * successful downconvert. The lockres will not be referenced
			
 
				+	 * after this callback is called, so it is safe to free
			
 
				+	 * memory, etc.
			
 
				 	 *
			
 
				 	 * The exact semantics of when this is called are controlled
			
 
				 	 * by ->downconvert_worker()
			
@@ -225,17 +225,12 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 
				 	.flags		= 0,
			
 
				 };
			
 
				 
			
 
				-static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
			
 
				+static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
			
 
				 	.get_osb	= ocfs2_get_inode_osb,
			
 
				 	.check_downconvert = ocfs2_check_meta_downconvert,
			
 
				 	.set_lvb	= ocfs2_set_meta_lvb,
			
 
				-	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
			
 
				-};
			
 
				-
			
 
				-static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
			
 
				-	.get_osb	= ocfs2_get_inode_osb,
			
 
				 	.downconvert_worker = ocfs2_data_convert_worker,
			
 
				-	.flags		= 0,
			
 
				+	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
			
 
				 };
			
 
				 
			
 
				 static struct ocfs2_lock_res_ops ocfs2_super_lops = {
			
@@ -258,10 +253,14 @@ static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
 
				 	.flags		= 0,
			
 
				 };
			
 
				 
			
 
				+static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
			
 
				+	.get_osb	= ocfs2_get_file_osb,
			
 
				+	.flags		= 0,
			
 
				+};
			
 
				+
			
 
				 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
			
 
				 {
			
 
				 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
			
 
				-		lockres->l_type == OCFS2_LOCK_TYPE_DATA ||
			
 
				 		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
			
 
				 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
			
 
				 }
			
@@ -310,12 +309,24 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 
				 		"resource %s: %s\n", dlm_errname(_stat), _func,	\
			
 
				 		_lockres->l_name, dlm_errmsg(_stat));		\
			
 
				 } while (0)
			
 
				-static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
			
 
				-				 struct ocfs2_lock_res *lockres);
			
 
				-static int ocfs2_meta_lock_update(struct inode *inode,
			
 
				+static int ocfs2_downconvert_thread(void *arg);
			
 
				+static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
			
 
				+					struct ocfs2_lock_res *lockres);
			
 
				+static int ocfs2_inode_lock_update(struct inode *inode,
			
 
				 				  struct buffer_head **bh);
			
 
				 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
			
 
				 static inline int ocfs2_highest_compat_lock_level(int level);
			
 
				+static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
			
 
				+				      int new_level);
			
 
				+static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
			
 
				+				  struct ocfs2_lock_res *lockres,
			
 
				+				  int new_level,
			
 
				+				  int lvb);
			
 
				+static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
			
 
				+				        struct ocfs2_lock_res *lockres);
			
 
				+static int ocfs2_cancel_convert(struct ocfs2_super *osb,
			
 
				+				struct ocfs2_lock_res *lockres);
			
 
				+
			
 
				 
			
 
				 static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
			
 
				 				  u64 blkno,
			
@@ -402,10 +413,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
 
				 			ops = &ocfs2_inode_rw_lops;
			
 
				 			break;
			
 
				 		case OCFS2_LOCK_TYPE_META:
			
 
				-			ops = &ocfs2_inode_meta_lops;
			
 
				-			break;
			
 
				-		case OCFS2_LOCK_TYPE_DATA:
			
 
				-			ops = &ocfs2_inode_data_lops;
			
 
				+			ops = &ocfs2_inode_inode_lops;
			
 
				 			break;
			
 
				 		case OCFS2_LOCK_TYPE_OPEN:
			
 
				 			ops = &ocfs2_inode_open_lops;
			
@@ -428,6 +436,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
 
				 	return OCFS2_SB(inode->i_sb);
			
 
				 }
			
 
				 
			
 
				+static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
			
 
				+{
			
 
				+	struct ocfs2_file_private *fp = lockres->l_priv;
			
 
				+
			
 
				+	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
			
 
				+}
			
 
				+
			
 
				 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
			
 
				 {
			
 
				 	__be64 inode_blkno_be;
			
@@ -508,6 +523,21 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
 
				 				   &ocfs2_rename_lops, osb);
			
 
				 }
			
 
				 
			
 
				+void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
			
 
				+			      struct ocfs2_file_private *fp)
			
 
				+{
			
 
				+	struct inode *inode = fp->fp_file->f_mapping->host;
			
 
				+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				+
			
 
				+	ocfs2_lock_res_init_once(lockres);
			
 
				+	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
			
 
				+			      inode->i_generation, lockres->l_name);
			
 
				+	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
			
 
				+				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
			
 
				+				   fp);
			
 
				+	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
			
 
				+}
			
 
				+
			
 
				 void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
			
 
				 {
			
 
				 	mlog_entry_void();
			
@@ -724,6 +754,13 @@ static void ocfs2_blocking_ast(void *opaque, int level)
 
				 	     lockres->l_name, level, lockres->l_level,
			
 
				 	     ocfs2_lock_type_string(lockres->l_type));
			
 
				 
			
 
				+	/*
			
 
				+	 * We can skip the bast for locks which don't enable caching -
			
 
				+	 * they'll be dropped at the earliest possible time anyway.
			
 
				+	 */
			
 
				+	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
			
 
				+		return;
			
 
				+
			
 
				 	spin_lock_irqsave(&lockres->l_lock, flags);
			
 
				 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
			
 
				 	if (needs_downconvert)
			
@@ -732,7 +769,7 @@ static void ocfs2_blocking_ast(void *opaque, int level)
 
				 
			
 
				 	wake_up(&lockres->l_event);
			
 
				 
			
 
				-	ocfs2_kick_vote_thread(osb);
			
 
				+	ocfs2_wake_downconvert_thread(osb);
			
 
				 }
			
 
				 
			
 
				 static void ocfs2_locking_ast(void *opaque)
			
@@ -935,6 +972,21 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
 
				 
			
 
				 }
			
 
				 
			
 
				+static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
			
 
				+					     struct ocfs2_lock_res *lockres)
			
 
				+{
			
 
				+	int ret;
			
 
				+
			
 
				+	ret = wait_for_completion_interruptible(&mw->mw_complete);
			
 
				+	if (ret)
			
 
				+		lockres_remove_mask_waiter(lockres, mw);
			
 
				+	else
			
 
				+		ret = mw->mw_status;
			
 
				+	/* Re-arm the completion in case we want to wait on it again */
			
 
				+	INIT_COMPLETION(mw->mw_complete);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 static int ocfs2_cluster_lock(struct ocfs2_super *osb,
			
 
				 			      struct ocfs2_lock_res *lockres,
			
 
				 			      int level,
			
@@ -1089,7 +1141,7 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
 
				 	mlog_entry_void();
			
 
				 	spin_lock_irqsave(&lockres->l_lock, flags);
			
 
				 	ocfs2_dec_holders(lockres, level);
			
 
				-	ocfs2_vote_on_unlock(osb, lockres);
			
 
				+	ocfs2_downconvert_on_unlock(osb, lockres);
			
 
				 	spin_unlock_irqrestore(&lockres->l_lock, flags);
			
 
				 	mlog_exit_void();
			
 
				 }
			
@@ -1147,13 +1199,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
 
				 	 * We don't want to use LKM_LOCAL on a meta data lock as they
			
 
				 	 * don't use a generation in their lock names.
			
 
				 	 */
			
 
				-	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				-	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
			
 
				+	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto bail;
			
@@ -1311,76 +1357,221 @@ out:
 
				 	mlog_exit_void();
			
 
				 }
			
 
				 
			
 
				-int ocfs2_data_lock_full(struct inode *inode,
			
 
				-			 int write,
			
 
				-			 int arg_flags)
			
 
				+static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
			
 
				+				     int level)
			
 
				 {
			
 
				-	int status = 0, level;
			
 
				-	struct ocfs2_lock_res *lockres;
			
 
				-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				+	int ret;
			
 
				+	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
			
 
				+	unsigned long flags;
			
 
				+	struct ocfs2_mask_waiter mw;
			
 
				 
			
 
				-	BUG_ON(!inode);
			
 
				+	ocfs2_init_mask_waiter(&mw);
			
 
				 
			
 
				-	mlog_entry_void();
			
 
				+retry_cancel:
			
 
				+	spin_lock_irqsave(&lockres->l_lock, flags);
			
 
				+	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
			
 
				+		ret = ocfs2_prepare_cancel_convert(osb, lockres);
			
 
				+		if (ret) {
			
 
				+			spin_unlock_irqrestore(&lockres->l_lock, flags);
			
 
				+			ret = ocfs2_cancel_convert(osb, lockres);
			
 
				+			if (ret < 0) {
			
 
				+				mlog_errno(ret);
			
 
				+				goto out;
			
 
				+			}
			
 
				+			goto retry_cancel;
			
 
				+		}
			
 
				+		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
			
 
				+		spin_unlock_irqrestore(&lockres->l_lock, flags);
			
 
				 
			
 
				-	mlog(0, "inode %llu take %s DATA lock\n",
			
 
				-	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			
 
				-	     write ? "EXMODE" : "PRMODE");
			
 
				+		ocfs2_wait_for_mask(&mw);
			
 
				+		goto retry_cancel;
			
 
				+	}
			
 
				 
			
 
				-	/* We'll allow faking a readonly data lock for
			
 
				-	 * rodevices. */
			
 
				-	if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) {
			
 
				-		if (write) {
			
 
				-			status = -EROFS;
			
 
				-			mlog_errno(status);
			
 
				+	ret = -ERESTARTSYS;
			
 
				+	/*
			
 
				+	 * We may still have gotten the lock, in which case there's no
			
 
				+	 * point to restarting the syscall.
			
 
				+	 */
			
 
				+	if (lockres->l_level == level)
			
 
				+		ret = 0;
			
 
				+
			
 
				+	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
			
 
				+	     lockres->l_flags, lockres->l_level, lockres->l_action);
			
 
				+
			
 
				+	spin_unlock_irqrestore(&lockres->l_lock, flags);
			
 
				+
			
 
				+out:
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
			
 
				+ * flock() calls. The locking approach this requires is sufficiently
			
 
				+ * different from all other cluster lock types that we implement a
			
 
				+ * separate path to the "low-level" dlm calls. In particular:
			
 
				+ *
			
 
				+ * - No optimization of lock levels is done - we take exactly
			
 
				+ *   what's been requested.
			
 
				+ *
			
 
				+ * - No lock caching is employed. We immediately downconvert to
			
 
				+ *   no-lock at unlock time. This also means flock locks never go on
			
 
				+ *   the blocking list.
			
 
				+ *
			
 
				+ * - Since userspace can trivially deadlock itself with flock, we make
			
 
				+ *   sure to allow cancellation of a misbehaving application's flock()
			
 
				+ *   request.
			
 
				+ *
			
 
				+ * - Access to any flock lockres doesn't require concurrency, so we
			
 
				+ *   can simplify the code by requiring the caller to guarantee
			
 
				+ *   serialization of dlmglue flock calls.
			
 
				+ */
			
 
				+int ocfs2_file_lock(struct file *file, int ex, int trylock)
			
 
				+{
			
 
				+	int ret, level = ex ? LKM_EXMODE : LKM_PRMODE;
			
 
				+	unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0;
			
 
				+	unsigned long flags;
			
 
				+	struct ocfs2_file_private *fp = file->private_data;
			
 
				+	struct ocfs2_lock_res *lockres = &fp->fp_flock;
			
 
				+	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
			
 
				+	struct ocfs2_mask_waiter mw;
			
 
				+
			
 
				+	ocfs2_init_mask_waiter(&mw);
			
 
				+
			
 
				+	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
			
 
				+	    (lockres->l_level > LKM_NLMODE)) {
			
 
				+		mlog(ML_ERROR,
			
 
				+		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
			
 
				+		     "level: %u\n", lockres->l_name, lockres->l_flags,
			
 
				+		     lockres->l_level);
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	spin_lock_irqsave(&lockres->l_lock, flags);
			
 
				+	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
			
 
				+		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
			
 
				+		spin_unlock_irqrestore(&lockres->l_lock, flags);
			
 
				+
			
 
				+		/*
			
 
				+		 * Get the lock at NLMODE to start - that way we
			
 
				+		 * can cancel the upconvert request if need be.
			
 
				+		 */
			
 
				+		ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0);
			
 
				+		if (ret < 0) {
			
 
				+			mlog_errno(ret);
			
 
				+			goto out;
			
 
				 		}
			
 
				-		goto out;
			
 
				+
			
 
				+		ret = ocfs2_wait_for_mask(&mw);
			
 
				+		if (ret) {
			
 
				+			mlog_errno(ret);
			
 
				+			goto out;
			
 
				+		}
			
 
				+		spin_lock_irqsave(&lockres->l_lock, flags);
			
 
				 	}
			
 
				 
			
 
				-	if (ocfs2_mount_local(osb))
			
 
				-		goto out;
			
 
				+	lockres->l_action = OCFS2_AST_CONVERT;
			
 
				+	lkm_flags |= LKM_CONVERT;
			
 
				+	lockres->l_requested = level;
			
 
				+	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
			
 
				 
			
 
				-	lockres = &OCFS2_I(inode)->ip_data_lockres;
			
 
				+	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
			
 
				+	spin_unlock_irqrestore(&lockres->l_lock, flags);
			
 
				 
			
 
				-	level = write ? LKM_EXMODE : LKM_PRMODE;
			
 
				+	ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags,
			
 
				+		      lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1,
			
 
				+		      ocfs2_locking_ast, lockres, ocfs2_blocking_ast);
			
 
				+	if (ret != DLM_NORMAL) {
			
 
				+		if (trylock && ret == DLM_NOTQUEUED)
			
 
				+			ret = -EAGAIN;
			
 
				+		else {
			
 
				+			ocfs2_log_dlm_error("dlmlock", ret, lockres);
			
 
				+			ret = -EINVAL;
			
 
				+		}
			
 
				 
			
 
				-	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level,
			
 
				-				    0, arg_flags);
			
 
				-	if (status < 0 && status != -EAGAIN)
			
 
				-		mlog_errno(status);
			
 
				+		ocfs2_recover_from_dlm_error(lockres, 1);
			
 
				+		lockres_remove_mask_waiter(lockres, &mw);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
			
 
				+	if (ret == -ERESTARTSYS) {
			
 
				+		/*
			
 
				+		 * Userspace can cause deadlock itself with
			
 
				+		 * flock(). Current behavior locally is to allow the
			
 
				+		 * deadlock, but abort the system call if a signal is
			
 
				+		 * received. We follow this example, otherwise a
			
 
				+		 * poorly written program could sit in kernel until
			
 
				+		 * reboot.
			
 
				+		 *
			
 
				+		 * Handling this is a bit more complicated for Ocfs2
			
 
				+		 * though. We can't exit this function with an
			
 
				+		 * outstanding lock request, so a cancel convert is
			
 
				+		 * required. We intentionally overwrite 'ret' - if the
			
 
				+		 * cancel fails and the lock was granted, it's easier
			
 
				+		 * to just bubble success back up to the user.
			
 
				+		 */
			
 
				+		ret = ocfs2_flock_handle_signal(lockres, level);
			
 
				+	}
			
 
				 
			
 
				 out:
			
 
				-	mlog_exit(status);
			
 
				-	return status;
			
 
				+
			
 
				+	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
			
 
				+	     lockres->l_name, ex, trylock, ret);
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				-/* see ocfs2_meta_lock_with_page() */
			
 
				-int ocfs2_data_lock_with_page(struct inode *inode,
			
 
				-			      int write,
			
 
				-			      struct page *page)
			
 
				+void ocfs2_file_unlock(struct file *file)
			
 
				 {
			
 
				 	int ret;
			
 
				+	unsigned long flags;
			
 
				+	struct ocfs2_file_private *fp = file->private_data;
			
 
				+	struct ocfs2_lock_res *lockres = &fp->fp_flock;
			
 
				+	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
			
 
				+	struct ocfs2_mask_waiter mw;
			
 
				 
			
 
				-	ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK);
			
 
				-	if (ret == -EAGAIN) {
			
 
				-		unlock_page(page);
			
 
				-		if (ocfs2_data_lock(inode, write) == 0)
			
 
				-			ocfs2_data_unlock(inode, write);
			
 
				-		ret = AOP_TRUNCATED_PAGE;
			
 
				+	ocfs2_init_mask_waiter(&mw);
			
 
				+
			
 
				+	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
			
 
				+		return;
			
 
				+
			
 
				+	if (lockres->l_level == LKM_NLMODE)
			
 
				+		return;
			
 
				+
			
 
				+	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
			
 
				+	     lockres->l_name, lockres->l_flags, lockres->l_level,
			
 
				+	     lockres->l_action);
			
 
				+
			
 
				+	spin_lock_irqsave(&lockres->l_lock, flags);
			
 
				+	/*
			
 
				+	 * Fake a blocking ast for the downconvert code.
			
 
				+	 */
			
 
				+	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
			
 
				+	lockres->l_blocking = LKM_EXMODE;
			
 
				+
			
 
				+	ocfs2_prepare_downconvert(lockres, LKM_NLMODE);
			
 
				+	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
			
 
				+	spin_unlock_irqrestore(&lockres->l_lock, flags);
			
 
				+
			
 
				+	ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0);
			
 
				+	if (ret) {
			
 
				+		mlog_errno(ret);
			
 
				+		return;
			
 
				 	}
			
 
				 
			
 
				-	return ret;
			
 
				+	ret = ocfs2_wait_for_mask(&mw);
			
 
				+	if (ret)
			
 
				+		mlog_errno(ret);
			
 
				 }
			
 
				 
			
 
				-static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
			
 
				-				 struct ocfs2_lock_res *lockres)
			
 
				+static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
			
 
				+					struct ocfs2_lock_res *lockres)
			
 
				 {
			
 
				 	int kick = 0;
			
 
				 
			
 
				 	mlog_entry_void();
			
 
				 
			
 
				 	/* If we know that another node is waiting on our lock, kick
			
 
				-	 * the vote thread * pre-emptively when we reach a release
			
 
				+	 * the downconvert thread pre-emptively when we reach a release
			
 
				 	 * condition. */
			
 
				 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
			
 
				 		switch(lockres->l_blocking) {
			
@@ -1398,27 +1589,7 @@ static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
 
				 	}
			
 
				 
			
 
				 	if (kick)
			
 
				-		ocfs2_kick_vote_thread(osb);
			
 
				-
			
 
				-	mlog_exit_void();
			
 
				-}
			
 
				-
			
 
				-void ocfs2_data_unlock(struct inode *inode,
			
 
				-		       int write)
			
 
				-{
			
 
				-	int level = write ? LKM_EXMODE : LKM_PRMODE;
			
 
				-	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres;
			
 
				-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				-
			
 
				-	mlog_entry_void();
			
 
				-
			
 
				-	mlog(0, "inode %llu drop %s DATA lock\n",
			
 
				-	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			
 
				-	     write ? "EXMODE" : "PRMODE");
			
 
				-
			
 
				-	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
			
 
				-	    !ocfs2_mount_local(osb))
			
 
				-		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
			
 
				+		ocfs2_wake_downconvert_thread(osb);
			
 
				 
			
 
				 	mlog_exit_void();
			
 
				 }
			
@@ -1442,11 +1613,11 @@ static u64 ocfs2_pack_timespec(struct timespec *spec)
 
				 
			
 
				 /* Call this with the lockres locked. I am reasonably sure we don't
			
 
				  * need ip_lock in this function as anyone who would be changing those
			
 
				- * values is supposed to be blocked in ocfs2_meta_lock right now. */
			
 
				+ * values is supposed to be blocked in ocfs2_inode_lock right now. */
			
 
				 static void __ocfs2_stuff_meta_lvb(struct inode *inode)
			
 
				 {
			
 
				 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				-	struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
			
 
				+	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
			
 
				 	struct ocfs2_meta_lvb *lvb;
			
 
				 
			
 
				 	mlog_entry_void();
			
@@ -1496,7 +1667,7 @@ static void ocfs2_unpack_timespec(struct timespec *spec,
 
				 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
			
 
				 {
			
 
				 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				-	struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
			
 
				+	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
			
 
				 	struct ocfs2_meta_lvb *lvb;
			
 
				 
			
 
				 	mlog_entry_void();
			
@@ -1604,12 +1775,12 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre
 
				 }
			
 
				 
			
 
				 /* may or may not return a bh if it went to disk. */
			
 
				-static int ocfs2_meta_lock_update(struct inode *inode,
			
 
				+static int ocfs2_inode_lock_update(struct inode *inode,
			
 
				 				  struct buffer_head **bh)
			
 
				 {
			
 
				 	int status = 0;
			
 
				 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				-	struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
			
 
				+	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
			
 
				 	struct ocfs2_dinode *fe;
			
 
				 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				 
			
@@ -1721,7 +1892,7 @@ static int ocfs2_assign_bh(struct inode *inode,
 
				  * returns < 0 error if the callback will never be called, otherwise
			
 
				  * the result of the lock will be communicated via the callback.
			
 
				  */
			
 
				-int ocfs2_meta_lock_full(struct inode *inode,
			
 
				+int ocfs2_inode_lock_full(struct inode *inode,
			
 
				 			 struct buffer_head **ret_bh,
			
 
				 			 int ex,
			
 
				 			 int arg_flags)
			
@@ -1756,7 +1927,7 @@ int ocfs2_meta_lock_full(struct inode *inode,
 
				 		wait_event(osb->recovery_event,
			
 
				 			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));
			
 
				 
			
 
				-	lockres = &OCFS2_I(inode)->ip_meta_lockres;
			
 
				+	lockres = &OCFS2_I(inode)->ip_inode_lockres;
			
 
				 	level = ex ? LKM_EXMODE : LKM_PRMODE;
			
 
				 	dlm_flags = 0;
			
 
				 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
			
@@ -1795,11 +1966,11 @@ local:
 
				 	}
			
 
				 
			
 
				 	/* This is fun. The caller may want a bh back, or it may
			
 
				-	 * not. ocfs2_meta_lock_update definitely wants one in, but
			
 
				+	 * not. ocfs2_inode_lock_update definitely wants one in, but
			
 
				 	 * may or may not read one, depending on what's in the
			
 
				 	 * LVB. The result of all of this is that we've *only* gone to
			
 
				 	 * disk if we have to, so the complexity is worthwhile. */
			
 
				-	status = ocfs2_meta_lock_update(inode, &local_bh);
			
 
				+	status = ocfs2_inode_lock_update(inode, &local_bh);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -1821,7 +1992,7 @@ bail:
 
				 			*ret_bh = NULL;
			
 
				 		}
			
 
				 		if (acquired)
			
 
				-			ocfs2_meta_unlock(inode, ex);
			
 
				+			ocfs2_inode_unlock(inode, ex);
			
 
				 	}
			
 
				 
			
 
				 	if (local_bh)
			
@@ -1832,19 +2003,20 @@ bail:
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * This is working around a lock inversion between tasks acquiring DLM locks
			
 
				- * while holding a page lock and the vote thread which blocks dlm lock acquiry
			
 
				- * while acquiring page locks.
			
 
				+ * This is working around a lock inversion between tasks acquiring DLM
			
 
				+ * locks while holding a page lock and the downconvert thread which
			
 
				+ * blocks dlm lock acquisition while acquiring page locks.
			
 
				  *
			
 
				  * ** These _with_page variantes are only intended to be called from aop
			
 
				  * methods that hold page locks and return a very specific *positive* error
			
 
				  * code that aop methods pass up to the VFS -- test for errors with != 0. **
			
 
				  *
			
 
				- * The DLM is called such that it returns -EAGAIN if it would have blocked
			
 
				- * waiting for the vote thread.  In that case we unlock our page so the vote
			
 
				- * thread can make progress.  Once we've done this we have to return
			
 
				- * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up
			
 
				- * into the VFS who will then immediately retry the aop call.
			
 
				+ * The DLM is called such that it returns -EAGAIN if it would have
			
 
				+ * blocked waiting for the downconvert thread.  In that case we unlock
			
 
				+ * our page so the downconvert thread can make progress.  Once we've
			
 
				+ * done this we have to return AOP_TRUNCATED_PAGE so the aop method
			
 
				+ * that called us can bubble that back up into the VFS who will then
			
 
				+ * immediately retry the aop call.
			
 
				  *
			
 
				  * We do a blocking lock and immediate unlock before returning, though, so that
			
 
				  * the lock has a great chance of being cached on this node by the time the VFS
			
@@ -1852,32 +2024,32 @@ bail:
 
				  * ping locks back and forth, but that's a risk we're willing to take to avoid
			
 
				  * the lock inversion simply.
			
 
				  */
			
 
				-int ocfs2_meta_lock_with_page(struct inode *inode,
			
 
				+int ocfs2_inode_lock_with_page(struct inode *inode,
			
 
				 			      struct buffer_head **ret_bh,
			
 
				 			      int ex,
			
 
				 			      struct page *page)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
			
 
				+	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
			
 
				 	if (ret == -EAGAIN) {
			
 
				 		unlock_page(page);
			
 
				-		if (ocfs2_meta_lock(inode, ret_bh, ex) == 0)
			
 
				-			ocfs2_meta_unlock(inode, ex);
			
 
				+		if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
			
 
				+			ocfs2_inode_unlock(inode, ex);
			
 
				 		ret = AOP_TRUNCATED_PAGE;
			
 
				 	}
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int ocfs2_meta_lock_atime(struct inode *inode,
			
 
				+int ocfs2_inode_lock_atime(struct inode *inode,
			
 
				 			  struct vfsmount *vfsmnt,
			
 
				 			  int *level)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				 	mlog_entry_void();
			
 
				-	ret = ocfs2_meta_lock(inode, NULL, 0);
			
 
				+	ret = ocfs2_inode_lock(inode, NULL, 0);
			
 
				 	if (ret < 0) {
			
 
				 		mlog_errno(ret);
			
 
				 		return ret;
			
@@ -1890,8 +2062,8 @@ int ocfs2_meta_lock_atime(struct inode *inode,
 
				 	if (ocfs2_should_update_atime(inode, vfsmnt)) {
			
 
				 		struct buffer_head *bh = NULL;
			
 
				 
			
 
				-		ocfs2_meta_unlock(inode, 0);
			
 
				-		ret = ocfs2_meta_lock(inode, &bh, 1);
			
 
				+		ocfs2_inode_unlock(inode, 0);
			
 
				+		ret = ocfs2_inode_lock(inode, &bh, 1);
			
 
				 		if (ret < 0) {
			
 
				 			mlog_errno(ret);
			
 
				 			return ret;
			
@@ -1908,11 +2080,11 @@ int ocfs2_meta_lock_atime(struct inode *inode,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-void ocfs2_meta_unlock(struct inode *inode,
			
 
				+void ocfs2_inode_unlock(struct inode *inode,
			
 
				 		       int ex)
			
 
				 {
			
 
				 	int level = ex ? LKM_EXMODE : LKM_PRMODE;
			
 
				-	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
			
 
				+	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
			
 
				 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				 
			
 
				 	mlog_entry_void();
			
@@ -2320,11 +2492,11 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	/* launch vote thread */
			
 
				-	osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote");
			
 
				-	if (IS_ERR(osb->vote_task)) {
			
 
				-		status = PTR_ERR(osb->vote_task);
			
 
				-		osb->vote_task = NULL;
			
 
				+	/* launch downconvert thread */
			
 
				+	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
			
 
				+	if (IS_ERR(osb->dc_task)) {
			
 
				+		status = PTR_ERR(osb->dc_task);
			
 
				+		osb->dc_task = NULL;
			
 
				 		mlog_errno(status);
			
 
				 		goto bail;
			
 
				 	}
			
@@ -2353,8 +2525,8 @@ local:
 
				 bail:
			
 
				 	if (status < 0) {
			
 
				 		ocfs2_dlm_shutdown_debug(osb);
			
 
				-		if (osb->vote_task)
			
 
				-			kthread_stop(osb->vote_task);
			
 
				+		if (osb->dc_task)
			
 
				+			kthread_stop(osb->dc_task);
			
 
				 	}
			
 
				 
			
 
				 	mlog_exit(status);
			
@@ -2369,9 +2541,9 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb)
 
				 
			
 
				 	ocfs2_drop_osb_locks(osb);
			
 
				 
			
 
				-	if (osb->vote_task) {
			
 
				-		kthread_stop(osb->vote_task);
			
 
				-		osb->vote_task = NULL;
			
 
				+	if (osb->dc_task) {
			
 
				+		kthread_stop(osb->dc_task);
			
 
				+		osb->dc_task = NULL;
			
 
				 	}
			
 
				 
			
 
				 	ocfs2_lock_res_free(&osb->osb_super_lockres);
			
@@ -2527,7 +2699,7 @@ out:
 
				 
			
 
				 /* Mark the lockres as being dropped. It will no longer be
			
 
				  * queued if blocking, but we still may have to wait on it
			
 
				- * being dequeued from the vote thread before we can consider
			
 
				+ * being dequeued from the downconvert thread before we can consider
			
 
				  * it safe to drop. 
			
 
				  *
			
 
				  * You can *not* attempt to call cluster_lock on this lockres anymore. */
			
@@ -2590,14 +2762,7 @@ int ocfs2_drop_inode_locks(struct inode *inode)
 
				 	status = err;
			
 
				 
			
 
				 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
			
 
				-			      &OCFS2_I(inode)->ip_data_lockres);
			
 
				-	if (err < 0)
			
 
				-		mlog_errno(err);
			
 
				-	if (err < 0 && !status)
			
 
				-		status = err;
			
 
				-
			
 
				-	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
			
 
				-			      &OCFS2_I(inode)->ip_meta_lockres);
			
 
				+			      &OCFS2_I(inode)->ip_inode_lockres);
			
 
				 	if (err < 0)
			
 
				 		mlog_errno(err);
			
 
				 	if (err < 0 && !status)
			
@@ -2850,6 +3015,9 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
 
				        	inode = ocfs2_lock_res_inode(lockres);
			
 
				 	mapping = inode->i_mapping;
			
 
				 
			
 
				+	if (S_ISREG(inode->i_mode))
			
 
				+		goto out;
			
 
				+
			
 
				 	/*
			
 
				 	 * We need this before the filemap_fdatawrite() so that it can
			
 
				 	 * transfer the dirty bit from the PTE to the
			
@@ -2875,6 +3043,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
 
				 		filemap_fdatawait(mapping);
			
 
				 	}
			
 
				 
			
 
				+out:
			
 
				 	return UNBLOCK_CONTINUE;
			
 
				 }
			
 
				 
			
@@ -2903,7 +3072,7 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
 
				 
			
 
				 /*
			
 
				  * Does the final reference drop on our dentry lock. Right now this
			
 
				- * happens in the vote thread, but we could choose to simplify the
			
 
				+ * happens in the downconvert thread, but we could choose to simplify the
			
 
				  * dlmglue API and push these off to the ocfs2_wq in the future.
			
 
				  */
			
 
				 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
			
@@ -3042,7 +3211,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
 
				 	mlog(0, "lockres %s blocked.\n", lockres->l_name);
			
 
				 
			
 
				 	/* Detect whether a lock has been marked as going away while
			
 
				-	 * the vote thread was processing other things. A lock can
			
 
				+	 * the downconvert thread was processing other things. A lock can
			
 
				 	 * still be marked with OCFS2_LOCK_FREEING after this check,
			
 
				 	 * but short circuiting here will still save us some
			
 
				 	 * performance. */
			
@@ -3091,13 +3260,104 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
 
				 
			
 
				 	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
			
 
				 
			
 
				-	spin_lock(&osb->vote_task_lock);
			
 
				+	spin_lock(&osb->dc_task_lock);
			
 
				 	if (list_empty(&lockres->l_blocked_list)) {
			
 
				 		list_add_tail(&lockres->l_blocked_list,
			
 
				 			      &osb->blocked_lock_list);
			
 
				 		osb->blocked_lock_count++;
			
 
				 	}
			
 
				-	spin_unlock(&osb->vote_task_lock);
			
 
				+	spin_unlock(&osb->dc_task_lock);
			
 
				+
			
 
				+	mlog_exit_void();
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
			
 
				+{
			
 
				+	unsigned long processed;
			
 
				+	struct ocfs2_lock_res *lockres;
			
 
				+
			
 
				+	mlog_entry_void();
			
 
				+
			
 
				+	spin_lock(&osb->dc_task_lock);
			
 
				+	/* grab this early so we know to try again if a state change and
			
 
				+	 * wake happens part-way through our work  */
			
 
				+	osb->dc_work_sequence = osb->dc_wake_sequence;
			
 
				+
			
 
				+	processed = osb->blocked_lock_count;
			
 
				+	while (processed) {
			
 
				+		BUG_ON(list_empty(&osb->blocked_lock_list));
			
 
				+
			
 
				+		lockres = list_entry(osb->blocked_lock_list.next,
			
 
				+				     struct ocfs2_lock_res, l_blocked_list);
			
 
				+		list_del_init(&lockres->l_blocked_list);
			
 
				+		osb->blocked_lock_count--;
			
 
				+		spin_unlock(&osb->dc_task_lock);
			
 
				+
			
 
				+		BUG_ON(!processed);
			
 
				+		processed--;
			
 
				+
			
 
				+		ocfs2_process_blocked_lock(osb, lockres);
			
 
				+
			
 
				+		spin_lock(&osb->dc_task_lock);
			
 
				+	}
			
 
				+	spin_unlock(&osb->dc_task_lock);
			
 
				 
			
 
				 	mlog_exit_void();
			
 
				 }
			
 
				+
			
 
				+static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
			
 
				+{
			
 
				+	int empty = 0;
			
 
				+
			
 
				+	spin_lock(&osb->dc_task_lock);
			
 
				+	if (list_empty(&osb->blocked_lock_list))
			
 
				+		empty = 1;
			
 
				+
			
 
				+	spin_unlock(&osb->dc_task_lock);
			
 
				+	return empty;
			
 
				+}
			
 
				+
			
 
				+static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
			
 
				+{
			
 
				+	int should_wake = 0;
			
 
				+
			
 
				+	spin_lock(&osb->dc_task_lock);
			
 
				+	if (osb->dc_work_sequence != osb->dc_wake_sequence)
			
 
				+		should_wake = 1;
			
 
				+	spin_unlock(&osb->dc_task_lock);
			
 
				+
			
 
				+	return should_wake;
			
 
				+}
			
 
				+
			
 
				+int ocfs2_downconvert_thread(void *arg)
			
 
				+{
			
 
				+	int status = 0;
			
 
				+	struct ocfs2_super *osb = arg;
			
 
				+
			
 
				+	/* only quit once we've been asked to stop and there is no more
			
 
				+	 * work available */
			
 
				+	while (!(kthread_should_stop() &&
			
 
				+		ocfs2_downconvert_thread_lists_empty(osb))) {
			
 
				+
			
 
				+		wait_event_interruptible(osb->dc_event,
			
 
				+					 ocfs2_downconvert_thread_should_wake(osb) ||
			
 
				+					 kthread_should_stop());
			
 
				+
			
 
				+		mlog(0, "downconvert_thread: awoken\n");
			
 
				+
			
 
				+		ocfs2_downconvert_thread_do_work(osb);
			
 
				+	}
			
 
				+
			
 
				+	osb->dc_task = NULL;
			
 
				+	return status;
			
 
				+}
			
 
				+
			
 
				+void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
			
 
				+{
			
 
				+	spin_lock(&osb->dc_task_lock);
			
 
				+	/* make sure the downconvert thread gets a swipe at whatever changes
			
 
				+	 * the caller may have made to the blocked lock list */
			
 
				+	osb->dc_wake_sequence++;
			
 
				+	spin_unlock(&osb->dc_task_lock);
			
 
				+	wake_up(&osb->dc_event);
			
 
				+}
			
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -49,12 +49,12 @@ struct ocfs2_meta_lvb {
 
				 	__be32       lvb_reserved2;
			
 
				 };
			
 
				 
			
 
				-/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
			
 
				+/* ocfs2_inode_lock_full() 'arg_flags' flags */
			
 
				 /* don't wait on recovery. */
			
 
				 #define OCFS2_META_LOCK_RECOVERY	(0x01)
			
 
				 /* Instruct the dlm not to queue ourselves on the other node. */
			
 
				 #define OCFS2_META_LOCK_NOQUEUE		(0x02)
			
 
				-/* don't block waiting for the vote thread, instead return -EAGAIN */
			
 
				+/* don't block waiting for the downconvert thread, instead return -EAGAIN */
			
 
				 #define OCFS2_LOCK_NONBLOCK		(0x04)
			
 
				 
			
 
				 int ocfs2_dlm_init(struct ocfs2_super *osb);
			
@@ -66,38 +66,32 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
 
				 			       struct inode *inode);
			
 
				 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
			
 
				 				u64 parent, struct inode *inode);
			
 
				+struct ocfs2_file_private;
			
 
				+void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
			
 
				+			      struct ocfs2_file_private *fp);
			
 
				 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
			
 
				 int ocfs2_create_new_inode_locks(struct inode *inode);
			
 
				 int ocfs2_drop_inode_locks(struct inode *inode);
			
 
				-int ocfs2_data_lock_full(struct inode *inode,
			
 
				-			 int write,
			
 
				-			 int arg_flags);
			
 
				-#define ocfs2_data_lock(inode, write) ocfs2_data_lock_full(inode, write, 0)
			
 
				-int ocfs2_data_lock_with_page(struct inode *inode,
			
 
				-			      int write,
			
 
				-			      struct page *page);
			
 
				-void ocfs2_data_unlock(struct inode *inode,
			
 
				-		       int write);
			
 
				 int ocfs2_rw_lock(struct inode *inode, int write);
			
 
				 void ocfs2_rw_unlock(struct inode *inode, int write);
			
 
				 int ocfs2_open_lock(struct inode *inode);
			
 
				 int ocfs2_try_open_lock(struct inode *inode, int write);
			
 
				 void ocfs2_open_unlock(struct inode *inode);
			
 
				-int ocfs2_meta_lock_atime(struct inode *inode,
			
 
				+int ocfs2_inode_lock_atime(struct inode *inode,
			
 
				 			  struct vfsmount *vfsmnt,
			
 
				 			  int *level);
			
 
				-int ocfs2_meta_lock_full(struct inode *inode,
			
 
				+int ocfs2_inode_lock_full(struct inode *inode,
			
 
				 			 struct buffer_head **ret_bh,
			
 
				 			 int ex,
			
 
				 			 int arg_flags);
			
 
				-int ocfs2_meta_lock_with_page(struct inode *inode,
			
 
				+int ocfs2_inode_lock_with_page(struct inode *inode,
			
 
				 			      struct buffer_head **ret_bh,
			
 
				 			      int ex,
			
 
				 			      struct page *page);
			
 
				 /* 99% of the time we don't want to supply any additional flags --
			
 
				  * those are for very specific cases only. */
			
 
				-#define ocfs2_meta_lock(i, b, e) ocfs2_meta_lock_full(i, b, e, 0)
			
 
				-void ocfs2_meta_unlock(struct inode *inode,
			
 
				+#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0)
			
 
				+void ocfs2_inode_unlock(struct inode *inode,
			
 
				 		       int ex);
			
 
				 int ocfs2_super_lock(struct ocfs2_super *osb,
			
 
				 		     int ex);
			
@@ -107,14 +101,17 @@ int ocfs2_rename_lock(struct ocfs2_super *osb);
 
				 void ocfs2_rename_unlock(struct ocfs2_super *osb);
			
 
				 int ocfs2_dentry_lock(struct dentry *dentry, int ex);
			
 
				 void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
			
 
				+int ocfs2_file_lock(struct file *file, int ex, int trylock);
			
 
				+void ocfs2_file_unlock(struct file *file);
			
 
				 
			
 
				 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
			
 
				 void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			
 
				 			       struct ocfs2_lock_res *lockres);
			
 
				 
			
 
				-/* for the vote thread */
			
 
				+/* for the downconvert thread */
			
 
				 void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
			
 
				 				struct ocfs2_lock_res *lockres);
			
 
				+void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb);
			
 
				 
			
 
				 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
			
 
				 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
			
--- a/fs/ocfs2/endian.h
+++ b/fs/ocfs2/endian.h
@@ -37,11 +37,6 @@ static inline void le64_add_cpu(__le64 *var, u64 val)
 
				 	*var = cpu_to_le64(le64_to_cpu(*var) + val);
			
 
				 }
			
 
				 
			
 
				-static inline void le32_and_cpu(__le32 *var, u32 val)
			
 
				-{
			
 
				-	*var = cpu_to_le32(le32_to_cpu(*var) & val);
			
 
				-}
			
 
				-
			
 
				 static inline void be32_add_cpu(__be32 *var, u32 val)
			
 
				 {
			
 
				 	*var = cpu_to_be32(be32_to_cpu(*var) + val);
			
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -58,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 
				 		return ERR_PTR(-ESTALE);
			
 
				 	}
			
 
				 
			
 
				-	inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
			
 
				+	inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0);
			
 
				 
			
 
				 	if (IS_ERR(inode))
			
 
				 		return (void *)inode;
			
@@ -95,7 +95,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 
				 	mlog(0, "find parent of directory %llu\n",
			
 
				 	     (unsigned long long)OCFS2_I(dir)->ip_blkno);
			
 
				 
			
 
				-	status = ocfs2_meta_lock(dir, NULL, 0);
			
 
				+	status = ocfs2_inode_lock(dir, NULL, 0);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -109,7 +109,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 
				 		goto bail_unlock;
			
 
				 	}
			
 
				 
			
 
				-	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
			
 
				+	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0);
			
 
				 	if (IS_ERR(inode)) {
			
 
				 		mlog(ML_ERROR, "Unable to create inode %llu\n",
			
 
				 		     (unsigned long long)blkno);
			
@@ -126,7 +126,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 
				 	parent->d_op = &ocfs2_dentry_ops;
			
 
				 
			
 
				 bail_unlock:
			
 
				-	ocfs2_meta_unlock(dir, 0);
			
 
				+	ocfs2_inode_unlock(dir, 0);
			
 
				 
			
 
				 bail:
			
 
				 	mlog_exit_ptr(parent);
			
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -51,6 +51,7 @@
 
				 #include "inode.h"
			
 
				 #include "ioctl.h"
			
 
				 #include "journal.h"
			
 
				+#include "locks.h"
			
 
				 #include "mmap.h"
			
 
				 #include "suballoc.h"
			
 
				 #include "super.h"
			
@@ -63,6 +64,35 @@ static int ocfs2_sync_inode(struct inode *inode)
 
				 	return sync_mapping_buffers(inode->i_mapping);
			
 
				 }
			
 
				 
			
 
				+static int ocfs2_init_file_private(struct inode *inode, struct file *file)
			
 
				+{
			
 
				+	struct ocfs2_file_private *fp;
			
 
				+
			
 
				+	fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL);
			
 
				+	if (!fp)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	fp->fp_file = file;
			
 
				+	mutex_init(&fp->fp_mutex);
			
 
				+	ocfs2_file_lock_res_init(&fp->fp_flock, fp);
			
 
				+	file->private_data = fp;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_free_file_private(struct inode *inode, struct file *file)
			
 
				+{
			
 
				+	struct ocfs2_file_private *fp = file->private_data;
			
 
				+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				+
			
 
				+	if (fp) {
			
 
				+		ocfs2_simple_drop_lockres(osb, &fp->fp_flock);
			
 
				+		ocfs2_lock_res_free(&fp->fp_flock);
			
 
				+		kfree(fp);
			
 
				+		file->private_data = NULL;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 static int ocfs2_file_open(struct inode *inode, struct file *file)
			
 
				 {
			
 
				 	int status;
			
@@ -89,7 +119,18 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
 
				 
			
 
				 	oi->ip_open_count++;
			
 
				 	spin_unlock(&oi->ip_lock);
			
 
				-	status = 0;
			
 
				+
			
 
				+	status = ocfs2_init_file_private(inode, file);
			
 
				+	if (status) {
			
 
				+		/*
			
 
				+		 * We want to set open count back if we're failing the
			
 
				+		 * open.
			
 
				+		 */
			
 
				+		spin_lock(&oi->ip_lock);
			
 
				+		oi->ip_open_count--;
			
 
				+		spin_unlock(&oi->ip_lock);
			
 
				+	}
			
 
				+
			
 
				 leave:
			
 
				 	mlog_exit(status);
			
 
				 	return status;
			
@@ -108,11 +149,24 @@ static int ocfs2_file_release(struct inode *inode, struct file *file)
 
				 		oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
			
 
				 	spin_unlock(&oi->ip_lock);
			
 
				 
			
 
				+	ocfs2_free_file_private(inode, file);
			
 
				+
			
 
				 	mlog_exit(0);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static int ocfs2_dir_open(struct inode *inode, struct file *file)
			
 
				+{
			
 
				+	return ocfs2_init_file_private(inode, file);
			
 
				+}
			
 
				+
			
 
				+static int ocfs2_dir_release(struct inode *inode, struct file *file)
			
 
				+{
			
 
				+	ocfs2_free_file_private(inode, file);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int ocfs2_sync_file(struct file *file,
			
 
				 			   struct dentry *dentry,
			
 
				 			   int datasync)
			
@@ -382,18 +436,13 @@ static int ocfs2_truncate_file(struct inode *inode,
 
				 
			
 
				 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				 
			
 
				-	/* This forces other nodes to sync and drop their pages. Do
			
 
				-	 * this even if we have a truncate without allocation change -
			
 
				-	 * ocfs2 cluster sizes can be much greater than page size, so
			
 
				-	 * we have to truncate them anyway.  */
			
 
				-	status = ocfs2_data_lock(inode, 1);
			
 
				-	if (status < 0) {
			
 
				-		up_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				-
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				+	/*
			
 
				+	 * The inode lock forced other nodes to sync and drop their
			
 
				+	 * pages, which (correctly) happens even if we have a truncate
			
 
				+	 * without allocation change - ocfs2 cluster sizes can be much
			
 
				+	 * greater than page size, so we have to truncate them
			
 
				+	 * anyway.
			
 
				+	 */
			
 
				 	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
			
 
				 	truncate_inode_pages(inode->i_mapping, new_i_size);
			
 
				 
			
@@ -403,7 +452,7 @@ static int ocfs2_truncate_file(struct inode *inode,
 
				 		if (status)
			
 
				 			mlog_errno(status);
			
 
				 
			
 
				-		goto bail_unlock_data;
			
 
				+		goto bail_unlock_sem;
			
 
				 	}
			
 
				 
			
 
				 	/* alright, we're going to need to do a full blown alloc size
			
@@ -413,25 +462,23 @@ static int ocfs2_truncate_file(struct inode *inode,
 
				 	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				-		goto bail_unlock_data;
			
 
				+		goto bail_unlock_sem;
			
 
				 	}
			
 
				 
			
 
				 	status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				-		goto bail_unlock_data;
			
 
				+		goto bail_unlock_sem;
			
 
				 	}
			
 
				 
			
 
				 	status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				-		goto bail_unlock_data;
			
 
				+		goto bail_unlock_sem;
			
 
				 	}
			
 
				 
			
 
				 	/* TODO: orphan dir cleanup here. */
			
 
				-bail_unlock_data:
			
 
				-	ocfs2_data_unlock(inode, 1);
			
 
				-
			
 
				+bail_unlock_sem:
			
 
				 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				 
			
 
				 bail:
			
@@ -579,7 +626,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
 
				 
			
 
				 	mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
			
 
				 	     "clusters_to_add = %u, extents_to_split = %u\n",
			
 
				-	     (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
			
 
				+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode),
			
 
				 	     le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
			
 
				 
			
 
				 	num_free_extents = ocfs2_num_free_extents(osb, inode, di);
			
@@ -760,7 +807,7 @@ restarted_transaction:
 
				 	     le32_to_cpu(fe->i_clusters),
			
 
				 	     (unsigned long long)le64_to_cpu(fe->i_size));
			
 
				 	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
			
 
				-	     OCFS2_I(inode)->ip_clusters, i_size_read(inode));
			
 
				+	     OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
			
 
				 
			
 
				 leave:
			
 
				 	if (handle) {
			
@@ -917,7 +964,7 @@ static int ocfs2_extend_file(struct inode *inode,
 
				 			     struct buffer_head *di_bh,
			
 
				 			     u64 new_i_size)
			
 
				 {
			
 
				-	int ret = 0, data_locked = 0;
			
 
				+	int ret = 0;
			
 
				 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				 
			
 
				 	BUG_ON(!di_bh);
			
@@ -943,20 +990,6 @@ static int ocfs2_extend_file(struct inode *inode,
 
				 	    && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
			
 
				 		goto out_update_size;
			
 
				 
			
 
				-	/* 
			
 
				-	 * protect the pages that ocfs2_zero_extend is going to be
			
 
				-	 * pulling into the page cache.. we do this before the
			
 
				-	 * metadata extend so that we don't get into the situation
			
 
				-	 * where we've extended the metadata but can't get the data
			
 
				-	 * lock to zero.
			
 
				-	 */
			
 
				-	ret = ocfs2_data_lock(inode, 1);
			
 
				-	if (ret < 0) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out;
			
 
				-	}
			
 
				-	data_locked = 1;
			
 
				-
			
 
				 	/*
			
 
				 	 * The alloc sem blocks people in read/write from reading our
			
 
				 	 * allocation until we're done changing it. We depend on
			
@@ -980,7 +1013,7 @@ static int ocfs2_extend_file(struct inode *inode,
 
				 			up_write(&oi->ip_alloc_sem);
			
 
				 
			
 
				 			mlog_errno(ret);
			
 
				-			goto out_unlock;
			
 
				+			goto out;
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -991,7 +1024,7 @@ static int ocfs2_extend_file(struct inode *inode,
 
				 
			
 
				 	if (ret < 0) {
			
 
				 		mlog_errno(ret);
			
 
				-		goto out_unlock;
			
 
				+		goto out;
			
 
				 	}
			
 
				 
			
 
				 out_update_size:
			
@@ -999,10 +1032,6 @@ out_update_size:
 
				 	if (ret < 0)
			
 
				 		mlog_errno(ret);
			
 
				 
			
 
				-out_unlock:
			
 
				-	if (data_locked)
			
 
				-		ocfs2_data_unlock(inode, 1);
			
 
				-
			
 
				 out:
			
 
				 	return ret;
			
 
				 }
			
@@ -1050,7 +1079,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_meta_lock(inode, &bh, 1);
			
 
				+	status = ocfs2_inode_lock(inode, &bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -1102,7 +1131,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 
				 bail_commit:
			
 
				 	ocfs2_commit_trans(osb, handle);
			
 
				 bail_unlock:
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 bail_unlock_rw:
			
 
				 	if (size_change)
			
 
				 		ocfs2_rw_unlock(inode, 1);
			
@@ -1149,7 +1178,7 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 
				 
			
 
				 	mlog_entry_void();
			
 
				 
			
 
				-	ret = ocfs2_meta_lock(inode, NULL, 0);
			
 
				+	ret = ocfs2_inode_lock(inode, NULL, 0);
			
 
				 	if (ret) {
			
 
				 		if (ret != -ENOENT)
			
 
				 			mlog_errno(ret);
			
@@ -1158,7 +1187,7 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 
				 
			
 
				 	ret = generic_permission(inode, mask, NULL);
			
 
				 
			
 
				-	ocfs2_meta_unlock(inode, 0);
			
 
				+	ocfs2_inode_unlock(inode, 0);
			
 
				 out:
			
 
				 	mlog_exit(ret);
			
 
				 	return ret;
			
@@ -1630,7 +1659,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_meta_lock(inode, &di_bh, 1);
			
 
				+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out_rw_unlock;
			
@@ -1638,7 +1667,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 
				 
			
 
				 	if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
			
 
				 		ret = -EPERM;
			
 
				-		goto out_meta_unlock;
			
 
				+		goto out_inode_unlock;
			
 
				 	}
			
 
				 
			
 
				 	switch (sr->l_whence) {
			
@@ -1652,7 +1681,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 
				 		break;
			
 
				 	default:
			
 
				 		ret = -EINVAL;
			
 
				-		goto out_meta_unlock;
			
 
				+		goto out_inode_unlock;
			
 
				 	}
			
 
				 	sr->l_whence = 0;
			
 
				 
			
@@ -1663,14 +1692,14 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 
				 	    || (sr->l_start + llen) < 0
			
 
				 	    || (sr->l_start + llen) > max_off) {
			
 
				 		ret = -EINVAL;
			
 
				-		goto out_meta_unlock;
			
 
				+		goto out_inode_unlock;
			
 
				 	}
			
 
				 	size = sr->l_start + sr->l_len;
			
 
				 
			
 
				 	if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
			
 
				 		if (sr->l_len <= 0) {
			
 
				 			ret = -EINVAL;
			
 
				-			goto out_meta_unlock;
			
 
				+			goto out_inode_unlock;
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -1678,7 +1707,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 
				 		ret = __ocfs2_write_remove_suid(inode, di_bh);
			
 
				 		if (ret) {
			
 
				 			mlog_errno(ret);
			
 
				-			goto out_meta_unlock;
			
 
				+			goto out_inode_unlock;
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -1704,7 +1733,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 
				 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				-		goto out_meta_unlock;
			
 
				+		goto out_inode_unlock;
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -1714,7 +1743,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 
				 	if (IS_ERR(handle)) {
			
 
				 		ret = PTR_ERR(handle);
			
 
				 		mlog_errno(ret);
			
 
				-		goto out_meta_unlock;
			
 
				+		goto out_inode_unlock;
			
 
				 	}
			
 
				 
			
 
				 	if (change_size && i_size_read(inode) < size)
			
@@ -1727,9 +1756,9 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 
				 
			
 
				 	ocfs2_commit_trans(osb, handle);
			
 
				 
			
 
				-out_meta_unlock:
			
 
				+out_inode_unlock:
			
 
				 	brelse(di_bh);
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 out_rw_unlock:
			
 
				 	ocfs2_rw_unlock(inode, 1);
			
 
				 
			
@@ -1799,7 +1828,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 
				 	 * if we need to make modifications here.
			
 
				 	 */
			
 
				 	for(;;) {
			
 
				-		ret = ocfs2_meta_lock(inode, NULL, meta_level);
			
 
				+		ret = ocfs2_inode_lock(inode, NULL, meta_level);
			
 
				 		if (ret < 0) {
			
 
				 			meta_level = -1;
			
 
				 			mlog_errno(ret);
			
@@ -1817,7 +1846,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 
				 		 * set inode->i_size at the end of a write. */
			
 
				 		if (should_remove_suid(dentry)) {
			
 
				 			if (meta_level == 0) {
			
 
				-				ocfs2_meta_unlock(inode, meta_level);
			
 
				+				ocfs2_inode_unlock(inode, meta_level);
			
 
				 				meta_level = 1;
			
 
				 				continue;
			
 
				 			}
			
@@ -1886,7 +1915,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 
				 		*ppos = saved_pos;
			
 
				 
			
 
				 out_unlock:
			
 
				-	ocfs2_meta_unlock(inode, meta_level);
			
 
				+	ocfs2_inode_unlock(inode, meta_level);
			
 
				 
			
 
				 out:
			
 
				 	return ret;
			
@@ -2099,12 +2128,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 
				 	/*
			
 
				 	 * See the comment in ocfs2_file_aio_read()
			
 
				 	 */
			
 
				-	ret = ocfs2_meta_lock(inode, NULL, 0);
			
 
				+	ret = ocfs2_inode_lock(inode, NULL, 0);
			
 
				 	if (ret < 0) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto bail;
			
 
				 	}
			
 
				-	ocfs2_meta_unlock(inode, 0);
			
 
				+	ocfs2_inode_unlock(inode, 0);
			
 
				 
			
 
				 	ret = generic_file_splice_read(in, ppos, pipe, len, flags);
			
 
				 
			
@@ -2160,12 +2189,12 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 
				 	 * like i_size. This allows the checks down below
			
 
				 	 * generic_file_aio_read() a chance of actually working. 
			
 
				 	 */
			
 
				-	ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
			
 
				+	ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
			
 
				 	if (ret < 0) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto bail;
			
 
				 	}
			
 
				-	ocfs2_meta_unlock(inode, lock_level);
			
 
				+	ocfs2_inode_unlock(inode, lock_level);
			
 
				 
			
 
				 	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
			
 
				 	if (ret == -EINVAL)
			
@@ -2204,6 +2233,7 @@ const struct inode_operations ocfs2_special_file_iops = {
 
				 };
			
 
				 
			
 
				 const struct file_operations ocfs2_fops = {
			
 
				+	.llseek		= generic_file_llseek,
			
 
				 	.read		= do_sync_read,
			
 
				 	.write		= do_sync_write,
			
 
				 	.mmap		= ocfs2_mmap,
			
@@ -2216,16 +2246,21 @@ const struct file_operations ocfs2_fops = {
 
				 #ifdef CONFIG_COMPAT
			
 
				 	.compat_ioctl   = ocfs2_compat_ioctl,
			
 
				 #endif
			
 
				+	.flock		= ocfs2_flock,
			
 
				 	.splice_read	= ocfs2_file_splice_read,
			
 
				 	.splice_write	= ocfs2_file_splice_write,
			
 
				 };
			
 
				 
			
 
				 const struct file_operations ocfs2_dops = {
			
 
				+	.llseek		= generic_file_llseek,
			
 
				 	.read		= generic_read_dir,
			
 
				 	.readdir	= ocfs2_readdir,
			
 
				 	.fsync		= ocfs2_sync_file,
			
 
				+	.release	= ocfs2_dir_release,
			
 
				+	.open		= ocfs2_dir_open,
			
 
				 	.ioctl		= ocfs2_ioctl,
			
 
				 #ifdef CONFIG_COMPAT
			
 
				 	.compat_ioctl   = ocfs2_compat_ioctl,
			
 
				 #endif
			
 
				+	.flock		= ocfs2_flock,
			
 
				 };
			
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -32,6 +32,12 @@ extern const struct inode_operations ocfs2_file_iops;
 
				 extern const struct inode_operations ocfs2_special_file_iops;
			
 
				 struct ocfs2_alloc_context;
			
 
				 
			
 
				+struct ocfs2_file_private {
			
 
				+	struct file		*fp_file;
			
 
				+	struct mutex		fp_mutex;
			
 
				+	struct ocfs2_lock_res	fp_flock;
			
 
				+};
			
 
				+
			
 
				 enum ocfs2_alloc_restarted {
			
 
				 	RESTART_NONE = 0,
			
 
				 	RESTART_TRANS,
			
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -30,9 +30,6 @@
 
				 #include <linux/highmem.h>
			
 
				 #include <linux/kmod.h>
			
 
				 
			
 
				-#include <cluster/heartbeat.h>
			
 
				-#include <cluster/nodemanager.h>
			
 
				-
			
 
				 #include <dlm/dlmapi.h>
			
 
				 
			
 
				 #define MLOG_MASK_PREFIX ML_SUPER
			
@@ -44,13 +41,9 @@
 
				 #include "heartbeat.h"
			
 
				 #include "inode.h"
			
 
				 #include "journal.h"
			
 
				-#include "vote.h"
			
 
				 
			
 
				 #include "buffer_head_io.h"
			
 
				 
			
 
				-#define OCFS2_HB_NODE_DOWN_PRI     (0x0000002)
			
 
				-#define OCFS2_HB_NODE_UP_PRI	   OCFS2_HB_NODE_DOWN_PRI
			
 
				-
			
 
				 static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
			
 
				 					    int bit);
			
 
				 static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
			
@@ -64,9 +57,7 @@ static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
 
				 void ocfs2_init_node_maps(struct ocfs2_super *osb)
			
 
				 {
			
 
				 	spin_lock_init(&osb->node_map_lock);
			
 
				-	ocfs2_node_map_init(&osb->mounted_map);
			
 
				 	ocfs2_node_map_init(&osb->recovery_map);
			
 
				-	ocfs2_node_map_init(&osb->umount_map);
			
 
				 	ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs);
			
 
				 }
			
 
				 
			
@@ -87,24 +78,7 @@ static void ocfs2_do_node_down(int node_num,
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	if (ocfs2_node_map_test_bit(osb, &osb->umount_map, node_num)) {
			
 
				-		/* If a node is in the umount map, then we've been
			
 
				-		 * expecting him to go down and we know ahead of time
			
 
				-		 * that recovery is not necessary. */
			
 
				-		ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num);
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				 	ocfs2_recovery_thread(osb, node_num);
			
 
				-
			
 
				-	ocfs2_remove_node_from_vote_queues(osb, node_num);
			
 
				-}
			
 
				-
			
 
				-static void ocfs2_hb_node_down_cb(struct o2nm_node *node,
			
 
				-				  int node_num,
			
 
				-				  void *data)
			
 
				-{
			
 
				-	ocfs2_do_node_down(node_num, (struct ocfs2_super *) data);
			
 
				 }
			
 
				 
			
 
				 /* Called from the dlm when it's about to evict a node. We may also
			
@@ -121,27 +95,8 @@ static void ocfs2_dlm_eviction_cb(int node_num,
 
				 	ocfs2_do_node_down(node_num, osb);
			
 
				 }
			
 
				 
			
 
				-static void ocfs2_hb_node_up_cb(struct o2nm_node *node,
			
 
				-				int node_num,
			
 
				-				void *data)
			
 
				-{
			
 
				-	struct ocfs2_super *osb = data;
			
 
				-
			
 
				-	BUG_ON(osb->node_num == node_num);
			
 
				-
			
 
				-	mlog(0, "node up event for %d\n", node_num);
			
 
				-	ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num);
			
 
				-}
			
 
				-
			
 
				 void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb)
			
 
				 {
			
 
				-	o2hb_setup_callback(&osb->osb_hb_down, O2HB_NODE_DOWN_CB,
			
 
				-			    ocfs2_hb_node_down_cb, osb,
			
 
				-			    OCFS2_HB_NODE_DOWN_PRI);
			
 
				-
			
 
				-	o2hb_setup_callback(&osb->osb_hb_up, O2HB_NODE_UP_CB,
			
 
				-			    ocfs2_hb_node_up_cb, osb, OCFS2_HB_NODE_UP_PRI);
			
 
				-
			
 
				 	/* Not exactly a heartbeat callback, but leads to essentially
			
 
				 	 * the same path so we set it up here. */
			
 
				 	dlm_setup_eviction_cb(&osb->osb_eviction_cb,
			
@@ -149,39 +104,6 @@ void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb)
 
				 			      osb);
			
 
				 }
			
 
				 
			
 
				-/* Most functions here are just stubs for now... */
			
 
				-int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	int status;
			
 
				-
			
 
				-	if (ocfs2_mount_local(osb))
			
 
				-		return 0;
			
 
				-
			
 
				-	status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				-	status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
			
 
				-	}
			
 
				-
			
 
				-bail:
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				-void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	if (ocfs2_mount_local(osb))
			
 
				-		return;
			
 
				-
			
 
				-	o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
			
 
				-	o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up);
			
 
				-}
			
 
				-
			
 
				 void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
			
 
				 {
			
 
				 	int ret;
			
@@ -341,8 +263,6 @@ int ocfs2_recovery_map_set(struct ocfs2_super *osb,
 
				 
			
 
				 	spin_lock(&osb->node_map_lock);
			
 
				 
			
 
				-	__ocfs2_node_map_clear_bit(&osb->mounted_map, num);
			
 
				-
			
 
				 	if (!test_bit(num, osb->recovery_map.map)) {
			
 
				 	    __ocfs2_node_map_set_bit(&osb->recovery_map, num);
			
 
				 	    set = 1;
			
--- a/fs/ocfs2/heartbeat.h
+++ b/fs/ocfs2/heartbeat.h
@@ -29,8 +29,6 @@
 
				 void ocfs2_init_node_maps(struct ocfs2_super *osb);
			
 
				 
			
 
				 void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb);
			
 
				-int ocfs2_register_hb_callbacks(struct ocfs2_super *osb);
			
 
				-void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb);
			
 
				 void ocfs2_stop_heartbeat(struct ocfs2_super *osb);
			
 
				 
			
 
				 /* node map functions - used to keep track of mounted and in-recovery
			
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -49,7 +49,6 @@
 
				 #include "symlink.h"
			
 
				 #include "sysfile.h"
			
 
				 #include "uptodate.h"
			
 
				-#include "vote.h"
			
 
				 
			
 
				 #include "buffer_head_io.h"
			
 
				 
			
@@ -58,8 +57,11 @@ struct ocfs2_find_inode_args
 
				 	u64		fi_blkno;
			
 
				 	unsigned long	fi_ino;
			
 
				 	unsigned int	fi_flags;
			
 
				+	unsigned int	fi_sysfile_type;
			
 
				 };
			
 
				 
			
 
				+static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES];
			
 
				+
			
 
				 static int ocfs2_read_locked_inode(struct inode *inode,
			
 
				 				   struct ocfs2_find_inode_args *args);
			
 
				 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque);
			
@@ -107,7 +109,8 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
 
				 		oi->ip_attr |= OCFS2_DIRSYNC_FL;
			
 
				 }
			
 
				 
			
 
				-struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
			
 
				+struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
			
 
				+			 int sysfile_type)
			
 
				 {
			
 
				 	struct inode *inode = NULL;
			
 
				 	struct super_block *sb = osb->sb;
			
@@ -127,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
 
				 	args.fi_blkno = blkno;
			
 
				 	args.fi_flags = flags;
			
 
				 	args.fi_ino = ino_from_blkno(sb, blkno);
			
 
				+	args.fi_sysfile_type = sysfile_type;
			
 
				 
			
 
				 	inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
			
 
				 			     ocfs2_init_locked_inode, &args);
			
@@ -201,6 +205,9 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 
				 
			
 
				 	inode->i_ino = args->fi_ino;
			
 
				 	OCFS2_I(inode)->ip_blkno = args->fi_blkno;
			
 
				+	if (args->fi_sysfile_type != 0)
			
 
				+		lockdep_set_class(&inode->i_mutex,
			
 
				+			&ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
			
 
				 
			
 
				 	mlog_exit(0);
			
 
				 	return 0;
			
@@ -322,7 +329,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
				 		 */
			
 
				 		BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL);
			
 
				 
			
 
				-		ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
			
 
				+		ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres,
			
 
				 					  OCFS2_LOCK_TYPE_META, 0, inode);
			
 
				 
			
 
				 		ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
			
@@ -333,10 +340,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
				 				  OCFS2_LOCK_TYPE_RW, inode->i_generation,
			
 
				 				  inode);
			
 
				 
			
 
				-	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
			
 
				-				  OCFS2_LOCK_TYPE_DATA, inode->i_generation,
			
 
				-				  inode);
			
 
				-
			
 
				 	ocfs2_set_inode_flags(inode);
			
 
				 
			
 
				 	status = 0;
			
@@ -414,7 +417,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
				 	if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
			
 
				 		generation = osb->fs_generation;
			
 
				 
			
 
				-	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
			
 
				+	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres,
			
 
				 				  OCFS2_LOCK_TYPE_META,
			
 
				 				  generation, inode);
			
 
				 
			
@@ -429,7 +432,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
				 			mlog_errno(status);
			
 
				 			return status;
			
 
				 		}
			
 
				-		status = ocfs2_meta_lock(inode, NULL, 0);
			
 
				+		status = ocfs2_inode_lock(inode, NULL, 0);
			
 
				 		if (status) {
			
 
				 			make_bad_inode(inode);
			
 
				 			mlog_errno(status);
			
@@ -484,7 +487,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 
				 
			
 
				 bail:
			
 
				 	if (can_lock)
			
 
				-		ocfs2_meta_unlock(inode, 0);
			
 
				+		ocfs2_inode_unlock(inode, 0);
			
 
				 
			
 
				 	if (status < 0)
			
 
				 		make_bad_inode(inode);
			
@@ -586,7 +589,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 
				 	}
			
 
				 
			
 
				 	mutex_lock(&inode_alloc_inode->i_mutex);
			
 
				-	status = ocfs2_meta_lock(inode_alloc_inode, &inode_alloc_bh, 1);
			
 
				+	status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		mutex_unlock(&inode_alloc_inode->i_mutex);
			
 
				 
			
@@ -617,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 
				 	}
			
 
				 
			
 
				 	di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec);
			
 
				-	le32_and_cpu(&di->i_flags, ~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
			
 
				+	di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
			
 
				 
			
 
				 	status = ocfs2_journal_dirty(handle, di_bh);
			
 
				 	if (status < 0) {
			
@@ -635,7 +638,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 
				 bail_commit:
			
 
				 	ocfs2_commit_trans(osb, handle);
			
 
				 bail_unlock:
			
 
				-	ocfs2_meta_unlock(inode_alloc_inode, 1);
			
 
				+	ocfs2_inode_unlock(inode_alloc_inode, 1);
			
 
				 	mutex_unlock(&inode_alloc_inode->i_mutex);
			
 
				 	brelse(inode_alloc_bh);
			
 
				 bail:
			
@@ -709,7 +712,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
 
				 	 * delete_inode operation. We do this now to avoid races with
			
 
				 	 * recovery completion on other nodes. */
			
 
				 	mutex_lock(&orphan_dir_inode->i_mutex);
			
 
				-	status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1);
			
 
				+	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		mutex_unlock(&orphan_dir_inode->i_mutex);
			
 
				 
			
@@ -718,8 +721,8 @@ static int ocfs2_wipe_inode(struct inode *inode,
 
				 	}
			
 
				 
			
 
				 	/* we do this while holding the orphan dir lock because we
			
 
				-	 * don't want recovery being run from another node to vote for
			
 
				-	 * an inode delete on us -- this will result in two nodes
			
 
				+	 * don't want recovery being run from another node to try an
			
 
				+	 * inode delete underneath us -- this will result in two nodes
			
 
				 	 * truncating the same file! */
			
 
				 	status = ocfs2_truncate_for_delete(osb, inode, di_bh);
			
 
				 	if (status < 0) {
			
@@ -733,7 +736,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
 
				 		mlog_errno(status);
			
 
				 
			
 
				 bail_unlock_dir:
			
 
				-	ocfs2_meta_unlock(orphan_dir_inode, 1);
			
 
				+	ocfs2_inode_unlock(orphan_dir_inode, 1);
			
 
				 	mutex_unlock(&orphan_dir_inode->i_mutex);
			
 
				 	brelse(orphan_dir_bh);
			
 
				 bail:
			
@@ -744,7 +747,7 @@ bail:
 
				 }
			
 
				 
			
 
				 /* There is a series of simple checks that should be done before a
			
 
				- * vote is even considered. Encapsulate those in this function. */
			
 
				+ * trylock is even considered. Encapsulate those in this function. */
			
 
				 static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
			
 
				 {
			
 
				 	int ret = 0;
			
@@ -758,14 +761,14 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	/* If we're coming from process_vote we can't go into our own
			
 
				+	/* If we're coming from downconvert_thread we can't go into our own
			
 
				 	 * voting [hello, deadlock city!], so unforuntately we just
			
 
				 	 * have to skip deleting this guy. That's OK though because
			
 
				 	 * the node who's doing the actual deleting should handle it
			
 
				 	 * anyway. */
			
 
				-	if (current == osb->vote_task) {
			
 
				+	if (current == osb->dc_task) {
			
 
				 		mlog(0, "Skipping delete of %lu because we're currently "
			
 
				-		     "in process_vote\n", inode->i_ino);
			
 
				+		     "in downconvert\n", inode->i_ino);
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
@@ -779,10 +782,9 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
 
				 		goto bail_unlock;
			
 
				 	}
			
 
				 
			
 
				-	/* If we have voted "yes" on the wipe of this inode for
			
 
				-	 * another node, it will be marked here so we can safely skip
			
 
				-	 * it. Recovery will cleanup any inodes we might inadvertantly
			
 
				-	 * skip here. */
			
 
				+	/* If we have allowed wipe of this inode for another node, it
			
 
				+	 * will be marked here so we can safely skip it. Recovery will
			
 
				+	 * clean up any inodes we might inadvertently skip here. */
			
 
				 	if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) {
			
 
				 		mlog(0, "Skipping delete of %lu because another node "
			
 
				 		     "has done this for us.\n", inode->i_ino);
			
@@ -929,13 +931,13 @@ void ocfs2_delete_inode(struct inode *inode)
 
				 
			
 
				 	/* Lock down the inode. This gives us an up to date view of
			
 
				 	 * it's metadata (for verification), and allows us to
			
 
				-	 * serialize delete_inode votes. 
			
 
				+	 * serialize delete_inode on multiple nodes.
			
 
				 	 *
			
 
				 	 * Even though we might be doing a truncate, we don't take the
			
 
				 	 * allocation lock here as it won't be needed - nobody will
			
 
				 	 * have the file open.
			
 
				 	 */
			
 
				-	status = ocfs2_meta_lock(inode, &di_bh, 1);
			
 
				+	status = ocfs2_inode_lock(inode, &di_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -947,15 +949,15 @@ void ocfs2_delete_inode(struct inode *inode)
 
				 	 * before we go ahead and wipe the inode. */
			
 
				 	status = ocfs2_query_inode_wipe(inode, di_bh, &wipe);
			
 
				 	if (!wipe || status < 0) {
			
 
				-		/* Error and inode busy vote both mean we won't be
			
 
				+		/* Error and remote inode busy both mean we won't be
			
 
				 		 * removing the inode, so they take almost the same
			
 
				 		 * path. */
			
 
				 		if (status < 0)
			
 
				 			mlog_errno(status);
			
 
				 
			
 
				-		/* Someone in the cluster has voted to not wipe this
			
 
				-		 * inode, or it was never completely orphaned. Write
			
 
				-		 * out the pages and exit now. */
			
 
				+		/* Someone in the cluster has disallowed a wipe of
			
 
				+		 * this inode, or it was never completely
			
 
				+		 * orphaned. Write out the pages and exit now. */
			
 
				 		ocfs2_cleanup_delete_inode(inode, 1);
			
 
				 		goto bail_unlock_inode;
			
 
				 	}
			
@@ -981,7 +983,7 @@ void ocfs2_delete_inode(struct inode *inode)
 
				 	OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
			
 
				 
			
 
				 bail_unlock_inode:
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 	brelse(di_bh);
			
 
				 bail_unblock:
			
 
				 	status = sigprocmask(SIG_SETMASK, &oldset, NULL);
			
@@ -1008,15 +1010,14 @@ void ocfs2_clear_inode(struct inode *inode)
 
				 	mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
			
 
				 			"Inode=%lu\n", inode->i_ino);
			
 
				 
			
 
				-	/* For remove delete_inode vote, we hold open lock before,
			
 
				-	 * now it is time to unlock PR and EX open locks. */
			
 
				+	/* To prevent remote deletes we hold open lock before, now it
			
 
				+	 * is time to unlock PR and EX open locks. */
			
 
				 	ocfs2_open_unlock(inode);
			
 
				 
			
 
				 	/* Do these before all the other work so that we don't bounce
			
 
				-	 * the vote thread while waiting to destroy the locks. */
			
 
				+	 * the downconvert thread while waiting to destroy the locks. */
			
 
				 	ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
			
 
				-	ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres);
			
 
				-	ocfs2_mark_lockres_freeing(&oi->ip_data_lockres);
			
 
				+	ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
			
 
				 	ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
			
 
				 
			
 
				 	/* We very well may get a clear_inode before all an inodes
			
@@ -1039,8 +1040,7 @@ void ocfs2_clear_inode(struct inode *inode)
 
				 		mlog_errno(status);
			
 
				 
			
 
				 	ocfs2_lock_res_free(&oi->ip_rw_lockres);
			
 
				-	ocfs2_lock_res_free(&oi->ip_meta_lockres);
			
 
				-	ocfs2_lock_res_free(&oi->ip_data_lockres);
			
 
				+	ocfs2_lock_res_free(&oi->ip_inode_lockres);
			
 
				 	ocfs2_lock_res_free(&oi->ip_open_lockres);
			
 
				 
			
 
				 	ocfs2_metadata_cache_purge(inode);
			
@@ -1184,15 +1184,15 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
 
				 	}
			
 
				 	spin_unlock(&OCFS2_I(inode)->ip_lock);
			
 
				 
			
 
				-	/* Let ocfs2_meta_lock do the work of updating our struct
			
 
				+	/* Let ocfs2_inode_lock do the work of updating our struct
			
 
				 	 * inode for us. */
			
 
				-	status = ocfs2_meta_lock(inode, NULL, 0);
			
 
				+	status = ocfs2_inode_lock(inode, NULL, 0);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
 
				 		goto bail;
			
 
				 	}
			
 
				-	ocfs2_meta_unlock(inode, 0);
			
 
				+	ocfs2_inode_unlock(inode, 0);
			
 
				 bail:
			
 
				 	mlog_exit(status);
			
 
				 
			
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -34,8 +34,7 @@ struct ocfs2_inode_info
 
				 	u64			ip_blkno;
			
 
				 
			
 
				 	struct ocfs2_lock_res		ip_rw_lockres;
			
 
				-	struct ocfs2_lock_res		ip_meta_lockres;
			
 
				-	struct ocfs2_lock_res		ip_data_lockres;
			
 
				+	struct ocfs2_lock_res		ip_inode_lockres;
			
 
				 	struct ocfs2_lock_res		ip_open_lockres;
			
 
				 
			
 
				 	/* protects allocation changes on this inode. */
			
@@ -121,9 +120,10 @@ void ocfs2_delete_inode(struct inode *inode);
 
				 void ocfs2_drop_inode(struct inode *inode);
			
 
				 
			
 
				 /* Flags for ocfs2_iget() */
			
 
				-#define OCFS2_FI_FLAG_SYSFILE		0x4
			
 
				-#define OCFS2_FI_FLAG_ORPHAN_RECOVERY	0x8
			
 
				-struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
			
 
				+#define OCFS2_FI_FLAG_SYSFILE		0x1
			
 
				+#define OCFS2_FI_FLAG_ORPHAN_RECOVERY	0x2
			
 
				+struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
			
 
				+			 int sysfile_type);
			
 
				 int ocfs2_inode_init_private(struct inode *inode);
			
 
				 int ocfs2_inode_revalidate(struct dentry *dentry);
			
 
				 int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
			
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -20,6 +20,7 @@
 
				 
			
 
				 #include "ocfs2_fs.h"
			
 
				 #include "ioctl.h"
			
 
				+#include "resize.h"
			
 
				 
			
 
				 #include <linux/ext2_fs.h>
			
 
				 
			
@@ -27,14 +28,14 @@ static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
 
				 {
			
 
				 	int status;
			
 
				 
			
 
				-	status = ocfs2_meta_lock(inode, NULL, 0);
			
 
				+	status = ocfs2_inode_lock(inode, NULL, 0);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		return status;
			
 
				 	}
			
 
				 	ocfs2_get_inode_flags(OCFS2_I(inode));
			
 
				 	*flags = OCFS2_I(inode)->ip_attr;
			
 
				-	ocfs2_meta_unlock(inode, 0);
			
 
				+	ocfs2_inode_unlock(inode, 0);
			
 
				 
			
 
				 	mlog_exit(status);
			
 
				 	return status;
			
@@ -52,7 +53,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 
				 
			
 
				 	mutex_lock(&inode->i_mutex);
			
 
				 
			
 
				-	status = ocfs2_meta_lock(inode, &bh, 1);
			
 
				+	status = ocfs2_inode_lock(inode, &bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto bail;
			
@@ -100,7 +101,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 
				 
			
 
				 	ocfs2_commit_trans(osb, handle);
			
 
				 bail_unlock:
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 bail:
			
 
				 	mutex_unlock(&inode->i_mutex);
			
 
				 
			
@@ -115,8 +116,10 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
 
				 	unsigned int cmd, unsigned long arg)
			
 
				 {
			
 
				 	unsigned int flags;
			
 
				+	int new_clusters;
			
 
				 	int status;
			
 
				 	struct ocfs2_space_resv sr;
			
 
				+	struct ocfs2_new_group_input input;
			
 
				 
			
 
				 	switch (cmd) {
			
 
				 	case OCFS2_IOC_GETFLAGS:
			
@@ -140,6 +143,23 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
 
				 			return -EFAULT;
			
 
				 
			
 
				 		return ocfs2_change_file_space(filp, cmd, &sr);
			
 
				+	case OCFS2_IOC_GROUP_EXTEND:
			
 
				+		if (!capable(CAP_SYS_RESOURCE))
			
 
				+			return -EPERM;
			
 
				+
			
 
				+		if (get_user(new_clusters, (int __user *)arg))
			
 
				+			return -EFAULT;
			
 
				+
			
 
				+		return ocfs2_group_extend(inode, new_clusters);
			
 
				+	case OCFS2_IOC_GROUP_ADD:
			
 
				+	case OCFS2_IOC_GROUP_ADD64:
			
 
				+		if (!capable(CAP_SYS_RESOURCE))
			
 
				+			return -EPERM;
			
 
				+
			
 
				+		if (copy_from_user(&input, (int __user *) arg, sizeof(input)))
			
 
				+			return -EFAULT;
			
 
				+
			
 
				+		return ocfs2_group_add(inode, &input);
			
 
				 	default:
			
 
				 		return -ENOTTY;
			
 
				 	}
			
@@ -162,6 +182,9 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 
				 	case OCFS2_IOC_RESVSP64:
			
 
				 	case OCFS2_IOC_UNRESVSP:
			
 
				 	case OCFS2_IOC_UNRESVSP64:
			
 
				+	case OCFS2_IOC_GROUP_EXTEND:
			
 
				+	case OCFS2_IOC_GROUP_ADD:
			
 
				+	case OCFS2_IOC_GROUP_ADD64:
			
 
				 		break;
			
 
				 	default:
			
 
				 		return -ENOIOCTLCMD;
			
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -44,7 +44,6 @@
 
				 #include "localalloc.h"
			
 
				 #include "slot_map.h"
			
 
				 #include "super.h"
			
 
				-#include "vote.h"
			
 
				 #include "sysfile.h"
			
 
				 
			
 
				 #include "buffer_head_io.h"
			
@@ -103,7 +102,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
 
				 	mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n",
			
 
				 	     journal->j_trans_id, flushed);
			
 
				 
			
 
				-	ocfs2_kick_vote_thread(osb);
			
 
				+	ocfs2_wake_downconvert_thread(osb);
			
 
				 	wake_up(&journal->j_checkpointed);
			
 
				 finally:
			
 
				 	mlog_exit(status);
			
@@ -314,14 +313,18 @@ int ocfs2_journal_dirty_data(handle_t *handle,
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-#define OCFS2_DEFAULT_COMMIT_INTERVAL 	(HZ * 5)
			
 
				+#define OCFS2_DEFAULT_COMMIT_INTERVAL 	(HZ * JBD_DEFAULT_MAX_COMMIT_AGE)
			
 
				 
			
 
				 void ocfs2_set_journal_params(struct ocfs2_super *osb)
			
 
				 {
			
 
				 	journal_t *journal = osb->journal->j_journal;
			
 
				+	unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
			
 
				+
			
 
				+	if (osb->osb_commit_interval)
			
 
				+		commit_interval = osb->osb_commit_interval;
			
 
				 
			
 
				 	spin_lock(&journal->j_state_lock);
			
 
				-	journal->j_commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
			
 
				+	journal->j_commit_interval = commit_interval;
			
 
				 	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
			
 
				 		journal->j_flags |= JFS_BARRIER;
			
 
				 	else
			
@@ -337,7 +340,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
 
				 	struct ocfs2_dinode *di = NULL;
			
 
				 	struct buffer_head *bh = NULL;
			
 
				 	struct ocfs2_super *osb;
			
 
				-	int meta_lock = 0;
			
 
				+	int inode_lock = 0;
			
 
				 
			
 
				 	mlog_entry_void();
			
 
				 
			
@@ -367,14 +370,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
 
				 	/* Skip recovery waits here - journal inode metadata never
			
 
				 	 * changes in a live cluster so it can be considered an
			
 
				 	 * exception to the rule. */
			
 
				-	status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
			
 
				+	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ERESTARTSYS)
			
 
				 			mlog(ML_ERROR, "Could not get lock on journal!\n");
			
 
				 		goto done;
			
 
				 	}
			
 
				 
			
 
				-	meta_lock = 1;
			
 
				+	inode_lock = 1;
			
 
				 	di = (struct ocfs2_dinode *)bh->b_data;
			
 
				 
			
 
				 	if (inode->i_size <  OCFS2_MIN_JOURNAL_SIZE) {
			
@@ -414,8 +417,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
 
				 	status = 0;
			
 
				 done:
			
 
				 	if (status < 0) {
			
 
				-		if (meta_lock)
			
 
				-			ocfs2_meta_unlock(inode, 1);
			
 
				+		if (inode_lock)
			
 
				+			ocfs2_inode_unlock(inode, 1);
			
 
				 		if (bh != NULL)
			
 
				 			brelse(bh);
			
 
				 		if (inode) {
			
@@ -544,7 +547,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
 
				 	OCFS2_I(inode)->ip_open_count--;
			
 
				 
			
 
				 	/* unlock our journal */
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 
			
 
				 	brelse(journal->j_bh);
			
 
				 	journal->j_bh = NULL;
			
@@ -883,8 +886,8 @@ restart:
 
				 	ocfs2_super_unlock(osb, 1);
			
 
				 
			
 
				 	/* We always run recovery on our own orphan dir - the dead
			
 
				-	 * node(s) may have voted "no" on an inode delete earlier. A
			
 
				-	 * revote is therefore required. */
			
 
				+	 * node(s) may have disallowed a previous inode delete. Re-processing
			
 
				+	 * is therefore required. */
			
 
				 	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
			
 
				 					NULL);
			
 
				 
			
@@ -973,9 +976,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 
				 	}
			
 
				 	SET_INODE_JOURNAL(inode);
			
 
				 
			
 
				-	status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
			
 
				+	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
			
 
				 	if (status < 0) {
			
 
				-		mlog(0, "status returned from ocfs2_meta_lock=%d\n", status);
			
 
				+		mlog(0, "status returned from ocfs2_inode_lock=%d\n", status);
			
 
				 		if (status != -ERESTARTSYS)
			
 
				 			mlog(ML_ERROR, "Could not lock journal!\n");
			
 
				 		goto done;
			
@@ -1047,7 +1050,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 
				 done:
			
 
				 	/* drop the lock on this nodes journal */
			
 
				 	if (got_lock)
			
 
				-		ocfs2_meta_unlock(inode, 1);
			
 
				+		ocfs2_inode_unlock(inode, 1);
			
 
				 
			
 
				 	if (inode)
			
 
				 		iput(inode);
			
@@ -1162,14 +1165,14 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb,
 
				 	SET_INODE_JOURNAL(inode);
			
 
				 
			
 
				 	flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
			
 
				-	status = ocfs2_meta_lock_full(inode, NULL, 1, flags);
			
 
				+	status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -EAGAIN)
			
 
				 			mlog_errno(status);
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 bail:
			
 
				 	if (inode)
			
 
				 		iput(inode);
			
@@ -1241,7 +1244,7 @@ static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
 
				 
			
 
				 	/* Skip bad inodes so that recovery can continue */
			
 
				 	iter = ocfs2_iget(p->osb, ino,
			
 
				-			  OCFS2_FI_FLAG_ORPHAN_RECOVERY);
			
 
				+			  OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
			
 
				 	if (IS_ERR(iter))
			
 
				 		return 0;
			
 
				 
			
@@ -1277,7 +1280,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 
				 	}	
			
 
				 
			
 
				 	mutex_lock(&orphan_dir_inode->i_mutex);
			
 
				-	status = ocfs2_meta_lock(orphan_dir_inode, NULL, 0);
			
 
				+	status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto out;
			
@@ -1293,7 +1296,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 
				 	*head = priv.head;
			
 
				 
			
 
				 out_cluster:
			
 
				-	ocfs2_meta_unlock(orphan_dir_inode, 0);
			
 
				+	ocfs2_inode_unlock(orphan_dir_inode, 0);
			
 
				 out:
			
 
				 	mutex_unlock(&orphan_dir_inode->i_mutex);
			
 
				 	iput(orphan_dir_inode);
			
@@ -1380,10 +1383,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 
				 		iter = oi->ip_next_orphan;
			
 
				 
			
 
				 		spin_lock(&oi->ip_lock);
			
 
				-		/* Delete voting may have set these on the assumption
			
 
				-		 * that the other node would wipe them successfully.
			
 
				-		 * If they are still in the node's orphan dir, we need
			
 
				-		 * to reset that state. */
			
 
				+		/* The remote delete code may have set these on the
			
 
				+		 * assumption that the other node would wipe them
			
 
				+		 * successfully.  If they are still in the node's
			
 
				+		 * orphan dir, we need to reset that state. */
			
 
				 		oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE);
			
 
				 
			
 
				 		/* Set the proper information to get us going into
			
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -278,6 +278,12 @@ int                  ocfs2_journal_dirty_data(handle_t *handle,
 
				 /* simple file updates like chmod, etc. */
			
 
				 #define OCFS2_INODE_UPDATE_CREDITS 1
			
 
				 
			
 
				+/* group extend. inode update and last group update. */
			
 
				+#define OCFS2_GROUP_EXTEND_CREDITS	(OCFS2_INODE_UPDATE_CREDITS + 1)
			
 
				+
			
 
				+/* group add. inode update and the new group update. */
			
 
				+#define OCFS2_GROUP_ADD_CREDITS	(OCFS2_INODE_UPDATE_CREDITS + 1)
			
 
				+
			
 
				 /* get one bit out of a suballocator: dinode + group descriptor +
			
 
				  * prev. group desc. if we relink. */
			
 
				 #define OCFS2_SUBALLOC_ALLOC (3)
			
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -75,18 +75,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 
				 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
			
 
				 					  struct inode *local_alloc_inode);
			
 
				 
			
 
				-/*
			
 
				- * Determine how large our local alloc window should be, in bits.
			
 
				- *
			
 
				- * These values (and the behavior in ocfs2_alloc_should_use_local) have
			
 
				- * been chosen so that most allocations, including new block groups go
			
 
				- * through local alloc.
			
 
				- */
			
 
				 static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb)
			
 
				 {
			
 
				-	BUG_ON(osb->s_clustersize_bits < 12);
			
 
				+	BUG_ON(osb->s_clustersize_bits > 20);
			
 
				 
			
 
				-	return 2048 >> (osb->s_clustersize_bits - 12);
			
 
				+	/* Size local alloc windows by the megabyte */
			
 
				+	return osb->local_alloc_size << (20 - osb->s_clustersize_bits);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -96,18 +90,23 @@ static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb)
 
				 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
			
 
				 {
			
 
				 	int la_bits = ocfs2_local_alloc_window_bits(osb);
			
 
				+	int ret = 0;
			
 
				 
			
 
				 	if (osb->local_alloc_state != OCFS2_LA_ENABLED)
			
 
				-		return 0;
			
 
				+		goto bail;
			
 
				 
			
 
				 	/* la_bits should be at least twice the size (in clusters) of
			
 
				 	 * a new block group. We want to be sure block group
			
 
				 	 * allocations go through the local alloc, so allow an
			
 
				 	 * allocation to take up to half the bitmap. */
			
 
				 	if (bits > (la_bits / 2))
			
 
				-		return 0;
			
 
				+		goto bail;
			
 
				 
			
 
				-	return 1;
			
 
				+	ret = 1;
			
 
				+bail:
			
 
				+	mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
			
 
				+	     osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 int ocfs2_load_local_alloc(struct ocfs2_super *osb)
			
@@ -121,6 +120,19 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 
				 
			
 
				 	mlog_entry_void();
			
 
				 
			
 
				+	if (ocfs2_mount_local(osb))
			
 
				+		goto bail;
			
 
				+
			
 
				+	if (osb->local_alloc_size == 0)
			
 
				+		goto bail;
			
 
				+
			
 
				+	if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) {
			
 
				+		mlog(ML_NOTICE, "Requested local alloc window %d is larger "
			
 
				+		     "than max possible %u. Using defaults.\n",
			
 
				+		     ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1));
			
 
				+		osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
			
 
				+	}
			
 
				+
			
 
				 	/* read the alloc off disk */
			
 
				 	inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
			
 
				 					    osb->slot_num);
			
@@ -181,6 +193,9 @@ bail:
 
				 	if (inode)
			
 
				 		iput(inode);
			
 
				 
			
 
				+	mlog(0, "Local alloc window bits = %d\n",
			
 
				+	     ocfs2_local_alloc_window_bits(osb));
			
 
				+
			
 
				 	mlog_exit(status);
			
 
				 	return status;
			
 
				 }
			
@@ -231,7 +246,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 
				 
			
 
				 	mutex_lock(&main_bm_inode->i_mutex);
			
 
				 
			
 
				-	status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1);
			
 
				+	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto out_mutex;
			
@@ -286,7 +301,7 @@ out_unlock:
 
				 	if (main_bm_bh)
			
 
				 		brelse(main_bm_bh);
			
 
				 
			
 
				-	ocfs2_meta_unlock(main_bm_inode, 1);
			
 
				+	ocfs2_inode_unlock(main_bm_inode, 1);
			
 
				 
			
 
				 out_mutex:
			
 
				 	mutex_unlock(&main_bm_inode->i_mutex);
			
@@ -399,7 +414,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 
				 
			
 
				 	mutex_lock(&main_bm_inode->i_mutex);
			
 
				 
			
 
				-	status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1);
			
 
				+	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto out_mutex;
			
@@ -424,7 +439,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 
				 	ocfs2_commit_trans(osb, handle);
			
 
				 
			
 
				 out_unlock:
			
 
				-	ocfs2_meta_unlock(main_bm_inode, 1);
			
 
				+	ocfs2_inode_unlock(main_bm_inode, 1);
			
 
				 
			
 
				 out_mutex:
			
 
				 	mutex_unlock(&main_bm_inode->i_mutex);
			
@@ -521,6 +536,9 @@ bail:
 
				 		iput(local_alloc_inode);
			
 
				 	}
			
 
				 
			
 
				+	mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
			
 
				+	     status);
			
 
				+
			
 
				 	mlog_exit(status);
			
 
				 	return status;
			
 
				 }
			
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -0,0 +1,125 @@
 
				+/* -*- mode: c; c-basic-offset: 8; -*-
			
 
				+ * vim: noexpandtab sw=8 ts=8 sts=0:
			
 
				+ *
			
 
				+ * locks.c
			
 
				+ *
			
 
				+ * Userspace file locking support
			
 
				+ *
			
 
				+ * Copyright (C) 2007 Oracle.  All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public
			
 
				+ * License as published by the Free Software Foundation; either
			
 
				+ * version 2 of the License, or (at your option) any later version.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public
			
 
				+ * License along with this program; if not, write to the
			
 
				+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
			
 
				+ * Boston, MA 021110-1307, USA.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/fs.h>
			
 
				+
			
 
				+#define MLOG_MASK_PREFIX ML_INODE
			
 
				+#include <cluster/masklog.h>
			
 
				+
			
 
				+#include "ocfs2.h"
			
 
				+
			
 
				+#include "dlmglue.h"
			
 
				+#include "file.h"
			
 
				+#include "locks.h"
			
 
				+
			
 
				+/*
			
 
				+ * Take (or convert) the cluster-wide flock for @file and then record
			
 
				+ * the lock locally through flock_lock_file_wait().
			
 
				+ *
			
 
				+ * @file:  open file; file->private_data holds its ocfs2_file_private
			
 
				+ * @inode: unused here
			
 
				+ * @cmd:   anything that is not IS_SETLKW() is attempted as a trylock
			
 
				+ * @fl:    the request; F_WRLCK asks for level 1, everything else level 0
			
 
				+ *
			
 
				+ * Returns 0 on success, -EWOULDBLOCK when a trylock got -EAGAIN, or the
			
 
				+ * error from ocfs2_file_lock() / flock_lock_file_wait().
			
 
				+ */
			
 
				+static int ocfs2_do_flock(struct file *file, struct inode *inode,
			
 
				+			  int cmd, struct file_lock *fl)
			
 
				+{
			
 
				+	int ret = 0, level = 0, trylock = 0;
			
 
				+	struct ocfs2_file_private *fp = file->private_data;
			
 
				+	struct ocfs2_lock_res *lockres = &fp->fp_flock;
			
 
				+
			
 
				+	if (fl->fl_type == F_WRLCK)
			
 
				+		level = 1;
			
 
				+	if (!IS_SETLKW(cmd))
			
 
				+		trylock = 1;
			
 
				+
			
 
				+	/* fp_mutex serializes lock and unlock requests on this file. */
			
 
				+	mutex_lock(&fp->fp_mutex);
			
 
				+
			
 
				+	if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
			
 
				+	    lockres->l_level > LKM_NLMODE) {
			
 
				+		int old_level = 0;
			
 
				+
			
 
				+		if (lockres->l_level == LKM_EXMODE)
			
 
				+			old_level = 1;
			
 
				+
			
 
				+		/* Already held at the requested level: nothing to do. */
			
 
				+		if (level == old_level)
			
 
				+			goto out;
			
 
				+
			
 
				+		/*
			
 
				+		 * Converting an existing lock is not guaranteed to be
			
 
				+		 * atomic, so we can get away with simply unlocking
			
 
				+		 * here and allowing the lock code to try at the new
			
 
				+		 * level.
			
 
				+		 */
			
 
				+
			
 
				+		flock_lock_file_wait(file,
			
 
				+				     &(struct file_lock){.fl_type = F_UNLCK});
			
 
				+
			
 
				+		ocfs2_file_unlock(file);
			
 
				+	}
			
 
				+
			
 
				+	ret = ocfs2_file_lock(file, level, trylock);
			
 
				+	if (ret) {
			
 
				+		if (ret == -EAGAIN && trylock)
			
 
				+			ret = -EWOULDBLOCK;
			
 
				+		else
			
 
				+			mlog_errno(ret);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	ret = flock_lock_file_wait(file, fl);
			
 
				+	/*
			
 
				+	 * The local lock was not recorded, so give back the lock taken
			
 
				+	 * by ocfs2_file_lock() above; otherwise it would stay held
			
 
				+	 * with no local lock to release it later.
			
 
				+	 */
			
 
				+	if (ret)
			
 
				+		ocfs2_file_unlock(file);
			
 
				+
			
 
				+out:
			
 
				+	mutex_unlock(&fp->fp_mutex);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Unlock path: while holding fp_mutex, call ocfs2_file_unlock() and
			
 
				+ * then hand the request @fl to flock_lock_file_wait(), whose result
			
 
				+ * is returned.  @cmd is not used.
			
 
				+ */
			
 
				+static int ocfs2_do_funlock(struct file *file, int cmd, struct file_lock *fl)
			
 
				+{
			
 
				+	struct ocfs2_file_private *priv = file->private_data;
			
 
				+	int status;
			
 
				+
			
 
				+	mutex_lock(&priv->fp_mutex);
			
 
				+
			
 
				+	ocfs2_file_unlock(file);
			
 
				+	status = flock_lock_file_wait(file, fl);
			
 
				+
			
 
				+	mutex_unlock(&priv->fp_mutex);
			
 
				+
			
 
				+	return status;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * flock() entry point.  Overall flow of ocfs2_flock() was influenced
			
 
				+ * by gfs2_flock().
			
 
				+ *
			
 
				+ * A request without FL_FLOCK, or on an inode for which
			
 
				+ * __mandatory_lock() is true, fails with -ENOLCK.  With the
			
 
				+ * OCFS2_MOUNT_LOCALFLOCKS option or a local mount the request goes
			
 
				+ * straight to flock_lock_file_wait().  Otherwise F_UNLCK is handled
			
 
				+ * by ocfs2_do_funlock() and every other type by ocfs2_do_flock().
			
 
				+ */
			
 
				+int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
			
 
				+{
			
 
				+	struct inode *inode = file->f_mapping->host;
			
 
				+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				+	int local_only;
			
 
				+
			
 
				+	if (!(fl->fl_flags & FL_FLOCK) || __mandatory_lock(inode))
			
 
				+		return -ENOLCK;
			
 
				+
			
 
				+	local_only = (osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
			
 
				+		     ocfs2_mount_local(osb);
			
 
				+	if (local_only)
			
 
				+		return flock_lock_file_wait(file, fl);
			
 
				+
			
 
				+	if (fl->fl_type != F_UNLCK)
			
 
				+		return ocfs2_do_flock(file, inode, cmd, fl);
			
 
				+
			
 
				+	return ocfs2_do_funlock(file, cmd, fl);
			
 
				+}
			
--- a/fs/ocfs2/locks.h
+++ b/fs/ocfs2/locks.h
@@ -1,9 +1,9 @@
 
				 /* -*- mode: c; c-basic-offset: 8; -*-
			
 
				  * vim: noexpandtab sw=8 ts=8 sts=0:
			
 
				  *
			
 
				- * vote.h
			
 
				+ * locks.h
			
 
				  *
			
 
				- * description here
			
 
				+ * Function prototypes for Userspace file locking support
			
 
				  *
			
 
				  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
			
 
				  *
			
@@ -23,26 +23,9 @@
 
				  * Boston, MA 021110-1307, USA.
			
 
				  */
			
 
				 
			
 
				+#ifndef OCFS2_LOCKS_H
			
 
				+#define OCFS2_LOCKS_H
			
 
				 
			
 
				-#ifndef VOTE_H
			
 
				-#define VOTE_H
			
 
				+int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl);
			
 
				 
			
 
				-int ocfs2_vote_thread(void *arg);
			
 
				-static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	spin_lock(&osb->vote_task_lock);
			
 
				-	/* make sure the voting thread gets a swipe at whatever changes
			
 
				-	 * the caller may have made to the voting state */
			
 
				-	osb->vote_wake_sequence++;
			
 
				-	spin_unlock(&osb->vote_task_lock);
			
 
				-	wake_up(&osb->vote_event);
			
 
				-}
			
 
				-
			
 
				-int ocfs2_request_mount_vote(struct ocfs2_super *osb);
			
 
				-int ocfs2_request_umount_vote(struct ocfs2_super *osb);
			
 
				-int ocfs2_register_net_handlers(struct ocfs2_super *osb);
			
 
				-void ocfs2_unregister_net_handlers(struct ocfs2_super *osb);
			
 
				-
			
 
				-void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb,
			
 
				-					int node_num);
			
 
				-#endif
			
 
				+#endif /* OCFS2_LOCKS_H */
			
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -168,7 +168,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 
				 	 * node. Taking the data lock will also ensure that we don't
			
 
				 	 * attempt page truncation as part of a downconvert.
			
 
				 	 */
			
 
				-	ret = ocfs2_meta_lock(inode, &di_bh, 1);
			
 
				+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
			
 
				 	if (ret < 0) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out;
			
@@ -181,21 +181,12 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 
				 	 */
			
 
				 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				 
			
 
				-	ret = ocfs2_data_lock(inode, 1);
			
 
				-	if (ret < 0) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_meta_unlock;
			
 
				-	}
			
 
				-
			
 
				 	ret = __ocfs2_page_mkwrite(inode, di_bh, page);
			
 
				 
			
 
				-	ocfs2_data_unlock(inode, 1);
			
 
				-
			
 
				-out_meta_unlock:
			
 
				 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				 
			
 
				 	brelse(di_bh);
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 
			
 
				 out:
			
 
				 	ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
			
@@ -214,13 +205,13 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 
				 {
			
 
				 	int ret = 0, lock_level = 0;
			
 
				 
			
 
				-	ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode,
			
 
				+	ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode,
			
 
				 				    file->f_vfsmnt, &lock_level);
			
 
				 	if (ret < 0) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out;
			
 
				 	}
			
 
				-	ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level);
			
 
				+	ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level);
			
 
				 out:
			
 
				 	vma->vm_ops = &ocfs2_file_vm_ops;
			
 
				 	vma->vm_flags |= VM_CAN_NONLINEAR;
			
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -60,7 +60,6 @@
 
				 #include "symlink.h"
			
 
				 #include "sysfile.h"
			
 
				 #include "uptodate.h"
			
 
				-#include "vote.h"
			
 
				 
			
 
				 #include "buffer_head_io.h"
			
 
				 
			
@@ -116,7 +115,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 
				 	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
			
 
				 	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
			
 
				 
			
 
				-	status = ocfs2_meta_lock(dir, NULL, 0);
			
 
				+	status = ocfs2_inode_lock(dir, NULL, 0);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -129,7 +128,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 
				 	if (status < 0)
			
 
				 		goto bail_add;
			
 
				 
			
 
				-	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
			
 
				+	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0);
			
 
				 	if (IS_ERR(inode)) {
			
 
				 		ret = ERR_PTR(-EACCES);
			
 
				 		goto bail_unlock;
			
@@ -176,8 +175,8 @@ bail_unlock:
 
				 	/* Don't drop the cluster lock until *after* the d_add --
			
 
				 	 * unlink on another node will message us to remove that
			
 
				 	 * dentry under this lock so otherwise we can race this with
			
 
				-	 * the vote thread and have a stale dentry. */
			
 
				-	ocfs2_meta_unlock(dir, 0);
			
 
				+	 * the downconvert thread and have a stale dentry. */
			
 
				+	ocfs2_inode_unlock(dir, 0);
			
 
				 
			
 
				 bail:
			
 
				 
			
@@ -209,7 +208,7 @@ static int ocfs2_mknod(struct inode *dir,
 
				 	/* get our super block */
			
 
				 	osb = OCFS2_SB(dir->i_sb);
			
 
				 
			
 
				-	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
			
 
				+	status = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -323,7 +322,7 @@ leave:
 
				 	if (handle)
			
 
				 		ocfs2_commit_trans(osb, handle);
			
 
				 
			
 
				-	ocfs2_meta_unlock(dir, 1);
			
 
				+	ocfs2_inode_unlock(dir, 1);
			
 
				 
			
 
				 	if (status == -ENOSPC)
			
 
				 		mlog(0, "Disk is full\n");
			
@@ -553,7 +552,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 
				 	if (S_ISDIR(inode->i_mode))
			
 
				 		return -EPERM;
			
 
				 
			
 
				-	err = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
			
 
				+	err = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
			
 
				 	if (err < 0) {
			
 
				 		if (err != -ENOENT)
			
 
				 			mlog_errno(err);
			
@@ -578,7 +577,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	err = ocfs2_meta_lock(inode, &fe_bh, 1);
			
 
				+	err = ocfs2_inode_lock(inode, &fe_bh, 1);
			
 
				 	if (err < 0) {
			
 
				 		if (err != -ENOENT)
			
 
				 			mlog_errno(err);
			
@@ -643,10 +642,10 @@ static int ocfs2_link(struct dentry *old_dentry,
 
				 out_commit:
			
 
				 	ocfs2_commit_trans(osb, handle);
			
 
				 out_unlock_inode:
			
 
				-	ocfs2_meta_unlock(inode, 1);
			
 
				+	ocfs2_inode_unlock(inode, 1);
			
 
				 
			
 
				 out:
			
 
				-	ocfs2_meta_unlock(dir, 1);
			
 
				+	ocfs2_inode_unlock(dir, 1);
			
 
				 
			
 
				 	if (de_bh)
			
 
				 		brelse(de_bh);
			
@@ -720,7 +719,7 @@ static int ocfs2_unlink(struct inode *dir,
 
				 		return -EPERM;
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_meta_lock(dir, &parent_node_bh, 1);
			
 
				+	status = ocfs2_inode_lock(dir, &parent_node_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -745,7 +744,7 @@ static int ocfs2_unlink(struct inode *dir,
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_meta_lock(inode, &fe_bh, 1);
			
 
				+	status = ocfs2_inode_lock(inode, &fe_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -765,7 +764,7 @@ static int ocfs2_unlink(struct inode *dir,
 
				 
			
 
				 	status = ocfs2_remote_dentry_delete(dentry);
			
 
				 	if (status < 0) {
			
 
				-		/* This vote should succeed under all normal
			
 
				+		/* This remote delete should succeed under all normal
			
 
				 		 * circumstances. */
			
 
				 		mlog_errno(status);
			
 
				 		goto leave;
			
@@ -841,13 +840,13 @@ leave:
 
				 		ocfs2_commit_trans(osb, handle);
			
 
				 
			
 
				 	if (child_locked)
			
 
				-		ocfs2_meta_unlock(inode, 1);
			
 
				+		ocfs2_inode_unlock(inode, 1);
			
 
				 
			
 
				-	ocfs2_meta_unlock(dir, 1);
			
 
				+	ocfs2_inode_unlock(dir, 1);
			
 
				 
			
 
				 	if (orphan_dir) {
			
 
				 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
			
 
				-		ocfs2_meta_unlock(orphan_dir, 1);
			
 
				+		ocfs2_inode_unlock(orphan_dir, 1);
			
 
				 		mutex_unlock(&orphan_dir->i_mutex);
			
 
				 		iput(orphan_dir);
			
 
				 	}
			
@@ -908,7 +907,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 
				 			inode1 = tmpinode;
			
 
				 		}
			
 
				 		/* lock id2 */
			
 
				-		status = ocfs2_meta_lock(inode2, bh2, 1);
			
 
				+		status = ocfs2_inode_lock(inode2, bh2, 1);
			
 
				 		if (status < 0) {
			
 
				 			if (status != -ENOENT)
			
 
				 				mlog_errno(status);
			
@@ -917,14 +916,14 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 
				 	}
			
 
				 
			
 
				 	/* lock id1 */
			
 
				-	status = ocfs2_meta_lock(inode1, bh1, 1);
			
 
				+	status = ocfs2_inode_lock(inode1, bh1, 1);
			
 
				 	if (status < 0) {
			
 
				 		/*
			
 
				 		 * An error return must mean that no cluster locks
			
 
				 		 * were held on function exit.
			
 
				 		 */
			
 
				 		if (oi1->ip_blkno != oi2->ip_blkno)
			
 
				-			ocfs2_meta_unlock(inode2, 1);
			
 
				+			ocfs2_inode_unlock(inode2, 1);
			
 
				 
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -937,10 +936,10 @@ bail:
 
				 
			
 
				 static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
			
 
				 {
			
 
				-	ocfs2_meta_unlock(inode1, 1);
			
 
				+	ocfs2_inode_unlock(inode1, 1);
			
 
				 
			
 
				 	if (inode1 != inode2)
			
 
				-		ocfs2_meta_unlock(inode2, 1);
			
 
				+		ocfs2_inode_unlock(inode2, 1);
			
 
				 }
			
 
				 
			
 
				 static int ocfs2_rename(struct inode *old_dir,
			
@@ -1031,10 +1030,11 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 
			
 
				 	/*
			
 
				 	 * Aside from allowing a meta data update, the locking here
			
 
				-	 * also ensures that the vote thread on other nodes won't have
			
 
				-	 * to concurrently downconvert the inode and the dentry locks.
			
 
				+	 * also ensures that the downconvert thread on other nodes
			
 
				+	 * won't have to concurrently downconvert the inode and the
			
 
				+	 * dentry locks.
			
 
				 	 */
			
 
				-	status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1);
			
 
				+	status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -1143,7 +1143,7 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 			goto bail;
			
 
				 		}
			
 
				 
			
 
				-		status = ocfs2_meta_lock(new_inode, &newfe_bh, 1);
			
 
				+		status = ocfs2_inode_lock(new_inode, &newfe_bh, 1);
			
 
				 		if (status < 0) {
			
 
				 			if (status != -ENOENT)
			
 
				 				mlog_errno(status);
			
@@ -1355,14 +1355,14 @@ bail:
 
				 		ocfs2_double_unlock(old_dir, new_dir);
			
 
				 
			
 
				 	if (old_child_locked)
			
 
				-		ocfs2_meta_unlock(old_inode, 1);
			
 
				+		ocfs2_inode_unlock(old_inode, 1);
			
 
				 
			
 
				 	if (new_child_locked)
			
 
				-		ocfs2_meta_unlock(new_inode, 1);
			
 
				+		ocfs2_inode_unlock(new_inode, 1);
			
 
				 
			
 
				 	if (orphan_dir) {
			
 
				 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
			
 
				-		ocfs2_meta_unlock(orphan_dir, 1);
			
 
				+		ocfs2_inode_unlock(orphan_dir, 1);
			
 
				 		mutex_unlock(&orphan_dir->i_mutex);
			
 
				 		iput(orphan_dir);
			
 
				 	}
			
@@ -1530,7 +1530,7 @@ static int ocfs2_symlink(struct inode *dir,
 
				 	credits = ocfs2_calc_symlink_credits(sb);
			
 
				 
			
 
				 	/* lock the parent directory */
			
 
				-	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
			
 
				+	status = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		if (status != -ENOENT)
			
 
				 			mlog_errno(status);
			
@@ -1657,7 +1657,7 @@ bail:
 
				 	if (handle)
			
 
				 		ocfs2_commit_trans(osb, handle);
			
 
				 
			
 
				-	ocfs2_meta_unlock(dir, 1);
			
 
				+	ocfs2_inode_unlock(dir, 1);
			
 
				 
			
 
				 	if (new_fe_bh)
			
 
				 		brelse(new_fe_bh);
			
@@ -1735,7 +1735,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 
				 
			
 
				 	mutex_lock(&orphan_dir_inode->i_mutex);
			
 
				 
			
 
				-	status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1);
			
 
				+	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto leave;
			
@@ -1745,7 +1745,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 
				 					      orphan_dir_bh, name,
			
 
				 					      OCFS2_ORPHAN_NAMELEN, de_bh);
			
 
				 	if (status < 0) {
			
 
				-		ocfs2_meta_unlock(orphan_dir_inode, 1);
			
 
				+		ocfs2_inode_unlock(orphan_dir_inode, 1);
			
 
				 
			
 
				 		mlog_errno(status);
			
 
				 		goto leave;
			
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -101,6 +101,7 @@ enum ocfs2_unlock_action {
 
				 					       * about to be
			
 
				 					       * dropped. */
			
 
				 #define OCFS2_LOCK_QUEUED        (0x00000100) /* queued for downconvert */
			
 
				+#define OCFS2_LOCK_NOCACHE       (0x00000200) /* don't use a holder count */
			
 
				 
			
 
				 struct ocfs2_lock_res_ops;
			
 
				 
			
@@ -170,6 +171,7 @@ enum ocfs2_mount_options
 
				 	OCFS2_MOUNT_NOINTR  = 1 << 2,   /* Don't catch signals */
			
 
				 	OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
			
 
				 	OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */
			
 
				+	OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
			
 
				 };
			
 
				 
			
 
				 #define OCFS2_OSB_SOFT_RO	0x0001
			
@@ -189,9 +191,7 @@ struct ocfs2_super
 
				 	struct ocfs2_slot_info *slot_info;
			
 
				 
			
 
				 	spinlock_t node_map_lock;
			
 
				-	struct ocfs2_node_map mounted_map;
			
 
				 	struct ocfs2_node_map recovery_map;
			
 
				-	struct ocfs2_node_map umount_map;
			
 
				 
			
 
				 	u64 root_blkno;
			
 
				 	u64 system_dir_blkno;
			
@@ -231,7 +231,9 @@ struct ocfs2_super
 
				 	wait_queue_head_t checkpoint_event;
			
 
				 	atomic_t needs_checkpoint;
			
 
				 	struct ocfs2_journal *journal;
			
 
				+	unsigned long osb_commit_interval;
			
 
				 
			
 
				+	int local_alloc_size;
			
 
				 	enum ocfs2_local_alloc_state local_alloc_state;
			
 
				 	struct buffer_head *local_alloc_bh;
			
 
				 	u64 la_last_gd;
			
@@ -254,28 +256,21 @@ struct ocfs2_super
 
				 
			
 
				 	wait_queue_head_t recovery_event;
			
 
				 
			
 
				-	spinlock_t vote_task_lock;
			
 
				-	struct task_struct *vote_task;
			
 
				-	wait_queue_head_t vote_event;
			
 
				-	unsigned long vote_wake_sequence;
			
 
				-	unsigned long vote_work_sequence;
			
 
				+	spinlock_t dc_task_lock;
			
 
				+	struct task_struct *dc_task;
			
 
				+	wait_queue_head_t dc_event;
			
 
				+	unsigned long dc_wake_sequence;
			
 
				+	unsigned long dc_work_sequence;
			
 
				 
			
 
				+	/*
			
 
				+	 * Any thread can add locks to the list, but the downconvert
			
 
				+	 * thread is the only one allowed to remove locks. Any change
			
 
				+	 * to this rule requires updating
			
 
				+	 * ocfs2_downconvert_thread_do_work().
			
 
				+	 */
			
 
				 	struct list_head blocked_lock_list;
			
 
				 	unsigned long blocked_lock_count;
			
 
				 
			
 
				-	struct list_head vote_list;
			
 
				-	int vote_count;
			
 
				-
			
 
				-	u32 net_key;
			
 
				-	spinlock_t net_response_lock;
			
 
				-	unsigned int net_response_ids;
			
 
				-	struct list_head net_response_list;
			
 
				-
			
 
				-	struct o2hb_callback_func osb_hb_up;
			
 
				-	struct o2hb_callback_func osb_hb_down;
			
 
				-
			
 
				-	struct list_head	osb_net_handlers;
			
 
				-
			
 
				 	wait_queue_head_t		osb_mount_event;
			
 
				 
			
 
				 	/* Truncate log info */
			
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -231,6 +231,20 @@ struct ocfs2_space_resv {
 
				 #define OCFS2_IOC_RESVSP64	_IOW ('X', 42, struct ocfs2_space_resv)
			
 
				 #define OCFS2_IOC_UNRESVSP64	_IOW ('X', 43, struct ocfs2_space_resv)
			
 
				 
			
 
				+/*
			
 
				+ * Used to pass group descriptor data when online resize is done.
			
 
				+ * This is the argument type encoded in OCFS2_IOC_GROUP_ADD and
			
 
				+ * OCFS2_IOC_GROUP_ADD64.
			
 
				+ */
			
 
				+struct ocfs2_new_group_input {
			
 
				+	__u64 group;		/* Group descriptor's blkno. */
			
 
				+	__u32 clusters;		/* Total number of clusters in this group */
			
 
				+	__u32 frees;		/* Total free clusters in this group */
			
 
				+	__u16 chain;		/* Chain for this group */
			
 
				+	__u16 reserved1;	/* Reserved */
			
 
				+	__u32 reserved2;	/* Reserved */
			
 
				+};
			
 
				+
			
 
				+#define OCFS2_IOC_GROUP_EXTEND	_IOW('o', 1, int)
			
 
				+#define OCFS2_IOC_GROUP_ADD	_IOW('o', 2,struct ocfs2_new_group_input)
			
 
				+#define OCFS2_IOC_GROUP_ADD64	_IOW('o', 3,struct ocfs2_new_group_input)
			
 
				+
			
 
				 /*
			
 
				  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
			
 
				  */
			
@@ -256,6 +270,14 @@ struct ocfs2_space_resv {
 
				 /* Journal limits (in bytes) */
			
 
				 #define OCFS2_MIN_JOURNAL_SIZE		(4 * 1024 * 1024)
			
 
				 
			
 
				+/*
			
 
				+ * Default local alloc size (in megabytes)
			
 
				+ *
			
 
				+ * The value chosen should be such that most allocations, including new
			
 
				+ * block groups, use local alloc.
			
 
				+ */
			
 
				+#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE	8
			
 
				+
			
 
				 struct ocfs2_system_inode_info {
			
 
				 	char	*si_name;
			
 
				 	int	si_iflags;
			
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -45,6 +45,7 @@ enum ocfs2_lock_type {
 
				 	OCFS2_LOCK_TYPE_RW,
			
 
				 	OCFS2_LOCK_TYPE_DENTRY,
			
 
				 	OCFS2_LOCK_TYPE_OPEN,
			
 
				+	OCFS2_LOCK_TYPE_FLOCK,
			
 
				 	OCFS2_NUM_LOCK_TYPES
			
 
				 };
			
 
				 
			
@@ -73,6 +74,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 
				 		case OCFS2_LOCK_TYPE_OPEN:
			
 
				 			c = 'O';
			
 
				 			break;
			
 
				+		case OCFS2_LOCK_TYPE_FLOCK:
			
 
				+			c = 'F';
			
 
				+			break;
			
 
				 		default:
			
 
				 			c = '\0';
			
 
				 	}
			
@@ -90,6 +94,7 @@ static char *ocfs2_lock_type_strings[] = {
 
				 	[OCFS2_LOCK_TYPE_RW] = "Write/Read",
			
 
				 	[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
			
 
				 	[OCFS2_LOCK_TYPE_OPEN] = "Open",
			
 
				+	[OCFS2_LOCK_TYPE_FLOCK] = "Flock",
			
 
				 };
			
 
				 
			
 
				 static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
			
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -0,0 +1,634 @@
 
				+/* -*- mode: c; c-basic-offset: 8; -*-
			
 
				+ * vim: noexpandtab sw=8 ts=8 sts=0:
			
 
				+ *
			
 
				+ * resize.c
			
 
				+ *
			
 
				+ * volume resize.
			
 
				+ * Inspired by ext3/resize.c.
			
 
				+ *
			
 
				+ * Copyright (C) 2007 Oracle.  All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public
			
 
				+ * License as published by the Free Software Foundation; either
			
 
				+ * version 2 of the License, or (at your option) any later version.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public
			
 
				+ * License along with this program; if not, write to the
			
 
				+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
			
 
				+ * Boston, MA 021110-1307, USA.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/fs.h>
			
 
				+#include <linux/types.h>
			
 
				+
			
 
				+#define MLOG_MASK_PREFIX ML_DISK_ALLOC
			
 
				+#include <cluster/masklog.h>
			
 
				+
			
 
				+#include "ocfs2.h"
			
 
				+
			
 
				+#include "alloc.h"
			
 
				+#include "dlmglue.h"
			
 
				+#include "inode.h"
			
 
				+#include "journal.h"
			
 
				+#include "super.h"
			
 
				+#include "sysfile.h"
			
 
				+#include "uptodate.h"
			
 
				+
			
 
				+#include "buffer_head_io.h"
			
 
				+#include "suballoc.h"
			
 
				+#include "resize.h"
			
 
				+
			
 
				+/*
			
 
				+ * Find the backup superblocks that fall in the cluster group described
			
 
				+ * by @gd and set (@set != 0) or clear (@set == 0) their bits in the
			
 
				+ * group bitmap.  Bit positions are taken modulo @cl_cpg.
			
 
				+ *
			
 
				+ * NOTE(review): @new_clusters and @first_new_cluster are only logged,
			
 
				+ * so every backup located in this group is touched, not just those in
			
 
				+ * the newly added range - confirm that is what callers expect.
			
 
				+ *
			
 
				+ * Return how many backups we find in the group.
			
 
				+ */
			
 
				+static u16 ocfs2_calc_new_backup_super(struct inode *inode,
			
 
				+				       struct ocfs2_group_desc *gd,
			
 
				+				       int new_clusters,
			
 
				+				       u32 first_new_cluster,
			
 
				+				       u16 cl_cpg,
			
 
				+				       int set)
			
 
				+{
			
 
				+	int i;
			
 
				+	u16 backups = 0;
			
 
				+	u32 cluster;
			
 
				+	u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno);
			
 
				+
			
 
				+	/* Pair the exit trace below with an entry trace. */
			
 
				+	mlog_entry("(new_clusters=%d, first_new_cluster = %u, set = %d)\n",
			
 
				+		   new_clusters, first_new_cluster, set);
			
 
				+
			
 
				+	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
			
 
				+		blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
			
 
				+		cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
			
 
				+
			
 
				+		/*
			
 
				+		 * Skip backups in groups before this one and stop at
			
 
				+		 * the first backup that lies in a later group.
			
 
				+		 */
			
 
				+		gd_blkno = ocfs2_which_cluster_group(inode, cluster);
			
 
				+		if (gd_blkno < lgd_blkno)
			
 
				+			continue;
			
 
				+		else if (gd_blkno > lgd_blkno)
			
 
				+			break;
			
 
				+
			
 
				+		if (set)
			
 
				+			ocfs2_set_bit(cluster % cl_cpg,
			
 
				+				      (unsigned long *)gd->bg_bitmap);
			
 
				+		else
			
 
				+			ocfs2_clear_bit(cluster % cl_cpg,
			
 
				+					(unsigned long *)gd->bg_bitmap);
			
 
				+		backups++;
			
 
				+	}
			
 
				+
			
 
				+	mlog_exit_void();
			
 
				+	return backups;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Account for @new_clusters clusters appended to the last cluster group.
			
 
				+ *
			
 
				+ * Under @handle, the group descriptor in @group_bh gets its bit counts
			
 
				+ * raised (less any backup superblocks found in the group when
			
 
				+ * OCFS2_FEATURE_COMPAT_BACKUP_SB is set).  Then the bitmap inode in
			
 
				+ * @bm_bh gets its chain record, bitmap totals, i_clusters and i_size
			
 
				+ * raised to match.  @first_new_cluster is only logged and forwarded to
			
 
				+ * ocfs2_calc_new_backup_super().
			
 
				+ *
			
 
				+ * Returns 0 on success or a negative error from the journal calls.  If
			
 
				+ * a call fails after the group descriptor was modified, the changes
			
 
				+ * made to the group descriptor buffer are undone.  The result of the
			
 
				+ * final ocfs2_journal_dirty() on @bm_bh is not checked.
			
 
				+ */
			
 
				+static int ocfs2_update_last_group_and_inode(handle_t *handle,
			
 
				+					     struct inode *bm_inode,
			
 
				+					     struct buffer_head *bm_bh,
			
 
				+					     struct buffer_head *group_bh,
			
 
				+					     u32 first_new_cluster,
			
 
				+					     int new_clusters)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+	struct ocfs2_super *osb = OCFS2_SB(bm_inode->i_sb);
			
 
				+	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bm_bh->b_data;
			
 
				+	struct ocfs2_chain_list *cl = &fe->id2.i_chain;
			
 
				+	struct ocfs2_chain_rec *cr;
			
 
				+	struct ocfs2_group_desc *group;
			
 
				+	u16 chain, num_bits, backups = 0;
			
 
				+	u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
			
 
				+	u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
			
 
				+
			
 
				+	mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
			
 
				+		   new_clusters, first_new_cluster);
			
 
				+
			
 
				+	ret = ocfs2_journal_access(handle, bm_inode, group_bh,
			
 
				+				   OCFS2_JOURNAL_ACCESS_WRITE);
			
 
				+	if (ret < 0) {
			
 
				+		mlog_errno(ret);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	group = (struct ocfs2_group_desc *)group_bh->b_data;
			
 
				+
			
 
				+	/* update the group first. */
			
 
				+	num_bits = new_clusters * cl_bpc;
			
 
				+	le16_add_cpu(&group->bg_bits, num_bits);
			
 
				+	le16_add_cpu(&group->bg_free_bits_count, num_bits);
			
 
				+
			
 
				+	/*
			
 
				+	 * check whether some new backup superblocks exist in
			
 
				+	 * this group and update the group bitmap accordingly.
			
 
				+	 */
			
 
				+	if (OCFS2_HAS_COMPAT_FEATURE(osb->sb,
			
 
				+				     OCFS2_FEATURE_COMPAT_BACKUP_SB)) {
			
 
				+		backups = ocfs2_calc_new_backup_super(bm_inode,
			
 
				+						     group,
			
 
				+						     new_clusters,
			
 
				+						     first_new_cluster,
			
 
				+						     cl_cpg, 1);
			
 
				+		le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
			
 
				+	}
			
 
				+
			
 
				+	ret = ocfs2_journal_dirty(handle, group_bh);
			
 
				+	if (ret < 0) {
			
 
				+		mlog_errno(ret);
			
 
				+		goto out_rollback;
			
 
				+	}
			
 
				+
			
 
				+	/* update the inode accordingly. */
			
 
				+	ret = ocfs2_journal_access(handle, bm_inode, bm_bh,
			
 
				+				   OCFS2_JOURNAL_ACCESS_WRITE);
			
 
				+	if (ret < 0) {
			
 
				+		mlog_errno(ret);
			
 
				+		goto out_rollback;
			
 
				+	}
			
 
				+
			
 
				+	chain = le16_to_cpu(group->bg_chain);
			
 
				+	cr = (&cl->cl_recs[chain]);
			
 
				+	le32_add_cpu(&cr->c_total, num_bits);
			
 
				+	le32_add_cpu(&cr->c_free, num_bits);
			
 
				+	le32_add_cpu(&fe->id1.bitmap1.i_total, num_bits);
			
 
				+	le32_add_cpu(&fe->i_clusters, new_clusters);
			
 
				+
			
 
				+	if (backups) {
			
 
				+		le32_add_cpu(&cr->c_free, -1 * backups);
			
 
				+		le32_add_cpu(&fe->id1.bitmap1.i_used, backups);
			
 
				+	}
			
 
				+
			
 
				+	spin_lock(&OCFS2_I(bm_inode)->ip_lock);
			
 
				+	OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
			
 
				+	/* Widen before shifting: the byte count can exceed an int. */
			
 
				+	le64_add_cpu(&fe->i_size,
			
 
				+		     (u64)new_clusters << osb->s_clustersize_bits);
			
 
				+	spin_unlock(&OCFS2_I(bm_inode)->ip_lock);
			
 
				+	i_size_write(bm_inode, le64_to_cpu(fe->i_size));
			
 
				+
			
 
				+	ocfs2_journal_dirty(handle, bm_bh);
			
 
				+
			
 
				+out_rollback:
			
 
				+	if (ret < 0) {
			
 
				+		/*
			
 
				+		 * Only clear backup bits when this call set some.  With
			
 
				+		 * no backups recorded (for instance the feature flag is
			
 
				+		 * off) those bits were never set here and must be left
			
 
				+		 * alone.
			
 
				+		 */
			
 
				+		if (backups)
			
 
				+			ocfs2_calc_new_backup_super(bm_inode,
			
 
				+						    group,
			
 
				+						    new_clusters,
			
 
				+						    first_new_cluster,
			
 
				+						    cl_cpg, 0);
			
 
				+		le16_add_cpu(&group->bg_free_bits_count, backups);
			
 
				+		le16_add_cpu(&group->bg_bits, -1 * num_bits);
			
 
				+		le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
			
 
				+	}
			
 
				+out:
			
 
				+	mlog_exit(ret);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Copy the superblock image in @data over each backup superblock that
			
 
				+ * lies inside a volume of @clusters clusters, patching i_blkno in each
			
 
				+ * copy to the backup's own block number.
			
 
				+ *
			
 
				+ * Returns 0 on success or the first negative error from reading or
			
 
				+ * writing a backup; later backups are not attempted after an error.
			
 
				+ */
			
 
				+static int update_backups(struct inode * inode, u32 clusters, char *data)
			
 
				+{
			
 
				+	int i, ret = 0;
			
 
				+	u32 cluster;
			
 
				+	u64 blkno;
			
 
				+	struct buffer_head *backup = NULL;
			
 
				+	struct ocfs2_dinode *backup_di = NULL;
			
 
				+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				+
			
 
				+	/* calculate the real backups we need to update. */
			
 
				+	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
			
 
				+		blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
			
 
				+		cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
			
 
				+		/*
			
 
				+		 * Valid cluster numbers run from 0 to clusters - 1, so
			
 
				+		 * a backup at cluster == clusters is already past the
			
 
				+		 * end of the volume.
			
 
				+		 */
			
 
				+		if (cluster >= clusters)
			
 
				+			break;
			
 
				+
			
 
				+		ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL);
			
 
				+		if (ret < 0) {
			
 
				+			mlog_errno(ret);
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		memcpy(backup->b_data, data, inode->i_sb->s_blocksize);
			
 
				+
			
 
				+		/* Each copy must carry its own block number. */
			
 
				+		backup_di = (struct ocfs2_dinode *)backup->b_data;
			
 
				+		backup_di->i_blkno = cpu_to_le64(blkno);
			
 
				+
			
 
				+		ret = ocfs2_write_super_or_backup(osb, backup);
			
 
				+		brelse(backup);
			
 
				+		backup = NULL;
			
 
				+		if (ret < 0) {
			
 
				+			mlog_errno(ret);
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Add @new_clusters to i_clusters in the on-disk superblock, write it
			
 
				+ * out and, when OCFS2_FEATURE_COMPAT_BACKUP_SB is set, refresh the
			
 
				+ * backup superblocks from the same buffer.  Failures are logged and
			
 
				+ * reported with a warning; nothing is returned to the caller.
			
 
				+ */
			
 
				+static void ocfs2_update_super_and_backups(struct inode *inode,
			
 
				+					   int new_clusters)
			
 
				+{
			
 
				+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				+	struct buffer_head *sb_bh = NULL;
			
 
				+	struct ocfs2_dinode *sb_di = NULL;
			
 
				+	u32 total = 0;
			
 
				+	int status;
			
 
				+
			
 
				+	/*
			
 
				+	 * update the superblock last.
			
 
				+	 * It doesn't matter if the write failed.
			
 
				+	 */
			
 
				+	status = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO,
			
 
				+				  &sb_bh, 0, NULL);
			
 
				+	if (status < 0) {
			
 
				+		mlog_errno(status);
			
 
				+		goto warn;
			
 
				+	}
			
 
				+
			
 
				+	sb_di = (struct ocfs2_dinode *)sb_bh->b_data;
			
 
				+	le32_add_cpu(&sb_di->i_clusters, new_clusters);
			
 
				+	total = le32_to_cpu(sb_di->i_clusters);
			
 
				+
			
 
				+	status = ocfs2_write_super_or_backup(osb, sb_bh);
			
 
				+	if (status < 0) {
			
 
				+		mlog_errno(status);
			
 
				+		goto warn;
			
 
				+	}
			
 
				+
			
 
				+	if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_BACKUP_SB))
			
 
				+		status = update_backups(inode, total, sb_bh->b_data);
			
 
				+
			
 
				+warn:
			
 
				+	brelse(sb_bh);
			
 
				+	if (!status)
			
 
				+		return;
			
 
				+
			
 
				+	printk(KERN_WARNING "ocfs2: Failed to update super blocks on %s"
			
 
				+		" during fs resize. This condition is not fatal,"
			
 
				+		" but fsck.ocfs2 should be run to fix it\n",
			
 
				+		osb->dev_str);
			
 
				+}
			
 
				+
			
 
				+/*
				+ * Extend the filesystem to the new number of clusters specified.  This entry
				+ * point is only used to extend the current filesystem to the end of the last
				+ * existing group.
				+ *
				+ * @inode:        inode on the volume; only its i_sb is used here.
				+ * @new_clusters: number of clusters to append to the last cluster group.
				+ *
				+ * The global bitmap inode is held under i_mutex and ocfs2_inode_lock()
				+ * (called with ex = 1) while the last group descriptor and the bitmap
				+ * dinode are updated inside one transaction.  The superblock update that
				+ * follows is best effort and does not affect the return value.
				+ *
				+ * Returns 0 on success or when @new_clusters is 0, -EROFS on a read-only
				+ * mount, -EINVAL for a negative count, an unsupported chain layout, or a
				+ * request that would overflow the last group, -EIO for an invalid bitmap
				+ * dinode, or the negative error of a failing helper.
				+ */
				+int ocfs2_group_extend(struct inode * inode, int new_clusters)
				+{
				+	int ret;
				+	handle_t *handle;
				+	struct buffer_head *main_bm_bh = NULL;
				+	struct buffer_head *group_bh = NULL;
				+	struct inode *main_bm_inode = NULL;
				+	struct ocfs2_dinode *fe = NULL;
				+	struct ocfs2_group_desc *group = NULL;
				+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				+	u16 cl_bpc;
				+	u32 first_new_cluster;
				+	u64 lgd_blkno;
				+
				+	mlog_entry_void();
				+
				+	/* every exit goes through "out" so the entry/exit traces pair up. */
				+	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
				+		ret = -EROFS;
				+		goto out;
				+	}
				+
				+	if (new_clusters < 0) {
				+		ret = -EINVAL;
				+		goto out;
				+	} else if (new_clusters == 0) {
				+		ret = 0;
				+		goto out;
				+	}
				+
				+	main_bm_inode = ocfs2_get_system_file_inode(osb,
				+						    GLOBAL_BITMAP_SYSTEM_INODE,
				+						    OCFS2_INVALID_SLOT);
				+	if (!main_bm_inode) {
				+		ret = -EINVAL;
				+		mlog_errno(ret);
				+		goto out;
				+	}
				+
				+	mutex_lock(&main_bm_inode->i_mutex);
				+
				+	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
				+	if (ret < 0) {
				+		mlog_errno(ret);
				+		goto out_mutex;
				+	}
				+
				+	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
				+
				+	/* online resize only handles groups that use the full bitmap. */
				+	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
				+				 ocfs2_group_bitmap_size(osb->sb) * 8) {
				+		mlog(ML_ERROR, "The disk is too old and small. "
				+		     "Force to do offline resize.\n");
				+		ret = -EINVAL;
				+		goto out_unlock;
				+	}
				+
				+	if (!OCFS2_IS_VALID_DINODE(fe)) {
				+		OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe);
				+		ret = -EIO;
				+		goto out_unlock;
				+	}
				+
				+	/* the last existing cluster tells us which group is the last one. */
				+	first_new_cluster = le32_to_cpu(fe->i_clusters);
				+	lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
				+					      first_new_cluster - 1);
				+
				+	ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED,
				+			       main_bm_inode);
				+	if (ret < 0) {
				+		mlog_errno(ret);
				+		goto out_unlock;
				+	}
				+
				+	group = (struct ocfs2_group_desc *)group_bh->b_data;
				+
				+	ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group);
				+	if (ret) {
				+		mlog_errno(ret);
				+		goto out_unlock;
				+	}
				+
				+	/* refuse to grow the group past clusters-per-group. */
				+	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
				+	if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
				+		le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
				+		ret = -EINVAL;
				+		goto out_unlock;
				+	}
				+
				+	mlog(0, "extend the last group at %llu, new clusters = %d\n",
				+	     (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters);
				+
				+	handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS);
				+	if (IS_ERR(handle)) {
				+		mlog_errno(PTR_ERR(handle));
				+		ret = -EINVAL;
				+		goto out_unlock;
				+	}
				+
				+	/* update the last group descriptor and inode. */
				+	ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode,
				+						main_bm_bh, group_bh,
				+						first_new_cluster,
				+						new_clusters);
				+	if (ret) {
				+		mlog_errno(ret);
				+		goto out_commit;
				+	}
				+
				+	ocfs2_update_super_and_backups(main_bm_inode, new_clusters);
				+
				+out_commit:
				+	ocfs2_commit_trans(osb, handle);
				+out_unlock:
				+	brelse(group_bh);
				+	brelse(main_bm_bh);
				+
				+	ocfs2_inode_unlock(main_bm_inode, 1);
				+
				+out_mutex:
				+	mutex_unlock(&main_bm_inode->i_mutex);
				+	iput(main_bm_inode);
				+
				+out:
				+	mlog_exit(ret);
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Sanity-check the group descriptor in @group_bh against the bitmap dinode
				+ * @di and the resize request @input.
				+ *
				+ * The checks run in order and the first failing one is logged with
				+ * ML_ERROR: descriptor signature, parent dinode pointer, bit count against
				+ * cl_cpg * cl_bpc, free bits against total bits, bit count against the
				+ * bitmap size, chain number, and finally the total/free bit counts against
				+ * the cluster counts supplied in @input.
				+ *
				+ * @inode is not used by this function.
				+ *
				+ * Returns 0 when every check passes, -EIO otherwise.
				+ */
				+static int ocfs2_check_new_group(struct inode *inode,
				+				 struct ocfs2_dinode *di,
				+				 struct ocfs2_new_group_input *input,
				+				 struct buffer_head *group_bh)
				+{
				+	int ret;
				+	struct ocfs2_group_desc *gd;
				+	u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
				+	unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) *
				+				le16_to_cpu(di->id2.i_chain.cl_bpc);
				+
				+
				+	gd = (struct ocfs2_group_desc *)group_bh->b_data;
				+
				+	/* assume failure; only the final else clears it. */
				+	ret = -EIO;
				+	if (!OCFS2_IS_VALID_GROUP_DESC(gd))
				+		mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n",
				+		     (unsigned long long)le64_to_cpu(gd->bg_blkno));
				+	else if (di->i_blkno != gd->bg_parent_dinode)
				+		mlog(ML_ERROR, "Group descriptor # %llu has bad parent "
				+		     "pointer (%llu, expected %llu)\n",
				+		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
				+		     (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
				+		     (unsigned long long)le64_to_cpu(di->i_blkno));
				+	else if (le16_to_cpu(gd->bg_bits) > max_bits)
				+		mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n",
				+		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
				+		     le16_to_cpu(gd->bg_bits));
				+	else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits))
				+		mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
				+		     "claims that %u are free\n",
				+		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
				+		     le16_to_cpu(gd->bg_bits),
				+		     le16_to_cpu(gd->bg_free_bits_count));
				+	else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size)))
				+		mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
				+		     "max bitmap bits of %u\n",
				+		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
				+		     le16_to_cpu(gd->bg_bits),
				+		     8 * le16_to_cpu(gd->bg_size));
				+	else if (le16_to_cpu(gd->bg_chain) != input->chain)
				+		mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
				+		     "while input has %u set.\n",
				+		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
				+		     le16_to_cpu(gd->bg_chain), input->chain);
				+	else if (le16_to_cpu(gd->bg_bits) != input->clusters * cl_bpc)
				+		mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
				+		     "input has %u clusters set\n",
				+		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
				+		     le16_to_cpu(gd->bg_bits), input->clusters);
				+	else if (le16_to_cpu(gd->bg_free_bits_count) != input->frees * cl_bpc)
				+		/* report the value that was actually compared. */
				+		mlog(ML_ERROR, "Group descriptor # %llu has free bit count %u "
				+		     "but it should have %u set\n",
				+		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
				+		     le16_to_cpu(gd->bg_free_bits_count),
				+		     input->frees * cl_bpc);
				+	else
				+		ret = 0;
				+
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Validate a group-add request (@input) against the global bitmap dinode
				+ * @di, then validate the descriptor block itself.
				+ *
				+ * Returns 0 when the request is acceptable, -EINVAL when @input is
				+ * inconsistent with the volume, or the result of ocfs2_check_new_group()
				+ * when the descriptor check fails.
				+ */
				+static int ocfs2_verify_group_and_input(struct inode *inode,
				+					struct ocfs2_dinode *di,
				+					struct ocfs2_new_group_input *input,
				+					struct buffer_head *group_bh)
				+{
				+	u16 chains = le16_to_cpu(di->id2.i_chain.cl_count);
				+	u16 cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
				+	u16 next_rec = le16_to_cpu(di->id2.i_chain.cl_next_free_rec);
				+	u32 grp_cluster = ocfs2_blocks_to_clusters(inode->i_sb, input->group);
				+	u32 vol_clusters = le32_to_cpu(di->i_clusters);
				+	int status;
				+
				+	/* the new group must start beyond the current end of the volume. */
				+	if (grp_cluster < vol_clusters) {
				+		mlog(ML_ERROR, "add a group which is in the current volume.\n");
				+		return -EINVAL;
				+	}
				+
				+	if (input->chain >= chains) {
				+		mlog(ML_ERROR, "input chain exceeds the limit.\n");
				+		return -EINVAL;
				+	}
				+
				+	/* while unused chain records remain, the next free one must be used. */
				+	if (next_rec != chains && next_rec != input->chain) {
				+		mlog(ML_ERROR,
				+		     "the add group should be in chain %u\n", next_rec);
				+		return -EINVAL;
				+	}
				+
				+	/* unsigned wrap-around check on the new cluster total. */
				+	if (vol_clusters + input->clusters < vol_clusters) {
				+		mlog(ML_ERROR, "add group's clusters overflow.\n");
				+		return -EINVAL;
				+	}
				+
				+	if (input->clusters > cpg) {
				+		mlog(ML_ERROR, "the cluster exceeds the maximum of a group\n");
				+		return -EINVAL;
				+	}
				+
				+	if (input->frees > input->clusters) {
				+		mlog(ML_ERROR, "the free cluster exceeds the total clusters\n");
				+		return -EINVAL;
				+	}
				+
				+	if (vol_clusters % cpg != 0) {
				+		mlog(ML_ERROR,
				+		     "the last group isn't full. Use group extend first.\n");
				+		return -EINVAL;
				+	}
				+
				+	if (input->group != ocfs2_which_cluster_group(inode, grp_cluster)) {
				+		mlog(ML_ERROR, "group blkno is invalid\n");
				+		return -EINVAL;
				+	}
				+
				+	status = ocfs2_check_new_group(inode, di, input, group_bh);
				+	if (status)
				+		mlog(ML_ERROR, "group descriptor check failed.\n");
				+
				+	return status;
				+}
			
 
				+
			
 
				+/*
				+ * Add a new group descriptor to global_bitmap.
				+ *
				+ * @inode: inode on the volume; used for its i_sb and for marking the newly
				+ *         read descriptor buffer uptodate.
				+ * @input: describes the group to link in: block number of the descriptor
				+ *         (group), target chain, and its total and free cluster counts.
				+ *
				+ * The descriptor block is read from disk and verified against @input and
				+ * the bitmap dinode; it is then linked at the head of chain input->chain
				+ * and the chain record, bitmap totals, i_clusters and i_size of the global
				+ * bitmap dinode are grown accordingly.  The global bitmap inode is held
				+ * under i_mutex and ocfs2_inode_lock() (ex = 1) throughout.  The superblock
				+ * update at the end is best effort and does not affect the return value.
				+ *
				+ * Returns 0 on success, -EROFS on a read-only mount, -EINVAL for an
				+ * unsupported layout or a failed transaction start, or the negative error
				+ * of a failing helper.
				+ */
				+int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
				+{
				+	int ret;
				+	handle_t *handle;
				+	struct buffer_head *main_bm_bh = NULL;
				+	struct inode *main_bm_inode = NULL;
				+	struct ocfs2_dinode *fe = NULL;
				+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				+	struct buffer_head *group_bh = NULL;
				+	struct ocfs2_group_desc *group = NULL;
				+	struct ocfs2_chain_list *cl;
				+	struct ocfs2_chain_rec *cr;
				+	u16 cl_bpc;
				+
				+	mlog_entry_void();
				+
				+	/* leave through "out" so the entry/exit traces pair up. */
				+	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
				+		ret = -EROFS;
				+		goto out;
				+	}
				+
				+	main_bm_inode = ocfs2_get_system_file_inode(osb,
				+						    GLOBAL_BITMAP_SYSTEM_INODE,
				+						    OCFS2_INVALID_SLOT);
				+	if (!main_bm_inode) {
				+		ret = -EINVAL;
				+		mlog_errno(ret);
				+		goto out;
				+	}
				+
				+	mutex_lock(&main_bm_inode->i_mutex);
				+
				+	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
				+	if (ret < 0) {
				+		mlog_errno(ret);
				+		goto out_mutex;
				+	}
				+
				+	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
				+
				+	/* online resize only handles groups that use the full bitmap. */
				+	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
				+				 ocfs2_group_bitmap_size(osb->sb) * 8) {
				+		mlog(ML_ERROR, "The disk is too old and small."
				+		     " Force to do offline resize.\n");
				+		ret = -EINVAL;
				+		goto out_unlock;
				+	}
				+
				+	ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL);
				+	if (ret < 0) {
				+		mlog(ML_ERROR, "Can't read the group descriptor # %llu "
				+		     "from the device.\n", (unsigned long long)input->group);
				+		goto out_unlock;
				+	}
				+
				+	ocfs2_set_new_buffer_uptodate(inode, group_bh);
				+
				+	ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
				+	if (ret) {
				+		mlog_errno(ret);
				+		goto out_unlock;
				+	}
				+
				+	mlog(0, "Add a new group  %llu in chain = %u, length = %u\n",
				+	     (unsigned long long)input->group, input->chain, input->clusters);
				+
				+	handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS);
				+	if (IS_ERR(handle)) {
				+		mlog_errno(PTR_ERR(handle));
				+		ret = -EINVAL;
				+		goto out_unlock;
				+	}
				+
				+	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
				+	cl = &fe->id2.i_chain;
				+	cr = &cl->cl_recs[input->chain];
				+
				+	ret = ocfs2_journal_access(handle, main_bm_inode, group_bh,
				+				   OCFS2_JOURNAL_ACCESS_WRITE);
				+	if (ret < 0) {
				+		mlog_errno(ret);
				+		goto out_commit;
				+	}
				+
				+	/* new group goes to the head: it points at the old chain head. */
				+	group = (struct ocfs2_group_desc *)group_bh->b_data;
				+	group->bg_next_group = cr->c_blkno;
				+
				+	ret = ocfs2_journal_dirty(handle, group_bh);
				+	if (ret < 0) {
				+		mlog_errno(ret);
				+		goto out_commit;
				+	}
				+
				+	ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh,
				+				   OCFS2_JOURNAL_ACCESS_WRITE);
				+	if (ret < 0) {
				+		mlog_errno(ret);
				+		goto out_commit;
				+	}
				+
				+	/* first group in a so-far unused chain: claim and clear its record. */
				+	if (input->chain == le16_to_cpu(cl->cl_next_free_rec)) {
				+		le16_add_cpu(&cl->cl_next_free_rec, 1);
				+		memset(cr, 0, sizeof(struct ocfs2_chain_rec));
				+	}
				+
				+	/* input->group is a CPU-order value going into an on-disk field. */
				+	cr->c_blkno = cpu_to_le64(input->group);
				+	le32_add_cpu(&cr->c_total, input->clusters * cl_bpc);
				+	le32_add_cpu(&cr->c_free, input->frees * cl_bpc);
				+
				+	le32_add_cpu(&fe->id1.bitmap1.i_total, input->clusters *cl_bpc);
				+	le32_add_cpu(&fe->id1.bitmap1.i_used,
				+		     (input->clusters - input->frees) * cl_bpc);
				+	le32_add_cpu(&fe->i_clusters, input->clusters);
				+
				+	ocfs2_journal_dirty(handle, main_bm_bh);
				+
				+	spin_lock(&OCFS2_I(main_bm_inode)->ip_lock);
				+	OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
				+	/* widen before shifting so the byte count cannot wrap at 32 bits. */
				+	le64_add_cpu(&fe->i_size,
				+		     (u64)input->clusters << osb->s_clustersize_bits);
				+	spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock);
				+	i_size_write(main_bm_inode, le64_to_cpu(fe->i_size));
				+
				+	ocfs2_update_super_and_backups(main_bm_inode, input->clusters);
				+
				+out_commit:
				+	ocfs2_commit_trans(osb, handle);
				+out_unlock:
				+	brelse(group_bh);
				+	brelse(main_bm_bh);
				+
				+	ocfs2_inode_unlock(main_bm_inode, 1);
				+
				+out_mutex:
				+	mutex_unlock(&main_bm_inode->i_mutex);
				+	iput(main_bm_inode);
				+
				+out:
				+	mlog_exit(ret);
				+	return ret;
				+}
			
--- a/fs/ocfs2/resize.h
+++ b/fs/ocfs2/resize.h
@@ -0,0 +1,32 @@
 
				+/* -*- mode: c; c-basic-offset: 8; -*-
			
 
				+ * vim: noexpandtab sw=8 ts=8 sts=0:
			
 
				+ *
			
 
				+ * resize.h
			
 
				+ *
			
 
				+ * Function prototypes
			
 
				+ *
			
 
				+ * Copyright (C) 2007 Oracle.  All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public
			
 
				+ * License as published by the Free Software Foundation; either
			
 
				+ * version 2 of the License, or (at your option) any later version.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public
			
 
				+ * License along with this program; if not, write to the
			
 
				+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
			
 
				+ * Boston, MA 02111-1307, USA.
			
 
				+ */
			
 
				+
			
 
				+#ifndef OCFS2_RESIZE_H
			
 
				+#define OCFS2_RESIZE_H
			
 
				+
			
 
				+int ocfs2_group_extend(struct inode * inode, int new_clusters);
			
 
				+int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input);
			
 
				+
			
 
				+#endif /* OCFS2_RESIZE_H */
			
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -48,25 +48,6 @@ static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
 
				 			      s16 slot_num,
			
 
				 			      s16 node_num);
			
 
				 
			
 
				-/* Use the slot information we've collected to create a map of mounted
			
 
				- * nodes. Should be holding an EX on super block. assumes slot info is
			
 
				- * up to date. Note that we call this *after* we find a slot, so our
			
 
				- * own node should be set in the map too... */
			
 
				-void ocfs2_populate_mounted_map(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	int i;
			
 
				-	struct ocfs2_slot_info *si = osb->slot_info;
			
 
				-
			
 
				-	spin_lock(&si->si_lock);
			
 
				-
			
 
				-	for (i = 0; i < si->si_size; i++)
			
 
				-		if (si->si_global_node_nums[i] != OCFS2_INVALID_SLOT)
			
 
				-			ocfs2_node_map_set_bit(osb, &osb->mounted_map,
			
 
				-					      si->si_global_node_nums[i]);
			
 
				-
			
 
				-	spin_unlock(&si->si_lock);
			
 
				-}
			
 
				-
			
 
				 /* post the slot information on disk into our slot_info struct. */
			
 
				 void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
			
 
				 {
			
--- a/fs/ocfs2/slot_map.h
+++ b/fs/ocfs2/slot_map.h
@@ -52,8 +52,6 @@ s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 
				 void ocfs2_clear_slot(struct ocfs2_slot_info *si,
			
 
				 		      s16 slot_num);
			
 
				 
			
 
				-void ocfs2_populate_mounted_map(struct ocfs2_super *osb);
			
 
				-
			
 
				 static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si,
			
 
				 				      int slot_num)
			
 
				 {
			
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -101,8 +101,6 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg
 
				 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
			
 
				 						   u64 bg_blkno,
			
 
				 						   u16 bg_bit_off);
			
 
				-static inline u64 ocfs2_which_cluster_group(struct inode *inode,
			
 
				-					    u32 cluster);
			
 
				 static inline void ocfs2_block_to_cluster_group(struct inode *inode,
			
 
				 						u64 data_blkno,
			
 
				 						u64 *bg_blkno,
			
@@ -114,7 +112,7 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
 
				 
			
 
				 	if (inode) {
			
 
				 		if (ac->ac_which != OCFS2_AC_USE_LOCAL)
			
 
				-			ocfs2_meta_unlock(inode, 1);
			
 
				+			ocfs2_inode_unlock(inode, 1);
			
 
				 
			
 
				 		mutex_unlock(&inode->i_mutex);
			
 
				 
			
@@ -131,9 +129,9 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 
				 }
			
 
				 
			
 
				 /* somewhat more expensive than our other checks, so use sparingly. */
			
 
				-static int ocfs2_check_group_descriptor(struct super_block *sb,
			
 
				-					struct ocfs2_dinode *di,
			
 
				-					struct ocfs2_group_desc *gd)
			
 
				+int ocfs2_check_group_descriptor(struct super_block *sb,
			
 
				+				 struct ocfs2_dinode *di,
			
 
				+				 struct ocfs2_group_desc *gd)
			
 
				 {
			
 
				 	unsigned int max_bits;
			
 
				 
			
@@ -412,7 +410,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 
				 
			
 
				 	mutex_lock(&alloc_inode->i_mutex);
			
 
				 
			
 
				-	status = ocfs2_meta_lock(alloc_inode, &bh, 1);
			
 
				+	status = ocfs2_inode_lock(alloc_inode, &bh, 1);
			
 
				 	if (status < 0) {
			
 
				 		mutex_unlock(&alloc_inode->i_mutex);
			
 
				 		iput(alloc_inode);
			
@@ -1443,8 +1441,7 @@ static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
 
				 
			
 
				 /* given a cluster offset, calculate which block group it belongs to
			
 
				  * and return that block offset. */
			
 
				-static inline u64 ocfs2_which_cluster_group(struct inode *inode,
			
 
				-					    u32 cluster)
			
 
				+u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
			
 
				 {
			
 
				 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				 	u32 group_no;
			
@@ -1519,8 +1516,9 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
 
				 		if (min_clusters > (osb->bitmap_cpg - 1)) {
			
 
				 			/* The only paths asking for contiguousness
			
 
				 			 * should know about this already. */
			
 
				-			mlog(ML_ERROR, "minimum allocation requested exceeds "
			
 
				-				       "group bitmap size!");
			
 
				+			mlog(ML_ERROR, "minimum allocation requested %u exceeds "
			
 
				+			     "group bitmap size %u!\n", min_clusters,
			
 
				+			     osb->bitmap_cpg);
			
 
				 			status = -ENOSPC;
			
 
				 			goto bail;
			
 
				 		}
			
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -147,4 +147,12 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode)
 
				 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
			
 
				 				      struct ocfs2_alloc_context *ac);
			
 
				 
			
 
				+/* given a cluster offset, calculate which block group it belongs to
			
 
				+ * and return that block offset. */
			
 
				+u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
			
 
				+
			
 
				+/* somewhat more expensive than our other checks, so use sparingly. */
			
 
				+int ocfs2_check_group_descriptor(struct super_block *sb,
			
 
				+				 struct ocfs2_dinode *di,
			
 
				+				 struct ocfs2_group_desc *gd);
			
 
				 #endif /* _CHAINALLOC_H_ */
			
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -65,7 +65,6 @@
 
				 #include "sysfile.h"
			
 
				 #include "uptodate.h"
			
 
				 #include "ver.h"
			
 
				-#include "vote.h"
			
 
				 
			
 
				 #include "buffer_head_io.h"
			
 
				 
			
@@ -84,9 +83,11 @@ MODULE_LICENSE("GPL");
 
				 
			
 
				 struct mount_options
			
 
				 {
			
 
				+	unsigned long	commit_interval;
			
 
				 	unsigned long	mount_opt;
			
 
				 	unsigned int	atime_quantum;
			
 
				 	signed short	slot;
			
 
				+	unsigned int	localalloc_opt;
			
 
				 };
			
 
				 
			
 
				 static int ocfs2_parse_options(struct super_block *sb, char *options,
			
@@ -150,6 +151,9 @@ enum {
 
				 	Opt_data_writeback,
			
 
				 	Opt_atime_quantum,
			
 
				 	Opt_slot,
			
 
				+	Opt_commit,
			
 
				+	Opt_localalloc,
			
 
				+	Opt_localflocks,
			
 
				 	Opt_err,
			
 
				 };
			
 
				 
			
@@ -165,6 +169,9 @@ static match_table_t tokens = {
 
				 	{Opt_data_writeback, "data=writeback"},
			
 
				 	{Opt_atime_quantum, "atime_quantum=%u"},
			
 
				 	{Opt_slot, "preferred_slot=%u"},
			
 
				+	{Opt_commit, "commit=%u"},
			
 
				+	{Opt_localalloc, "localalloc=%d"},
			
 
				+	{Opt_localflocks, "localflocks"},
			
 
				 	{Opt_err, NULL}
			
 
				 };
			
 
				 
			
@@ -213,7 +220,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 
				 
			
 
				 	mlog_entry_void();
			
 
				 
			
 
				-	new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
			
 
				+	new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0);
			
 
				 	if (IS_ERR(new)) {
			
 
				 		status = PTR_ERR(new);
			
 
				 		mlog_errno(status);
			
@@ -221,7 +228,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 
				 	}
			
 
				 	osb->root_inode = new;
			
 
				 
			
 
				-	new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
			
 
				+	new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0);
			
 
				 	if (IS_ERR(new)) {
			
 
				 		status = PTR_ERR(new);
			
 
				 		mlog_errno(status);
			
@@ -443,6 +450,8 @@ unlock_osb:
 
				 		osb->s_mount_opt = parsed_options.mount_opt;
			
 
				 		osb->s_atime_quantum = parsed_options.atime_quantum;
			
 
				 		osb->preferred_slot = parsed_options.slot;
			
 
				+		if (parsed_options.commit_interval)
			
 
				+			osb->osb_commit_interval = parsed_options.commit_interval;
			
 
				 
			
 
				 		if (!ocfs2_is_hard_readonly(osb))
			
 
				 			ocfs2_set_journal_params(osb);
			
@@ -597,6 +606,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 
				 	osb->s_mount_opt = parsed_options.mount_opt;
			
 
				 	osb->s_atime_quantum = parsed_options.atime_quantum;
			
 
				 	osb->preferred_slot = parsed_options.slot;
			
 
				+	osb->osb_commit_interval = parsed_options.commit_interval;
			
 
				+	osb->local_alloc_size = parsed_options.localalloc_opt;
			
 
				 
			
 
				 	sb->s_magic = OCFS2_SUPER_MAGIC;
			
 
				 
			
@@ -747,9 +758,11 @@ static int ocfs2_parse_options(struct super_block *sb,
 
				 	mlog_entry("remount: %d, options: \"%s\"\n", is_remount,
			
 
				 		   options ? options : "(none)");
			
 
				 
			
 
				+	mopt->commit_interval = 0;
			
 
				 	mopt->mount_opt = 0;
			
 
				 	mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
			
 
				 	mopt->slot = OCFS2_INVALID_SLOT;
			
 
				+	mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
			
 
				 
			
 
				 	if (!options) {
			
 
				 		status = 1;
			
@@ -816,6 +829,41 @@ static int ocfs2_parse_options(struct super_block *sb,
 
				 			if (option)
			
 
				 				mopt->slot = (s16)option;
			
 
				 			break;
			
 
				+		case Opt_commit:
			
 
				+			option = 0;
			
 
				+			if (match_int(&args[0], &option)) {
			
 
				+				status = 0;
			
 
				+				goto bail;
			
 
				+			}
			
 
				+			if (option < 0)
			
 
				+				return 0;
			
 
				+			if (option == 0)
			
 
				+				option = JBD_DEFAULT_MAX_COMMIT_AGE;
			
 
				+			mopt->commit_interval = HZ * option;
			
 
				+			break;
			
 
				+		case Opt_localalloc:
			
 
				+			option = 0;
			
 
				+			if (match_int(&args[0], &option)) {
			
 
				+				status = 0;
			
 
				+				goto bail;
			
 
				+			}
			
 
				+			if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8))
			
 
				+				mopt->localalloc_opt = option;
			
 
				+			break;
			
 
				+		case Opt_localflocks:
			
 
				+			/*
			
 
				+			 * Changing this during remount could race
			
 
				+			 * flock() requests, or "unbalance" existing
			
 
				+			 * ones (e.g., a lock is taken in one mode but
			
 
				+			 * dropped in the other). If users care enough
			
 
				+			 * to flip locking modes during remount, we
			
 
				+			 * could add a "local" flag to individual
			
 
				+			 * flock structures for proper tracking of
			
 
				+			 * state.
			
 
				+			 */
			
 
				+			if (!is_remount)
			
 
				+				mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
			
 
				+			break;
			
 
				 		default:
			
 
				 			mlog(ML_ERROR,
			
 
				 			     "Unrecognized mount option \"%s\" "
			
@@ -864,6 +912,16 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 
				 	if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM)
			
 
				 		seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
			
 
				 
			
 
				+	if (osb->osb_commit_interval)
			
 
				+		seq_printf(s, ",commit=%u",
			
 
				+			   (unsigned) (osb->osb_commit_interval / HZ));
			
 
				+
			
 
				+	if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE)
			
 
				+		seq_printf(s, ",localalloc=%d", osb->local_alloc_size);
			
 
				+
			
 
				+	if (opts & OCFS2_MOUNT_LOCALFLOCKS)
			
 
				+		seq_printf(s, ",localflocks,");
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -965,7 +1023,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_meta_lock(inode, &bh, 0);
			
 
				+	status = ocfs2_inode_lock(inode, &bh, 0);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto bail;
			
@@ -989,7 +1047,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 
				 
			
 
				 	brelse(bh);
			
 
				 
			
 
				-	ocfs2_meta_unlock(inode, 0);
			
 
				+	ocfs2_inode_unlock(inode, 0);
			
 
				 	status = 0;
			
 
				 bail:
			
 
				 	if (inode)
			
@@ -1020,8 +1078,7 @@ static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data)
 
				 	oi->ip_clusters = 0;
			
 
				 
			
 
				 	ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
			
 
				-	ocfs2_lock_res_init_once(&oi->ip_meta_lockres);
			
 
				-	ocfs2_lock_res_init_once(&oi->ip_data_lockres);
			
 
				+	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
			
 
				 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);
			
 
				 
			
 
				 	ocfs2_metadata_cache_init(&oi->vfs_inode);
			
@@ -1117,25 +1174,12 @@ static int ocfs2_mount_volume(struct super_block *sb)
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_register_hb_callbacks(osb);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				-
			
 
				 	status = ocfs2_dlm_init(osb);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	/* requires vote_thread to be running. */
			
 
				-	status = ocfs2_register_net_handlers(osb);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				-
			
 
				 	status = ocfs2_super_lock(osb, 1);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
@@ -1150,8 +1194,6 @@ static int ocfs2_mount_volume(struct super_block *sb)
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	ocfs2_populate_mounted_map(osb);
			
 
				-
			
 
				 	/* load all node-local system inodes */
			
 
				 	status = ocfs2_init_local_system_inodes(osb);
			
 
				 	if (status < 0) {
			
@@ -1174,15 +1216,6 @@ static int ocfs2_mount_volume(struct super_block *sb)
 
				 	if (ocfs2_mount_local(osb))
			
 
				 		goto leave;
			
 
				 
			
 
				-	/* This should be sent *after* we recovered our journal as it
			
 
				-	 * will cause other nodes to unmark us as needing
			
 
				-	 * recovery. However, we need to send it *before* dropping the
			
 
				-	 * super block lock as otherwise their recovery threads might
			
 
				-	 * try to clean us up while we're live! */
			
 
				-	status = ocfs2_request_mount_vote(osb);
			
 
				-	if (status < 0)
			
 
				-		mlog_errno(status);
			
 
				-
			
 
				 leave:
			
 
				 	if (unlock_super)
			
 
				 		ocfs2_super_unlock(osb, 1);
			
@@ -1240,10 +1273,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
				 			mlog_errno(tmp);
			
 
				 			return;
			
 
				 		}
			
 
				-
			
 
				-		tmp = ocfs2_request_umount_vote(osb);
			
 
				-		if (tmp < 0)
			
 
				-			mlog_errno(tmp);
			
 
				 	}
			
 
				 
			
 
				 	if (osb->slot_num != OCFS2_INVALID_SLOT)
			
@@ -1254,13 +1283,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
				 
			
 
				 	ocfs2_release_system_inodes(osb);
			
 
				 
			
 
				-	if (osb->dlm) {
			
 
				-		ocfs2_unregister_net_handlers(osb);
			
 
				-
			
 
				+	if (osb->dlm)
			
 
				 		ocfs2_dlm_shutdown(osb);
			
 
				-	}
			
 
				-
			
 
				-	ocfs2_clear_hb_callbacks(osb);
			
 
				 
			
 
				 	debugfs_remove(osb->osb_debug_root);
			
 
				 
			
@@ -1315,7 +1339,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
				 	int i, cbits, bbits;
			
 
				 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
			
 
				 	struct inode *inode = NULL;
			
 
				-	struct buffer_head *bitmap_bh = NULL;
			
 
				 	struct ocfs2_journal *journal;
			
 
				 	__le32 uuid_net_key;
			
 
				 	struct ocfs2_super *osb;
			
@@ -1344,19 +1367,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
				 	osb->s_sectsize_bits = blksize_bits(sector_size);
			
 
				 	BUG_ON(!osb->s_sectsize_bits);
			
 
				 
			
 
				-	osb->net_response_ids = 0;
			
 
				-	spin_lock_init(&osb->net_response_lock);
			
 
				-	INIT_LIST_HEAD(&osb->net_response_list);
			
 
				-
			
 
				-	INIT_LIST_HEAD(&osb->osb_net_handlers);
			
 
				 	init_waitqueue_head(&osb->recovery_event);
			
 
				-	spin_lock_init(&osb->vote_task_lock);
			
 
				-	init_waitqueue_head(&osb->vote_event);
			
 
				-	osb->vote_work_sequence = 0;
			
 
				-	osb->vote_wake_sequence = 0;
			
 
				+	spin_lock_init(&osb->dc_task_lock);
			
 
				+	init_waitqueue_head(&osb->dc_event);
			
 
				+	osb->dc_work_sequence = 0;
			
 
				+	osb->dc_wake_sequence = 0;
			
 
				 	INIT_LIST_HEAD(&osb->blocked_lock_list);
			
 
				 	osb->blocked_lock_count = 0;
			
 
				-	INIT_LIST_HEAD(&osb->vote_list);
			
 
				 	spin_lock_init(&osb->osb_lock);
			
 
				 
			
 
				 	atomic_set(&osb->alloc_stats.moves, 0);
			
@@ -1496,7 +1513,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
				 	}
			
 
				 
			
 
				 	memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key));
			
 
				-	osb->net_key = le32_to_cpu(uuid_net_key);
			
 
				 
			
 
				 	strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
			
 
				 	osb->vol_label[63] = '\0';
			
@@ -1539,25 +1555,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
				 	}
			
 
				 
			
 
				 	osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
			
 
				-
			
 
				-	/* We don't have a cluster lock on the bitmap here because
			
 
				-	 * we're only interested in static information and the extra
			
 
				-	 * complexity at mount time isn't worht it. Don't pass the
			
 
				-	 * inode in to the read function though as we don't want it to
			
 
				-	 * be put in the cache. */
			
 
				-	status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0,
			
 
				-				  NULL);
			
 
				 	iput(inode);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				 
			
 
				-	di = (struct ocfs2_dinode *) bitmap_bh->b_data;
			
 
				-	osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
			
 
				-	brelse(bitmap_bh);
			
 
				-	mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
			
 
				-	     (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);
			
 
				+	osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8;
			
 
				 
			
 
				 	status = ocfs2_init_slot_info(osb);
			
 
				 	if (status < 0) {
			
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -112,7 +112,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				-	inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE);
			
 
				+	inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE, type);
			
 
				 	if (IS_ERR(inode)) {
			
 
				 		mlog_errno(PTR_ERR(inode));
			
 
				 		inode = NULL;
			
--- a/fs/ocfs2/ver.c
+++ b/fs/ocfs2/ver.c
@@ -29,7 +29,7 @@
 
				 
			
 
				 #include "ver.h"
			
 
				 
			
 
				-#define OCFS2_BUILD_VERSION "1.3.3"
			
 
				+#define OCFS2_BUILD_VERSION "1.5.0"
			
 
				 
			
 
				 #define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION
			
 
				 
			
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -1,756 +0,0 @@
 
				-/* -*- mode: c; c-basic-offset: 8; -*-
			
 
				- * vim: noexpandtab sw=8 ts=8 sts=0:
			
 
				- *
			
 
				- * vote.c
			
 
				- *
			
 
				- * description here
			
 
				- *
			
 
				- * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or
			
 
				- * modify it under the terms of the GNU General Public
			
 
				- * License as published by the Free Software Foundation; either
			
 
				- * version 2 of the License, or (at your option) any later version.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful,
			
 
				- * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				- * General Public License for more details.
			
 
				- *
			
 
				- * You should have received a copy of the GNU General Public
			
 
				- * License along with this program; if not, write to the
			
 
				- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
			
 
				- * Boston, MA 021110-1307, USA.
			
 
				- */
			
 
				-
			
 
				-#include <linux/types.h>
			
 
				-#include <linux/slab.h>
			
 
				-#include <linux/highmem.h>
			
 
				-#include <linux/kthread.h>
			
 
				-
			
 
				-#include <cluster/heartbeat.h>
			
 
				-#include <cluster/nodemanager.h>
			
 
				-#include <cluster/tcp.h>
			
 
				-
			
 
				-#include <dlm/dlmapi.h>
			
 
				-
			
 
				-#define MLOG_MASK_PREFIX ML_VOTE
			
 
				-#include <cluster/masklog.h>
			
 
				-
			
 
				-#include "ocfs2.h"
			
 
				-
			
 
				-#include "alloc.h"
			
 
				-#include "dlmglue.h"
			
 
				-#include "extent_map.h"
			
 
				-#include "heartbeat.h"
			
 
				-#include "inode.h"
			
 
				-#include "journal.h"
			
 
				-#include "slot_map.h"
			
 
				-#include "vote.h"
			
 
				-
			
 
				-#include "buffer_head_io.h"
			
 
				-
			
 
				-#define OCFS2_MESSAGE_TYPE_VOTE     (0x1)
			
 
				-#define OCFS2_MESSAGE_TYPE_RESPONSE (0x2)
			
 
				-struct ocfs2_msg_hdr
			
 
				-{
			
 
				-	__be32 h_response_id; /* used to lookup message handle on sending
			
 
				-			    * node. */
			
 
				-	__be32 h_request;
			
 
				-	__be64 h_blkno;
			
 
				-	__be32 h_generation;
			
 
				-	__be32 h_node_num;    /* node sending this particular message. */
			
 
				-};
			
 
				-
			
 
				-struct ocfs2_vote_msg
			
 
				-{
			
 
				-	struct ocfs2_msg_hdr v_hdr;
			
 
				-	__be32 v_reserved1;
			
 
				-} __attribute__ ((packed));
			
 
				-
			
 
				-/* Responses are given these values to maintain backwards
			
 
				- * compatibility with older ocfs2 versions */
			
 
				-#define OCFS2_RESPONSE_OK		(0)
			
 
				-#define OCFS2_RESPONSE_BUSY		(-16)
			
 
				-#define OCFS2_RESPONSE_BAD_MSG		(-22)
			
 
				-
			
 
				-struct ocfs2_response_msg
			
 
				-{
			
 
				-	struct ocfs2_msg_hdr r_hdr;
			
 
				-	__be32 r_response;
			
 
				-} __attribute__ ((packed));
			
 
				-
			
 
				-struct ocfs2_vote_work {
			
 
				-	struct list_head   w_list;
			
 
				-	struct ocfs2_vote_msg w_msg;
			
 
				-};
			
 
				-
			
 
				-enum ocfs2_vote_request {
			
 
				-	OCFS2_VOTE_REQ_INVALID = 0,
			
 
				-	OCFS2_VOTE_REQ_MOUNT,
			
 
				-	OCFS2_VOTE_REQ_UMOUNT,
			
 
				-	OCFS2_VOTE_REQ_LAST
			
 
				-};
			
 
				-
			
 
				-static inline int ocfs2_is_valid_vote_request(int request)
			
 
				-{
			
 
				-	return OCFS2_VOTE_REQ_INVALID < request &&
			
 
				-		request < OCFS2_VOTE_REQ_LAST;
			
 
				-}
			
 
				-
			
 
				-typedef void (*ocfs2_net_response_callback)(void *priv,
			
 
				-					    struct ocfs2_response_msg *resp);
			
 
				-struct ocfs2_net_response_cb {
			
 
				-	ocfs2_net_response_callback	rc_cb;
			
 
				-	void				*rc_priv;
			
 
				-};
			
 
				-
			
 
				-struct ocfs2_net_wait_ctxt {
			
 
				-	struct list_head        n_list;
			
 
				-	u32                     n_response_id;
			
 
				-	wait_queue_head_t       n_event;
			
 
				-	struct ocfs2_node_map   n_node_map;
			
 
				-	int                     n_response; /* an agreggate response. 0 if
			
 
				-					     * all nodes are go, < 0 on any
			
 
				-					     * negative response from any
			
 
				-					     * node or network error. */
			
 
				-	struct ocfs2_net_response_cb *n_callback;
			
 
				-};
			
 
				-
			
 
				-static void ocfs2_process_mount_request(struct ocfs2_super *osb,
			
 
				-					unsigned int node_num)
			
 
				-{
			
 
				-	mlog(0, "MOUNT vote from node %u\n", node_num);
			
 
				-	/* The other node only sends us this message when he has an EX
			
 
				-	 * on the superblock, so our recovery threads (if having been
			
 
				-	 * launched) are waiting on it.*/
			
 
				-	ocfs2_recovery_map_clear(osb, node_num);
			
 
				-	ocfs2_node_map_set_bit(osb, &osb->mounted_map, node_num);
			
 
				-
			
 
				-	/* We clear the umount map here because a node may have been
			
 
				-	 * previously mounted, safely unmounted but never stopped
			
 
				-	 * heartbeating - in which case we'd have a stale entry. */
			
 
				-	ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num);
			
 
				-}
			
 
				-
			
 
				-static void ocfs2_process_umount_request(struct ocfs2_super *osb,
			
 
				-					 unsigned int node_num)
			
 
				-{
			
 
				-	mlog(0, "UMOUNT vote from node %u\n", node_num);
			
 
				-	ocfs2_node_map_clear_bit(osb, &osb->mounted_map, node_num);
			
 
				-	ocfs2_node_map_set_bit(osb, &osb->umount_map, node_num);
			
 
				-}
			
 
				-
			
 
				-static void ocfs2_process_vote(struct ocfs2_super *osb,
			
 
				-			       struct ocfs2_vote_msg *msg)
			
 
				-{
			
 
				-	int net_status, vote_response;
			
 
				-	unsigned int node_num;
			
 
				-	u64 blkno;
			
 
				-	enum ocfs2_vote_request request;
			
 
				-	struct ocfs2_msg_hdr *hdr = &msg->v_hdr;
			
 
				-	struct ocfs2_response_msg response;
			
 
				-
			
 
				-	/* decode the network mumbo jumbo into local variables. */
			
 
				-	request = be32_to_cpu(hdr->h_request);
			
 
				-	blkno = be64_to_cpu(hdr->h_blkno);
			
 
				-	node_num = be32_to_cpu(hdr->h_node_num);
			
 
				-
			
 
				-	mlog(0, "processing vote: request = %u, blkno = %llu, node_num = %u\n",
			
 
				-	     request, (unsigned long long)blkno, node_num);
			
 
				-
			
 
				-	if (!ocfs2_is_valid_vote_request(request)) {
			
 
				-		mlog(ML_ERROR, "Invalid vote request %d from node %u\n",
			
 
				-		     request, node_num);
			
 
				-		vote_response = OCFS2_RESPONSE_BAD_MSG;
			
 
				-		goto respond;
			
 
				-	}
			
 
				-
			
 
				-	vote_response = OCFS2_RESPONSE_OK;
			
 
				-
			
 
				-	switch (request) {
			
 
				-	case OCFS2_VOTE_REQ_UMOUNT:
			
 
				-		ocfs2_process_umount_request(osb, node_num);
			
 
				-		goto respond;
			
 
				-	case OCFS2_VOTE_REQ_MOUNT:
			
 
				-		ocfs2_process_mount_request(osb, node_num);
			
 
				-		goto respond;
			
 
				-	default:
			
 
				-		/* avoids a gcc warning */
			
 
				-		break;
			
 
				-	}
			
 
				-
			
 
				-respond:
			
 
				-	/* Response struture is small so we just put it on the stack
			
 
				-	 * and stuff it inline. */
			
 
				-	memset(&response, 0, sizeof(struct ocfs2_response_msg));
			
 
				-	response.r_hdr.h_response_id = hdr->h_response_id;
			
 
				-	response.r_hdr.h_blkno = hdr->h_blkno;
			
 
				-	response.r_hdr.h_generation = hdr->h_generation;
			
 
				-	response.r_hdr.h_node_num = cpu_to_be32(osb->node_num);
			
 
				-	response.r_response = cpu_to_be32(vote_response);
			
 
				-
			
 
				-	net_status = o2net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
			
 
				-					osb->net_key,
			
 
				-					&response,
			
 
				-					sizeof(struct ocfs2_response_msg),
			
 
				-					node_num,
			
 
				-					NULL);
			
 
				-	/* We still want to error print for ENOPROTOOPT here. The
			
 
				-	 * sending node shouldn't have unregistered his net handler
			
 
				-	 * without sending an unmount vote 1st */
			
 
				-	if (net_status < 0
			
 
				-	    && net_status != -ETIMEDOUT
			
 
				-	    && net_status != -ENOTCONN)
			
 
				-		mlog(ML_ERROR, "message to node %u fails with error %d!\n",
			
 
				-		     node_num, net_status);
			
 
				-}
			
 
				-
			
 
				-static void ocfs2_vote_thread_do_work(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	unsigned long processed;
			
 
				-	struct ocfs2_lock_res *lockres;
			
 
				-	struct ocfs2_vote_work *work;
			
 
				-
			
 
				-	mlog_entry_void();
			
 
				-
			
 
				-	spin_lock(&osb->vote_task_lock);
			
 
				-	/* grab this early so we know to try again if a state change and
			
 
				-	 * wake happens part-way through our work  */
			
 
				-	osb->vote_work_sequence = osb->vote_wake_sequence;
			
 
				-
			
 
				-	processed = osb->blocked_lock_count;
			
 
				-	while (processed) {
			
 
				-		BUG_ON(list_empty(&osb->blocked_lock_list));
			
 
				-
			
 
				-		lockres = list_entry(osb->blocked_lock_list.next,
			
 
				-				     struct ocfs2_lock_res, l_blocked_list);
			
 
				-		list_del_init(&lockres->l_blocked_list);
			
 
				-		osb->blocked_lock_count--;
			
 
				-		spin_unlock(&osb->vote_task_lock);
			
 
				-
			
 
				-		BUG_ON(!processed);
			
 
				-		processed--;
			
 
				-
			
 
				-		ocfs2_process_blocked_lock(osb, lockres);
			
 
				-
			
 
				-		spin_lock(&osb->vote_task_lock);
			
 
				-	}
			
 
				-
			
 
				-	while (osb->vote_count) {
			
 
				-		BUG_ON(list_empty(&osb->vote_list));
			
 
				-		work = list_entry(osb->vote_list.next,
			
 
				-				  struct ocfs2_vote_work, w_list);
			
 
				-		list_del(&work->w_list);
			
 
				-		osb->vote_count--;
			
 
				-		spin_unlock(&osb->vote_task_lock);
			
 
				-
			
 
				-		ocfs2_process_vote(osb, &work->w_msg);
			
 
				-		kfree(work);
			
 
				-
			
 
				-		spin_lock(&osb->vote_task_lock);
			
 
				-	}
			
 
				-	spin_unlock(&osb->vote_task_lock);
			
 
				-
			
 
				-	mlog_exit_void();
			
 
				-}
			
 
				-
			
 
				-static int ocfs2_vote_thread_lists_empty(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	int empty = 0;
			
 
				-
			
 
				-	spin_lock(&osb->vote_task_lock);
			
 
				-	if (list_empty(&osb->blocked_lock_list) &&
			
 
				-	    list_empty(&osb->vote_list))
			
 
				-		empty = 1;
			
 
				-
			
 
				-	spin_unlock(&osb->vote_task_lock);
			
 
				-	return empty;
			
 
				-}
			
 
				-
			
 
				-static int ocfs2_vote_thread_should_wake(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	int should_wake = 0;
			
 
				-
			
 
				-	spin_lock(&osb->vote_task_lock);
			
 
				-	if (osb->vote_work_sequence != osb->vote_wake_sequence)
			
 
				-		should_wake = 1;
			
 
				-	spin_unlock(&osb->vote_task_lock);
			
 
				-
			
 
				-	return should_wake;
			
 
				-}
			
 
				-
			
 
				-int ocfs2_vote_thread(void *arg)
			
 
				-{
			
 
				-	int status = 0;
			
 
				-	struct ocfs2_super *osb = arg;
			
 
				-
			
 
				-	/* only quit once we've been asked to stop and there is no more
			
 
				-	 * work available */
			
 
				-	while (!(kthread_should_stop() &&
			
 
				-		 ocfs2_vote_thread_lists_empty(osb))) {
			
 
				-
			
 
				-		wait_event_interruptible(osb->vote_event,
			
 
				-					 ocfs2_vote_thread_should_wake(osb) ||
			
 
				-					 kthread_should_stop());
			
 
				-
			
 
				-		mlog(0, "vote_thread: awoken\n");
			
 
				-
			
 
				-		ocfs2_vote_thread_do_work(osb);
			
 
				-	}
			
 
				-
			
 
				-	osb->vote_task = NULL;
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				-static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response_id)
			
 
				-{
			
 
				-	struct ocfs2_net_wait_ctxt *w;
			
 
				-
			
 
				-	w = kzalloc(sizeof(*w), GFP_NOFS);
			
 
				-	if (!w) {
			
 
				-		mlog_errno(-ENOMEM);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				-	INIT_LIST_HEAD(&w->n_list);
			
 
				-	init_waitqueue_head(&w->n_event);
			
 
				-	ocfs2_node_map_init(&w->n_node_map);
			
 
				-	w->n_response_id = response_id;
			
 
				-	w->n_callback = NULL;
			
 
				-bail:
			
 
				-	return w;
			
 
				-}
			
 
				-
			
 
				-static unsigned int ocfs2_new_response_id(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	unsigned int ret;
			
 
				-
			
 
				-	spin_lock(&osb->net_response_lock);
			
 
				-	ret = ++osb->net_response_ids;
			
 
				-	spin_unlock(&osb->net_response_lock);
			
 
				-
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static void ocfs2_dequeue_net_wait_ctxt(struct ocfs2_super *osb,
			
 
				-					struct ocfs2_net_wait_ctxt *w)
			
 
				-{
			
 
				-	spin_lock(&osb->net_response_lock);
			
 
				-	list_del(&w->n_list);
			
 
				-	spin_unlock(&osb->net_response_lock);
			
 
				-}
			
 
				-
			
 
				-static void ocfs2_queue_net_wait_ctxt(struct ocfs2_super *osb,
			
 
				-				      struct ocfs2_net_wait_ctxt *w)
			
 
				-{
			
 
				-	spin_lock(&osb->net_response_lock);
			
 
				-	list_add_tail(&w->n_list,
			
 
				-		      &osb->net_response_list);
			
 
				-	spin_unlock(&osb->net_response_lock);
			
 
				-}
			
 
				-
			
 
				-static void __ocfs2_mark_node_responded(struct ocfs2_super *osb,
			
 
				-					struct ocfs2_net_wait_ctxt *w,
			
 
				-					int node_num)
			
 
				-{
			
 
				-	assert_spin_locked(&osb->net_response_lock);
			
 
				-
			
 
				-	ocfs2_node_map_clear_bit(osb, &w->n_node_map, node_num);
			
 
				-	if (ocfs2_node_map_is_empty(osb, &w->n_node_map))
			
 
				-		wake_up(&w->n_event);
			
 
				-}
			
 
				-
			
 
				-/* Intended to be called from the node down callback, we fake remove
			
 
				- * the node from all our response contexts */
			
 
				-void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb,
			
 
				-					int node_num)
			
 
				-{
			
 
				-	struct list_head *p;
			
 
				-	struct ocfs2_net_wait_ctxt *w = NULL;
			
 
				-
			
 
				-	spin_lock(&osb->net_response_lock);
			
 
				-
			
 
				-	list_for_each(p, &osb->net_response_list) {
			
 
				-		w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list);
			
 
				-
			
 
				-		__ocfs2_mark_node_responded(osb, w, node_num);
			
 
				-	}
			
 
				-
			
 
				-	spin_unlock(&osb->net_response_lock);
			
 
				-}
			
 
				-
			
 
				-static int ocfs2_broadcast_vote(struct ocfs2_super *osb,
			
 
				-				struct ocfs2_vote_msg *request,
			
 
				-				unsigned int response_id,
			
 
				-				int *response,
			
 
				-				struct ocfs2_net_response_cb *callback)
			
 
				-{
			
 
				-	int status, i, remote_err;
			
 
				-	struct ocfs2_net_wait_ctxt *w = NULL;
			
 
				-	int dequeued = 0;
			
 
				-
			
 
				-	mlog_entry_void();
			
 
				-
			
 
				-	w = ocfs2_new_net_wait_ctxt(response_id);
			
 
				-	if (!w) {
			
 
				-		status = -ENOMEM;
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-	w->n_callback = callback;
			
 
				-
			
 
				-	/* we're pretty much ready to go at this point, and this fills
			
 
				-	 * in n_response which we need anyway... */
			
 
				-	ocfs2_queue_net_wait_ctxt(osb, w);
			
 
				-
			
 
				-	i = ocfs2_node_map_iterate(osb, &osb->mounted_map, 0);
			
 
				-
			
 
				-	while (i != O2NM_INVALID_NODE_NUM) {
			
 
				-		if (i != osb->node_num) {
			
 
				-			mlog(0, "trying to send request to node %i\n", i);
			
 
				-			ocfs2_node_map_set_bit(osb, &w->n_node_map, i);
			
 
				-
			
 
				-			remote_err = 0;
			
 
				-			status = o2net_send_message(OCFS2_MESSAGE_TYPE_VOTE,
			
 
				-						    osb->net_key,
			
 
				-						    request,
			
 
				-						    sizeof(*request),
			
 
				-						    i,
			
 
				-						    &remote_err);
			
 
				-			if (status == -ETIMEDOUT) {
			
 
				-				mlog(0, "remote node %d timed out!\n", i);
			
 
				-				status = -EAGAIN;
			
 
				-				goto bail;
			
 
				-			}
			
 
				-			if (remote_err < 0) {
			
 
				-				status = remote_err;
			
 
				-				mlog(0, "remote error %d on node %d!\n",
			
 
				-				     remote_err, i);
			
 
				-				mlog_errno(status);
			
 
				-				goto bail;
			
 
				-			}
			
 
				-			if (status < 0) {
			
 
				-				mlog_errno(status);
			
 
				-				goto bail;
			
 
				-			}
			
 
				-		}
			
 
				-		i++;
			
 
				-		i = ocfs2_node_map_iterate(osb, &osb->mounted_map, i);
			
 
				-		mlog(0, "next is %d, i am %d\n", i, osb->node_num);
			
 
				-	}
			
 
				-	mlog(0, "done sending, now waiting on responses...\n");
			
 
				-
			
 
				-	wait_event(w->n_event, ocfs2_node_map_is_empty(osb, &w->n_node_map));
			
 
				-
			
 
				-	ocfs2_dequeue_net_wait_ctxt(osb, w);
			
 
				-	dequeued = 1;
			
 
				-
			
 
				-	*response = w->n_response;
			
 
				-	status = 0;
			
 
				-bail:
			
 
				-	if (w) {
			
 
				-		if (!dequeued)
			
 
				-			ocfs2_dequeue_net_wait_ctxt(osb, w);
			
 
				-		kfree(w);
			
 
				-	}
			
 
				-
			
 
				-	mlog_exit(status);
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				-static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb,
			
 
				-						      u64 blkno,
			
 
				-						      unsigned int generation,
			
 
				-						      enum ocfs2_vote_request type)
			
 
				-{
			
 
				-	struct ocfs2_vote_msg *request;
			
 
				-	struct ocfs2_msg_hdr *hdr;
			
 
				-
			
 
				-	BUG_ON(!ocfs2_is_valid_vote_request(type));
			
 
				-
			
 
				-	request = kzalloc(sizeof(*request), GFP_NOFS);
			
 
				-	if (!request) {
			
 
				-		mlog_errno(-ENOMEM);
			
 
				-	} else {
			
 
				-		hdr = &request->v_hdr;
			
 
				-		hdr->h_node_num = cpu_to_be32(osb->node_num);
			
 
				-		hdr->h_request = cpu_to_be32(type);
			
 
				-		hdr->h_blkno = cpu_to_be64(blkno);
			
 
				-		hdr->h_generation = cpu_to_be32(generation);
			
 
				-	}
			
 
				-
			
 
				-	return request;
			
 
				-}
			
 
				-
			
 
				-/* Complete the buildup of a new vote request and process the
			
 
				- * broadcast return value. */
			
 
				-static int ocfs2_do_request_vote(struct ocfs2_super *osb,
			
 
				-				 struct ocfs2_vote_msg *request,
			
 
				-				 struct ocfs2_net_response_cb *callback)
			
 
				-{
			
 
				-	int status, response = -EBUSY;
			
 
				-	unsigned int response_id;
			
 
				-	struct ocfs2_msg_hdr *hdr;
			
 
				-
			
 
				-	response_id = ocfs2_new_response_id(osb);
			
 
				-
			
 
				-	hdr = &request->v_hdr;
			
 
				-	hdr->h_response_id = cpu_to_be32(response_id);
			
 
				-
			
 
				-	status = ocfs2_broadcast_vote(osb, request, response_id, &response,
			
 
				-				      callback);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				-	status = response;
			
 
				-bail:
			
 
				-
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				-int ocfs2_request_mount_vote(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	int status;
			
 
				-	struct ocfs2_vote_msg *request = NULL;
			
 
				-
			
 
				-	request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_MOUNT);
			
 
				-	if (!request) {
			
 
				-		status = -ENOMEM;
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				-	status = -EAGAIN;
			
 
				-	while (status == -EAGAIN) {
			
 
				-		if (!(osb->s_mount_opt & OCFS2_MOUNT_NOINTR) &&
			
 
				-		    signal_pending(current)) {
			
 
				-			status = -ERESTARTSYS;
			
 
				-			goto bail;
			
 
				-		}
			
 
				-
			
 
				-		if (ocfs2_node_map_is_only(osb, &osb->mounted_map,
			
 
				-					   osb->node_num)) {
			
 
				-			status = 0;
			
 
				-			goto bail;
			
 
				-		}
			
 
				-
			
 
				-		status = ocfs2_do_request_vote(osb, request, NULL);
			
 
				-	}
			
 
				-
			
 
				-bail:
			
 
				-	kfree(request);
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				-int ocfs2_request_umount_vote(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	int status;
			
 
				-	struct ocfs2_vote_msg *request = NULL;
			
 
				-
			
 
				-	request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_UMOUNT);
			
 
				-	if (!request) {
			
 
				-		status = -ENOMEM;
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				-	status = -EAGAIN;
			
 
				-	while (status == -EAGAIN) {
			
 
				-		/* Do not check signals on this vote... We really want
			
 
				-		 * this one to go all the way through. */
			
 
				-
			
 
				-		if (ocfs2_node_map_is_only(osb, &osb->mounted_map,
			
 
				-					   osb->node_num)) {
			
 
				-			status = 0;
			
 
				-			goto bail;
			
 
				-		}
			
 
				-
			
 
				-		status = ocfs2_do_request_vote(osb, request, NULL);
			
 
				-	}
			
 
				-
			
 
				-bail:
			
 
				-	kfree(request);
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				-/* TODO: This should eventually be a hash table! */
			
 
				-static struct ocfs2_net_wait_ctxt * __ocfs2_find_net_wait_ctxt(struct ocfs2_super *osb,
			
 
				-							       u32 response_id)
			
 
				-{
			
 
				-	struct list_head *p;
			
 
				-	struct ocfs2_net_wait_ctxt *w = NULL;
			
 
				-
			
 
				-	list_for_each(p, &osb->net_response_list) {
			
 
				-		w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list);
			
 
				-		if (response_id == w->n_response_id)
			
 
				-			break;
			
 
				-		w = NULL;
			
 
				-	}
			
 
				-
			
 
				-	return w;
			
 
				-}
			
 
				-
			
 
				-/* Translate response codes into local node errno values */
			
 
				-static inline int ocfs2_translate_response(int response)
			
 
				-{
			
 
				-	int ret;
			
 
				-
			
 
				-	switch (response) {
			
 
				-	case OCFS2_RESPONSE_OK:
			
 
				-		ret = 0;
			
 
				-		break;
			
 
				-
			
 
				-	case OCFS2_RESPONSE_BUSY:
			
 
				-		ret = -EBUSY;
			
 
				-		break;
			
 
				-
			
 
				-	default:
			
 
				-		ret = -EINVAL;
			
 
				-	}
			
 
				-
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int ocfs2_handle_response_message(struct o2net_msg *msg,
			
 
				-					 u32 len,
			
 
				-					 void *data, void **ret_data)
			
 
				-{
			
 
				-	unsigned int response_id, node_num;
			
 
				-	int response_status;
			
 
				-	struct ocfs2_super *osb = data;
			
 
				-	struct ocfs2_response_msg *resp;
			
 
				-	struct ocfs2_net_wait_ctxt * w;
			
 
				-	struct ocfs2_net_response_cb *resp_cb;
			
 
				-
			
 
				-	resp = (struct ocfs2_response_msg *) msg->buf;
			
 
				-
			
 
				-	response_id = be32_to_cpu(resp->r_hdr.h_response_id);
			
 
				-	node_num = be32_to_cpu(resp->r_hdr.h_node_num);
			
 
				-	response_status = 
			
 
				-		ocfs2_translate_response(be32_to_cpu(resp->r_response));
			
 
				-
			
 
				-	mlog(0, "received response message:\n");
			
 
				-	mlog(0, "h_response_id = %u\n", response_id);
			
 
				-	mlog(0, "h_request = %u\n", be32_to_cpu(resp->r_hdr.h_request));
			
 
				-	mlog(0, "h_blkno = %llu\n",
			
 
				-	     (unsigned long long)be64_to_cpu(resp->r_hdr.h_blkno));
			
 
				-	mlog(0, "h_generation = %u\n", be32_to_cpu(resp->r_hdr.h_generation));
			
 
				-	mlog(0, "h_node_num = %u\n", node_num);
			
 
				-	mlog(0, "r_response = %d\n", response_status);
			
 
				-
			
 
				-	spin_lock(&osb->net_response_lock);
			
 
				-	w = __ocfs2_find_net_wait_ctxt(osb, response_id);
			
 
				-	if (!w) {
			
 
				-		mlog(0, "request not found!\n");
			
 
				-		goto bail;
			
 
				-	}
			
 
				-	resp_cb = w->n_callback;
			
 
				-
			
 
				-	if (response_status && (!w->n_response)) {
			
 
				-		/* we only really need one negative response so don't
			
 
				-		 * set it twice. */
			
 
				-		w->n_response = response_status;
			
 
				-	}
			
 
				-
			
 
				-	if (resp_cb) {
			
 
				-		spin_unlock(&osb->net_response_lock);
			
 
				-
			
 
				-		resp_cb->rc_cb(resp_cb->rc_priv, resp);
			
 
				-
			
 
				-		spin_lock(&osb->net_response_lock);
			
 
				-	}
			
 
				-
			
 
				-	__ocfs2_mark_node_responded(osb, w, node_num);
			
 
				-bail:
			
 
				-	spin_unlock(&osb->net_response_lock);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int ocfs2_handle_vote_message(struct o2net_msg *msg,
			
 
				-				     u32 len,
			
 
				-				     void *data, void **ret_data)
			
 
				-{
			
 
				-	int status;
			
 
				-	struct ocfs2_super *osb = data;
			
 
				-	struct ocfs2_vote_work *work;
			
 
				-
			
 
				-	work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS);
			
 
				-	if (!work) {
			
 
				-		status = -ENOMEM;
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				-	INIT_LIST_HEAD(&work->w_list);
			
 
				-	memcpy(&work->w_msg, msg->buf, sizeof(struct ocfs2_vote_msg));
			
 
				-
			
 
				-	mlog(0, "scheduling vote request:\n");
			
 
				-	mlog(0, "h_response_id = %u\n",
			
 
				-	     be32_to_cpu(work->w_msg.v_hdr.h_response_id));
			
 
				-	mlog(0, "h_request = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_request));
			
 
				-	mlog(0, "h_blkno = %llu\n",
			
 
				-	     (unsigned long long)be64_to_cpu(work->w_msg.v_hdr.h_blkno));
			
 
				-	mlog(0, "h_generation = %u\n",
			
 
				-	     be32_to_cpu(work->w_msg.v_hdr.h_generation));
			
 
				-	mlog(0, "h_node_num = %u\n",
			
 
				-	     be32_to_cpu(work->w_msg.v_hdr.h_node_num));
			
 
				-
			
 
				-	spin_lock(&osb->vote_task_lock);
			
 
				-	list_add_tail(&work->w_list, &osb->vote_list);
			
 
				-	osb->vote_count++;
			
 
				-	spin_unlock(&osb->vote_task_lock);
			
 
				-
			
 
				-	ocfs2_kick_vote_thread(osb);
			
 
				-
			
 
				-	status = 0;
			
 
				-bail:
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				-void ocfs2_unregister_net_handlers(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	if (!osb->net_key)
			
 
				-		return;
			
 
				-
			
 
				-	o2net_unregister_handler_list(&osb->osb_net_handlers);
			
 
				-
			
 
				-	if (!list_empty(&osb->net_response_list))
			
 
				-		mlog(ML_ERROR, "net response list not empty!\n");
			
 
				-
			
 
				-	osb->net_key = 0;
			
 
				-}
			
 
				-
			
 
				-int ocfs2_register_net_handlers(struct ocfs2_super *osb)
			
 
				-{
			
 
				-	int status = 0;
			
 
				-
			
 
				-	if (ocfs2_mount_local(osb))
			
 
				-		return 0;
			
 
				-
			
 
				-	status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE,
			
 
				-					osb->net_key,
			
 
				-					sizeof(struct ocfs2_response_msg),
			
 
				-					ocfs2_handle_response_message,
			
 
				-					osb, NULL, &osb->osb_net_handlers);
			
 
				-	if (status) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				-	status = o2net_register_handler(OCFS2_MESSAGE_TYPE_VOTE,
			
 
				-					osb->net_key,
			
 
				-					sizeof(struct ocfs2_vote_msg),
			
 
				-					ocfs2_handle_vote_message,
			
 
				-					osb, NULL, &osb->osb_net_handlers);
			
 
				-	if (status) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-bail:
			
 
				-	if (status < 0)
			
 
				-		ocfs2_unregister_net_handlers(osb);
			
 
				-
			
 
				-	return status;
			
 
				-}
			
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -49,6 +49,7 @@ header-y += comstats.h
 
				 header-y += const.h
			
 
				 header-y += cgroupstats.h
			
 
				 header-y += cycx_cfm.h
			
 
				+header-y += dlmconstants.h
			
 
				 header-y += dlm_device.h
			
 
				 header-y += dlm_netlink.h
			
 
				 header-y += dm-ioctl.h
			
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -19,148 +19,12 @@
 
				  * routines and structures to use DLM lockspaces
			
 
				  */
			
 
				 
			
 
				-/*
			
 
				- * Lock Modes
			
 
				- */
			
 
				+/* Lock levels and flags are here */
			
 
				+#include <linux/dlmconstants.h>
			
 
				 
			
 
				-#define DLM_LOCK_IV		-1	/* invalid */
			
 
				-#define DLM_LOCK_NL		0	/* null */
			
 
				-#define DLM_LOCK_CR		1	/* concurrent read */
			
 
				-#define DLM_LOCK_CW		2	/* concurrent write */
			
 
				-#define DLM_LOCK_PR		3	/* protected read */
			
 
				-#define DLM_LOCK_PW		4	/* protected write */
			
 
				-#define DLM_LOCK_EX		5	/* exclusive */
			
 
				-
			
 
				-/*
			
 
				- * Maximum size in bytes of a dlm_lock name
			
 
				- */
			
 
				 
			
 
				 #define DLM_RESNAME_MAXLEN	64
			
 
				 
			
 
				-/*
			
 
				- * Flags to dlm_lock
			
 
				- *
			
 
				- * DLM_LKF_NOQUEUE
			
 
				- *
			
 
				- * Do not queue the lock request on the wait queue if it cannot be granted
			
 
				- * immediately.  If the lock cannot be granted because of this flag, DLM will
			
 
				- * either return -EAGAIN from the dlm_lock call or will return 0 from
			
 
				- * dlm_lock and -EAGAIN in the lock status block when the AST is executed.
			
 
				- *
			
 
				- * DLM_LKF_CANCEL
			
 
				- *
			
 
				- * Used to cancel a pending lock request or conversion.  A converting lock is
			
 
				- * returned to its previously granted mode.
			
 
				- *
			
 
				- * DLM_LKF_CONVERT
			
 
				- *
			
 
				- * Indicates a lock conversion request.  For conversions the name and namelen
			
 
				- * are ignored and the lock ID in the LKSB is used to identify the lock.
			
 
				- *
			
 
				- * DLM_LKF_VALBLK
			
 
				- *
			
 
				- * Requests DLM to return the current contents of the lock value block in the
			
 
				- * lock status block.  When this flag is set in a lock conversion from PW or EX
			
 
				- * modes, DLM assigns the value specified in the lock status block to the lock
			
 
				- * value block of the lock resource.  The LVB is a DLM_LVB_LEN size array
			
 
				- * containing application-specific information.
			
 
				- *
			
 
				- * DLM_LKF_QUECVT
			
 
				- *
			
 
				- * Force a conversion request to be queued, even if it is compatible with
			
 
				- * the granted modes of other locks on the same resource.
			
 
				- *
			
 
				- * DLM_LKF_IVVALBLK
			
 
				- *
			
 
				- * Invalidate the lock value block.
			
 
				- *
			
 
				- * DLM_LKF_CONVDEADLK
			
 
				- *
			
 
				- * Allows the dlm to resolve conversion deadlocks internally by demoting the
			
 
				- * granted mode of a converting lock to NL.  The DLM_SBF_DEMOTED flag is
			
 
				- * returned for a conversion that's been effected by this.
			
 
				- *
			
 
				- * DLM_LKF_PERSISTENT
			
 
				- *
			
 
				- * Only relevant to locks originating in userspace.  A persistent lock will not
			
 
				- * be removed if the process holding the lock exits.
			
 
				- *
			
 
				- * DLM_LKF_NODLCKWT
			
 
				- *
			
 
				- * Do not cancel the lock if it gets into conversion deadlock.
			
 
				- * Exclude this lock from being monitored due to DLM_LSFL_TIMEWARN.
			
 
				- *
			
 
				- * DLM_LKF_NODLCKBLK
			
 
				- *
			
 
				- * net yet implemented
			
 
				- *
			
 
				- * DLM_LKF_EXPEDITE
			
 
				- *
			
 
				- * Used only with new requests for NL mode locks.  Tells the lock manager
			
 
				- * to grant the lock, ignoring other locks in convert and wait queues.
			
 
				- *
			
 
				- * DLM_LKF_NOQUEUEBAST
			
 
				- *
			
 
				- * Send blocking AST's before returning -EAGAIN to the caller.  It is only
			
 
				- * used along with the NOQUEUE flag.  Blocking AST's are not sent for failed
			
 
				- * NOQUEUE requests otherwise.
			
 
				- *
			
 
				- * DLM_LKF_HEADQUE
			
 
				- *
			
 
				- * Add a lock to the head of the convert or wait queue rather than the tail.
			
 
				- *
			
 
				- * DLM_LKF_NOORDER
			
 
				- *
			
 
				- * Disregard the standard grant order rules and grant a lock as soon as it
			
 
				- * is compatible with other granted locks.
			
 
				- *
			
 
				- * DLM_LKF_ORPHAN
			
 
				- *
			
 
				- * not yet implemented
			
 
				- *
			
 
				- * DLM_LKF_ALTPR
			
 
				- *
			
 
				- * If the requested mode cannot be granted immediately, try to grant the lock
			
 
				- * in PR mode instead.  If this alternate mode is granted instead of the
			
 
				- * requested mode, DLM_SBF_ALTMODE is returned in the lksb.
			
 
				- *
			
 
				- * DLM_LKF_ALTCW
			
 
				- *
			
 
				- * The same as ALTPR, but the alternate mode is CW.
			
 
				- *
			
 
				- * DLM_LKF_FORCEUNLOCK
			
 
				- *
			
 
				- * Unlock the lock even if it is converting or waiting or has sublocks.
			
 
				- * Only really for use by the userland device.c code.
			
 
				- *
			
 
				- */
			
 
				-
			
 
				-#define DLM_LKF_NOQUEUE		0x00000001
			
 
				-#define DLM_LKF_CANCEL		0x00000002
			
 
				-#define DLM_LKF_CONVERT		0x00000004
			
 
				-#define DLM_LKF_VALBLK		0x00000008
			
 
				-#define DLM_LKF_QUECVT		0x00000010
			
 
				-#define DLM_LKF_IVVALBLK	0x00000020
			
 
				-#define DLM_LKF_CONVDEADLK	0x00000040
			
 
				-#define DLM_LKF_PERSISTENT	0x00000080
			
 
				-#define DLM_LKF_NODLCKWT	0x00000100
			
 
				-#define DLM_LKF_NODLCKBLK	0x00000200
			
 
				-#define DLM_LKF_EXPEDITE	0x00000400
			
 
				-#define DLM_LKF_NOQUEUEBAST	0x00000800
			
 
				-#define DLM_LKF_HEADQUE		0x00001000
			
 
				-#define DLM_LKF_NOORDER		0x00002000
			
 
				-#define DLM_LKF_ORPHAN		0x00004000
			
 
				-#define DLM_LKF_ALTPR		0x00008000
			
 
				-#define DLM_LKF_ALTCW		0x00010000
			
 
				-#define DLM_LKF_FORCEUNLOCK	0x00020000
			
 
				-#define DLM_LKF_TIMEOUT		0x00040000
			
 
				-
			
 
				-/*
			
 
				- * Some return codes that are not in errno.h
			
 
				- */
			
 
				-
			
 
				-#define DLM_ECANCEL		0x10001
			
 
				-#define DLM_EUNLOCK		0x10002
			
 
				 
			
 
				 typedef void dlm_lockspace_t;
			
 
				 
			
--- a/include/linux/dlmconstants.h
+++ b/include/linux/dlmconstants.h
@@ -0,0 +1,159 @@
 
				+/******************************************************************************
			
 
				+*******************************************************************************
			
 
				+**
			
 
				+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
			
 
				+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
			
 
				+**
			
 
				+**  This copyrighted material is made available to anyone wishing to use,
			
 
				+**  modify, copy, or redistribute it subject to the terms and conditions
			
 
				+**  of the GNU General Public License v.2.
			
 
				+**
			
 
				+*******************************************************************************
			
 
				+******************************************************************************/
			
 
				+
			
 
				+#ifndef __DLMCONSTANTS_DOT_H__
			
 
				+#define __DLMCONSTANTS_DOT_H__
			
 
				+
			
 
				+/*
			
 
				+ * Constants used by DLM interface.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Lock Modes
			
 
				+ */
			
 
				+
			
 
				+#define DLM_LOCK_IV		(-1)	/* invalid */
			
 
				+#define DLM_LOCK_NL		0	/* null */
			
 
				+#define DLM_LOCK_CR		1	/* concurrent read */
			
 
				+#define DLM_LOCK_CW		2	/* concurrent write */
			
 
				+#define DLM_LOCK_PR		3	/* protected read */
			
 
				+#define DLM_LOCK_PW		4	/* protected write */
			
 
				+#define DLM_LOCK_EX		5	/* exclusive */
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * Flags to dlm_lock
			
 
				+ *
			
 
				+ * DLM_LKF_NOQUEUE
			
 
				+ *
			
 
				+ * Do not queue the lock request on the wait queue if it cannot be granted
			
 
				+ * immediately.  If the lock cannot be granted because of this flag, DLM will
			
 
				+ * either return -EAGAIN from the dlm_lock call or will return 0 from
			
 
				+ * dlm_lock and -EAGAIN in the lock status block when the AST is executed.
			
 
				+ *
			
 
				+ * DLM_LKF_CANCEL
			
 
				+ *
			
 
				+ * Used to cancel a pending lock request or conversion.  A converting lock is
			
 
				+ * returned to its previously granted mode.
			
 
				+ *
			
 
				+ * DLM_LKF_CONVERT
			
 
				+ *
			
 
				+ * Indicates a lock conversion request.  For conversions the name and namelen
			
 
				+ * are ignored and the lock ID in the LKSB is used to identify the lock.
			
 
				+ *
			
 
				+ * DLM_LKF_VALBLK
			
 
				+ *
			
 
				+ * Requests DLM to return the current contents of the lock value block in the
			
 
				+ * lock status block.  When this flag is set in a lock conversion from PW or EX
			
 
				+ * modes, DLM assigns the value specified in the lock status block to the lock
			
 
				+ * value block of the lock resource.  The LVB is a DLM_LVB_LEN size array
			
 
				+ * containing application-specific information.
			
 
				+ *
			
 
				+ * DLM_LKF_QUECVT
			
 
				+ *
			
 
				+ * Force a conversion request to be queued, even if it is compatible with
			
 
				+ * the granted modes of other locks on the same resource.
			
 
				+ *
			
 
				+ * DLM_LKF_IVVALBLK
			
 
				+ *
			
 
				+ * Invalidate the lock value block.
			
 
				+ *
			
 
				+ * DLM_LKF_CONVDEADLK
			
 
				+ *
			
 
				+ * Allows the dlm to resolve conversion deadlocks internally by demoting the
			
 
				+ * granted mode of a converting lock to NL.  The DLM_SBF_DEMOTED flag is
			
 
				+ * returned for a conversion that's been affected by this.
			
 
				+ *
			
 
				+ * DLM_LKF_PERSISTENT
			
 
				+ *
			
 
				+ * Only relevant to locks originating in userspace.  A persistent lock will not
			
 
				+ * be removed if the process holding the lock exits.
			
 
				+ *
			
 
				+ * DLM_LKF_NODLCKWT
			
 
				+ *
			
 
				+ * Do not cancel the lock if it gets into conversion deadlock.
			
 
				+ * Exclude this lock from being monitored due to DLM_LSFL_TIMEWARN.
			
 
				+ *
			
 
				+ * DLM_LKF_NODLCKBLK
			
 
				+ *
			
 
				+ * not yet implemented
			
 
				+ *
			
 
				+ * DLM_LKF_EXPEDITE
			
 
				+ *
			
 
				+ * Used only with new requests for NL mode locks.  Tells the lock manager
			
 
				+ * to grant the lock, ignoring other locks in convert and wait queues.
			
 
				+ *
			
 
				+ * DLM_LKF_NOQUEUEBAST
			
 
				+ *
			
 
				+ * Send blocking AST's before returning -EAGAIN to the caller.  It is only
			
 
				+ * used along with the NOQUEUE flag.  Blocking AST's are not sent for failed
			
 
				+ * NOQUEUE requests otherwise.
			
 
				+ *
			
 
				+ * DLM_LKF_HEADQUE
			
 
				+ *
			
 
				+ * Add a lock to the head of the convert or wait queue rather than the tail.
			
 
				+ *
			
 
				+ * DLM_LKF_NOORDER
			
 
				+ *
			
 
				+ * Disregard the standard grant order rules and grant a lock as soon as it
			
 
				+ * is compatible with other granted locks.
			
 
				+ *
			
 
				+ * DLM_LKF_ORPHAN
			
 
				+ *
			
 
				+ * not yet implemented
			
 
				+ *
			
 
				+ * DLM_LKF_ALTPR
			
 
				+ *
			
 
				+ * If the requested mode cannot be granted immediately, try to grant the lock
			
 
				+ * in PR mode instead.  If this alternate mode is granted instead of the
			
 
				+ * requested mode, DLM_SBF_ALTMODE is returned in the lksb.
			
 
				+ *
			
 
				+ * DLM_LKF_ALTCW
			
 
				+ *
			
 
				+ * The same as ALTPR, but the alternate mode is CW.
			
 
				+ *
			
 
				+ * DLM_LKF_FORCEUNLOCK
			
 
				+ *
			
 
				+ * Unlock the lock even if it is converting or waiting or has sublocks.
			
 
				+ * Only really for use by the userland device.c code.
			
 
				+ *
			
 
				+ */
			
 
				+
			
 
				+#define DLM_LKF_NOQUEUE		0x00000001
			
 
				+#define DLM_LKF_CANCEL		0x00000002
			
 
				+#define DLM_LKF_CONVERT		0x00000004
			
 
				+#define DLM_LKF_VALBLK		0x00000008
			
 
				+#define DLM_LKF_QUECVT		0x00000010
			
 
				+#define DLM_LKF_IVVALBLK	0x00000020
			
 
				+#define DLM_LKF_CONVDEADLK	0x00000040
			
 
				+#define DLM_LKF_PERSISTENT	0x00000080
			
 
				+#define DLM_LKF_NODLCKWT	0x00000100
			
 
				+#define DLM_LKF_NODLCKBLK	0x00000200
			
 
				+#define DLM_LKF_EXPEDITE	0x00000400
			
 
				+#define DLM_LKF_NOQUEUEBAST	0x00000800
			
 
				+#define DLM_LKF_HEADQUE		0x00001000
			
 
				+#define DLM_LKF_NOORDER		0x00002000
			
 
				+#define DLM_LKF_ORPHAN		0x00004000
			
 
				+#define DLM_LKF_ALTPR		0x00008000
			
 
				+#define DLM_LKF_ALTCW		0x00010000
			
 
				+#define DLM_LKF_FORCEUNLOCK	0x00020000
			
 
				+#define DLM_LKF_TIMEOUT		0x00040000
			
 
				+
			
 
				+/*
			
 
				+ * Some return codes that are not in errno.h
			
 
				+ */
			
 
				+
			
 
				+#define DLM_ECANCEL		0x10001
			
 
				+#define DLM_EUNLOCK		0x10002
			
 
				+
			
 
				+#endif  /* __DLMCONSTANTS_DOT_H__ */