
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (32 commits)
  ocfs2: recover orphans in offline slots during recovery and mount
  ocfs2: Pagecache usage optimization on ocfs2
  ocfs2: fix rare stale inode errors when exporting via nfs
  ocfs2/dlm: Tweak mle_state output
  ocfs2/dlm: Do not purge lockres that is being migrated dlm_purge_lockres()
  ocfs2/dlm: Remove struct dlm_lock_name in struct dlm_master_list_entry
  ocfs2/dlm: Show the number of lockres/mles in dlm_state
  ocfs2/dlm: dlm_set_lockres_owner() and dlm_change_lockres_owner() inlined
  ocfs2/dlm: Improve lockres counts
  ocfs2/dlm: Track number of mles
  ocfs2/dlm: Indent dlm_cleanup_master_list()
  ocfs2/dlm: Activate dlm->master_hash for master list entries
  ocfs2/dlm: Create and destroy the dlm->master_hash
  ocfs2/dlm: Refactor dlm_clean_master_list()
  ocfs2/dlm: Clean up struct dlm_lock_name
  ocfs2/dlm: Encapsulate adding and removing of mle from dlm->master_list
  ocfs2: Optimize inode group allocation by recording last used group.
  ocfs2: Allocate inode groups from global_bitmap.
  ocfs2: Optimize inode allocation by remembering last group
  ocfs2: fix leaf start calculation in ocfs2_dx_dir_rebalance()
  ...
Linus Torvalds, 16 years ago
commit 78609a812e

+ 57 - 0
fs/ocfs2/alloc.c

@@ -294,6 +294,55 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
 	.eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
 };
 
+static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
+					  u64 blkno)
+{
+	struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+	dx_root->dr_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+	return le64_to_cpu(dx_root->dr_last_eb_blk);
+}
+
+static void ocfs2_dx_root_update_clusters(struct inode *inode,
+					  struct ocfs2_extent_tree *et,
+					  u32 clusters)
+{
+	struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+	le32_add_cpu(&dx_root->dr_clusters, clusters);
+}
+
+static int ocfs2_dx_root_sanity_check(struct inode *inode,
+				      struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+	BUG_ON(!OCFS2_IS_VALID_DX_ROOT(dx_root));
+
+	return 0;
+}
+
+static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+	et->et_root_el = &dx_root->dr_list;
+}
+
+static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
+	.eo_set_last_eb_blk	= ocfs2_dx_root_set_last_eb_blk,
+	.eo_get_last_eb_blk	= ocfs2_dx_root_get_last_eb_blk,
+	.eo_update_clusters	= ocfs2_dx_root_update_clusters,
+	.eo_sanity_check	= ocfs2_dx_root_sanity_check,
+	.eo_fill_root_el	= ocfs2_dx_root_fill_root_el,
+};
+
 static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 				     struct inode *inode,
 				     struct buffer_head *bh,
@@ -339,6 +388,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 				 &ocfs2_xattr_value_et_ops);
 }
 
+void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
+				    struct inode *inode,
+				    struct buffer_head *bh)
+{
+	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr,
+				 NULL, &ocfs2_dx_root_et_ops);
+}
+
 static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					    u64 new_last_eb_blk)
 {

+ 3 - 0
fs/ocfs2/alloc.h

@@ -75,6 +75,9 @@ struct ocfs2_xattr_value_buf;
 void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 					struct inode *inode,
 					struct ocfs2_xattr_value_buf *vb);
+void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
+				    struct inode *inode,
+				    struct buffer_head *bh);
 
 
 /*
  * Read an extent block into *bh.  If *bh is NULL, a bh will be

+ 12 - 11
fs/ocfs2/aops.c

@@ -1956,15 +1956,16 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
 }
 
 const struct address_space_operations ocfs2_aops = {
-	.readpage	= ocfs2_readpage,
-	.readpages	= ocfs2_readpages,
-	.writepage	= ocfs2_writepage,
-	.write_begin	= ocfs2_write_begin,
-	.write_end	= ocfs2_write_end,
-	.bmap		= ocfs2_bmap,
-	.sync_page	= block_sync_page,
-	.direct_IO	= ocfs2_direct_IO,
-	.invalidatepage	= ocfs2_invalidatepage,
-	.releasepage	= ocfs2_releasepage,
-	.migratepage	= buffer_migrate_page,
+	.readpage		= ocfs2_readpage,
+	.readpages		= ocfs2_readpages,
+	.writepage		= ocfs2_writepage,
+	.write_begin		= ocfs2_write_begin,
+	.write_end		= ocfs2_write_end,
+	.bmap			= ocfs2_bmap,
+	.sync_page		= block_sync_page,
+	.direct_IO		= ocfs2_direct_IO,
+	.invalidatepage		= ocfs2_invalidatepage,
+	.releasepage		= ocfs2_releasepage,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };

+ 95 - 1
fs/ocfs2/cluster/heartbeat.c

@@ -33,6 +33,7 @@
 #include <linux/random.h>
 #include <linux/crc32.h>
 #include <linux/time.h>
+#include <linux/debugfs.h>
 
 #include "heartbeat.h"
 #include "tcp.h"
@@ -60,6 +61,11 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
 static LIST_HEAD(o2hb_node_events);
 static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
 
+#define O2HB_DEBUG_DIR			"o2hb"
+#define O2HB_DEBUG_LIVENODES		"livenodes"
+static struct dentry *o2hb_debug_dir;
+static struct dentry *o2hb_debug_livenodes;
+
 static LIST_HEAD(o2hb_all_regions);
 
 static struct o2hb_callback {
@@ -905,7 +911,77 @@ static int o2hb_thread(void *data)
 	return 0;
 }
 
-void o2hb_init(void)
+#ifdef CONFIG_DEBUG_FS
+static int o2hb_debug_open(struct inode *inode, struct file *file)
+{
+	unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+	char *buf = NULL;
+	int i = -1;
+	int out = 0;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		goto bail;
+
+	o2hb_fill_node_map(map, sizeof(map));
+
+	while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
+		out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
+	out += snprintf(buf + out, PAGE_SIZE - out, "\n");
+
+	i_size_write(inode, out);
+
+	file->private_data = buf;
+
+	return 0;
+bail:
+	return -ENOMEM;
+}
+
+static int o2hb_debug_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
+				 size_t nbytes, loff_t *ppos)
+{
+	return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
+				       i_size_read(file->f_mapping->host));
+}
+#else
+static int o2hb_debug_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+static int o2hb_debug_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
+			       size_t nbytes, loff_t *ppos)
+{
+	return 0;
+}
+#endif  /* CONFIG_DEBUG_FS */
+
+static struct file_operations o2hb_debug_fops = {
+	.open =		o2hb_debug_open,
+	.release =	o2hb_debug_release,
+	.read =		o2hb_debug_read,
+	.llseek =	generic_file_llseek,
+};
+
+void o2hb_exit(void)
+{
+	if (o2hb_debug_livenodes)
+		debugfs_remove(o2hb_debug_livenodes);
+	if (o2hb_debug_dir)
+		debugfs_remove(o2hb_debug_dir);
+}
+
+int o2hb_init(void)
 {
 	int i;
 
@@ -918,6 +994,24 @@ void o2hb_init(void)
 	INIT_LIST_HEAD(&o2hb_node_events);
 
 	memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
+
+	o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
+	if (!o2hb_debug_dir) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
+
+	o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES,
+						   S_IFREG|S_IRUSR,
+						   o2hb_debug_dir, NULL,
+						   &o2hb_debug_fops);
+	if (!o2hb_debug_livenodes) {
+		mlog_errno(-ENOMEM);
+		debugfs_remove(o2hb_debug_dir);
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 /* if we're already in a callback then we're already serialized by the sem */

+ 2 - 1
fs/ocfs2/cluster/heartbeat.h

@@ -75,7 +75,8 @@ void o2hb_unregister_callback(const char *region_uuid,
 			      struct o2hb_callback_func *hc);
 void o2hb_fill_node_map(unsigned long *map,
 			unsigned bytes);
-void o2hb_init(void);
+void o2hb_exit(void);
+int o2hb_init(void);
 int o2hb_check_node_heartbeating(u8 node_num);
 int o2hb_check_node_heartbeating_from_callback(u8 node_num);
 int o2hb_check_local_node_heartbeating(void);

+ 7 - 2
fs/ocfs2/cluster/nodemanager.c

@@ -881,6 +881,7 @@ static void __exit exit_o2nm(void)
 	o2cb_sys_shutdown();
 
 	o2net_exit();
+	o2hb_exit();
 }
 
 static int __init init_o2nm(void)
@@ -889,11 +890,13 @@ static int __init init_o2nm(void)
 
 	cluster_print_version();
 
-	o2hb_init();
+	ret = o2hb_init();
+	if (ret)
+		goto out;
 
 
 	ret = o2net_init();
 	if (ret)
-		goto out;
+		goto out_o2hb;
 
 
 	ret = o2net_register_hb_callbacks();
 	if (ret)
@@ -916,6 +919,8 @@ out_callbacks:
 	o2net_unregister_hb_callbacks();
 out_o2net:
 	o2net_exit();
+out_o2hb:
+	o2hb_exit();
 out:
 	return ret;
 }

File diff suppressed because it is too large
+ 891 - 102
fs/ocfs2/dir.c


+ 43 - 14
fs/ocfs2/dir.h

@@ -26,44 +26,70 @@
 #ifndef OCFS2_DIR_H
 #define OCFS2_DIR_H
 
-struct buffer_head *ocfs2_find_entry(const char *name,
-				     int namelen,
-				     struct inode *dir,
-				     struct ocfs2_dir_entry **res_dir);
+struct ocfs2_dx_hinfo {
+	u32	major_hash;
+	u32	minor_hash;
+};
+
+struct ocfs2_dir_lookup_result {
+	struct buffer_head		*dl_leaf_bh;	/* Unindexed leaf
+							 * block */
+	struct ocfs2_dir_entry		*dl_entry;	/* Target dirent in
+							 * unindexed leaf */
+
+	struct buffer_head		*dl_dx_root_bh;	/* Root of indexed
+							 * tree */
+
+	struct buffer_head		*dl_dx_leaf_bh;	/* Indexed leaf block */
+	struct ocfs2_dx_entry		*dl_dx_entry;	/* Target dx_entry in
+							 * indexed leaf */
+	struct ocfs2_dx_hinfo		dl_hinfo;	/* Name hash results */
+
+	struct buffer_head		*dl_prev_leaf_bh;/* Previous entry in
+							  * dir free space
+							  * list. NULL if
+							  * previous entry is
+							  * dx root block. */
+};
+
+void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res);
+
+int ocfs2_find_entry(const char *name, int namelen,
+		     struct inode *dir,
+		     struct ocfs2_dir_lookup_result *lookup);
 int ocfs2_delete_entry(handle_t *handle,
 		       struct inode *dir,
-		       struct ocfs2_dir_entry *de_del,
-		       struct buffer_head *bh);
+		       struct ocfs2_dir_lookup_result *res);
 int __ocfs2_add_entry(handle_t *handle,
 		      struct inode *dir,
 		      const char *name, int namelen,
 		      struct inode *inode, u64 blkno,
 		      struct buffer_head *parent_fe_bh,
-		      struct buffer_head *insert_bh);
+		      struct ocfs2_dir_lookup_result *lookup);
 static inline int ocfs2_add_entry(handle_t *handle,
 				  struct dentry *dentry,
 				  struct inode *inode, u64 blkno,
 				  struct buffer_head *parent_fe_bh,
-				  struct buffer_head *insert_bh)
+				  struct ocfs2_dir_lookup_result *lookup)
 {
 	return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
 				 dentry->d_name.name, dentry->d_name.len,
-				 inode, blkno, parent_fe_bh, insert_bh);
+				 inode, blkno, parent_fe_bh, lookup);
 }
 int ocfs2_update_entry(struct inode *dir, handle_t *handle,
-		       struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
+		       struct ocfs2_dir_lookup_result *res,
 		       struct inode *new_entry_inode);
 
 int ocfs2_check_dir_for_entry(struct inode *dir,
 			      const char *name,
 			      int namelen);
 int ocfs2_empty_dir(struct inode *inode);
+
 int ocfs2_find_files_on_disk(const char *name,
 			     int namelen,
 			     u64 *blkno,
 			     struct inode *inode,
-			     struct buffer_head **dirent_bh,
-			     struct ocfs2_dir_entry **dirent);
+			     struct ocfs2_dir_lookup_result *res);
 int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
 			       int namelen, u64 *blkno);
 int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
@@ -74,14 +100,17 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
 				 struct buffer_head *parent_fe_bh,
 				 const char *name,
 				 int namelen,
-				 struct buffer_head **ret_de_bh);
+				 struct ocfs2_dir_lookup_result *lookup);
 struct ocfs2_alloc_context;
 int ocfs2_fill_new_dir(struct ocfs2_super *osb,
 		       handle_t *handle,
 		       struct inode *parent,
 		       struct inode *inode,
 		       struct buffer_head *fe_bh,
-		       struct ocfs2_alloc_context *data_ac);
+		       struct ocfs2_alloc_context *data_ac,
+		       struct ocfs2_alloc_context *meta_ac);
+
+int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh);
 
 struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
 							    void *data);

+ 40 - 18
fs/ocfs2/dlm/dlmcommon.h

@@ -52,16 +52,12 @@
 enum dlm_mle_type {
 	DLM_MLE_BLOCK,
 	DLM_MLE_MASTER,
-	DLM_MLE_MIGRATION
-};
-
-struct dlm_lock_name {
-	u8 len;
-	u8 name[DLM_LOCKID_NAME_MAX];
+	DLM_MLE_MIGRATION,
+	DLM_MLE_NUM_TYPES
 };
 
 struct dlm_master_list_entry {
-	struct list_head list;
+	struct hlist_node master_hash_node;
 	struct list_head hb_events;
 	struct dlm_ctxt *dlm;
 	spinlock_t spinlock;
@@ -78,10 +74,10 @@ struct dlm_master_list_entry {
 	enum dlm_mle_type type;
 	struct o2hb_callback_func mle_hb_up;
 	struct o2hb_callback_func mle_hb_down;
-	union {
-		struct dlm_lock_resource *res;
-		struct dlm_lock_name name;
-	} u;
+	struct dlm_lock_resource *mleres;
+	unsigned char mname[DLM_LOCKID_NAME_MAX];
+	unsigned int mnamelen;
+	unsigned int mnamehash;
 };
 
 enum dlm_ast_type {
@@ -151,13 +147,14 @@ struct dlm_ctxt
 	unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
 	struct dlm_recovery_ctxt reco;
 	spinlock_t master_lock;
-	struct list_head master_list;
+	struct hlist_head **master_hash;
 	struct list_head mle_hb_events;
 
 	/* these give a really vague idea of the system load */
-	atomic_t local_resources;
-	atomic_t remote_resources;
-	atomic_t unknown_resources;
+	atomic_t mle_tot_count[DLM_MLE_NUM_TYPES];
+	atomic_t mle_cur_count[DLM_MLE_NUM_TYPES];
+	atomic_t res_tot_count;
+	atomic_t res_cur_count;
 
 
 	struct dlm_debug_ctxt *dlm_debug_ctxt;
 	struct dentry *dlm_debugfs_subroot;
@@ -195,6 +192,13 @@ static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned
 	return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE);
 }
 
+static inline struct hlist_head *dlm_master_hash(struct dlm_ctxt *dlm,
+						 unsigned i)
+{
+	return dlm->master_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] +
+			(i % DLM_BUCKETS_PER_PAGE);
+}
+
 /* these keventd work queue items are for less-frequently
  * called functions that cannot be directly called from the
  * net message handlers for some reason, usually because
@@ -848,9 +852,7 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
 					      unsigned int len);
 
 int dlm_is_host_down(int errno);
-void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
-			      struct dlm_lock_resource *res,
-			      u8 owner);
+
 struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 						 const char *lockid,
 						 int namelen,
@@ -1008,6 +1010,9 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
 					  DLM_LOCK_RES_MIGRATING));
 }
 
+void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle);
+void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle);
+
 /* create/destroy slab caches */
 int dlm_init_master_caches(void);
 void dlm_destroy_master_caches(void);
@@ -1110,6 +1115,23 @@ static inline int dlm_node_iter_next(struct dlm_node_iter *iter)
 	return bit;
 }
 
+static inline void dlm_set_lockres_owner(struct dlm_ctxt *dlm,
+					 struct dlm_lock_resource *res,
+					 u8 owner)
+{
+	assert_spin_locked(&res->spinlock);
+
+	res->owner = owner;
+}
 
 
+static inline void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
+					    struct dlm_lock_resource *res,
+					    u8 owner)
+{
+	assert_spin_locked(&res->spinlock);
+
+	if (owner != res->owner)
+		dlm_set_lockres_owner(dlm, res, owner);
+}
 
 #endif /* DLMCOMMON_H */

+ 55 - 32
fs/ocfs2/dlm/dlmdebug.c

@@ -287,18 +287,8 @@ static int stringify_nodemap(unsigned long *nodemap, int maxnodes,
 static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
 {
 	int out = 0;
-	unsigned int namelen;
-	const char *name;
 	char *mle_type;
 
-	if (mle->type != DLM_MLE_MASTER) {
-		namelen = mle->u.name.len;
-		name = mle->u.name.name;
-	} else {
-		namelen = mle->u.res->lockname.len;
-		name = mle->u.res->lockname.name;
-	}
-
 	if (mle->type == DLM_MLE_BLOCK)
 		mle_type = "BLK";
 	else if (mle->type == DLM_MLE_MASTER)
@@ -306,7 +296,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
 	else
 		mle_type = "MIG";
 
-	out += stringify_lockname(name, namelen, buf + out, len - out);
+	out += stringify_lockname(mle->mname, mle->mnamelen, buf + out, len - out);
 	out += snprintf(buf + out, len - out,
 			"\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n",
 			mle_type, mle->master, mle->new_master,
@@ -501,23 +491,33 @@ static struct file_operations debug_purgelist_fops = {
 static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 {
 	struct dlm_master_list_entry *mle;
-	int out = 0;
-	unsigned long total = 0;
+	struct hlist_head *bucket;
+	struct hlist_node *list;
+	int i, out = 0;
+	unsigned long total = 0, longest = 0, bktcnt;
 
 
 	out += snprintf(db->buf + out, db->len - out,
 			"Dumping MLEs for Domain: %s\n", dlm->name);
 
 	spin_lock(&dlm->master_lock);
-	list_for_each_entry(mle, &dlm->master_list, list) {
-		++total;
-		if (db->len - out < 200)
-			continue;
-		out += dump_mle(mle, db->buf + out, db->len - out);
+	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
+		bucket = dlm_master_hash(dlm, i);
+		hlist_for_each(list, bucket) {
+			mle = hlist_entry(list, struct dlm_master_list_entry,
+					  master_hash_node);
+			++total;
+			++bktcnt;
+			if (db->len - out < 200)
+				continue;
+			out += dump_mle(mle, db->buf + out, db->len - out);
+		}
+		longest = max(longest, bktcnt);
+		bktcnt = 0;
 	}
 	spin_unlock(&dlm->master_lock);
 
 	out += snprintf(db->buf + out, db->len - out,
-			"Total on list: %ld\n", total);
+			"Total: %ld, Longest: %ld\n", total, longest);
 	return out;
 }
 
@@ -756,12 +756,8 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 	int out = 0;
 	struct dlm_reco_node_data *node;
 	char *state;
-	int lres, rres, ures, tres;
-
-	lres = atomic_read(&dlm->local_resources);
-	rres = atomic_read(&dlm->remote_resources);
-	ures = atomic_read(&dlm->unknown_resources);
-	tres = lres + rres + ures;
+	int cur_mles = 0, tot_mles = 0;
+	int i;
 
 
 	spin_lock(&dlm->spinlock);
 
@@ -804,21 +800,48 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 				 db->buf + out, db->len - out);
 	out += snprintf(db->buf + out, db->len - out, "\n");
 
-	/* Mastered Resources Total: xxx  Locally: xxx  Remotely: ... */
+	/* Lock Resources: xxx (xxx) */
+	out += snprintf(db->buf + out, db->len - out,
+			"Lock Resources: %d (%d)\n",
+			atomic_read(&dlm->res_cur_count),
+			atomic_read(&dlm->res_tot_count));
+
+	for (i = 0; i < DLM_MLE_NUM_TYPES; ++i)
+		tot_mles += atomic_read(&dlm->mle_tot_count[i]);
+
+	for (i = 0; i < DLM_MLE_NUM_TYPES; ++i)
+		cur_mles += atomic_read(&dlm->mle_cur_count[i]);
+
+	/* MLEs: xxx (xxx) */
+	out += snprintf(db->buf + out, db->len - out,
+			"MLEs: %d (%d)\n", cur_mles, tot_mles);
+
+	/*  Blocking: xxx (xxx) */
+	out += snprintf(db->buf + out, db->len - out,
+			"  Blocking: %d (%d)\n",
+			atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
+			atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
+
+	/*  Mastery: xxx (xxx) */
+	out += snprintf(db->buf + out, db->len - out,
+			"  Mastery: %d (%d)\n",
+			atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
+			atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
+
+	/*  Migration: xxx (xxx) */
 	out += snprintf(db->buf + out, db->len - out,
-			"Mastered Resources Total: %d  Locally: %d  "
-			"Remotely: %d  Unknown: %d\n",
-			tres, lres, rres, ures);
+			"  Migration: %d (%d)\n",
+			atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
+			atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
 
 
 	/* Lists: Dirty=Empty  Purge=InUse  PendingASTs=Empty  ... */
 	out += snprintf(db->buf + out, db->len - out,
 			"Lists: Dirty=%s  Purge=%s  PendingASTs=%s  "
-			"PendingBASTs=%s  Master=%s\n",
+			"PendingBASTs=%s\n",
 			(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
 			(list_empty(&dlm->purge_list) ? "Empty" : "InUse"),
 			(list_empty(&dlm->pending_asts) ? "Empty" : "InUse"),
-			(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"),
-			(list_empty(&dlm->master_list) ? "Empty" : "InUse"));
+			(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
 
 
 	/* Purge Count: xxx  Refs: xxx */
 	out += snprintf(db->buf + out, db->len - out,

+ 25 - 4
fs/ocfs2/dlm/dlmdomain.c

@@ -304,6 +304,9 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
 	if (dlm->lockres_hash)
 		dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
 
+	if (dlm->master_hash)
+		dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
+
 	if (dlm->name)
 		kfree(dlm->name);
 
@@ -1534,12 +1537,27 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 	for (i = 0; i < DLM_HASH_BUCKETS; i++)
 		INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i));
 
+	dlm->master_hash = (struct hlist_head **)
+				dlm_alloc_pagevec(DLM_HASH_PAGES);
+	if (!dlm->master_hash) {
+		mlog_errno(-ENOMEM);
+		dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
+		kfree(dlm->name);
+		kfree(dlm);
+		dlm = NULL;
+		goto leave;
+	}
+
+	for (i = 0; i < DLM_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(dlm_master_hash(dlm, i));
+
 	strcpy(dlm->name, domain);
 	dlm->key = key;
 	dlm->node_num = o2nm_this_node();
 
 	ret = dlm_create_debugfs_subroot(dlm);
 	if (ret < 0) {
+		dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
 		dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
 		kfree(dlm->name);
 		kfree(dlm);
@@ -1579,7 +1597,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 	init_waitqueue_head(&dlm->reco.event);
 	init_waitqueue_head(&dlm->ast_wq);
 	init_waitqueue_head(&dlm->migration_wq);
-	INIT_LIST_HEAD(&dlm->master_list);
 	INIT_LIST_HEAD(&dlm->mle_hb_events);
 
 	dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
@@ -1587,9 +1604,13 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 
 	dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
 	dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
-	atomic_set(&dlm->local_resources, 0);
-	atomic_set(&dlm->remote_resources, 0);
-	atomic_set(&dlm->unknown_resources, 0);
+
+	atomic_set(&dlm->res_tot_count, 0);
+	atomic_set(&dlm->res_cur_count, 0);
+	for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) {
+		atomic_set(&dlm->mle_tot_count[i], 0);
+		atomic_set(&dlm->mle_cur_count[i], 0);
+	}
 
 
 	spin_lock_init(&dlm->work_lock);
 	INIT_LIST_HEAD(&dlm->work_list);

+ 197 - 190
fs/ocfs2/dlm/dlmmaster.c

@@ -73,22 +73,13 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
 				const char *name,
 				unsigned int namelen)
 {
-	struct dlm_lock_resource *res;
-
 	if (dlm != mle->dlm)
 		return 0;
 
-	if (mle->type == DLM_MLE_BLOCK ||
-	    mle->type == DLM_MLE_MIGRATION) {
-		if (namelen != mle->u.name.len ||
-    	    	    memcmp(name, mle->u.name.name, namelen)!=0)
-			return 0;
-	} else {
-		res = mle->u.res;
-		if (namelen != res->lockname.len ||
-		    memcmp(res->lockname.name, name, namelen) != 0)
-			return 0;
-	}
+	if (namelen != mle->mnamelen ||
+	    memcmp(name, mle->mname, namelen) != 0)
+		return 0;
+
 	return 1;
 }
 
@@ -283,7 +274,7 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
 
 	mle->dlm = dlm;
 	mle->type = type;
-	INIT_LIST_HEAD(&mle->list);
+	INIT_HLIST_NODE(&mle->master_hash_node);
 	INIT_LIST_HEAD(&mle->hb_events);
 	memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
 	spin_lock_init(&mle->spinlock);
@@ -295,19 +286,27 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
 	mle->new_master = O2NM_MAX_NODES;
 	mle->inuse = 0;
 
+	BUG_ON(mle->type != DLM_MLE_BLOCK &&
+	       mle->type != DLM_MLE_MASTER &&
+	       mle->type != DLM_MLE_MIGRATION);
+
 	if (mle->type == DLM_MLE_MASTER) {
 		BUG_ON(!res);
-		mle->u.res = res;
-	} else if (mle->type == DLM_MLE_BLOCK) {
-		BUG_ON(!name);
-		memcpy(mle->u.name.name, name, namelen);
-		mle->u.name.len = namelen;
-	} else /* DLM_MLE_MIGRATION */ {
+		mle->mleres = res;
+		memcpy(mle->mname, res->lockname.name, res->lockname.len);
+		mle->mnamelen = res->lockname.len;
+		mle->mnamehash = res->lockname.hash;
+	} else {
 		BUG_ON(!name);
-		memcpy(mle->u.name.name, name, namelen);
-		mle->u.name.len = namelen;
+		mle->mleres = NULL;
+		memcpy(mle->mname, name, namelen);
+		mle->mnamelen = namelen;
+		mle->mnamehash = dlm_lockid_hash(name, namelen);
 	}
 
+	atomic_inc(&dlm->mle_tot_count[mle->type]);
+	atomic_inc(&dlm->mle_cur_count[mle->type]);
+
 	/* copy off the node_map and register hb callbacks on our copy */
 	memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map));
 	memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map));
@@ -318,6 +317,24 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
 	__dlm_mle_attach_hb_events(dlm, mle);
 }
 
+void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
+{
+	assert_spin_locked(&dlm->spinlock);
+	assert_spin_locked(&dlm->master_lock);
+
+	if (!hlist_unhashed(&mle->master_hash_node))
+		hlist_del_init(&mle->master_hash_node);
+}
+
+void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
+{
+	struct hlist_head *bucket;
+
+	assert_spin_locked(&dlm->master_lock);
+
+	bucket = dlm_master_hash(dlm, mle->mnamehash);
+	hlist_add_head(&mle->master_hash_node, bucket);
+}
 
 
 /* returns 1 if found, 0 if not */
 /* returns 1 if found, 0 if not */
 static int dlm_find_mle(struct dlm_ctxt *dlm,
 static int dlm_find_mle(struct dlm_ctxt *dlm,
@@ -325,10 +342,17 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
 			char *name, unsigned int namelen)
 {
 	struct dlm_master_list_entry *tmpmle;
+	struct hlist_head *bucket;
+	struct hlist_node *list;
+	unsigned int hash;
 
 
 	assert_spin_locked(&dlm->master_lock);
 
-	list_for_each_entry(tmpmle, &dlm->master_list, list) {
+	hash = dlm_lockid_hash(name, namelen);
+	bucket = dlm_master_hash(dlm, hash);
+	hlist_for_each(list, bucket) {
+		tmpmle = hlist_entry(list, struct dlm_master_list_entry,
+				     master_hash_node);
 		if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
 			continue;
 		dlm_get_mle(tmpmle);
@@ -408,24 +432,20 @@ static void dlm_mle_release(struct kref *kref)
 	mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
 	dlm = mle->dlm;
 
-	if (mle->type != DLM_MLE_MASTER) {
-		mlog(0, "calling mle_release for %.*s, type %d\n",
-		     mle->u.name.len, mle->u.name.name, mle->type);
-	} else {
-		mlog(0, "calling mle_release for %.*s, type %d\n",
-		     mle->u.res->lockname.len,
-		     mle->u.res->lockname.name, mle->type);
-	}
 	assert_spin_locked(&dlm->spinlock);
 	assert_spin_locked(&dlm->master_lock);
 
+	mlog(0, "Releasing mle for %.*s, type %d\n", mle->mnamelen, mle->mname,
+	     mle->type);
+
 	/* remove from list if not already */
-	if (!list_empty(&mle->list))
-		list_del_init(&mle->list);
+	__dlm_unlink_mle(dlm, mle);
 
 
 	/* detach the mle from the domain node up/down events */
 	__dlm_mle_detach_hb_events(dlm, mle);
 
+	atomic_dec(&dlm->mle_cur_count[mle->type]);
+
 	/* NOTE: kfree under spinlock here.
 	 * if this is bad, we can move this to a freelist. */
 	kmem_cache_free(dlm_mle_cache, mle);
@@ -465,43 +485,6 @@ void dlm_destroy_master_caches(void)
 		kmem_cache_destroy(dlm_lockres_cache);
 }
 
-static void dlm_set_lockres_owner(struct dlm_ctxt *dlm,
-				  struct dlm_lock_resource *res,
-				  u8 owner)
-{
-	assert_spin_locked(&res->spinlock);
-
-	mlog_entry("%.*s, %u\n", res->lockname.len, res->lockname.name, owner);
-
-	if (owner == dlm->node_num)
-		atomic_inc(&dlm->local_resources);
-	else if (owner == DLM_LOCK_RES_OWNER_UNKNOWN)
-		atomic_inc(&dlm->unknown_resources);
-	else
-		atomic_inc(&dlm->remote_resources);
-
-	res->owner = owner;
-}
-
-void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
-			      struct dlm_lock_resource *res, u8 owner)
-{
-	assert_spin_locked(&res->spinlock);
-
-	if (owner == res->owner)
-		return;
-
-	if (res->owner == dlm->node_num)
-		atomic_dec(&dlm->local_resources);
-	else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN)
-		atomic_dec(&dlm->unknown_resources);
-	else
-		atomic_dec(&dlm->remote_resources);
-
-	dlm_set_lockres_owner(dlm, res, owner);
-}
-
-
 static void dlm_lockres_release(struct kref *kref)
 {
 	struct dlm_lock_resource *res;
@@ -527,6 +510,8 @@ static void dlm_lockres_release(struct kref *kref)
 	}
 	spin_unlock(&dlm->track_lock);
 
+	atomic_dec(&dlm->res_cur_count);
+
 	dlm_put(dlm);
 
 	if (!hlist_unhashed(&res->hash_node) ||
@@ -607,6 +592,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
 
 	kref_init(&res->refs);
 
+	atomic_inc(&dlm->res_tot_count);
+	atomic_inc(&dlm->res_cur_count);
+
 	/* just for consistency */
 	spin_lock(&res->spinlock);
 	dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN);
@@ -843,7 +831,7 @@ lookup:
 		alloc_mle = NULL;
 		dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
 		set_bit(dlm->node_num, mle->maybe_map);
-		list_add(&mle->list, &dlm->master_list);
+		__dlm_insert_mle(dlm, mle);
 
 
 		/* still holding the dlm spinlock, check the recovery map
 		 * to see if there are any nodes that still need to be 
@@ -1270,7 +1258,7 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
 						     res->lockname.len,
 						     res->lockname.name);
 						mle->type = DLM_MLE_MASTER;
-						mle->u.res = res;
+						mle->mleres = res;
 					}
 				}
 			}
@@ -1315,14 +1303,8 @@ static int dlm_do_master_request(struct dlm_lock_resource *res,
 
 	BUG_ON(mle->type == DLM_MLE_MIGRATION);
 
-	if (mle->type != DLM_MLE_MASTER) {
-		request.namelen = mle->u.name.len;
-		memcpy(request.name, mle->u.name.name, request.namelen);
-	} else {
-		request.namelen = mle->u.res->lockname.len;
-		memcpy(request.name, mle->u.res->lockname.name,
-			request.namelen);
-	}
+	request.namelen = (u8)mle->mnamelen;
+	memcpy(request.name, mle->mname, request.namelen);
 
 
 again:
 	ret = o2net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request,
@@ -1575,7 +1557,7 @@ way_up_top:
 		// "add the block.\n");
 		dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
 		set_bit(request->node_idx, mle->maybe_map);
-		list_add(&mle->list, &dlm->master_list);
+		__dlm_insert_mle(dlm, mle);
 		response = DLM_MASTER_RESP_NO;
 	} else {
 		// mlog(0, "mle was found\n");
@@ -1967,7 +1949,7 @@ ok:
 			     assert->node_idx, rr, extra_ref, mle->inuse);
 			dlm_print_one_mle(mle);
 		}
-		list_del_init(&mle->list);
+		__dlm_unlink_mle(dlm, mle);
 		__dlm_mle_detach_hb_events(dlm, mle);
 		__dlm_put_mle(mle);
 		if (extra_ref) {
@@ -3159,10 +3141,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
 			tmp->master = master;
 			atomic_set(&tmp->woken, 1);
 			wake_up(&tmp->wq);
-			/* remove it from the list so that only one
-			 * mle will be found */
-			list_del_init(&tmp->list);
-			/* this was obviously WRONG.  mle is uninited here.  should be tmp. */
+			/* remove it so that only one mle will be found */
+			__dlm_unlink_mle(dlm, tmp);
 			__dlm_mle_detach_hb_events(dlm, tmp);
 			ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
 			mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
@@ -3181,137 +3161,164 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
 	mle->master = master;
 	/* do this for consistency with other mle types */
 	set_bit(new_master, mle->maybe_map);
-	list_add(&mle->list, &dlm->master_list);
+	__dlm_insert_mle(dlm, mle);
 
 
 	return ret;
 }
 
-
-void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
+/*
+ * Sets the owner of the lockres, associated to the mle, to UNKNOWN
+ */
+static struct dlm_lock_resource *dlm_reset_mleres_owner(struct dlm_ctxt *dlm,
+					struct dlm_master_list_entry *mle)
 {
-	struct dlm_master_list_entry *mle, *next;
 	struct dlm_lock_resource *res;
-	unsigned int hash;
 
 
-	mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node);
-top:
-	assert_spin_locked(&dlm->spinlock);
+	/* Find the lockres associated to the mle and set its owner to UNK */
+	res = __dlm_lookup_lockres(dlm, mle->mname, mle->mnamelen,
+				   mle->mnamehash);
+	if (res) {
+		spin_unlock(&dlm->master_lock);
 
 
-	/* clean the master list */
-	spin_lock(&dlm->master_lock);
-	list_for_each_entry_safe(mle, next, &dlm->master_list, list) {
-		BUG_ON(mle->type != DLM_MLE_BLOCK &&
-		       mle->type != DLM_MLE_MASTER &&
-		       mle->type != DLM_MLE_MIGRATION);
-
-		/* MASTER mles are initiated locally.  the waiting
-		 * process will notice the node map change
-		 * shortly.  let that happen as normal. */
-		if (mle->type == DLM_MLE_MASTER)
-			continue;
+		/* move lockres onto recovery list */
+		spin_lock(&res->spinlock);
+		dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN);
+		dlm_move_lockres_to_recovery_list(dlm, res);
+		spin_unlock(&res->spinlock);
+		dlm_lockres_put(res);
 
 
+		/* about to get rid of mle, detach from heartbeat */
+		__dlm_mle_detach_hb_events(dlm, mle);
 
 
-		/* BLOCK mles are initiated by other nodes.
-		 * need to clean up if the dead node would have
-		 * been the master. */
-		if (mle->type == DLM_MLE_BLOCK) {
-			int bit;
+		/* dump the mle */
+		spin_lock(&dlm->master_lock);
+		__dlm_put_mle(mle);
+		spin_unlock(&dlm->master_lock);
+	}
 
 
-			spin_lock(&mle->spinlock);
-			bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
-			if (bit != dead_node) {
-				mlog(0, "mle found, but dead node %u would "
-				     "not have been master\n", dead_node);
-				spin_unlock(&mle->spinlock);
-			} else {
-				/* must drop the refcount by one since the
-				 * assert_master will never arrive.  this
-				 * may result in the mle being unlinked and
-				 * freed, but there may still be a process
-				 * waiting in the dlmlock path which is fine. */
-				mlog(0, "node %u was expected master\n",
-				     dead_node);
-				atomic_set(&mle->woken, 1);
-				spin_unlock(&mle->spinlock);
-				wake_up(&mle->wq);
-				/* do not need events any longer, so detach 
-				 * from heartbeat */
-				__dlm_mle_detach_hb_events(dlm, mle);
-				__dlm_put_mle(mle);
-			}
-			continue;
-		}
+	return res;
+}
 
 
-		/* everything else is a MIGRATION mle */
-
-		/* the rule for MIGRATION mles is that the master
-		 * becomes UNKNOWN if *either* the original or
-		 * the new master dies.  all UNKNOWN lockreses
-		 * are sent to whichever node becomes the recovery
-		 * master.  the new master is responsible for
-		 * determining if there is still a master for
-		 * this lockres, or if he needs to take over
-		 * mastery.  either way, this node should expect
-		 * another message to resolve this. */
-		if (mle->master != dead_node &&
-		    mle->new_master != dead_node)
-			continue;
+static void dlm_clean_migration_mle(struct dlm_ctxt *dlm,
+				    struct dlm_master_list_entry *mle)
+{
+	__dlm_mle_detach_hb_events(dlm, mle);
 
 
-		/* if we have reached this point, this mle needs to
-		 * be removed from the list and freed. */
+	spin_lock(&mle->spinlock);
+	__dlm_unlink_mle(dlm, mle);
+	atomic_set(&mle->woken, 1);
+	spin_unlock(&mle->spinlock);
 
 
-		/* remove from the list early.  NOTE: unlinking
-		 * list_head while in list_for_each_safe */
-		__dlm_mle_detach_hb_events(dlm, mle);
-		spin_lock(&mle->spinlock);
-		list_del_init(&mle->list);
+	wake_up(&mle->wq);
+}
+
+static void dlm_clean_block_mle(struct dlm_ctxt *dlm,
+				struct dlm_master_list_entry *mle, u8 dead_node)
+{
+	int bit;
+
+	BUG_ON(mle->type != DLM_MLE_BLOCK);
+
+	spin_lock(&mle->spinlock);
+	bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
+	if (bit != dead_node) {
+		mlog(0, "mle found, but dead node %u would not have been "
+		     "master\n", dead_node);
+		spin_unlock(&mle->spinlock);
+	} else {
+		/* Must drop the refcount by one since the assert_master will
+		 * never arrive. This may result in the mle being unlinked and
+		 * freed, but there may still be a process waiting in the
+		 * dlmlock path which is fine. */
+		mlog(0, "node %u was expected master\n", dead_node);
 		atomic_set(&mle->woken, 1);
 		spin_unlock(&mle->spinlock);
 		wake_up(&mle->wq);
 
-		mlog(0, "%s: node %u died during migration from "
-		     "%u to %u!\n", dlm->name, dead_node,
-		     mle->master, mle->new_master);
-		/* if there is a lockres associated with this
-	 	 * mle, find it and set its owner to UNKNOWN */
-		hash = dlm_lockid_hash(mle->u.name.name, mle->u.name.len);
-		res = __dlm_lookup_lockres(dlm, mle->u.name.name,
-					   mle->u.name.len, hash);
-		if (res) {
-			/* unfortunately if we hit this rare case, our
-		 	 * lock ordering is messed.  we need to drop
-		 	 * the master lock so that we can take the
-		  	 * lockres lock, meaning that we will have to
-			 * restart from the head of list. */
-			spin_unlock(&dlm->master_lock);
+		/* Do not need events any longer, so detach from heartbeat */
+		__dlm_mle_detach_hb_events(dlm, mle);
+		__dlm_put_mle(mle);
+	}
+}
 
 
-			/* move lockres onto recovery list */
-			spin_lock(&res->spinlock);
-			dlm_set_lockres_owner(dlm, res,
-				      	DLM_LOCK_RES_OWNER_UNKNOWN);
-			dlm_move_lockres_to_recovery_list(dlm, res);
-			spin_unlock(&res->spinlock);
-			dlm_lockres_put(res);
+void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
+{
+	struct dlm_master_list_entry *mle;
+	struct dlm_lock_resource *res;
+	struct hlist_head *bucket;
+	struct hlist_node *list;
+	unsigned int i;
 
 
-			/* about to get rid of mle, detach from heartbeat */
-			__dlm_mle_detach_hb_events(dlm, mle);
+	mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node);
+top:
+	assert_spin_locked(&dlm->spinlock);
 
 
-			/* dump the mle */
-			spin_lock(&dlm->master_lock);
-			__dlm_put_mle(mle);
-			spin_unlock(&dlm->master_lock);
+	/* clean the master list */
+	spin_lock(&dlm->master_lock);
+	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
+		bucket = dlm_master_hash(dlm, i);
+		hlist_for_each(list, bucket) {
+			mle = hlist_entry(list, struct dlm_master_list_entry,
+					  master_hash_node);
+
+			BUG_ON(mle->type != DLM_MLE_BLOCK &&
+			       mle->type != DLM_MLE_MASTER &&
+			       mle->type != DLM_MLE_MIGRATION);
+
+			/* MASTER mles are initiated locally. The waiting
+			 * process will notice the node map change shortly.
+			 * Let that happen as normal. */
+			if (mle->type == DLM_MLE_MASTER)
+				continue;
+
+			/* BLOCK mles are initiated by other nodes. Need to
+			 * clean up if the dead node would have been the
+			 * master. */
+			if (mle->type == DLM_MLE_BLOCK) {
+				dlm_clean_block_mle(dlm, mle, dead_node);
+				continue;
+			}
 
 
-			/* restart */
-			goto top;
-		}
+			/* Everything else is a MIGRATION mle */
+
+			/* The rule for MIGRATION mles is that the master
+			 * becomes UNKNOWN if *either* the original or the new
+			 * master dies. All UNKNOWN lockres' are sent to
+			 * whichever node becomes the recovery master. The new
+			 * master is responsible for determining if there is
+			 * still a master for this lockres, or if he needs to
+			 * take over mastery. Either way, this node should
+			 * expect another message to resolve this. */
+
+			if (mle->master != dead_node &&
+			    mle->new_master != dead_node)
+				continue;
+
+			/* If we have reached this point, this mle needs to be
+			 * removed from the list and freed. */
+			dlm_clean_migration_mle(dlm, mle);
+
+			mlog(0, "%s: node %u died during migration from "
+			     "%u to %u!\n", dlm->name, dead_node, mle->master,
+			     mle->new_master);
+
+			/* If we find a lockres associated with the mle, we've
+			 * hit this rare case that messes up our lock ordering.
+			 * If so, we need to drop the master lock so that we can
+			 * take the lockres lock, meaning that we will have to
+			 * restart from the head of list. */
+			res = dlm_reset_mleres_owner(dlm, mle);
+			if (res)
+				/* restart */
+				goto top;
 
 
-		/* this may be the last reference */
-		__dlm_put_mle(mle);
+			/* This may be the last reference */
+			__dlm_put_mle(mle);
+		}
 	}
 	spin_unlock(&dlm->master_lock);
 }
 
-
 int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 			 u8 old_master)
 {

+ 18 - 2
fs/ocfs2/dlm/dlmthread.c

@@ -162,12 +162,28 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 
 	spin_lock(&res->spinlock);
 	if (!__dlm_lockres_unused(res)) {
-		spin_unlock(&res->spinlock);
 		mlog(0, "%s:%.*s: tried to purge but not unused\n",
 		     dlm->name, res->lockname.len, res->lockname.name);
-		return -ENOTEMPTY;
+		__dlm_print_one_lock_resource(res);
+		spin_unlock(&res->spinlock);
+		BUG();
 	}
+
+	if (res->state & DLM_LOCK_RES_MIGRATING) {
+		mlog(0, "%s:%.*s: Delay dropref as this lockres is "
+		     "being remastered\n", dlm->name, res->lockname.len,
+		     res->lockname.name);
+		/* Re-add the lockres to the end of the purge list */
+		if (!list_empty(&res->purge)) {
+			list_del_init(&res->purge);
+			list_add_tail(&res->purge, &dlm->purge_list);
+		}
+		spin_unlock(&res->spinlock);
+		return 0;
+	}
+
 	master = (res->owner == dlm->node_num);
+
 	if (!master)
 		res->state |= DLM_LOCK_RES_DROPPING_REF;
 	spin_unlock(&res->spinlock);

+ 46 - 0
fs/ocfs2/dlmglue.c

@@ -244,6 +244,10 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
 	.flags		= 0,
 };
 
+static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
+	.flags		= 0,
+};
+
 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
 	.get_osb	= ocfs2_get_dentry_osb,
 	.post_unlock	= ocfs2_dentry_post_unlock,
@@ -622,6 +626,17 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
 				   &ocfs2_rename_lops, osb);
 }
 
+static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
+					 struct ocfs2_super *osb)
+{
+	/* nfs_sync lockres doesn't come from a slab so we call init
+	 * once on it manually.  */
+	ocfs2_lock_res_init_once(res);
+	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
+	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
+				   &ocfs2_nfs_sync_lops, osb);
+}
+
 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
 			      struct ocfs2_file_private *fp)
 {
@@ -2417,6 +2432,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
 }
 
+int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
+{
+	int status;
+	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
+
+	if (ocfs2_is_hard_readonly(osb))
+		return -EROFS;
+
+	if (ocfs2_mount_local(osb))
+		return 0;
+
+	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
+				    0, 0);
+	if (status < 0)
+		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
+
+	return status;
+}
+
+void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
+{
+	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
+
+	if (!ocfs2_mount_local(osb))
+		ocfs2_cluster_unlock(osb, lockres,
+				     ex ? LKM_EXMODE : LKM_PRMODE);
+}
+
 int ocfs2_dentry_lock(struct dentry *dentry, int ex)
 {
 	int ret;
@@ -2798,6 +2841,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
 local:
 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
+	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
 
 
 	osb->cconn = conn;
 
@@ -2833,6 +2877,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
 
 	ocfs2_lock_res_free(&osb->osb_super_lockres);
 	ocfs2_lock_res_free(&osb->osb_rename_lockres);
+	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
 
 
 	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
 	osb->cconn = NULL;
@@ -3015,6 +3060,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
 {
 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
+	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
 }
 
 int ocfs2_drop_inode_locks(struct inode *inode)

+ 2 - 0
fs/ocfs2/dlmglue.h

@@ -115,6 +115,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
 			int ex);
 int ocfs2_rename_lock(struct ocfs2_super *osb);
 void ocfs2_rename_unlock(struct ocfs2_super *osb);
+int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex);
+void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex);
 int ocfs2_dentry_lock(struct dentry *dentry, int ex);
 void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
 int ocfs2_file_lock(struct file *file, int ex, int trylock);

+ 77 - 7
fs/ocfs2/export.c

@@ -31,6 +31,7 @@
 
 #include "ocfs2.h"
 
+#include "alloc.h"
 #include "dir.h"
 #include "dlmglue.h"
 #include "dcache.h"
@@ -38,6 +39,7 @@
 #include "inode.h"
 
 #include "buffer_head_io.h"
+#include "suballoc.h"
 
 struct ocfs2_inode_handle
 {
@@ -49,29 +51,97 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 		struct ocfs2_inode_handle *handle)
 {
 	struct inode *inode;
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	u64 blkno = handle->ih_blkno;
+	int status, set;
 	struct dentry *result;
 	struct dentry *result;
 
 
 	mlog_entry("(0x%p, 0x%p)\n", sb, handle);
 	mlog_entry("(0x%p, 0x%p)\n", sb, handle);
 
 
-	if (handle->ih_blkno == 0) {
-		mlog_errno(-ESTALE);
-		return ERR_PTR(-ESTALE);
+	if (blkno == 0) {
+		mlog(0, "nfs wants inode with blkno: 0\n");
+		result = ERR_PTR(-ESTALE);
+		goto bail;
+	}
+
+	inode = ocfs2_ilookup(sb, blkno);
+	/*
+	 * If the inode exists in memory, we only need to check it's
+	 * If the inode exists in memory, we only need to check its
+	 * generation number.
+	if (inode)
+		goto check_gen;
+
+	/*
+	 * This will synchronize us against ocfs2_delete_inode() on
+	 * all nodes
+	 */
+	status = ocfs2_nfs_sync_lock(osb, 1);
+	if (status < 0) {
+		mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
+		goto check_err;
+	}
+
+	status = ocfs2_test_inode_bit(osb, blkno, &set);
+	if (status < 0) {
+		if (status == -EINVAL) {
+			/*
+			 * The blkno NFS gave us doesn't even show up
+			 * as an inode, we return -ESTALE to be
+			 * as an inode, so we return -ESTALE to be
+			 * nice.
+			mlog(0, "test inode bit failed %d\n", status);
+			status = -ESTALE;
+		} else {
+			mlog(ML_ERROR, "test inode bit failed %d\n", status);
+		}
+		goto unlock_nfs_sync;
+	}
+
+	/* If the inode allocator bit is clear, this inode must be stale */
+	if (!set) {
+		mlog(0, "inode %llu suballoc bit is clear\n", blkno);
+		status = -ESTALE;
+		goto unlock_nfs_sync;
 	}
 	}
 
 
-	inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0);
+	inode = ocfs2_iget(osb, blkno, 0, 0);
 
 
-	if (IS_ERR(inode))
-		return (void *)inode;
+unlock_nfs_sync:
+	ocfs2_nfs_sync_unlock(osb, 1);
 
 
+check_err:
+	if (status < 0) {
+		if (status == -ESTALE) {
+			mlog(0, "stale inode ino: %llu generation: %u\n",
+			     blkno, handle->ih_generation);
+		}
+		result = ERR_PTR(status);
+		goto bail;
+	}
+
+	if (IS_ERR(inode)) {
+		mlog_errno(PTR_ERR(inode));
+		result = (void *)inode;
+		goto bail;
+	}
+
+check_gen:
 	if (handle->ih_generation != inode->i_generation) {
 	if (handle->ih_generation != inode->i_generation) {
 		iput(inode);
 		iput(inode);
-		return ERR_PTR(-ESTALE);
+		mlog(0, "stale inode ino: %llu generation: %u\n", blkno,
+		     handle->ih_generation);
+		result = ERR_PTR(-ESTALE);
+		goto bail;
 	}
 	}
 
 
 	result = d_obtain_alias(inode);
 	result = d_obtain_alias(inode);
 	if (!IS_ERR(result))
 	if (!IS_ERR(result))
 		result->d_op = &ocfs2_dentry_ops;
 		result->d_op = &ocfs2_dentry_ops;
+	else
+		mlog_errno(PTR_ERR(result));
 
 
+bail:
 	mlog_exit_ptr(result);
 	mlog_exit_ptr(result);
 	return result;
 	return result;
 }
 }

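The rewritten ocfs2_get_dentry() above splits into two paths: a fast path when the inode is already in memory (only the generation needs checking) and a slow path that takes the NFS sync lock and consults the inode allocator. A condensed sketch of the fast path, reusing the names from the hunk; example_fast_lookup() itself is hypothetical:

static struct inode *example_fast_lookup(struct super_block *sb, u64 blkno,
					 u32 generation)
{
	struct inode *inode = ocfs2_ilookup(sb, blkno);

	if (!inode)
		return NULL;	/* not cached: caller falls back to the slow path */

	if (inode->i_generation != generation) {
		iput(inode);	/* handle refers to a reused inode: stale */
		return ERR_PTR(-ESTALE);
	}

	return inode;
}
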
+ 43 - 5
fs/ocfs2/inode.c

@@ -38,6 +38,7 @@
 #include "ocfs2.h"
 #include "ocfs2.h"
 
 
 #include "alloc.h"
 #include "alloc.h"
+#include "dir.h"
 #include "blockcheck.h"
 #include "blockcheck.h"
 #include "dlmglue.h"
 #include "dlmglue.h"
 #include "extent_map.h"
 #include "extent_map.h"
@@ -112,6 +113,17 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
 		oi->ip_attr |= OCFS2_DIRSYNC_FL;
 		oi->ip_attr |= OCFS2_DIRSYNC_FL;
 }
 }
 
 
+struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
+{
+	struct ocfs2_find_inode_args args;
+
+	args.fi_blkno = blkno;
+	args.fi_flags = 0;
+	args.fi_ino = ino_from_blkno(sb, blkno);
+	args.fi_sysfile_type = 0;
+
+	return ilookup5(sb, blkno, ocfs2_find_actor, &args);
+}
 struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
 struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
 			 int sysfile_type)
 			 int sysfile_type)
 {
 {
@@ -275,7 +287,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 		     (unsigned long long)le64_to_cpu(fe->i_blkno));
 		     (unsigned long long)le64_to_cpu(fe->i_blkno));
 
 
-	inode->i_nlink = le16_to_cpu(fe->i_links_count);
+	inode->i_nlink = ocfs2_read_links_count(fe);
 
 
 	if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
 	if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
@@ -351,6 +363,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
 
 	ocfs2_set_inode_flags(inode);
 	ocfs2_set_inode_flags(inode);
 
 
+	OCFS2_I(inode)->ip_last_used_slot = 0;
+	OCFS2_I(inode)->ip_last_used_group = 0;
 	mlog_exit_void();
 	mlog_exit_void();
 }
 }
 
 
@@ -606,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 	}
 	}
 
 
 	handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
 	handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
-					ocfs2_quota_trans_credits(inode->i_sb));
+				   ocfs2_quota_trans_credits(inode->i_sb));
 	if (IS_ERR(handle)) {
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		status = PTR_ERR(handle);
 		mlog_errno(status);
 		mlog_errno(status);
@@ -740,6 +754,15 @@ static int ocfs2_wipe_inode(struct inode *inode,
 		goto bail_unlock_dir;
 		goto bail_unlock_dir;
 	}
 	}
 
 
+	/* Remove any dir index tree */
+	if (S_ISDIR(inode->i_mode)) {
+		status = ocfs2_dx_dir_truncate(inode, di_bh);
+		if (status) {
+			mlog_errno(status);
+			goto bail_unlock_dir;
+		}
+	}
+
 	/*Free extended attribute resources associated with this inode.*/
 	/*Free extended attribute resources associated with this inode.*/
 	status = ocfs2_xattr_remove(inode, di_bh);
 	status = ocfs2_xattr_remove(inode, di_bh);
 	if (status < 0) {
 	if (status < 0) {
@@ -949,6 +972,17 @@ void ocfs2_delete_inode(struct inode *inode)
 		goto bail;
 		goto bail;
 	}
 	}
 
 
+	/*
+	 * Synchronize us against ocfs2_get_dentry. We take this in
+	 * shared mode so that all nodes can still concurrently
+	 * process deletes.
+	 */
+	status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0);
+	if (status < 0) {
+		mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status);
+		ocfs2_cleanup_delete_inode(inode, 0);
+		goto bail_unblock;
+	}
 	/* Lock down the inode. This gives us an up to date view of
 	/* Lock down the inode. This gives us an up to date view of
 	 * it's metadata (for verification), and allows us to
 	 * it's metadata (for verification), and allows us to
 	 * serialize delete_inode on multiple nodes.
 	 * serialize delete_inode on multiple nodes.
@@ -962,7 +996,7 @@ void ocfs2_delete_inode(struct inode *inode)
 		if (status != -ENOENT)
 		if (status != -ENOENT)
 			mlog_errno(status);
 			mlog_errno(status);
 		ocfs2_cleanup_delete_inode(inode, 0);
 		ocfs2_cleanup_delete_inode(inode, 0);
-		goto bail_unblock;
+		goto bail_unlock_nfs_sync;
 	}
 	}
 
 
 	/* Query the cluster. This will be the final decision made
 	/* Query the cluster. This will be the final decision made
@@ -1005,6 +1039,10 @@ void ocfs2_delete_inode(struct inode *inode)
 bail_unlock_inode:
 bail_unlock_inode:
 	ocfs2_inode_unlock(inode, 1);
 	ocfs2_inode_unlock(inode, 1);
 	brelse(di_bh);
 	brelse(di_bh);
+
+bail_unlock_nfs_sync:
+	ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
+
 bail_unblock:
 bail_unblock:
 	status = sigprocmask(SIG_SETMASK, &oldset, NULL);
 	status = sigprocmask(SIG_SETMASK, &oldset, NULL);
 	if (status < 0)
 	if (status < 0)
@@ -1205,7 +1243,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
 
 	fe->i_size = cpu_to_le64(i_size_read(inode));
 	fe->i_size = cpu_to_le64(i_size_read(inode));
-	fe->i_links_count = cpu_to_le16(inode->i_nlink);
+	ocfs2_set_links_count(fe, inode->i_nlink);
 	fe->i_uid = cpu_to_le32(inode->i_uid);
 	fe->i_uid = cpu_to_le32(inode->i_uid);
 	fe->i_gid = cpu_to_le32(inode->i_gid);
 	fe->i_gid = cpu_to_le32(inode->i_gid);
 	fe->i_mode = cpu_to_le16(inode->i_mode);
 	fe->i_mode = cpu_to_le16(inode->i_mode);
@@ -1242,7 +1280,7 @@ void ocfs2_refresh_inode(struct inode *inode,
 	OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
 	OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
 	ocfs2_set_inode_flags(inode);
 	ocfs2_set_inode_flags(inode);
 	i_size_write(inode, le64_to_cpu(fe->i_size));
 	i_size_write(inode, le64_to_cpu(fe->i_size));
-	inode->i_nlink = le16_to_cpu(fe->i_links_count);
+	inode->i_nlink = ocfs2_read_links_count(fe);
 	inode->i_uid = le32_to_cpu(fe->i_uid);
 	inode->i_uid = le32_to_cpu(fe->i_uid);
 	inode->i_gid = le32_to_cpu(fe->i_gid);
 	inode->i_gid = le32_to_cpu(fe->i_gid);
 	inode->i_mode = le16_to_cpu(fe->i_mode);
 	inode->i_mode = le16_to_cpu(fe->i_mode);

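The delete path above mirrors the export path's locking at reduced strength: the NFS sync lock is taken shared, so deletes on different nodes stay concurrent, and only then is the inode cluster lock taken exclusive. A simplified ordering sketch with the wipe decision and error handling elided; example_delete_ordering() is hypothetical:

static void example_delete_ordering(struct inode *inode)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *di_bh = NULL;

	if (ocfs2_nfs_sync_lock(osb, 0) < 0)	/* PR: deletes stay concurrent */
		return;

	if (ocfs2_inode_lock(inode, &di_bh, 1) == 0) {	/* EX: serialize the wipe */
		/* ... query the cluster and wipe the inode if allowed ... */
		ocfs2_inode_unlock(inode, 1);
		brelse(di_bh);
	}

	ocfs2_nfs_sync_unlock(osb, 0);	/* release in reverse order */
}
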
+ 5 - 0
fs/ocfs2/inode.h

@@ -72,6 +72,10 @@ struct ocfs2_inode_info
 
 
 	struct inode			vfs_inode;
 	struct inode			vfs_inode;
 	struct jbd2_inode		ip_jinode;
 	struct jbd2_inode		ip_jinode;
+
+	/* Only valid if the inode is a directory. */
+	u32				ip_last_used_slot;
+	u64				ip_last_used_group;
 };
 };
 
 
 /*
 /*
@@ -124,6 +128,7 @@ void ocfs2_drop_inode(struct inode *inode);
 /* Flags for ocfs2_iget() */
 /* Flags for ocfs2_iget() */
 #define OCFS2_FI_FLAG_SYSFILE		0x1
 #define OCFS2_FI_FLAG_SYSFILE		0x1
 #define OCFS2_FI_FLAG_ORPHAN_RECOVERY	0x2
 #define OCFS2_FI_FLAG_ORPHAN_RECOVERY	0x2
+struct inode *ocfs2_ilookup(struct super_block *sb, u64 feoff);
 struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
 struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
 			 int sysfile_type);
 			 int sysfile_type);
 int ocfs2_inode_init_private(struct inode *inode);
 int ocfs2_inode_init_private(struct inode *inode);

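The new ip_last_used_slot and ip_last_used_group fields act as per-directory allocation hints: the suballocator records which slot and group satisfied the last inode allocation under a directory and starts its next search there. A rough sketch of the idea (the real logic lives in suballoc.c; example_pick_start_group() is hypothetical):

static u64 example_pick_start_group(struct inode *dir, u64 default_group)
{
	u64 hint = OCFS2_I(dir)->ip_last_used_group;

	/* 0 means no hint has been recorded yet: use the default group */
	return hint ? hint : default_group;
}
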
+ 148 - 25
fs/ocfs2/journal.c

@@ -65,6 +65,11 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb,
 static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 				 int slot);
 				 int slot);
 static int ocfs2_commit_thread(void *arg);
 static int ocfs2_commit_thread(void *arg);
+static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
+					    int slot_num,
+					    struct ocfs2_dinode *la_dinode,
+					    struct ocfs2_dinode *tl_dinode,
+					    struct ocfs2_quota_recovery *qrec);
 
 
 static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
 static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
 {
 {
@@ -76,18 +81,97 @@ static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
 	return __ocfs2_wait_on_mount(osb, 1);
 	return __ocfs2_wait_on_mount(osb, 1);
 }
 }
 
 
-
-
 /*
 /*
- * The recovery_list is a simple linked list of node numbers to recover.
- * It is protected by the recovery_lock.
+ * The replay_map tracks online/offline slots, so that offline slots
+ * can be recovered during recovery and mount.
  */
  */
 
 
-struct ocfs2_recovery_map {
-	unsigned int rm_used;
-	unsigned int *rm_entries;
+enum ocfs2_replay_state {
+	REPLAY_UNNEEDED = 0,	/* Replay is not needed, so ignore this map */
+	REPLAY_NEEDED, 		/* Replay slots marked in rm_replay_slots */
+	REPLAY_DONE 		/* Replay was already queued */
 };
 };
 
 
+struct ocfs2_replay_map {
+	unsigned int rm_slots;
+	enum ocfs2_replay_state rm_state;
+	unsigned char rm_replay_slots[0];
+};
+
+void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
+{
+	if (!osb->replay_map)
+		return;
+
+	/* If we've already queued the replay, we don't have any more to do */
+	if (osb->replay_map->rm_state == REPLAY_DONE)
+		return;
+
+	osb->replay_map->rm_state = state;
+}
+
+int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
+{
+	struct ocfs2_replay_map *replay_map;
+	int i, node_num;
+
+	/* If replay map is already set, we don't do it again */
+	if (osb->replay_map)
+		return 0;
+
+	replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
+			     (osb->max_slots * sizeof(char)), GFP_KERNEL);
+
+	if (!replay_map) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
+
+	spin_lock(&osb->osb_lock);
+
+	replay_map->rm_slots = osb->max_slots;
+	replay_map->rm_state = REPLAY_UNNEEDED;
+
+	/* set rm_replay_slots for offline slot(s) */
+	for (i = 0; i < replay_map->rm_slots; i++) {
+		if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
+			replay_map->rm_replay_slots[i] = 1;
+	}
+
+	osb->replay_map = replay_map;
+	spin_unlock(&osb->osb_lock);
+	return 0;
+}
+
+void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
+{
+	struct ocfs2_replay_map *replay_map = osb->replay_map;
+	int i;
+
+	if (!replay_map)
+		return;
+
+	if (replay_map->rm_state != REPLAY_NEEDED)
+		return;
+
+	for (i = 0; i < replay_map->rm_slots; i++)
+		if (replay_map->rm_replay_slots[i])
+			ocfs2_queue_recovery_completion(osb->journal, i, NULL,
+							NULL, NULL);
+	replay_map->rm_state = REPLAY_DONE;
+}
+
+void ocfs2_free_replay_slots(struct ocfs2_super *osb)
+{
+	struct ocfs2_replay_map *replay_map = osb->replay_map;
+
+	if (!osb->replay_map)
+		return;
+
+	kfree(replay_map);
+	osb->replay_map = NULL;
+}
+
 int ocfs2_recovery_init(struct ocfs2_super *osb)
 int ocfs2_recovery_init(struct ocfs2_super *osb)
 {
 {
 	struct ocfs2_recovery_map *rm;
 	struct ocfs2_recovery_map *rm;
@@ -496,6 +580,22 @@ static struct ocfs2_triggers dq_triggers = {
 	},
 	},
 };
 };
 
 
+static struct ocfs2_triggers dr_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+	.ot_offset	= offsetof(struct ocfs2_dx_root_block, dr_check),
+};
+
+static struct ocfs2_triggers dl_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+	.ot_offset	= offsetof(struct ocfs2_dx_leaf, dl_check),
+};
+
 static int __ocfs2_journal_access(handle_t *handle,
 static int __ocfs2_journal_access(handle_t *handle,
 				  struct inode *inode,
 				  struct inode *inode,
 				  struct buffer_head *bh,
 				  struct buffer_head *bh,
@@ -600,6 +700,20 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
 				      type);
 				      type);
 }
 }
 
 
+int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, inode, bh, &dr_triggers,
+				      type);
+}
+
+int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, inode, bh, &dl_triggers,
+				      type);
+}
+
 int ocfs2_journal_access(handle_t *handle, struct inode *inode,
 int ocfs2_journal_access(handle_t *handle, struct inode *inode,
 			 struct buffer_head *bh, int type)
 			 struct buffer_head *bh, int type)
 {
 {
@@ -1176,24 +1290,24 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 }
 }
 
 
 /* Called by the mount code to queue recovery the last part of
 /* Called by the mount code to queue recovery the last part of
- * recovery for it's own slot. */
+ * recovery for its own and offline slot(s). */
 void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
 void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
 {
 {
 	struct ocfs2_journal *journal = osb->journal;
 	struct ocfs2_journal *journal = osb->journal;
 
 
-	if (osb->dirty) {
-		/* No need to queue up our truncate_log as regular
-		 * cleanup will catch that. */
-		ocfs2_queue_recovery_completion(journal,
-						osb->slot_num,
-						osb->local_alloc_copy,
-						NULL,
-						NULL);
-		ocfs2_schedule_truncate_log_flush(osb, 0);
+	/* No need to queue up our truncate_log as regular cleanup will catch
+	 * that */
+	ocfs2_queue_recovery_completion(journal, osb->slot_num,
+					osb->local_alloc_copy, NULL, NULL);
+	ocfs2_schedule_truncate_log_flush(osb, 0);
 
 
-		osb->local_alloc_copy = NULL;
-		osb->dirty = 0;
-	}
+	osb->local_alloc_copy = NULL;
+	osb->dirty = 0;
+
+	/* queue to recover orphan slots for all offline slots */
+	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
+	ocfs2_queue_replay_slots(osb);
+	ocfs2_free_replay_slots(osb);
 }
 }
 
 
 void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
 void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
@@ -1236,6 +1350,14 @@ restart:
 		goto bail;
 		goto bail;
 	}
 	}
 
 
+	status = ocfs2_compute_replay_slots(osb);
+	if (status < 0)
+		mlog_errno(status);
+
+	/* queue recovery for our own slot */
+	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
+					NULL, NULL);
+
 	spin_lock(&osb->osb_lock);
 	spin_lock(&osb->osb_lock);
 	while (rm->rm_used) {
 	while (rm->rm_used) {
 		/* It's always safe to remove entry zero, as we won't
 		/* It's always safe to remove entry zero, as we won't
@@ -1301,11 +1423,8 @@ skip_recovery:
 
 
 	ocfs2_super_unlock(osb, 1);
 	ocfs2_super_unlock(osb, 1);
 
 
-	/* We always run recovery on our own orphan dir - the dead
-	 * node(s) may have disallowd a previos inode delete. Re-processing
-	 * is therefore required. */
-	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
-					NULL, NULL);
+	/* queue recovery for offline slots */
+	ocfs2_queue_replay_slots(osb);
 
 
 bail:
 bail:
 	mutex_lock(&osb->recovery_lock);
 	mutex_lock(&osb->recovery_lock);
@@ -1314,6 +1433,7 @@ bail:
 		goto restart;
 		goto restart;
 	}
 	}
 
 
+	ocfs2_free_replay_slots(osb);
 	osb->recovery_thread_task = NULL;
 	osb->recovery_thread_task = NULL;
 	mb(); /* sync with ocfs2_recovery_thread_running */
 	mb(); /* sync with ocfs2_recovery_thread_running */
 	wake_up(&osb->recovery_event);
 	wake_up(&osb->recovery_event);
@@ -1465,6 +1585,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 		goto done;
 		goto done;
 	}
 	}
 
 
+	/* we need to run complete recovery for offline orphan slots */
+	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
+
 	mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
 	mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
 	     node_num, slot_num,
 	     node_num, slot_num,
 	     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 	     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));

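Putting the new replay-map pieces together, the lifecycle is: build the map while slot information is available, flag it as needed once a dead journal has been replayed, then queue recovery completion for each offline slot and free the map. A condensed sketch using only the functions added above (in the patch the calls are spread across mount and the recovery thread):

static void example_replay_map_lifecycle(struct ocfs2_super *osb)
{
	/* snapshot which slots are offline right now */
	if (ocfs2_compute_replay_slots(osb) < 0)
		return;

	/* a dead journal was replayed, so offline slots need orphan recovery */
	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);

	/* queue one recovery completion per offline slot, then drop the map */
	ocfs2_queue_replay_slots(osb);
	ocfs2_free_replay_slots(osb);
}
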
+ 62 - 15
fs/ocfs2/journal.h

@@ -38,6 +38,17 @@ enum ocfs2_journal_state {
 struct ocfs2_super;
 struct ocfs2_super;
 struct ocfs2_dinode;
 struct ocfs2_dinode;
 
 
+/*
+ * The recovery_list is a simple linked list of node numbers to recover.
+ * It is protected by the recovery_lock.
+ */
+
+struct ocfs2_recovery_map {
+	unsigned int rm_used;
+	unsigned int *rm_entries;
+};
+
+
 struct ocfs2_journal {
 struct ocfs2_journal {
 	enum ocfs2_journal_state   j_state;    /* Journals current state   */
 	enum ocfs2_journal_state   j_state;    /* Journals current state   */
 
 
@@ -139,6 +150,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
 int ocfs2_recovery_init(struct ocfs2_super *osb);
 int ocfs2_recovery_init(struct ocfs2_super *osb);
 void ocfs2_recovery_exit(struct ocfs2_super *osb);
 void ocfs2_recovery_exit(struct ocfs2_super *osb);
 
 
+int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
 /*
 /*
  *  Journal Control:
  *  Journal Control:
  *  Initialize, Load, Shutdown, Wipe a journal.
  *  Initialize, Load, Shutdown, Wipe a journal.
@@ -266,6 +278,12 @@ int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
 /* dirblock */
 /* dirblock */
 int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
 int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
 			    struct buffer_head *bh, int type);
 			    struct buffer_head *bh, int type);
+/* ocfs2_dx_root_block */
+int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* ocfs2_dx_leaf */
+int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
 /* Anything that has no ecc */
 /* Anything that has no ecc */
 int ocfs2_journal_access(handle_t *handle, struct inode *inode,
 int ocfs2_journal_access(handle_t *handle, struct inode *inode,
 			 struct buffer_head *bh, int type);
 			 struct buffer_head *bh, int type);
@@ -368,14 +386,29 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
 }
 }
 
 
 /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
 /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
- * bitmap block for the new bit) */
-#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
+ * bitmap block for the new bit), dx_root update for free list */
+#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
+
+static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
+{
+	/* 1 block for index, 2 allocs (data, metadata), 1 cluster's
+	 * worth of blocks for initial extent. */
+	return 1 + 2 * OCFS2_SUBALLOC_ALLOC +
+		ocfs2_clusters_to_blocks(sb, 1);
+}
 
 
-/* parent fe, parent block, new file entry, inode alloc fe, inode alloc
- * group descriptor + mkdir/symlink blocks + quota update */
-static inline int ocfs2_mknod_credits(struct super_block *sb)
+/* parent fe, parent block, new file entry, index leaf, inode alloc fe, inode
+ * alloc group descriptor + mkdir/symlink blocks + dir blocks + xattr
+ * blocks + quota update */
+static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir,
+				      int xattr_credits)
 {
 {
-	return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS +
+	int dir_credits = OCFS2_DIR_LINK_ADDITIONAL_CREDITS;
+
+	if (is_dir)
+		dir_credits += ocfs2_add_dir_index_credits(sb);
+
+	return 4 + OCFS2_SUBALLOC_ALLOC + dir_credits + xattr_credits +
 	       ocfs2_quota_trans_credits(sb);
 	       ocfs2_quota_trans_credits(sb);
 }
 }
 
 
@@ -388,31 +421,31 @@ static inline int ocfs2_mknod_credits(struct super_block *sb)
 #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
 #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
 
 
 /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
 /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
- * update on dir */
+ * update on dir + index leaf + dx root update for free list */
 static inline int ocfs2_link_credits(struct super_block *sb)
 static inline int ocfs2_link_credits(struct super_block *sb)
 {
 {
-	return 2*OCFS2_INODE_UPDATE_CREDITS + 1 +
+	return 2*OCFS2_INODE_UPDATE_CREDITS + 3 +
 	       ocfs2_quota_trans_credits(sb);
 	       ocfs2_quota_trans_credits(sb);
 }
 }
 
 
 /* inode + dir inode (if we unlink a dir), + dir entry block + orphan
 /* inode + dir inode (if we unlink a dir), + dir entry block + orphan
- * dir inode link */
+ * dir inode link + dir inode index leaf + dir index root */
 static inline int ocfs2_unlink_credits(struct super_block *sb)
 static inline int ocfs2_unlink_credits(struct super_block *sb)
 {
 {
 	/* The quota update from ocfs2_link_credits is unused here... */
 	/* The quota update from ocfs2_link_credits is unused here... */
-	return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb);
+	return 2 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_link_credits(sb);
 }
 }
 
 
 /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
 /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
- * inode alloc group descriptor */
-#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 1 + 1)
+ * inode alloc group descriptor + orphan dir index leaf */
+#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3)
 
 
 /* dinode update, old dir dinode update, new dir dinode update, old
 /* dinode update, old dir dinode update, new dir dinode update, old
  * dir dir entry, new dir dir entry, dir entry update for renaming
  * dir dir entry, new dir dir entry, dir entry update for renaming
- * directory + target unlink */
+ * directory + target unlink + 3 x dir index leaves */
 static inline int ocfs2_rename_credits(struct super_block *sb)
 static inline int ocfs2_rename_credits(struct super_block *sb)
 {
 {
-	return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb);
+	return 3 * OCFS2_INODE_UPDATE_CREDITS + 6 + ocfs2_unlink_credits(sb);
 }
 }
 
 
 /* global bitmap dinode, group desc., relinked group,
 /* global bitmap dinode, group desc., relinked group,
@@ -422,6 +455,20 @@ static inline int ocfs2_rename_credits(struct super_block *sb)
 					  + OCFS2_INODE_UPDATE_CREDITS \
 					  + OCFS2_INODE_UPDATE_CREDITS \
 					  + OCFS2_XATTR_BLOCK_UPDATE_CREDITS)
 					  + OCFS2_XATTR_BLOCK_UPDATE_CREDITS)
 
 
+/* inode update, removal of dx root block from allocator */
+#define OCFS2_DX_ROOT_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS +	\
+				      OCFS2_SUBALLOC_FREE)
+
+static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
+{
+	int credits = 1 + OCFS2_SUBALLOC_ALLOC;
+
+	credits += ocfs2_clusters_to_blocks(sb, 1);
+	credits += ocfs2_quota_trans_credits(sb);
+
+	return credits;
+}
+
 /*
 /*
  * Please note that the caller must make sure that root_el is the root
  * Please note that the caller must make sure that root_el is the root
  * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
  * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
@@ -457,7 +504,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
 
 
 static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
 static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
 {
 {
-	int blocks = ocfs2_mknod_credits(sb);
+	int blocks = ocfs2_mknod_credits(sb, 0, 0);
 
 
 	/* links can be longer than one block so we may update many
 	/* links can be longer than one block so we may update many
 	 * within our single allocated extent. */
 	 * within our single allocated extent. */

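With the signature change above, callers now tell ocfs2_mknod_credits() whether a directory (and therefore an index) is being created and how many xattr credits they have already computed. A minimal sketch of the new call pattern, as used later in namei.c; example_start_mkdir_trans() is hypothetical:

static handle_t *example_start_mkdir_trans(struct ocfs2_super *osb,
					   int xattr_credits)
{
	/* is_dir = 1 adds ocfs2_add_dir_index_credits() on top of the base */
	int credits = ocfs2_mknod_credits(osb->sb, 1, xattr_credits);

	return ocfs2_start_trans(osb, credits);
}
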
+ 0 - 86
fs/ocfs2/localalloc.c

@@ -28,7 +28,6 @@
 #include <linux/slab.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/highmem.h>
 #include <linux/bitops.h>
 #include <linux/bitops.h>
-#include <linux/debugfs.h>
 
 
 #define MLOG_MASK_PREFIX ML_DISK_ALLOC
 #define MLOG_MASK_PREFIX ML_DISK_ALLOC
 #include <cluster/masklog.h>
 #include <cluster/masklog.h>
@@ -75,84 +74,6 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 					  struct inode *local_alloc_inode);
 					  struct inode *local_alloc_inode);
 
 
-#ifdef CONFIG_OCFS2_FS_STATS
-
-static int ocfs2_la_debug_open(struct inode *inode, struct file *file)
-{
-	file->private_data = inode->i_private;
-	return 0;
-}
-
-#define LA_DEBUG_BUF_SZ	PAGE_CACHE_SIZE
-#define LA_DEBUG_VER	1
-static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf,
-				   size_t count, loff_t *ppos)
-{
-	static DEFINE_MUTEX(la_debug_mutex);
-	struct ocfs2_super *osb = file->private_data;
-	int written, ret;
-	char *buf = osb->local_alloc_debug_buf;
-
-	mutex_lock(&la_debug_mutex);
-	memset(buf, 0, LA_DEBUG_BUF_SZ);
-
-	written = snprintf(buf, LA_DEBUG_BUF_SZ,
-			   "0x%x\t0x%llx\t%u\t%u\t0x%x\n",
-			   LA_DEBUG_VER,
-			   (unsigned long long)osb->la_last_gd,
-			   osb->local_alloc_default_bits,
-			   osb->local_alloc_bits, osb->local_alloc_state);
-
-	ret = simple_read_from_buffer(userbuf, count, ppos, buf, written);
-
-	mutex_unlock(&la_debug_mutex);
-	return ret;
-}
-
-static const struct file_operations ocfs2_la_debug_fops = {
-	.open =		ocfs2_la_debug_open,
-	.read =		ocfs2_la_debug_read,
-};
-
-static void ocfs2_init_la_debug(struct ocfs2_super *osb)
-{
-	osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS);
-	if (!osb->local_alloc_debug_buf)
-		return;
-
-	osb->local_alloc_debug = debugfs_create_file("local_alloc_stats",
-						     S_IFREG|S_IRUSR,
-						     osb->osb_debug_root,
-						     osb,
-						     &ocfs2_la_debug_fops);
-	if (!osb->local_alloc_debug) {
-		kfree(osb->local_alloc_debug_buf);
-		osb->local_alloc_debug_buf = NULL;
-	}
-}
-
-static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
-{
-	if (osb->local_alloc_debug)
-		debugfs_remove(osb->local_alloc_debug);
-
-	if (osb->local_alloc_debug_buf)
-		kfree(osb->local_alloc_debug_buf);
-
-	osb->local_alloc_debug_buf = NULL;
-	osb->local_alloc_debug = NULL;
-}
-#else	/* CONFIG_OCFS2_FS_STATS */
-static void ocfs2_init_la_debug(struct ocfs2_super *osb)
-{
-	return;
-}
-static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
-{
-	return;
-}
-#endif
-
 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
 {
 {
 	return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
 	return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
@@ -226,8 +147,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 
 
 	mlog_entry_void();
 	mlog_entry_void();
 
 
-	ocfs2_init_la_debug(osb);
-
 	if (osb->local_alloc_bits == 0)
 	if (osb->local_alloc_bits == 0)
 		goto bail;
 		goto bail;
 
 
@@ -299,9 +218,6 @@ bail:
 	if (inode)
 	if (inode)
 		iput(inode);
 		iput(inode);
 
 
-	if (status < 0)
-		ocfs2_shutdown_la_debug(osb);
-
 	mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
 	mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
 
 
 	mlog_exit(status);
 	mlog_exit(status);
@@ -331,8 +247,6 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	cancel_delayed_work(&osb->la_enable_wq);
 	cancel_delayed_work(&osb->la_enable_wq);
 	flush_workqueue(ocfs2_wq);
 	flush_workqueue(ocfs2_wq);
 
 
-	ocfs2_shutdown_la_debug(osb);
-
 	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
 	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
 		goto out;
 		goto out;
 
 

+ 133 - 117
fs/ocfs2/namei.c

@@ -80,14 +80,14 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 				    struct inode **ret_orphan_dir,
 				    struct inode **ret_orphan_dir,
 				    struct inode *inode,
 				    struct inode *inode,
 				    char *name,
 				    char *name,
-				    struct buffer_head **de_bh);
+				    struct ocfs2_dir_lookup_result *lookup);
 
 
 static int ocfs2_orphan_add(struct ocfs2_super *osb,
 static int ocfs2_orphan_add(struct ocfs2_super *osb,
 			    handle_t *handle,
 			    handle_t *handle,
 			    struct inode *inode,
 			    struct inode *inode,
 			    struct ocfs2_dinode *fe,
 			    struct ocfs2_dinode *fe,
 			    char *name,
 			    char *name,
-			    struct buffer_head *de_bh,
+			    struct ocfs2_dir_lookup_result *lookup,
 			    struct inode *orphan_dir_inode);
 			    struct inode *orphan_dir_inode);
 
 
 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
@@ -228,17 +228,18 @@ static int ocfs2_mknod(struct inode *dir,
 	struct ocfs2_super *osb;
 	struct ocfs2_super *osb;
 	struct ocfs2_dinode *dirfe;
 	struct ocfs2_dinode *dirfe;
 	struct buffer_head *new_fe_bh = NULL;
 	struct buffer_head *new_fe_bh = NULL;
-	struct buffer_head *de_bh = NULL;
 	struct inode *inode = NULL;
 	struct inode *inode = NULL;
 	struct ocfs2_alloc_context *inode_ac = NULL;
 	struct ocfs2_alloc_context *inode_ac = NULL;
 	struct ocfs2_alloc_context *data_ac = NULL;
 	struct ocfs2_alloc_context *data_ac = NULL;
-	struct ocfs2_alloc_context *xattr_ac = NULL;
+	struct ocfs2_alloc_context *meta_ac = NULL;
 	int want_clusters = 0;
 	int want_clusters = 0;
+	int want_meta = 0;
 	int xattr_credits = 0;
 	int xattr_credits = 0;
 	struct ocfs2_security_xattr_info si = {
 	struct ocfs2_security_xattr_info si = {
 		.enable = 1,
 		.enable = 1,
 	};
 	};
 	int did_quota_inode = 0;
 	int did_quota_inode = 0;
+	struct ocfs2_dir_lookup_result lookup = { NULL, };
 
 
 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
 		   (unsigned long)dev, dentry->d_name.len,
 		   (unsigned long)dev, dentry->d_name.len,
@@ -254,13 +255,13 @@ static int ocfs2_mknod(struct inode *dir,
 		return status;
 		return status;
 	}
 	}
 
 
-	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
+	if (S_ISDIR(mode) && (dir->i_nlink >= ocfs2_link_max(osb))) {
 		status = -EMLINK;
 		status = -EMLINK;
 		goto leave;
 		goto leave;
 	}
 	}
 
 
 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
-	if (!dirfe->i_links_count) {
+	if (!ocfs2_read_links_count(dirfe)) {
 		/* can't make a file in a deleted directory. */
 		/* can't make a file in a deleted directory. */
 		status = -ENOENT;
 		status = -ENOENT;
 		goto leave;
 		goto leave;
@@ -274,7 +275,7 @@ static int ocfs2_mknod(struct inode *dir,
 	/* get a spot inside the dir. */
 	/* get a spot inside the dir. */
 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
 					      dentry->d_name.name,
 					      dentry->d_name.name,
-					      dentry->d_name.len, &de_bh);
+					      dentry->d_name.len, &lookup);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
@@ -308,17 +309,29 @@ static int ocfs2_mknod(struct inode *dir,
 
 
 	/* calculate meta data/clusters for setting security and acl xattr */
 	/* calculate meta data/clusters for setting security and acl xattr */
 	status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
 	status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
-					&si, &want_clusters,
-					&xattr_credits, &xattr_ac);
+				       &si, &want_clusters,
+				       &xattr_credits, &want_meta);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
 	}
 	}
 
 
 	/* Reserve a cluster if creating an extent based directory. */
 	/* Reserve a cluster if creating an extent based directory. */
-	if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb))
+	if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
 		want_clusters += 1;
 		want_clusters += 1;
 
 
+		/* Dir indexing requires extra space as well */
+		if (ocfs2_supports_indexed_dirs(osb))
+			want_meta++;
+	}
+
+	status = ocfs2_reserve_new_metadata_blocks(osb, want_meta, &meta_ac);
+	if (status < 0) {
+		if (status != -ENOSPC)
+			mlog_errno(status);
+		goto leave;
+	}
+
 	status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
 	status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
 	if (status < 0) {
 	if (status < 0) {
 		if (status != -ENOSPC)
 		if (status != -ENOSPC)
@@ -326,8 +339,9 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 		goto leave;
 	}
 	}
 
 
-	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) +
-				   xattr_credits);
+	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
+							    S_ISDIR(mode),
+							    xattr_credits));
 	if (IS_ERR(handle)) {
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		status = PTR_ERR(handle);
 		handle = NULL;
 		handle = NULL;
@@ -355,7 +369,7 @@ static int ocfs2_mknod(struct inode *dir,
 
 
 	if (S_ISDIR(mode)) {
 	if (S_ISDIR(mode)) {
 		status = ocfs2_fill_new_dir(osb, handle, dir, inode,
 		status = ocfs2_fill_new_dir(osb, handle, dir, inode,
-					    new_fe_bh, data_ac);
+					    new_fe_bh, data_ac, meta_ac);
 		if (status < 0) {
 		if (status < 0) {
 			mlog_errno(status);
 			mlog_errno(status);
 			goto leave;
 			goto leave;
@@ -367,7 +381,7 @@ static int ocfs2_mknod(struct inode *dir,
 			mlog_errno(status);
 			mlog_errno(status);
 			goto leave;
 			goto leave;
 		}
 		}
-		le16_add_cpu(&dirfe->i_links_count, 1);
+		ocfs2_add_links_count(dirfe, 1);
 		status = ocfs2_journal_dirty(handle, parent_fe_bh);
 		status = ocfs2_journal_dirty(handle, parent_fe_bh);
 		if (status < 0) {
 		if (status < 0) {
 			mlog_errno(status);
 			mlog_errno(status);
@@ -377,7 +391,7 @@ static int ocfs2_mknod(struct inode *dir,
 	}
 	}
 
 
 	status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
 	status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
-				xattr_ac, data_ac);
+				meta_ac, data_ac);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
@@ -385,7 +399,7 @@ static int ocfs2_mknod(struct inode *dir,
 
 
 	if (si.enable) {
 	if (si.enable) {
 		status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
 		status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
-						 xattr_ac, data_ac);
+						 meta_ac, data_ac);
 		if (status < 0) {
 		if (status < 0) {
 			mlog_errno(status);
 			mlog_errno(status);
 			goto leave;
 			goto leave;
@@ -394,7 +408,7 @@ static int ocfs2_mknod(struct inode *dir,
 
 
 	status = ocfs2_add_entry(handle, dentry, inode,
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
-				 de_bh);
+				 &lookup);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
@@ -423,11 +437,12 @@ leave:
 		mlog(0, "Disk is full\n");
 		mlog(0, "Disk is full\n");
 
 
 	brelse(new_fe_bh);
 	brelse(new_fe_bh);
-	brelse(de_bh);
 	brelse(parent_fe_bh);
 	brelse(parent_fe_bh);
 	kfree(si.name);
 	kfree(si.name);
 	kfree(si.value);
 	kfree(si.value);
 
 
+	ocfs2_free_dir_lookup_result(&lookup);
+
 	if ((status < 0) && inode) {
 	if ((status < 0) && inode) {
 		clear_nlink(inode);
 		clear_nlink(inode);
 		iput(inode);
 		iput(inode);
@@ -439,8 +454,8 @@ leave:
 	if (data_ac)
 	if (data_ac)
 		ocfs2_free_alloc_context(data_ac);
 		ocfs2_free_alloc_context(data_ac);
 
 
-	if (xattr_ac)
-		ocfs2_free_alloc_context(xattr_ac);
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
 
 
 	mlog_exit(status);
 	mlog_exit(status);
 
 
@@ -462,6 +477,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	struct ocfs2_extent_list *fel;
 	struct ocfs2_extent_list *fel;
 	u64 fe_blkno = 0;
 	u64 fe_blkno = 0;
 	u16 suballoc_bit;
 	u16 suballoc_bit;
+	u16 feat;
 
 
 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
 		   inode->i_mode, (unsigned long)dev, dentry->d_name.len,
 		   inode->i_mode, (unsigned long)dev, dentry->d_name.len,
@@ -469,8 +485,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
 
 	*new_fe_bh = NULL;
 	*new_fe_bh = NULL;
 
 
-	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
-				       &fe_blkno);
+	status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh,
+				       inode_ac, &suballoc_bit, &fe_blkno);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
@@ -513,7 +529,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	fe->i_mode = cpu_to_le16(inode->i_mode);
 	fe->i_mode = cpu_to_le16(inode->i_mode);
 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
-	fe->i_links_count = cpu_to_le16(inode->i_nlink);
+
+	ocfs2_set_links_count(fe, inode->i_nlink);
 
 
 	fe->i_last_eb_blk = 0;
 	fe->i_last_eb_blk = 0;
 	strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
 	strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
@@ -525,11 +542,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	fe->i_dtime = 0;
 	fe->i_dtime = 0;
 
 
 	/*
 	/*
-	 * If supported, directories start with inline data.
+	 * If supported, directories start with inline data. If inline
+	 * isn't supported, but indexing is, we start them as indexed.
 	 */
 	 */
+	feat = le16_to_cpu(fe->i_dyn_features);
 	if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
 	if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
-		u16 feat = le16_to_cpu(fe->i_dyn_features);
-
 		fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
 		fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
 
 
 		fe->id2.i_data.id_count = cpu_to_le16(
 		fe->id2.i_data.id_count = cpu_to_le16(
@@ -608,9 +625,9 @@ static int ocfs2_link(struct dentry *old_dentry,
 	int err;
 	int err;
 	struct buffer_head *fe_bh = NULL;
 	struct buffer_head *fe_bh = NULL;
 	struct buffer_head *parent_fe_bh = NULL;
 	struct buffer_head *parent_fe_bh = NULL;
-	struct buffer_head *de_bh = NULL;
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+	struct ocfs2_dir_lookup_result lookup = { NULL, };
 
 
 	mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
 	mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
 		   old_dentry->d_name.len, old_dentry->d_name.name,
 		   old_dentry->d_name.len, old_dentry->d_name.name,
@@ -638,7 +655,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 
 
 	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
 	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
 					   dentry->d_name.name,
 					   dentry->d_name.name,
-					   dentry->d_name.len, &de_bh);
+					   dentry->d_name.len, &lookup);
 	if (err < 0) {
 	if (err < 0) {
 		mlog_errno(err);
 		mlog_errno(err);
 		goto out;
 		goto out;
@@ -652,7 +669,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 	}
 	}
 
 
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
-	if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
+	if (ocfs2_read_links_count(fe) >= ocfs2_link_max(osb)) {
 		err = -EMLINK;
 		err = -EMLINK;
 		goto out_unlock_inode;
 		goto out_unlock_inode;
 	}
 	}
@@ -674,13 +691,13 @@ static int ocfs2_link(struct dentry *old_dentry,
 
 
 	inc_nlink(inode);
 	inc_nlink(inode);
 	inode->i_ctime = CURRENT_TIME;
 	inode->i_ctime = CURRENT_TIME;
-	fe->i_links_count = cpu_to_le16(inode->i_nlink);
+	ocfs2_set_links_count(fe, inode->i_nlink);
 	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
 	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
 	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 
 
 	err = ocfs2_journal_dirty(handle, fe_bh);
 	err = ocfs2_journal_dirty(handle, fe_bh);
 	if (err < 0) {
 	if (err < 0) {
-		le16_add_cpu(&fe->i_links_count, -1);
+		ocfs2_add_links_count(fe, -1);
 		drop_nlink(inode);
 		drop_nlink(inode);
 		mlog_errno(err);
 		mlog_errno(err);
 		goto out_commit;
 		goto out_commit;
@@ -688,9 +705,9 @@ static int ocfs2_link(struct dentry *old_dentry,
 
 
 	err = ocfs2_add_entry(handle, dentry, inode,
 	err = ocfs2_add_entry(handle, dentry, inode,
 			      OCFS2_I(inode)->ip_blkno,
 			      OCFS2_I(inode)->ip_blkno,
-			      parent_fe_bh, de_bh);
+			      parent_fe_bh, &lookup);
 	if (err) {
 	if (err) {
-		le16_add_cpu(&fe->i_links_count, -1);
+		ocfs2_add_links_count(fe, -1);
 		drop_nlink(inode);
 		drop_nlink(inode);
 		mlog_errno(err);
 		mlog_errno(err);
 		goto out_commit;
 		goto out_commit;
@@ -714,10 +731,11 @@ out_unlock_inode:
 out:
 out:
 	ocfs2_inode_unlock(dir, 1);
 	ocfs2_inode_unlock(dir, 1);
 
 
-	brelse(de_bh);
 	brelse(fe_bh);
 	brelse(fe_bh);
 	brelse(parent_fe_bh);
 	brelse(parent_fe_bh);
 
 
+	ocfs2_free_dir_lookup_result(&lookup);
+
 	mlog_exit(err);
 	mlog_exit(err);
 
 
 	return err;
 	return err;
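
Every name operation in this file now follows the same ocfs2_dir_lookup_result pattern: zero-initialize the result on the stack, let the prepare/find helpers fill it, hand it to the insert or delete routine, and always release it with ocfs2_free_dir_lookup_result(). A condensed sketch of that pattern (error handling simplified; example_insert_name() is hypothetical):

static int example_insert_name(struct ocfs2_super *osb, struct inode *dir,
			       struct buffer_head *dir_bh, handle_t *handle,
			       struct dentry *dentry, struct inode *inode)
{
	struct ocfs2_dir_lookup_result lookup = { NULL, };
	int ret;

	/* reserve space for the new name, recording where it will go */
	ret = ocfs2_prepare_dir_for_insert(osb, dir, dir_bh,
					   dentry->d_name.name,
					   dentry->d_name.len, &lookup);
	if (ret == 0)
		ret = ocfs2_add_entry(handle, dentry, inode,
				      OCFS2_I(inode)->ip_blkno, dir_bh,
				      &lookup);

	/* the lookup result may hold buffer heads; always free it */
	ocfs2_free_dir_lookup_result(&lookup);
	return ret;
}
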
@@ -766,10 +784,9 @@ static int ocfs2_unlink(struct inode *dir,
 	struct buffer_head *fe_bh = NULL;
 	struct buffer_head *fe_bh = NULL;
 	struct buffer_head *parent_node_bh = NULL;
 	struct buffer_head *parent_node_bh = NULL;
 	handle_t *handle = NULL;
 	handle_t *handle = NULL;
-	struct ocfs2_dir_entry *dirent = NULL;
-	struct buffer_head *dirent_bh = NULL;
 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
-	struct buffer_head *orphan_entry_bh = NULL;
+	struct ocfs2_dir_lookup_result lookup = { NULL, };
+	struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
 
 
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
 		   dentry->d_name.len, dentry->d_name.name);
 		   dentry->d_name.len, dentry->d_name.name);
@@ -791,8 +808,8 @@ static int ocfs2_unlink(struct inode *dir,
 	}
 	}
 
 
 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
-					  dentry->d_name.len, &blkno,
-					  dir, &dirent_bh, &dirent);
+					  dentry->d_name.len, &blkno, dir,
+					  &lookup);
 	if (status < 0) {
 	if (status < 0) {
 		if (status != -ENOENT)
 		if (status != -ENOENT)
 			mlog_errno(status);
 			mlog_errno(status);
@@ -817,10 +834,7 @@ static int ocfs2_unlink(struct inode *dir,
 	child_locked = 1;
 	child_locked = 1;
 
 
 	if (S_ISDIR(inode->i_mode)) {
 	if (S_ISDIR(inode->i_mode)) {
-	       	if (!ocfs2_empty_dir(inode)) {
-			status = -ENOTEMPTY;
-			goto leave;
-		} else if (inode->i_nlink != 2) {
+		if (inode->i_nlink != 2 || !ocfs2_empty_dir(inode)) {
 			status = -ENOTEMPTY;
 			status = -ENOTEMPTY;
 			goto leave;
 			goto leave;
 		}
 		}
@@ -836,8 +850,7 @@ static int ocfs2_unlink(struct inode *dir,
 
 
 	if (inode_is_unlinkable(inode)) {
 	if (inode_is_unlinkable(inode)) {
 		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode,
 		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode,
-						  orphan_name,
-						  &orphan_entry_bh);
+						  orphan_name, &orphan_insert);
 		if (status < 0) {
 		if (status < 0) {
 			mlog_errno(status);
 			mlog_errno(status);
 			goto leave;
 			goto leave;
@@ -863,7 +876,7 @@ static int ocfs2_unlink(struct inode *dir,
 
 
 	if (inode_is_unlinkable(inode)) {
 	if (inode_is_unlinkable(inode)) {
 		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
 		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
-					  orphan_entry_bh, orphan_dir);
+					  &orphan_insert, orphan_dir);
 		if (status < 0) {
 		if (status < 0) {
 			mlog_errno(status);
 			mlog_errno(status);
 			goto leave;
 			goto leave;
@@ -871,7 +884,7 @@ static int ocfs2_unlink(struct inode *dir,
 	}
 	}
 
 
 	/* delete the name from the parent dir */
 	/* delete the name from the parent dir */
-	status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh);
+	status = ocfs2_delete_entry(handle, dir, &lookup);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
@@ -880,7 +893,7 @@ static int ocfs2_unlink(struct inode *dir,
 	if (S_ISDIR(inode->i_mode))
 	if (S_ISDIR(inode->i_mode))
 		drop_nlink(inode);
 		drop_nlink(inode);
 	drop_nlink(inode);
 	drop_nlink(inode);
-	fe->i_links_count = cpu_to_le16(inode->i_nlink);
+	ocfs2_set_links_count(fe, inode->i_nlink);
 
 
 	status = ocfs2_journal_dirty(handle, fe_bh);
 	status = ocfs2_journal_dirty(handle, fe_bh);
 	if (status < 0) {
 	if (status < 0) {
@@ -916,9 +929,10 @@ leave:
 	}
 	}
 
 
 	brelse(fe_bh);
 	brelse(fe_bh);
-	brelse(dirent_bh);
 	brelse(parent_node_bh);
 	brelse(parent_node_bh);
-	brelse(orphan_entry_bh);
+
+	ocfs2_free_dir_lookup_result(&orphan_insert);
+	ocfs2_free_dir_lookup_result(&lookup);
 
 
 	mlog_exit(status);
 	mlog_exit(status);
 
 
@@ -1004,8 +1018,8 @@ static int ocfs2_rename(struct inode *old_dir,
 			struct inode *new_dir,
 			struct inode *new_dir,
 			struct dentry *new_dentry)
 			struct dentry *new_dentry)
 {
 {
-	int status = 0, rename_lock = 0, parents_locked = 0;
-	int old_child_locked = 0, new_child_locked = 0;
+	int status = 0, rename_lock = 0, parents_locked = 0, target_exists = 0;
+	int old_child_locked = 0, new_child_locked = 0, update_dot_dot = 0;
 	struct inode *old_inode = old_dentry->d_inode;
 	struct inode *old_inode = old_dentry->d_inode;
 	struct inode *new_inode = new_dentry->d_inode;
 	struct inode *new_inode = new_dentry->d_inode;
 	struct inode *orphan_dir = NULL;
 	struct inode *orphan_dir = NULL;
@@ -1020,13 +1034,13 @@ static int ocfs2_rename(struct inode *old_dir,
 	handle_t *handle = NULL;
 	handle_t *handle = NULL;
 	struct buffer_head *old_dir_bh = NULL;
 	struct buffer_head *old_dir_bh = NULL;
 	struct buffer_head *new_dir_bh = NULL;
 	struct buffer_head *new_dir_bh = NULL;
-	struct ocfs2_dir_entry *old_inode_dot_dot_de = NULL, *old_de = NULL,
-		*new_de = NULL;
-	struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
-	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
-						    // this is the 1st dirent bh
 	nlink_t old_dir_nlink = old_dir->i_nlink;
 	nlink_t old_dir_nlink = old_dir->i_nlink;
 	struct ocfs2_dinode *old_di;
 	struct ocfs2_dinode *old_di;
+	struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, };
+	struct ocfs2_dir_lookup_result target_lookup_res = { NULL, };
+	struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
+	struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
+	struct ocfs2_dir_lookup_result target_insert = { NULL, };
 
 
 	/* At some point it might be nice to break this function up a
 	/* At some point it might be nice to break this function up a
 	 * bit. */
 	 * bit. */
@@ -1108,9 +1122,10 @@ static int ocfs2_rename(struct inode *old_dir,
 	if (S_ISDIR(old_inode->i_mode)) {
 	if (S_ISDIR(old_inode->i_mode)) {
 		u64 old_inode_parent;
 		u64 old_inode_parent;
 
 
+		update_dot_dot = 1;
 		status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
 		status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
-						  old_inode, &old_inode_de_bh,
-						  &old_inode_dot_dot_de);
+						  old_inode,
+						  &old_inode_dot_dot_res);
 		if (status) {
 		if (status) {
 			status = -EIO;
 			status = -EIO;
 			goto bail;
 			goto bail;
@@ -1122,7 +1137,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		}
 		}
 
 
 		if (!new_inode && new_dir != old_dir &&
 		if (!new_inode && new_dir != old_dir &&
-		    new_dir->i_nlink >= OCFS2_LINK_MAX) {
+		    new_dir->i_nlink >= ocfs2_link_max(osb)) {
 			status = -EMLINK;
 			status = -EMLINK;
 			goto bail;
 			goto bail;
 		}
 		}
@@ -1151,8 +1166,8 @@ static int ocfs2_rename(struct inode *old_dir,
 	 * to delete it */
 	 * to delete it */
 	status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
 	status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
 					  new_dentry->d_name.len,
 					  new_dentry->d_name.len,
-					  &newfe_blkno, new_dir, &new_de_bh,
-					  &new_de);
+					  &newfe_blkno, new_dir,
+					  &target_lookup_res);
 	/* The only error we allow here is -ENOENT because the new
 	/* The only error we allow here is -ENOENT because the new
 	 * file not existing is perfectly valid. */
 	 * file not existing is perfectly valid. */
 	if ((status < 0) && (status != -ENOENT)) {
 	if ((status < 0) && (status != -ENOENT)) {
@@ -1161,8 +1176,10 @@ static int ocfs2_rename(struct inode *old_dir,
 		mlog_errno(status);
 		mlog_errno(status);
 		goto bail;
 		goto bail;
 	}
 	}
+	if (status == 0)
+		target_exists = 1;
 
 
-	if (!new_de && new_inode) {
+	if (!target_exists && new_inode) {
 		/*
 		/*
 		 * Target was unlinked by another node while we were
 		 * Target was unlinked by another node while we were
 		 * waiting to get to ocfs2_rename(). There isn't
 		 * waiting to get to ocfs2_rename(). There isn't
@@ -1175,7 +1192,7 @@ static int ocfs2_rename(struct inode *old_dir,
 
 
 	/* In case we need to overwrite an existing file, we blow it
 	/* In case we need to overwrite an existing file, we blow it
 	 * away first */
 	 * away first */
-	if (new_de) {
+	if (target_exists) {
 		/* VFS didn't think there existed an inode here, but
 		/* VFS didn't think there existed an inode here, but
 		 * someone else in the cluster must have raced our
 		 * someone else in the cluster must have raced our
 		 * rename to create one. Today we error cleanly, in
 		 * rename to create one. Today we error cleanly, in
@@ -1216,8 +1233,8 @@ static int ocfs2_rename(struct inode *old_dir,
 
 
 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
 
 
-		mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
-		     "newfebh=%p bhblocknr=%llu\n", new_de,
+		mlog(0, "aha rename over existing... new_blkno=%llu "
+		     "newfebh=%p bhblocknr=%llu\n",
 		     (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
 		     (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
 
 
@@ -1225,7 +1242,7 @@ static int ocfs2_rename(struct inode *old_dir,
 			status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
 			status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
 							  new_inode,
 							  new_inode,
 							  orphan_name,
 							  orphan_name,
-							  &orphan_entry_bh);
+							  &orphan_insert);
 			if (status < 0) {
 			if (status < 0) {
 				mlog_errno(status);
 				mlog_errno(status);
 				goto bail;
 				goto bail;
@@ -1243,7 +1260,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
 		status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
 						      new_dentry->d_name.name,
 						      new_dentry->d_name.name,
 						      new_dentry->d_name.len,
 						      new_dentry->d_name.len,
-						      &insert_entry_bh);
+						      &target_insert);
 		if (status < 0) {
 		if (status < 0) {
 			mlog_errno(status);
 			mlog_errno(status);
 			goto bail;
 			goto bail;
@@ -1258,10 +1275,10 @@ static int ocfs2_rename(struct inode *old_dir,
 		goto bail;
 		goto bail;
 	}
 	}
 
 
-	if (new_de) {
+	if (target_exists) {
 		if (S_ISDIR(new_inode->i_mode)) {
 		if (S_ISDIR(new_inode->i_mode)) {
-			if (!ocfs2_empty_dir(new_inode) ||
-			    new_inode->i_nlink != 2) {
+			if (new_inode->i_nlink != 2 ||
+			    !ocfs2_empty_dir(new_inode)) {
 				status = -ENOTEMPTY;
 				status = -ENOTEMPTY;
 				goto bail;
 				goto bail;
 			}
 			}
@@ -1274,10 +1291,10 @@ static int ocfs2_rename(struct inode *old_dir,
 		}
 		}
 
 
 		if (S_ISDIR(new_inode->i_mode) ||
 		if (S_ISDIR(new_inode->i_mode) ||
-		    (newfe->i_links_count == cpu_to_le16(1))){
+		    (ocfs2_read_links_count(newfe) == 1)) {
 			status = ocfs2_orphan_add(osb, handle, new_inode,
 			status = ocfs2_orphan_add(osb, handle, new_inode,
 						  newfe, orphan_name,
 						  newfe, orphan_name,
-						  orphan_entry_bh, orphan_dir);
+						  &orphan_insert, orphan_dir);
 			if (status < 0) {
 			if (status < 0) {
 				mlog_errno(status);
 				mlog_errno(status);
 				goto bail;
 				goto bail;
@@ -1285,8 +1302,8 @@ static int ocfs2_rename(struct inode *old_dir,
 		}
 		}
 
 
 		/* change the dirent to point to the correct inode */
 		/* change the dirent to point to the correct inode */
-		status = ocfs2_update_entry(new_dir, handle, new_de_bh,
-					    new_de, old_inode);
+		status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
+					    old_inode);
 		if (status < 0) {
 		if (status < 0) {
 			mlog_errno(status);
 			mlog_errno(status);
 			goto bail;
 			goto bail;
@@ -1294,9 +1311,9 @@ static int ocfs2_rename(struct inode *old_dir,
 		new_dir->i_version++;
 		new_dir->i_version++;
 
 
 		if (S_ISDIR(new_inode->i_mode))
 		if (S_ISDIR(new_inode->i_mode))
-			newfe->i_links_count = 0;
+			ocfs2_set_links_count(newfe, 0);
 		else
 		else
-			le16_add_cpu(&newfe->i_links_count, -1);
+			ocfs2_add_links_count(newfe, -1);
 
 
 		status = ocfs2_journal_dirty(handle, newfe_bh);
 		status = ocfs2_journal_dirty(handle, newfe_bh);
 		if (status < 0) {
 		if (status < 0) {
@@ -1307,7 +1324,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		/* if the name was not found in new_dir, add it now */
 		/* if the name was not found in new_dir, add it now */
 		status = ocfs2_add_entry(handle, new_dentry, old_inode,
 		status = ocfs2_add_entry(handle, new_dentry, old_inode,
 					 OCFS2_I(old_inode)->ip_blkno,
 					 OCFS2_I(old_inode)->ip_blkno,
-					 new_dir_bh, insert_entry_bh);
+					 new_dir_bh, &target_insert);
 	}
 	}
 
 
 	old_inode->i_ctime = CURRENT_TIME;
 	old_inode->i_ctime = CURRENT_TIME;
@@ -1334,15 +1351,13 @@ static int ocfs2_rename(struct inode *old_dir,
 	 * because the insert might have changed the type of directory
 	 * because the insert might have changed the type of directory
 	 * we're dealing with.
 	 * we're dealing with.
 	 */
 	 */
-	old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
-				     old_dentry->d_name.len,
-				     old_dir, &old_de);
-	if (!old_de_bh) {
-		status = -EIO;
+	status = ocfs2_find_entry(old_dentry->d_name.name,
+				  old_dentry->d_name.len, old_dir,
+				  &old_entry_lookup);
+	if (status)
 		goto bail;
 		goto bail;
-	}
 
 
-	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
+	status = ocfs2_delete_entry(handle, old_dir, &old_entry_lookup);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto bail;
 		goto bail;
@@ -1353,9 +1368,10 @@ static int ocfs2_rename(struct inode *old_dir,
 		new_inode->i_ctime = CURRENT_TIME;
 		new_inode->i_ctime = CURRENT_TIME;
 	}
 	}
 	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
 	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
-	if (old_inode_de_bh) {
-		status = ocfs2_update_entry(old_inode, handle, old_inode_de_bh,
-					    old_inode_dot_dot_de, new_dir);
+
+	if (update_dot_dot) {
+		status = ocfs2_update_entry(old_inode, handle,
+					    &old_inode_dot_dot_res, new_dir);
 		old_dir->i_nlink--;
 		old_dir->i_nlink--;
 		if (new_inode) {
 		if (new_inode) {
 			new_inode->i_nlink--;
 			new_inode->i_nlink--;
@@ -1391,14 +1407,13 @@ static int ocfs2_rename(struct inode *old_dir,
 		} else {
 		} else {
 			struct ocfs2_dinode *fe;
 			struct ocfs2_dinode *fe;
 			status = ocfs2_journal_access_di(handle, old_dir,
 			status = ocfs2_journal_access_di(handle, old_dir,
-							 old_dir_bh,
-							 OCFS2_JOURNAL_ACCESS_WRITE);
+						      old_dir_bh,
+						      OCFS2_JOURNAL_ACCESS_WRITE);
 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
-			fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
+			ocfs2_set_links_count(fe, old_dir->i_nlink);
 			status = ocfs2_journal_dirty(handle, old_dir_bh);
 			status = ocfs2_journal_dirty(handle, old_dir_bh);
 		}
 		}
 	}
 	}
-
 	ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
 	ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
 	status = 0;
 	status = 0;
 bail:
 bail:
@@ -1429,13 +1444,17 @@ bail:
 
 
 	if (new_inode)
 	if (new_inode)
 		iput(new_inode);
 		iput(new_inode);
+
+	ocfs2_free_dir_lookup_result(&target_lookup_res);
+	ocfs2_free_dir_lookup_result(&old_entry_lookup);
+	ocfs2_free_dir_lookup_result(&old_inode_dot_dot_res);
+	ocfs2_free_dir_lookup_result(&orphan_insert);
+	ocfs2_free_dir_lookup_result(&target_insert);
+
 	brelse(newfe_bh);
 	brelse(newfe_bh);
 	brelse(old_inode_bh);
 	brelse(old_inode_bh);
 	brelse(old_dir_bh);
 	brelse(old_dir_bh);
 	brelse(new_dir_bh);
 	brelse(new_dir_bh);
-	brelse(new_de_bh);
-	brelse(old_de_bh);
-	brelse(old_inode_de_bh);
 	brelse(orphan_entry_bh);
 	brelse(orphan_entry_bh);
 	brelse(insert_entry_bh);
 	brelse(insert_entry_bh);
 
 
@@ -1558,7 +1577,6 @@ static int ocfs2_symlink(struct inode *dir,
 	struct inode *inode = NULL;
 	struct inode *inode = NULL;
 	struct super_block *sb;
 	struct super_block *sb;
 	struct buffer_head *new_fe_bh = NULL;
 	struct buffer_head *new_fe_bh = NULL;
-	struct buffer_head *de_bh = NULL;
 	struct buffer_head *parent_fe_bh = NULL;
 	struct buffer_head *parent_fe_bh = NULL;
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_dinode *dirfe;
 	struct ocfs2_dinode *dirfe;
@@ -1572,6 +1590,7 @@ static int ocfs2_symlink(struct inode *dir,
 		.enable = 1,
 		.enable = 1,
 	};
 	};
 	int did_quota = 0, did_quota_inode = 0;
 	int did_quota = 0, did_quota_inode = 0;
+	struct ocfs2_dir_lookup_result lookup = { NULL, };
 
 
 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1592,7 +1611,7 @@ static int ocfs2_symlink(struct inode *dir,
 	}
 	}
 
 
 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
-	if (!dirfe->i_links_count) {
+	if (!ocfs2_read_links_count(dirfe)) {
 		/* can't make a file in a deleted directory. */
 		/* can't make a file in a deleted directory. */
 		status = -ENOENT;
 		status = -ENOENT;
 		goto bail;
 		goto bail;
@@ -1605,7 +1624,7 @@ static int ocfs2_symlink(struct inode *dir,
 
 
 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
 					      dentry->d_name.name,
 					      dentry->d_name.name,
-					      dentry->d_name.len, &de_bh);
+					      dentry->d_name.len, &lookup);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto bail;
 		goto bail;
@@ -1744,7 +1763,7 @@ static int ocfs2_symlink(struct inode *dir,
 
 
 	status = ocfs2_add_entry(handle, dentry, inode,
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
-				 de_bh);
+				 &lookup);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto bail;
 		goto bail;
@@ -1772,9 +1791,9 @@ bail:
 
 
 	brelse(new_fe_bh);
 	brelse(new_fe_bh);
 	brelse(parent_fe_bh);
 	brelse(parent_fe_bh);
-	brelse(de_bh);
 	kfree(si.name);
 	kfree(si.name);
 	kfree(si.value);
 	kfree(si.value);
+	ocfs2_free_dir_lookup_result(&lookup);
 	if (inode_ac)
 	if (inode_ac)
 		ocfs2_free_alloc_context(inode_ac);
 		ocfs2_free_alloc_context(inode_ac);
 	if (data_ac)
 	if (data_ac)
@@ -1826,7 +1845,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 				    struct inode **ret_orphan_dir,
 				    struct inode **ret_orphan_dir,
 				    struct inode *inode,
 				    struct inode *inode,
 				    char *name,
 				    char *name,
-				    struct buffer_head **de_bh)
+				    struct ocfs2_dir_lookup_result *lookup)
 {
 {
 	struct inode *orphan_dir_inode;
 	struct inode *orphan_dir_inode;
 	struct buffer_head *orphan_dir_bh = NULL;
 	struct buffer_head *orphan_dir_bh = NULL;
@@ -1857,7 +1876,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 
 
 	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
 	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
 					      orphan_dir_bh, name,
 					      orphan_dir_bh, name,
-					      OCFS2_ORPHAN_NAMELEN, de_bh);
+					      OCFS2_ORPHAN_NAMELEN, lookup);
 	if (status < 0) {
 	if (status < 0) {
 		ocfs2_inode_unlock(orphan_dir_inode, 1);
 		ocfs2_inode_unlock(orphan_dir_inode, 1);
 
 
@@ -1884,7 +1903,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 			    struct inode *inode,
 			    struct inode *inode,
 			    struct ocfs2_dinode *fe,
 			    struct ocfs2_dinode *fe,
 			    char *name,
 			    char *name,
-			    struct buffer_head *de_bh,
+			    struct ocfs2_dir_lookup_result *lookup,
 			    struct inode *orphan_dir_inode)
 			    struct inode *orphan_dir_inode)
 {
 {
 	struct buffer_head *orphan_dir_bh = NULL;
 	struct buffer_head *orphan_dir_bh = NULL;
@@ -1910,8 +1929,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	 * underneath us... */
 	 * underneath us... */
 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
 	if (S_ISDIR(inode->i_mode))
 	if (S_ISDIR(inode->i_mode))
-		le16_add_cpu(&orphan_fe->i_links_count, 1);
-	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
+		ocfs2_add_links_count(orphan_fe, 1);
+	orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
 
 
 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
 	if (status < 0) {
 	if (status < 0) {
@@ -1922,7 +1941,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
 	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
 				   OCFS2_ORPHAN_NAMELEN, inode,
 				   OCFS2_ORPHAN_NAMELEN, inode,
 				   OCFS2_I(inode)->ip_blkno,
 				   OCFS2_I(inode)->ip_blkno,
-				   orphan_dir_bh, de_bh);
+				   orphan_dir_bh, lookup);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
@@ -1955,8 +1974,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 	char name[OCFS2_ORPHAN_NAMELEN + 1];
 	char name[OCFS2_ORPHAN_NAMELEN + 1];
 	struct ocfs2_dinode *orphan_fe;
 	struct ocfs2_dinode *orphan_fe;
 	int status = 0;
 	int status = 0;
-	struct buffer_head *target_de_bh = NULL;
-	struct ocfs2_dir_entry *target_de = NULL;
+	struct ocfs2_dir_lookup_result lookup = { NULL, };
 
 
 	mlog_entry_void();
 	mlog_entry_void();
 
 
@@ -1971,17 +1989,15 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 	     OCFS2_ORPHAN_NAMELEN);
 	     OCFS2_ORPHAN_NAMELEN);
 
 
 	/* find it's spot in the orphan directory */
 	/* find it's spot in the orphan directory */
-	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
-					orphan_dir_inode, &target_de);
-	if (!target_de_bh) {
-		status = -ENOENT;
+	status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode,
+				  &lookup);
+	if (status) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
 	}
 	}
 
 
 	/* remove it from the orphan directory */
 	/* remove it from the orphan directory */
-	status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de,
-				    target_de_bh);
+	status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
@@ -1997,8 +2013,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 	/* do the i_nlink dance! :) */
 	/* do the i_nlink dance! :) */
 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
 	if (S_ISDIR(inode->i_mode))
 	if (S_ISDIR(inode->i_mode))
-		le16_add_cpu(&orphan_fe->i_links_count, -1);
-	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
+		ocfs2_add_links_count(orphan_fe, -1);
+	orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
 
 
 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
 	if (status < 0) {
 	if (status < 0) {
@@ -2007,7 +2023,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 	}
 	}
 
 
 leave:
 leave:
-	brelse(target_de_bh);
+	ocfs2_free_dir_lookup_result(&lookup);
 
 
 	mlog_exit(status);
 	mlog_exit(status);
 	return status;
 	return status;
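Taken together, the namei.c hunks above replace raw dirent buffer heads with struct ocfs2_dir_lookup_result: the caller owns one result object, ocfs2_find_entry() fills it and returns a status, the update/delete helpers consume it, and ocfs2_free_dir_lookup_result() releases whatever it pinned. A minimal sketch of that calling convention, using only the calls visible in the hunks above (error handling abbreviated; the wrapper name is made up):

/* Minimal sketch of the ocfs2_dir_lookup_result calling convention. */
static int example_unlink_name(handle_t *handle, struct inode *dir,
			       const char *name, int namelen)
{
	struct ocfs2_dir_lookup_result lookup = { NULL, };
	int status;

	status = ocfs2_find_entry(name, namelen, dir, &lookup);
	if (status)
		goto out;

	status = ocfs2_delete_entry(handle, dir, &lookup);
out:
	/* Always safe to call: drops any buffer heads the lookup took. */
	ocfs2_free_dir_lookup_result(&lookup);
	return status;
}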

+ 71 - 5
fs/ocfs2/ocfs2.h

@@ -209,6 +209,7 @@ enum ocfs2_mount_options
 struct ocfs2_journal;
 struct ocfs2_journal;
 struct ocfs2_slot_info;
 struct ocfs2_slot_info;
 struct ocfs2_recovery_map;
 struct ocfs2_recovery_map;
+struct ocfs2_replay_map;
 struct ocfs2_quota_recovery;
 struct ocfs2_quota_recovery;
 struct ocfs2_dentry_lock;
 struct ocfs2_dentry_lock;
 struct ocfs2_super
 struct ocfs2_super
@@ -264,6 +265,7 @@ struct ocfs2_super
 	atomic_t vol_state;
 	atomic_t vol_state;
 	struct mutex recovery_lock;
 	struct mutex recovery_lock;
 	struct ocfs2_recovery_map *recovery_map;
 	struct ocfs2_recovery_map *recovery_map;
+	struct ocfs2_replay_map *replay_map;
 	struct task_struct *recovery_thread_task;
 	struct task_struct *recovery_thread_task;
 	int disable_recovery;
 	int disable_recovery;
 	wait_queue_head_t checkpoint_event;
 	wait_queue_head_t checkpoint_event;
@@ -287,11 +289,6 @@ struct ocfs2_super
 
 
 	u64 la_last_gd;
 	u64 la_last_gd;
 
 
-#ifdef CONFIG_OCFS2_FS_STATS
-	struct dentry *local_alloc_debug;
-	char *local_alloc_debug_buf;
-#endif
-
 	/* Next three fields are for local node slot recovery during
 	/* Next three fields are for local node slot recovery during
 	 * mount. */
 	 * mount. */
 	int dirty;
 	int dirty;
@@ -305,9 +302,11 @@ struct ocfs2_super
 	struct ocfs2_cluster_connection *cconn;
 	struct ocfs2_cluster_connection *cconn;
 	struct ocfs2_lock_res osb_super_lockres;
 	struct ocfs2_lock_res osb_super_lockres;
 	struct ocfs2_lock_res osb_rename_lockres;
 	struct ocfs2_lock_res osb_rename_lockres;
+	struct ocfs2_lock_res osb_nfs_sync_lockres;
 	struct ocfs2_dlm_debug *osb_dlm_debug;
 	struct ocfs2_dlm_debug *osb_dlm_debug;
 
 
 	struct dentry *osb_debug_root;
 	struct dentry *osb_debug_root;
+	struct dentry *osb_ctxt;
 
 
 	wait_queue_head_t recovery_event;
 	wait_queue_head_t recovery_event;
 
 
@@ -344,6 +343,12 @@ struct ocfs2_super
 
 
 	/* used to protect metaecc calculation check of xattr. */
 	/* used to protect metaecc calculation check of xattr. */
 	spinlock_t osb_xattr_lock;
 	spinlock_t osb_xattr_lock;
+
+	unsigned int			osb_dx_mask;
+	u32				osb_dx_seed[4];
+
+	/* the group we used to allocate inodes. */
+	u64				osb_inode_alloc_group;
 };
 };
 
 
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
@@ -402,6 +407,51 @@ static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
 	return 0;
 	return 0;
 }
 }
 
 
+static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
+{
+	if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
+		return 1;
+	return 0;
+}
+
+static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
+{
+	if (ocfs2_supports_indexed_dirs(osb))
+		return OCFS2_DX_LINK_MAX;
+	return OCFS2_LINK_MAX;
+}
+
+static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di)
+{
+	u32 nlink = le16_to_cpu(di->i_links_count);
+	u32 hi = le16_to_cpu(di->i_links_count_hi);
+
+	if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL))
+		nlink |= (hi << OCFS2_LINKS_HI_SHIFT);
+
+	return nlink;
+}
+
+static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink)
+{
+	u16 lo, hi;
+
+	lo = nlink;
+	hi = nlink >> OCFS2_LINKS_HI_SHIFT;
+
+	di->i_links_count = cpu_to_le16(lo);
+	di->i_links_count_hi = cpu_to_le16(hi);
+}
+
+static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
+{
+	u32 links = ocfs2_read_links_count(di);
+
+	links += n;
+
+	ocfs2_set_links_count(di, links);
+}
+
 /* set / clear functions because cluster events can make these happen
 /* set / clear functions because cluster events can make these happen
  * in parallel so we want the transitions to be atomic. this also
  * in parallel so we want the transitions to be atomic. this also
  * means that any future flags osb_flags must be protected by spinlock
  * means that any future flags osb_flags must be protected by spinlock
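The three link-count helpers added above exist because indexed directories can exceed the old 16-bit OCFS2_LINK_MAX; the count is split across i_links_count (low word) and i_links_count_hi (high word), and the high word is only honoured on read when OCFS2_INDEXED_DIR_FL is set. A small illustration of the split (the demo function is illustrative, not part of the patch):

/* Illustration only: how a 32-bit link count maps onto the two on-disk
 * words. For nlink = 100000, lo becomes 0x86a0 and hi 0x0001; a plain
 * (non-indexed) directory never has the high word ORed back in. */
static void demo_split_links(u32 nlink, u16 *lo, u16 *hi)
{
	*lo = (u16)nlink;				/* -> di->i_links_count */
	*hi = (u16)(nlink >> OCFS2_LINKS_HI_SHIFT);	/* -> di->i_links_count_hi */
}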
@@ -482,6 +532,12 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
 #define OCFS2_IS_VALID_DIR_TRAILER(ptr)					\
 #define OCFS2_IS_VALID_DIR_TRAILER(ptr)					\
 	(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))
 	(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))
 
 
+#define OCFS2_IS_VALID_DX_ROOT(ptr)					\
+	(!strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE))
+
+#define OCFS2_IS_VALID_DX_LEAF(ptr)					\
+	(!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE))
+
 static inline unsigned long ino_from_blkno(struct super_block *sb,
 static inline unsigned long ino_from_blkno(struct super_block *sb,
 					   u64 blkno)
 					   u64 blkno)
 {
 {
@@ -532,6 +588,16 @@ static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb,
 	return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits;
 	return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits;
 }
 }
 
 
+static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb,
+					       u64 blocks)
+{
+	int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits;
+	unsigned int clusters;
+
+	clusters = ocfs2_blocks_to_clusters(sb, blocks);
+	return (u64)clusters << bits;
+}
+
 static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb,
 static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb,
 						u64 bytes)
 						u64 bytes)
 {
 {
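A quick worked example for the new ocfs2_block_to_cluster_start() helper, hedged on the assumption that ocfs2_blocks_to_clusters() is the usual right shift by (clustersize_bits - blocksize_bits). With 4 KB blocks and 1 MB clusters, bits = 20 - 12 = 8, so for blocks = 300:

	clusters = 300 >> 8 = 1
	return   = 1 << 8  = 256

i.e. block 256 is the first block of the cluster that contains block 300.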

+ 131 - 5
fs/ocfs2/ocfs2_fs.h

@@ -66,6 +66,8 @@
 #define OCFS2_GROUP_DESC_SIGNATURE      "GROUP01"
 #define OCFS2_GROUP_DESC_SIGNATURE      "GROUP01"
 #define OCFS2_XATTR_BLOCK_SIGNATURE	"XATTR01"
 #define OCFS2_XATTR_BLOCK_SIGNATURE	"XATTR01"
 #define OCFS2_DIR_TRAILER_SIGNATURE	"DIRTRL1"
 #define OCFS2_DIR_TRAILER_SIGNATURE	"DIRTRL1"
+#define OCFS2_DX_ROOT_SIGNATURE		"DXDIR01"
+#define OCFS2_DX_LEAF_SIGNATURE		"DXLEAF1"
 
 
 /* Compatibility flags */
 /* Compatibility flags */
 #define OCFS2_HAS_COMPAT_FEATURE(sb,mask)			\
 #define OCFS2_HAS_COMPAT_FEATURE(sb,mask)			\
@@ -95,7 +97,8 @@
 					 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
 					 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
 					 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
 					 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
 					 | OCFS2_FEATURE_INCOMPAT_XATTR \
 					 | OCFS2_FEATURE_INCOMPAT_XATTR \
-					 | OCFS2_FEATURE_INCOMPAT_META_ECC)
+					 | OCFS2_FEATURE_INCOMPAT_META_ECC \
+					 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
 					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
 					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
 					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
 					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -151,6 +154,9 @@
 /* Support for extended attributes */
 /* Support for extended attributes */
 #define OCFS2_FEATURE_INCOMPAT_XATTR		0x0200
 #define OCFS2_FEATURE_INCOMPAT_XATTR		0x0200
 
 
+/* Support for indexed directores */
+/* Support for indexed directories */
+#define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS	0x0400
+
 /* Metadata checksum and error correction */
 /* Metadata checksum and error correction */
 #define OCFS2_FEATURE_INCOMPAT_META_ECC		0x0800
 #define OCFS2_FEATURE_INCOMPAT_META_ECC		0x0800
 
 
@@ -411,8 +417,12 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
 #define OCFS2_DIR_REC_LEN(name_len)	(((name_len) + OCFS2_DIR_MEMBER_LEN + \
 #define OCFS2_DIR_REC_LEN(name_len)	(((name_len) + OCFS2_DIR_MEMBER_LEN + \
                                           OCFS2_DIR_ROUND) & \
                                           OCFS2_DIR_ROUND) & \
 					 ~OCFS2_DIR_ROUND)
 					 ~OCFS2_DIR_ROUND)
+#define OCFS2_DIR_MIN_REC_LEN	OCFS2_DIR_REC_LEN(1)
 
 
 #define OCFS2_LINK_MAX		32000
 #define OCFS2_LINK_MAX		32000
+#define	OCFS2_DX_LINK_MAX	((1U << 31) - 1U)
+#define	OCFS2_LINKS_HI_SHIFT	16
+#define	OCFS2_DX_ENTRIES_MAX	(0xffffffffU)
 
 
 #define S_SHIFT			12
 #define S_SHIFT			12
 static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
 static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -628,8 +638,9 @@ struct ocfs2_super_block {
 /*B8*/	__le16 s_xattr_inline_size;	/* extended attribute inline size
 /*B8*/	__le16 s_xattr_inline_size;	/* extended attribute inline size
 					   for this fs*/
 					   for this fs*/
 	__le16 s_reserved0;
 	__le16 s_reserved0;
-	__le32 s_reserved1;
-/*C0*/  __le64 s_reserved2[16];		/* Fill out superblock */
+	__le32 s_dx_seed[3];		/* seed[0-2] for dx dir hash.
+					 * s_uuid_hash serves as seed[3]. */
+/*C0*/  __le64 s_reserved2[15];		/* Fill out superblock */
 /*140*/
 /*140*/
 
 
 	/*
 	/*
@@ -679,7 +690,7 @@ struct ocfs2_dinode {
 					   belongs to */
 					   belongs to */
 	__le16 i_suballoc_bit;		/* Bit offset in suballocator
 	__le16 i_suballoc_bit;		/* Bit offset in suballocator
 					   block group */
 					   block group */
-/*10*/	__le16 i_reserved0;
+/*10*/	__le16 i_links_count_hi;	/* High 16 bits of links count */
 	__le16 i_xattr_inline_size;
 	__le16 i_xattr_inline_size;
 	__le32 i_clusters;		/* Cluster count */
 	__le32 i_clusters;		/* Cluster count */
 	__le32 i_uid;			/* Owner UID */
 	__le32 i_uid;			/* Owner UID */
@@ -705,7 +716,8 @@ struct ocfs2_dinode {
 	__le16 i_dyn_features;
 	__le16 i_dyn_features;
 	__le64 i_xattr_loc;
 	__le64 i_xattr_loc;
 /*80*/	struct ocfs2_block_check i_check;	/* Error checking */
 /*80*/	struct ocfs2_block_check i_check;	/* Error checking */
-/*88*/	__le64 i_reserved2[6];
+/*88*/	__le64 i_dx_root;		/* Pointer to dir index root block */
+	__le64 i_reserved2[5];
 /*B8*/	union {
 /*B8*/	union {
 		__le64 i_pad1;		/* Generic way to refer to this
 		__le64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
 					   64bit union */
@@ -781,6 +793,90 @@ struct ocfs2_dir_block_trailer {
 /*40*/
 /*40*/
 };
 };
 
 
+ /*
+ * A directory entry in the indexed tree. We don't store the full name here,
+ * but instead provide a pointer to the full dirent in the unindexed tree.
+ *
+ * We also store name_len here so as to reduce the number of leaf blocks we
+ * need to search in case of collisions.
+ */
+struct ocfs2_dx_entry {
+	__le32		dx_major_hash;	/* Used to find logical
+					 * cluster in index */
+	__le32		dx_minor_hash;	/* Lower bits used to find
+					 * block in cluster */
+	__le64		dx_dirent_blk;	/* Physical block in unindexed
+					 * tree holding this dirent. */
+};
+
+struct ocfs2_dx_entry_list {
+	__le32		de_reserved;
+	__le16		de_count;	/* Maximum number of entries
+					 * possible in de_entries */
+	__le16		de_num_used;	/* Current number of
+					 * de_entries entries */
+	struct	ocfs2_dx_entry		de_entries[0];	/* Indexed dir entries
+							 * in a packed array of
+							 * length de_num_used */
+};
+
+#define OCFS2_DX_FLAG_INLINE	0x01
+
+/*
+ * A directory indexing block. Each indexed directory has one of these,
+ * pointed to by ocfs2_dinode.
+ *
+ * This block stores an indexed btree root, and a set of free space
+ * start-of-list pointers.
+ */
+struct ocfs2_dx_root_block {
+	__u8		dr_signature[8];	/* Signature for verification */
+	struct ocfs2_block_check dr_check;	/* Error checking */
+	__le16		dr_suballoc_slot;	/* Slot suballocator this
+						 * block belongs to. */
+	__le16		dr_suballoc_bit;	/* Bit offset in suballocator
+						 * block group */
+	__le32		dr_fs_generation;	/* Must match super block */
+	__le64		dr_blkno;		/* Offset on disk, in blocks */
+	__le64		dr_last_eb_blk;		/* Pointer to last
+						 * extent block */
+	__le32		dr_clusters;		/* Clusters allocated
+						 * to the indexed tree. */
+	__u8		dr_flags;		/* OCFS2_DX_FLAG_* flags */
+	__u8		dr_reserved0;
+	__le16		dr_reserved1;
+	__le64		dr_dir_blkno;		/* Pointer to parent inode */
+	__le32		dr_num_entries;		/* Total number of
+						 * names stored in
+						 * this directory.*/
+	__le32		dr_reserved2;
+	__le64		dr_free_blk;		/* Pointer to head of free
+						 * unindexed block list. */
+	__le64		dr_reserved3[15];
+	union {
+		struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
+						   * bits for maximum space
+						   * efficiency. */
+		struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of
+							* entries. We grow out
+							* to extents if this
+							* gets too big. */
+	};
+};
+
+/*
+ * The header of a leaf block in the indexed tree.
+ */
+struct ocfs2_dx_leaf {
+	__u8		dl_signature[8];/* Signature for verification */
+	struct ocfs2_block_check dl_check;	/* Error checking */
+	__le64		dl_blkno;	/* Offset on disk, in blocks */
+	__le32		dl_fs_generation;/* Must match super block */
+	__le32		dl_reserved0;
+	__le64		dl_reserved1;
+	struct ocfs2_dx_entry_list	dl_list;
+};
+
 /*
 /*
  * On disk allocator group structure for OCFS2
  * On disk allocator group structure for OCFS2
  */
  */
@@ -1112,6 +1208,16 @@ static inline int ocfs2_extent_recs_per_inode_with_xattr(
 	return size / sizeof(struct ocfs2_extent_rec);
 	return size / sizeof(struct ocfs2_extent_rec);
 }
 }
 
 
+static inline int ocfs2_extent_recs_per_dx_root(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_dx_root_block, dr_list.l_recs);
+
+	return size / sizeof(struct ocfs2_extent_rec);
+}
+
 static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
 static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
 {
 {
 	int size;
 	int size;
@@ -1132,6 +1238,26 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
 	return size / sizeof(struct ocfs2_extent_rec);
 	return size / sizeof(struct ocfs2_extent_rec);
 }
 }
 
 
+static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_dx_leaf, dl_list.de_entries);
+
+	return size / sizeof(struct ocfs2_dx_entry);
+}
+
+static inline int ocfs2_dx_entries_per_root(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries);
+
+	return size / sizeof(struct ocfs2_dx_entry);
+}
+
 static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
 static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
 {
 {
 	u16 size;
 	u16 size;
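As a rough worked example of the two dx sizing helpers above, assume a 4 KB block, the usual 8-byte struct ocfs2_block_check, and the field layout shown earlier, so sizeof(struct ocfs2_dx_entry) = 16 and de_entries starts 48 bytes into an ocfs2_dx_leaf:

	ocfs2_dx_entries_per_leaf(sb) = (4096 - 48) / 16 = 253

The dx root holds fewer entries, since the ocfs2_dx_root_block header that precedes dr_entries is considerably larger than the leaf header.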

+ 4 - 0
fs/ocfs2/ocfs2_lockid.h

@@ -47,6 +47,7 @@ enum ocfs2_lock_type {
 	OCFS2_LOCK_TYPE_OPEN,
 	OCFS2_LOCK_TYPE_OPEN,
 	OCFS2_LOCK_TYPE_FLOCK,
 	OCFS2_LOCK_TYPE_FLOCK,
 	OCFS2_LOCK_TYPE_QINFO,
 	OCFS2_LOCK_TYPE_QINFO,
+	OCFS2_LOCK_TYPE_NFS_SYNC,
 	OCFS2_NUM_LOCK_TYPES
 	OCFS2_NUM_LOCK_TYPES
 };
 };
 
 
@@ -81,6 +82,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 		case OCFS2_LOCK_TYPE_QINFO:
 		case OCFS2_LOCK_TYPE_QINFO:
 			c = 'Q';
 			c = 'Q';
 			break;
 			break;
+		case OCFS2_LOCK_TYPE_NFS_SYNC:
+			c = 'Y';
+			break;
 		default:
 		default:
 			c = '\0';
 			c = '\0';
 	}
 	}

+ 241 - 13
fs/ocfs2/suballoc.c

@@ -48,7 +48,8 @@
 #include "buffer_head_io.h"
 #include "buffer_head_io.h"
 
 
 #define NOT_ALLOC_NEW_GROUP		0
 #define NOT_ALLOC_NEW_GROUP		0
-#define ALLOC_NEW_GROUP			1
+#define ALLOC_NEW_GROUP			0x1
+#define ALLOC_GROUPS_FROM_GLOBAL	0x2
 
 
 #define OCFS2_MAX_INODES_TO_STEAL	1024
 #define OCFS2_MAX_INODES_TO_STEAL	1024
 
 
@@ -64,7 +65,9 @@ static int ocfs2_block_group_fill(handle_t *handle,
 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 				   struct inode *alloc_inode,
 				   struct inode *alloc_inode,
 				   struct buffer_head *bh,
 				   struct buffer_head *bh,
-				   u64 max_block);
+				   u64 max_block,
+				   u64 *last_alloc_group,
+				   int flags);
 
 
 static int ocfs2_cluster_group_search(struct inode *inode,
 static int ocfs2_cluster_group_search(struct inode *inode,
 				      struct buffer_head *group_bh,
 				      struct buffer_head *group_bh,
@@ -116,6 +119,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
 						u16 *bg_bit_off);
 						u16 *bg_bit_off);
 static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
 static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
 					     u32 bits_wanted, u64 max_block,
 					     u32 bits_wanted, u64 max_block,
+					     int flags,
 					     struct ocfs2_alloc_context **ac);
 					     struct ocfs2_alloc_context **ac);
 
 
 void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
 void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
@@ -403,7 +407,9 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 				   struct inode *alloc_inode,
 				   struct inode *alloc_inode,
 				   struct buffer_head *bh,
 				   struct buffer_head *bh,
-				   u64 max_block)
+				   u64 max_block,
+				   u64 *last_alloc_group,
+				   int flags)
 {
 {
 	int status, credits;
 	int status, credits;
 	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
 	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
@@ -423,7 +429,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 	cl = &fe->id2.i_chain;
 	cl = &fe->id2.i_chain;
 	status = ocfs2_reserve_clusters_with_limit(osb,
 	status = ocfs2_reserve_clusters_with_limit(osb,
 						   le16_to_cpu(cl->cl_cpg),
 						   le16_to_cpu(cl->cl_cpg),
-						   max_block, &ac);
+						   max_block, flags, &ac);
 	if (status < 0) {
 	if (status < 0) {
 		if (status != -ENOSPC)
 		if (status != -ENOSPC)
 			mlog_errno(status);
 			mlog_errno(status);
@@ -440,6 +446,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 		goto bail;
 		goto bail;
 	}
 	}
 
 
+	if (last_alloc_group && *last_alloc_group != 0) {
+		mlog(0, "use old allocation group %llu for block group alloc\n",
+		     (unsigned long long)*last_alloc_group);
+		ac->ac_last_group = *last_alloc_group;
+	}
 	status = ocfs2_claim_clusters(osb,
 	status = ocfs2_claim_clusters(osb,
 				      handle,
 				      handle,
 				      ac,
 				      ac,
@@ -514,6 +525,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 	alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
 	alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
 
 
 	status = 0;
 	status = 0;
+
+	/* save the new last alloc group so that the caller can cache it. */
+	if (last_alloc_group)
+		*last_alloc_group = ac->ac_last_group;
+
 bail:
 bail:
 	if (handle)
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 		ocfs2_commit_trans(osb, handle);
@@ -531,7 +547,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 				       struct ocfs2_alloc_context *ac,
 				       struct ocfs2_alloc_context *ac,
 				       int type,
 				       int type,
 				       u32 slot,
 				       u32 slot,
-				       int alloc_new_group)
+				       u64 *last_alloc_group,
+				       int flags)
 {
 {
 	int status;
 	int status;
 	u32 bits_wanted = ac->ac_bits_wanted;
 	u32 bits_wanted = ac->ac_bits_wanted;
@@ -587,7 +604,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 			goto bail;
 			goto bail;
 		}
 		}
 
 
-		if (alloc_new_group != ALLOC_NEW_GROUP) {
+		if (!(flags & ALLOC_NEW_GROUP)) {
 			mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
 			mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
 			     "and we don't alloc a new group for it.\n",
 			     "and we don't alloc a new group for it.\n",
 			     slot, bits_wanted, free_bits);
 			     slot, bits_wanted, free_bits);
@@ -596,7 +613,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 		}
 		}
 
 
 		status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
 		status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
-						 ac->ac_max_block);
+						 ac->ac_max_block,
+						 last_alloc_group, flags);
 		if (status < 0) {
 		if (status < 0) {
 			if (status != -ENOSPC)
 			if (status != -ENOSPC)
 				mlog_errno(status);
 				mlog_errno(status);
@@ -640,7 +658,7 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
 
 
 	status = ocfs2_reserve_suballoc_bits(osb, (*ac),
 	status = ocfs2_reserve_suballoc_bits(osb, (*ac),
 					     EXTENT_ALLOC_SYSTEM_INODE,
 					     EXTENT_ALLOC_SYSTEM_INODE,
-					     slot, ALLOC_NEW_GROUP);
+					     slot, NULL, ALLOC_NEW_GROUP);
 	if (status < 0) {
 	if (status < 0) {
 		if (status != -ENOSPC)
 		if (status != -ENOSPC)
 			mlog_errno(status);
 			mlog_errno(status);
@@ -686,7 +704,8 @@ static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
 
 
 		status = ocfs2_reserve_suballoc_bits(osb, ac,
 		status = ocfs2_reserve_suballoc_bits(osb, ac,
 						     INODE_ALLOC_SYSTEM_INODE,
 						     INODE_ALLOC_SYSTEM_INODE,
-						     slot, NOT_ALLOC_NEW_GROUP);
+						     slot, NULL,
+						     NOT_ALLOC_NEW_GROUP);
 		if (status >= 0) {
 		if (status >= 0) {
 			ocfs2_set_inode_steal_slot(osb, slot);
 			ocfs2_set_inode_steal_slot(osb, slot);
 			break;
 			break;
@@ -703,6 +722,7 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 {
 {
 	int status;
 	int status;
 	s16 slot = ocfs2_get_inode_steal_slot(osb);
 	s16 slot = ocfs2_get_inode_steal_slot(osb);
+	u64 alloc_group;
 
 
 	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 	if (!(*ac)) {
@@ -738,12 +758,22 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 		goto inode_steal;
 		goto inode_steal;
 
 
 	atomic_set(&osb->s_num_inodes_stolen, 0);
 	atomic_set(&osb->s_num_inodes_stolen, 0);
+	alloc_group = osb->osb_inode_alloc_group;
 	status = ocfs2_reserve_suballoc_bits(osb, *ac,
 	status = ocfs2_reserve_suballoc_bits(osb, *ac,
 					     INODE_ALLOC_SYSTEM_INODE,
 					     INODE_ALLOC_SYSTEM_INODE,
-					     osb->slot_num, ALLOC_NEW_GROUP);
+					     osb->slot_num,
+					     &alloc_group,
+					     ALLOC_NEW_GROUP |
+					     ALLOC_GROUPS_FROM_GLOBAL);
 	if (status >= 0) {
 	if (status >= 0) {
 		status = 0;
 		status = 0;
 
 
+		spin_lock(&osb->osb_lock);
+		osb->osb_inode_alloc_group = alloc_group;
+		spin_unlock(&osb->osb_lock);
+		mlog(0, "after reservation, new allocation group is "
+		     "%llu\n", (unsigned long long)alloc_group);
+
 		/*
 		/*
 		 * Some inodes must be freed by us, so try to allocate
 		 * Some inodes must be freed by us, so try to allocate
 		 * from our own next time.
 		 * from our own next time.
@@ -790,7 +820,7 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
 
 
 	status = ocfs2_reserve_suballoc_bits(osb, ac,
 	status = ocfs2_reserve_suballoc_bits(osb, ac,
 					     GLOBAL_BITMAP_SYSTEM_INODE,
 					     GLOBAL_BITMAP_SYSTEM_INODE,
-					     OCFS2_INVALID_SLOT,
+					     OCFS2_INVALID_SLOT, NULL,
 					     ALLOC_NEW_GROUP);
 					     ALLOC_NEW_GROUP);
 	if (status < 0 && status != -ENOSPC) {
 	if (status < 0 && status != -ENOSPC) {
 		mlog_errno(status);
 		mlog_errno(status);
@@ -806,6 +836,7 @@ bail:
  * things a bit. */
  * things a bit. */
 static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
 static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
 					     u32 bits_wanted, u64 max_block,
 					     u32 bits_wanted, u64 max_block,
+					     int flags,
 					     struct ocfs2_alloc_context **ac)
 					     struct ocfs2_alloc_context **ac)
 {
 {
 	int status;
 	int status;
@@ -823,7 +854,8 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
 	(*ac)->ac_max_block = max_block;
 	(*ac)->ac_max_block = max_block;
 
 
 	status = -ENOSPC;
 	status = -ENOSPC;
-	if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
+	if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) &&
+	    ocfs2_alloc_should_use_local(osb, bits_wanted)) {
 		status = ocfs2_reserve_local_alloc_bits(osb,
 		status = ocfs2_reserve_local_alloc_bits(osb,
 							bits_wanted,
 							bits_wanted,
 							*ac);
 							*ac);
@@ -861,7 +893,8 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 			   u32 bits_wanted,
 			   u32 bits_wanted,
 			   struct ocfs2_alloc_context **ac)
 			   struct ocfs2_alloc_context **ac)
 {
 {
-	return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
+	return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0,
+						 ALLOC_NEW_GROUP, ac);
 }
 }
 
 
 /*
 /*
@@ -1618,8 +1651,41 @@ bail:
 	return status;
 	return status;
 }
 }
 
 
+static void ocfs2_init_inode_ac_group(struct inode *dir,
+				      struct buffer_head *parent_fe_bh,
+				      struct ocfs2_alloc_context *ac)
+{
+	struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data;
+	/*
+	 * Try to allocate inodes from some specific group.
+	 *
+	 * If the parent dir has recorded the last group used in allocation,
+	 * cool, use it. Otherwise if we try to allocate new inode from the
+	 * same slot the parent dir belongs to, use the same chunk.
+	 *
+	 * We are very careful here to avoid the mistake of setting
+	 * ac_last_group to a group descriptor from a different (unlocked) slot.
+	 */
+	if (OCFS2_I(dir)->ip_last_used_group &&
+	    OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
+		ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
+	else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot)
+		ac->ac_last_group = ocfs2_which_suballoc_group(
+					le64_to_cpu(fe->i_blkno),
+					le16_to_cpu(fe->i_suballoc_bit));
+}
+
+static inline void ocfs2_save_inode_ac_group(struct inode *dir,
+					     struct ocfs2_alloc_context *ac)
+{
+	OCFS2_I(dir)->ip_last_used_group = ac->ac_last_group;
+	OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
+}
+
 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
 			  handle_t *handle,
 			  handle_t *handle,
+			  struct inode *dir,
+			  struct buffer_head *parent_fe_bh,
 			  struct ocfs2_alloc_context *ac,
 			  struct ocfs2_alloc_context *ac,
 			  u16 *suballoc_bit,
 			  u16 *suballoc_bit,
 			  u64 *fe_blkno)
 			  u64 *fe_blkno)
@@ -1635,6 +1701,8 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
 	BUG_ON(ac->ac_bits_wanted != 1);
 	BUG_ON(ac->ac_bits_wanted != 1);
 	BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
 	BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
 
 
+	ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
+
 	status = ocfs2_claim_suballoc_bits(osb,
 	status = ocfs2_claim_suballoc_bits(osb,
 					   ac,
 					   ac,
 					   handle,
 					   handle,
@@ -1653,6 +1721,7 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
 
 
 	*fe_blkno = bg_blkno + (u64) (*suballoc_bit);
 	*fe_blkno = bg_blkno + (u64) (*suballoc_bit);
 	ac->ac_bits_given++;
 	ac->ac_bits_given++;
+	ocfs2_save_inode_ac_group(dir, ac);
 	status = 0;
 	status = 0;
 bail:
 bail:
 	mlog_exit(status);
 	mlog_exit(status);
@@ -2116,3 +2185,162 @@ out:
 
 
 	return ret;
 	return ret;
 }
 }
+
+/*
+ * Read the inode specified by blkno to get suballoc_slot and
+ * suballoc_bit.
+ */
+static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
+				       u16 *suballoc_slot, u16 *suballoc_bit)
+{
+	int status;
+	struct buffer_head *inode_bh = NULL;
+	struct ocfs2_dinode *inode_fe;
+
+	mlog_entry("blkno: %llu\n", blkno);
+
+	/* dirty read disk */
+	status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
+	if (status < 0) {
+		mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status);
+		goto bail;
+	}
+
+	inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
+	if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
+		mlog(ML_ERROR, "invalid inode %llu requested\n", blkno);
+		status = -EINVAL;
+		goto bail;
+	}
+
+	if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT &&
+	    (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
+		mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
+		     blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
+		status = -EINVAL;
+		goto bail;
+	}
+
+	if (suballoc_slot)
+		*suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
+	if (suballoc_bit)
+		*suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
+
+bail:
+	brelse(inode_bh);
+
+	mlog_exit(status);
+	return status;
+}
+
+/*
+ * test whether bit is SET in allocator bitmap or not.  on success, 0
+ * is returned and *res is 1 for SET; 0 otherwise.  when fails, errno
+ * is returned and *res is meaningless.  Call this after you have
+ * cluster locked against suballoc, or you may get a result based on
+ * non-up2date contents
+ */
+static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
+				   struct inode *suballoc,
+				   struct buffer_head *alloc_bh, u64 blkno,
+				   u16 bit, int *res)
+{
+	struct ocfs2_dinode *alloc_fe;
+	struct ocfs2_group_desc *group;
+	struct buffer_head *group_bh = NULL;
+	u64 bg_blkno;
+	int status;
+
+	mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit);
+
+	alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
+	if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
+		mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
+		     (unsigned int)bit,
+		     ocfs2_bits_per_group(&alloc_fe->id2.i_chain));
+		status = -EINVAL;
+		goto bail;
+	}
+
+	bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
+	status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
+					     &group_bh);
+	if (status < 0) {
+		mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status);
+		goto bail;
+	}
+
+	group = (struct ocfs2_group_desc *) group_bh->b_data;
+	*res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap);
+
+bail:
+	brelse(group_bh);
+
+	mlog_exit(status);
+	return status;
+}
+
+/*
+ * Test if the bit representing this inode (blkno) is set in the
+ * suballocator.
+ *
+ * On success, 0 is returned and *res is 1 for SET; 0 otherwise.
+ *
+ * In the event of failure, a negative value is returned and *res is
+ * meaningless.
+ *
+ * Callers must make sure to hold nfs_sync_lock to prevent
+ * ocfs2_delete_inode() on another node from accessing the same
+ * suballocator concurrently.
+ */
+int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
+{
+	int status;
+	u16 suballoc_bit = 0, suballoc_slot = 0;
+	struct inode *inode_alloc_inode;
+	struct buffer_head *alloc_bh = NULL;
+
+	mlog_entry("blkno: %llu", blkno);
+
+	status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
+					     &suballoc_bit);
+	if (status < 0) {
+		mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
+		goto bail;
+	}
+
+	inode_alloc_inode =
+		ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
+					    suballoc_slot);
+	if (!inode_alloc_inode) {
+		/* the error code could be inaccurate, but we are not able to
+		 * get the correct one. */
+		status = -EINVAL;
+		mlog(ML_ERROR, "unable to get alloc inode in slot %u\n",
+		     (u32)suballoc_slot);
+		goto bail;
+	}
+
+	mutex_lock(&inode_alloc_inode->i_mutex);
+	status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
+	if (status < 0) {
+		mutex_unlock(&inode_alloc_inode->i_mutex);
+		mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
+		     (u32)suballoc_slot, status);
+		goto bail;
+	}
+
+	status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
+					 blkno, suballoc_bit, res);
+	if (status < 0)
+		mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
+
+	ocfs2_inode_unlock(inode_alloc_inode, 0);
+	mutex_unlock(&inode_alloc_inode->i_mutex);
+
+	iput(inode_alloc_inode);
+	brelse(alloc_bh);
+bail:
+	mlog_exit(status);
+	return status;
+}
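A hedged sketch of how an NFS export path might consume ocfs2_test_inode_bit(), following the locking requirement in its comment. The ocfs2_nfs_sync_lock()/ocfs2_nfs_sync_unlock() helpers are assumed from the new osb_nfs_sync_lockres and OCFS2_LOCK_TYPE_NFS_SYNC introduced elsewhere in this series; the real caller lives in the export code, not in this hunk.

/* Sketch only: the nfs_sync lock helper names are assumptions. */
static int example_validate_exported_inode(struct ocfs2_super *osb, u64 blkno)
{
	int set = 0;
	int status;

	status = ocfs2_nfs_sync_lock(osb, 0);	/* shared; holds off deleters */
	if (status < 0)
		return status;

	status = ocfs2_test_inode_bit(osb, blkno, &set);

	ocfs2_nfs_sync_unlock(osb, 0);

	if (status < 0)
		return status;

	/* Bit clear means the inode was freed: report stale to NFS. */
	return set ? 0 : -ESTALE;
}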

+ 4 - 0
fs/ocfs2/suballoc.h

@@ -88,6 +88,8 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb,
 			 u64 *blkno_start);
 			 u64 *blkno_start);
 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
 			  handle_t *handle,
 			  handle_t *handle,
+			  struct inode *dir,
+			  struct buffer_head *parent_fe_bh,
 			  struct ocfs2_alloc_context *ac,
 			  struct ocfs2_alloc_context *ac,
 			  u16 *suballoc_bit,
 			  u16 *suballoc_bit,
 			  u64 *fe_blkno);
 			  u64 *fe_blkno);
@@ -186,4 +188,6 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
 			  u32 clusters_to_add, u32 extents_to_split,
 			  u32 clusters_to_add, u32 extents_to_split,
 			  struct ocfs2_alloc_context **data_ac,
 			  struct ocfs2_alloc_context **data_ac,
 			  struct ocfs2_alloc_context **meta_ac);
 			  struct ocfs2_alloc_context **meta_ac);
+
+int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
 #endif /* _CHAINALLOC_H_ */
 #endif /* _CHAINALLOC_H_ */

+ 188 - 0
fs/ocfs2/super.c

@@ -201,6 +201,170 @@ static const match_table_t tokens = {
 	{Opt_err, NULL}
 	{Opt_err, NULL}
 };
 };
 
 
+#ifdef CONFIG_DEBUG_FS
+static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
+{
+	int out = 0;
+	int i;
+	struct ocfs2_cluster_connection *cconn = osb->cconn;
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
+
+	out += snprintf(buf + out, len - out,
+			"%10s => Id: %-s  Uuid: %-s  Gen: 0x%X  Label: %-s\n",
+			"Device", osb->dev_str, osb->uuid_str,
+			osb->fs_generation, osb->vol_label);
+
+	out += snprintf(buf + out, len - out,
+			"%10s => State: %d  Flags: 0x%lX\n", "Volume",
+			atomic_read(&osb->vol_state), osb->osb_flags);
+
+	out += snprintf(buf + out, len - out,
+			"%10s => Block: %lu  Cluster: %d\n", "Sizes",
+			osb->sb->s_blocksize, osb->s_clustersize);
+
+	out += snprintf(buf + out, len - out,
+			"%10s => Compat: 0x%X  Incompat: 0x%X  "
+			"ROcompat: 0x%X\n",
+			"Features", osb->s_feature_compat,
+			osb->s_feature_incompat, osb->s_feature_ro_compat);
+
+	out += snprintf(buf + out, len - out,
+			"%10s => Opts: 0x%lX  AtimeQuanta: %u\n", "Mount",
+			osb->s_mount_opt, osb->s_atime_quantum);
+
+	out += snprintf(buf + out, len - out,
+			"%10s => Stack: %s  Name: %*s  Version: %d.%d\n",
+			"Cluster",
+			(*osb->osb_cluster_stack == '\0' ?
+			 "o2cb" : osb->osb_cluster_stack),
+			cconn->cc_namelen, cconn->cc_name,
+			cconn->cc_version.pv_major, cconn->cc_version.pv_minor);
+
+	spin_lock(&osb->dc_task_lock);
+	out += snprintf(buf + out, len - out,
+			"%10s => Pid: %d  Count: %lu  WakeSeq: %lu  "
+			"WorkSeq: %lu\n", "DownCnvt",
+			task_pid_nr(osb->dc_task), osb->blocked_lock_count,
+			osb->dc_wake_sequence, osb->dc_work_sequence);
+	spin_unlock(&osb->dc_task_lock);
+
+	spin_lock(&osb->osb_lock);
+	out += snprintf(buf + out, len - out, "%10s => Pid: %d  Nodes:",
+			"Recovery",
+			(osb->recovery_thread_task ?
+			 task_pid_nr(osb->recovery_thread_task) : -1));
+	if (rm->rm_used == 0)
+		out += snprintf(buf + out, len - out, " None\n");
+	else {
+		for (i = 0; i < rm->rm_used; i++)
+			out += snprintf(buf + out, len - out, " %d",
+					rm->rm_entries[i]);
+		out += snprintf(buf + out, len - out, "\n");
+	}
+	spin_unlock(&osb->osb_lock);
+
+	out += snprintf(buf + out, len - out,
+			"%10s => Pid: %d  Interval: %lu  Needs: %d\n", "Commit",
+			task_pid_nr(osb->commit_task), osb->osb_commit_interval,
+			atomic_read(&osb->needs_checkpoint));
+
+	out += snprintf(buf + out, len - out,
+			"%10s => State: %d  NumTxns: %d  TxnId: %lu\n",
+			"Journal", osb->journal->j_state,
+			atomic_read(&osb->journal->j_num_trans),
+			osb->journal->j_trans_id);
+
+	out += snprintf(buf + out, len - out,
+			"%10s => GlobalAllocs: %d  LocalAllocs: %d  "
+			"SubAllocs: %d  LAWinMoves: %d  SAExtends: %d\n",
+			"Stats",
+			atomic_read(&osb->alloc_stats.bitmap_data),
+			atomic_read(&osb->alloc_stats.local_data),
+			atomic_read(&osb->alloc_stats.bg_allocs),
+			atomic_read(&osb->alloc_stats.moves),
+			atomic_read(&osb->alloc_stats.bg_extends));
+
+	out += snprintf(buf + out, len - out,
+			"%10s => State: %u  Descriptor: %llu  Size: %u bits  "
+			"Default: %u bits\n",
+			"LocalAlloc", osb->local_alloc_state,
+			(unsigned long long)osb->la_last_gd,
+			osb->local_alloc_bits, osb->local_alloc_default_bits);
+
+	spin_lock(&osb->osb_lock);
+	out += snprintf(buf + out, len - out,
+			"%10s => Slot: %d  NumStolen: %d\n", "Steal",
+			osb->s_inode_steal_slot,
+			atomic_read(&osb->s_num_inodes_stolen));
+	spin_unlock(&osb->osb_lock);
+
+	out += snprintf(buf + out, len - out, "%10s => %3s  %10s\n",
+			"Slots", "Num", "RecoGen");
+
+	for (i = 0; i < osb->max_slots; ++i) {
+		out += snprintf(buf + out, len - out,
+				"%10s  %c %3d  %10d\n",
+				" ",
+				(i == osb->slot_num ? '*' : ' '),
+				i, osb->slot_recovery_generations[i]);
+	}
+
+	return out;
+}
+
+static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
+{
+	struct ocfs2_super *osb = inode->i_private;
+	char *buf = NULL;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		goto bail;
+
+	i_size_write(inode, ocfs2_osb_dump(osb, buf, PAGE_SIZE));
+
+	file->private_data = buf;
+
+	return 0;
+bail:
+	return -ENOMEM;
+}
+
+static int ocfs2_debug_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
+				size_t nbytes, loff_t *ppos)
+{
+	return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
+				       i_size_read(file->f_mapping->host));
+}
+#else
+static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+static int ocfs2_debug_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
+				size_t nbytes, loff_t *ppos)
+{
+	return 0;
+}
+#endif	/* CONFIG_DEBUG_FS */
+
+static struct file_operations ocfs2_osb_debug_fops = {
+	.open =		ocfs2_osb_debug_open,
+	.release =	ocfs2_debug_release,
+	.read =		ocfs2_debug_read,
+	.llseek =	generic_file_llseek,
+};
+
 /*
 /*
  * write_super and sync_fs ripped right out of ext3.
  * write_super and sync_fs ripped right out of ext3.
  */
  */
@@ -926,6 +1090,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 		goto read_super_error;
 		goto read_super_error;
 	}
 	}
 
 
+	osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR,
+					    osb->osb_debug_root,
+					    osb,
+					    &ocfs2_osb_debug_fops);
+	if (!osb->osb_ctxt) {
+		status = -EINVAL;
+		mlog_errno(status);
+		goto read_super_error;
+	}
+
 	status = ocfs2_mount_volume(sb);
 	status = ocfs2_mount_volume(sb);
 	if (osb->root_inode)
 	if (osb->root_inode)
 		inode = igrab(osb->root_inode);
 		inode = igrab(osb->root_inode);
@@ -1620,6 +1794,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 	osb = OCFS2_SB(sb);
 	osb = OCFS2_SB(sb);
 	BUG_ON(!osb);
 	BUG_ON(!osb);
 
 
+	debugfs_remove(osb->osb_ctxt);
+
 	ocfs2_disable_quotas(osb);
 	ocfs2_disable_quotas(osb);
 
 
 	ocfs2_shutdown_local_alloc(osb);
 	ocfs2_shutdown_local_alloc(osb);
@@ -1742,6 +1918,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
 	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
 	sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
 	sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
 
 
+	osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
+
+	for (i = 0; i < 3; i++)
+		osb->osb_dx_seed[i] = le32_to_cpu(di->id2.i_super.s_dx_seed[i]);
+	osb->osb_dx_seed[3] = le32_to_cpu(di->id2.i_super.s_uuid_hash);
+
 	osb->sb = sb;
 	osb->sb = sb;
 	/* Save off for ocfs2_rw_direct */
 	/* Save off for ocfs2_rw_direct */
 	osb->s_sectsize_bits = blksize_bits(sector_size);
 	osb->s_sectsize_bits = blksize_bits(sector_size);
@@ -2130,6 +2312,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
 	 * lock, and it's marked as dirty, set the bit in the recover
 	 * lock, and it's marked as dirty, set the bit in the recover
 	 * map and launch a recovery thread for it. */
 	 * map and launch a recovery thread for it. */
 	status = ocfs2_mark_dead_nodes(osb);
 	status = ocfs2_mark_dead_nodes(osb);
+	if (status < 0) {
+		mlog_errno(status);
+		goto finally;
+	}
+
+	status = ocfs2_compute_replay_slots(osb);
 	if (status < 0)
 	if (status < 0)
 		mlog_errno(status);
 		mlog_errno(status);
 
 

+ 2 - 6
fs/ocfs2/xattr.c

@@ -512,7 +512,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
 			  struct ocfs2_security_xattr_info *si,
 			  struct ocfs2_security_xattr_info *si,
 			  int *want_clusters,
 			  int *want_clusters,
 			  int *xattr_credits,
 			  int *xattr_credits,
-			  struct ocfs2_alloc_context **xattr_ac)
+			  int *want_meta)
 {
 {
 	int ret = 0;
 	int ret = 0;
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
@@ -554,11 +554,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
 	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
-		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
-		if (ret) {
-			mlog_errno(ret);
-			return ret;
-		}
+		*want_meta = *want_meta + 1;
 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 	}
 	}
 
 

+ 1 - 1
fs/ocfs2/xattr.h

@@ -68,7 +68,7 @@ int ocfs2_calc_security_init(struct inode *,
 			     int *, int *, struct ocfs2_alloc_context **);
 			     int *, int *, struct ocfs2_alloc_context **);
 int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
 int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
 			  int, struct ocfs2_security_xattr_info *,
 			  int, struct ocfs2_security_xattr_info *,
-			  int *, int *, struct ocfs2_alloc_context **);
+			  int *, int *, int *);
 
 
 /*
 /*
  * xattrs can live inside an inode, as part of an external xattr block,
  * xattrs can live inside an inode, as part of an external xattr block,
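The xattr.c/xattr.h change above stops ocfs2_calc_xattr_init() from reserving its own metadata allocation context; instead it bumps a caller-supplied *want_meta counter so the caller can make one combined reservation. A hedged sketch of the resulting caller pattern (the wrapper is illustrative; ocfs2_calc_xattr_init() and ocfs2_reserve_new_metadata_blocks() are the existing functions, used here under their apparent signatures):

/* Sketch of the new "count first, reserve once" pattern. */
static int example_reserve_meta_for_new_inode(struct ocfs2_super *osb,
					      struct inode *dir,
					      struct buffer_head *dir_bh,
					      int mode,
					      struct ocfs2_security_xattr_info *si,
					      struct ocfs2_alloc_context **meta_ac)
{
	int want_clusters = 0, want_meta = 0, xattr_credits = 0;
	int status;

	/* Adds to want_meta instead of reserving blocks by itself now. */
	status = ocfs2_calc_xattr_init(dir, dir_bh, mode, si, &want_clusters,
				       &xattr_credits, &want_meta);
	if (status < 0)
		return status;

	if (!want_meta)
		return 0;

	/* One reservation covers every metadata block counted above. */
	return ocfs2_reserve_new_metadata_blocks(osb, want_meta, meta_ac);
}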

Some files were not shown because too many files changed in this diff