
staging: add Lustre file system client support

Lustre is the most widely deployed distributed file system
in the HPC (High Performance Computing) world. This patch
adds its client-side support.

The code is not yet clean and needs to live in drivers/staging
for some time while cleanup work continues. See
drivers/staging/lustre/TODO for details.

The code is based on Lustre master commit faefbfc04:

commit faefbfc0460bc00f2ee4c1c1c86aa1e39b9eea49
Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Date:   Tue Apr 30 23:05:21 2013 +0400

    LU-3244 utils: tunefs.lustre should preserve virgin label

Plus a few under-review patches on Whamcloud gerrit:
3.8 kernel support:
http://review.whamcloud.com/#change,5973
http://review.whamcloud.com/#change,5974
http://review.whamcloud.com/#change,5768
http://review.whamcloud.com/#change,5781
http://review.whamcloud.com/#change,5763
http://review.whamcloud.com/#change,5613
http://review.whamcloud.com/#change,5655

3.9 kernel support:
http://review.whamcloud.com/#change,5898
http://review.whamcloud.com/#change,5899

Kconfig/Kbuild:
http://review.whamcloud.com/#change,4646
http://review.whamcloud.com/#change,4644

libcfs cleanup:
http://review.whamcloud.com/#change,2831
http://review.whamcloud.com/#change,4775
http://review.whamcloud.com/#change,4776
http://review.whamcloud.com/#change,4777
http://review.whamcloud.com/#change,4778
http://review.whamcloud.com/#change,4779
http://review.whamcloud.com/#change,4780

All leading/trailing whitespace has been removed to match kernel
coding style, and scripts/cleanfile was run on all Lustre source files.

[marked the Kconfig as depending on BROKEN, as the recent procfs changes
cause this to fail - gregkh]

Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Peng Tao · commit d7e09d0397
100 changed files with 49183 additions and 0 deletions
  1. drivers/staging/Kconfig (+2 -0)
  2. drivers/staging/Makefile (+1 -0)
  3. drivers/staging/lustre/Kconfig (+3 -0)
  4. drivers/staging/lustre/Makefile (+4 -0)
  5. drivers/staging/lustre/TODO (+13 -0)
  6. drivers/staging/lustre/include/linux/libcfs/bitmap.h (+111 -0)
  7. drivers/staging/lustre/include/linux/libcfs/curproc.h (+110 -0)
  8. drivers/staging/lustre/include/linux/libcfs/libcfs.h (+286 -0)
  9. drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h (+214 -0)
  10. drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h (+201 -0)
  11. drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h (+350 -0)
  12. drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h (+170 -0)
  13. drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h (+850 -0)
  14. drivers/staging/lustre/include/linux/libcfs/libcfs_heap.h (+200 -0)
  15. drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h (+222 -0)
  16. drivers/staging/lustre/include/linux/libcfs/libcfs_kernelcomm.h (+117 -0)
  17. drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h (+101 -0)
  18. drivers/staging/lustre/include/linux/libcfs/libcfs_private.h (+568 -0)
  19. drivers/staging/lustre/include/linux/libcfs/libcfs_string.h (+137 -0)
  20. drivers/staging/lustre/include/linux/libcfs/libcfs_time.h (+132 -0)
  21. drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h (+110 -0)
  22. drivers/staging/lustre/include/linux/libcfs/linux/kp30.h (+286 -0)
  23. drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h (+131 -0)
  24. drivers/staging/lustre/include/linux/libcfs/linux/linux-bitops.h (+38 -0)
  25. drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h (+175 -0)
  26. drivers/staging/lustre/include/linux/libcfs/linux/linux-crypto.h (+49 -0)
  27. drivers/staging/lustre/include/linux/libcfs/linux/linux-fs.h (+95 -0)
  28. drivers/staging/lustre/include/linux/libcfs/linux/linux-lock.h (+204 -0)
  29. drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h (+139 -0)
  30. drivers/staging/lustre/include/linux/libcfs/linux/linux-prim.h (+243 -0)
  31. drivers/staging/lustre/include/linux/libcfs/linux/linux-tcpip.h (+87 -0)
  32. drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h (+275 -0)
  33. drivers/staging/lustre/include/linux/libcfs/linux/linux-types.h (+36 -0)
  34. drivers/staging/lustre/include/linux/libcfs/linux/portals_compat25.h (+116 -0)
  35. drivers/staging/lustre/include/linux/libcfs/lucache.h (+162 -0)
  36. drivers/staging/lustre/include/linux/libcfs/params_tree.h (+230 -0)
  37. drivers/staging/lustre/include/linux/lnet/api-support.h (+44 -0)
  38. drivers/staging/lustre/include/linux/lnet/api.h (+220 -0)
  39. drivers/staging/lustre/include/linux/lnet/lib-lnet.h (+874 -0)
  40. drivers/staging/lustre/include/linux/lnet/lib-types.h (+765 -0)
  41. drivers/staging/lustre/include/linux/lnet/linux/api-support.h (+43 -0)
  42. drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h (+72 -0)
  43. drivers/staging/lustre/include/linux/lnet/linux/lib-types.h (+45 -0)
  44. drivers/staging/lustre/include/linux/lnet/linux/lnet.h (+56 -0)
  45. drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h (+51 -0)
  46. drivers/staging/lustre/include/linux/lnet/lnet.h (+51 -0)
  47. drivers/staging/lustre/include/linux/lnet/lnetctl.h (+80 -0)
  48. drivers/staging/lustre/include/linux/lnet/lnetst.h (+491 -0)
  49. drivers/staging/lustre/include/linux/lnet/ptllnd.h (+94 -0)
  50. drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h (+124 -0)
  51. drivers/staging/lustre/include/linux/lnet/socklnd.h (+103 -0)
  52. drivers/staging/lustre/include/linux/lnet/types.h (+503 -0)
  53. drivers/staging/lustre/lnet/Kconfig (+40 -0)
  54. drivers/staging/lustre/lnet/Makefile (+1 -0)
  55. drivers/staging/lustre/lnet/klnds/Makefile (+1 -0)
  56. drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile (+5 -0)
  57. drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c (+3256 -0)
  58. drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h (+1057 -0)
  59. drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c (+3529 -0)
  60. drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c (+493 -0)
  61. drivers/staging/lustre/lnet/klnds/socklnd/Makefile (+7 -0)
  62. drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c (+2902 -0)
  63. drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h (+602 -0)
  64. drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c (+2664 -0)
  65. drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c (+1088 -0)
  66. drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.h (+91 -0)
  67. drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c (+198 -0)
  68. drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c (+797 -0)
  69. drivers/staging/lustre/lnet/lnet/Makefile (+8 -0)
  70. drivers/staging/lustre/lnet/lnet/acceptor.c (+527 -0)
  71. drivers/staging/lustre/lnet/lnet/api-errno.c (+39 -0)
  72. drivers/staging/lustre/lnet/lnet/api-ni.c (+1941 -0)
  73. drivers/staging/lustre/lnet/lnet/config.c (+1264 -0)
  74. drivers/staging/lustre/lnet/lnet/lib-eq.c (+447 -0)
  75. drivers/staging/lustre/lnet/lnet/lib-md.c (+451 -0)
  76. drivers/staging/lustre/lnet/lnet/lib-me.c (+297 -0)
  77. drivers/staging/lustre/lnet/lnet/lib-move.c (+2441 -0)
  78. drivers/staging/lustre/lnet/lnet/lib-msg.c (+650 -0)
  79. drivers/staging/lustre/lnet/lnet/lib-ptl.c (+938 -0)
  80. drivers/staging/lustre/lnet/lnet/lo.c (+120 -0)
  81. drivers/staging/lustre/lnet/lnet/module.c (+154 -0)
  82. drivers/staging/lustre/lnet/lnet/peer.c (+337 -0)
  83. drivers/staging/lustre/lnet/lnet/router.c (+1693 -0)
  84. drivers/staging/lustre/lnet/lnet/router_proc.c (+950 -0)
  85. drivers/staging/lustre/lnet/selftest/Makefile (+6 -0)
  86. drivers/staging/lustre/lnet/selftest/brw_test.c (+499 -0)
  87. drivers/staging/lustre/lnet/selftest/conctl.c (+931 -0)
  88. drivers/staging/lustre/lnet/selftest/conrpc.c (+1397 -0)
  89. drivers/staging/lustre/lnet/selftest/conrpc.h (+146 -0)
  90. drivers/staging/lustre/lnet/selftest/console.c (+2071 -0)
  91. drivers/staging/lustre/lnet/selftest/console.h (+232 -0)
  92. drivers/staging/lustre/lnet/selftest/framework.c (+1814 -0)
  93. drivers/staging/lustre/lnet/selftest/module.c (+169 -0)
  94. drivers/staging/lustre/lnet/selftest/ping_test.c (+229 -0)
  95. drivers/staging/lustre/lnet/selftest/rpc.c (+1665 -0)
  96. drivers/staging/lustre/lnet/selftest/rpc.h (+302 -0)
  97. drivers/staging/lustre/lnet/selftest/selftest.h (+611 -0)
  98. drivers/staging/lustre/lnet/selftest/timer.c (+253 -0)
  99. drivers/staging/lustre/lnet/selftest/timer.h (+53 -0)
  100. drivers/staging/lustre/lustre/Kconfig (+33 -0)

+ 2 - 0
drivers/staging/Kconfig

@@ -140,4 +140,6 @@ source "drivers/staging/netlogic/Kconfig"
 
 source "drivers/staging/dwc2/Kconfig"
 
+source "drivers/staging/lustre/Kconfig"
+
 endif # STAGING

+ 1 - 0
drivers/staging/Makefile

@@ -62,3 +62,4 @@ obj-$(CONFIG_FIREWIRE_SERIAL)	+= fwserial/
 obj-$(CONFIG_ZCACHE)		+= zcache/
 obj-$(CONFIG_GOLDFISH)		+= goldfish/
 obj-$(CONFIG_USB_DWC2)		+= dwc2/
+obj-$(CONFIG_LUSTRE_FS)		+= lustre/

+ 3 - 0
drivers/staging/lustre/Kconfig

@@ -0,0 +1,3 @@
+source "drivers/staging/lustre/lustre/Kconfig"
+
+source "drivers/staging/lustre/lnet/Kconfig"

+ 4 - 0
drivers/staging/lustre/Makefile

@@ -0,0 +1,4 @@
+subdir-ccflags-y := -I$(src)/include/
+
+obj-$(CONFIG_LUSTRE_FS)		+= lustre/
+obj-$(CONFIG_LNET)		+= lnet/

+ 13 - 0
drivers/staging/lustre/TODO

@@ -0,0 +1,13 @@
+* Fix possible remaining coding style issues.
+* Remove dead code.
+* Separate client/server functionality. Functions used only by the server can
+  be removed from the client.
+* Clean up the libcfs layer. Ideally we can remove include/linux/libcfs entirely.
+* Clean up the CLIO layer. Lustre client readahead/writeback control needs to
+  better suit what the kernel provides.
+* Add documentation under Documentation/.
+* Other minor misc cleanups...
+
+Please send any patches to Greg Kroah-Hartman <greg@kroah.com>, Andreas Dilger
+<andreas.dilger@intel.com> and Peng Tao <tao.peng@emc.com>. CCing
+hpdd-discuss <hpdd-discuss@lists.01.org> would be great too.

+ 111 - 0
drivers/staging/lustre/include/linux/libcfs/bitmap.h

@@ -0,0 +1,111 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#ifndef _LIBCFS_BITMAP_H_
+#define _LIBCFS_BITMAP_H_
+
+
+typedef struct {
+	int	     size;
+	unsigned long   data[0];
+} cfs_bitmap_t;
+
+#define CFS_BITMAP_SIZE(nbits) \
+     (((nbits/BITS_PER_LONG)+1)*sizeof(long)+sizeof(cfs_bitmap_t))
+
+static inline
+cfs_bitmap_t *CFS_ALLOCATE_BITMAP(int size)
+{
+	cfs_bitmap_t *ptr;
+
+	OBD_ALLOC(ptr, CFS_BITMAP_SIZE(size));
+	if (ptr == NULL)
+		RETURN(ptr);
+
+	ptr->size = size;
+
+	RETURN (ptr);
+}
+
+#define CFS_FREE_BITMAP(ptr)	OBD_FREE(ptr, CFS_BITMAP_SIZE(ptr->size))
+
+static inline
+void cfs_bitmap_set(cfs_bitmap_t *bitmap, int nbit)
+{
+	set_bit(nbit, bitmap->data);
+}
+
+static inline
+void cfs_bitmap_clear(cfs_bitmap_t *bitmap, int nbit)
+{
+	test_and_clear_bit(nbit, bitmap->data);
+}
+
+static inline
+int cfs_bitmap_check(cfs_bitmap_t *bitmap, int nbit)
+{
+	return test_bit(nbit, bitmap->data);
+}
+
+static inline
+int cfs_bitmap_test_and_clear(cfs_bitmap_t *bitmap, int nbit)
+{
+	return test_and_clear_bit(nbit, bitmap->data);
+}
+
+/* return 0 if the bitmap has no set bits */
+static inline
+int cfs_bitmap_check_empty(cfs_bitmap_t *bitmap)
+{
+	return find_first_bit(bitmap->data, bitmap->size) == bitmap->size;
+}
+
+static inline
+void cfs_bitmap_copy(cfs_bitmap_t *new, cfs_bitmap_t *old)
+{
+	int newsize;
+
+	LASSERT(new->size >= old->size);
+	newsize = new->size;
+	memcpy(new, old, CFS_BITMAP_SIZE(old->size));
+	new->size = newsize;
+}
+
+#define cfs_foreach_bit(bitmap, pos)					\
+	for ((pos) = find_first_bit((bitmap)->data, bitmap->size);	\
+	     (pos) < (bitmap)->size;					\
+	     (pos) = find_next_bit((bitmap)->data, (bitmap)->size, (pos) + 1))
+
+#endif
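
As a usage sketch (not part of the patch itself), the bitmap API above
composes roughly as follows; the caller, the stripe framing, and the
pr_info() logging are illustrative, and OBD_ALLOC()/OBD_FREE() behind
CFS_ALLOCATE_BITMAP/CFS_FREE_BITMAP come from other Lustre headers:

	/* Hypothetical caller: flag one of `n` stripes, walk the set bits. */
	static int example_flag_stripe(int n, int bad)
	{
		cfs_bitmap_t *bm;
		int pos;

		bm = CFS_ALLOCATE_BITMAP(n);	/* NULL on allocation failure */
		if (bm == NULL)
			return -ENOMEM;

		cfs_bitmap_set(bm, bad);	/* mark stripe `bad` */

		cfs_foreach_bit(bm, pos)	/* iterates set bits only */
			pr_info("stripe %d flagged\n", pos);

		CFS_FREE_BITMAP(bm);
		return 0;
	}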

+ 110 - 0
drivers/staging/lustre/include/linux/libcfs/curproc.h

@@ -0,0 +1,110 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/curproc.h
+ *
+ * Lustre curproc API declaration
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+
+#ifndef __LIBCFS_CURPROC_H__
+#define __LIBCFS_CURPROC_H__
+
+/*
+ * Portable API to access common characteristics of "current" UNIX process.
+ *
+ * Implemented in portals/include/libcfs/<os>/
+ */
+int    cfs_curproc_groups_nr(void);
+int    current_is_in_group(gid_t group);
+void   cfs_curproc_groups_dump(gid_t *array, int size);
+
+/*
+ * Plus, platform-specific constant
+ *
+ * CFS_CURPROC_COMM_MAX,
+ *
+ * and opaque scalar type
+ *
+ * kernel_cap_t
+ */
+
+/* check if task is running in compat mode. */
+int current_is_32bit(void);
+#define current_pid()		(current->pid)
+#define current_comm()		(current->comm)
+int cfs_get_environ(const char *key, char *value, int *val_len);
+
+typedef __u32 cfs_cap_t;
+
+#define CFS_CAP_CHOWN		   0
+#define CFS_CAP_DAC_OVERRIDE	    1
+#define CFS_CAP_DAC_READ_SEARCH	 2
+#define CFS_CAP_FOWNER		  3
+#define CFS_CAP_FSETID		  4
+#define CFS_CAP_LINUX_IMMUTABLE	 9
+#define CFS_CAP_SYS_ADMIN	      21
+#define CFS_CAP_SYS_BOOT	       23
+#define CFS_CAP_SYS_RESOURCE	   24
+
+#define CFS_CAP_FS_MASK ((1 << CFS_CAP_CHOWN) |		 \
+			 (1 << CFS_CAP_DAC_OVERRIDE) |	  \
+			 (1 << CFS_CAP_DAC_READ_SEARCH) |       \
+			 (1 << CFS_CAP_FOWNER) |		\
+			 (1 << CFS_CAP_FSETID ) |	       \
+			 (1 << CFS_CAP_LINUX_IMMUTABLE) |       \
+			 (1 << CFS_CAP_SYS_ADMIN) |	     \
+			 (1 << CFS_CAP_SYS_BOOT) |	      \
+			 (1 << CFS_CAP_SYS_RESOURCE))
+
+void cfs_cap_raise(cfs_cap_t cap);
+void cfs_cap_lower(cfs_cap_t cap);
+int cfs_cap_raised(cfs_cap_t cap);
+cfs_cap_t cfs_curproc_cap_pack(void);
+void cfs_curproc_cap_unpack(cfs_cap_t cap);
+int cfs_capable(cfs_cap_t cap);
+
+/* __LIBCFS_CURPROC_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
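
A sketch (not from the patch) of the intended pack/raise/restore pattern
for the capability helpers declared above; the surrounding function and
the choice of CFS_CAP_SYS_RESOURCE are illustrative, and the helpers
themselves are implemented per-platform:

	/* Hypothetical: raise one capability for a privileged operation,
	 * then restore the saved capability set. */
	static void example_privileged_op(void)
	{
		cfs_cap_t saved = cfs_curproc_cap_pack();  /* snapshot caps */

		if (!cfs_cap_raised(CFS_CAP_SYS_RESOURCE))
			cfs_cap_raise(CFS_CAP_SYS_RESOURCE);

		/* ... work that needs CFS_CAP_SYS_RESOURCE ... */

		cfs_curproc_cap_unpack(saved);		/* restore snapshot */
	}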

+ 286 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs.h

@@ -0,0 +1,286 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_LIBCFS_H__
+#define __LIBCFS_LIBCFS_H__
+
+#if !__GNUC__
+#define __attribute__(x)
+#endif
+
+#include <linux/libcfs/linux/libcfs.h>
+
+#include "curproc.h"
+
+#ifndef offsetof
+# define offsetof(typ,memb) ((long)(long_ptr_t)((char *)&(((typ *)0)->memb)))
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) ((sizeof (a)) / (sizeof ((a)[0])))
+#endif
+
+#if !defined(swap)
+#define swap(x,y) do { typeof(x) z = x; x = y; y = z; } while (0)
+#endif
+
+#if !defined(container_of)
+/* given a pointer @ptr to the field @member embedded into type (usually
+ * struct) @type, return pointer to the embedding instance of @type. */
+#define container_of(ptr, type, member) \
+	((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
+#endif
+
+static inline int __is_po2(unsigned long long val)
+{
+	return !(val & (val - 1));
+}
+
+#define IS_PO2(val) __is_po2((unsigned long long)(val))
+
+#define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
+
+/*
+ * Lustre Error Checksum: calculates checksum
+ * of Hex number by XORing each bit.
+ */
+#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \
+			   ((hexnum) >> 8 & 0xf))
+
+
+/*
+ * Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses
+ * the Lustre RETURN(NULL) macro.
+ */
+#if defined(NULL)
+#undef NULL
+#endif
+
+#define NULL ((void *)0)
+
+#define LUSTRE_SRV_LNET_PID      LUSTRE_LNET_PID
+
+
+#include <linux/list.h>
+
+#ifndef cfs_for_each_possible_cpu
+#  error cfs_for_each_possible_cpu is not supported by kernel!
+#endif
+
+/* libcfs tcpip */
+int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
+int libcfs_ipif_enumerate(char ***names);
+void libcfs_ipif_free_enumeration(char **names, int n);
+int libcfs_sock_listen(socket_t **sockp, __u32 ip, int port, int backlog);
+int libcfs_sock_accept(socket_t **newsockp, socket_t *sock);
+void libcfs_sock_abort_accept(socket_t *sock);
+int libcfs_sock_connect(socket_t **sockp, int *fatal,
+			__u32 local_ip, int local_port,
+			__u32 peer_ip, int peer_port);
+int libcfs_sock_setbuf(socket_t *socket, int txbufsize, int rxbufsize);
+int libcfs_sock_getbuf(socket_t *socket, int *txbufsize, int *rxbufsize);
+int libcfs_sock_getaddr(socket_t *socket, int remote, __u32 *ip, int *port);
+int libcfs_sock_write(socket_t *sock, void *buffer, int nob, int timeout);
+int libcfs_sock_read(socket_t *sock, void *buffer, int nob, int timeout);
+void libcfs_sock_release(socket_t *sock);
+
+/* libcfs watchdogs */
+struct lc_watchdog;
+
+/* Add a watchdog which fires after "time" milliseconds of delay.  You have to
+ * touch it once to enable it. */
+struct lc_watchdog *lc_watchdog_add(int time,
+				    void (*cb)(pid_t pid, void *),
+				    void *data);
+
+/* Enables a watchdog and resets its timer. */
+void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout);
+#define CFS_GET_TIMEOUT(svc) (max_t(int, obd_timeout,		   \
+			  AT_OFF ? 0 : at_get(&svc->srv_at_estimate)) * \
+			  svc->srv_watchdog_factor)
+
+/* Disable a watchdog; touch it to restart it. */
+void lc_watchdog_disable(struct lc_watchdog *lcw);
+
+/* Clean up the watchdog */
+void lc_watchdog_delete(struct lc_watchdog *lcw);
+
+/* Dump a debug log */
+void lc_watchdog_dumplog(pid_t pid, void *data);
+
+
+/* need both kernel and user-land acceptor */
+#define LNET_ACCEPTOR_MIN_RESERVED_PORT    512
+#define LNET_ACCEPTOR_MAX_RESERVED_PORT    1023
+
+/*
+ * libcfs pseudo device operations
+ *
+ * struct psdev_t and
+ * misc_register() and
+ * misc_deregister() are declared in
+ * libcfs/<os>/<os>-prim.h
+ *
+ * It's just a draft for now.
+ */
+
+struct cfs_psdev_file {
+	unsigned long   off;
+	void	    *private_data;
+	unsigned long   reserved1;
+	unsigned long   reserved2;
+};
+
+struct cfs_psdev_ops {
+	int (*p_open)(unsigned long, void *);
+	int (*p_close)(unsigned long, void *);
+	int (*p_read)(struct cfs_psdev_file *, char *, unsigned long);
+	int (*p_write)(struct cfs_psdev_file *, char *, unsigned long);
+	int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *);
+};
+
+/*
+ * Drop into debugger, if possible. Implementation is provided by platform.
+ */
+
+void cfs_enter_debugger(void);
+
+/*
+ * Defined by platform
+ */
+int unshare_fs_struct(void);
+sigset_t cfs_get_blocked_sigs(void);
+sigset_t cfs_block_allsigs(void);
+sigset_t cfs_block_sigs(unsigned long sigs);
+sigset_t cfs_block_sigsinv(unsigned long sigs);
+void cfs_restore_sigs(sigset_t);
+int cfs_signal_pending(void);
+void cfs_clear_sigpending(void);
+
+int convert_server_error(__u64 ecode);
+int convert_client_oflag(int cflag, int *result);
+
+/*
+ * Stack-tracing filling.
+ */
+
+/*
+ * Platform-dependent data-type to hold stack frames.
+ */
+struct cfs_stack_trace;
+
+/*
+ * Fill @trace with current back-trace.
+ */
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace);
+
+/*
+ * Return instruction pointer for frame @frame_no. NULL if @frame_no is
+ * invalid.
+ */
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no);
+
+#ifndef O_NOACCESS
+#define O_NOACCESS O_NONBLOCK
+#endif
+
+/*
+ * Universal open flags.
+ */
+#define CFS_O_NOACCESS	  0003
+#define CFS_O_ACCMODE	   CFS_O_NOACCESS
+#define CFS_O_CREAT	     0100
+#define CFS_O_EXCL	      0200
+#define CFS_O_NOCTTY	    0400
+#define CFS_O_TRUNC	     01000
+#define CFS_O_APPEND	    02000
+#define CFS_O_NONBLOCK	  04000
+#define CFS_O_NDELAY	    CFS_O_NONBLOCK
+#define CFS_O_SYNC	      010000
+#define CFS_O_ASYNC	     020000
+#define CFS_O_DIRECT	    040000
+#define CFS_O_LARGEFILE	 0100000
+#define CFS_O_DIRECTORY	 0200000
+#define CFS_O_NOFOLLOW	  0400000
+#define CFS_O_NOATIME	   01000000
+
+/* convert local open flags to universal open flags */
+int cfs_oflags2univ(int flags);
+/* convert universal open flags to local open flags */
+int cfs_univ2oflags(int flags);
+
+/*
+ * Random number handling
+ */
+
+/* returns a random 32-bit integer */
+unsigned int cfs_rand(void);
+/* seed the generator */
+void cfs_srand(unsigned int, unsigned int);
+void cfs_get_random_bytes(void *buf, int size);
+
+#include <linux/libcfs/libcfs_debug.h>
+#include <linux/libcfs/libcfs_cpu.h>
+#include <linux/libcfs/libcfs_private.h>
+#include <linux/libcfs/libcfs_ioctl.h>
+#include <linux/libcfs/libcfs_prim.h>
+#include <linux/libcfs/libcfs_time.h>
+#include <linux/libcfs/libcfs_string.h>
+#include <linux/libcfs/libcfs_kernelcomm.h>
+#include <linux/libcfs/libcfs_workitem.h>
+#include <linux/libcfs/libcfs_hash.h>
+#include <linux/libcfs/libcfs_heap.h>
+#include <linux/libcfs/libcfs_fail.h>
+#include <linux/libcfs/params_tree.h>
+#include <linux/libcfs/libcfs_crypto.h>
+
+/* container_of depends on "likely" which is defined in libcfs_private.h */
+static inline void *__container_of(void *ptr, unsigned long shift)
+{
+	if (unlikely(IS_ERR(ptr) || ptr == NULL))
+		return ptr;
+	else
+		return (char *)ptr - shift;
+}
+
+#define container_of0(ptr, type, member) \
+	((type *)__container_of((void *)(ptr), offsetof(type, member)))
+
+#define SET_BUT_UNUSED(a) do { } while(sizeof(a) - sizeof(a))
+
+#define _LIBCFS_H
+
+#endif /* _LIBCFS_H */
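
One point worth noting in the header above: container_of0() differs from
container_of() by passing NULL and ERR_PTR values through unchanged, so a
lookup result can be converted without a separate error check first. A
sketch (the struct and function names are illustrative):

	struct example_obj {
		int			eo_id;
		struct list_head	eo_link;	/* embedded member */
	};

	/* Convert a member pointer that may itself be NULL or an ERR_PTR
	 * back to its container; the error value propagates instead of
	 * dereferencing a bad pointer. */
	static struct example_obj *example_obj_from_link(struct list_head *link)
	{
		return container_of0(link, struct example_obj, eo_link);
	}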

+ 214 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h

@@ -0,0 +1,214 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_cpu.h
+ *
+ * CPU partition
+ *   . a CPU partition is a virtual processing unit
+ *
+ *   . a CPU partition can represent 1-N cores, or 1-N NUMA nodes;
+ *     in other words, a CPU partition is a pool of processors.
+ *
+ * CPU Partition Table (CPT)
+ *   . a set of CPU partitions
+ *
+ *   . There are two modes for CPT: CFS_CPU_MODE_NUMA and CFS_CPU_MODE_SMP
+ *
+ *   . The user can specify the total number of CPU partitions when creating
+ *     a CPT; CPU partition IDs always start from 0.
+ *
+ *     Example: if there are 8 cores on the system and a CPT is created
+ *     with cpu_npartitions=4:
+ *	      core[0, 1] = partition[0], core[2, 3] = partition[1]
+ *	      core[4, 5] = partition[2], core[6, 7] = partition[3]
+ *
+ *	  cpu_npartitions=1:
+ *	      core[0, 1, ... 7] = partition[0]
+ *
+ *   . The user can also specify CPU partitions by a string pattern
+ *
+ *     Examples: cpu_partitions="0[0,1], 1[2,3]"
+ *	       cpu_partitions="N 0[0-3], 1[4-8]"
+ *
+ *     The first character "N" means the following numbers are NUMA IDs
+ *
+ *   . NUMA allocators and CPU-affinity threads are built over CPU
+ *     partitions, instead of HW CPUs or HW nodes.
+ *
+ *   . By default, Lustre modules should refer to the global cfs_cpt_table
+ *     instead of accessing HW CPUs directly, so Lustre concurrency can be
+ *     configured via cpu_npartitions of the global cfs_cpt_table
+ *
+ *   . If cpu_npartitions=1 (all CPUs in one pool), Lustre should work the
+ *     same way as 2.2 or earlier versions
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#ifndef __LIBCFS_CPU_H__
+#define __LIBCFS_CPU_H__
+
+#ifndef HAVE_LIBCFS_CPT
+
+typedef unsigned long		cpumask_t;
+typedef unsigned long		nodemask_t;
+
+struct cfs_cpt_table {
+	/* # of CPU partitions */
+	int			ctb_nparts;
+	/* cpu mask */
+	cpumask_t		ctb_mask;
+	/* node mask */
+	nodemask_t		ctb_nodemask;
+	/* version */
+	__u64			ctb_version;
+};
+
+#endif /* !HAVE_LIBCFS_CPT */
+
+/* any CPU partition */
+#define CFS_CPT_ANY		(-1)
+
+extern struct cfs_cpt_table	*cfs_cpt_table;
+
+/**
+ * destroy a CPU partition table
+ */
+void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
+/**
+ * create a cfs_cpt_table with \a ncpt number of partitions
+ */
+struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
+/**
+ * print string information of cpt-table
+ */
+int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
+/**
+ * return total number of CPU partitions in \a cptab
+ */
+int cfs_cpt_number(struct cfs_cpt_table *cptab);
+/**
+ * return number of HW cores or hyper-threads in a CPU partition \a cpt
+ */
+int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * is there any online CPU in CPU partition \a cpt
+ */
+int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * return cpumask of CPU partition \a cpt
+ */
+cpumask_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * return nodemask of CPU partition \a cpt
+ */
+nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * map the current HW processor ID to a CPU-partition ID in \a cptab
+ */
+int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap);
+/**
+ * map HW processor ID \a cpu to a CPU-partition ID via \a cptab
+ */
+int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
+/**
+ * bind current thread on a CPU-partition \a cpt of \a cptab
+ */
+int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * add \a cpu to CPU partition \a cpt of \a cptab, return 1 for success,
+ * otherwise 0 is returned
+ */
+int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
+/**
+ * remove \a cpu from CPU partition \a cpt of \a cptab
+ */
+void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
+/**
+ * add all cpus in \a mask to CPU partition \a cpt
+ * return 1 if successfully set all CPUs, otherwise return 0
+ */
+int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
+			int cpt, cpumask_t *mask);
+/**
+ * remove all cpus in \a mask from CPU partition \a cpt
+ */
+void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
+			   int cpt, cpumask_t *mask);
+/**
+ * add all cpus in NUMA node \a node to CPU partition \a cpt
+ * return 1 if successfully set all CPUs, otherwise return 0
+ */
+int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node);
+/**
+ * remove all cpus in NUMA node \a node from CPU partition \a cpt
+ */
+void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node);
+
+/**
+ * add all cpus in node mask \a mask to CPU partition \a cpt
+ * return 1 if successfully set all CPUs, otherwise return 0
+ */
+int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
+			 int cpt, nodemask_t *mask);
+/**
+ * remove all cpus in node mask \a mask from CPU partition \a cpt
+ */
+void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
+			    int cpt, nodemask_t *mask);
+/**
+ * unset all cpus for CPU partition \a cpt
+ */
+void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * convert partition ID \a cpt to a NUMA node ID; if there is more than one
+ * node in this partition, it might return a different node ID each time.
+ */
+int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
+
+/**
+ * iterate over all CPU partitions in \a cptab
+ */
+#define cfs_cpt_for_each(i, cptab)	\
+	for (i = 0; i < cfs_cpt_number(cptab); i++)
+
+#ifndef __read_mostly
+# define __read_mostly
+#endif
+
+#ifndef ____cacheline_aligned
+#define ____cacheline_aligned
+#endif
+
+int  cfs_cpu_init(void);
+void cfs_cpu_fini(void);
+
+#endif /* __LIBCFS_CPU_H__ */
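
A sketch of how a consumer is expected to size per-partition resources from
the global table described above (the function and the pr_info() reporting
are illustrative, not from the patch):

	/* Hypothetical: report each CPU partition, then bind the current
	 * thread to the partition covering the current CPU. */
	static void example_use_cpt(void)
	{
		int cpt;
		int cur = cfs_cpt_current(cfs_cpt_table, 1);  /* remapped ID */

		cfs_cpt_for_each(cpt, cfs_cpt_table)
			pr_info("partition %d: %d cores\n",
				cpt, cfs_cpt_weight(cfs_cpt_table, cpt));

		cfs_cpt_bind(cfs_cpt_table, cur);  /* pin thread to its CPT */
	}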

+ 201 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h

@@ -0,0 +1,201 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ */
+
+#ifndef _LIBCFS_CRYPTO_H
+#define _LIBCFS_CRYPTO_H
+
+struct cfs_crypto_hash_type {
+	char		*cht_name;      /**< hash algorithm name, equal to
+					 * format name for crypto api */
+	unsigned int    cht_key;	/**< init key by default (valid for
+					 * 4-byte contexts like crc32, adler) */
+	unsigned int    cht_size;       /**< hash digest size */
+};
+
+enum cfs_crypto_hash_alg {
+	CFS_HASH_ALG_NULL       = 0,
+	CFS_HASH_ALG_ADLER32,
+	CFS_HASH_ALG_CRC32,
+	CFS_HASH_ALG_MD5,
+	CFS_HASH_ALG_SHA1,
+	CFS_HASH_ALG_SHA256,
+	CFS_HASH_ALG_SHA384,
+	CFS_HASH_ALG_SHA512,
+	CFS_HASH_ALG_CRC32C,
+	CFS_HASH_ALG_MAX
+};
+
+static struct cfs_crypto_hash_type hash_types[] = {
+	[CFS_HASH_ALG_NULL]    = { "null",     0,      0 },
+	[CFS_HASH_ALG_ADLER32] = { "adler32",  1,      4 },
+	[CFS_HASH_ALG_CRC32]   = { "crc32",   ~0,      4 },
+	[CFS_HASH_ALG_CRC32C]  = { "crc32c",  ~0,      4 },
+	[CFS_HASH_ALG_MD5]     = { "md5",      0,     16 },
+	[CFS_HASH_ALG_SHA1]    = { "sha1",     0,     20 },
+	[CFS_HASH_ALG_SHA256]  = { "sha256",   0,     32 },
+	[CFS_HASH_ALG_SHA384]  = { "sha384",   0,     48 },
+	[CFS_HASH_ALG_SHA512]  = { "sha512",   0,     64 },
+};
+
+/**    Return pointer to type of hash for valid hash algorithm identifier */
+static inline const struct cfs_crypto_hash_type *
+		    cfs_crypto_hash_type(unsigned char hash_alg)
+{
+	struct cfs_crypto_hash_type *ht;
+
+	if (hash_alg < CFS_HASH_ALG_MAX) {
+		ht = &hash_types[hash_alg];
+		if (ht->cht_name)
+			return ht;
+	}
+	return NULL;
+}
+
+/**     Return hash name for valid hash algorithm identifier or "unknown" */
+static inline const char *cfs_crypto_hash_name(unsigned char hash_alg)
+{
+	const struct cfs_crypto_hash_type *ht;
+
+	ht = cfs_crypto_hash_type(hash_alg);
+	if (ht)
+		return ht->cht_name;
+	else
+		return "unknown";
+}
+
+/**     Return digest size for valid algorithm identifier or 0 */
+static inline int cfs_crypto_hash_digestsize(unsigned char hash_alg)
+{
+	const struct cfs_crypto_hash_type *ht;
+
+	ht = cfs_crypto_hash_type(hash_alg);
+	if (ht)
+		return ht->cht_size;
+	else
+		return 0;
+}
+
+/**     Return hash identifier for valid hash algorithm name or 0xFF */
+static inline unsigned char cfs_crypto_hash_alg(const char *algname)
+{
+	unsigned char   i;
+
+	for (i = 0; i < CFS_HASH_ALG_MAX; i++)
+		if (!strcmp(hash_types[i].cht_name, algname))
+			break;
+	return (i == CFS_HASH_ALG_MAX ? 0xFF : i);
+}
+
+/**     Calculate hash digest for buffer.
+ *      @param alg	    id of hash algorithm
+ *      @param buf	    buffer of data
+ *      @param buf_len	buffer len
+ *      @param key	    initial value for algorithm, if it is NULL,
+ *			    default initial value should be used.
+ *      @param key_len	len of initial value
+ *      @param hash	   [out] pointer to hash, if it is NULL, hash_len is
+ *			    set to valid digest size in bytes, retval -ENOSPC.
+ *      @param hash_len       [in,out] size of hash buffer
+ *      @returns	      status of operation
+ *      @retval -EINVAL       if buf, buf_len, hash_len or alg_id is invalid
+ *      @retval -ENODEV       if this algorithm is unsupported
+ *      @retval -ENOSPC       if pointer to hash is NULL, or hash_len less than
+ *			    digest size
+ *      @retval 0	     for success
+ *      @retval < 0	   other errors from lower layers.
+ */
+int cfs_crypto_hash_digest(unsigned char alg,
+			   const void *buf, unsigned int buf_len,
+			   unsigned char *key, unsigned int key_len,
+			   unsigned char *hash, unsigned int *hash_len);
+
+/* cfs crypto hash descriptor */
+struct cfs_crypto_hash_desc;
+
+/**     Allocate and initialize descriptor for hash algorithm.
+ *      @param alg	    algorithm id
+ *      @param key	    initial value for algorithm, if it is NULL,
+ *			    default initial value should be used.
+ *      @param key_len	len of initial value
+ *      @returns	      pointer to descriptor of hash instance
+ *      @retval ERR_PTR(error) when errors occurred.
+ */
+struct cfs_crypto_hash_desc*
+	cfs_crypto_hash_init(unsigned char alg,
+			     unsigned char *key, unsigned int key_len);
+
+/**    Update digest by part of data.
+ *     @param desc	      hash descriptor
+ *     @param page	      data page
+ *     @param offset	    data offset
+ *     @param len	       data len
+ *     @returns		 status of operation
+ *     @retval 0		for success.
+ */
+int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *desc,
+				struct page *page, unsigned int offset,
+				unsigned int len);
+
+/**    Update digest by part of data.
+ *     @param desc	      hash descriptor
+ *     @param buf	       pointer to data buffer
+ *     @param buf_len	   size of data at buffer
+ *     @returns		 status of operation
+ *     @retval 0		for success.
+ */
+int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *desc, const void *buf,
+			   unsigned int buf_len);
+
+/**    Finalize hash calculation, copy hash digest to buffer, destroy hash
+ *     descriptor.
+ *     @param desc	      hash descriptor
+ *     @param hash	      buffer pointer to store hash digest
+ *     @param hash_len	  pointer to hash buffer size, if NULL
+ *			      destroy the hash descriptor
+ *     @returns		 status of operation
+ *     @retval -ENOSPC	  if hash is NULL, or *hash_len less than
+ *			      digest size
+ *     @retval 0		for success
+ *     @retval < 0	      other errors from lower layers.
+ */
+int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *desc,
+			  unsigned char *hash, unsigned int *hash_len);
+/**
+ *      Register crypto hash algorithms
+ */
+int cfs_crypto_register(void);
+
+/**
+ *      Unregister
+ */
+void cfs_crypto_unregister(void);
+
+/**     Return hash speed in Mbytes per second for valid hash algorithm
+ *      identifier. If the test was unsuccessful, -1 is returned.
+ */
+int cfs_crypto_hash_speed(unsigned char hash_alg);
+#endif
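
The descriptor interface above is a conventional init/update/final pipeline;
a sketch of hashing two buffers with it (the algorithm choice and function
name are illustrative; the error path relies on the documented behaviour
that a NULL hash_len makes cfs_crypto_hash_final() only destroy the
descriptor):

	static int example_digest2(const void *a, unsigned int a_len,
				   const void *b, unsigned int b_len,
				   unsigned char *hash, unsigned int *hash_len)
	{
		struct cfs_crypto_hash_desc *desc;
		int rc;

		/* NULL key selects the algorithm's default initial value */
		desc = cfs_crypto_hash_init(CFS_HASH_ALG_CRC32C, NULL, 0);
		if (IS_ERR(desc))
			return PTR_ERR(desc);

		rc = cfs_crypto_hash_update(desc, a, a_len);
		if (rc == 0)
			rc = cfs_crypto_hash_update(desc, b, b_len);
		if (rc != 0) {
			/* NULL hash_len: just destroy the descriptor */
			cfs_crypto_hash_final(desc, NULL, NULL);
			return rc;
		}

		/* copies the digest out and frees the descriptor */
		return cfs_crypto_hash_final(desc, hash, hash_len);
	}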

+ 350 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h

@@ -0,0 +1,350 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_debug.h
+ *
+ * Debug messages and assertions
+ *
+ */
+
+#ifndef __LIBCFS_DEBUG_H__
+#define __LIBCFS_DEBUG_H__
+
+/*
+ *  Debugging
+ */
+extern unsigned int libcfs_subsystem_debug;
+extern unsigned int libcfs_stack;
+extern unsigned int libcfs_debug;
+extern unsigned int libcfs_printk;
+extern unsigned int libcfs_console_ratelimit;
+extern unsigned int libcfs_watchdog_ratelimit;
+extern unsigned int libcfs_console_max_delay;
+extern unsigned int libcfs_console_min_delay;
+extern unsigned int libcfs_console_backoff;
+extern unsigned int libcfs_debug_binary;
+extern char libcfs_debug_file_path_arr[PATH_MAX];
+
+int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys);
+int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys);
+
+/* Has there been an LBUG? */
+extern unsigned int libcfs_catastrophe;
+extern unsigned int libcfs_panic_on_lbug;
+
+/**
+ * Format for debug message headers
+ */
+struct ptldebug_header {
+	__u32 ph_len;
+	__u32 ph_flags;
+	__u32 ph_subsys;
+	__u32 ph_mask;
+	__u16 ph_cpu_id;
+	__u16 ph_type;
+	__u32 ph_sec;
+	__u64 ph_usec;
+	__u32 ph_stack;
+	__u32 ph_pid;
+	__u32 ph_extern_pid;
+	__u32 ph_line_num;
+} __attribute__((packed));
+
+
+#define PH_FLAG_FIRST_RECORD 1
+
+/* Debugging subsystems (32 bits, non-overlapping) */
+/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+#define S_UNDEFINED   0x00000001
+#define S_MDC	 0x00000002
+#define S_MDS	 0x00000004
+#define S_OSC	 0x00000008
+#define S_OST	 0x00000010
+#define S_CLASS       0x00000020
+#define S_LOG	 0x00000040
+#define S_LLITE       0x00000080
+#define S_RPC	 0x00000100
+#define S_MGMT	0x00000200
+#define S_LNET	0x00000400
+#define S_LND	 0x00000800 /* ALL LNDs */
+#define S_PINGER      0x00001000
+#define S_FILTER      0x00002000
+/* unused */
+#define S_ECHO	0x00008000
+#define S_LDLM	0x00010000
+#define S_LOV	 0x00020000
+#define S_LQUOTA      0x00040000
+#define S_OSD		0x00080000
+/* unused */
+/* unused */
+/* unused */
+#define S_LMV	 0x00800000 /* b_new_cmd */
+/* unused */
+#define S_SEC	 0x02000000 /* upcall cache */
+#define S_GSS	 0x04000000 /* b_new_cmd */
+/* unused */
+#define S_MGC	 0x10000000
+#define S_MGS	 0x20000000
+#define S_FID	 0x40000000 /* b_new_cmd */
+#define S_FLD	 0x80000000 /* b_new_cmd */
+/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+
+/* Debugging masks (32 bits, non-overlapping) */
+/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+#define D_TRACE       0x00000001 /* ENTRY/EXIT markers */
+#define D_INODE       0x00000002
+#define D_SUPER       0x00000004
+#define D_EXT2	0x00000008 /* anything from ext2_debug */
+#define D_MALLOC      0x00000010 /* print malloc, free information */
+#define D_CACHE       0x00000020 /* cache-related items */
+#define D_INFO	0x00000040 /* general information */
+#define D_IOCTL       0x00000080 /* ioctl related information */
+#define D_NETERROR    0x00000100 /* network errors */
+#define D_NET	 0x00000200 /* network communications */
+#define D_WARNING     0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
+#define D_BUFFS       0x00000800
+#define D_OTHER       0x00001000
+#define D_DENTRY      0x00002000
+#define D_NETTRACE    0x00004000
+#define D_PAGE	0x00008000 /* bulk page handling */
+#define D_DLMTRACE    0x00010000
+#define D_ERROR       0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
+#define D_EMERG       0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
+#define D_HA	  0x00080000 /* recovery and failover */
+#define D_RPCTRACE    0x00100000 /* for distributed debugging */
+#define D_VFSTRACE    0x00200000
+#define D_READA       0x00400000 /* read-ahead */
+#define D_MMAP	0x00800000
+#define D_CONFIG      0x01000000
+#define D_CONSOLE     0x02000000
+#define D_QUOTA       0x04000000
+#define D_SEC	 0x08000000
+#define D_LFSCK	      0x10000000 /* For both OI scrub and LFSCK */
+/* keep these in sync with lnet/{utils,libcfs}/debug.c */
+
+#define D_HSM	 D_TRACE
+
+#define D_CANTMASK   (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
+
+#ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
+#endif
+
+#define CDEBUG_DEFAULT_MAX_DELAY (cfs_time_seconds(600))	 /* jiffies */
+#define CDEBUG_DEFAULT_MIN_DELAY ((cfs_time_seconds(1) + 1) / 2) /* jiffies */
+#define CDEBUG_DEFAULT_BACKOFF   2
+typedef struct {
+	cfs_time_t      cdls_next;
+	unsigned int    cdls_delay;
+	int	     cdls_count;
+} cfs_debug_limit_state_t;
+
+struct libcfs_debug_msg_data {
+	const char	       *msg_file;
+	const char	       *msg_fn;
+	int		      msg_subsys;
+	int		      msg_line;
+	int		      msg_mask;
+	cfs_debug_limit_state_t  *msg_cdls;
+};
+
+#define LIBCFS_DEBUG_MSG_DATA_INIT(data, mask, cdls)	\
+do {							\
+	(data)->msg_subsys = DEBUG_SUBSYSTEM;	       \
+	(data)->msg_file   = __FILE__;		      \
+	(data)->msg_fn     = __FUNCTION__;		  \
+	(data)->msg_line   = __LINE__;		      \
+	(data)->msg_cdls   = (cdls);			\
+	(data)->msg_mask   = (mask);			\
+} while (0)
+
+#define LIBCFS_DEBUG_MSG_DATA_DECL(dataname, mask, cdls)    \
+	static struct libcfs_debug_msg_data dataname = {    \
+	       .msg_subsys = DEBUG_SUBSYSTEM,	       \
+	       .msg_file   = __FILE__,		      \
+	       .msg_fn     = __FUNCTION__,		  \
+	       .msg_line   = __LINE__,		      \
+	       .msg_cdls   = (cdls)	 };	      \
+	dataname.msg_mask   = (mask);
+
+
+
+/**
+ * Filters out logging messages based on mask and subsystem.
+ */
+static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
+{
+	return mask & D_CANTMASK ||
+		((libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem));
+}
+
+#define __CDEBUG(cdls, mask, format, ...)			       \
+do {								    \
+	static struct libcfs_debug_msg_data msgdata;		    \
+									\
+	CFS_CHECK_STACK(&msgdata, mask, cdls);			  \
+									\
+	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		   \
+		LIBCFS_DEBUG_MSG_DATA_INIT(&msgdata, mask, cdls);       \
+		libcfs_debug_msg(&msgdata, format, ## __VA_ARGS__);     \
+	}							       \
+} while (0)
+
+#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__)
+
+#define CDEBUG_LIMIT(mask, format, ...)	 \
+do {					    \
+	static cfs_debug_limit_state_t cdls;    \
+						\
+	__CDEBUG(&cdls, mask, format, ## __VA_ARGS__);\
+} while (0)
+
+
+
+
+#define CWARN(format, ...)	  CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__)
+#define CERROR(format, ...)	 CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__)
+#define CNETERR(format, a...)       CDEBUG_LIMIT(D_NETERROR, format, ## a)
+#define CEMERG(format, ...)	 CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__)
+
+#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__)
+#define LCONSOLE_INFO(format, ...)  CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__)
+#define LCONSOLE_WARN(format, ...)  CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__)
+#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
+			   "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__)
+#define LCONSOLE_ERROR(format, ...) LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__)
+
+#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__)
+
+
+void libcfs_log_goto(struct libcfs_debug_msg_data *, const char *, long_ptr_t);
+#define GOTO(label, rc)						 \
+do {								    \
+	if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) {		\
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL);     \
+		libcfs_log_goto(&msgdata, #label, (long_ptr_t)(rc));    \
+	} else {							\
+		(void)(rc);					     \
+	}							       \
+	goto label;						     \
+} while (0)
+
+
+/*
+ * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise
+ * there will be a warning on OS X.
+ */
+#if defined(__GNUC__)
+
+long libcfs_log_return(struct libcfs_debug_msg_data *, long rc);
+#if BITS_PER_LONG > 32
+#define RETURN(rc)							\
+do {									\
+	EXIT_NESTING;							\
+	if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) {		\
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL);	\
+		return (typeof(rc))libcfs_log_return(&msgdata,		\
+						     (long)(rc));	\
+	}								\
+									\
+	return (rc);							\
+} while (0)
+#else /* BITS_PER_LONG == 32 */
+/* We need an on-stack variable, because we cannot cast a 32-bit pointer
+ * directly to (long long) without generating a compiler warning/error, yet
+ * casting directly to (long) will truncate 64-bit return values. The log
+ * values will print as 32-bit values, but they always have been. LU-1436
+ */
+#define RETURN(rc)							\
+do {									\
+	EXIT_NESTING;							\
+	if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) {		\
+		typeof(rc) __rc = (rc);					\
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL);	\
+		libcfs_log_return(&msgdata, (long_ptr_t)__rc);		\
+		return __rc;						\
+	}								\
+									\
+	return (rc);							\
+} while (0)
+#endif /* BITS_PER_LONG > 32 */
+
+#elif defined(_MSC_VER)
+#define RETURN(rc)						      \
+do {								    \
+	CDEBUG(D_TRACE, "Process leaving.\n");			  \
+	EXIT_NESTING;						   \
+	return (rc);						    \
+} while (0)
+#else
+# error "Unknown compiler"
+#endif /* __GNUC__ */
+
+#define ENTRY							   \
+ENTRY_NESTING;							  \
+do {								    \
+	CDEBUG(D_TRACE, "Process entered\n");			   \
+} while (0)
+
+#define EXIT							    \
+do {								    \
+	CDEBUG(D_TRACE, "Process leaving\n");			   \
+	EXIT_NESTING;						   \
+} while(0)
+
+#define RETURN_EXIT							\
+do {									\
+	EXIT;								\
+	return;								\
+} while (0)
+
+extern int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
+			    const char *format1, ...)
+	__attribute__ ((format (printf, 2, 3)));
+
+extern int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
+			      const char *format1,
+			      va_list args, const char *format2, ...)
+	__attribute__ ((format (printf, 4, 5)));
+
+/* other external symbols that tracefile provides: */
+extern int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
+				   const char *usr_buffer, int usr_buffer_nob);
+extern int cfs_trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
+				    const char *knl_buffer, char *append);
+
+#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
+
+#endif	/* __LIBCFS_DEBUG_H__ */
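
Tying the machinery above together, an instrumented Lustre function defines
DEBUG_SUBSYSTEM before the libcfs includes and then uses ENTRY, the
mask-filtered CDEBUG, GOTO, and RETURN. A sketch (do_lookup() is a made-up
helper, not from the patch):

	#define DEBUG_SUBSYSTEM S_LLITE	/* must precede the libcfs includes */

	static int example_lookup(int id)
	{
		int rc;
		ENTRY;			/* logs "Process entered" at D_TRACE */

		CDEBUG(D_INFO, "looking up id %d\n", id);

		rc = do_lookup(id);	/* hypothetical helper */
		if (rc != 0)
			GOTO(out, rc);	/* logs the label and rc at D_TRACE */

		CDEBUG(D_INFO, "lookup of %d succeeded\n", id);
	out:
		RETURN(rc);		/* logs the return value at D_TRACE */
	}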

+ 170 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h

@@ -0,0 +1,170 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores,
+ * CA 94065 USA or visit www.oracle.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Oracle Corporation, Inc.
+ */
+
+#ifndef _LIBCFS_FAIL_H
+#define _LIBCFS_FAIL_H
+
+extern unsigned long cfs_fail_loc;
+extern unsigned int cfs_fail_val;
+
+extern wait_queue_head_t cfs_race_waitq;
+extern int cfs_race_state;
+
+int __cfs_fail_check_set(__u32 id, __u32 value, int set);
+int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set);
+
+enum {
+	CFS_FAIL_LOC_NOSET      = 0,
+	CFS_FAIL_LOC_ORSET      = 1,
+	CFS_FAIL_LOC_RESET      = 2,
+	CFS_FAIL_LOC_VALUE      = 3
+};
+
+/* Failure injection control */
+#define CFS_FAIL_MASK_SYS    0x0000FF00
+#define CFS_FAIL_MASK_LOC   (0x000000FF | CFS_FAIL_MASK_SYS)
+
+#define CFS_FAILED_BIT       30
+/* CFS_FAILED is 0x40000000 */
+#define CFS_FAILED	  (1 << CFS_FAILED_BIT)
+
+#define CFS_FAIL_ONCE_BIT    31
+/* CFS_FAIL_ONCE is 0x80000000 */
+#define CFS_FAIL_ONCE       (1 << CFS_FAIL_ONCE_BIT)
+
+/* The following flags aren't made to be combined */
+#define CFS_FAIL_SKIP	0x20000000 /* skip N times then fail */
+#define CFS_FAIL_SOME	0x10000000 /* only fail N times */
+#define CFS_FAIL_RAND	0x08000000 /* fail 1/N of the times */
+#define CFS_FAIL_USR1	0x04000000 /* user flag */
+
+#define CFS_FAIL_PRECHECK(id) (cfs_fail_loc &&				\
+			      (cfs_fail_loc & CFS_FAIL_MASK_LOC) ==	   \
+			      ((id) & CFS_FAIL_MASK_LOC))
+
+static inline int cfs_fail_check_set(__u32 id, __u32 value,
+				     int set, int quiet)
+{
+	int ret = 0;
+
+	if (unlikely(CFS_FAIL_PRECHECK(id) &&
+		     (ret = __cfs_fail_check_set(id, value, set)))) {
+		if (quiet) {
+			CDEBUG(D_INFO, "*** cfs_fail_loc=%x, val=%u***\n",
+			       id, value);
+		} else {
+			LCONSOLE_INFO("*** cfs_fail_loc=%x, val=%u***\n",
+				      id, value);
+		}
+	}
+
+	return ret;
+}
+
+/* If id hit cfs_fail_loc, return 1, otherwise return 0 */
+#define CFS_FAIL_CHECK(id) \
+	cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 0)
+#define CFS_FAIL_CHECK_QUIET(id) \
+	cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1)
+
+/* If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1,
+ * otherwise return 0 */
+#define CFS_FAIL_CHECK_VALUE(id, value) \
+	cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0)
+#define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \
+	cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1)
+
+/* If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1,
+ * otherwise return 0 */
+#define CFS_FAIL_CHECK_ORSET(id, value) \
+	cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0)
+#define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \
+	cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1)
+
+/* If id hit cfs_fail_loc, cfs_fail_loc = value and return 1,
+ * otherwise return 0 */
+#define CFS_FAIL_CHECK_RESET(id, value) \
+	cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0)
+#define CFS_FAIL_CHECK_RESET_QUIET(id, value) \
+	cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1)
+
+static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
+{
+	if (unlikely(CFS_FAIL_PRECHECK(id)))
+		return __cfs_fail_timeout_set(id, value, ms, set);
+	else
+		return 0;
+}
+
+/* If id hit cfs_fail_loc, sleep for seconds or milliseconds */
+#define CFS_FAIL_TIMEOUT(id, secs) \
+	cfs_fail_timeout_set(id, 0, secs * 1000, CFS_FAIL_LOC_NOSET)
+
+#define CFS_FAIL_TIMEOUT_MS(id, ms) \
+	cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET)
+
+/* If id hit cfs_fail_loc, cfs_fail_loc |= value and
+ * sleep seconds or milliseconds */
+#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \
+	cfs_fail_timeout_set(id, value, secs * 1000, CFS_FAIL_LOC_ORSET)
+
+#define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
+	cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET)
+
+/* The idea here is to synchronise two threads to force a race. The
+ * first thread that calls this with a matching fail_loc is put to
+ * sleep. The next thread that calls with the same fail_loc wakes up
+ * the first and continues. */
+static inline void cfs_race(__u32 id)
+{
+
+	if (CFS_FAIL_PRECHECK(id)) {
+		if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) {
+			int rc;
+			cfs_race_state = 0;
+			CERROR("cfs_race id %x sleeping\n", id);
+			cfs_wait_event_interruptible(cfs_race_waitq,
+						     cfs_race_state != 0, rc);
+			CERROR("cfs_fail_race id %x awake, rc=%d\n", id, rc);
+		} else {
+			CERROR("cfs_fail_race id %x waking\n", id);
+			cfs_race_state = 1;
+			wake_up(&cfs_race_waitq);
+		}
+	}
+}
+#define CFS_RACE(id) cfs_race(id)
+
+#endif /* _LIBCFS_FAIL_H */
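
In practice an injection point is a one-line check against a site ID that a
test harness writes into cfs_fail_loc; a sketch (the 0x1234 site ID and the
surrounding function are made up):

	#define EX_FAIL_SEND	0x1234	/* hypothetical fail_loc site ID */

	static int example_send(void)
	{
		/* Fires only while a test has set cfs_fail_loc to 0x1234
		 * (optionally ORed with CFS_FAIL_ONCE/SKIP/SOME/RAND). */
		if (CFS_FAIL_CHECK(EX_FAIL_SEND))
			return -EIO;	/* simulate a network error */

		/* Alternative at the same site: stall for 5 seconds
		 * instead, to widen a race window. */
		CFS_FAIL_TIMEOUT(EX_FAIL_SEND, 5);

		return 0;
	}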

+ 850 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h

@@ -0,0 +1,850 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_hash.h
+ *
+ * Hashing routines
+ *
+ */
+
+#ifndef __LIBCFS_HASH_H__
+#define __LIBCFS_HASH_H__
+/*
+ * Knuth recommends primes in approximately golden ratio to the maximum
+ * integer representable by a machine word for multiplicative hashing.
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
+ *
+ * These primes are chosen to be bit-sparse, that is operations on
+ * them can use shifts and additions instead of multiplications for
+ * machines where multiplications are slow.
+ */
+/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
+#define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL
+/*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
+#define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
+
+/*
+ * Ideally we would use HAVE_HASH_LONG for this, but on Linux we configure
+ * the Linux kernel and user space at the same time, so we need to
+ * differentiate between them explicitly. If this is not needed on other
+ * architectures, then we'll need to move the functions to arch-specific
+ * headers.
+ */
+
+#include <linux/hash.h>
+
+#define cfs_hash_long(val, bits)    hash_long(val, bits)
+
+/** disable debug */
+#define CFS_HASH_DEBUG_NONE	 0
+/** record hash depth and output to console when it's too deep;
+ *  computing overhead is low but it consumes more memory */
+#define CFS_HASH_DEBUG_1	    1
+/** expensive, check key validation */
+#define CFS_HASH_DEBUG_2	    2
+
+#define CFS_HASH_DEBUG_LEVEL	CFS_HASH_DEBUG_NONE
+
+struct cfs_hash_ops;
+struct cfs_hash_lock_ops;
+struct cfs_hash_hlist_ops;
+
+typedef union {
+	rwlock_t		rw;		/**< rwlock */
+	spinlock_t		spin;		/**< spinlock */
+} cfs_hash_lock_t;
+
+/**
+ * cfs_hash_bucket is a container of:
+ * - lock, counter ...
+ * - array of hash-head starting from hsb_head[0], hash-head can be one of
+ *   . cfs_hash_head_t
+ *   . cfs_hash_head_dep_t
+ *   . cfs_hash_dhead_t
+ *   . cfs_hash_dhead_dep_t
+ *   depending on the user's requirements
+ * - some extra bytes (caller can require it while creating hash)
+ */
+typedef struct cfs_hash_bucket {
+	cfs_hash_lock_t		hsb_lock;	/**< bucket lock */
+	__u32			hsb_count;	/**< current entries */
+	__u32			hsb_version;	/**< change version */
+	unsigned int		hsb_index;	/**< index of bucket */
+	int			hsb_depmax;	/**< max depth on bucket */
+	long			hsb_head[0];	/**< hash-head array */
+} cfs_hash_bucket_t;
+
+/**
+ * cfs_hash bucket descriptor; it normally lives on the caller's stack
+ */
+typedef struct cfs_hash_bd {
+	cfs_hash_bucket_t	  *bd_bucket;      /**< address of bucket */
+	unsigned int		bd_offset;      /**< offset in bucket */
+} cfs_hash_bd_t;
+
+#define CFS_HASH_NAME_LEN	   16      /**< default name length */
+#define CFS_HASH_BIGNAME_LEN	64      /**< bigname for param tree */
+
+#define CFS_HASH_BKT_BITS	   3       /**< default bits of bucket */
+#define CFS_HASH_BITS_MAX	   30      /**< max bits of bucket */
+#define CFS_HASH_BITS_MIN	   CFS_HASH_BKT_BITS
+
+/**
+ * common hash attributes.
+ */
+enum cfs_hash_tag {
+	/**
+	 * don't need any lock, the caller will protect operations with its
+	 * own lock. With this flag:
+	 *  . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK
+	 *    will be ignored.
+	 *  . Some functions will be disabled with this flag, e.g.:
+	 *    cfs_hash_for_each_empty, cfs_hash_rehash
+	 */
+	CFS_HASH_NO_LOCK	= 1 << 0,
+	/** no bucket lock, use one spinlock to protect the whole hash */
+	CFS_HASH_NO_BKTLOCK     = 1 << 1,
+	/** rwlock to protect bucket */
+	CFS_HASH_RW_BKTLOCK     = 1 << 2,
+	/** spinlock to protect bucket */
+	CFS_HASH_SPIN_BKTLOCK   = 1 << 3,
+	/** always add new item to tail */
+	CFS_HASH_ADD_TAIL       = 1 << 4,
+	/** hash-table doesn't have refcount on item */
+	CFS_HASH_NO_ITEMREF     = 1 << 5,
+	/** big name for param-tree */
+	CFS_HASH_BIGNAME	= 1 << 6,
+	/** track global count */
+	CFS_HASH_COUNTER	= 1 << 7,
+	/** rehash item by new key */
+	CFS_HASH_REHASH_KEY     = 1 << 8,
+	/** Enable dynamic hash resizing */
+	CFS_HASH_REHASH	 = 1 << 9,
+	/** can shrink hash-size */
+	CFS_HASH_SHRINK	 = 1 << 10,
+	/** assert hash is empty on exit */
+	CFS_HASH_ASSERT_EMPTY   = 1 << 11,
+	/** record hlist depth */
+	CFS_HASH_DEPTH	  = 1 << 12,
+	/**
+	 * rehash is always scheduled in a different thread, so current
+	 * change on hash table is non-blocking
+	 */
+	CFS_HASH_NBLK_CHANGE    = 1 << 13,
+	/** NB: hs_flags is typed __u16; change its type if you
+	 * need more than 16 flags */
+};
+
+/** most used attributes */
+#define CFS_HASH_DEFAULT       (CFS_HASH_RW_BKTLOCK | \
+				CFS_HASH_COUNTER | CFS_HASH_REHASH)
+
+/**
+ * cfs_hash is a hash-table implementation for general purpose, it can support:
+ *    . two refcount modes
+ *      hash-table with & without refcount
+ *    . four lock modes
+ *      nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock
+ *    . general operations
+ *      lookup, add(add_tail or add_head), delete
+ *    . rehash
+ *      grows or shrink
+ *    . iteration
+ *      locked iteration and unlocked iteration
+ *    . bigname
+ *      support long name hash
+ *    . debug
+ *      trace max searching depth
+ *
+ * Rehash:
+ * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker)
+ * is spawned to handle the rehash in the background. Other processes can
+ * concurrently perform additions, deletions, and lookups without blocking
+ * on rehash completion, because rehash releases the global wrlock for each
+ * bucket.
+ *
+ * Rehash and iteration can't run at the same time because it's too tricky
+ * to keep both of them safe and correct. As they are relatively rare
+ * operations:
+ *   . if iteration is in progress while we try to launch rehash, rehash
+ *     just gives up, and the iterator will launch it at the end.
+ *   . if rehash is in progress while we try to iterate the hash table,
+ *     then we just wait (it shouldn't be a very long time); anyway, nobody
+ *     should expect iteration of a whole hash-table to be non-blocking.
+ *
+ * During rehashing, a (key,object) pair may be in one of two buckets,
+ * depending on whether the worker task has yet to transfer the object
+ * to its new location in the table. Lookups and deletions need to search both
+ * locations; additions must take care to only insert into the new bucket.
+ */
+
+typedef struct cfs_hash {
+	/** serialize with rehash, or serialize all operations if
+	 * the hash-table has CFS_HASH_NO_BKTLOCK */
+	cfs_hash_lock_t	     hs_lock;
+	/** hash operations */
+	struct cfs_hash_ops	*hs_ops;
+	/** hash lock operations */
+	struct cfs_hash_lock_ops   *hs_lops;
+	/** hash list operations */
+	struct cfs_hash_hlist_ops  *hs_hops;
+	/** hash buckets-table */
+	cfs_hash_bucket_t	 **hs_buckets;
+	/** total number of items on this hash-table */
+	atomic_t		hs_count;
+	/** hash flags, see cfs_hash_tag for detail */
+	__u16		       hs_flags;
+	/** # of extra-bytes for bucket, for user saving extended attributes */
+	__u16		       hs_extra_bytes;
+	/** wants to iterate */
+	__u8			hs_iterating;
+	/** hash-table is dying */
+	__u8			hs_exiting;
+	/** current hash bits */
+	__u8			hs_cur_bits;
+	/** min hash bits */
+	__u8			hs_min_bits;
+	/** max hash bits */
+	__u8			hs_max_bits;
+	/** bits for rehash */
+	__u8			hs_rehash_bits;
+	/** bits for each bucket */
+	__u8			hs_bkt_bits;
+	/** resize min threshold */
+	__u16		       hs_min_theta;
+	/** resize max threshold */
+	__u16		       hs_max_theta;
+	/** resize count */
+	__u32		       hs_rehash_count;
+	/** # of iterators (caller of cfs_hash_for_each_*) */
+	__u32		       hs_iterators;
+	/** rehash workitem */
+	cfs_workitem_t	      hs_rehash_wi;
+	/** refcount on this hash table */
+	atomic_t		hs_refcount;
+	/** rehash buckets-table */
+	cfs_hash_bucket_t	 **hs_rehash_buckets;
+#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
+	/** serialize debug members */
+	spinlock_t			hs_dep_lock;
+	/** max depth */
+	unsigned int		hs_dep_max;
+	/** id of the deepest bucket */
+	unsigned int		hs_dep_bkt;
+	/** offset in the deepest bucket */
+	unsigned int		hs_dep_off;
+	/** bits when we found the max depth */
+	unsigned int		hs_dep_bits;
+	/** workitem to output max depth */
+	cfs_workitem_t	      hs_dep_wi;
+#endif
+	/** name of htable */
+	char			hs_name[0];
+} cfs_hash_t;
+
+typedef struct cfs_hash_lock_ops {
+	/** lock the hash table */
+	void    (*hs_lock)(cfs_hash_lock_t *lock, int exclusive);
+	/** unlock the hash table */
+	void    (*hs_unlock)(cfs_hash_lock_t *lock, int exclusive);
+	/** lock the hash bucket */
+	void    (*hs_bkt_lock)(cfs_hash_lock_t *lock, int exclusive);
+	/** unlock the hash bucket */
+	void    (*hs_bkt_unlock)(cfs_hash_lock_t *lock, int exclusive);
+} cfs_hash_lock_ops_t;
+
+typedef struct cfs_hash_hlist_ops {
+	/** return hlist_head of hash-head of @bd */
+	struct hlist_head *(*hop_hhead)(cfs_hash_t *hs, cfs_hash_bd_t *bd);
+	/** return hash-head size */
+	int (*hop_hhead_size)(cfs_hash_t *hs);
+	/** add @hnode to hash-head of @bd */
+	int (*hop_hnode_add)(cfs_hash_t *hs,
+			     cfs_hash_bd_t *bd, struct hlist_node *hnode);
+	/** remove @hnode from hash-head of @bd */
+	int (*hop_hnode_del)(cfs_hash_t *hs,
+			     cfs_hash_bd_t *bd, struct hlist_node *hnode);
+} cfs_hash_hlist_ops_t;
+
+typedef struct cfs_hash_ops {
+	/** return hashed value from @key */
+	unsigned (*hs_hash)(cfs_hash_t *hs, const void *key, unsigned mask);
+	/** return key address of @hnode */
+	void *   (*hs_key)(struct hlist_node *hnode);
+	/** copy key from @hnode to @key */
+	void     (*hs_keycpy)(struct hlist_node *hnode, void *key);
+	/**
+	 *  compare @key with key of @hnode
+	 *  returns 1 on a match
+	 */
+	int      (*hs_keycmp)(const void *key, struct hlist_node *hnode);
+	/** return object address of @hnode, i.e: container_of(...hnode) */
+	void *   (*hs_object)(struct hlist_node *hnode);
+	/** get refcount of item, always called while holding the bucket lock */
+	void     (*hs_get)(cfs_hash_t *hs, struct hlist_node *hnode);
+	/** release refcount of item */
+	void     (*hs_put)(cfs_hash_t *hs, struct hlist_node *hnode);
+	/** release refcount of item, always called while holding the bucket lock */
+	void     (*hs_put_locked)(cfs_hash_t *hs, struct hlist_node *hnode);
+	/** called before @hnode is removed from the hash */
+	void     (*hs_exit)(cfs_hash_t *hs, struct hlist_node *hnode);
+} cfs_hash_ops_t;
+
+/** total number of buckets in @hs */
+#define CFS_HASH_NBKT(hs)       \
+	(1U << ((hs)->hs_cur_bits - (hs)->hs_bkt_bits))
+
+/** total number of buckets in @hs while rehashing */
+#define CFS_HASH_RH_NBKT(hs)    \
+	(1U << ((hs)->hs_rehash_bits - (hs)->hs_bkt_bits))
+
+/** number of hlists in a bucket */
+#define CFS_HASH_BKT_NHLIST(hs) (1U << (hs)->hs_bkt_bits)
+
+/** total number of hlist in @hs */
+#define CFS_HASH_NHLIST(hs)     (1U << (hs)->hs_cur_bits)
+
+/** total number of hlist in @hs while rehashing */
+#define CFS_HASH_RH_NHLIST(hs)  (1U << (hs)->hs_rehash_bits)
+
+static inline int
+cfs_hash_with_no_lock(cfs_hash_t *hs)
+{
+	/* caller will serialize all operations for this hash-table */
+	return (hs->hs_flags & CFS_HASH_NO_LOCK) != 0;
+}
+
+static inline int
+cfs_hash_with_no_bktlock(cfs_hash_t *hs)
+{
+	/* no bucket lock, one single lock to protect the hash-table */
+	return (hs->hs_flags & CFS_HASH_NO_BKTLOCK) != 0;
+}
+
+static inline int
+cfs_hash_with_rw_bktlock(cfs_hash_t *hs)
+{
+	/* rwlock to protect hash bucket */
+	return (hs->hs_flags & CFS_HASH_RW_BKTLOCK) != 0;
+}
+
+static inline int
+cfs_hash_with_spin_bktlock(cfs_hash_t *hs)
+{
+	/* spinlock to protect hash bucket */
+	return (hs->hs_flags & CFS_HASH_SPIN_BKTLOCK) != 0;
+}
+
+static inline int
+cfs_hash_with_add_tail(cfs_hash_t *hs)
+{
+	return (hs->hs_flags & CFS_HASH_ADD_TAIL) != 0;
+}
+
+static inline int
+cfs_hash_with_no_itemref(cfs_hash_t *hs)
+{
+	/* hash-table doesn't keep a refcount on items;
+	 * an item can't be removed from the hash unless
+	 * its refcount is zero */
+	return (hs->hs_flags & CFS_HASH_NO_ITEMREF) != 0;
+}
+
+static inline int
+cfs_hash_with_bigname(cfs_hash_t *hs)
+{
+	return (hs->hs_flags & CFS_HASH_BIGNAME) != 0;
+}
+
+static inline int
+cfs_hash_with_counter(cfs_hash_t *hs)
+{
+	return (hs->hs_flags & CFS_HASH_COUNTER) != 0;
+}
+
+static inline int
+cfs_hash_with_rehash(cfs_hash_t *hs)
+{
+	return (hs->hs_flags & CFS_HASH_REHASH) != 0;
+}
+
+static inline int
+cfs_hash_with_rehash_key(cfs_hash_t *hs)
+{
+	return (hs->hs_flags & CFS_HASH_REHASH_KEY) != 0;
+}
+
+static inline int
+cfs_hash_with_shrink(cfs_hash_t *hs)
+{
+	return (hs->hs_flags & CFS_HASH_SHRINK) != 0;
+}
+
+static inline int
+cfs_hash_with_assert_empty(cfs_hash_t *hs)
+{
+	return (hs->hs_flags & CFS_HASH_ASSERT_EMPTY) != 0;
+}
+
+static inline int
+cfs_hash_with_depth(cfs_hash_t *hs)
+{
+	return (hs->hs_flags & CFS_HASH_DEPTH) != 0;
+}
+
+static inline int
+cfs_hash_with_nblk_change(cfs_hash_t *hs)
+{
+	return (hs->hs_flags & CFS_HASH_NBLK_CHANGE) != 0;
+}
+
+static inline int
+cfs_hash_is_exiting(cfs_hash_t *hs)
+{       /* cfs_hash_destroy is called */
+	return hs->hs_exiting;
+}
+
+static inline int
+cfs_hash_is_rehashing(cfs_hash_t *hs)
+{       /* rehash is launched */
+	return hs->hs_rehash_bits != 0;
+}
+
+static inline int
+cfs_hash_is_iterating(cfs_hash_t *hs)
+{       /* someone is calling cfs_hash_for_each_* */
+	return hs->hs_iterating || hs->hs_iterators != 0;
+}
+
+static inline int
+cfs_hash_bkt_size(cfs_hash_t *hs)
+{
+	return offsetof(cfs_hash_bucket_t, hsb_head[0]) +
+	       hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) +
+	       hs->hs_extra_bytes;
+}
+
+#define CFS_HOP(hs, op)	   (hs)->hs_ops->hs_ ## op
+
+static inline unsigned
+cfs_hash_id(cfs_hash_t *hs, const void *key, unsigned mask)
+{
+	return CFS_HOP(hs, hash)(hs, key, mask);
+}
+
+static inline void *
+cfs_hash_key(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+	return CFS_HOP(hs, key)(hnode);
+}
+
+static inline void
+cfs_hash_keycpy(cfs_hash_t *hs, struct hlist_node *hnode, void *key)
+{
+	if (CFS_HOP(hs, keycpy) != NULL)
+		CFS_HOP(hs, keycpy)(hnode, key);
+}
+
+/**
+ * Returns 1 on a match.
+ */
+static inline int
+cfs_hash_keycmp(cfs_hash_t *hs, const void *key, struct hlist_node *hnode)
+{
+	return CFS_HOP(hs, keycmp)(key, hnode);
+}
+
+static inline void *
+cfs_hash_object(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+	return CFS_HOP(hs, object)(hnode);
+}
+
+static inline void
+cfs_hash_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+	return CFS_HOP(hs, get)(hs, hnode);
+}
+
+static inline void
+cfs_hash_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+	LASSERT(CFS_HOP(hs, put_locked) != NULL);
+
+	return CFS_HOP(hs, put_locked)(hs, hnode);
+}
+
+static inline void
+cfs_hash_put(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+	LASSERT(CFS_HOP(hs, put) != NULL);
+
+	return CFS_HOP(hs, put)(hs, hnode);
+}
+
+static inline void
+cfs_hash_exit(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+	if (CFS_HOP(hs, exit))
+		CFS_HOP(hs, exit)(hs, hnode);
+}
+
+static inline void cfs_hash_lock(cfs_hash_t *hs, int excl)
+{
+	hs->hs_lops->hs_lock(&hs->hs_lock, excl);
+}
+
+static inline void cfs_hash_unlock(cfs_hash_t *hs, int excl)
+{
+	hs->hs_lops->hs_unlock(&hs->hs_lock, excl);
+}
+
+static inline int cfs_hash_dec_and_lock(cfs_hash_t *hs,
+					atomic_t *condition)
+{
+	LASSERT(cfs_hash_with_no_bktlock(hs));
+	return atomic_dec_and_lock(condition, &hs->hs_lock.spin);
+}
+
+static inline void cfs_hash_bd_lock(cfs_hash_t *hs,
+				    cfs_hash_bd_t *bd, int excl)
+{
+	hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl);
+}
+
+static inline void cfs_hash_bd_unlock(cfs_hash_t *hs,
+				      cfs_hash_bd_t *bd, int excl)
+{
+	hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl);
+}
+
+/**
+ * operations on cfs_hash bucket (bd: bucket descriptor),
+ * they are normally for hash-table without rehash
+ */
+void cfs_hash_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bd);
+
+static inline void cfs_hash_bd_get_and_lock(cfs_hash_t *hs, const void *key,
+					    cfs_hash_bd_t *bd, int excl)
+{
+	cfs_hash_bd_get(hs, key, bd);
+	cfs_hash_bd_lock(hs, bd, excl);
+}
+
+static inline unsigned cfs_hash_bd_index_get(cfs_hash_t *hs, cfs_hash_bd_t *bd)
+{
+	return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits);
+}
+
+static inline void cfs_hash_bd_index_set(cfs_hash_t *hs,
+					 unsigned index, cfs_hash_bd_t *bd)
+{
+	bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits];
+	bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U);
+}
+
+static inline void *
+cfs_hash_bd_extra_get(cfs_hash_t *hs, cfs_hash_bd_t *bd)
+{
+	return (void *)bd->bd_bucket +
+	       cfs_hash_bkt_size(hs) - hs->hs_extra_bytes;
+}
+
+static inline __u32
+cfs_hash_bd_version_get(cfs_hash_bd_t *bd)
+{
+	/* need hold cfs_hash_bd_lock */
+	return bd->bd_bucket->hsb_version;
+}
+
+static inline __u32
+cfs_hash_bd_count_get(cfs_hash_bd_t *bd)
+{
+	/* need hold cfs_hash_bd_lock */
+	return bd->bd_bucket->hsb_count;
+}
+
+static inline int
+cfs_hash_bd_depmax_get(cfs_hash_bd_t *bd)
+{
+	return bd->bd_bucket->hsb_depmax;
+}
+
+static inline int
+cfs_hash_bd_compare(cfs_hash_bd_t *bd1, cfs_hash_bd_t *bd2)
+{
+	if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index)
+		return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index;
+
+	if (bd1->bd_offset != bd2->bd_offset)
+		return bd1->bd_offset - bd2->bd_offset;
+
+	return 0;
+}
+
+void cfs_hash_bd_add_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+			    struct hlist_node *hnode);
+void cfs_hash_bd_del_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+			    struct hlist_node *hnode);
+void cfs_hash_bd_move_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd_old,
+			     cfs_hash_bd_t *bd_new, struct hlist_node *hnode);
+
+static inline int cfs_hash_bd_dec_and_lock(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+					   atomic_t *condition)
+{
+	LASSERT(cfs_hash_with_spin_bktlock(hs));
+	return atomic_dec_and_lock(condition,
+				       &bd->bd_bucket->hsb_lock.spin);
+}
+
+static inline struct hlist_head *cfs_hash_bd_hhead(cfs_hash_t *hs,
+						  cfs_hash_bd_t *bd)
+{
+	return hs->hs_hops->hop_hhead(hs, bd);
+}
+
+struct hlist_node *cfs_hash_bd_lookup_locked(cfs_hash_t *hs,
+					    cfs_hash_bd_t *bd, const void *key);
+struct hlist_node *cfs_hash_bd_peek_locked(cfs_hash_t *hs,
+					  cfs_hash_bd_t *bd, const void *key);
+struct hlist_node *cfs_hash_bd_findadd_locked(cfs_hash_t *hs,
+					     cfs_hash_bd_t *bd, const void *key,
+					     struct hlist_node *hnode,
+					     int insist_add);
+struct hlist_node *cfs_hash_bd_finddel_locked(cfs_hash_t *hs,
+					     cfs_hash_bd_t *bd, const void *key,
+					     struct hlist_node *hnode);
+
+/**
+ * operations on cfs_hash bucket (bd: bucket descriptor),
+ * they are safe for hash-table with rehash
+ */
+void cfs_hash_dual_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bds);
+void cfs_hash_dual_bd_lock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);
+void cfs_hash_dual_bd_unlock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);
+
+static inline void cfs_hash_dual_bd_get_and_lock(cfs_hash_t *hs, const void *key,
+						 cfs_hash_bd_t *bds, int excl)
+{
+	cfs_hash_dual_bd_get(hs, key, bds);
+	cfs_hash_dual_bd_lock(hs, bds, excl);
+}
+
+struct hlist_node *cfs_hash_dual_bd_lookup_locked(cfs_hash_t *hs,
+						 cfs_hash_bd_t *bds,
+						 const void *key);
+struct hlist_node *cfs_hash_dual_bd_findadd_locked(cfs_hash_t *hs,
+						  cfs_hash_bd_t *bds,
+						  const void *key,
+						  struct hlist_node *hnode,
+						  int insist_add);
+struct hlist_node *cfs_hash_dual_bd_finddel_locked(cfs_hash_t *hs,
+						  cfs_hash_bd_t *bds,
+						  const void *key,
+						  struct hlist_node *hnode);
+
+/* Hash init/cleanup functions */
+cfs_hash_t *cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
+			    unsigned bkt_bits, unsigned extra_bytes,
+			    unsigned min_theta, unsigned max_theta,
+			    cfs_hash_ops_t *ops, unsigned flags);
+
+cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs);
+void cfs_hash_putref(cfs_hash_t *hs);
+
+/* Hash addition functions */
+void cfs_hash_add(cfs_hash_t *hs, const void *key,
+		  struct hlist_node *hnode);
+int cfs_hash_add_unique(cfs_hash_t *hs, const void *key,
+			struct hlist_node *hnode);
+void *cfs_hash_findadd_unique(cfs_hash_t *hs, const void *key,
+			      struct hlist_node *hnode);
+
+/* Hash deletion functions */
+void *cfs_hash_del(cfs_hash_t *hs, const void *key, struct hlist_node *hnode);
+void *cfs_hash_del_key(cfs_hash_t *hs, const void *key);
+
+/* Hash lookup/for_each functions */
+#define CFS_HASH_LOOP_HOG       1024
+
+typedef int (*cfs_hash_for_each_cb_t)(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+				      struct hlist_node *node, void *data);
+void *cfs_hash_lookup(cfs_hash_t *hs, const void *key);
+void cfs_hash_for_each(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
+void cfs_hash_for_each_safe(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
+int  cfs_hash_for_each_nolock(cfs_hash_t *hs,
+			      cfs_hash_for_each_cb_t, void *data);
+int  cfs_hash_for_each_empty(cfs_hash_t *hs,
+			     cfs_hash_for_each_cb_t, void *data);
+void cfs_hash_for_each_key(cfs_hash_t *hs, const void *key,
+			   cfs_hash_for_each_cb_t, void *data);
+typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data);
+void cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t, void *data);
+
+void cfs_hash_hlist_for_each(cfs_hash_t *hs, unsigned hindex,
+			     cfs_hash_for_each_cb_t, void *data);
+int  cfs_hash_is_empty(cfs_hash_t *hs);
+__u64 cfs_hash_size_get(cfs_hash_t *hs);
+
+/*
+ * Rehash - Theta is calculated to be the average chained
+ * hash depth assuming a perfectly uniform hash function.
+ */
+void cfs_hash_rehash_cancel_locked(cfs_hash_t *hs);
+void cfs_hash_rehash_cancel(cfs_hash_t *hs);
+int  cfs_hash_rehash(cfs_hash_t *hs, int do_rehash);
+void cfs_hash_rehash_key(cfs_hash_t *hs, const void *old_key,
+			 void *new_key, struct hlist_node *hnode);
+
+#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1
+/* Validate hnode references the correct key */
+static inline void
+cfs_hash_key_validate(cfs_hash_t *hs, const void *key,
+		      struct hlist_node *hnode)
+{
+	LASSERT(cfs_hash_keycmp(hs, key, hnode));
+}
+
+/* Validate hnode is in the correct bucket */
+static inline void
+cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+			 struct hlist_node *hnode)
+{
+	cfs_hash_bd_t   bds[2];
+
+	cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds);
+	LASSERT(bds[0].bd_bucket == bd->bd_bucket ||
+		bds[1].bd_bucket == bd->bd_bucket);
+}
+
+#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */
+
+static inline void
+cfs_hash_key_validate(cfs_hash_t *hs, const void *key,
+		      struct hlist_node *hnode) {}
+
+static inline void
+cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+			 struct hlist_node *hnode) {}
+
+#endif /* CFS_HASH_DEBUG_LEVEL */
+
+#define CFS_HASH_THETA_BITS  10
+#define CFS_HASH_MIN_THETA  (1U << (CFS_HASH_THETA_BITS - 1))
+#define CFS_HASH_MAX_THETA  (1U << (CFS_HASH_THETA_BITS + 1))
+
+/* Return integer component of theta */
+static inline int __cfs_hash_theta_int(int theta)
+{
+	return (theta >> CFS_HASH_THETA_BITS);
+}
+
+/* Return a fractional value between 0 and 999 */
+static inline int __cfs_hash_theta_frac(int theta)
+{
+	return ((theta * 1000) >> CFS_HASH_THETA_BITS) -
+	       (__cfs_hash_theta_int(theta) * 1000);
+}
+
+static inline int __cfs_hash_theta(cfs_hash_t *hs)
+{
+	return (atomic_read(&hs->hs_count) <<
+		CFS_HASH_THETA_BITS) >> hs->hs_cur_bits;
+}
+
+static inline void __cfs_hash_set_theta(cfs_hash_t *hs, int min, int max)
+{
+	LASSERT(min < max);
+	hs->hs_min_theta = (__u16)min;
+	hs->hs_max_theta = (__u16)max;
+}
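+
+/*
+ * For example, with CFS_HASH_THETA_BITS = 10 a theta of 1536 represents an
+ * average chain depth of 1536/1024 = 1.5: __cfs_hash_theta_int() returns 1
+ * and __cfs_hash_theta_frac() returns 500.
+ */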
+
+/* Generic debug formatting routines mainly for proc handler */
+int cfs_hash_debug_header(char *str, int size);
+int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size);
+
+/*
+ * Generic djb2 hash algorithm for character arrays.
+ */
+static inline unsigned
+cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask)
+{
+	unsigned i, hash = 5381;
+
+	LASSERT(key != NULL);
+
+	for (i = 0; i < size; i++)
+		hash = hash * 33 + ((char *)key)[i];
+
+	return (hash & mask);
+}
+
+/*
+ * Generic u32 hash algorithm.
+ */
+static inline unsigned
+cfs_hash_u32_hash(const __u32 key, unsigned mask)
+{
+	return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask);
+}
+
+/*
+ * Generic u64 hash algorithm.
+ */
+static inline unsigned
+cfs_hash_u64_hash(const __u64 key, unsigned mask)
+{
+	return ((unsigned)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
+}
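+
+/*
+ * For example, to pick one of the CFS_HASH_NHLIST(hs) chains for a 32-bit
+ * key (sketch; @hs and @key are assumed to be in scope):
+ *
+ *	unsigned index = cfs_hash_u32_hash(key, CFS_HASH_NHLIST(hs) - 1);
+ */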
+
+/** iterate over all buckets in @bds (array of cfs_hash_bd_t) */
+#define cfs_hash_for_each_bd(bds, n, i) \
+	for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++)
+
+/** iterate over all buckets of @hs */
+#define cfs_hash_for_each_bucket(hs, bd, pos)		   \
+	for (pos = 0;					   \
+	     pos < CFS_HASH_NBKT(hs) &&			 \
+	     ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++)
+
+/** iterate over all hlist of bucket @bd */
+#define cfs_hash_bd_for_each_hlist(hs, bd, hlist)	       \
+	for ((bd)->bd_offset = 0;			       \
+	     (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) &&       \
+	     (hlist = cfs_hash_bd_hhead(hs, bd)) != NULL;       \
+	     (bd)->bd_offset++)
+
+/* !__LIBCFS__HASH_H__ */
+#endif

+ 200 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_heap.h

@@ -0,0 +1,200 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ */
+/*
+ * libcfs/include/libcfs/heap.h
+ *
+ * Author: Eric Barton	<eeb@whamcloud.com>
+ *	   Liang Zhen	<liang@whamcloud.com>
+ */
+
+#ifndef __LIBCFS_HEAP_H__
+#define __LIBCFS_HEAP_H__
+
+/** \defgroup heap Binary heap
+ *
+ * The binary heap is a scalable data structure built on a binary tree. It
+ * can maintain large sets of elements sorted by one or more element
+ * properties, or really by anything that can serve as a binary predicate
+ * determining the relative ordering of any two nodes in the set. There is
+ * no search operation; rather, the intention is for users to remove the
+ * element of lowest priority, which always sits at the root of the tree
+ * (this is a min-heap implementation), for consumption.
+ *
+ * Users of the heap should embed a \e cfs_binheap_node_t object instance in
+ * every object of the set that they wish the binary heap instance to handle,
+ * and (at a minimum) provide a cfs_binheap_ops_t::hop_compare() implementation
+ * which is used by the heap as the binary predicate during its internal sorting
+ * operations.
+ *
+ * The current implementation enforces no locking scheme, and so assumes the
+ * user caters for locking between calls to insert, delete and lookup
+ * operations. Since the only consumers of the data structure at this point
+ * are NRS policies, and these operate on a per-CPT basis, binary heap instances
+ * are tied to a specific CPT.
+ * @{
+ */
+
+/**
+ * Binary heap node.
+ *
+ * Objects of this type are embedded into objects of the ordered set that is to
+ * be maintained by a \e cfs_binheap_t instance.
+ */
+typedef struct {
+	/** Index into the binary tree */
+	unsigned int	chn_index;
+} cfs_binheap_node_t;
+
+#define CBH_SHIFT	9
+#define CBH_SIZE       (1 << CBH_SHIFT)		    /* # ptrs per level */
+#define CBH_MASK       (CBH_SIZE - 1)
+#define CBH_NOB	(CBH_SIZE * sizeof(cfs_binheap_node_t *))
+
+#define CBH_POISON	0xdeadbeef
+
+/**
+ * Binary heap flags.
+ */
+enum {
+	CBH_FLAG_ATOMIC_GROW	= 1,
+};
+
+struct cfs_binheap;
+
+/**
+ * Binary heap operations.
+ */
+typedef struct {
+	/**
+	 * Called right before inserting a node into the binary heap.
+	 *
+	 * Implementing this operation is optional.
+	 *
+	 * \param[in] h The heap
+	 * \param[in] e The node
+	 *
+	 * \retval 0 success
+	 * \retval != 0 error
+	 */
+	int		(*hop_enter)(struct cfs_binheap *h,
+				     cfs_binheap_node_t *e);
+	/**
+	 * Called right after removing a node from the binary heap.
+	 *
+	 * Implementing this operation is optional.
+	 *
+	 * \param[in] h The heap
+	 * \param[in] e The node
+	 */
+	void		(*hop_exit)(struct cfs_binheap *h,
+				    cfs_binheap_node_t *e);
+	/**
+	 * A binary predicate which is called during internal heap sorting
+	 * operations, and used in order to determine the relevant ordering of
+	 * two heap nodes.
+	 *
+	 * Implementing this operation is mandatory.
+	 *
+	 * \param[in] a The first heap node
+	 * \param[in] b The second heap node
+	 *
+	 * \retval 0 Node a > node b
+	 * \retval 1 Node a < node b
+	 *
+	 * \see cfs_binheap_bubble()
+	 * \see cfs_binheap_sink()
+	 */
+	int		(*hop_compare)(cfs_binheap_node_t *a,
+				       cfs_binheap_node_t *b);
+} cfs_binheap_ops_t;
+
+/**
+ * Binary heap object.
+ *
+ * Sorts elements of type \e cfs_binheap_node_t
+ */
+typedef struct cfs_binheap {
+	/** Triple indirect */
+	cfs_binheap_node_t  ****cbh_elements3;
+	/** double indirect */
+	cfs_binheap_node_t   ***cbh_elements2;
+	/** single indirect */
+	cfs_binheap_node_t    **cbh_elements1;
+	/** # elements referenced */
+	unsigned int		cbh_nelements;
+	/** high water mark */
+	unsigned int		cbh_hwm;
+	/** user flags */
+	unsigned int		cbh_flags;
+	/** operations table */
+	cfs_binheap_ops_t      *cbh_ops;
+	/** private data */
+	void		       *cbh_private;
+	/** associated CPT table */
+	struct cfs_cpt_table   *cbh_cptab;
+	/** associated CPT id of this cfs_binheap_t::cbh_cptab */
+	int			cbh_cptid;
+} cfs_binheap_t;
+
+void cfs_binheap_destroy(cfs_binheap_t *h);
+cfs_binheap_t *cfs_binheap_create(cfs_binheap_ops_t *ops, unsigned int flags,
+				  unsigned count, void *arg,
+				  struct cfs_cpt_table *cptab, int cptid);
+cfs_binheap_node_t *cfs_binheap_find(cfs_binheap_t *h, unsigned int idx);
+int cfs_binheap_insert(cfs_binheap_t *h, cfs_binheap_node_t *e);
+void cfs_binheap_remove(cfs_binheap_t *h, cfs_binheap_node_t *e);
+
+static inline int
+cfs_binheap_size(cfs_binheap_t *h)
+{
+	return h->cbh_nelements;
+}
+
+static inline int
+cfs_binheap_is_empty(cfs_binheap_t *h)
+{
+	return h->cbh_nelements == 0;
+}
+
+static inline cfs_binheap_node_t *
+cfs_binheap_root(cfs_binheap_t *h)
+{
+	return cfs_binheap_find(h, 0);
+}
+
+static inline cfs_binheap_node_t *
+cfs_binheap_remove_root(cfs_binheap_t *h)
+{
+	cfs_binheap_node_t *e = cfs_binheap_find(h, 0);
+
+	if (e != NULL)
+		cfs_binheap_remove(h, e);
+	return e;
+}
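+
+/*
+ * A minimal usage sketch; struct my_elem, my_cmp() and my_ops below are
+ * hypothetical:
+ *
+ *	struct my_elem {
+ *		__u64			me_deadline;
+ *		cfs_binheap_node_t	me_node;
+ *	};
+ *
+ *	static int my_cmp(cfs_binheap_node_t *a, cfs_binheap_node_t *b)
+ *	{
+ *		struct my_elem *ea = container_of(a, struct my_elem, me_node);
+ *		struct my_elem *eb = container_of(b, struct my_elem, me_node);
+ *
+ *		return ea->me_deadline < eb->me_deadline;
+ *	}
+ *
+ *	static cfs_binheap_ops_t my_ops = { .hop_compare = my_cmp };
+ *
+ *	heap = cfs_binheap_create(&my_ops, CBH_FLAG_ATOMIC_GROW, 0, NULL,
+ *				  cptab, cptid);
+ *	cfs_binheap_insert(heap, &elem->me_node);
+ *	node = cfs_binheap_remove_root(heap);	(lowest deadline first)
+ */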
+
+/** @} heap */
+
+#endif /* __LIBCFS_HEAP_H__ */

+ 222 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h

@@ -0,0 +1,222 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_ioctl.h
+ *
+ * Low-level ioctl data structures. Kernel ioctl functions are declared
+ * here; user space functions are in libcfsutil_ioctl.h.
+ *
+ */
+
+#ifndef __LIBCFS_IOCTL_H__
+#define __LIBCFS_IOCTL_H__
+
+
+#define LIBCFS_IOCTL_VERSION 0x0001000a
+
+struct libcfs_ioctl_data {
+	__u32 ioc_len;
+	__u32 ioc_version;
+
+	__u64 ioc_nid;
+	__u64 ioc_u64[1];
+
+	__u32 ioc_flags;
+	__u32 ioc_count;
+	__u32 ioc_net;
+	__u32 ioc_u32[7];
+
+	__u32 ioc_inllen1;
+	char *ioc_inlbuf1;
+	__u32 ioc_inllen2;
+	char *ioc_inlbuf2;
+
+	__u32 ioc_plen1; /* buffers in userspace */
+	char *ioc_pbuf1;
+	__u32 ioc_plen2; /* buffers in userspace */
+	char *ioc_pbuf2;
+
+	char ioc_bulk[0];
+};
+
+
+struct libcfs_ioctl_hdr {
+	__u32 ioc_len;
+	__u32 ioc_version;
+};
+
+struct libcfs_debug_ioctl_data {
+	struct libcfs_ioctl_hdr hdr;
+	unsigned int subs;
+	unsigned int debug;
+};
+
+#define LIBCFS_IOC_INIT(data)			   \
+do {						    \
+	memset(&data, 0, sizeof(data));		 \
+	data.ioc_version = LIBCFS_IOCTL_VERSION;	\
+	data.ioc_len = sizeof(data);		    \
+} while (0)
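+
+/*
+ * A minimal sketch of preparing a request from user space (error handling
+ * and buffer packing omitted); @fd is an open descriptor for the libcfs
+ * character device and @nid the peer to ping, both assumed to be set up
+ * elsewhere:
+ *
+ *	struct libcfs_ioctl_data data;
+ *
+ *	LIBCFS_IOC_INIT(data);
+ *	data.ioc_nid = nid;
+ *	rc = ioctl(fd, IOC_LIBCFS_PING, &data);
+ */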
+
+
+struct libcfs_ioctl_handler {
+	struct list_head item;
+	int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_data *data);
+};
+
+#define DECLARE_IOCTL_HANDLER(ident, func)		      \
+	struct libcfs_ioctl_handler ident = {		   \
+		/* .item = */ LIST_HEAD_INIT(ident.item),   \
+		/* .handle_ioctl = */ func		      \
+	}
+
+
+/* FIXME check conflict with lustre_lib.h */
+#define LIBCFS_IOC_DEBUG_MASK	     _IOWR('f', 250, long)
+
+
+/* ioctls for manipulating snapshots 30- */
+#define IOC_LIBCFS_TYPE		   'e'
+#define IOC_LIBCFS_MIN_NR		 30
+/* libcfs ioctls */
+#define IOC_LIBCFS_PANIC		   _IOWR('e', 30, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CLEAR_DEBUG	     _IOWR('e', 31, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_MARK_DEBUG	      _IOWR('e', 32, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LWT_CONTROL	     _IOWR('e', 33, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LWT_SNAPSHOT	    _IOWR('e', 34, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LWT_LOOKUP_STRING       _IOWR('e', 35, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_MEMHOG		  _IOWR('e', 36, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PING_TEST	       _IOWR('e', 37, IOCTL_LIBCFS_TYPE)
+/* lnet ioctls */
+#define IOC_LIBCFS_GET_NI		  _IOWR('e', 50, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_FAIL_NID		_IOWR('e', 51, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_ROUTE	       _IOWR('e', 52, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_ROUTE	       _IOWR('e', 53, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_ROUTE	       _IOWR('e', 54, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_NOTIFY_ROUTER	   _IOWR('e', 55, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_UNCONFIGURE	     _IOWR('e', 56, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PORTALS_COMPATIBILITY   _IOWR('e', 57, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LNET_DIST	       _IOWR('e', 58, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CONFIGURE	       _IOWR('e', 59, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_TESTPROTOCOMPAT	 _IOWR('e', 60, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PING		    _IOWR('e', 61, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEBUG_PEER	      _IOWR('e', 62, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LNETST		  _IOWR('e', 63, IOCTL_LIBCFS_TYPE)
+/* lnd ioctls */
+#define IOC_LIBCFS_REGISTER_MYNID	  _IOWR('e', 70, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CLOSE_CONNECTION	_IOWR('e', 71, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PUSH_CONNECTION	 _IOWR('e', 72, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_CONN		_IOWR('e', 73, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_PEER		_IOWR('e', 74, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_PEER		_IOWR('e', 75, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_PEER		_IOWR('e', 76, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_TXDESC	      _IOWR('e', 77, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_INTERFACE	   _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_INTERFACE	   _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_INTERFACE	   _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
+
+#define IOC_LIBCFS_MAX_NR			     80
+
+static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
+{
+	int len = sizeof(*data);
+
+	len += cfs_size_round(data->ioc_inllen1);
+	len += cfs_size_round(data->ioc_inllen2);
+	return len;
+}
+
+static inline int libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
+{
+	if (data->ioc_len > (1 << 30)) {
+		CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
+		return 1;
+	}
+	if (data->ioc_inllen1 > (1 << 30)) {
+		CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
+		return 1;
+	}
+	if (data->ioc_inllen2 > (1 << 30)) {
+		CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
+		return 1;
+	}
+	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+		CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+		CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_pbuf1 && !data->ioc_plen1) {
+		CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_pbuf2 && !data->ioc_plen2) {
+		CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_plen1 && !data->ioc_pbuf1) {
+		CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
+		return 1;
+	}
+	if (data->ioc_plen2 && !data->ioc_pbuf2) {
+		CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
+		return 1;
+	}
+	if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_len) {
+		CERROR("LIBCFS ioctl: packlen != ioc_len\n");
+		return 1;
+	}
+	if (data->ioc_inllen1 &&
+	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
+		CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
+		return 1;
+	}
+	if (data->ioc_inllen2 &&
+	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
+			   data->ioc_inllen2 - 1] != '\0') {
+		CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
+		return 1;
+	}
+	return 0;
+}
+
+
+extern int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
+extern int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
+extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg);
+extern int libcfs_ioctl_popdata(void *arg, void *buf, int size);
+
+
+#endif /* __LIBCFS_IOCTL_H__ */

+ 117 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_kernelcomm.h

@@ -0,0 +1,117 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: Nathan Rutman <nathan.rutman@sun.com>
+ *
+ * libcfs/include/libcfs/libcfs_kernelcomm.h
+ *
+ * Kernel <-> userspace communication routines.
+ * The definitions below are used in the kernel and userspace.
+ *
+ */
+
+#ifndef __LIBCFS_KERNELCOMM_H__
+#define __LIBCFS_KERNELCOMM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+/* KUC message header.
+ * All current and future KUC messages should use this header.
+ * To avoid having to include Lustre headers from libcfs, define this here.
+ */
+struct kuc_hdr {
+	__u16 kuc_magic;
+	__u8  kuc_transport;  /* Each new Lustre feature should use a different
+				 transport */
+	__u8  kuc_flags;
+	__u16 kuc_msgtype;    /* Message type or opcode, transport-specific */
+	__u16 kuc_msglen;     /* Including header */
+} __attribute__((aligned(sizeof(__u64))));
+
+#define KUC_MAGIC  0x191C /* Lustre9etLinC */
+#define KUC_FL_BLOCK 0x01   /* Wait for send */
+
+/* kuc_msgtype values are defined in each transport */
+enum kuc_transport_type {
+	KUC_TRANSPORT_GENERIC   = 1,
+	KUC_TRANSPORT_HSM       = 2,
+	KUC_TRANSPORT_CHANGELOG = 3,
+};
+
+enum kuc_generic_message_type {
+	KUC_MSG_SHUTDOWN = 1,
+};
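+
+/*
+ * A minimal sketch of filling in the header for a generic shutdown message:
+ *
+ *	struct kuc_hdr hdr;
+ *
+ *	hdr.kuc_magic     = KUC_MAGIC;
+ *	hdr.kuc_transport = KUC_TRANSPORT_GENERIC;
+ *	hdr.kuc_flags     = 0;
+ *	hdr.kuc_msgtype   = KUC_MSG_SHUTDOWN;
+ *	hdr.kuc_msglen    = sizeof(hdr);
+ */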
+
+/* prototype for callback function on kuc groups */
+typedef int (*libcfs_kkuc_cb_t)(__u32 data, void *cb_arg);
+
+/* KUC Broadcast Groups. This determines which userspace process hears which
+ * messages.  Multiple transports may be used within a group, or multiple
+ * groups may use the same transport.  Broadcast
+ * groups need not be used if e.g. a UID is specified instead;
+ * use group 0 to signify unicast.
+ */
+#define KUC_GRP_HSM	   0x02
+#define KUC_GRP_MAX	   KUC_GRP_HSM
+
+/* Kernel methods */
+extern int libcfs_kkuc_msg_put(struct file *fp, void *payload);
+extern int libcfs_kkuc_group_put(int group, void *payload);
+extern int libcfs_kkuc_group_add(struct file *fp, int uid, int group,
+				 __u32 data);
+extern int libcfs_kkuc_group_rem(int uid, int group);
+extern int libcfs_kkuc_group_foreach(int group, libcfs_kkuc_cb_t cb_func,
+				     void *cb_arg);
+
+#define LK_FLG_STOP 0x01
+
+/* kernelcomm control structure, passed from userspace to kernel */
+typedef struct lustre_kernelcomm {
+	__u32 lk_wfd;
+	__u32 lk_rfd;
+	__u32 lk_uid;
+	__u32 lk_group;
+	__u32 lk_data;
+	__u32 lk_flags;
+} __attribute__((packed)) lustre_kernelcomm;
+
+/* Userspace methods */
+extern int libcfs_ukuc_start(lustre_kernelcomm *l, int groups);
+extern int libcfs_ukuc_stop(lustre_kernelcomm *l);
+extern int libcfs_ukuc_msg_get(lustre_kernelcomm *l, char *buf, int maxsize,
+			       int transport);
+
+#endif /* __LIBCFS_KERNELCOMM_H__ */

+ 101 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h

@@ -0,0 +1,101 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_prim.h
+ *
+ * General primitives.
+ *
+ */
+
+#ifndef __LIBCFS_PRIM_H__
+#define __LIBCFS_PRIM_H__
+
+#ifndef EXPORT_SYMBOL
+# define EXPORT_SYMBOL(s)
+#endif
+
+/*
+ * Schedule
+ */
+void cfs_pause(cfs_duration_t ticks);
+
+/*
+ * Timer
+ */
+typedef  void (cfs_timer_func_t)(ulong_ptr_t);
+void schedule_timeout_and_set_state(cfs_task_state_t, int64_t);
+
+void init_waitqueue_entry_current(wait_queue_t *link);
+int64_t waitq_timedwait(wait_queue_t *, cfs_task_state_t, int64_t);
+void waitq_wait(wait_queue_t *, cfs_task_state_t);
+void add_wait_queue_exclusive_head(wait_queue_head_t *, wait_queue_t *);
+
+void cfs_init_timer(timer_list_t *t);
+void cfs_timer_init(timer_list_t *t, cfs_timer_func_t *func, void *arg);
+void cfs_timer_done(timer_list_t *t);
+void cfs_timer_arm(timer_list_t *t, cfs_time_t deadline);
+void cfs_timer_disarm(timer_list_t *t);
+int  cfs_timer_is_armed(timer_list_t *t);
+cfs_time_t cfs_timer_deadline(timer_list_t *t);
+
+/*
+ * Memory
+ */
+#ifndef memory_pressure_get
+#define memory_pressure_get() (0)
+#endif
+#ifndef memory_pressure_set
+#define memory_pressure_set() do {} while (0)
+#endif
+#ifndef memory_pressure_clr
+#define memory_pressure_clr() do {} while (0)
+#endif
+
+static inline int cfs_memory_pressure_get_and_set(void)
+{
+	int old = memory_pressure_get();
+
+	if (!old)
+		memory_pressure_set();
+	return old;
+}
+
+static inline void cfs_memory_pressure_restore(int old)
+{
+	if (old)
+		memory_pressure_set();
+	else
+		memory_pressure_clr();
+}
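+
+/*
+ * Typical pattern: flag memory pressure around a reclaim-sensitive section
+ * and restore the previous state afterwards (sketch):
+ *
+ *	int old = cfs_memory_pressure_get_and_set();
+ *
+ *	... allocation-heavy work ...
+ *
+ *	cfs_memory_pressure_restore(old);
+ */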
+#endif /* __LIBCFS_PRIM_H__ */

+ 568 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_private.h

@@ -0,0 +1,568 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_private.h
+ *
+ * Various defines for libcfs.
+ *
+ */
+
+#ifndef __LIBCFS_PRIVATE_H__
+#define __LIBCFS_PRIVATE_H__
+
+/* XXX this layering violation is for nidstrings */
+#include <linux/lnet/types.h>
+
+#ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
+#endif
+
+
+
+/*
+ * When this is on, the LASSERT macro includes a check for assignment used in
+ * place of an equality check, but doesn't have unlikely(). Turn this on from
+ * time to time for test builds. This shouldn't be on for a production
+ * release.
+ */
+#define LASSERT_CHECKED (0)
+
+
+#define LASSERTF(cond, fmt, ...)					\
+do {									\
+	if (unlikely(!(cond))) {					\
+		LIBCFS_DEBUG_MSG_DATA_DECL(__msg_data, D_EMERG, NULL);	\
+		libcfs_debug_msg(&__msg_data,				\
+				 "ASSERTION( %s ) failed: " fmt, #cond,	\
+				 ## __VA_ARGS__);			\
+		lbug_with_loc(&__msg_data);				\
+	}								\
+} while (0)
+
+#define LASSERT(cond) LASSERTF(cond, "\n")
+
+# define LINVRNT(exp) ((void)sizeof(!!(exp)))
+
+#define KLASSERT(e) LASSERT(e)
+
+void lbug_with_loc(struct libcfs_debug_msg_data *) __attribute__((noreturn));
+
+#define LBUG()							  \
+do {								    \
+	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL);	     \
+	lbug_with_loc(&msgdata);					\
+} while (0)
+
+extern atomic_t libcfs_kmemory;
+/*
+ * Memory
+ */
+
+# define libcfs_kmem_inc(ptr, size)		\
+do {						\
+	atomic_add(size, &libcfs_kmemory);	\
+} while (0)
+
+# define libcfs_kmem_dec(ptr, size)		\
+do {						\
+	atomic_sub(size, &libcfs_kmemory);	\
+} while (0)
+
+# define libcfs_kmem_read()			\
+	atomic_read(&libcfs_kmemory)
+
+
+#ifndef LIBCFS_VMALLOC_SIZE
+#define LIBCFS_VMALLOC_SIZE	(2 << PAGE_CACHE_SHIFT) /* 2 pages */
+#endif
+
+#define LIBCFS_ALLOC_PRE(size, mask)					    \
+do {									    \
+	LASSERT(!in_interrupt() ||					    \
+		((size) <= LIBCFS_VMALLOC_SIZE &&			    \
+		 ((mask) & GFP_ATOMIC)) != 0);			    \
+} while (0)
+
+#define LIBCFS_ALLOC_POST(ptr, size)					    \
+do {									    \
+	if (unlikely((ptr) == NULL)) {					    \
+		CERROR("LNET: out of memory at %s:%d (tried to alloc '"	    \
+		       #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));  \
+		CERROR("LNET: %d total bytes allocated by lnet\n",	    \
+		       libcfs_kmem_read());				    \
+	} else {							    \
+		memset((ptr), 0, (size));				    \
+		libcfs_kmem_inc((ptr), (size));				    \
+		CDEBUG(D_MALLOC, "alloc '" #ptr "': %d at %p (tot %d).\n",  \
+		       (int)(size), (ptr), libcfs_kmem_read());		    \
+	}								   \
+} while (0)
+
+/**
+ * allocate memory with GFP flags @mask
+ */
+#define LIBCFS_ALLOC_GFP(ptr, size, mask)				    \
+do {									    \
+	LIBCFS_ALLOC_PRE((size), (mask));				    \
+	(ptr) = (size) <= LIBCFS_VMALLOC_SIZE ?				    \
+		kmalloc((size), (mask)) : vmalloc(size);	    \
+	LIBCFS_ALLOC_POST((ptr), (size));				    \
+} while (0)
+
+/**
+ * default allocator
+ */
+#define LIBCFS_ALLOC(ptr, size) \
+	LIBCFS_ALLOC_GFP(ptr, size, __GFP_IO)
+
+/**
+ * non-sleeping allocator
+ */
+#define LIBCFS_ALLOC_ATOMIC(ptr, size) \
+	LIBCFS_ALLOC_GFP(ptr, size, GFP_ATOMIC)
+
+/**
+ * allocate memory for specified CPU partition
+ *   \a cptab != NULL, \a cpt is CPU partition id of \a cptab
+ *   \a cptab == NULL, \a cpt is HW NUMA node id
+ */
+#define LIBCFS_CPT_ALLOC_GFP(ptr, cptab, cpt, size, mask)		    \
+do {									    \
+	LIBCFS_ALLOC_PRE((size), (mask));				    \
+	(ptr) = (size) <= LIBCFS_VMALLOC_SIZE ?				    \
+		cfs_cpt_malloc((cptab), (cpt), (size), (mask)) :	    \
+		cfs_cpt_vmalloc((cptab), (cpt), (size));		    \
+	LIBCFS_ALLOC_POST((ptr), (size));				    \
+} while (0)
+
+/** default numa allocator */
+#define LIBCFS_CPT_ALLOC(ptr, cptab, cpt, size)				    \
+	LIBCFS_CPT_ALLOC_GFP(ptr, cptab, cpt, size, __GFP_IO)
+
+#define LIBCFS_FREE(ptr, size)					  \
+do {								    \
+	int s = (size);						 \
+	if (unlikely((ptr) == NULL)) {				  \
+		CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at "    \
+		       "%s:%d\n", s, __FILE__, __LINE__);	       \
+		break;						  \
+	}							       \
+	libcfs_kmem_dec((ptr), s);				      \
+	CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",     \
+	       s, (ptr), libcfs_kmem_read());				\
+	if (unlikely(s > LIBCFS_VMALLOC_SIZE))			  \
+		vfree(ptr);				    \
+	else							    \
+		kfree(ptr);					  \
+} while (0)
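+
+/*
+ * A minimal sketch (struct foo is hypothetical); the allocation is zeroed
+ * and accounted in libcfs_kmemory, and the same size must be passed to
+ * LIBCFS_FREE so the accounting and the kmalloc/vmalloc choice stay
+ * consistent:
+ *
+ *	struct foo *obj;
+ *
+ *	LIBCFS_ALLOC(obj, sizeof(*obj));
+ *	if (obj == NULL)
+ *		return -ENOMEM;
+ *	...
+ *	LIBCFS_FREE(obj, sizeof(*obj));
+ */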
+
+/******************************************************************************/
+
+/* htonl hack - either this, or compile with -O2. Stupid byteorder/generic.h */
+#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
+#define ___htonl(x) __cpu_to_be32(x)
+#define ___htons(x) __cpu_to_be16(x)
+#define ___ntohl(x) __be32_to_cpu(x)
+#define ___ntohs(x) __be16_to_cpu(x)
+#define htonl(x) ___htonl(x)
+#define ntohl(x) ___ntohl(x)
+#define htons(x) ___htons(x)
+#define ntohs(x) ___ntohs(x)
+#endif
+
+void libcfs_debug_dumpstack(task_t *tsk);
+void libcfs_run_upcall(char **argv);
+void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *);
+void libcfs_debug_dumplog(void);
+int libcfs_debug_init(unsigned long bufsize);
+int libcfs_debug_cleanup(void);
+int libcfs_debug_clear_buffer(void);
+int libcfs_debug_mark_buffer(const char *text);
+
+void libcfs_debug_set_level(unsigned int debug_level);
+
+
+/*
+ * allocate per-cpu-partition data; the returned value is an array of
+ * pointers that can be indexed by CPU partition ID.
+ *	cptable != NULL: size of array is number of CPU partitions
+ *	cptable == NULL: size of array is number of HW cores
+ */
+void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
+/*
+ * destroy a per-cpu-partition variable
+ */
+void  cfs_percpt_free(void *vars);
+int   cfs_percpt_number(void *vars);
+void *cfs_percpt_current(void *vars);
+void *cfs_percpt_index(void *vars, int idx);
+
+#define cfs_percpt_for_each(var, i, vars)		\
+	for (i = 0; i < cfs_percpt_number(vars) &&	\
+		    ((var) = (vars)[i]) != NULL; i++)
+
+/*
+ * allocate a variable array, returned value is an array of pointers.
+ * Caller can specify length of array by count.
+ */
+void *cfs_array_alloc(int count, unsigned int size);
+void  cfs_array_free(void *vars);
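+
+/*
+ * A minimal sketch of the per-CPU-partition allocator above (struct my_stats
+ * and its ms_count field are hypothetical):
+ *
+ *	struct my_stats **stats;
+ *	struct my_stats *s;
+ *	int i;
+ *
+ *	stats = cfs_percpt_alloc(cptab, sizeof(*s));
+ *	cfs_percpt_for_each(s, i, stats)
+ *		s->ms_count = 0;
+ *	cfs_percpt_free(stats);
+ */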
+
+#define LASSERT_ATOMIC_ENABLED	  (1)
+
+#if LASSERT_ATOMIC_ENABLED
+
+/** assert value of @a is equal to @v */
+#define LASSERT_ATOMIC_EQ(a, v)				 \
+do {							    \
+	LASSERTF(atomic_read(a) == v,		       \
+		 "value: %d\n", atomic_read((a)));	  \
+} while (0)
+
+/** assert value of @a is unequal to @v */
+#define LASSERT_ATOMIC_NE(a, v)				 \
+do {							    \
+	LASSERTF(atomic_read(a) != v,		       \
+		 "value: %d\n", atomic_read((a)));	  \
+} while (0)
+
+/** assert value of @a is less than @v */
+#define LASSERT_ATOMIC_LT(a, v)				 \
+do {							    \
+	LASSERTF(atomic_read(a) < v,			\
+		 "value: %d\n", atomic_read((a)));	  \
+} while (0)
+
+/** assert value of @a is less than or equal to @v */
+#define LASSERT_ATOMIC_LE(a, v)				 \
+do {							    \
+	LASSERTF(atomic_read(a) <= v,		       \
+		 "value: %d\n", atomic_read((a)));	  \
+} while (0)
+
+/** assert value of @a is greater than @v */
+#define LASSERT_ATOMIC_GT(a, v)				 \
+do {							    \
+	LASSERTF(atomic_read(a) > v,			\
+		 "value: %d\n", atomic_read((a)));	  \
+} while (0)
+
+/** assert value of @a is greater than or equal to @v */
+#define LASSERT_ATOMIC_GE(a, v)				 \
+do {							    \
+	LASSERTF(atomic_read(a) >= v,		       \
+		 "value: %d\n", atomic_read((a)));	  \
+} while (0)
+
+/** assert value of @a is greater than @v1 and less than @v2 */
+#define LASSERT_ATOMIC_GT_LT(a, v1, v2)			 \
+do {							    \
+	int __v = atomic_read(a);			   \
+	LASSERTF(__v > v1 && __v < v2, "value: %d\n", __v);     \
+} while (0)
+
+/** assert value of @a is greater than @v1 and less than or equal to @v2 */
+#define LASSERT_ATOMIC_GT_LE(a, v1, v2)			 \
+do {							    \
+	int __v = atomic_read(a);			   \
+	LASSERTF(__v > v1 && __v <= v2, "value: %d\n", __v);    \
+} while (0)
+
+/** assert value of @a is greater than or equal to @v1 and less than @v2 */
+#define LASSERT_ATOMIC_GE_LT(a, v1, v2)			 \
+do {							    \
+	int __v = atomic_read(a);			   \
+	LASSERTF(__v >= v1 && __v < v2, "value: %d\n", __v);    \
+} while (0)
+
+/** assert value of @a is greater than or equal to @v1 and less than or equal to @v2 */
+#define LASSERT_ATOMIC_GE_LE(a, v1, v2)			 \
+do {							    \
+	int __v = atomic_read(a);			   \
+	LASSERTF(__v >= v1 && __v <= v2, "value: %d\n", __v);   \
+} while (0)
+
+#else /* !LASSERT_ATOMIC_ENABLED */
+
+#define LASSERT_ATOMIC_EQ(a, v)		 do {} while (0)
+#define LASSERT_ATOMIC_NE(a, v)		 do {} while (0)
+#define LASSERT_ATOMIC_LT(a, v)		 do {} while (0)
+#define LASSERT_ATOMIC_LE(a, v)		 do {} while (0)
+#define LASSERT_ATOMIC_GT(a, v)		 do {} while (0)
+#define LASSERT_ATOMIC_GE(a, v)		 do {} while (0)
+#define LASSERT_ATOMIC_GT_LT(a, v1, v2)	 do {} while (0)
+#define LASSERT_ATOMIC_GT_LE(a, v1, v2)	 do {} while (0)
+#define LASSERT_ATOMIC_GE_LT(a, v1, v2)	 do {} while (0)
+#define LASSERT_ATOMIC_GE_LE(a, v1, v2)	 do {} while (0)
+
+#endif /* LASSERT_ATOMIC_ENABLED */
+
+#define LASSERT_ATOMIC_ZERO(a)		  LASSERT_ATOMIC_EQ(a, 0)
+#define LASSERT_ATOMIC_POS(a)		   LASSERT_ATOMIC_GT(a, 0)
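A short sketch of the intended use around reference counting (hedged: `demo_obj` and its field are invented for illustration; `LASSERTF` and `LIBCFS_FREE` come from this header family):

```c
struct demo_obj {
	atomic_t	do_refcount;
};

static void demo_obj_put(struct demo_obj *obj)
{
	/* catch refcount corruption before the decrement, not after */
	LASSERT_ATOMIC_POS(&obj->do_refcount);

	if (atomic_dec_and_test(&obj->do_refcount))
		LIBCFS_FREE(obj, sizeof(*obj));
}
```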
+
+#define CFS_ALLOC_PTR(ptr)      LIBCFS_ALLOC(ptr, sizeof(*(ptr)))
+#define CFS_FREE_PTR(ptr)       LIBCFS_FREE(ptr, sizeof(*(ptr)))
+
+/*
+ * percpu partition lock
+ *
+ * There are some use-cases like this in Lustre:
+ * . each CPU partition has its own private data which is frequently
+ *   changed, and mostly by the local CPU partition.
+ * . all CPU partitions share some global data, and these data are
+ *   rarely changed.
+ *
+ * LNet is a typical example.
+ * The CPU partition lock is designed for this kind of use-case:
+ * . each CPU partition has its own private lock
+ * . changing private data only requires taking the private lock
+ * . reading shared data only requires taking _any_ of the private locks
+ * . changing shared data requires taking _all_ private locks,
+ *   which is slow and should be rare.
+ */
+
+enum {
+	CFS_PERCPT_LOCK_EX	= -1, /* negative */
+};
+
+
+struct cfs_percpt_lock {
+	/* cpu-partition-table for this lock */
+	struct cfs_cpt_table	*pcl_cptab;
+	/* exclusively locked */
+	unsigned int		pcl_locked;
+	/* private lock table */
+	spinlock_t		**pcl_locks;
+};
+
+/* return number of private locks */
+static inline int
+cfs_percpt_lock_num(struct cfs_percpt_lock *pcl)
+{
+	return cfs_cpt_number(pcl->pcl_cptab);
+}
+
+
+/*
+ * create a cpu-partition lock based on CPU partition table \a cptab;
+ * each CPU partition gets its own private lock
+ */
+struct cfs_percpt_lock *cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab);
+/* destroy a cpu-partition lock */
+void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
+
+/* lock private lock \a index of \a pcl */
+void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
+/* unlock private lock \a index of \a pcl */
+void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
+/* create percpt (atomic) refcount based on @cptab */
+atomic_t **cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int val);
+/* destroy percpt refcount */
+void cfs_percpt_atomic_free(atomic_t **refs);
+/* return sum of all percpu refs */
+int cfs_percpt_atomic_summary(atomic_t **refs);
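A sketch of the locking pattern described above: per-partition updates take one private lock, while shared updates pass the CFS_PERCPT_LOCK_EX sentinel to take them all (illustrative only):

```c
static void demo_update(struct cfs_percpt_lock *pcl, int cpt)
{
	/* fast path: touch this partition's private data only */
	cfs_percpt_lock(pcl, cpt);
	/* ... modify per-partition state ... */
	cfs_percpt_unlock(pcl, cpt);

	/* slow path: take every private lock to change shared data */
	cfs_percpt_lock(pcl, CFS_PERCPT_LOCK_EX);
	/* ... modify shared state; keep this rare ... */
	cfs_percpt_unlock(pcl, CFS_PERCPT_LOCK_EX);
}
```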
+
+
+/**
+ * Compile-time assertion.
+ *
+ * Check an invariant described by a constant expression at compile time by
+ * forcing a compiler error if it does not hold.  \a cond must be a constant
+ * expression as defined by the ISO C Standard:
+ *
+ *       6.8.4.2  The switch statement
+ *       ....
+ *       [#3] The expression of each case label shall be  an  integer
+ *       constant   expression  and  no  two  of  the  case  constant
+ *       expressions in the same switch statement shall have the same
+ *       value  after  conversion...
+ *
+ */
+#define CLASSERT(cond)						\
+	do { switch (42) { case (cond): case 0: break; } } while (0)
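For example (a sketch; the `__u64` width is just a convenient invariant to pin down):

```c
static inline void demo_classert(void)
{
	/* holds, so it compiles away to nothing */
	CLASSERT(sizeof(__u64) == 8);

	/*
	 * A false condition would make `case (cond):' duplicate the
	 * `case 0:' label and abort the build:
	 *
	 *	CLASSERT(sizeof(__u64) == 4);
	 */
}
```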
+
+/* support decl needed both by kernel and liblustre */
+int         libcfs_isknown_lnd(int type);
+char       *libcfs_lnd2modname(int type);
+char       *libcfs_lnd2str(int type);
+int         libcfs_str2lnd(const char *str);
+char       *libcfs_net2str(__u32 net);
+char       *libcfs_nid2str(lnet_nid_t nid);
+__u32       libcfs_str2net(const char *str);
+lnet_nid_t  libcfs_str2nid(const char *str);
+int         libcfs_str2anynid(lnet_nid_t *nid, const char *str);
+char       *libcfs_id2str(lnet_process_id_t id);
+void        cfs_free_nidlist(struct list_head *list);
+int         cfs_parse_nidlist(char *str, int len, struct list_head *list);
+int         cfs_match_nid(lnet_nid_t nid, struct list_head *list);
+
+/** \addtogroup lnet_addr
+ * @{ */
+/* how an LNET NID encodes net:address */
+/** extract the address part of an lnet_nid_t */
+#define LNET_NIDADDR(nid)      ((__u32)((nid) & 0xffffffff))
+/** extract the network part of an lnet_nid_t */
+#define LNET_NIDNET(nid)       ((__u32)(((nid) >> 32)) & 0xffffffff)
+/** make an lnet_nid_t from a network part and an address part */
+#define LNET_MKNID(net, addr)  ((((__u64)(net)) << 32) | ((__u64)(addr)))
+/* how net encodes type:number */
+#define LNET_NETNUM(net)       ((net) & 0xffff)
+#define LNET_NETTYP(net)       (((net) >> 16) & 0xffff)
+#define LNET_MKNET(typ, num)   ((((__u32)(typ)) << 16) | ((__u32)(num)))
+/** @} lnet_addr */
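A worked example of composing and decomposing a NID (illustrative; SOCKLND is LND type 2 from the enum at the end of this header, and 0x0a000001 stands for the IPv4 host 10.0.0.1):

```c
static inline void demo_nid(void)
{
	/* "tcp1": LND type SOCKLND (2), network number 1 */
	__u32 net = LNET_MKNET(SOCKLND, 1);		/* 0x00020001 */
	lnet_nid_t nid = LNET_MKNID(net, 0x0a000001);	/* 10.0.0.1@tcp1 */

	LASSERT(LNET_NIDNET(nid) == net);
	LASSERT(LNET_NIDADDR(nid) == 0x0a000001);
	LASSERT(LNET_NETTYP(net) == SOCKLND);
	LASSERT(LNET_NETNUM(net) == 1);
}
```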
+
+/* max value for numeric network address */
+#define MAX_NUMERIC_VALUE 0xffffffff
+
+/* implication */
+#define ergo(a, b) (!(a) || (b))
+/* logical equivalence */
+#define equi(a, b) (!!(a) == !!(b))
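Typical use is inside assertions (a sketch; all of the names below are invented):

```c
static inline void demo_checks(const char *buf, int buf_len, int locked,
			       void *owner)
{
	/* "if a buffer was supplied, its length must be positive" */
	LASSERT(ergo(buf != NULL, buf_len > 0));

	/* "the locked flag and the owner pointer must agree" */
	LASSERT(equi(locked, owner != NULL));
}
```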
+
+#ifndef CFS_CURRENT_TIME
+# define CFS_CURRENT_TIME time(0)
+#endif
+
+/* --------------------------------------------------------------------
+ * Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect.
+ * All of the LWT code lives in arch/kp30.h.
+ * -------------------------------------------------------------------- */
+
+struct libcfs_device_userstate {
+	int	   ldu_memhog_pages;
+	struct page   *ldu_memhog_root_page;
+};
+
+/* what used to be in portals_lib.h */
+#ifndef MIN
+# define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+#ifndef MAX
+# define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#define MKSTR(ptr) ((ptr) ? (ptr) : "")
+
+/* round up \a val to the next multiple of 4 */
+static inline int cfs_size_round4(int val)
+{
+	return (val + 3) & (~0x3);
+}
+
+#ifndef HAVE_CFS_SIZE_ROUND
+/* round up \a val to the next multiple of 8 */
+static inline int cfs_size_round(int val)
+{
+	return (val + 7) & (~0x7);
+}
+#define HAVE_CFS_SIZE_ROUND
+#endif
+
+/* round up \a val to the next multiple of 16 */
+static inline int cfs_size_round16(int val)
+{
+	return (val + 0xf) & (~0xf);
+}
+
+/* round up \a val to the next multiple of 32 */
+static inline int cfs_size_round32(int val)
+{
+	return (val + 0x1f) & (~0x1f);
+}
+
+/* as cfs_size_round(), but leave room for a terminator and map 0 to 0 */
+static inline int cfs_size_round0(int val)
+{
+	if (!val)
+		return 0;
+	return (val + 1 + 7) & (~0x7);
+}
+
+static inline size_t cfs_round_strlen(char *fset)
+{
+	return (size_t)cfs_size_round((int)strlen(fset) + 1);
+}
+
+/* round up \a val to the next power of 2 */
+static inline unsigned int cfs_power2_roundup(unsigned int val)
+{
+	if (val != LOWEST_BIT_SET(val)) { /* not a power of 2 already */
+		do {
+			val &= ~LOWEST_BIT_SET(val);
+		} while (val != LOWEST_BIT_SET(val));
+		/* ...and round up */
+		val <<= 1;
+	}
+	return val;
+}
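A few worked values, assuming LOWEST_BIT_SET() isolates the lowest set bit in the usual `(x) & -(x)` way:

```c
static inline void demo_power2(void)
{
	LASSERT(cfs_power2_roundup(20) == 32);	/* 0b10100 -> 0b100000 */
	LASSERT(cfs_power2_roundup(32) == 32);	/* already a power of 2 */
	LASSERT(cfs_power2_roundup(1) == 1);
}
```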
+
+/* copy \a len bytes from \a var (when non-NULL) into the buffer at
+ * \a ptr, then advance \a ptr by the 8-byte-rounded length */
+#define LOGL(var, len, ptr)					\
+do {								\
+	if (var)						\
+		memcpy((char *)ptr, (const char *)var, len);	\
+	ptr += cfs_size_round(len);				\
+} while (0)
+
+/* the inverse of LOGL(): copy \a len bytes out of the buffer at
+ * \a ptr into \a var, advancing \a ptr by the same rounded stride */
+#define LOGU(var, len, ptr)					\
+do {								\
+	if (var)						\
+		memcpy((char *)var, (const char *)ptr, len);	\
+	ptr += cfs_size_round(len);				\
+} while (0)
+
+/* as LOGL(), but NUL-terminate the copy; no-op when \a len is zero */
+#define LOGL0(var, len, ptr)					\
+do {								\
+	if (!len)						\
+		break;						\
+	memcpy((char *)ptr, (const char *)var, len);		\
+	*((char *)(ptr) + len) = 0;				\
+	ptr += cfs_size_round(len + 1);				\
+} while (0)
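A sketch of the pack/unpack pairing (illustrative; `buf` is assumed large enough for the rounded length):

```c
static void demo_roundtrip(char *buf, char *name, int name_len)
{
	char *p = buf;

	/* pack: copy in, advance by the 8-byte-rounded length */
	LOGL(name, name_len, p);

	/* unpack: identical walk over the same layout, copying back out */
	p = buf;
	LOGU(name, name_len, p);
}
```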
+
+/**
+ *  Lustre Network Driver types.
+ */
+enum {
+	/* Only add to these values (i.e. don't ever change or redefine them):
+	 * network addresses depend on them... */
+	QSWLND    = 1,
+	SOCKLND   = 2,
+	GMLND     = 3, /* obsolete, keep it so that libcfs_nid2str works */
+	PTLLND    = 4,
+	O2IBLND   = 5,
+	CIBLND    = 6,
+	OPENIBLND = 7,
+	IIBLND    = 8,
+	LOLND     = 9,
+	RALND     = 10,
+	VIBLND    = 11,
+	MXLND     = 12,
+	GNILND    = 13,
+};
+
+#endif

+ 137 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_string.h

@@ -0,0 +1,137 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_string.h
+ *
+ * Generic string manipulation functions.
+ *
+ * Author: Nathan Rutman <nathan.rutman@sun.com>
+ */
+
+#ifndef __LIBCFS_STRING_H__
+#define __LIBCFS_STRING_H__
+
+/* libcfs_string.c */
+/* string comparison ignoring case */
+int cfs_strncasecmp(const char *s1, const char *s2, size_t n);
+/* Convert a text string to a bitmask */
+int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
+		 int *oldmask, int minmask, int allmask);
+
+/* Allocate space for and copy an existing string.
+ * Must free with kfree().
+ */
+char *cfs_strdup(const char *str, u_int32_t flags);
+
+/* safe vsnprintf */
+int cfs_vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
+/* safe snprintf */
+int cfs_snprintf(char *buf, size_t size, const char *fmt, ...);
+
+/* trim leading and trailing space characters */
+char *cfs_firststr(char *str, size_t size);
+
+/**
+ * Structure to represent strings that are not necessarily NUL-terminated
+ * (length-delimited strings).
+ */
+struct cfs_lstr {
+	char		*ls_str;
+	int		ls_len;
+};
+
+/*
+ * Structure to represent a \<range_expr\> token of the syntax.
+ */
+struct cfs_range_expr {
+	/*
+	 * Link to cfs_expr_list::el_exprs.
+	 */
+	struct list_head	re_link;
+	__u32		re_lo;
+	__u32		re_hi;
+	__u32		re_stride;
+};
+
+struct cfs_expr_list {
+	struct list_head	el_link;
+	struct list_head	el_exprs;
+};
+
+static inline int
+cfs_iswhite(char c)
+{
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '\r':
+		return 1;
+	default:
+		break;
+	}
+	return 0;
+}
+
+char *cfs_trimwhite(char *str);
+int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res);
+int cfs_str2num_check(char *str, int nob, unsigned *num,
+		      unsigned min, unsigned max);
+int cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max,
+			 int single_tok, struct cfs_range_expr **expr);
+int cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list);
+int cfs_expr_list_values(struct cfs_expr_list *expr_list,
+			 int max, __u32 **values);
+static inline void
+cfs_expr_list_values_free(__u32 *values, int num)
+{
+	/* This array is allocated by LIBCFS_ALLOC(), so it must not be
+	 * freed with OBD_FREE() when the caller is a module other than
+	 * libcfs or LNet, otherwise the leak checker reports a spurious
+	 * memory leak */
+	LIBCFS_FREE(values, num * sizeof(values[0]));
+}
+
+void cfs_expr_list_free(struct cfs_expr_list *expr_list);
+void cfs_expr_list_print(struct cfs_expr_list *expr_list);
+int cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
+			struct cfs_expr_list **elpp);
+void cfs_expr_list_free_list(struct list_head *list);
+int cfs_ip_addr_parse(char *str, int len, struct list_head *list);
+int cfs_ip_addr_match(__u32 addr, struct list_head *list);
+void cfs_ip_addr_free(struct list_head *list);
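A hedged usage sketch, assuming the usual "lo-hi/stride" range syntax that `cfs_range_expr` (re_lo, re_hi, re_stride) describes:

```c
static int demo_expr(void)
{
	struct cfs_expr_list *el;
	char expr[] = "0-6/2";		/* values 0, 2, 4, 6 */
	int rc;

	rc = cfs_expr_list_parse(expr, sizeof(expr) - 1, 0, 7, &el);
	if (rc != 0)
		return rc;

	LASSERT(cfs_expr_list_match(4, el));
	LASSERT(!cfs_expr_list_match(5, el));

	cfs_expr_list_free(el);
	return 0;
}
```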
+
+#define	strtoul(str, endp, base)	simple_strtoul(str, endp, base)
+
+#endif

+ 132 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_time.h

@@ -0,0 +1,132 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_time.h
+ *
+ * Time functions.
+ *
+ */
+
+#ifndef __LIBCFS_TIME_H__
+#define __LIBCFS_TIME_H__
+/*
+ * generic time manipulation functions.
+ */
+
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+	return (cfs_time_t)(t + d);
+}
+
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+	return (cfs_duration_t)(t1 - t2);
+}
+
+static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2)
+{
+	return cfs_time_before(t2, t1);
+}
+
+static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2)
+{
+	return cfs_time_beforeq(t2, t1);
+}
+
+
+static inline cfs_time_t cfs_time_shift(int seconds)
+{
+	return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds));
+}
+
+static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small,
+				   struct timeval *result)
+{
+	long r = (long) (
+		(large->tv_sec - small->tv_sec) * ONE_MILLION +
+		(large->tv_usec - small->tv_usec));
+	if (result != NULL) {
+		result->tv_usec = r % ONE_MILLION;
+		result->tv_sec = r / ONE_MILLION;
+	}
+	return r;
+}
+
+static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg)
+{
+	if (cfs_time_after(cfs_time_current(),
+			   cfs_time_add(now, cfs_time_seconds(seconds))))
+		CERROR("slow %s "CFS_TIME_T" sec\n", msg,
+		       cfs_duration_sec(cfs_time_sub(cfs_time_current(), now)));
+}
+
+#define CFS_RATELIMIT(seconds)					\
+({								\
+	/*							\
+	 * XXX nikita: non-portable initializer			\
+	 */							\
+	static time_t __next_message = 0;			\
+	int result;						\
+								\
+	if (cfs_time_after(cfs_time_current(), __next_message)) { \
+		__next_message = cfs_time_shift(seconds);	\
+		result = 1;					\
+	} else {						\
+		result = 0;					\
+	}							\
+	result;							\
+})
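Typical use (a sketch; CERROR is the libcfs debug macro):

```c
static void demo_warn_once_per_interval(void)
{
	/* emit at most one message every 30 seconds */
	if (CFS_RATELIMIT(30))
		CERROR("transient failure, retrying\n");
}
```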
+
+/*
+ * helper function similar to do_gettimeofday() of Linux kernel
+ */
+static inline void cfs_fs_timeval(struct timeval *tv)
+{
+	cfs_fs_time_t time;
+
+	cfs_fs_time_current(&time);
+	cfs_fs_time_usec(&time, tv);
+}
+
+/*
+ * return a valid time-out based on a user-supplied one. Currently we only
+ * check that the time-out is not shorter than allowed.
+ */
+static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout)
+{
+	if (timeout < CFS_TICK)
+		timeout = CFS_TICK;
+	return timeout;
+}
+
+#endif

+ 110 - 0
drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h

@@ -0,0 +1,110 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_workitem.h
+ *
+ * Author: Isaac Huang  <he.h.huang@oracle.com>
+ *	 Liang Zhen   <zhen.liang@sun.com>
+ *
+ * A workitem is deferred work with these semantics:
+ * - a workitem always runs in thread context.
+ * - a workitem can be concurrent with other workitems but is strictly
+ *   serialized with respect to itself.
+ * - no CPU affinity, a workitem does not necessarily run on the same CPU
+ *   that schedules it. However, this might change in the future.
+ * - if a workitem is scheduled again before it has a chance to run, it
+ *   runs only once.
+ * - if a workitem is scheduled while it runs, it runs again after it
+ *   completes; this ensures that events occurring while other events are
+ *   being processed receive due attention. This behavior also allows a
+ *   workitem to reschedule itself.
+ *
+ * Usage notes:
+ * - a workitem can sleep but it should be aware of how that sleep might
+ *   affect others.
+ * - a workitem runs inside a kernel thread so there's no user space to access.
+ * - do not use a workitem if the scheduling latency can't be tolerated.
+ *
+ * When wi_action returns non-zero it means the workitem has either been
+ * freed or reused, and the workitem scheduler won't touch it any more.
+ */
+
+#ifndef __LIBCFS_WORKITEM_H__
+#define __LIBCFS_WORKITEM_H__
+
+struct cfs_wi_sched;
+
+void cfs_wi_sched_destroy(struct cfs_wi_sched *);
+int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt,
+			int nthrs, struct cfs_wi_sched **);
+
+struct cfs_workitem;
+
+typedef int (*cfs_wi_action_t)(struct cfs_workitem *);
+typedef struct cfs_workitem {
+	/** chain on runq or rerunq */
+	struct list_head	wi_list;
+	/** working function */
+	cfs_wi_action_t		wi_action;
+	/** argument for the working function */
+	void			*wi_data;
+	/** currently running */
+	unsigned short		wi_running:1;
+	/** scheduled to run */
+	unsigned short		wi_scheduled:1;
+} cfs_workitem_t;
+
+static inline void
+cfs_wi_init(cfs_workitem_t *wi, void *data, cfs_wi_action_t action)
+{
+	INIT_LIST_HEAD(&wi->wi_list);
+
+	wi->wi_running   = 0;
+	wi->wi_scheduled = 0;
+	wi->wi_data      = data;
+	wi->wi_action    = action;
+}
+
+void cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
+int  cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
+void cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
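Putting the pieces together — a hedged sketch (`demo_state` and the scheduler handle are assumptions, not part of this patch; `sched` would come from cfs_wi_sched_create()):

```c
struct demo_state {
	cfs_workitem_t	ds_wi;
	/* ... queue of work owned by this state ... */
};

static int demo_wi_action(cfs_workitem_t *wi)
{
	struct demo_state *ds = wi->wi_data;

	/* runs in a scheduler thread, serialized against itself */
	/* ... process one batch of ds's pending work ... */
	(void)ds;

	return 0;	/* 0: the scheduler may run/reschedule it again */
}

static void demo_start(struct cfs_wi_sched *sched, struct demo_state *ds)
{
	cfs_wi_init(&ds->ds_wi, ds, demo_wi_action);
	cfs_wi_schedule(sched, &ds->ds_wi);
}
```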
+
+int  cfs_wi_startup(void);
+void cfs_wi_shutdown(void);
+
+/** # workitem scheduler loops before reschedule */
+#define CFS_WI_RESCHED    128
+
+#endif /* __LIBCFS_WORKITEM_H__ */

+ 286 - 0
drivers/staging/lustre/include/linux/libcfs/linux/kp30.h

@@ -0,0 +1,286 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_LINUX_KP30_H__
+#define __LIBCFS_LINUX_KP30_H__
+
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/kmod.h>
+#include <linux/notifier.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+#include <linux/rwsem.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/smp.h>
+#include <linux/ctype.h>
+#include <linux/compiler.h>
+#ifdef HAVE_MM_INLINE
+# include <linux/mm_inline.h>
+#endif
+#include <linux/kallsyms.h>
+#include <linux/moduleparam.h>
+#include <linux/scatterlist.h>
+
+#include <linux/libcfs/linux/portals_compat25.h>
+
+
+#define prepare_work(wq,cb,cbdata)					    \
+do {									  \
+	INIT_WORK((wq), (void *)(cb));					\
+} while (0)
+
+#define cfs_get_work_data(type,field,data) container_of(data,type,field)
+
+
+#define our_recalc_sigpending(current) recalc_sigpending()
+#define strtok(a,b) strpbrk(a, b)
+#define work_struct_t      struct work_struct
+
+
+
+#define SEM_COUNT(sem)	  ((sem)->count)
+
+
+/* ------------------------------------------------------------------- */
+
+#define PORTAL_SYMBOL_REGISTER(x)
+#define PORTAL_SYMBOL_UNREGISTER(x)
+
+
+
+
+/******************************************************************************/
+/* Module parameter support */
+#define CFS_MODULE_PARM(name, t, type, perm, desc) \
+	module_param(name, type, perm);\
+	MODULE_PARM_DESC(name, desc)
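Usage sketch (the `t` type string, "i" here, is carried for compatibility with the old portals macro and unused by this Linux flavour):

```c
static int demo_timeout = 30;
CFS_MODULE_PARM(demo_timeout, "i", int, 0644,
		"demo timeout in seconds");
/* expands to module_param(demo_timeout, int, 0644) plus its
 * MODULE_PARM_DESC */
```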
+
+#define CFS_SYSFS_MODULE_PARM  1 /* module parameters accessible via sysfs */
+
+/******************************************************************************/
+
+#if (__GNUC__)
+/* Use the special GNU C __attribute__ hack to have the compiler check the
+ * printf style argument string against the actual argument count and
+ * types.
+ */
+#ifdef printf
+# warning printf has been defined as a macro...
+# undef printf
+#endif
+
+#endif /* __GNUC__ */
+
+# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
+# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
+# define time(a) CURRENT_TIME
+
+# define cfs_num_present_cpus()  num_present_cpus()
+
+/******************************************************************************/
+/* Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect. */
+#define LWT_SUPPORT  0
+
+#define LWT_MEMORY   (16<<20)
+
+#ifndef KLWT_SUPPORT
+#  if !defined(BITS_PER_LONG)
+#   error "BITS_PER_LONG not defined"
+#  endif
+
+/* kernel hasn't defined this? */
+typedef struct {
+	long long   lwte_when;
+	char       *lwte_where;
+	void       *lwte_task;
+	long	lwte_p1;
+	long	lwte_p2;
+	long	lwte_p3;
+	long	lwte_p4;
+# if BITS_PER_LONG > 32
+	long	lwte_pad;
+# endif
+} lwt_event_t;
+#endif /* !KLWT_SUPPORT */
+
+#if LWT_SUPPORT
+#  if !KLWT_SUPPORT
+
+typedef struct _lwt_page {
+	struct list_head	       lwtp_list;
+	struct page	     *lwtp_page;
+	lwt_event_t	     *lwtp_events;
+} lwt_page_t;
+
+typedef struct {
+	int		lwtc_current_index;
+	lwt_page_t	*lwtc_current_page;
+} lwt_cpu_t;
+
+extern int       lwt_enabled;
+extern lwt_cpu_t lwt_cpus[];
+
+/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
+ * This stuff is meant for finding specific problems; it never stays in
+ * production code... */
+
+#define LWTSTR(n)       #n
+#define LWTWHERE(f,l)   f ":" LWTSTR(l)
+#define LWT_EVENTS_PER_PAGE (PAGE_CACHE_SIZE / sizeof (lwt_event_t))
+
+#define LWT_EVENT(p1, p2, p3, p4)				       \
+do {								    \
+	unsigned long    flags;					 \
+	lwt_cpu_t       *cpu;					   \
+	lwt_page_t      *p;					     \
+	lwt_event_t     *e;					     \
+									\
+	if (lwt_enabled) {					      \
+		local_irq_save (flags);				 \
+									\
+		cpu = &lwt_cpus[smp_processor_id()];		    \
+		p = cpu->lwtc_current_page;			     \
+		e = &p->lwtp_events[cpu->lwtc_current_index++];	 \
+									\
+		if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) {   \
+			cpu->lwtc_current_page =			\
+				list_entry (p->lwtp_list.next,      \
+						lwt_page_t, lwtp_list); \
+			cpu->lwtc_current_index = 0;		    \
+		}						       \
+									\
+		e->lwte_when  = get_cycles();			   \
+		e->lwte_where = LWTWHERE(__FILE__,__LINE__);	    \
+		e->lwte_task  = current;				\
+		e->lwte_p1    = (long)(p1);			     \
+		e->lwte_p2    = (long)(p2);			     \
+		e->lwte_p3    = (long)(p3);			     \
+		e->lwte_p4    = (long)(p4);			     \
+									\
+		local_irq_restore (flags);			      \
+	}							       \
+} while (0)
+
+#endif /* !KLWT_SUPPORT */
+
+extern int  lwt_init (void);
+extern void lwt_fini (void);
+extern int  lwt_lookup_string (int *size, char *knlptr,
+			       char *usrptr, int usrsize);
+extern int  lwt_control (int enable, int clear);
+extern int  lwt_snapshot (cfs_cycles_t *now, int *ncpu, int *total_size,
+			  void *user_ptr, int user_size);
+#endif /* LWT_SUPPORT */
+
+/* ------------------------------------------------------------------ */
+
+#define IOCTL_LIBCFS_TYPE long
+
+#ifdef __CYGWIN__
+# ifndef BITS_PER_LONG
+#   define BITS_PER_LONG 64
+# endif
+#endif
+
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+
+/* this is a bit chunky */
+
+#define _LWORDSIZE BITS_PER_LONG
+
+# define LPU64 "%llu"
+# define LPD64 "%lld"
+# define LPX64 "%#llx"
+# define LPX64i "%llx"
+# define LPO64 "%#llo"
+# define LPF64 "L"
+
+/*
+ * long_ptr_t & ulong_ptr_t, same as "long" for gcc
+ */
+# define LPLU "%lu"
+# define LPLD "%ld"
+# define LPLX "%#lx"
+
+/*
+ * pid_t
+ */
+# define LPPID "%d"
+
+
+#undef _LWORDSIZE
+
+/* compat macros */
+
+
+#ifndef get_cpu
+# ifdef CONFIG_PREEMPT
+#  define get_cpu()  ({ preempt_disable(); smp_processor_id(); })
+#  define put_cpu()  preempt_enable()
+# else
+#  define get_cpu()  smp_processor_id()
+#  define put_cpu()
+# endif
+#else
+#endif /* get_cpu & put_cpu */
+
+#define INIT_CTL_NAME(a)
+#define INIT_STRATEGY(a)
+
+#endif

+ 131 - 0
drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h

@@ -0,0 +1,131 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_LINUX_LIBCFS_H__
+#define __LIBCFS_LINUX_LIBCFS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+
+#include <stdarg.h>
+#include <linux/libcfs/linux/linux-cpu.h>
+#include <linux/libcfs/linux/linux-time.h>
+#include <linux/libcfs/linux/linux-mem.h>
+#include <linux/libcfs/linux/linux-prim.h>
+#include <linux/libcfs/linux/linux-lock.h>
+#include <linux/libcfs/linux/linux-fs.h>
+#include <linux/libcfs/linux/linux-tcpip.h>
+#include <linux/libcfs/linux/linux-bitops.h>
+#include <linux/libcfs/linux/linux-types.h>
+#include <linux/libcfs/linux/kp30.h>
+
+#include <asm/types.h>
+#include <linux/types.h>
+#include <asm/timex.h>
+#include <linux/sched.h> /* THREAD_SIZE */
+#include <linux/rbtree.h>
+
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#if !defined(__x86_64__)
+# ifdef  __ia64__
+#  define CDEBUG_STACK() (THREAD_SIZE -				 \
+			  ((unsigned long)__builtin_dwarf_cfa() &       \
+			   (THREAD_SIZE - 1)))
+# else
+#  define CDEBUG_STACK() (THREAD_SIZE -				 \
+			  ((unsigned long)__builtin_frame_address(0) &  \
+			   (THREAD_SIZE - 1)))
+# endif /* __ia64__ */
+
+#define __CHECK_STACK(msgdata, mask, cdls)			      \
+do {								    \
+	if (unlikely(CDEBUG_STACK() > libcfs_stack)) {		  \
+		LIBCFS_DEBUG_MSG_DATA_INIT(msgdata, D_WARNING, NULL);   \
+		libcfs_stack = CDEBUG_STACK();			  \
+		libcfs_debug_msg(msgdata,			       \
+				 "maximum lustre stack %lu\n",	  \
+				 CDEBUG_STACK());		       \
+		(msgdata)->msg_mask = mask;			     \
+		(msgdata)->msg_cdls = cdls;			     \
+		dump_stack();					   \
+	      /*panic("LBUG");*/					\
+	}							       \
+} while (0)
+#define CFS_CHECK_STACK(msgdata, mask, cdls)  __CHECK_STACK(msgdata, mask, cdls)
+#else /* __x86_64__ */
+#define CFS_CHECK_STACK(msgdata, mask, cdls) do {} while (0)
+#define CDEBUG_STACK() (0L)
+#endif /* __x86_64__ */
+
+/* initial pid  */
+#define LUSTRE_LNET_PID	  12345
+
+#define ENTRY_NESTING_SUPPORT (1)
+#define ENTRY_NESTING   do {;} while (0)
+#define EXIT_NESTING   do {;} while (0)
+#define __current_nesting_level() (0)
+
+/**
+ * Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
+ *
+ * Implementation is in linux-curproc.c
+ */
+#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm)
+
+#include <linux/capability.h>
+
+/*
+ * No stack-back-tracing in Linux for now.
+ */
+struct cfs_stack_trace {
+};
+
+/* long integer with size equal to pointer */
+typedef unsigned long ulong_ptr_t;
+typedef long long_ptr_t;
+
+#ifndef WITH_WATCHDOG
+#define WITH_WATCHDOG
+#endif
+
+
+
+
+#endif /* __LIBCFS_LINUX_LIBCFS_H__ */

+ 38 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-bitops.h

@@ -0,0 +1,38 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-bitops.h
+ */
+#include <linux/bitops.h>

+ 175 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h

@@ -0,0 +1,175 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-cpu.h
+ *
+ * Basic library routines.
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#ifndef __LIBCFS_LINUX_CPU_H__
+#define __LIBCFS_LINUX_CPU_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/topology.h>
+#include <linux/version.h>
+
+
+#ifdef CONFIG_SMP
+
+#define HAVE_LIBCFS_CPT
+
+/** virtual processing unit (a single CPU partition) */
+struct cfs_cpu_partition {
+	/* CPUs mask for this partition */
+	cpumask_t			*cpt_cpumask;
+	/* nodes mask for this partition */
+	nodemask_t			*cpt_nodemask;
+	/* spread rotor for NUMA allocator */
+	unsigned			cpt_spread_rotor;
+};
+
+/** descriptor for CPU partitions */
+struct cfs_cpt_table {
+	/* version, reserved for hotplug */
+	unsigned			ctb_version;
+	/* spread rotor for NUMA allocator */
+	unsigned			ctb_spread_rotor;
+	/* # of CPU partitions */
+	unsigned			ctb_nparts;
+	/* partitions tables */
+	struct cfs_cpu_partition	*ctb_parts;
+	/* shadow HW CPU to CPU partition ID */
+	int				*ctb_cpu2cpt;
+	/* all cpus in this partition table */
+	cpumask_t			*ctb_cpumask;
+	/* all nodes in this partition table */
+	nodemask_t			*ctb_nodemask;
+};
+
+void cfs_cpu_core_siblings(int cpu, cpumask_t *mask);
+void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask);
+void cfs_node_to_cpumask(int node, cpumask_t *mask);
+int cfs_cpu_core_nsiblings(int cpu);
+int cfs_cpu_ht_nsiblings(int cpu);
+
+/**
+ * comment out definitions for compatible layer
+ * #define CFS_CPU_NR			  NR_CPUS
+ *
+ * typedef cpumask_t			   cfs_cpumask_t;
+ *
+ * #define cfs_cpu_current()		   smp_processor_id()
+ * #define cfs_cpu_online(i)		   cpu_online(i)
+ * #define cfs_cpu_online_num()		num_online_cpus()
+ * #define cfs_cpu_online_for_each(i)	  for_each_online_cpu(i)
+ * #define cfs_cpu_possible_num()	      num_possible_cpus()
+ * #define cfs_cpu_possible_for_each(i)	for_each_possible_cpu(i)
+ *
+ * #ifdef CONFIG_CPUMASK_SIZE
+ * #define cfs_cpu_mask_size()		 cpumask_size()
+ * #else
+ * #define cfs_cpu_mask_size()		 sizeof(cfs_cpumask_t)
+ * #endif
+ *
+ * #define cfs_cpu_mask_set(i, mask)	   cpu_set(i, mask)
+ * #define cfs_cpu_mask_unset(i, mask)	 cpu_clear(i, mask)
+ * #define cfs_cpu_mask_isset(i, mask)	 cpu_isset(i, mask)
+ * #define cfs_cpu_mask_clear(mask)	    cpus_clear(mask)
+ * #define cfs_cpu_mask_empty(mask)	    cpus_empty(mask)
+ * #define cfs_cpu_mask_weight(mask)	   cpus_weight(mask)
+ * #define cfs_cpu_mask_first(mask)	    first_cpu(mask)
+ * #define cfs_cpu_mask_any_online(mask)      (any_online_cpu(mask) != NR_CPUS)
+ * #define cfs_cpu_mask_for_each(i, mask)      for_each_cpu_mask(i, mask)
+ * #define cfs_cpu_mask_bind(t, mask)	  set_cpus_allowed(t, mask)
+ *
+ * #ifdef HAVE_CPUMASK_COPY
+ * #define cfs_cpu_mask_copy(dst, src)	 cpumask_copy(dst, src)
+ * #else
+ * #define cfs_cpu_mask_copy(dst, src)	 memcpy(dst, src, sizeof(*src))
+ * #endif
+ *
+ * static inline void
+ * cfs_cpu_mask_of_online(cfs_cpumask_t *mask)
+ * {
+ * cfs_cpu_mask_copy(mask, &cpu_online_map);
+ * }
+ *
+ * #ifdef CONFIG_NUMA
+ *
+ * #define CFS_NODE_NR			 MAX_NUMNODES
+ *
+ * typedef nodemask_t			  cfs_node_mask_t;
+ *
+ * #define cfs_node_of_cpu(cpu)		cpu_to_node(cpu)
+ * #define cfs_node_online(i)		  node_online(i)
+ * #define cfs_node_online_num()	       num_online_nodes()
+ * #define cfs_node_online_for_each(i)	 for_each_online_node(i)
+ * #define cfs_node_possible_num()	     num_possible_nodes()
+ * #define cfs_node_possible_for_each(i)       for_each_node(i)
+ *
+ * static inline void cfs_node_to_cpumask(int node, cfs_cpumask_t *mask)
+ * {
+ * #if defined(HAVE_NODE_TO_CPUMASK)
+ *      *mask = node_to_cpumask(node);
+ * #elif defined(HAVE_CPUMASK_OF_NODE)
+ *      cfs_cpu_mask_copy(mask, cpumask_of_node(node));
+ * #else
+ * # error "Needs node_to_cpumask or cpumask_of_node"
+ * #endif
+ * }
+ *
+ * #define cfs_node_mask_set(i, mask)	  node_set(i, mask)
+ * #define cfs_node_mask_unset(i, mask)	node_clear(i, mask)
+ * #define cfs_node_mask_isset(i, mask)	node_isset(i, mask)
+ * #define cfs_node_mask_clear(mask)	   nodes_reset(mask)
+ * #define cfs_node_mask_empty(mask)	   nodes_empty(mask)
+ * #define cfs_node_mask_weight(mask)	  nodes_weight(mask)
+ * #define cfs_node_mask_for_each(i, mask)     for_each_node_mask(i, mask)
+ * #define cfs_node_mask_copy(dst, src)	memcpy(dst, src, sizeof(*src))
+ *
+ * static inline void
+ * cfs_node_mask_of_online(cfs_node_mask_t *mask)
+ * {
+ *       cfs_node_mask_copy(mask, &node_online_map);
+ * }
+ *
+ * #endif
+ */
+
+#endif /* CONFIG_SMP */
+#endif /* __LIBCFS_LINUX_CPU_H__ */

+ 49 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-crypto.h

@@ -0,0 +1,49 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ */
+
+/**
+ * Linux crypto hash specific functions.
+ */
+
+/**
+ * Functions for start/stop shash CRC32 algorithm.
+ */
+int cfs_crypto_crc32_register(void);
+void cfs_crypto_crc32_unregister(void);
+
+/**
+ * Functions for start/stop shash adler32 algorithm.
+ */
+int cfs_crypto_adler32_register(void);
+void cfs_crypto_adler32_unregister(void);
+
+/**
+ * Functions for start/stop shash CRC32 pclmulqdq algorithm.
+ */
+int cfs_crypto_crc32_pclmul_register(void);
+void cfs_crypto_crc32_pclmul_unregister(void);

+ 95 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-fs.h

@@ -0,0 +1,95 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-fs.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_FS_H__
+#define __LIBCFS_LINUX_CFS_FS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/mount.h>
+#include <linux/backing-dev.h>
+#include <linux/posix_acl_xattr.h>
+
+#define filp_size(f)					\
+	(i_size_read((f)->f_dentry->d_inode))
+#define filp_poff(f)					\
+	(&(f)->f_pos)
+
+#define do_fsync(fp, flag)				\
+	((fp)->f_op->fsync(fp, 0, LLONG_MAX, flag))
+
+#define filp_read(fp, buf, size, pos)			\
+	((fp)->f_op->read((fp), (buf), (size), pos))
+
+#define filp_write(fp, buf, size, pos)			\
+	((fp)->f_op->write((fp), (buf), (size), pos))
+
+#define filp_fsync(fp)					\
+	do_fsync(fp, 1)
+
+#define flock_type(fl)			((fl)->fl_type)
+#define flock_set_type(fl, type)	do { (fl)->fl_type = (type); } while (0)
+#define flock_pid(fl)			((fl)->fl_pid)
+#define flock_set_pid(fl, pid)		do { (fl)->fl_pid = (pid); } while (0)
+#define flock_start(fl)			((fl)->fl_start)
+#define flock_set_start(fl, st)		do { (fl)->fl_start = (st); } while (0)
+#define flock_end(fl)			((fl)->fl_end)
+#define flock_set_end(fl, end)		do { (fl)->fl_end = (end); } while (0)
+
+ssize_t filp_user_write(struct file *filp, const void *buf, size_t count,
+			loff_t *offset);
+
+#ifndef IFSHIFT
+#define IFSHIFT			12
+#endif
+
+#ifndef IFTODT
+#define IFTODT(type)		(((type) & S_IFMT) >> IFSHIFT)
+#endif
+#ifndef DTTOIF
+#define DTTOIF(dirtype)		((dirtype) << IFSHIFT)
+#endif
+
+#endif

+ 204 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-lock.h

@@ -0,0 +1,204 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-lock.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_LOCK_H__
+#define __LIBCFS_LINUX_CFS_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/mutex.h>
+
+/*
+ * IMPORTANT !!!!!!!!
+ *
+ * Lock declarations are not guaranteed to be initialized, although some
+ * of them are initialized in Linux. All locks declared by CFS_DECL_*
+ * should be initialized explicitly.
+ */
+
+/*
+ * spin_lock "implementation" (use Linux kernel's primitives)
+ *
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_lock_bh(x)
+ * - spin_lock_bh_init(x)
+ * - spin_unlock(x)
+ * - spin_unlock_bh(x)
+ * - spin_trylock(x)
+ * - spin_is_locked(x)
+ *
+ * - spin_lock_irq(x)
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ * - read_lock_irqsave(lock, f)
+ * - write_lock_irqsave(lock, f)
+ * - write_unlock_irqrestore(lock, f)
+ */
+
+/*
+ * spinlock "implementation"
+ */
+
+
+
+
+/*
+ * rw_semaphore "implementation" (use Linux kernel's primitives)
+ *
+ * - sema_init(x)
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+
+
+#define fini_rwsem(s)		do {} while (0)
+
+
+/*
+ * rwlock_t "implementation" (use Linux kernel's primitives)
+ *
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ * - write_lock_bh(x)
+ * - write_unlock_bh(x)
+ *
+ * - RW_LOCK_UNLOCKED
+ */
+
+
+#ifndef DEFINE_RWLOCK
+#define DEFINE_RWLOCK(lock)	rwlock_t lock = __RW_LOCK_UNLOCKED(lock)
+#endif
+
+/*
+ * completion "implementation" (use Linux kernel's primitives)
+ *
+ * - DECLARE_COMPLETION(work)
+ * - INIT_COMPLETION(c)
+ * - COMPLETION_INITIALIZER(work)
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ * - wait_for_completion_interruptible(c)
+ * - fini_completion(c)
+ */
+#define fini_completion(c) do { } while (0)
+
+/*
+ * semaphore "implementation" (use Linux kernel's primitives)
+ * - DEFINE_SEMAPHORE(name)
+ * - sema_init(sem, val)
+ * - up(sem)
+ * - down(sem)
+ * - down_interruptible(sem)
+ * - down_trylock(sem)
+ */
+
+/*
+ * mutex "implementation" (use Linux kernel's primitives)
+ *
+ * - DEFINE_MUTEX(name)
+ * - mutex_init(x)
+ * - mutex_lock(x)
+ * - mutex_unlock(x)
+ * - mutex_trylock(x)
+ * - mutex_is_locked(x)
+ * - mutex_destroy(x)
+ */
+
+#ifndef lockdep_set_class
+
+/**************************************************************************
+ *
+ * Lockdep "implementation". Also see liblustre.h
+ *
+ **************************************************************************/
+
+struct lock_class_key {
+};
+
+#define lockdep_set_class(lock, key) \
+	do { (void)sizeof(lock); (void)sizeof(key); } while (0)
+/* This has to be a macro, so that `subclass' can be undefined in kernels
+ * that do not support lockdep. */
+
+
+static inline void lockdep_off(void)
+{
+}
+
+static inline void lockdep_on(void)
+{
+}
+#else
+
+#endif /* lockdep_set_class */
+
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
+#ifndef mutex_lock_nested
+#define mutex_lock_nested(mutex, subclass) mutex_lock(mutex)
+#endif
+
+#ifndef spin_lock_nested
+#define spin_lock_nested(lock, subclass) spin_lock(lock)
+#endif
+
+#ifndef down_read_nested
+#define down_read_nested(lock, subclass) down_read(lock)
+#endif
+
+#ifndef down_write_nested
+#define down_write_nested(lock, subclass) down_write(lock)
+#endif
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+
+
+#endif /* __LIBCFS_LINUX_CFS_LOCK_H__ */

+ 139 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h

@@ -0,0 +1,139 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-mem.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_MEM_H__
+#define __LIBCFS_LINUX_CFS_MEM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+
+#define CFS_PAGE_MASK		   (~((__u64)PAGE_CACHE_SIZE-1))
+#define page_index(p)       ((p)->index)
+
+#define memory_pressure_get() (current->flags & PF_MEMALLOC)
+#define memory_pressure_set() do { current->flags |= PF_MEMALLOC; } while (0)
+#define memory_pressure_clr() do { current->flags &= ~PF_MEMALLOC; } while (0)
+
+#if BITS_PER_LONG == 32
+/* limit to lowmem on 32-bit systems */
+#define NUM_CACHEPAGES \
+	min(num_physpages, (1UL << (30 - PAGE_CACHE_SHIFT)) * 3 / 4)
+#else
+#define NUM_CACHEPAGES num_physpages
+#endif
+
+/*
+ * In Linux there is no way to determine whether current execution context is
+ * blockable.
+ */
+#define ALLOC_ATOMIC_TRY   GFP_ATOMIC
+
+#define DECL_MMSPACE		mm_segment_t __oldfs
+#define MMSPACE_OPEN \
+	do { __oldfs = get_fs(); set_fs(get_ds()); } while (0)
+#define MMSPACE_CLOSE	       set_fs(__oldfs)
+
+
+/*
+ * NUMA allocators
+ *
+ * NB: we will rename these functions in a separate patch:
+ * - rename kmalloc to cfs_malloc
+ * - rename kmalloc/free_page to cfs_page_alloc/free
+ * - rename kmalloc/free_large to cfs_vmalloc/vfree
+ */
+extern void *cfs_cpt_malloc(struct cfs_cpt_table *cptab, int cpt,
+			    size_t nr_bytes, unsigned int flags);
+extern void *cfs_cpt_vmalloc(struct cfs_cpt_table *cptab, int cpt,
+			     size_t nr_bytes);
+extern struct page *cfs_page_cpt_alloc(struct cfs_cpt_table *cptab,
+				      int cpt, unsigned int flags);
+extern void *cfs_mem_cache_cpt_alloc(struct kmem_cache *cachep,
+				     struct cfs_cpt_table *cptab,
+				     int cpt, unsigned int flags);
+
+/*
+ * Shrinker
+ */
+
+# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)  \
+		       struct shrinker *shrinker, \
+		       struct shrink_control *sc
+# define shrink_param(sc, var) ((sc)->var)
+
+typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask));
+
+static inline
+struct shrinker *set_shrinker(int seek, shrinker_t func)
+{
+	struct shrinker *s;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (s == NULL)
+		return NULL;
+
+	s->shrink = func;
+	s->seeks = seek;
+
+	register_shrinker(s);
+
+	return s;
+}
+
+static inline
+void remove_shrinker(struct shrinker *shrinker)
+{
+	if (shrinker == NULL)
+		return;
+
+	unregister_shrinker(shrinker);
+	kfree(shrinker);
+}
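A registration sketch under stated assumptions: `demo_cache_count()` and `demo_cache_trim()` are hypothetical helpers standing in for a real object cache, and DEFAULT_SEEKS comes from <linux/mm.h>:

```c
/* hypothetical helpers standing in for a real cache */
int demo_cache_count(void);
void demo_cache_trim(int nr);

static int demo_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
{
	int nr = shrink_param(sc, nr_to_scan);

	if (nr != 0)
		demo_cache_trim(nr);

	return demo_cache_count();	/* objects still cached */
}

static struct shrinker *demo_shrinker;

static void demo_init(void)
{
	demo_shrinker = set_shrinker(DEFAULT_SEEKS, demo_shrink);
}

static void demo_fini(void)
{
	remove_shrinker(demo_shrinker);
}
```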
+
+#endif /* __LINUX_CFS_MEM_H__ */

+ 243 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-prim.h

@@ -0,0 +1,243 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-prim.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_PRIM_H__
+#define __LIBCFS_LINUX_CFS_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/timer.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+#include <linux/random.h>
+
+#include <linux/miscdevice.h>
+#include <linux/libcfs/linux/portals_compat25.h>
+#include <asm/div64.h>
+
+#include <linux/libcfs/linux/linux-time.h>
+
+
+/*
+ * CPU
+ */
+#ifdef for_each_possible_cpu
+#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
+#elif defined(for_each_cpu)
+#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu)
+#endif
+
+#ifdef NR_CPUS
+#else
+#define NR_CPUS     1
+#endif
+
+#define cfs_set_cpus_allowed(t, mask)  set_cpus_allowed(t, mask)
+
+/*
+ * cache
+ */
+
+/*
+ * IRQs
+ */
+
+
+/*
+ * Pseudo device register
+ */
+typedef struct miscdevice		psdev_t;
+
+/*
+ * Sysctl register
+ */
+typedef struct ctl_table		ctl_table_t;
+typedef struct ctl_table_header		ctl_table_header_t;
+
+#define cfs_register_sysctl_table(t, a) register_sysctl_table(t)
+
+#define DECLARE_PROC_HANDLER(name)		      \
+static int					      \
+LL_PROC_PROTO(name)				     \
+{						       \
+	DECLARE_LL_PROC_PPOS_DECL;		      \
+							\
+	return proc_call_handler(table->data, write,    \
+				 ppos, buffer, lenp,    \
+				 __##name);	     \
+}
+
+/*
+ * Symbol register
+ */
+#define cfs_symbol_register(s, p)       do {} while(0)
+#define cfs_symbol_unregister(s)	do {} while(0)
+#define cfs_symbol_get(s)	       symbol_get(s)
+#define cfs_symbol_put(s)	       symbol_put(s)
+
+typedef struct module module_t;
+
+/*
+ * Proc file system APIs
+ */
+typedef struct proc_dir_entry	   proc_dir_entry_t;
+
+/*
+ * Wait Queue
+ */
+
+
+typedef long			    cfs_task_state_t;
+
+#define CFS_DECL_WAITQ(wq)		DECLARE_WAIT_QUEUE_HEAD(wq)
+
+/*
+ * Task struct
+ */
+typedef struct task_struct	      task_t;
+#define DECL_JOURNAL_DATA	   void *journal_info
+#define PUSH_JOURNAL		do {    \
+	journal_info = current->journal_info;   \
+	current->journal_info = NULL;	   \
+	} while(0)
+#define POP_JOURNAL		 do {    \
+	current->journal_info = journal_info;   \
+	} while(0)
+
+/* Module interfaces */
+#define cfs_module(name, version, init, fini) \
+	module_init(init);		    \
+	module_exit(fini)
+
+/*
+ * Signal
+ */
+
+/*
+ * Timer
+ */
+typedef struct timer_list timer_list_t;
+
+
+#ifndef wait_event_timeout /* Only for RHEL3 2.4.21 kernel */
+#define __wait_event_timeout(wq, condition, timeout, ret)	\
+do {							     \
+	if (!(condition)) {				      \
+		wait_queue_t __wait;			     \
+		unsigned long expire;			    \
+								 \
+		init_waitqueue_entry(&__wait, current);	  \
+		expire = timeout + jiffies;		      \
+		add_wait_queue(&wq, &__wait);		    \
+		for (;;) {				       \
+			set_current_state(TASK_UNINTERRUPTIBLE); \
+			if (condition)			   \
+				break;			   \
+			if (jiffies > expire) {		  \
+				ret = jiffies - expire;	  \
+				break;			   \
+			}					\
+			schedule_timeout(timeout);	       \
+		}						\
+		current->state = TASK_RUNNING;		   \
+		remove_wait_queue(&wq, &__wait);		 \
+	}							\
+} while (0)
+/*
+ * retval == 0: condition met, we're good.
+ * retval > 0: timed out.
+ *
+ * Note that this is the opposite of stock wait_event_timeout(), which
+ * returns 0 on timeout, so the two branches below do not agree on the
+ * meaning of ret; callers should re-check the condition itself.
+ */
+#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret)    \
+do {								 \
+	ret = 0;						     \
+	if (!(condition))					    \
+		__wait_event_timeout(wq, condition, timeout, ret);   \
+} while (0)
+#else
+#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret)    \
+	ret = wait_event_timeout(wq, condition, timeout)
+#endif
+
+#define cfs_waitq_wait_event_interruptible_timeout(wq, c, timeout, ret) \
+	ret = wait_event_interruptible_timeout(wq, c, timeout)
+
+/*
+ * atomic
+ */
+
+
+#define cfs_atomic_add_unless(atom, a, u)    atomic_add_unless(atom, a, u)
+#define cfs_atomic_cmpxchg(atom, old, nv)    atomic_cmpxchg(atom, old, nv)
+
+/*
+ * membar
+ */
+
+
+/*
+ * interrupt
+ */
+
+
+/*
+ * might_sleep
+ */
+
+/*
+ * group_info
+ */
+typedef struct group_info group_info_t;
+
+
+/*
+ * Random bytes
+ */
+#endif
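The wait-queue shims above deserve a caution: the legacy fallback returns 0 when the condition was met, while the modern branch inherits wait_event_timeout()'s opposite convention. A minimal usage sketch (not part of the patch; the example_* names and five-second timeout are illustrative assumptions) that sidesteps the mismatch by re-checking the condition:

static CFS_DECL_WAITQ(example_waitq);	/* DECLARE_WAIT_QUEUE_HEAD(example_waitq) */
static int example_done;		/* a producer would set this and wake up */

static void example_wait(void)
{
	long ret;

	/* another context is assumed to set example_done = 1 and call
	 * wake_up(&example_waitq) */
	cfs_waitq_wait_event_timeout(example_waitq, example_done != 0,
				     cfs_time_seconds(5), ret);

	/* re-check the condition instead of interpreting ret, since the
	 * two branches of the macro disagree about its meaning */
	if (!example_done)
		printk(KERN_INFO "example: wait timed out\n");
}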

+ 87 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-tcpip.h

@@ -0,0 +1,87 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-tcpip.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_TCP_H__
+#define __LIBCFS_LINUX_CFS_TCP_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <net/sock.h>
+
+#ifndef HIPQUAD
+/* XXX Should just kill all users */
+#if defined(__LITTLE_ENDIAN)
+#define HIPQUAD(addr) \
+	((unsigned char *)&addr)[3], \
+	((unsigned char *)&addr)[2], \
+	((unsigned char *)&addr)[1], \
+	((unsigned char *)&addr)[0]
+#elif defined(__BIG_ENDIAN)
+#define HIPQUAD NIPQUAD
+#else
+#error "Please fix asm/byteorder.h"
+#endif /* __LITTLE_ENDIAN */
+#endif
+
+typedef struct socket   socket_t;
+
+#define SOCK_SNDBUF(so)	 ((so)->sk->sk_sndbuf)
+#define SOCK_TEST_NOSPACE(so)   test_bit(SOCK_NOSPACE, &(so)->flags)
+
+static inline int
+cfs_sock_error(struct socket *sock)
+{
+	return sock->sk->sk_err;
+}
+
+static inline int
+cfs_sock_wmem_queued(struct socket *sock)
+{
+	return sock->sk->sk_wmem_queued;
+}
+
+#define cfs_sk_sleep(sk)	sk_sleep(sk)
+
+#define DEFAULT_NET	(&init_net)
+
+#endif
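A minimal usage sketch of the socket helpers above (not part of the patch; example_log_peer() and its host-order address argument are illustrative assumptions):

static void example_log_peer(struct socket *sock, __u32 ip_host_order)
{
	/* HIPQUAD() expands to four %u arguments, most significant
	 * byte of the host-order address first */
	printk(KERN_INFO "peer %u.%u.%u.%u: err %d, %d bytes queued\n",
	       HIPQUAD(ip_host_order),
	       cfs_sock_error(sock),
	       cfs_sock_wmem_queued(sock));
}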

+ 275 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h

@@ -0,0 +1,275 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-time.h
+ *
+ * Implementation of portable time API for Linux (kernel and user-level).
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+
+#ifndef __LIBCFS_LINUX_LINUX_TIME_H__
+#define __LIBCFS_LINUX_LINUX_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ *  cfs_time_t	represents point in time. This is internal kernel
+ *		    time rather than "wall clock". This time bears no
+ *		    relation to gettimeofday().
+ *
+ *  cfs_duration_t    represents time interval with resolution of internal
+ *		    platform clock
+ *
+ *  cfs_fs_time_t     represents instance in world-visible time. This is
+ *		    used in file-system time-stamps
+ *
+ *  cfs_time_t     cfs_time_current(void);
+ *  cfs_time_t     cfs_time_add    (cfs_time_t, cfs_duration_t);
+ *  cfs_duration_t cfs_time_sub    (cfs_time_t, cfs_time_t);
+ *  int	    cfs_impl_time_before (cfs_time_t, cfs_time_t);
+ *  int	    cfs_impl_time_before_eq(cfs_time_t, cfs_time_t);
+ *
+ *  cfs_duration_t cfs_duration_build(int64_t);
+ *
+ *  time_t	 cfs_duration_sec (cfs_duration_t);
+ *  void	   cfs_duration_usec(cfs_duration_t, struct timeval *);
+ *  void	   cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ *  void	   cfs_fs_time_current(cfs_fs_time_t *);
+ *  time_t	 cfs_fs_time_sec    (cfs_fs_time_t *);
+ *  void	   cfs_fs_time_usec   (cfs_fs_time_t *, struct timeval *);
+ *  void	   cfs_fs_time_nsec   (cfs_fs_time_t *, struct timespec *);
+ *  int	    cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ *  int	    cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ *  CFS_TIME_FORMAT
+ *  CFS_DURATION_FORMAT
+ *
+ */
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION 1000000
+
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/time.h>
+#include <asm/div64.h>
+
+#include <linux/libcfs/linux/portals_compat25.h>
+
+/*
+ * post 2.5 kernels.
+ */
+
+#include <linux/jiffies.h>
+
+typedef struct timespec cfs_fs_time_t;
+
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+	v->tv_sec  = t->tv_sec;
+	v->tv_usec = t->tv_nsec / 1000;
+}
+
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+	*s = *t;
+}
+
+/*
+ * internal helper function used by cfs_fs_time_before*()
+ */
+static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+	return (unsigned long long)t->tv_sec * ONE_BILLION + t->tv_nsec;
+}
+
+
+/*
+ * Generic kernel stuff
+ */
+
+typedef unsigned long cfs_time_t;      /* jiffies */
+typedef long cfs_duration_t;
+typedef cycles_t cfs_cycles_t;
+
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+	return time_before(t1, t2);
+}
+
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+	return time_before_eq(t1, t2);
+}
+
+static inline cfs_time_t cfs_time_current(void)
+{
+	return jiffies;
+}
+
+static inline time_t cfs_time_current_sec(void)
+{
+	return get_seconds();
+}
+
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+	*t = CURRENT_TIME;
+}
+
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+	return t->tv_sec;
+}
+
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+	return __cfs_fs_time_flat(t1) <  __cfs_fs_time_flat(t2);
+}
+
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+	return __cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2);
+}
+
+#if 0
+static inline cfs_duration_t cfs_duration_build(int64_t nano)
+{
+#if (BITS_PER_LONG == 32)
+	/* We cannot use do_div(t, ONE_BILLION): do_div only handles a
+	 * 64-bit dividend and a 32-bit divisor */
+	int64_t  t = nano * HZ;
+	do_div(t, 1000);
+	do_div(t, 1000000);
+	return (cfs_duration_t)t;
+#else
+	return (nano * HZ / ONE_BILLION);
+#endif
+}
+#endif
+
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+	return ((cfs_duration_t)seconds) * HZ;
+}
+
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+	return d / HZ;
+}
+
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+#if (BITS_PER_LONG == 32) && (HZ > 4096)
+	__u64 t;
+
+	s->tv_sec = d / HZ;
+	t = (d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION;
+	do_div(t, HZ);
+	s->tv_usec = t;
+#else
+	s->tv_sec = d / HZ;
+	s->tv_usec = ((d - (cfs_duration_t)s->tv_sec * HZ) * \
+		ONE_MILLION) / HZ;
+#endif
+}
+
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+#if (BITS_PER_LONG == 32)
+	__u64 t;
+
+	s->tv_sec = d / HZ;
+	t = (d - s->tv_sec * HZ) * ONE_BILLION;
+	do_div(t, HZ);
+	s->tv_nsec = t;
+#else
+	s->tv_sec = d / HZ;
+	s->tv_nsec = ((d - s->tv_sec * HZ) * ONE_BILLION) / HZ;
+#endif
+}
+
+#define cfs_time_current_64 get_jiffies_64
+
+static inline __u64 cfs_time_add_64(__u64 t, __u64 d)
+{
+	return t + d;
+}
+
+static inline __u64 cfs_time_shift_64(int seconds)
+{
+	return cfs_time_add_64(cfs_time_current_64(),
+			       cfs_time_seconds(seconds));
+}
+
+static inline int cfs_time_before_64(__u64 t1, __u64 t2)
+{
+	return (__s64)t2 - (__s64)t1 > 0;
+}
+
+static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2)
+{
+	return (__s64)t2 - (__s64)t1 >= 0;
+}
+
+
+/*
+ * One jiffy
+ */
+#define CFS_TICK		(1)
+
+#define CFS_TIME_T	      "%lu"
+#define CFS_DURATION_T	  "%ld"
+
+
+#endif /* __LIBCFS_LINUX_LINUX_TIME_H__ */
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
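A minimal sketch of the deadline idiom this API is built for (not part of the patch; the ten-second timeout and example names are illustrative assumptions):

static void example_deadline(void)
{
	/* cfs_time_t is jiffies on Linux, so "now + 10s" is plain addition */
	cfs_time_t deadline = cfs_time_current() + cfs_time_seconds(10);
	struct timeval tv;

	if (cfs_time_before(cfs_time_current(), deadline))
		printk(KERN_INFO "deadline " CFS_TIME_T " not reached yet\n",
		       deadline);

	/* split a duration into seconds/microseconds for reporting */
	cfs_duration_usec(cfs_time_seconds(10), &tv);
}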

+ 36 - 0
drivers/staging/lustre/include/linux/libcfs/linux/linux-types.h

@@ -0,0 +1,36 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-types.h
+ */
+#include <linux/types.h>

+ 116 - 0
drivers/staging/lustre/include/linux/libcfs/linux/portals_compat25.h

@@ -0,0 +1,116 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__
+#define __LIBCFS_LINUX_PORTALS_COMPAT_H__
+
+/* XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved */
+#if defined(SPINLOCK_DEBUG) && SPINLOCK_DEBUG
+# define SIGNAL_MASK_ASSERT() \
+	LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+/* XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved */
+
+#define SIGNAL_MASK_LOCK(task, flags)				  \
+	spin_lock_irqsave(&task->sighand->siglock, flags)
+#define SIGNAL_MASK_UNLOCK(task, flags)				\
+	spin_unlock_irqrestore(&task->sighand->siglock, flags)
+#define USERMODEHELPER(path, argv, envp)			       \
+	call_usermodehelper(path, argv, envp, 1)
+#define CLEAR_SIGPENDING(task)	clear_tsk_thread_flag(task,	       \
+						      TIF_SIGPENDING)
+#define smp_num_cpus		num_online_cpus()
+
+#define cfs_wait_event_interruptible(wq, condition, ret)	       \
+	ret = wait_event_interruptible(wq, condition)
+#define cfs_wait_event_interruptible_exclusive(wq, condition, ret)     \
+	ret = wait_event_interruptible_exclusive(wq, condition)
+
+#define THREAD_NAME(comm, len, fmt, a...)			      \
+	snprintf(comm, len, fmt, ## a)
+
+/* 2.6 alloc_page users can use page->lru */
+#define PAGE_LIST_ENTRY lru
+#define PAGE_LIST(page) ((page)->lru)
+
+#ifndef __user
+#define __user
+#endif
+
+#ifndef __fls
+#define __cfs_fls fls
+#else
+#define __cfs_fls __fls
+#endif
+
+#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos)	\
+	proc_dointvec(table, write, buffer, lenp, ppos);
+
+#define ll_proc_dolongvec(table, write, filp, buffer, lenp, ppos)	\
+	proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos)	\
+	proc_dostring(table, write, buffer, lenp, ppos);
+#define LL_PROC_PROTO(name)					     \
+	name(ctl_table_t *table, int write,		      \
+	     void __user *buffer, size_t *lenp, loff_t *ppos)
+#define DECLARE_LL_PROC_PPOS_DECL
+
+/* helper for sysctl handlers */
+int proc_call_handler(void *data, int write,
+		      loff_t *ppos, void *buffer, size_t *lenp,
+		      int (*handler)(void *data, int write,
+				     loff_t pos, void *buffer, int len));
+/*
+ * CPU
+ */
+#ifdef for_each_possible_cpu
+#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
+#elif defined(for_each_cpu)
+#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu)
+#endif
+
+#ifndef NR_CPUS
+#define NR_CPUS     1
+#endif
+
+#define cfs_set_cpus_allowed(t, mask)  set_cpus_allowed(t, mask)
+
+#define cfs_register_sysctl_table(t, a) register_sysctl_table(t)
+
+#endif /* __LIBCFS_LINUX_PORTALS_COMPAT_H__ */
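Together with DECLARE_PROC_HANDLER() from linux-prim.h, the LL_PROC_PROTO()/proc_call_handler() pair above lets each sysctl entry be written as one small worker. A minimal sketch (not part of the patch; the example names are hypothetical and the user-space buffer copying a real worker must do is elided):

static int example_val;

/* worker in the shape proc_call_handler() expects; DECLARE_PROC_HANDLER()
 * wires it up under the name __proc_example */
static int __proc_example(void *data, int write, loff_t pos,
			  void *buffer, int len)
{
	/* a real worker copies between the user buffer and *data */
	return 0;
}

DECLARE_PROC_HANDLER(proc_example)

static ctl_table_t example_ctl_table[] = {
	{
		.procname	= "example",
		.data		= &example_val,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_example,
	},
	{ 0 }
};
/* registered via cfs_register_sysctl_table(example_ctl_table, 0) */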

+ 162 - 0
drivers/staging/lustre/include/linux/libcfs/lucache.h

@@ -0,0 +1,162 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUCACHE_H
+#define _LUCACHE_H
+
+#include <linux/libcfs/libcfs.h>
+
+/** \defgroup ucache ucache
+ *
+ * @{
+ */
+
+#define UC_CACHE_NEW	    0x01
+#define UC_CACHE_ACQUIRING      0x02
+#define UC_CACHE_INVALID	0x04
+#define UC_CACHE_EXPIRED	0x08
+
+#define UC_CACHE_IS_NEW(i)	  ((i)->ue_flags & UC_CACHE_NEW)
+#define UC_CACHE_IS_INVALID(i)      ((i)->ue_flags & UC_CACHE_INVALID)
+#define UC_CACHE_IS_ACQUIRING(i)    ((i)->ue_flags & UC_CACHE_ACQUIRING)
+#define UC_CACHE_IS_EXPIRED(i)      ((i)->ue_flags & UC_CACHE_EXPIRED)
+#define UC_CACHE_IS_VALID(i)	((i)->ue_flags == 0)
+
+#define UC_CACHE_SET_NEW(i)	 (i)->ue_flags |= UC_CACHE_NEW
+#define UC_CACHE_SET_INVALID(i)     (i)->ue_flags |= UC_CACHE_INVALID
+#define UC_CACHE_SET_ACQUIRING(i)   (i)->ue_flags |= UC_CACHE_ACQUIRING
+#define UC_CACHE_SET_EXPIRED(i)     (i)->ue_flags |= UC_CACHE_EXPIRED
+#define UC_CACHE_SET_VALID(i)       (i)->ue_flags = 0
+
+#define UC_CACHE_CLEAR_NEW(i)       (i)->ue_flags &= ~UC_CACHE_NEW
+#define UC_CACHE_CLEAR_ACQUIRING(i) (i)->ue_flags &= ~UC_CACHE_ACQUIRING
+#define UC_CACHE_CLEAR_INVALID(i)   (i)->ue_flags &= ~UC_CACHE_INVALID
+#define UC_CACHE_CLEAR_EXPIRED(i)   (i)->ue_flags &= ~UC_CACHE_EXPIRED
+
+struct upcall_cache_entry;
+
+struct md_perm {
+	lnet_nid_t      mp_nid;
+	__u32	   mp_perm;
+};
+
+struct md_identity {
+	struct upcall_cache_entry *mi_uc_entry;
+	uid_t		      mi_uid;
+	gid_t		      mi_gid;
+	group_info_t	  *mi_ginfo;
+	int			mi_nperms;
+	struct md_perm	    *mi_perms;
+};
+
+struct upcall_cache_entry {
+	struct list_head	      ue_hash;
+	__u64		   ue_key;
+	atomic_t	    ue_refcount;
+	int		     ue_flags;
+	wait_queue_head_t	     ue_waitq;
+	cfs_time_t	      ue_acquire_expire;
+	cfs_time_t	      ue_expire;
+	union {
+		struct md_identity     identity;
+	} u;
+};
+
+#define UC_CACHE_HASH_SIZE	(128)
+#define UC_CACHE_HASH_INDEX(id)   ((id) & (UC_CACHE_HASH_SIZE - 1))
+#define UC_CACHE_UPCALL_MAXPATH   (1024UL)
+
+struct upcall_cache;
+
+struct upcall_cache_ops {
+	void	    (*init_entry)(struct upcall_cache_entry *, void *args);
+	void	    (*free_entry)(struct upcall_cache *,
+				      struct upcall_cache_entry *);
+	int	     (*upcall_compare)(struct upcall_cache *,
+					  struct upcall_cache_entry *,
+					  __u64 key, void *args);
+	int	     (*downcall_compare)(struct upcall_cache *,
+					    struct upcall_cache_entry *,
+					    __u64 key, void *args);
+	int	     (*do_upcall)(struct upcall_cache *,
+				     struct upcall_cache_entry *);
+	int	     (*parse_downcall)(struct upcall_cache *,
+					  struct upcall_cache_entry *, void *);
+};
+
+struct upcall_cache {
+	struct list_head		uc_hashtable[UC_CACHE_HASH_SIZE];
+	spinlock_t		uc_lock;
+	rwlock_t		uc_upcall_rwlock;
+
+	char			uc_name[40];		/* for upcall */
+	char			uc_upcall[UC_CACHE_UPCALL_MAXPATH];
+	int			uc_acquire_expire;	/* seconds */
+	int			uc_entry_expire;	/* seconds */
+	struct upcall_cache_ops	*uc_ops;
+};
+
+struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
+						  __u64 key, void *args);
+void upcall_cache_put_entry(struct upcall_cache *cache,
+			    struct upcall_cache_entry *entry);
+int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
+			  void *args);
+void upcall_cache_flush_idle(struct upcall_cache *cache);
+void upcall_cache_flush_all(struct upcall_cache *cache);
+void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args);
+struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
+				       struct upcall_cache_ops *ops);
+void upcall_cache_cleanup(struct upcall_cache *cache);
+
+#if 0
+struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
+						  __u64 key, __u32 primary,
+						  __u32 ngroups, __u32 *groups);
+void upcall_cache_put_entry(struct upcall_cache *hash,
+			    struct upcall_cache_entry *entry);
+int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
+			  __u32 primary, __u32 ngroups, __u32 *groups);
+void upcall_cache_flush_idle(struct upcall_cache *cache);
+void upcall_cache_flush_all(struct upcall_cache *cache);
+struct upcall_cache *upcall_cache_init(const char *name);
+void upcall_cache_cleanup(struct upcall_cache *hash);
+
+#endif
+
+/** @} ucache */
+
+#endif /* _LUCACHE_H */
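The UC_CACHE_* helpers above encode an entry's life cycle. A minimal sketch of the intended transitions (not part of the patch; locking and the real upcall machinery are elided):

static void example_entry_lifecycle(struct upcall_cache_entry *entry)
{
	UC_CACHE_SET_NEW(entry);		/* freshly hashed, no data yet */
	UC_CACHE_SET_ACQUIRING(entry);		/* user-space upcall in flight */

	/* ... do_upcall()/parse_downcall() fill entry->u.identity ... */

	UC_CACHE_CLEAR_NEW(entry);
	UC_CACHE_CLEAR_ACQUIRING(entry);
	UC_CACHE_SET_VALID(entry);		/* ue_flags == 0 means valid */

	if (UC_CACHE_IS_VALID(entry))
		wake_up_all(&entry->ue_waitq);	/* waiters may use it now */
}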

+ 230 - 0
drivers/staging/lustre/include/linux/libcfs/params_tree.h

@@ -0,0 +1,230 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * API and structure definitions for params_tree.
+ *
+ * Author: LiuYing <emoly.liu@oracle.com>
+ */
+#ifndef __PARAMS_TREE_H__
+#define __PARAMS_TREE_H__
+
+#include <linux/libcfs/libcfs.h>
+
+#undef LPROCFS
+#if  defined(CONFIG_PROC_FS)
+# define LPROCFS
+#endif
+
+#ifdef LPROCFS
+typedef struct file			     cfs_param_file_t;
+typedef struct inode			    cfs_inode_t;
+typedef struct proc_inode		       cfs_proc_inode_t;
+typedef struct seq_file			 cfs_seq_file_t;
+typedef struct seq_operations		   cfs_seq_ops_t;
+typedef struct file_operations		  cfs_param_file_ops_t;
+typedef module_t			   *cfs_param_module_t;
+typedef struct proc_dir_entry		   cfs_param_dentry_t;
+typedef struct poll_table_struct		cfs_poll_table_t;
+#define CFS_PARAM_MODULE			THIS_MODULE
+#define CFS_PDE(value)			  PDE(value)
+#define cfs_file_private(file)		  (file->private_data)
+#define cfs_dentry_data(dentry)		 (dentry->data)
+#define cfs_proc_inode_pde(proc_inode)	  (proc_inode->pde)
+#define cfs_proc_inode(proc_inode)	      (proc_inode->vfs_inode)
+#define cfs_seq_read_common		     seq_read
+#define cfs_seq_lseek_common		    seq_lseek
+#define cfs_seq_private(seq)		    (seq->private)
+#define cfs_seq_printf(seq, format, ...)	seq_printf(seq, format,  \
+							   ## __VA_ARGS__)
+#define cfs_seq_release(inode, file)	    seq_release(inode, file)
+#define cfs_seq_puts(seq, s)		    seq_puts(seq, s)
+#define cfs_seq_putc(seq, s)		    seq_putc(seq, s)
+#define cfs_seq_read(file, buf, count, ppos, rc) (rc = seq_read(file, buf, \
+							    count, ppos))
+#define cfs_seq_open(file, ops, rc)	     (rc = seq_open(file, ops))
+
+/* in lprocfs_stat.c, to protect the private data for proc entries */
+extern struct rw_semaphore		_lprocfs_lock;
+
+/* Starting with kernel 2.6.23, Linux defines its own file_operations
+ * (proc_reg_file_ops) in procfs; the proc file_operations defined by Lustre
+ * (lprocfs_generic_fops) are wrapped inside proc_reg_file_ops, which
+ * introduces a user count in proc_dir_entry (pde_users) to protect a proc
+ * entry from being deleted while in use.  The protection lock
+ * (_lprocfs_lock) defined by Lustre is therefore no longer necessary for
+ * lprocfs_generic_fops (e.g. lprocfs_fops_read).  See bug 19706 for
+ * detailed information.
+ */
+#define LPROCFS_ENTRY() do {} while (0)
+#define LPROCFS_EXIT()  do {} while (0)
+
+static inline
+int LPROCFS_ENTRY_AND_CHECK(struct proc_dir_entry *dp)
+{
+	int deleted = 0;
+
+	spin_lock(&(dp)->pde_unload_lock);
+	if (dp->proc_fops == NULL)
+		deleted = 1;
+	spin_unlock(&(dp)->pde_unload_lock);
+	if (deleted)
+		return -ENODEV;
+	return 0;
+}
+#define LPROCFS_SRCH_ENTRY()	    \
+do {				    \
+	down_read(&_lprocfs_lock);      \
+} while (0)
+
+#define LPROCFS_SRCH_EXIT()	     \
+do {				    \
+	up_read(&_lprocfs_lock);	\
+} while (0)
+
+#define LPROCFS_WRITE_ENTRY()		\
+do {					\
+	down_write(&_lprocfs_lock);	\
+} while (0)
+
+#define LPROCFS_WRITE_EXIT()		\
+do {					\
+	up_write(&_lprocfs_lock);	\
+} while (0)
+#else /* !LPROCFS */
+
+typedef struct cfs_params_file {
+	void	   *param_private;
+	loff_t	  param_pos;
+	unsigned int    param_flags;
+} cfs_param_file_t;
+
+typedef struct cfs_param_inode {
+	void    *param_private;
+} cfs_inode_t;
+
+typedef struct cfs_param_dentry {
+	void *param_data;
+} cfs_param_dentry_t;
+
+typedef struct cfs_proc_inode {
+	cfs_param_dentry_t *param_pde;
+	cfs_inode_t	 param_inode;
+} cfs_proc_inode_t;
+
+struct cfs_seq_operations;
+typedef struct cfs_seq_file {
+	char		      *buf;
+	size_t		     size;
+	size_t		     from;
+	size_t		     count;
+	loff_t		     index;
+	loff_t		     version;
+	struct mutex			lock;
+	struct cfs_seq_operations *op;
+	void		      *private;
+} cfs_seq_file_t;
+
+typedef struct cfs_seq_operations {
+	void *(*start) (cfs_seq_file_t *m, loff_t *pos);
+	void  (*stop) (cfs_seq_file_t *m, void *v);
+	void *(*next) (cfs_seq_file_t *m, void *v, loff_t *pos);
+	int   (*show) (cfs_seq_file_t *m, void *v);
+} cfs_seq_ops_t;
+
+typedef void *cfs_param_module_t;
+typedef void *cfs_poll_table_t;
+
+typedef struct cfs_param_file_ops {
+	cfs_param_module_t owner;
+	int (*open) (cfs_inode_t *, struct file *);
+	loff_t (*llseek)(struct file *, loff_t, int);
+	int (*release) (cfs_inode_t *, cfs_param_file_t *);
+	unsigned int (*poll) (struct file *, cfs_poll_table_t *);
+	ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
+	ssize_t (*read)(struct file *, char *, size_t, loff_t *);
+} cfs_param_file_ops_t;
+typedef cfs_param_file_ops_t *cfs_lproc_filep_t;
+
+static inline cfs_proc_inode_t *FAKE_PROC_I(const cfs_inode_t *inode)
+{
+	return container_of(inode, cfs_proc_inode_t, param_inode);
+}
+
+static inline cfs_param_dentry_t *FAKE_PDE(cfs_inode_t *inode)
+{
+	return FAKE_PROC_I(inode)->param_pde;
+}
+
+#define CFS_PARAM_MODULE			NULL
+#define CFS_PDE(value)			  FAKE_PDE(value)
+#define cfs_file_private(file)		  (file->param_private)
+#define cfs_dentry_data(dentry)		 (dentry->param_data)
+#define cfs_proc_inode(proc_inode)	      (proc_inode->param_inode)
+#define cfs_proc_inode_pde(proc_inode)	  (proc_inode->param_pde)
+#define cfs_seq_read_common		     NULL
+#define cfs_seq_lseek_common		    NULL
+#define cfs_seq_private(seq)		    (seq->private)
+#define cfs_seq_read(file, buf, count, ppos, rc) do {} while (0)
+#define cfs_seq_open(file, ops, rc)		     \
+do {						    \
+	 cfs_seq_file_t *p = cfs_file_private(file);    \
+	 if (!p) {				      \
+		LIBCFS_ALLOC(p, sizeof(*p));	    \
+		if (!p) {			       \
+			rc = -ENOMEM;		   \
+			break;			  \
+		}				       \
+		cfs_file_private(file) = p;	     \
+	}					       \
+	memset(p, 0, sizeof(*p));		       \
+	p->op = ops;				    \
+	rc = 0;					 \
+} while (0)
+
+#define LPROCFS_ENTRY()	     do {} while (0)
+#define LPROCFS_EXIT()	      do {} while (0)
+static inline
+int LPROCFS_ENTRY_AND_CHECK(cfs_param_dentry_t *dp)
+{
+	LPROCFS_ENTRY();
+	return 0;
+}
+#define LPROCFS_WRITE_ENTRY()       do {} while (0)
+#define LPROCFS_WRITE_EXIT()	do {} while (0)
+
+#endif /* LPROCFS */
+
+/* XXX: params_tree APIs */
+
+#endif  /* __PARAMS_TREE_H__ */
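Under LPROCFS the cfs_seq_* typedefs above are the stock kernel seq_file types, so a consumer is the familiar start/next/stop/show quartet; the !LPROCFS branch mirrors the same shape through cfs_seq_ops_t. A minimal single-record sketch (not part of the patch; the example names are hypothetical):

static void *example_seq_start(cfs_seq_file_t *m, loff_t *pos)
{
	return *pos == 0 ? (void *)1 : NULL;	/* one record, at offset 0 */
}

static void *example_seq_next(cfs_seq_file_t *m, void *v, loff_t *pos)
{
	++*pos;
	return NULL;				/* nothing after record 0 */
}

static void example_seq_stop(cfs_seq_file_t *m, void *v)
{
}

static int example_seq_show(cfs_seq_file_t *m, void *v)
{
	cfs_seq_printf(m, "example record\n");	/* seq_printf under LPROCFS */
	return 0;
}

static cfs_seq_ops_t example_seq_ops = {
	.start = example_seq_start,
	.next  = example_seq_next,
	.stop  = example_seq_stop,
	.show  = example_seq_show,
};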

+ 44 - 0
drivers/staging/lustre/include/linux/lnet/api-support.h

@@ -0,0 +1,44 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_API_SUPPORT_H__
+#define __LNET_API_SUPPORT_H__
+
+#include <linux/lnet/linux/api-support.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/types.h>
+#include <linux/lnet/lnet.h>
+
+#endif

+ 220 - 0
drivers/staging/lustre/include/linux/lnet/api.h

@@ -0,0 +1,220 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_API_H__
+#define __LNET_API_H__
+
+/** \defgroup lnet LNet
+ *
+ * The Lustre Networking subsystem.
+ *
+ * LNet is an asynchronous message-passing API that provides an unreliable,
+ * connectionless service with no ordering guarantees. It supports OFA IB,
+ * TCP/IP, and Cray Portals, and routes between heterogeneous networks.
+ *
+ * LNet can run both in OS kernel space and in userspace as a library.
+ * @{
+ */
+
+#include <linux/lnet/types.h>
+
+/** \defgroup lnet_init_fini Initialization and cleanup
+ * LNet must be properly initialized before any LNet calls can be made.
+ * @{ */
+int LNetInit(void);
+void LNetFini(void);
+
+int LNetNIInit(lnet_pid_t requested_pid);
+int LNetNIFini(void);
+/** @} lnet_init_fini */
+
+/** \defgroup lnet_addr LNet addressing and basic types
+ *
+ * Addressing scheme and basic data types of LNet.
+ *
+ * The LNet API is memory-oriented, so LNet must be able to address not only
+ * end-points but also memory regions within a process address space.
+ * An ::lnet_nid_t addresses an end-point. An ::lnet_pid_t identifies a process
+ * in a node. A portal represents an opening in the address space of a
+ * process. Match bits are the criteria used to identify a region of memory
+ * inside a portal, and an offset specifies a location within that region.
+ *
+ * LNet creates a table of portals for each process during initialization.
+ * This table has MAX_PORTALS entries and its size can't be dynamically
+ * changed. A portal stays empty until the owning process starts to add
+ * memory regions to it. A portal is sometimes called an index because
+ * it's an entry in the portals table of a process.
+ *
+ * \see LNetMEAttach
+ * @{ */
+int LNetGetId(unsigned int index, lnet_process_id_t *id);
+int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order);
+void LNetSnprintHandle(char *str, int str_len, lnet_handle_any_t handle);
+
+/** @} lnet_addr */
+
+
+/** \defgroup lnet_me Match entries
+ *
+ * A match entry (abbreviated as ME) describes a set of criteria to accept
+ * incoming requests.
+ *
+ * A portal is essentially a match list plus a set of attributes. A match
+ * list is a chain of MEs. Each ME includes a pointer to a memory descriptor
+ * and a set of match criteria. The match criteria can be used to reject
+ * incoming requests based on process ID or the match bits provided in the
+ * request. MEs can be dynamically inserted into a match list by LNetMEAttach()
+ * and LNetMEInsert(), and removed from its list by LNetMEUnlink().
+ * @{ */
+int LNetMEAttach(unsigned int      portal,
+		 lnet_process_id_t match_id_in,
+		 __u64	     match_bits_in,
+		 __u64	     ignore_bits_in,
+		 lnet_unlink_t     unlink_in,
+		 lnet_ins_pos_t    pos_in,
+		 lnet_handle_me_t *handle_out);
+
+int LNetMEInsert(lnet_handle_me_t  current_in,
+		 lnet_process_id_t match_id_in,
+		 __u64	     match_bits_in,
+		 __u64	     ignore_bits_in,
+		 lnet_unlink_t     unlink_in,
+		 lnet_ins_pos_t    position_in,
+		 lnet_handle_me_t *handle_out);
+
+int LNetMEUnlink(lnet_handle_me_t current_in);
+/** @} lnet_me */
+
+/** \defgroup lnet_md Memory descriptors
+ *
+ * A memory descriptor contains information about a region of a user's
+ * memory (either in kernel or user space) and optionally points to an
+ * event queue where information about the operations performed on the
+ * memory descriptor is recorded. A memory descriptor is abbreviated as
+ * MD and can be used interchangeably with the memory region it describes.
+ *
+ * The LNet API provides two operations to create MDs: LNetMDAttach()
+ * and LNetMDBind(); one operation to unlink and release the resources
+ * associated with an MD: LNetMDUnlink().
+ * @{ */
+int LNetMDAttach(lnet_handle_me_t  current_in,
+		 lnet_md_t	 md_in,
+		 lnet_unlink_t     unlink_in,
+		 lnet_handle_md_t *handle_out);
+
+int LNetMDBind(lnet_md_t	 md_in,
+	       lnet_unlink_t     unlink_in,
+	       lnet_handle_md_t *handle_out);
+
+int LNetMDUnlink(lnet_handle_md_t md_in);
+/** @} lnet_md */
+
+/** \defgroup lnet_eq Events and event queues
+ *
+ * Event queues (abbreviated as EQ) are used to log operations performed on
+ * local MDs. In particular, they signal the completion of a data transmission
+ * into or out of an MD. They can also be used to hold acknowledgments for
+ * completed PUT operations and indicate when an MD has been unlinked. Multiple
+ * MDs can share a single EQ. An EQ may have an optional event handler
+ * associated with it. If an event handler exists, it will be run for each
+ * event that is deposited into the EQ.
+ *
+ * In addition to the lnet_handle_eq_t, the LNet API defines two types
+ * associated with events: the ::lnet_event_kind_t defines the kinds of events
+ * that can be stored in an EQ, and the lnet_event_t defines a structure that
+ * holds the information about an event.
+ *
+ * There are five functions for dealing with EQs: LNetEQAlloc() is used to
+ * create an EQ and allocate the resources needed, while LNetEQFree()
+ * releases these resources and frees the EQ. LNetEQGet() retrieves the next
+ * event from an EQ, and LNetEQWait() can be used to block a process until
+ * an EQ has at least one event. LNetEQPoll() can be used to test or wait
+ * on multiple EQs.
+ * @{ */
+int LNetEQAlloc(unsigned int       count_in,
+		lnet_eq_handler_t  handler,
+		lnet_handle_eq_t  *handle_out);
+
+int LNetEQFree(lnet_handle_eq_t eventq_in);
+
+int LNetEQGet(lnet_handle_eq_t  eventq_in,
+	      lnet_event_t     *event_out);
+
+
+int LNetEQWait(lnet_handle_eq_t  eventq_in,
+	       lnet_event_t     *event_out);
+
+int LNetEQPoll(lnet_handle_eq_t *eventqs_in,
+	       int	       neq_in,
+	       int	       timeout_ms,
+	       lnet_event_t     *event_out,
+	       int	      *which_eq_out);
+/** @} lnet_eq */
+
+/** \defgroup lnet_data Data movement operations
+ *
+ * The LNet API provides two data movement operations: LNetPut()
+ * and LNetGet().
+ * @{ */
+int LNetPut(lnet_nid_t	self,
+	    lnet_handle_md_t  md_in,
+	    lnet_ack_req_t    ack_req_in,
+	    lnet_process_id_t target_in,
+	    unsigned int      portal_in,
+	    __u64	     match_bits_in,
+	    unsigned int      offset_in,
+	    __u64	     hdr_data_in);
+
+int LNetGet(lnet_nid_t	self,
+	    lnet_handle_md_t  md_in,
+	    lnet_process_id_t target_in,
+	    unsigned int      portal_in,
+	    __u64	     match_bits_in,
+	    unsigned int      offset_in);
+/** @} lnet_data */
+
+
+/** \defgroup lnet_misc Miscellaneous operations.
+ * Miscellaneous operations.
+ * @{ */
+
+int LNetSetLazyPortal(int portal);
+int LNetClearLazyPortal(int portal);
+int LNetCtl(unsigned int cmd, void *arg);
+int LNetSetAsync(lnet_process_id_t id, int nasync);
+
+/** @} lnet_misc */
+
+/** @} lnet */
+#endif
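A minimal sketch of the producer-side call order condensed from the sections above (not part of the patch; the portal index, match bits, and buffer are illustrative assumptions, and error paths are trimmed):

static char example_buf[256];

static int example_put(lnet_process_id_t peer)
{
	lnet_handle_md_t mdh;
	lnet_md_t md;
	int rc;

	memset(&md, 0, sizeof(md));
	md.start     = example_buf;
	md.length    = sizeof(example_buf);
	md.threshold = 1;			/* auto-unlink after one op */
	LNetInvalidateHandle(&md.eq_handle);	/* no event queue */

	rc = LNetMDBind(md, LNET_UNLINK, &mdh);	/* describe local memory */
	if (rc != 0)
		return rc;

	/* PUT the buffer to portal 4 on the peer, match bits 0x42,
	 * offset 0, no hdr data, no ACK requested */
	return LNetPut(LNET_NID_ANY, mdh, LNET_NOACK_REQ, peer,
		       4, 0x42, 0, 0);
}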

+ 874 - 0
drivers/staging/lustre/include/linux/lnet/lib-lnet.h

@@ -0,0 +1,874 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/lib-lnet.h
+ *
+ * Top level include for library side routines
+ */
+
+#ifndef __LNET_LIB_LNET_H__
+#define __LNET_LIB_LNET_H__
+
+#include <linux/lnet/linux/lib-lnet.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/types.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-types.h>
+
+extern lnet_t  the_lnet;			/* THE network */
+
+#if  defined(LNET_USE_LIB_FREELIST)
+/* 1 CPT, simplify implementation... */
+# define LNET_CPT_MAX_BITS      0
+
+#else /* KERNEL and no freelist */
+
+# if (BITS_PER_LONG == 32)
+/* 2 CPTs; allowing more CPTs might put us under memory pressure */
+#  define LNET_CPT_MAX_BITS     1
+
+# else /* 64-bit system */
+/*
+ * 256 CPTs for thousands of CPUs; allowing more CPTs might put us at
+ * risk of consuming all lh_cookie values.
+ */
+#  define LNET_CPT_MAX_BITS     8
+# endif /* BITS_PER_LONG == 32 */
+#endif
+
+/* max allowed CPT number */
+#define LNET_CPT_MAX	    (1 << LNET_CPT_MAX_BITS)
+
+#define LNET_CPT_NUMBER	 (the_lnet.ln_cpt_number)
+#define LNET_CPT_BITS	   (the_lnet.ln_cpt_bits)
+#define LNET_CPT_MASK	   ((1ULL << LNET_CPT_BITS) - 1)
+
+/** exclusive lock */
+#define LNET_LOCK_EX	    CFS_PERCPT_LOCK_EX
+
+static inline int lnet_is_wire_handle_none (lnet_handle_wire_t *wh)
+{
+	return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE &&
+		wh->wh_object_cookie == LNET_WIRE_HANDLE_COOKIE_NONE);
+}
+
+static inline int lnet_md_exhausted (lnet_libmd_t *md)
+{
+	return (md->md_threshold == 0 ||
+		((md->md_options & LNET_MD_MAX_SIZE) != 0 &&
+		 md->md_offset + md->md_max_size > md->md_length));
+}
+
+static inline int lnet_md_unlinkable (lnet_libmd_t *md)
+{
+	/* Should unlink md when its refcount is 0 and either:
+	 *  - md has been flagged for deletion (by auto unlink or LNetM[DE]Unlink,
+	 *    in the latter case md may not be exhausted).
+	 *  - auto unlink is on and md is exhausted.
+	 */
+	if (md->md_refcount != 0)
+		return 0;
+
+	if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) != 0)
+		return 1;
+
+	return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 &&
+		lnet_md_exhausted(md));
+}
+
+#define lnet_cpt_table()	(the_lnet.ln_cpt_table)
+#define lnet_cpt_current()	cfs_cpt_current(the_lnet.ln_cpt_table, 1)
+
+static inline int
+lnet_cpt_of_cookie(__u64 cookie)
+{
+	unsigned int cpt = (cookie >> LNET_COOKIE_TYPE_BITS) & LNET_CPT_MASK;
+
+	/* LNET_CPT_NUMBER doesn't have to be a power of 2, which means an
+	 * invalid cookie can yield an out-of-range cpt */
+	return cpt < LNET_CPT_NUMBER ? cpt : cpt % LNET_CPT_NUMBER;
+}
+
+static inline void
+lnet_res_lock(int cpt)
+{
+	cfs_percpt_lock(the_lnet.ln_res_lock, cpt);
+}
+
+static inline void
+lnet_res_unlock(int cpt)
+{
+	cfs_percpt_unlock(the_lnet.ln_res_lock, cpt);
+}
+
+static inline int
+lnet_res_lock_current(void)
+{
+	int cpt = lnet_cpt_current();
+
+	lnet_res_lock(cpt);
+	return cpt;
+}
+
+static inline void
+lnet_net_lock(int cpt)
+{
+	cfs_percpt_lock(the_lnet.ln_net_lock, cpt);
+}
+
+static inline void
+lnet_net_unlock(int cpt)
+{
+	cfs_percpt_unlock(the_lnet.ln_net_lock, cpt);
+}
+
+static inline int
+lnet_net_lock_current(void)
+{
+	int cpt = lnet_cpt_current();
+
+	lnet_net_lock(cpt);
+	return cpt;
+}
+
+#define LNET_LOCK()		lnet_net_lock(LNET_LOCK_EX)
+#define LNET_UNLOCK()		lnet_net_unlock(LNET_LOCK_EX)
+
+
+#define lnet_ptl_lock(ptl)	spin_lock(&(ptl)->ptl_lock)
+#define lnet_ptl_unlock(ptl)	spin_unlock(&(ptl)->ptl_lock)
+#define lnet_eq_wait_lock()	spin_lock(&the_lnet.ln_eq_wait_lock)
+#define lnet_eq_wait_unlock()	spin_unlock(&the_lnet.ln_eq_wait_lock)
+#define lnet_ni_lock(ni)	spin_lock(&(ni)->ni_lock)
+#define lnet_ni_unlock(ni)	spin_unlock(&(ni)->ni_lock)
+#define LNET_MUTEX_LOCK(m)	mutex_lock(m)
+#define LNET_MUTEX_UNLOCK(m)	mutex_unlock(m)
+
+
+#define MAX_PORTALS     64
+
+/* these are only used by code with LNET_USE_LIB_FREELIST, but we still
+ * export them to !LNET_USE_LIB_FREELIST for ease of implementation */
+#define LNET_FL_MAX_MES		2048
+#define LNET_FL_MAX_MDS		2048
+#define LNET_FL_MAX_EQS		512
+#define LNET_FL_MAX_MSGS	2048    /* Outstanding messages */
+
+#ifdef LNET_USE_LIB_FREELIST
+
+int lnet_freelist_init(lnet_freelist_t *fl, int n, int size);
+void lnet_freelist_fini(lnet_freelist_t *fl);
+
+static inline void *
+lnet_freelist_alloc (lnet_freelist_t *fl)
+{
+	/* ALWAYS called with liblock held */
+	lnet_freeobj_t *o;
+
+	if (list_empty (&fl->fl_list))
+		return (NULL);
+
+	o = list_entry (fl->fl_list.next, lnet_freeobj_t, fo_list);
+	list_del (&o->fo_list);
+	return ((void *)&o->fo_contents);
+}
+
+static inline void
+lnet_freelist_free (lnet_freelist_t *fl, void *obj)
+{
+	/* ALWAYS called with liblock held */
+	lnet_freeobj_t *o = list_entry (obj, lnet_freeobj_t, fo_contents);
+
+	list_add (&o->fo_list, &fl->fl_list);
+}
+
+
+static inline lnet_eq_t *
+lnet_eq_alloc (void)
+{
+	/* NEVER called with resource lock held */
+	struct lnet_res_container *rec = &the_lnet.ln_eq_container;
+	lnet_eq_t		  *eq;
+
+	LASSERT(LNET_CPT_NUMBER == 1);
+
+	lnet_res_lock(0);
+	eq = (lnet_eq_t *)lnet_freelist_alloc(&rec->rec_freelist);
+	lnet_res_unlock(0);
+
+	return eq;
+}
+
+static inline void
+lnet_eq_free_locked(lnet_eq_t *eq)
+{
+	/* ALWAYS called with resource lock held */
+	struct lnet_res_container *rec = &the_lnet.ln_eq_container;
+
+	LASSERT(LNET_CPT_NUMBER == 1);
+	lnet_freelist_free(&rec->rec_freelist, eq);
+}
+
+static inline void
+lnet_eq_free(lnet_eq_t *eq)
+{
+	lnet_res_lock(0);
+	lnet_eq_free_locked(eq);
+	lnet_res_unlock(0);
+}
+
+static inline lnet_libmd_t *
+lnet_md_alloc (lnet_md_t *umd)
+{
+	/* NEVER called with resource lock held */
+	struct lnet_res_container *rec = the_lnet.ln_md_containers[0];
+	lnet_libmd_t		  *md;
+
+	LASSERT(LNET_CPT_NUMBER == 1);
+
+	lnet_res_lock(0);
+	md = (lnet_libmd_t *)lnet_freelist_alloc(&rec->rec_freelist);
+	lnet_res_unlock(0);
+
+	if (md != NULL)
+		INIT_LIST_HEAD(&md->md_list);
+
+	return md;
+}
+
+static inline void
+lnet_md_free_locked(lnet_libmd_t *md)
+{
+	/* ALWAYS called with resource lock held */
+	struct lnet_res_container *rec = the_lnet.ln_md_containers[0];
+
+	LASSERT(LNET_CPT_NUMBER == 1);
+	lnet_freelist_free(&rec->rec_freelist, md);
+}
+
+static inline void
+lnet_md_free(lnet_libmd_t *md)
+{
+	lnet_res_lock(0);
+	lnet_md_free_locked(md);
+	lnet_res_unlock(0);
+}
+
+static inline lnet_me_t *
+lnet_me_alloc(void)
+{
+	/* NEVER called with resource lock held */
+	struct lnet_res_container *rec = the_lnet.ln_me_containers[0];
+	lnet_me_t		  *me;
+
+	LASSERT(LNET_CPT_NUMBER == 1);
+
+	lnet_res_lock(0);
+	me = (lnet_me_t *)lnet_freelist_alloc(&rec->rec_freelist);
+	lnet_res_unlock(0);
+
+	return me;
+}
+
+static inline void
+lnet_me_free_locked(lnet_me_t *me)
+{
+	/* ALWAYS called with resource lock held */
+	struct lnet_res_container *rec = the_lnet.ln_me_containers[0];
+
+	LASSERT(LNET_CPT_NUMBER == 1);
+	lnet_freelist_free(&rec->rec_freelist, me);
+}
+
+static inline void
+lnet_me_free(lnet_me_t *me)
+{
+	lnet_res_lock(0);
+	lnet_me_free_locked(me);
+	lnet_res_unlock(0);
+}
+
+static inline lnet_msg_t *
+lnet_msg_alloc (void)
+{
+	/* NEVER called with network lock held */
+	struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0];
+	lnet_msg_t		  *msg;
+
+	LASSERT(LNET_CPT_NUMBER == 1);
+
+	lnet_net_lock(0);
+	msg = (lnet_msg_t *)lnet_freelist_alloc(&msc->msc_freelist);
+	lnet_net_unlock(0);
+
+	if (msg != NULL) {
+		/* NULL pointers, clear flags etc */
+		memset(msg, 0, sizeof(*msg));
+	}
+	return msg;
+}
+
+static inline void
+lnet_msg_free_locked(lnet_msg_t *msg)
+{
+	/* ALWAYS called with network lock held */
+	struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0];
+
+	LASSERT(LNET_CPT_NUMBER == 1);
+	LASSERT(!msg->msg_onactivelist);
+	lnet_freelist_free(&msc->msc_freelist, msg);
+}
+
+static inline void
+lnet_msg_free (lnet_msg_t *msg)
+{
+	lnet_net_lock(0);
+	lnet_msg_free_locked(msg);
+	lnet_net_unlock(0);
+}
+
+#else /* !LNET_USE_LIB_FREELIST */
+
+static inline lnet_eq_t *
+lnet_eq_alloc (void)
+{
+	/* NEVER called with liblock held */
+	lnet_eq_t *eq;
+
+	LIBCFS_ALLOC(eq, sizeof(*eq));
+	return (eq);
+}
+
+static inline void
+lnet_eq_free(lnet_eq_t *eq)
+{
+	/* ALWAYS called with resource lock held */
+	LIBCFS_FREE(eq, sizeof(*eq));
+}
+
+static inline lnet_libmd_t *
+lnet_md_alloc (lnet_md_t *umd)
+{
+	/* NEVER called with liblock held */
+	lnet_libmd_t *md;
+	unsigned int  size;
+	unsigned int  niov;
+
+	if ((umd->options & LNET_MD_KIOV) != 0) {
+		niov = umd->length;
+		size = offsetof(lnet_libmd_t, md_iov.kiov[niov]);
+	} else {
+		niov = ((umd->options & LNET_MD_IOVEC) != 0) ?
+		       umd->length : 1;
+		size = offsetof(lnet_libmd_t, md_iov.iov[niov]);
+	}
+
+	LIBCFS_ALLOC(md, size);
+
+	if (md != NULL) {
+		/* Set here in case of early free */
+		md->md_options = umd->options;
+		md->md_niov = niov;
+		INIT_LIST_HEAD(&md->md_list);
+	}
+
+	return (md);
+}
+
+static inline void
+lnet_md_free(lnet_libmd_t *md)
+{
+	/* ALWAYS called with resource lock held */
+	unsigned int  size;
+
+	if ((md->md_options & LNET_MD_KIOV) != 0)
+		size = offsetof(lnet_libmd_t, md_iov.kiov[md->md_niov]);
+	else
+		size = offsetof(lnet_libmd_t, md_iov.iov[md->md_niov]);
+
+	LIBCFS_FREE(md, size);
+}
+
+static inline lnet_me_t *
+lnet_me_alloc (void)
+{
+	/* NEVER called with liblock held */
+	lnet_me_t *me;
+
+	LIBCFS_ALLOC(me, sizeof(*me));
+	return (me);
+}
+
+static inline void
+lnet_me_free(lnet_me_t *me)
+{
+	/* ALWAYS called with resource lock held */
+	LIBCFS_FREE(me, sizeof(*me));
+}
+
+static inline lnet_msg_t *
+lnet_msg_alloc(void)
+{
+	/* NEVER called with liblock held */
+	lnet_msg_t *msg;
+
+	LIBCFS_ALLOC(msg, sizeof(*msg));
+
+	/* no need to zero, LIBCFS_ALLOC does it for us */
+	return (msg);
+}
+
+static inline void
+lnet_msg_free(lnet_msg_t *msg)
+{
+	/* ALWAYS called with network lock held */
+	LASSERT(!msg->msg_onactivelist);
+	LIBCFS_FREE(msg, sizeof(*msg));
+}
+
+#define lnet_eq_free_locked(eq)		lnet_eq_free(eq)
+#define lnet_md_free_locked(md)		lnet_md_free(md)
+#define lnet_me_free_locked(me)		lnet_me_free(me)
+#define lnet_msg_free_locked(msg)	lnet_msg_free(msg)
+
+#endif /* LNET_USE_LIB_FREELIST */
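A minimal sketch of the freelist primitives used throughout the branch above (not part of the patch; these only exist under LNET_USE_LIB_FREELIST, the names are illustrative, and the caller is assumed to hold the lock the comments above demand around alloc/free):

struct example_obj {
	int eo_value;
};

static lnet_freelist_t example_fl;

static int example_freelist_demo(void)
{
	struct example_obj *obj;
	int rc;

	/* carve a fixed pool of 16 objects up front */
	rc = lnet_freelist_init(&example_fl, 16, sizeof(struct example_obj));
	if (rc != 0)
		return rc;

	obj = lnet_freelist_alloc(&example_fl);		/* O(1) list_del */
	if (obj != NULL)
		lnet_freelist_free(&example_fl, obj);	/* O(1) list_add */

	lnet_freelist_fini(&example_fl);
	return 0;
}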
+
+lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec,
+				     __u64 cookie);
+void lnet_res_lh_initialize(struct lnet_res_container *rec,
+			    lnet_libhandle_t *lh);
+static inline void
+lnet_res_lh_invalidate(lnet_libhandle_t *lh)
+{
+	/* ALWAYS called with resource lock held */
+	/* NB: cookie is still useful, don't reset it */
+	list_del(&lh->lh_hash_chain);
+}
+
+static inline void
+lnet_eq2handle (lnet_handle_eq_t *handle, lnet_eq_t *eq)
+{
+	if (eq == NULL) {
+		LNetInvalidateHandle(handle);
+		return;
+	}
+
+	handle->cookie = eq->eq_lh.lh_cookie;
+}
+
+static inline lnet_eq_t *
+lnet_handle2eq(lnet_handle_eq_t *handle)
+{
+	/* ALWAYS called with resource lock held */
+	lnet_libhandle_t *lh;
+
+	lh = lnet_res_lh_lookup(&the_lnet.ln_eq_container, handle->cookie);
+	if (lh == NULL)
+		return NULL;
+
+	return lh_entry(lh, lnet_eq_t, eq_lh);
+}
+
+static inline void
+lnet_md2handle (lnet_handle_md_t *handle, lnet_libmd_t *md)
+{
+	handle->cookie = md->md_lh.lh_cookie;
+}
+
+static inline lnet_libmd_t *
+lnet_handle2md(lnet_handle_md_t *handle)
+{
+	/* ALWAYS called with resource lock held */
+	lnet_libhandle_t *lh;
+	int		 cpt;
+
+	cpt = lnet_cpt_of_cookie(handle->cookie);
+	lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
+				handle->cookie);
+	if (lh == NULL)
+		return NULL;
+
+	return lh_entry(lh, lnet_libmd_t, md_lh);
+}
+
+static inline lnet_libmd_t *
+lnet_wire_handle2md(lnet_handle_wire_t *wh)
+{
+	/* ALWAYS called with resource lock held */
+	lnet_libhandle_t *lh;
+	int		 cpt;
+
+	if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie)
+		return NULL;
+
+	cpt = lnet_cpt_of_cookie(wh->wh_object_cookie);
+	lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
+				wh->wh_object_cookie);
+	if (lh == NULL)
+		return NULL;
+
+	return lh_entry(lh, lnet_libmd_t, md_lh);
+}
+
+static inline void
+lnet_me2handle (lnet_handle_me_t *handle, lnet_me_t *me)
+{
+	handle->cookie = me->me_lh.lh_cookie;
+}
+
+static inline lnet_me_t *
+lnet_handle2me(lnet_handle_me_t *handle)
+{
+	/* ALWAYS called with resource lock held */
+	lnet_libhandle_t *lh;
+	int		 cpt;
+
+	cpt = lnet_cpt_of_cookie(handle->cookie);
+	lh = lnet_res_lh_lookup(the_lnet.ln_me_containers[cpt],
+				handle->cookie);
+	if (lh == NULL)
+		return NULL;
+
+	return lh_entry(lh, lnet_me_t, me_lh);
+}
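A minimal sketch of the cookie round trip the helpers above implement (not part of the patch; it assumes taking the resource lock for CPT 0 is valid for EQ lookups, since EQs live in the single ln_eq_container):

static int example_handle_roundtrip(lnet_eq_t *eq)
{
	lnet_handle_eq_t h;
	lnet_eq_t *found;

	lnet_eq2handle(&h, eq);		/* object -> opaque 64-bit cookie */

	lnet_res_lock(0);
	found = lnet_handle2eq(&h);	/* cookie -> object, NULL if stale */
	lnet_res_unlock(0);

	return found == eq;
}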
+
+static inline void
+lnet_peer_addref_locked(lnet_peer_t *lp)
+{
+	LASSERT (lp->lp_refcount > 0);
+	lp->lp_refcount++;
+}
+
+extern void lnet_destroy_peer_locked(lnet_peer_t *lp);
+
+static inline void
+lnet_peer_decref_locked(lnet_peer_t *lp)
+{
+	LASSERT (lp->lp_refcount > 0);
+	lp->lp_refcount--;
+	if (lp->lp_refcount == 0)
+		lnet_destroy_peer_locked(lp);
+}
+
+static inline int
+lnet_isrouter(lnet_peer_t *lp)
+{
+	return lp->lp_rtr_refcount != 0;
+}
+
+static inline void
+lnet_ni_addref_locked(lnet_ni_t *ni, int cpt)
+{
+	LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
+	LASSERT(*ni->ni_refs[cpt] >= 0);
+
+	(*ni->ni_refs[cpt])++;
+}
+
+static inline void
+lnet_ni_addref(lnet_ni_t *ni)
+{
+	lnet_net_lock(0);
+	lnet_ni_addref_locked(ni, 0);
+	lnet_net_unlock(0);
+}
+
+static inline void
+lnet_ni_decref_locked(lnet_ni_t *ni, int cpt)
+{
+	LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
+	LASSERT(*ni->ni_refs[cpt] > 0);
+
+	(*ni->ni_refs[cpt])--;
+}
+
+static inline void
+lnet_ni_decref(lnet_ni_t *ni)
+{
+	lnet_net_lock(0);
+	lnet_ni_decref_locked(ni, 0);
+	lnet_net_unlock(0);
+}
+
+void lnet_ni_free(lnet_ni_t *ni);
+
+static inline int
+lnet_nid2peerhash(lnet_nid_t nid)
+{
+	return cfs_hash_long(nid, LNET_PEER_HASH_BITS);
+}
+
+static inline struct list_head *
+lnet_net2rnethash(__u32 net)
+{
+	return &the_lnet.ln_remote_nets_hash[(LNET_NETNUM(net) +
+		LNET_NETTYP(net)) &
+		((1U << the_lnet.ln_remote_nets_hbits) - 1)];
+}
+
+extern lnd_t the_lolnd;
+
+
+extern int lnet_cpt_of_nid_locked(lnet_nid_t nid);
+extern int lnet_cpt_of_nid(lnet_nid_t nid);
+extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
+extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt);
+extern lnet_ni_t *lnet_net2ni(__u32 net);
+
+int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, cfs_time_t when);
+void lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when);
+int lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway_nid);
+int lnet_check_routes(void);
+int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
+void lnet_destroy_routes(void);
+int lnet_get_route(int idx, __u32 *net, __u32 *hops,
+		   lnet_nid_t *gateway, __u32 *alive);
+void lnet_proc_init(void);
+void lnet_proc_fini(void);
+int  lnet_rtrpools_alloc(int im_a_router);
+void lnet_rtrpools_free(void);
+lnet_remotenet_t *lnet_find_net_locked (__u32 net);
+
+int lnet_islocalnid(lnet_nid_t nid);
+int lnet_islocalnet(__u32 net);
+
+void lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md,
+			unsigned int offset, unsigned int mlen);
+void lnet_msg_detach_md(lnet_msg_t *msg, int status);
+void lnet_build_unlink_event(lnet_libmd_t *md, lnet_event_t *ev);
+void lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type);
+void lnet_msg_commit(lnet_msg_t *msg, int cpt);
+void lnet_msg_decommit(lnet_msg_t *msg, int cpt, int status);
+
+void lnet_eq_enqueue_event(lnet_eq_t *eq, lnet_event_t *ev);
+void lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target,
+		    unsigned int offset, unsigned int len);
+int lnet_send(lnet_nid_t nid, lnet_msg_t *msg, lnet_nid_t rtr_nid);
+void lnet_return_tx_credits_locked(lnet_msg_t *msg);
+void lnet_return_rx_credits_locked(lnet_msg_t *msg);
+
+/* portals functions */
+/* portals attributes */
+static inline int
+lnet_ptl_is_lazy(lnet_portal_t *ptl)
+{
+	return !!(ptl->ptl_options & LNET_PTL_LAZY);
+}
+
+static inline int
+lnet_ptl_is_unique(lnet_portal_t *ptl)
+{
+	return !!(ptl->ptl_options & LNET_PTL_MATCH_UNIQUE);
+}
+
+static inline int
+lnet_ptl_is_wildcard(lnet_portal_t *ptl)
+{
+	return !!(ptl->ptl_options & LNET_PTL_MATCH_WILDCARD);
+}
+
+static inline void
+lnet_ptl_setopt(lnet_portal_t *ptl, int opt)
+{
+	ptl->ptl_options |= opt;
+}
+
+static inline void
+lnet_ptl_unsetopt(lnet_portal_t *ptl, int opt)
+{
+	ptl->ptl_options &= ~opt;
+}
+
+/* match-table functions */
+struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable,
+			       lnet_process_id_t id, __u64 mbits);
+struct lnet_match_table *lnet_mt_of_attach(unsigned int index,
+					   lnet_process_id_t id, __u64 mbits,
+					   __u64 ignore_bits,
+					   lnet_ins_pos_t pos);
+int lnet_mt_match_md(struct lnet_match_table *mtable,
+		     struct lnet_match_info *info, struct lnet_msg *msg);
+
+/* portals match/attach functions */
+void lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
+			struct list_head *matches, struct list_head *drops);
+void lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md);
+int lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg);
+
+/* initialize and finalize portals */
+int lnet_portals_create(void);
+void lnet_portals_destroy(void);
+
+/* message functions */
+int lnet_parse (lnet_ni_t *ni, lnet_hdr_t *hdr,
+		lnet_nid_t fromnid, void *private, int rdma_req);
+void lnet_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
+	       unsigned int offset, unsigned int mlen, unsigned int rlen);
+lnet_msg_t *lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *get_msg);
+void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *msg, unsigned int len);
+void lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int rc);
+void lnet_drop_delayed_msg_list(struct list_head *head, char *reason);
+void lnet_recv_delayed_msg_list(struct list_head *head);
+
+int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt);
+void lnet_msg_container_cleanup(struct lnet_msg_container *container);
+void lnet_msg_containers_destroy(void);
+int lnet_msg_containers_create(void);
+
+char *lnet_msgtyp2str (int type);
+void lnet_print_hdr (lnet_hdr_t * hdr);
+int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold);
+
+void lnet_counters_get(lnet_counters_t *counters);
+void lnet_counters_reset(void);
+
+unsigned int lnet_iov_nob (unsigned int niov, struct iovec *iov);
+int lnet_extract_iov (int dst_niov, struct iovec *dst,
+		      int src_niov, struct iovec *src,
+		      unsigned int offset, unsigned int len);
+
+unsigned int lnet_kiov_nob (unsigned int niov, lnet_kiov_t *iov);
+int lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst,
+		      int src_niov, lnet_kiov_t *src,
+		      unsigned int offset, unsigned int len);
+
+void lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov,
+			unsigned int doffset,
+			unsigned int nsiov, struct iovec *siov,
+			unsigned int soffset, unsigned int nob);
+void lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov,
+			 unsigned int iovoffset,
+			 unsigned int nkiov, lnet_kiov_t *kiov,
+			 unsigned int kiovoffset, unsigned int nob);
+void lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov,
+			 unsigned int kiovoffset,
+			 unsigned int niov, struct iovec *iov,
+			 unsigned int iovoffset, unsigned int nob);
+void lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov,
+			  unsigned int doffset,
+			  unsigned int nskiov, lnet_kiov_t *skiov,
+			  unsigned int soffset, unsigned int nob);
+
+static inline void
+lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset,
+		   unsigned int nsiov, struct iovec *siov, unsigned int soffset,
+		   unsigned int nob)
+{
+	struct iovec diov = {/*.iov_base = */ dest, /*.iov_len = */ dlen};
+
+	lnet_copy_iov2iov(1, &diov, doffset,
+			  nsiov, siov, soffset, nob);
+}
+
+static inline void
+lnet_copy_kiov2flat(int dlen, void *dest, unsigned int doffset,
+		    unsigned int nsiov, lnet_kiov_t *skiov, unsigned int soffset,
+		    unsigned int nob)
+{
+	struct iovec diov = {/* .iov_base = */ dest, /* .iov_len = */ dlen};
+
+	lnet_copy_kiov2iov(1, &diov, doffset,
+			   nsiov, skiov, soffset, nob);
+}
+
+static inline void
+lnet_copy_flat2iov(unsigned int ndiov, struct iovec *diov, unsigned int doffset,
+		   int slen, void *src, unsigned int soffset, unsigned int nob)
+{
+	struct iovec siov = {/*.iov_base = */ src, /*.iov_len = */slen};
+	lnet_copy_iov2iov(ndiov, diov, doffset,
+			  1, &siov, soffset, nob);
+}
+
+static inline void
+lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov, unsigned int doffset,
+		    int slen, void *src, unsigned int soffset, unsigned int nob)
+{
+	struct iovec siov = {/* .iov_base = */ src, /* .iov_len = */ slen};
+	lnet_copy_iov2kiov(ndiov, dkiov, doffset,
+			   1, &siov, soffset, nob);
+}
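The four *flat* wrappers above simply describe a flat buffer as a one-element iovec so that every copy funnels through the iov/kiov copy routines. A hedged usage sketch, where 'msg' and 'len' are hypothetical (a received message and a byte count):

char buf[128];

/* gather the first 'len' payload bytes of the message into 'buf' */
lnet_copy_iov2flat(sizeof(buf), buf, 0,
		   msg->msg_niov, msg->msg_iov,
		   msg->msg_offset, len);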
+
+void lnet_me_unlink(lnet_me_t *me);
+
+void lnet_md_unlink(lnet_libmd_t *md);
+void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd);
+
+void lnet_register_lnd(lnd_t *lnd);
+void lnet_unregister_lnd(lnd_t *lnd);
+int lnet_set_ip_niaddr (lnet_ni_t *ni);
+
+int lnet_connect(socket_t **sockp, lnet_nid_t peer_nid,
+		 __u32 local_ip, __u32 peer_ip, int peer_port);
+void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
+				__u32 peer_ip, int port);
+int lnet_count_acceptor_nis(void);
+int lnet_acceptor_timeout(void);
+int lnet_acceptor_port(void);
+
+int lnet_acceptor_start(void);
+void lnet_acceptor_stop(void);
+
+void lnet_get_tunables(void);
+int lnet_peers_start_down(void);
+int lnet_peer_buffer_credits(lnet_ni_t *ni);
+
+int lnet_router_checker_start(void);
+void lnet_router_checker_stop(void);
+void lnet_swap_pinginfo(lnet_ping_info_t *info);
+
+int lnet_ping_target_init(void);
+void lnet_ping_target_fini(void);
+int lnet_ping(lnet_process_id_t id, int timeout_ms,
+	      lnet_process_id_t *ids, int n_ids);
+
+int lnet_parse_ip2nets (char **networksp, char *ip2nets);
+int lnet_parse_routes (char *route_str, int *im_a_router);
+int lnet_parse_networks (struct list_head *nilist, char *networks);
+
+int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt);
+lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable,
+				   lnet_nid_t nid);
+void lnet_peer_tables_cleanup(void);
+void lnet_peer_tables_destroy(void);
+int lnet_peer_tables_create(void);
+void lnet_debug_peer(lnet_nid_t nid);
+
+
+#endif

+ 765 - 0
drivers/staging/lustre/include/linux/lnet/lib-types.h

@@ -0,0 +1,765 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/lib-types.h
+ *
+ * Types used by the library side routines that do not need to be
+ * exposed to the user application
+ */
+
+#ifndef __LNET_LIB_TYPES_H__
+#define __LNET_LIB_TYPES_H__
+
+#include <linux/lnet/linux/lib-types.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/list.h>
+#include <linux/lnet/types.h>
+
+#define WIRE_ATTR       __attribute__((packed))
+
+/* Packed version of lnet_process_id_t to transfer via network */
+typedef struct {
+	lnet_nid_t nid;
+	lnet_pid_t pid;   /* node id / process id */
+} WIRE_ATTR lnet_process_id_packed_t;
+
+/* The wire handle's interface cookie only matches one network interface in
+ * one epoch (i.e. new cookie when the interface restarts or the node
+ * reboots).  The object cookie only matches one object on that interface
+ * during that object's lifetime (i.e. no cookie re-use). */
+typedef struct {
+	__u64 wh_interface_cookie;
+	__u64 wh_object_cookie;
+} WIRE_ATTR lnet_handle_wire_t;
+
+typedef enum {
+	LNET_MSG_ACK = 0,
+	LNET_MSG_PUT,
+	LNET_MSG_GET,
+	LNET_MSG_REPLY,
+	LNET_MSG_HELLO,
+} lnet_msg_type_t;
+
+/* The variant fields of the portals message header are aligned on an 8
+ * byte boundary in the message header.  Note that all types used in these
+ * wire structs MUST be fixed size and the smaller types are placed at the
+ * end. */
+typedef struct lnet_ack {
+	lnet_handle_wire_t  dst_wmd;
+	__u64	       match_bits;
+	__u32	       mlength;
+} WIRE_ATTR lnet_ack_t;
+
+typedef struct lnet_put {
+	lnet_handle_wire_t  ack_wmd;
+	__u64	       match_bits;
+	__u64	       hdr_data;
+	__u32	       ptl_index;
+	__u32	       offset;
+} WIRE_ATTR lnet_put_t;
+
+typedef struct lnet_get {
+	lnet_handle_wire_t  return_wmd;
+	__u64	       match_bits;
+	__u32	       ptl_index;
+	__u32	       src_offset;
+	__u32	       sink_length;
+} WIRE_ATTR lnet_get_t;
+
+typedef struct lnet_reply {
+	lnet_handle_wire_t  dst_wmd;
+} WIRE_ATTR lnet_reply_t;
+
+typedef struct lnet_hello {
+	__u64	      incarnation;
+	__u32	      type;
+} WIRE_ATTR lnet_hello_t;
+
+typedef struct {
+	lnet_nid_t	  dest_nid;
+	lnet_nid_t	  src_nid;
+	lnet_pid_t	  dest_pid;
+	lnet_pid_t	  src_pid;
+	__u32	       type;	       /* lnet_msg_type_t */
+	__u32	       payload_length;     /* payload data to follow */
+	/*<------__u64 aligned------->*/
+	union {
+		lnet_ack_t   ack;
+		lnet_put_t   put;
+		lnet_get_t   get;
+		lnet_reply_t reply;
+		lnet_hello_t hello;
+	} msg;
+} WIRE_ATTR lnet_hdr_t;
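Because these structs are packed and built only from fixed-size types, their sizes are wire-protocol constants. A hedged compile-time sanity check, assuming lnet_nid_t is a __u64 and lnet_pid_t a __u32 (as the surrounding code implies); BUILD_BUG_ON() has to sit inside a function, e.g. some init path:

/* common header is 32 bytes; the largest variant (lnet_put_t) is 40 */
BUILD_BUG_ON(sizeof(lnet_handle_wire_t) != 16);
BUILD_BUG_ON(sizeof(lnet_put_t) != 40);
BUILD_BUG_ON(sizeof(lnet_hdr_t) != 72);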
+
+/* A HELLO message contains a magic number and protocol version
+ * code in the header's dest_nid, the peer's NID in the src_nid, and
+ * LNET_MSG_HELLO in the type field.  All other common fields are zero
+ * (including payload_length; i.e. no payload).
+ * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
+ * running the same protocol and to find out its NID. These LNDs should
+ * exchange HELLO messages when a connection is first established.  Individual
+ * LNDs can put whatever else they fancy in lnet_hdr_t::msg.
+ */
+typedef struct {
+	__u32   magic;			  /* LNET_PROTO_TCP_MAGIC */
+	__u16   version_major;		  /* increment on incompatible change */
+	__u16   version_minor;		  /* increment on compatible change */
+} WIRE_ATTR lnet_magicversion_t;
+
+/* PROTO MAGIC for LNDs */
+#define LNET_PROTO_IB_MAGIC		 0x0be91b91
+#define LNET_PROTO_RA_MAGIC		 0x0be91b92
+#define LNET_PROTO_QSW_MAGIC		0x0be91b93
+#define LNET_PROTO_GNI_MAGIC		0xb00fbabe /* ask Kim */
+#define LNET_PROTO_TCP_MAGIC		0xeebc0ded
+#define LNET_PROTO_PTL_MAGIC		0x50746C4E /* 'PtlN' unique magic */
+#define LNET_PROTO_MX_MAGIC		 0x4d583130 /* 'MX10'! */
+#define LNET_PROTO_ACCEPTOR_MAGIC	   0xacce7100
+#define LNET_PROTO_PING_MAGIC	       0x70696E67 /* 'ping' */
+
+/* Placeholder for a future "unified" protocol across all LNDs */
+/* Current LNDs that receive a request with this magic will respond with a
+ * "stub" reply using their current protocol */
+#define LNET_PROTO_MAGIC		    0x45726963 /* ! */
+
+
+#define LNET_PROTO_TCP_VERSION_MAJOR	1
+#define LNET_PROTO_TCP_VERSION_MINOR	0
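Putting the HELLO description above into code: a hedged sketch of how a byte-stream LND might fill the header (the locals 'hdr' and 'my_nid' are hypothetical, and real code would still byte-swap wire fields before sending):

lnet_hdr_t hdr;
lnet_magicversion_t *hmv = (lnet_magicversion_t *)&hdr.dest_nid;

memset(&hdr, 0, sizeof(hdr));		/* no payload, common fields zero */
hmv->magic	   = LNET_PROTO_TCP_MAGIC;
hmv->version_major = LNET_PROTO_TCP_VERSION_MAJOR;
hmv->version_minor = LNET_PROTO_TCP_VERSION_MINOR;
hdr.src_nid	   = my_nid;		/* tell the peer who we are */
hdr.type	   = LNET_MSG_HELLO;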
+
+/* Acceptor connection request */
+typedef struct {
+	__u32       acr_magic;		  /* PTL_ACCEPTOR_PROTO_MAGIC */
+	__u32       acr_version;		/* protocol version */
+	__u64       acr_nid;		    /* target NID */
+} WIRE_ATTR lnet_acceptor_connreq_t;
+
+#define LNET_PROTO_ACCEPTOR_VERSION       1
+
+/* forward refs */
+struct lnet_libmd;
+
+typedef struct lnet_msg {
+	struct list_head	    msg_activelist;
+	struct list_head	    msg_list;	   /* Q for credits/MD */
+
+	lnet_process_id_t     msg_target;
+	/* where it is from; only used for building the event */
+	lnet_nid_t		msg_from;
+	__u32			msg_type;
+
+	/* committed for sending */
+	unsigned int		msg_tx_committed:1;
+	/* CPT # this message committed for sending */
+	unsigned int		msg_tx_cpt:15;
+	/* committed for receiving */
+	unsigned int		msg_rx_committed:1;
+	/* CPT # this message committed for receiving */
+	unsigned int		msg_rx_cpt:15;
+	/* queued for tx credit */
+	unsigned int		msg_tx_delayed:1;
+	/* queued for RX buffer */
+	unsigned int		msg_rx_delayed:1;
+	/* ready for pending on RX delay list */
+	unsigned int		msg_rx_ready_delay:1;
+
+	unsigned int	  msg_vmflush:1;      /* VM trying to free memory */
+	unsigned int	  msg_target_is_router:1; /* sending to a router */
+	unsigned int	  msg_routing:1;      /* being forwarded */
+	unsigned int	  msg_ack:1;	  /* ack on finalize (PUT) */
+	unsigned int	  msg_sending:1;      /* outgoing message */
+	unsigned int	  msg_receiving:1;    /* being received */
+	unsigned int	  msg_txcredit:1;     /* taken an NI send credit */
+	unsigned int	  msg_peertxcredit:1; /* taken a peer send credit */
+	unsigned int	  msg_rtrcredit:1;    /* taken a global router credit */
+	unsigned int	  msg_peerrtrcredit:1; /* taken a peer router credit */
+	unsigned int	  msg_onactivelist:1; /* on the activelist */
+
+	struct lnet_peer     *msg_txpeer;	 /* peer I'm sending to */
+	struct lnet_peer     *msg_rxpeer;	 /* peer I received from */
+
+	void		 *msg_private;
+	struct lnet_libmd    *msg_md;
+
+	unsigned int	  msg_len;
+	unsigned int	  msg_wanted;
+	unsigned int	  msg_offset;
+	unsigned int	  msg_niov;
+	struct iovec	 *msg_iov;
+	lnet_kiov_t	  *msg_kiov;
+
+	lnet_event_t	  msg_ev;
+	lnet_hdr_t	    msg_hdr;
+} lnet_msg_t;
+
+
+typedef struct lnet_libhandle {
+	struct list_head	    lh_hash_chain;
+	__u64		 lh_cookie;
+} lnet_libhandle_t;
+
+#define lh_entry(ptr, type, member) \
+	((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
+
+typedef struct lnet_eq {
+	struct list_head		eq_list;
+	lnet_libhandle_t	eq_lh;
+	lnet_seq_t		eq_enq_seq;
+	lnet_seq_t		eq_deq_seq;
+	unsigned int		eq_size;
+	lnet_eq_handler_t	eq_callback;
+	lnet_event_t		*eq_events;
+	int			**eq_refs;	/* percpt refcount for EQ */
+} lnet_eq_t;
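lh_entry() above is an open-coded container_of(): it subtracts the member offset to get back from an embedded lnet_libhandle_t to its enclosing object. For example, given a handle found on a hash chain (the lookup itself is elided):

lnet_libhandle_t *lh;	/* found via an lh_hash_chain lookup */
lnet_eq_t *eq = lh_entry(lh, lnet_eq_t, eq_lh);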
+
+typedef struct lnet_me {
+	struct list_head	     me_list;
+	lnet_libhandle_t       me_lh;
+	lnet_process_id_t      me_match_id;
+	unsigned int	   me_portal;
+	unsigned int	   me_pos;		/* hash offset in mt_hash */
+	__u64		  me_match_bits;
+	__u64		  me_ignore_bits;
+	lnet_unlink_t	  me_unlink;
+	struct lnet_libmd     *me_md;
+} lnet_me_t;
+
+typedef struct lnet_libmd {
+	struct list_head	    md_list;
+	lnet_libhandle_t      md_lh;
+	lnet_me_t	    *md_me;
+	char		 *md_start;
+	unsigned int	  md_offset;
+	unsigned int	  md_length;
+	unsigned int	  md_max_size;
+	int		   md_threshold;
+	int		   md_refcount;
+	unsigned int	  md_options;
+	unsigned int	  md_flags;
+	void		 *md_user_ptr;
+	lnet_eq_t	    *md_eq;
+	unsigned int	  md_niov;		/* # frags */
+	union {
+		struct iovec  iov[LNET_MAX_IOV];
+		lnet_kiov_t   kiov[LNET_MAX_IOV];
+	} md_iov;
+} lnet_libmd_t;
+
+#define LNET_MD_FLAG_ZOMBIE	   (1 << 0)
+#define LNET_MD_FLAG_AUTO_UNLINK      (1 << 1)
+
+#ifdef LNET_USE_LIB_FREELIST
+typedef struct {
+	void		  *fl_objs;	  /* single contiguous array of objects */
+	int		    fl_nobjs;	 /* the number of them */
+	int		    fl_objsize;       /* the size (including overhead) of each of them */
+	struct list_head	     fl_list;	  /* where they are enqueued */
+} lnet_freelist_t;
+
+typedef struct {
+	struct list_head	     fo_list;	     /* enqueue on fl_list */
+	void		  *fo_contents;	 /* aligned contents */
+} lnet_freeobj_t;
+#endif
+
+typedef struct {
+	/* info about peers we are trying to fail */
+	struct list_head	     tp_list;	     /* ln_test_peers */
+	lnet_nid_t	     tp_nid;	      /* matching nid */
+	unsigned int	   tp_threshold;	/* # failures to simulate */
+} lnet_test_peer_t;
+
+#define LNET_COOKIE_TYPE_MD    1
+#define LNET_COOKIE_TYPE_ME    2
+#define LNET_COOKIE_TYPE_EQ    3
+#define LNET_COOKIE_TYPE_BITS  2
+#define LNET_COOKIE_MASK	((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL)
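The low LNET_COOKIE_TYPE_BITS of a cookie carry the resource type (MD, ME or EQ), leaving the remaining bits for a per-container counter. A hedged helper sketch (the name is hypothetical):

static inline int
sketch_cookie_type(__u64 cookie)
{
	/* yields LNET_COOKIE_TYPE_MD, _ME or _EQ for a valid cookie */
	return (int)(cookie & LNET_COOKIE_MASK);
}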
+
+struct lnet_ni;				  /* forward ref */
+
+typedef struct lnet_lnd {
+	/* fields managed by portals */
+	struct list_head	    lnd_list;	     /* stash in the LND table */
+	int		   lnd_refcount;	 /* # active instances */
+
+	/* fields initialised by the LND */
+	unsigned int	  lnd_type;
+
+	int  (*lnd_startup) (struct lnet_ni *ni);
+	void (*lnd_shutdown) (struct lnet_ni *ni);
+	int  (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg);
+
+	/* In data movement APIs below, payload buffers are described as a set
+	 * of 'niov' fragments which are...
+	 * EITHER
+	 *    in virtual memory (struct iovec *iov != NULL)
+	 * OR
+	 *    in pages (kernel only: lnet_kiov_t *kiov != NULL).
+	 * The LND may NOT overwrite these fragment descriptors.
+	 * An 'offset' may specify a byte offset within the set of
+	 * fragments to start from
+	 */
+
+	/* Start sending a preformatted message.  'private' is NULL for PUT and
+	 * GET messages; otherwise this is a response to an incoming message
+	 * and 'private' is the 'private' passed to lnet_parse().  Return
+	 * non-zero for immediate failure, otherwise complete later with
+	 * lnet_finalize() */
+	int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg);
+
+	/* Start receiving 'mlen' bytes of payload data, skipping the following
+	 * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to
+	 * lnet_parse().  Return non-zero for immediate failure, otherwise
+	 * complete later with lnet_finalize().  This also gives back a receive
+	 * credit if the LND does flow control. */
+	int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
+			int delayed, unsigned int niov,
+			struct iovec *iov, lnet_kiov_t *kiov,
+			unsigned int offset, unsigned int mlen, unsigned int rlen);
+
+	/* lnet_parse() has had to delay processing of this message
+	 * (e.g. waiting for a forwarding buffer or send credits).  Give the
+	 * LND a chance to free urgently needed resources.  If called, return 0
+	 * for success and do NOT give back a receive credit; that has to wait
+	 * until lnd_recv() gets called.  On failure return < 0 and
+	 * release resources; lnd_recv() will not be called. */
+	int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
+			      void **new_privatep);
+
+	/* notification of peer health */
+	void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
+
+	/* query of peer aliveness */
+	void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, cfs_time_t *when);
+
+	/* accept a new connection */
+	int (*lnd_accept)(struct lnet_ni *ni, socket_t *sock);
+
+} lnd_t;
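An LND implements this vtable and hands it to lnet_register_lnd(). A minimal hedged skeleton (the "sketch" names are hypothetical; only the lnd_t fields are real, and a functional LND must also provide lnd_send/lnd_recv):

static int sketch_startup(struct lnet_ni *ni)	{ return 0; }
static void sketch_shutdown(struct lnet_ni *ni)	{ }

static lnd_t the_sketchlnd = {
	.lnd_type	= 0,		/* a real LND type constant goes here */
	.lnd_startup	= sketch_startup,
	.lnd_shutdown	= sketch_shutdown,
};

/* at module init:  lnet_register_lnd(&the_sketchlnd);  */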
+
+#define LNET_NI_STATUS_UP      0x15aac0de
+#define LNET_NI_STATUS_DOWN    0xdeadface
+#define LNET_NI_STATUS_INVALID 0x00000000
+typedef struct {
+	lnet_nid_t ns_nid;
+	__u32      ns_status;
+	__u32      ns_unused;
+} WIRE_ATTR lnet_ni_status_t;
+
+struct lnet_tx_queue {
+	int			tq_credits;	/* # tx credits free */
+	int			tq_credits_min;	/* lowest it's been */
+	int			tq_credits_max;	/* total # tx credits */
+	struct list_head		tq_delayed;	/* delayed TXs */
+};
+
+#define LNET_MAX_INTERFACES   16
+
+typedef struct lnet_ni {
+	spinlock_t		ni_lock;
+	struct list_head		ni_list;	/* chain on ln_nis */
+	struct list_head		ni_cptlist;	/* chain on ln_nis_cpt */
+	int			ni_maxtxcredits; /* # tx credits  */
+	/* # per-peer send credits */
+	int			ni_peertxcredits;
+	/* # per-peer router buffer credits */
+	int			ni_peerrtrcredits;
+	/* seconds to consider peer dead */
+	int			ni_peertimeout;
+	int			ni_ncpts;	/* number of CPTs */
+	__u32			*ni_cpts;	/* bond NI on some CPTs */
+	lnet_nid_t		ni_nid;		/* interface's NID */
+	void			*ni_data;	/* instance-specific data */
+	lnd_t			*ni_lnd;	/* procedural interface */
+	struct lnet_tx_queue	**ni_tx_queues;	/* percpt TX queues */
+	int			**ni_refs;	/* percpt reference count */
+	long			ni_last_alive;	/* when I was last alive */
+	lnet_ni_status_t	*ni_status;	/* my health status */
+	/* equivalent interfaces to use */
+	char			*ni_interfaces[LNET_MAX_INTERFACES];
+} lnet_ni_t;
+
+#define LNET_PROTO_PING_MATCHBITS	0x8000000000000000LL
+
+/* NB: value of these features equal to LNET_PROTO_PING_VERSION_x
+ * of old LNet, so there shouldn't be any compatibility issue */
+#define LNET_PING_FEAT_INVAL		(0)		/* no feature */
+#define LNET_PING_FEAT_BASE		(1 << 0)	/* just a ping */
+#define LNET_PING_FEAT_NI_STATUS	(1 << 1)	/* return NI status */
+
+#define LNET_PING_FEAT_MASK		(LNET_PING_FEAT_BASE | \
+					 LNET_PING_FEAT_NI_STATUS)
+
+typedef struct {
+	__u32			pi_magic;
+	__u32			pi_features;
+	lnet_pid_t		pi_pid;
+	__u32			pi_nnis;
+	lnet_ni_status_t	pi_ni[0];
+} WIRE_ATTR lnet_ping_info_t;
+
+/* router checker data, per router */
+#define LNET_MAX_RTR_NIS   16
+#define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS])
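pi_ni[0] above is a flexible array, so a ping buffer for n interfaces is sized with offsetof(), exactly as LNET_PINGINFO_SIZE does for LNET_MAX_RTR_NIS. A hedged allocation sketch ('n' is hypothetical, and plain kzalloc() stands in for whatever allocator the caller actually uses):

size_t nob = offsetof(lnet_ping_info_t, pi_ni[n]);
lnet_ping_info_t *pinfo = kzalloc(nob, GFP_KERNEL);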
+typedef struct {
+	/* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */
+	struct list_head		rcd_list;
+	lnet_handle_md_t	rcd_mdh;	/* ping buffer MD */
+	struct lnet_peer	*rcd_gateway;	/* reference to gateway */
+	lnet_ping_info_t	*rcd_pinginfo;	/* ping buffer */
+} lnet_rc_data_t;
+
+typedef struct lnet_peer {
+	struct list_head	lp_hashlist;	  /* chain on peer hash */
+	struct list_head	lp_txq;	       /* messages blocking for tx credits */
+	struct list_head	lp_rtrq;	      /* messages blocking for router credits */
+	struct list_head	lp_rtr_list;	  /* chain on router list */
+	int	       lp_txcredits;	 /* # tx credits available */
+	int	       lp_mintxcredits;      /* low water mark */
+	int	       lp_rtrcredits;	/* # router credits */
+	int	       lp_minrtrcredits;     /* low water mark */
+	unsigned int      lp_alive:1;	   /* alive/dead? */
+	unsigned int      lp_notify:1;	  /* notification outstanding? */
+	unsigned int      lp_notifylnd:1;       /* outstanding notification for LND? */
+	unsigned int      lp_notifying:1;       /* some thread is handling notification */
+	unsigned int      lp_ping_notsent;      /* SEND event outstanding from ping */
+	int	       lp_alive_count;       /* # times router went dead<->alive */
+	long	      lp_txqnob;	    /* bytes queued for sending */
+	cfs_time_t	lp_timestamp;	 /* time of last aliveness news */
+	cfs_time_t	lp_ping_timestamp;    /* time of last ping attempt */
+	cfs_time_t	lp_ping_deadline;     /* != 0 if ping reply expected */
+	cfs_time_t	lp_last_alive;	/* when I was last alive */
+	cfs_time_t	lp_last_query;	/* when lp_ni was queried last time */
+	lnet_ni_t	*lp_ni;		/* interface peer is on */
+	lnet_nid_t	lp_nid;	       /* peer's NID */
+	int	       lp_refcount;	  /* # refs */
+	int			lp_cpt;		/* CPT this peer attached on */
+	/* # refs from lnet_route_t::lr_gateway */
+	int			lp_rtr_refcount;
+	/* returned RC ping features */
+	unsigned int		lp_ping_feats;
+	struct list_head		lp_routes;	/* routers on this peer */
+	lnet_rc_data_t		*lp_rcd;	/* router checker state */
+} lnet_peer_t;
+
+
+/* peer hash size */
+#define LNET_PEER_HASH_BITS     9
+#define LNET_PEER_HASH_SIZE     (1 << LNET_PEER_HASH_BITS)
+
+/* peer hash table */
+struct lnet_peer_table {
+	int			pt_version;	/* /proc validity stamp */
+	int			pt_number;	/* # peers extant */
+	struct list_head		pt_deathrow;	/* zombie peers */
+	struct list_head		*pt_hash;	/* NID->peer hash */
+};
+
+/* peer aliveness is enabled only on routers for peers in a network where the
+ * lnet_ni_t::ni_peertimeout has been set to a positive value */
+#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \
+					 (lp)->lp_ni->ni_peertimeout > 0)
+
+typedef struct {
+	struct list_head		lr_list;	/* chain on net */
+	struct list_head		lr_gwlist;	/* chain on gateway */
+	lnet_peer_t		*lr_gateway;	/* router node */
+	__u32			lr_net;		/* remote network number */
+	int			lr_seq;		/* sequence for round-robin */
+	unsigned int		lr_downis;	/* number of down NIs */
+	unsigned int		lr_hops;	/* how far I am */
+} lnet_route_t;
+
+#define LNET_REMOTE_NETS_HASH_DEFAULT	(1U << 7)
+#define LNET_REMOTE_NETS_HASH_MAX	(1U << 16)
+#define LNET_REMOTE_NETS_HASH_SIZE	(1 << the_lnet.ln_remote_nets_hbits)
+
+typedef struct {
+	struct list_head	      lrn_list;       /* chain on ln_remote_nets_hash */
+	struct list_head	      lrn_routes;     /* routes to me */
+	__u32		   lrn_net;	/* my net number */
+} lnet_remotenet_t;
+
+typedef struct {
+	struct list_head rbp_bufs;	     /* my free buffer pool */
+	struct list_head rbp_msgs;	     /* messages blocking for a buffer */
+	int	rbp_npages;	   /* # pages in each buffer */
+	int	rbp_nbuffers;	 /* # buffers */
+	int	rbp_credits;	  /* # free buffers / blocked messages */
+	int	rbp_mincredits;       /* low water mark */
+} lnet_rtrbufpool_t;
+
+typedef struct {
+	struct list_head	     rb_list;	     /* chain on rbp_bufs */
+	lnet_rtrbufpool_t     *rb_pool;	     /* owning pool */
+	lnet_kiov_t	    rb_kiov[0];	  /* the buffer space */
+} lnet_rtrbuf_t;
+
+typedef struct {
+	__u32	msgs_alloc;
+	__u32	msgs_max;
+	__u32	errors;
+	__u32	send_count;
+	__u32	recv_count;
+	__u32	route_count;
+	__u32	drop_count;
+	__u64	send_length;
+	__u64	recv_length;
+	__u64	route_length;
+	__u64	drop_length;
+} WIRE_ATTR lnet_counters_t;
+
+#define LNET_PEER_HASHSIZE   503		/* prime! */
+
+#define LNET_NRBPOOLS	 3		 /* # different router buffer pools */
+
+enum {
+	/* Didn't match anything */
+	LNET_MATCHMD_NONE	= (1 << 0),
+	/* Matched OK */
+	LNET_MATCHMD_OK		= (1 << 1),
+	/* Must be discarded */
+	LNET_MATCHMD_DROP	= (1 << 2),
+	/* match and buffer is exhausted */
+	LNET_MATCHMD_EXHAUSTED  = (1 << 3),
+	/* match or drop */
+	LNET_MATCHMD_FINISH     = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP),
+};
+
+/* Options for lnet_portal_t::ptl_options */
+#define LNET_PTL_LAZY	       (1 << 0)
+#define LNET_PTL_MATCH_UNIQUE       (1 << 1)    /* unique match, for RDMA */
+#define LNET_PTL_MATCH_WILDCARD     (1 << 2)    /* wildcard match, request portal */
+
+/* parameter for matching operations (GET, PUT) */
+struct lnet_match_info {
+	__u64			mi_mbits;
+	lnet_process_id_t	mi_id;
+	unsigned int		mi_opc;
+	unsigned int		mi_portal;
+	unsigned int		mi_rlength;
+	unsigned int		mi_roffset;
+};
+
+/* ME hash of RDMA portal */
+#define LNET_MT_HASH_BITS		8
+#define LNET_MT_HASH_SIZE		(1 << LNET_MT_HASH_BITS)
+#define LNET_MT_HASH_MASK		(LNET_MT_HASH_SIZE - 1)
+/* we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_mhash,
+ * the last entry is reserved for MEs with ignore-bits */
+#define LNET_MT_HASH_IGNORE		LNET_MT_HASH_SIZE
+/* a __u64 has 2^6 bits, so we need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64),
+ * i.e. 4 __u64s, as the bit-map, plus an extra __u64 (of which only one bit
+ * is used) for the ME-list with ignore-bits: mtable::mt_mhash[LNET_MT_HASH_IGNORE] */
+#define LNET_MT_BITS_U64		6	/* 2^6 bits */
+#define LNET_MT_EXHAUSTED_BITS		(LNET_MT_HASH_BITS - LNET_MT_BITS_U64)
+#define LNET_MT_EXHAUSTED_BMAP		((1 << LNET_MT_EXHAUSTED_BITS) + 1)
+
+/* portal match table */
+struct lnet_match_table {
+	/* reserved for upcoming patches, CPU partition ID */
+	unsigned int		mt_cpt;
+	unsigned int		mt_portal;      /* portal index */
+	/* match table is set as "enabled" if there is a non-exhausted MD
+	 * attached on mt_mhash; it's only valid for wildcard portals */
+	unsigned int		mt_enabled;
+	/* bitmap to flag whether MEs on mt_mhash are exhausted or not */
+	__u64			mt_exhausted[LNET_MT_EXHAUSTED_BMAP];
+	struct list_head		*mt_mhash;      /* matching hash */
+};
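Concretely, with LNET_MT_HASH_BITS = 8 the bitmap is 256/64 = 4 words plus one extra word for the ignore-bits slot, i.e. 5 __u64s. A hedged sketch of the addressing this implies (the helper name is hypothetical): hash position 'pos' maps to word pos >> 6 and bit pos & 63:

static inline int
sketch_mt_exhausted(struct lnet_match_table *mtable, int pos)
{
	__u64 bit = 1ULL << (pos & ((1 << LNET_MT_BITS_U64) - 1));

	return (mtable->mt_exhausted[pos >> LNET_MT_BITS_U64] & bit) != 0;
}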
+
+/* these are only useful for wildcard portal */
+/* Turn off message rotor for wildcard portals */
+#define	LNET_PTL_ROTOR_OFF	0
+/* round-robin dispatch all PUT messages for wildcard portals */
+#define	LNET_PTL_ROTOR_ON	1
+/* round-robin dispatch routed PUT message for wildcard portals */
+#define	LNET_PTL_ROTOR_RR_RT	2
+/* dispatch routed PUT message by hashing source NID for wildcard portals */
+#define	LNET_PTL_ROTOR_HASH_RT	3
+
+typedef struct lnet_portal {
+	spinlock_t		ptl_lock;
+	unsigned int		ptl_index;	/* portal ID, reserved */
+	/* flags on this portal: lazy, unique... */
+	unsigned int		ptl_options;
+	/* list of messages which are stealing buffers */
+	struct list_head		ptl_msg_stealing;
+	/* messages blocking for MD */
+	struct list_head		ptl_msg_delayed;
+	/* Match table for each CPT */
+	struct lnet_match_table	**ptl_mtables;
+	/* spread rotor of incoming "PUT" */
+	int			ptl_rotor;
+	/* # active entries for this portal */
+	int		     ptl_mt_nmaps;
+	/* array of active entries' cpu-partition-id */
+	int		     ptl_mt_maps[0];
+} lnet_portal_t;
+
+#define LNET_LH_HASH_BITS	12
+#define LNET_LH_HASH_SIZE	(1ULL << LNET_LH_HASH_BITS)
+#define LNET_LH_HASH_MASK	(LNET_LH_HASH_SIZE - 1)
+
+/* resource container (ME, MD, EQ) */
+struct lnet_res_container {
+	unsigned int		rec_type;	/* container type */
+	__u64			rec_lh_cookie;	/* cookie generator */
+	struct list_head		rec_active;	/* active resource list */
+	struct list_head		*rec_lh_hash;	/* handle hash */
+#ifdef LNET_USE_LIB_FREELIST
+	lnet_freelist_t		rec_freelist;	/* freelist for resources */
+#endif
+};
+
+/* message container */
+struct lnet_msg_container {
+	int			msc_init;	/* initialized or not */
+	/* max # threads finalizing */
+	int			msc_nfinalizers;
+	/* msgs waiting to complete finalizing */
+	struct list_head		msc_finalizing;
+	struct list_head		msc_active;	/* active message list */
+	/* threads doing finalization */
+	void			**msc_finalizers;
+#ifdef LNET_USE_LIB_FREELIST
+	lnet_freelist_t		msc_freelist;	/* freelist for messages */
+#endif
+};
+
+/* Router Checker states */
+#define LNET_RC_STATE_SHUTDOWN		0	/* not started */
+#define LNET_RC_STATE_RUNNING		1	/* started up OK */
+#define LNET_RC_STATE_STOPPING		2	/* telling thread to stop */
+
+typedef struct {
+	/* CPU partition table of LNet */
+	struct cfs_cpt_table		*ln_cpt_table;
+	/* number of CPTs in ln_cpt_table */
+	unsigned int			ln_cpt_number;
+	unsigned int			ln_cpt_bits;
+
+	/* protect LNet resources (ME/MD/EQ) */
+	struct cfs_percpt_lock		*ln_res_lock;
+	/* # portals */
+	int				ln_nportals;
+	/* the vector of portals */
+	lnet_portal_t			**ln_portals;
+	/* percpt ME containers */
+	struct lnet_res_container	**ln_me_containers;
+	/* percpt MD container */
+	struct lnet_res_container	**ln_md_containers;
+
+	/* Event Queue container */
+	struct lnet_res_container	ln_eq_container;
+	wait_queue_head_t			ln_eq_waitq;
+	spinlock_t			ln_eq_wait_lock;
+	unsigned int			ln_remote_nets_hbits;
+
+	/* protect NI, peer table, credits, routers, rtrbuf... */
+	struct cfs_percpt_lock		*ln_net_lock;
+	/* percpt message containers for active/finalizing/freed message */
+	struct lnet_msg_container	**ln_msg_containers;
+	lnet_counters_t			**ln_counters;
+	struct lnet_peer_table		**ln_peer_tables;
+	/* failure simulation */
+	struct list_head			ln_test_peers;
+
+	struct list_head			ln_nis;		/* LND instances */
+	/* NIs bond on specific CPT(s) */
+	struct list_head			ln_nis_cpt;
+	/* dying LND instances */
+	struct list_head			ln_nis_zombie;
+	lnet_ni_t			*ln_loni;	/* the loopback NI */
+	/* NI to wait for events in */
+	lnet_ni_t			*ln_eq_waitni;
+
+	/* remote networks with routes to them */
+	struct list_head			*ln_remote_nets_hash;
+	/* validity stamp */
+	__u64				ln_remote_nets_version;
+	/* list of all known routers */
+	struct list_head			ln_routers;
+	/* validity stamp */
+	__u64				ln_routers_version;
+	/* percpt router buffer pools */
+	lnet_rtrbufpool_t		**ln_rtrpools;
+
+	lnet_handle_md_t		ln_ping_target_md;
+	lnet_handle_eq_t		ln_ping_target_eq;
+	lnet_ping_info_t		*ln_ping_info;
+
+	/* router checker startup/shutdown state */
+	int				ln_rc_state;
+	/* router checker's event queue */
+	lnet_handle_eq_t		ln_rc_eqh;
+	/* rcd still pending on net */
+	struct list_head			ln_rcd_deathrow;
+	/* rcd ready for free */
+	struct list_head			ln_rcd_zombie;
+	/* serialise startup/shutdown */
+	struct semaphore		ln_rc_signal;
+
+	struct mutex			ln_api_mutex;
+	struct mutex			ln_lnd_mutex;
+	int				ln_init;	/* LNetInit() called? */
+	/* Have I called LNetNIInit myself? */
+	int				ln_niinit_self;
+	/* LNetNIInit/LNetNIFini counter */
+	int				ln_refcount;
+	/* shutdown in progress */
+	int				ln_shutdown;
+
+	int				ln_routing;	/* am I a router? */
+	lnet_pid_t			ln_pid;		/* requested pid */
+	/* uniquely identifies this ni in this epoch */
+	__u64				ln_interface_cookie;
+	/* registered LNDs */
+	struct list_head			ln_lnds;
+
+	/* space for network names */
+	char				*ln_network_tokens;
+	int				ln_network_tokens_nob;
+	/* test protocol compatibility flags */
+	int				ln_testprotocompat;
+
+} lnet_t;
+
+#endif

+ 43 - 0
drivers/staging/lustre/include/linux/lnet/linux/api-support.h

@@ -0,0 +1,43 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_API_SUPPORT_H__
+#define __LINUX_API_SUPPORT_H__
+
+#ifndef __LNET_API_SUPPORT_H__
+#error Do not #include this file directly. #include <linux/lnet/api-support.h> instead
+#endif
+
+
+#endif

+ 72 - 0
drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h

@@ -0,0 +1,72 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_LINUX_LIB_LNET_H__
+#define __LNET_LINUX_LIB_LNET_H__
+
+#ifndef __LNET_LIB_LNET_H__
+#error Do not #include this file directly. #include <linux/lnet/lib-lnet.h> instead
+#endif
+
+# include <asm/page.h>
+# include <linux/string.h>
+# include <asm/io.h>
+# include <linux/libcfs/libcfs.h>
+
+static inline __u64
+lnet_page2phys (struct page *p)
+{
+	/* compiler optimizer will elide unused branches */
+
+	switch (sizeof(typeof(page_to_phys(p)))) {
+	case 4:
+		/* page_to_phys returns a 32 bit physical address.  This must
+		 * be a 32 bit machine with <= 4G memory and we must ensure we
+		 * don't sign extend when converting to 64 bits. */
+		return (unsigned long)page_to_phys(p);
+
+	case 8:
+		/* page_to_phys returns a 64 bit physical address :) */
+		return page_to_phys(p);
+
+	default:
+		LBUG();
+		return 0;
+	}
+}
+
+
+#define LNET_ROUTER
+
+#endif /* __LNET_LINUX_LIB_LNET_H__ */

+ 45 - 0
drivers/staging/lustre/include/linux/lnet/linux/lib-types.h

@@ -0,0 +1,45 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_LINUX_LIB_TYPES_H__
+#define __LNET_LINUX_LIB_TYPES_H__
+
+#ifndef __LNET_LIB_TYPES_H__
+#error Do not #include this file directly. #include <linux/lnet/lib-types.h> instead
+#endif
+
+# include <linux/uio.h>
+# include <linux/types.h>
+
+#endif

+ 56 - 0
drivers/staging/lustre/include/linux/lnet/linux/lnet.h

@@ -0,0 +1,56 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_LINUX_LNET_H__
+#define __LNET_LINUX_LNET_H__
+
+#ifndef __LNET_H__
+#error Do not #include this file directly. #include <linux/lnet/lnet.h> instead
+#endif
+
+/*
+ * lnet.h
+ *
+ * User application interface file
+ */
+
+#include <linux/uio.h>
+#include <linux/types.h>
+
+#define cfs_tcp_sendpage(sk, page, offset, size, flags) \
+	tcp_sendpage(sk, page, offset, size, flags)
+
+#endif

+ 51 - 0
drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h

@@ -0,0 +1,51 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_SYSCTL_H__
+#define __LNET_SYSCTL_H__
+
+#if defined(CONFIG_SYSCTL)
+
+
+#define CTL_KRANAL      201
+#define CTL_O2IBLND     205
+#define CTL_PTLLND      206
+#define CTL_QSWNAL      207
+#define CTL_SOCKLND     208
+#define CTL_GNILND      210
+
+
+#endif
+
+#endif

+ 51 - 0
drivers/staging/lustre/include/linux/lnet/lnet.h

@@ -0,0 +1,51 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_H__
+#define __LNET_H__
+
+/*
+ * lnet.h
+ *
+ * User application interface file
+ */
+#include <linux/lnet/linux/lnet.h>
+
+#include <linux/lnet/types.h>
+#include <linux/lnet/api.h>
+
+#define LNET_NIDSTR_COUNT  1024    /* # of nidstrings */
+#define LNET_NIDSTR_SIZE   32      /* size of each one (see below for usage) */
+
+#endif

+ 80 - 0
drivers/staging/lustre/include/linux/lnet/lnetctl.h

@@ -0,0 +1,80 @@
+/*
+ *   This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ *   Portals is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Portals is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Portals; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * header for libptlctl.a
+ */
+#ifndef _PTLCTL_H_
+#define _PTLCTL_H_
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/types.h>
+
+#define LNET_DEV_ID 0
+#define LNET_DEV_PATH "/dev/lnet"
+#define LNET_DEV_MAJOR 10
+#define LNET_DEV_MINOR 240
+#define OBD_DEV_ID 1
+#define OBD_DEV_NAME "obd"
+#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
+#define OBD_DEV_MAJOR 10
+#define OBD_DEV_MINOR 241
+#define SMFS_DEV_ID  2
+#define SMFS_DEV_PATH "/dev/snapdev"
+#define SMFS_DEV_MAJOR 10
+#define SMFS_DEV_MINOR 242
+
+int ptl_initialize(int argc, char **argv);
+int jt_ptl_network(int argc, char **argv);
+int jt_ptl_list_nids(int argc, char **argv);
+int jt_ptl_which_nid(int argc, char **argv);
+int jt_ptl_print_interfaces(int argc, char **argv);
+int jt_ptl_add_interface(int argc, char **argv);
+int jt_ptl_del_interface(int argc, char **argv);
+int jt_ptl_print_peers (int argc, char **argv);
+int jt_ptl_add_peer (int argc, char **argv);
+int jt_ptl_del_peer (int argc, char **argv);
+int jt_ptl_print_connections (int argc, char **argv);
+int jt_ptl_disconnect(int argc, char **argv);
+int jt_ptl_push_connection(int argc, char **argv);
+int jt_ptl_print_active_txs(int argc, char **argv);
+int jt_ptl_ping(int argc, char **argv);
+int jt_ptl_mynid(int argc, char **argv);
+int jt_ptl_add_uuid(int argc, char **argv);
+int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility  */
+int jt_ptl_close_uuid(int argc, char **argv);
+int jt_ptl_del_uuid(int argc, char **argv);
+int jt_ptl_add_route (int argc, char **argv);
+int jt_ptl_del_route (int argc, char **argv);
+int jt_ptl_notify_router (int argc, char **argv);
+int jt_ptl_print_routes (int argc, char **argv);
+int jt_ptl_fail_nid (int argc, char **argv);
+int jt_ptl_lwt(int argc, char **argv);
+int jt_ptl_testprotocompat(int argc, char **argv);
+int jt_ptl_memhog(int argc, char **argv);
+
+int dbg_initialize(int argc, char **argv);
+int jt_dbg_filter(int argc, char **argv);
+int jt_dbg_show(int argc, char **argv);
+int jt_dbg_list(int argc, char **argv);
+int jt_dbg_debug_kernel(int argc, char **argv);
+int jt_dbg_debug_daemon(int argc, char **argv);
+int jt_dbg_debug_file(int argc, char **argv);
+int jt_dbg_clear_debug_buf(int argc, char **argv);
+int jt_dbg_mark_debug_buf(int argc, char **argv);
+int jt_dbg_modules(int argc, char **argv);
+int jt_dbg_panic(int argc, char **argv);
+
+#endif

+ 491 - 0
drivers/staging/lustre/include/linux/lnet/lnetst.h

@@ -0,0 +1,491 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/lnetst.h
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#ifndef __LNET_ST_H__
+#define __LNET_ST_H__
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-types.h>
+
+#define LST_FEAT_NONE		(0)
+#define LST_FEAT_BULK_LEN	(1 << 0)	/* enable variable page size */
+
+#define LST_FEATS_EMPTY		(LST_FEAT_NONE)
+#define LST_FEATS_MASK		(LST_FEAT_NONE | LST_FEAT_BULK_LEN)
+
+#define LST_NAME_SIZE	   32	      /* max name buffer length */
+
+#define LSTIO_DEBUG	     0xC00	   /* debug */
+#define LSTIO_SESSION_NEW       0xC01	   /* create session */
+#define LSTIO_SESSION_END       0xC02	   /* end session */
+#define LSTIO_SESSION_INFO      0xC03	   /* query session */
+#define LSTIO_GROUP_ADD	 0xC10	   /* add group */
+#define LSTIO_GROUP_LIST	0xC11	   /* list all groups in session */
+#define LSTIO_GROUP_INFO	0xC12	   /* query default information of specified group */
+#define LSTIO_GROUP_DEL	 0xC13	   /* delete group */
+#define LSTIO_NODES_ADD	 0xC14	   /* add nodes to specified group */
+#define LSTIO_GROUP_UPDATE      0xC15	   /* update group */
+#define LSTIO_BATCH_ADD	 0xC20	   /* add batch */
+#define LSTIO_BATCH_START       0xC21	   /* start batch */
+#define LSTIO_BATCH_STOP	0xC22	   /* stop batch */
+#define LSTIO_BATCH_DEL	 0xC23	   /* delete batch */
+#define LSTIO_BATCH_LIST	0xC24	   /* show all batches in the session */
+#define LSTIO_BATCH_INFO	0xC25	   /* show detail of specified batch */
+#define LSTIO_TEST_ADD	  0xC26	   /* add test (to batch) */
+#define LSTIO_BATCH_QUERY       0xC27	   /* query batch status */
+#define LSTIO_STAT_QUERY	0xC30	   /* get stats */
+
+typedef struct {
+	lnet_nid_t	      ses_nid;		/* nid of console node */
+	__u64		   ses_stamp;	      /* time stamp */
+} lst_sid_t;					    /*** session id */
+
+extern lst_sid_t LST_INVALID_SID;
+
+typedef struct {
+	__u64		   bat_id;		 /* unique id in session */
+} lst_bid_t;					    /*** batch id (group of tests) */
+
+/* Status of test node */
+#define LST_NODE_ACTIVE	 0x1		     /* node in this session */
+#define LST_NODE_BUSY	   0x2		     /* node is taken by other session */
+#define LST_NODE_DOWN	   0x4		     /* node is down */
+#define LST_NODE_UNKNOWN	0x8		     /* node not in session */
+
+typedef struct {
+	lnet_process_id_t       nde_id;		 /* id of node */
+	int		     nde_state;	      /* state of node */
+} lstcon_node_ent_t;				    /*** node entry, for list_group command */
+
+typedef struct {
+	int		     nle_nnode;	      /* # of nodes */
+	int		     nle_nactive;	    /* # of active nodes */
+	int		     nle_nbusy;	      /* # of busy nodes */
+	int		     nle_ndown;	      /* # of down nodes */
+	int		     nle_nunknown;	   /* # of unknown nodes */
+} lstcon_ndlist_ent_t;				  /*** node_list entry, for list_batch command */
+
+typedef struct {
+	int		     tse_type;	       /* test type */
+	int		     tse_loop;	       /* loop count */
+	int		     tse_concur;	     /* concurrency of test */
+} lstcon_test_ent_t;				    /*** test summary entry, for list_batch command */
+
+typedef struct {
+	int		     bae_state;	      /* batch status */
+	int		     bae_timeout;	    /* batch timeout */
+	int		     bae_ntest;	      /* # of tests in the batch */
+} lstcon_batch_ent_t;				   /*** batch summary entry, for list_batch command */
+
+typedef struct {
+	lstcon_ndlist_ent_t     tbe_cli_nle;	    /* client (group) node_list entry */
+	lstcon_ndlist_ent_t     tbe_srv_nle;	    /* server (group) node_list entry */
+	union {
+		lstcon_test_ent_t  tbe_test;	    /* test entry */
+		lstcon_batch_ent_t tbe_batch;	   /* batch entry */
+	} u;
+} lstcon_test_batch_ent_t;			      /*** test/batch verbose information entry,
+							 *** for list_batch command */
+
+typedef struct {
+	struct list_head	      rpe_link;	       /* link chain */
+	lnet_process_id_t       rpe_peer;	       /* peer's id */
+	struct timeval	  rpe_stamp;	      /* time stamp of RPC */
+	int		     rpe_state;	      /* peer's state */
+	int		     rpe_rpc_errno;	  /* RPC errno */
+
+	lst_sid_t	       rpe_sid;		/* peer's session id */
+	int		     rpe_fwk_errno;	  /* framework errno */
+	int		     rpe_priv[4];	    /* private data */
+	char		    rpe_payload[0];	 /* private reply payload */
+} lstcon_rpc_ent_t;
+
+typedef struct {
+	int		     trs_rpc_stat[4];	/* RPC stats (0: total, 1: success, 2: failure, 3: reserved) */
+	int		     trs_rpc_errno;	  /* RPC errno */
+	int		     trs_fwk_stat[8];	/* framework stat */
+	int		     trs_fwk_errno;	  /* errno of the first remote error */
+	void		   *trs_fwk_private;	/* private framework stat */
+} lstcon_trans_stat_t;
+
+static inline int
+lstcon_rpc_stat_total(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0];
+}
+
+static inline int
+lstcon_rpc_stat_success(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1];
+}
+
+static inline int
+lstcon_rpc_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2];
+}
+
+static inline int
+lstcon_sesop_stat_success(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_sesop_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
+
+static inline int
+lstcon_sesqry_stat_active(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_sesqry_stat_busy(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
+
+static inline int
+lstcon_sesqry_stat_unknown(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
+}
+
+static inline int
+lstcon_tsbop_stat_success(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_tsbop_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
+
+static inline int
+lstcon_tsbqry_stat_idle(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_tsbqry_stat_run(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
+
+static inline int
+lstcon_tsbqry_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
+}
+
+static inline int
+lstcon_statqry_stat_success(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_statqry_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+	return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
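All of these accessors share one calling convention: pass inc != 0 to bump the slot, inc == 0 to read it. For instance, with a hypothetical lstcon_trans_stat_t 'stat':

lstcon_rpc_stat_total(&stat, 1);		/* count one more RPC */
total = lstcon_rpc_stat_total(&stat, 0);	/* read the running total */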
+
+/* create a session */
+typedef struct {
+	int		     lstio_ses_key;	  /* IN: local key */
+	int		     lstio_ses_timeout;      /* IN: session timeout */
+	int		     lstio_ses_force;	/* IN: force create ? */
+	/** IN: session features */
+	unsigned		lstio_ses_feats;
+	lst_sid_t	      *lstio_ses_idp;	  /* OUT: session id */
+	int		     lstio_ses_nmlen;	/* IN: name length */
+	char		   *lstio_ses_namep;	/* IN: session name */
+} lstio_session_new_args_t;
+
+/* query current session */
+typedef struct {
+	lst_sid_t	      *lstio_ses_idp;	  /* OUT: session id */
+	int		    *lstio_ses_keyp;	 /* OUT: local key */
+	/** OUT: session features */
+	unsigned	       *lstio_ses_featp;
+	lstcon_ndlist_ent_t    *lstio_ses_ndinfo;       /* OUT: */
+	int		     lstio_ses_nmlen;	/* IN: name length */
+	char		   *lstio_ses_namep;	/* OUT: session name */
+} lstio_session_info_args_t;
+
+/* delete a session */
+typedef struct {
+	int		     lstio_ses_key;	  /* IN: session key */
+} lstio_session_end_args_t;
+
+#define LST_OPC_SESSION	 1
+#define LST_OPC_GROUP	   2
+#define LST_OPC_NODES	   3
+#define LST_OPC_BATCHCLI	4
+#define LST_OPC_BATCHSRV	5
+
+typedef struct {
+	int		     lstio_dbg_key;	  /* IN: session key */
+	int		     lstio_dbg_type;	 /* IN: debug session|batch|group|nodes list */
+	int		     lstio_dbg_flags;	/* IN: reserved debug flags */
+	int		     lstio_dbg_timeout;      /* IN: timeout of debug */
+
+	int		     lstio_dbg_nmlen;	/* IN: len of name */
+	char		   *lstio_dbg_namep;	/* IN: name of group|batch */
+	int		     lstio_dbg_count;	/* IN: # of test nodes to debug */
+	lnet_process_id_t      *lstio_dbg_idsp;	 /* IN: id of test nodes */
+	struct list_head	     *lstio_dbg_resultp;      /* OUT: list head of result buffer */
+} lstio_debug_args_t;
+
+typedef struct {
+	int		     lstio_grp_key;	  /* IN: session key */
+	int		     lstio_grp_nmlen;	/* IN: name length */
+	char		   *lstio_grp_namep;	/* IN: group name */
+} lstio_group_add_args_t;
+
+typedef struct {
+	int		     lstio_grp_key;	  /* IN: session key */
+	int		     lstio_grp_nmlen;	/* IN: name length */
+	char		   *lstio_grp_namep;	/* IN: group name */
+} lstio_group_del_args_t;
+
+#define LST_GROUP_CLEAN	 1		       /* remove inactive nodes in the group */
+#define LST_GROUP_REFRESH       2		       /* refresh inactive nodes in the group */
+#define LST_GROUP_RMND	  3		       /* delete nodes from the group */
+
+typedef struct {
+	int		     lstio_grp_key;	  /* IN: session key */
+	int		     lstio_grp_opc;	  /* IN: OPC */
+	int		     lstio_grp_args;	 /* IN: arguments */
+	int		     lstio_grp_nmlen;	/* IN: name length */
+	char		   *lstio_grp_namep;	/* IN: group name */
+	int		     lstio_grp_count;	/* IN: # of nodes id */
+	lnet_process_id_t      *lstio_grp_idsp;	 /* IN: array of nodes */
+	struct list_head	     *lstio_grp_resultp;      /* OUT: list head of result buffer */
+} lstio_group_update_args_t;
+
+typedef struct {
+	int		     lstio_grp_key;	  /* IN: session key */
+	int		     lstio_grp_nmlen;	/* IN: name length */
+	char		   *lstio_grp_namep;	/* IN: group name */
+	int		     lstio_grp_count;	/* IN: # of nodes */
+	/** OUT: session features */
+	unsigned	       *lstio_grp_featp;
+	lnet_process_id_t      *lstio_grp_idsp;	 /* IN: nodes */
+	struct list_head	     *lstio_grp_resultp;      /* OUT: list head of result buffer */
+} lstio_group_nodes_args_t;
+
+typedef struct {
+	int		     lstio_grp_key;	  /* IN: session key */
+	int		     lstio_grp_idx;	  /* IN: group idx */
+	int		     lstio_grp_nmlen;	/* IN: name len */
+	char		   *lstio_grp_namep;	/* OUT: name */
+} lstio_group_list_args_t;
+
+typedef struct {
+	int		     lstio_grp_key;	  /* IN: session key */
+	int		     lstio_grp_nmlen;	/* IN: name len */
+	char		   *lstio_grp_namep;	/* IN: name */
+	lstcon_ndlist_ent_t    *lstio_grp_entp;	 /* OUT: description of group */
+
+	int		    *lstio_grp_idxp;	 /* IN/OUT: node index */
+	int		    *lstio_grp_ndentp;       /* IN/OUT: # of nodent */
+	lstcon_node_ent_t      *lstio_grp_dentsp;       /* OUT: nodent array */
+} lstio_group_info_args_t;
+
+#define LST_DEFAULT_BATCH       "batch"		 /* default batch name */
+
+typedef struct {
+	int		     lstio_bat_key;	  /* IN: session key */
+	int		     lstio_bat_nmlen;	/* IN: name length */
+	char		   *lstio_bat_namep;	/* IN: batch name */
+} lstio_batch_add_args_t;
+
+typedef struct {
+	int		     lstio_bat_key;	  /* IN: session key */
+	int		     lstio_bat_nmlen;	/* IN: name length */
+	char		   *lstio_bat_namep;	/* IN: batch name */
+} lstio_batch_del_args_t;
+
+typedef struct {
+	int		     lstio_bat_key;	  /* IN: session key */
+	int		     lstio_bat_timeout;      /* IN: timeout for the batch */
+	int		     lstio_bat_nmlen;	/* IN: name length */
+	char		   *lstio_bat_namep;	/* IN: batch name */
+	struct list_head	     *lstio_bat_resultp;      /* OUT: list head of result buffer */
+} lstio_batch_run_args_t;
+
+typedef struct {
+	int		     lstio_bat_key;	  /* IN: session key */
+	int		     lstio_bat_force;	/* IN: abort unfinished test RPC */
+	int		     lstio_bat_nmlen;	/* IN: name length */
+	char		   *lstio_bat_namep;	/* IN: batch name */
+	struct list_head	     *lstio_bat_resultp;      /* OUT: list head of result buffer */
+} lstio_batch_stop_args_t;
+
+typedef struct {
+	int		     lstio_bat_key;	  /* IN: session key */
+	int		     lstio_bat_testidx;      /* IN: test index */
+	int		     lstio_bat_client;       /* IN: is test client? */
+	int		     lstio_bat_timeout;      /* IN: timeout for waiting */
+	int		     lstio_bat_nmlen;	/* IN: name length */
+	char		   *lstio_bat_namep;	/* IN: batch name */
+	struct list_head	     *lstio_bat_resultp;      /* OUT: list head of result buffer */
+} lstio_batch_query_args_t;
+
+typedef struct {
+	int		     lstio_bat_key;	  /* IN: session key */
+	int		     lstio_bat_idx;	  /* IN: index */
+	int		     lstio_bat_nmlen;	/* IN: name length */
+	char		   *lstio_bat_namep;	/* IN: batch name */
+} lstio_batch_list_args_t;
+
+typedef struct {
+	int		     lstio_bat_key;	  /* IN: session key */
+	int		     lstio_bat_nmlen;	/* IN: name length */
+	char		   *lstio_bat_namep;	/* IN: name */
+	int		     lstio_bat_server;       /* IN: query server or not */
+	int		     lstio_bat_testidx;      /* IN: test index */
+	lstcon_test_batch_ent_t *lstio_bat_entp;	/* OUT: batch ent */
+
+	int		    *lstio_bat_idxp;	 /* IN/OUT: index of node */
+	int		    *lstio_bat_ndentp;       /* IN/OUT: # of nodent */
+	lstcon_node_ent_t      *lstio_bat_dentsp;       /* OUT: nodent array */
+} lstio_batch_info_args_t;
+
+/* add stat in session */
+typedef struct {
+	int		     lstio_sta_key;	  /* IN: session key */
+	int		     lstio_sta_timeout;      /* IN: timeout for stat request */
+	int		     lstio_sta_nmlen;	/* IN: group name length */
+	char		   *lstio_sta_namep;	/* IN: group name */
+	int		     lstio_sta_count;	/* IN: # of pid */
+	lnet_process_id_t      *lstio_sta_idsp;	 /* IN: pid */
+	struct list_head	     *lstio_sta_resultp;      /* OUT: list head of result buffer */
+} lstio_stat_args_t;
+
+typedef enum {
+	LST_TEST_BULK   = 1,
+	LST_TEST_PING   = 2
+} lst_test_type_t;
+
+/* create a test in a batch */
+#define LST_MAX_CONCUR	  1024		    /* Max concurrency of test */
+
+typedef struct {
+	int		     lstio_tes_key;	  /* IN: session key */
+	int		     lstio_tes_bat_nmlen;    /* IN: batch name len */
+	char		   *lstio_tes_bat_name;     /* IN: batch name */
+	int		     lstio_tes_type;	 /* IN: test type */
+	int		     lstio_tes_oneside;      /* IN: one-sided test */
+	int		     lstio_tes_loop;	 /* IN: loop count */
+	int		     lstio_tes_concur;       /* IN: concurrency */
+
+	int		     lstio_tes_dist;	 /* IN: node distribution in destination groups */
+	int		     lstio_tes_span;	 /* IN: node span in destination groups */
+	int		     lstio_tes_sgrp_nmlen;   /* IN: source group name length */
+	char		   *lstio_tes_sgrp_name;    /* IN: group name */
+	int		     lstio_tes_dgrp_nmlen;   /* IN: destination group name length */
+	char		   *lstio_tes_dgrp_name;    /* IN: group name */
+
+	int		     lstio_tes_param_len;    /* IN: param buffer len */
+	void		   *lstio_tes_param;	/* IN: parameter for specified test:
+							       lstio_bulk_param_t,
+							       lstio_ping_param_t,
+							       ... more */
+	int		    *lstio_tes_retp;	 /* OUT: private returned value */
+	struct list_head	     *lstio_tes_resultp;      /* OUT: list head of result buffer */
+} lstio_test_args_t;
+
+typedef enum {
+	LST_BRW_READ    = 1,
+	LST_BRW_WRITE   = 2
+} lst_brw_type_t;
+
+typedef enum {
+	LST_BRW_CHECK_NONE   = 1,
+	LST_BRW_CHECK_SIMPLE = 2,
+	LST_BRW_CHECK_FULL   = 3
+} lst_brw_flags_t;
+
+typedef struct {
+	int		     blk_opc;		/* bulk operation code */
+	int		     blk_size;	       /* size (bytes) */
+	int		     blk_time;	       /* time of running the test */
+	int		     blk_flags;	      /* reserved flags */
+} lst_test_bulk_param_t;
+
+typedef struct {
+	int		     png_size;	       /* size of ping message */
+	int		     png_time;	       /* time */
+	int		     png_loop;	       /* loop */
+	int		     png_flags;	      /* reserved flags */
+} lst_test_ping_param_t;
+
+/* more tests */
+typedef struct {
+	__u32 errors;
+	__u32 rpcs_sent;
+	__u32 rpcs_rcvd;
+	__u32 rpcs_dropped;
+	__u32 rpcs_expired;
+	__u64 bulk_get;
+	__u64 bulk_put;
+} WIRE_ATTR srpc_counters_t;
+
+typedef struct {
+	/** milliseconds since current session started */
+	__u32 running_ms;
+	__u32 active_batches;
+	__u32 zombie_sessions;
+	__u32 brw_errors;
+	__u32 ping_errors;
+} WIRE_ATTR sfw_counters_t;
+
+#endif

+ 94 - 0
drivers/staging/lustre/include/linux/lnet/ptllnd.h

@@ -0,0 +1,94 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/ptllnd.h
+ *
+ * Author: PJ Kirner <pjkirner@clusterfs.com>
+ */
+
+/*
+ * The PTLLND was designed to support Portals with
+ * both Lustre and non-Lustre UNLINK semantics.
+ * However, for now both targets, Cray Portals on
+ * the XT3 and Lustre Portals (for testing), have
+ * Lustre UNLINK semantics, so this is defined
+ * by default.
+ */
+#define LUSTRE_PORTALS_UNLINK_SEMANTICS
+
+
+#ifdef _USING_LUSTRE_PORTALS_
+
+/* NIDs are 64-bits on Lustre Portals */
+#define FMT_NID LPU64
+#define FMT_PID "%d"
+
+/* When using Lustre Portals, Lustre completion semantics are implicit */
+#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS      0
+
+#else /* _USING_CRAY_PORTALS_ */
+
+/* NIDs are integers on Cray Portals */
+#define FMT_NID "%u"
+#define FMT_PID "%d"
+
+/* When using Cray Portals, this is defined in the Cray Portals header */
+/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */
+
+/* Can compare handles directly on Cray Portals */
+#define PtlHandleIsEqual(a,b) ((a) == (b))
+
+/* Different error types on Cray Portals */
+#define ptl_err_t ptl_ni_fail_t
+
+/*
+ * Cray Portals has no maximum number of IOVs.  The
+ * maximum is limited only by memory and the size of the
+ * int parameters (2^31-1).
+ * Lustre only really requires that the underlying
+ * implementation support at least LNET_MAX_IOV,
+ * so for Cray Portals we can safely just use that
+ * value here.
+ */
+#define PTL_MD_MAX_IOV	  LNET_MAX_IOV
+
+#endif
+
+#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID
+
+/* Align incoming small request messages to an 8 byte boundary if this is
+ * supported to avoid alignment issues on some architectures */
+#ifndef PTL_MD_LOCAL_ALIGN8
+# define PTL_MD_LOCAL_ALIGN8 0
+#endif
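
As a sketch of how the format macros above compose (the variables are hypothetical; on Cray Portals FMT_PTLID expands to "ptlid:%d-%u"):

	printf("peer is " FMT_PTLID "\n", pid, nid);	/* pid, nid from the peer */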

+ 124 - 0
drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h

@@ -0,0 +1,124 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/ptllnd_wire.h
+ *
+ * Author: PJ Kirner <pjkirner@clusterfs.com>
+ */
+
+/* Minimum buffer size that any peer will post to receive ptllnd messages */
+#define PTLLND_MIN_BUFFER_SIZE  256
+
+/************************************************************************
+ * Tunable defaults that {u,k}lnds/ptllnd should have in common.
+ */
+
+#define PTLLND_PORTAL	   9	  /* The same portal PTLRPC used when talking to Cray Portals */
+#define PTLLND_PID	      9	  /* The Portals PID */
+#define PTLLND_PEERCREDITS      8	  /* concurrent sends to 1 peer */
+
+/* Default buffer size for kernel ptllnds (guaranteed eager) */
+#define PTLLND_MAX_KLND_MSG_SIZE 512
+
+/* Default buffer size for catamount ptllnds (not guaranteed eager) - large
+ * enough to avoid RDMA for anything sent while control is not in liblustre */
+#define PTLLND_MAX_ULND_MSG_SIZE 512
+
+
+/************************************************************************
+ * Portals LND Wire message format.
+ * These are sent in sender's byte order (i.e. receiver flips).
+ */
+
+#define PTL_RESERVED_MATCHBITS  0x100	/* below this value is reserved
+					 * above is for bulk data transfer */
+#define LNET_MSG_MATCHBITS       0      /* the value for the message channel */
+
+typedef struct
+{
+	lnet_hdr_t	kptlim_hdr;	     /* portals header */
+	char	      kptlim_payload[0];      /* piggy-backed payload */
+} WIRE_ATTR kptl_immediate_msg_t;
+
+typedef struct
+{
+	lnet_hdr_t	kptlrm_hdr;	     /* portals header */
+	__u64	     kptlrm_matchbits;       /* matchbits */
+} WIRE_ATTR kptl_rdma_msg_t;
+
+typedef struct
+{
+	__u64	     kptlhm_matchbits;       /* matchbits */
+	__u32	     kptlhm_max_msg_size;    /* max message size */
+} WIRE_ATTR kptl_hello_msg_t;
+
+typedef struct
+{
+	/* First 2 fields fixed FOR ALL TIME */
+	__u32	   ptlm_magic;     /* I'm a Portals LND message */
+	__u16	   ptlm_version;   /* this is my version number */
+	__u8	    ptlm_type;      /* the message type */
+	__u8	    ptlm_credits;   /* returned credits */
+	__u32	   ptlm_nob;       /* # bytes in whole message */
+	__u32	   ptlm_cksum;     /* checksum (0 == no checksum) */
+	__u64	   ptlm_srcnid;    /* sender's NID */
+	__u64	   ptlm_srcstamp;  /* sender's incarnation */
+	__u64	   ptlm_dstnid;    /* destination's NID */
+	__u64	   ptlm_dststamp;  /* destination's incarnation */
+	__u32	   ptlm_srcpid;    /* sender's PID */
+	__u32	   ptlm_dstpid;    /* destination's PID */
+
+	 union {
+		kptl_immediate_msg_t    immediate;
+		kptl_rdma_msg_t	 rdma;
+		kptl_hello_msg_t	hello;
+	} WIRE_ATTR ptlm_u;
+
+} kptl_msg_t;
+
+/* kptl_msg_t::ptlm_credits is only a __u8 */
+#define PTLLND_MSG_MAX_CREDITS ((typeof(((kptl_msg_t*) 0)->ptlm_credits)) -1)
+
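Casting -1 to the unsigned type of ptlm_credits yields that type's maximum value, so the limit tracks the wire field automatically; for the __u8 above that works out to 255, which a caller could sanity-check at compile time (a sketch, placed inside any function):

	BUILD_BUG_ON(PTLLND_MSG_MAX_CREDITS != 255);	/* max for a __u8 credit field */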
+#define PTLLND_MSG_MAGIC		LNET_PROTO_PTL_MAGIC
+#define PTLLND_MSG_VERSION	      0x04
+
+#define PTLLND_RDMA_OK		  0x00
+#define PTLLND_RDMA_FAIL		0x01
+
+#define PTLLND_MSG_TYPE_INVALID	 0x00
+#define PTLLND_MSG_TYPE_PUT	     0x01
+#define PTLLND_MSG_TYPE_GET	     0x02
+#define PTLLND_MSG_TYPE_IMMEDIATE       0x03    /* No bulk data xfer */
+#define PTLLND_MSG_TYPE_NOOP	    0x04
+#define PTLLND_MSG_TYPE_HELLO	   0x05
+#define PTLLND_MSG_TYPE_NAK	     0x06

+ 103 - 0
drivers/staging/lustre/include/linux/lnet/socklnd.h

@@ -0,0 +1,103 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/socklnd.h
+ *
+ * #defines shared between socknal implementation and utilities
+ */
+#ifndef __LNET_LNET_SOCKLND_H__
+#define __LNET_LNET_SOCKLND_H__
+
+#include <linux/lnet/types.h>
+#include <linux/lnet/lib-types.h>
+
+#define SOCKLND_CONN_NONE     (-1)
+#define SOCKLND_CONN_ANY	0
+#define SOCKLND_CONN_CONTROL    1
+#define SOCKLND_CONN_BULK_IN    2
+#define SOCKLND_CONN_BULK_OUT   3
+#define SOCKLND_CONN_NTYPES     4
+
+#define SOCKLND_CONN_ACK	SOCKLND_CONN_BULK_IN
+
+typedef struct {
+	__u32		   kshm_magic;     /* magic number of socklnd message */
+	__u32		   kshm_version;   /* version of socklnd message */
+	lnet_nid_t	      kshm_src_nid;   /* sender's nid */
+	lnet_nid_t	      kshm_dst_nid;   /* destination nid */
+	lnet_pid_t	      kshm_src_pid;   /* sender's pid */
+	lnet_pid_t	      kshm_dst_pid;   /* destination pid */
+	__u64		   kshm_src_incarnation; /* sender's incarnation */
+	__u64		   kshm_dst_incarnation; /* destination's incarnation */
+	__u32		   kshm_ctype;     /* connection type */
+	__u32		   kshm_nips;      /* # IP addrs */
+	__u32		   kshm_ips[0];    /* IP addrs */
+} WIRE_ATTR ksock_hello_msg_t;
+
+typedef struct {
+	lnet_hdr_t	      ksnm_hdr;       /* lnet hdr */
+
+	/*
+	 * ksnm_payload is removed because of the WinNT compiler's limitation:
+	 * a zero-sized array can only be placed at the tail of [nested]
+	 * structure definitions. The lnet payload will be stored just after
+	 * the body of struct ksock_lnet_msg_t.
+	 */
+} WIRE_ATTR ksock_lnet_msg_t;
+
+typedef struct {
+	__u32		   ksm_type;       /* type of socklnd message */
+	__u32		   ksm_csum;       /* checksum if != 0 */
+	__u64		   ksm_zc_cookies[2]; /* Zero-Copy request/ACK cookie */
+	union {
+		ksock_lnet_msg_t lnetmsg;       /* lnet message, it's empty if it's NOOP */
+	} WIRE_ATTR ksm_u;
+} WIRE_ATTR ksock_msg_t;
+
+static inline void
+socklnd_init_msg(ksock_msg_t *msg, int type)
+{
+	msg->ksm_csum	   = 0;
+	msg->ksm_type	   = type;
+	msg->ksm_zc_cookies[0]  = msg->ksm_zc_cookies[1]  = 0;
+}
+
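A minimal usage sketch of the initializer above, using the NOOP type defined just below:

	ksock_msg_t msg;

	socklnd_init_msg(&msg, KSOCK_MSG_NOOP);	/* zeroes csum and ZC cookies */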
+#define KSOCK_MSG_NOOP	  0xc0	    /* ksm_u empty */
+#define KSOCK_MSG_LNET	  0xc1	    /* lnet msg */
+
+/* We need to know this number to parse the hello msg from ksocklnd in
+ * other LNDs (usocklnd, for example) */
+#define KSOCK_PROTO_V2	  2
+#define KSOCK_PROTO_V3	  3
+
+#endif

+ 503 - 0
drivers/staging/lustre/include/linux/lnet/types.h

@@ -0,0 +1,503 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_TYPES_H__
+#define __LNET_TYPES_H__
+
+/** \addtogroup lnet
+ * @{ */
+
+#include <linux/libcfs/libcfs.h>
+
+/** \addtogroup lnet_addr
+ * @{ */
+
+/** Portal reserved for LNet's own use.
+ * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments.
+ */
+#define LNET_RESERVED_PORTAL      0
+
+/**
+ * Address of an end-point in an LNet network.
+ *
+ * A node can have multiple end-points and hence multiple addresses.
+ * An LNet network can be a simple network (e.g. tcp0) or a network of
+ * LNet networks connected by LNet routers. Therefore an end-point address
+ * has two parts: network ID, and address within a network.
+ *
+ * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID.
+ */
+typedef __u64 lnet_nid_t;
+/**
+ * ID of a process in a node. Shortened as PID to distinguish from
+ * lnet_process_id_t, the global process ID.
+ */
+typedef __u32 lnet_pid_t;
+
+/** wildcard NID that matches any end-point address */
+#define LNET_NID_ANY      ((lnet_nid_t) -1)
+/** wildcard PID that matches any lnet_pid_t */
+#define LNET_PID_ANY      ((lnet_pid_t) -1)
+
+#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
+#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
+
+#define LNET_TIME_FOREVER    (-1)
+
+/**
+ * Objects maintained by the LNet are accessed through handles. Handle types
+ * have names of the form lnet_handle_xx_t, where xx is one of the two letter
+ * object type codes ('eq' for event queue, 'md' for memory descriptor, and
+ * 'me' for match entry).
+ * Each type of object is given a unique handle type to enhance type checking.
+ * The type lnet_handle_any_t can be used when a generic handle is needed.
+ * Every handle value can be converted into a value of type lnet_handle_any_t
+ * without loss of information.
+ */
+typedef struct {
+	__u64	 cookie;
+} lnet_handle_any_t;
+
+typedef lnet_handle_any_t lnet_handle_eq_t;
+typedef lnet_handle_any_t lnet_handle_md_t;
+typedef lnet_handle_any_t lnet_handle_me_t;
+
+#define LNET_WIRE_HANDLE_COOKIE_NONE   (-1)
+
+/**
+ * Invalidate handle \a h.
+ */
+static inline void LNetInvalidateHandle(lnet_handle_any_t *h)
+{
+	h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE;
+}
+
+/**
+ * Compare handles \a h1 and \a h2.
+ *
+ * \return 1 if handles are equal, 0 otherwise.
+ */
+static inline int LNetHandleIsEqual (lnet_handle_any_t h1, lnet_handle_any_t h2)
+{
+	return (h1.cookie == h2.cookie);
+}
+
+/**
+ * Check whether handle \a h is invalid.
+ *
+ * \return 1 if handle is invalid, 0 if valid.
+ */
+static inline int LNetHandleIsInvalid(lnet_handle_any_t h)
+{
+	return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie);
+}
+
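Putting the helpers above together, a handle typically starts life nullified (the variable name is hypothetical):

	lnet_handle_eq_t eqh;

	LNetInvalidateHandle(&eqh);		/* cookie = NONE */
	LASSERT(LNetHandleIsInvalid(eqh));	/* holds until an EQ is allocated */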
+/**
+ * Global process ID.
+ */
+typedef struct {
+	/** node id */
+	lnet_nid_t nid;
+	/** process id */
+	lnet_pid_t pid;
+} lnet_process_id_t;
+/** @} lnet_addr */
+
+/** \addtogroup lnet_me
+ * @{ */
+
+/**
+ * Specifies whether the match entry or memory descriptor should be unlinked
+ * automatically (LNET_UNLINK) or not (LNET_RETAIN).
+ */
+typedef enum {
+	LNET_RETAIN = 0,
+	LNET_UNLINK
+} lnet_unlink_t;
+
+/**
+ * Values of the type lnet_ins_pos_t are used to control where a new match
+ * entry is inserted. The value LNET_INS_BEFORE is used to insert the new
+ * entry before the current entry or before the head of the list. The value
+ * LNET_INS_AFTER is used to insert the new entry after the current entry
+ * or after the last item in the list.
+ */
+typedef enum {
+	/** insert ME before current position or head of the list */
+	LNET_INS_BEFORE,
+	/** insert ME after current position or tail of the list */
+	LNET_INS_AFTER,
+	/** attach ME at tail of local CPU partition ME list */
+	LNET_INS_LOCAL
+} lnet_ins_pos_t;
+
+/** @} lnet_me */
+
+/** \addtogroup lnet_md
+ * @{ */
+
+/**
+ * Defines the visible parts of a memory descriptor. Values of this type
+ * are used to initialize memory descriptors.
+ */
+typedef struct {
+	/**
+	 * Specify the memory region associated with the memory descriptor.
+	 * If the options field has:
+	 * - LNET_MD_KIOV bit set: The start field points to the starting
+	 * address of an array of lnet_kiov_t and the length field specifies
+	 * the number of entries in the array. The length can't be bigger
+	 * than LNET_MAX_IOV. The lnet_kiov_t is used to describe page-based
+	 * fragments that are not necessarily mapped in virtual memory.
+	 * - LNET_MD_IOVEC bit set: The start field points to the starting
+	 * address of an array of struct iovec and the length field specifies
+	 * the number of entries in the array. The length can't be bigger
+	 * than LNET_MAX_IOV. The struct iovec is used to describe fragments
+	 * that have virtual addresses.
+	 * - Otherwise: The memory region is contiguous. The start field
+	 * specifies the starting address for the memory region and the
+	 * length field specifies its length.
+	 *
+	 * When the memory region is fragmented, all fragments but the first
+	 * one must start on page boundary, and all but the last must end on
+	 * page boundary.
+	 */
+	void	    *start;
+	unsigned int     length;
+	/**
+	 * Specifies the maximum number of operations that can be performed
+	 * on the memory descriptor. An operation is any action that could
+	 * possibly generate an event. In the usual case, the threshold value
+	 * is decremented for each operation on the MD. When the threshold
+	 * drops to zero, the MD becomes inactive and does not respond to
+	 * operations. A threshold value of LNET_MD_THRESH_INF indicates that
+	 * there is no bound on the number of operations that may be applied
+	 * to a MD.
+	 */
+	int	      threshold;
+	/**
+	 * Specifies the largest incoming request that the memory descriptor
+	 * should respond to. When the unused portion of a MD (length -
+	 * local offset) falls below this value, the MD becomes inactive and
+	 * does not respond to further operations. This value is only used
+	 * if the LNET_MD_MAX_SIZE option is set.
+	 */
+	int	      max_size;
+	/**
+	 * Specifies the behavior of the memory descriptor. A bitwise OR
+	 * of the following values can be used:
+	 * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD.
+	 * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD.
+	 * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory
+	 *   region is provided by the incoming request. By default, the
+	 *   offset is maintained locally. When maintained locally, the
+	 *   offset is incremented by the length of the request so that
+	 *   the next operation (PUT or GET) will access the next part of
+	 *   the memory region. Note that only one offset variable exists
+	 *   per memory descriptor. If both PUT and GET operations are
+	 *   performed on a memory descriptor, the offset is updated each time.
+	 * - LNET_MD_TRUNCATE: The length provided in the incoming request can
+	 *   be reduced to match the memory available in the region (determined
+	 *   by subtracting the offset from the length of the memory region).
+	 *   By default, if the length in the incoming operation is greater
+	 *   than the amount of memory available, the operation is rejected.
+	 * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for
+	 *   incoming PUT operations, even if requested. By default,
+	 *   acknowledgments are sent for PUT operations that request an
+	 *   acknowledgment. Acknowledgments are never sent for GET operations.
+	 *   The data sent in the REPLY serves as an implicit acknowledgment.
+	 * - LNET_MD_KIOV: The start and length fields specify an array of
+	 *   lnet_kiov_t.
+	 * - LNET_MD_IOVEC: The start and length fields specify an array of
+	 *   struct iovec.
+	 * - LNET_MD_MAX_SIZE: The max_size field is valid.
+	 *
+	 * Note:
+	 * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather
+	 *   capability for memory descriptors. They can't be both set.
+	 * - When LNET_MD_MAX_SIZE is set, the total length of the memory
+	 *   region (i.e. sum of all fragment lengths) must not be less than
+	 *   \a max_size.
+	 */
+	unsigned int     options;
+	/**
+	 * A user-specified value that is associated with the memory
+	 * descriptor. The value does not need to be a pointer, but must fit
+	 * in the space used by a pointer. This value is recorded in events
+	 * associated with operations on this MD.
+	 */
+	void	    *user_ptr;
+	/**
+	 * A handle for the event queue used to log the operations performed on
+	 * the memory region. If this argument is a NULL handle (i.e. nullified
+	 * by LNetInvalidateHandle()), operations performed on this memory
+	 * descriptor are not logged.
+	 */
+	lnet_handle_eq_t eq_handle;
+} lnet_md_t;
+
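A minimal sketch of filling in the descriptor above for a contiguous buffer that accepts PUTs (the buffer and EQ handle are hypothetical; the option and threshold macros are defined just below):

	static char buf[4096];

	lnet_md_t md = {
		.start     = buf,
		.length    = sizeof(buf),
		.threshold = LNET_MD_THRESH_INF,	/* no bound on operations */
		.max_size  = 0,			/* unused without LNET_MD_MAX_SIZE */
		.options   = LNET_MD_OP_PUT,	/* allow incoming PUTs only */
		.user_ptr  = NULL,
		.eq_handle = eqh,		/* EQ handle obtained elsewhere */
	};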
+/* Max Transfer Unit (minimum supported everywhere).
+ * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
+ * these limits are system wide and not interface-local. */
+#define LNET_MTU_BITS	20
+#define LNET_MTU	(1 << LNET_MTU_BITS)
+
+/** limit on the number of fragments in discontiguous MDs */
+#define LNET_MAX_IOV    256
+
+/* Max payload size */
+# define LNET_MAX_PAYLOAD	CONFIG_LNET_MAX_PAYLOAD
+# if (LNET_MAX_PAYLOAD < LNET_MTU)
+#  error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
+# else
+#  if (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
+/*  PAGE_SIZE is a constant: check with cpp! */
+#   error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
+#  endif
+# endif
+
+/**
+ * Options for the MD structure. See lnet_md_t::options.
+ */
+#define LNET_MD_OP_PUT	       (1 << 0)
+/** See lnet_md_t::options. */
+#define LNET_MD_OP_GET	       (1 << 1)
+/** See lnet_md_t::options. */
+#define LNET_MD_MANAGE_REMOTE	(1 << 2)
+/* unused			    (1 << 3) */
+/** See lnet_md_t::options. */
+#define LNET_MD_TRUNCATE	     (1 << 4)
+/** See lnet_md_t::options. */
+#define LNET_MD_ACK_DISABLE	  (1 << 5)
+/** See lnet_md_t::options. */
+#define LNET_MD_IOVEC		(1 << 6)
+/** See lnet_md_t::options. */
+#define LNET_MD_MAX_SIZE	     (1 << 7)
+/** See lnet_md_t::options. */
+#define LNET_MD_KIOV		 (1 << 8)
+
+/* For compatibility with Cray Portals */
+#define LNET_MD_PHYS			 0
+
+/** Infinite threshold on MD operations. See lnet_md_t::threshold */
+#define LNET_MD_THRESH_INF       (-1)
+
+/* NB lustre portals uses struct iovec internally! */
+typedef struct iovec lnet_md_iovec_t;
+
+/**
+ * A page-based fragment of a MD.
+ */
+typedef struct {
+	/** Pointer to the page where the fragment resides */
+	struct page      *kiov_page;
+	/** Length in bytes of the fragment */
+	unsigned int     kiov_len;
+	/**
+	 * Starting offset of the fragment within the page. Note that the
+	 * end of the fragment must not pass the end of the page; i.e.,
+	 * kiov_len + kiov_offset <= PAGE_CACHE_SIZE.
+	 */
+	unsigned int     kiov_offset;
+} lnet_kiov_t;
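A sketch of describing a single page-based fragment with the type above (the page pointer is hypothetical):

	lnet_kiov_t kiov = {
		.kiov_page   = pg,		/* struct page * obtained elsewhere */
		.kiov_offset = 0,
		.kiov_len    = PAGE_SIZE,	/* kiov_len + kiov_offset must fit the page */
	};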
+/** @} lnet_md */
+
+/** \addtogroup lnet_eq
+ * @{ */
+
+/**
+ * Six types of events can be logged in an event queue.
+ */
+typedef enum {
+	/** An incoming GET operation has completed on the MD. */
+	LNET_EVENT_GET		= 1,
+	/**
+	 * An incoming PUT operation has completed on the MD. The
+	 * underlying layers will not alter the memory (on behalf of this
+	 * operation) once this event has been logged.
+	 */
+	LNET_EVENT_PUT,
+	/**
+	 * A REPLY operation has completed. This event is logged after the
+	 * data (if any) from the REPLY has been written into the MD.
+	 */
+	LNET_EVENT_REPLY,
+	/** An acknowledgment has been received. */
+	LNET_EVENT_ACK,
+	/**
+	 * An outgoing send (PUT or GET) operation has completed. This event
+	 * is logged after the entire buffer has been sent and it is safe for
+	 * the caller to reuse the buffer.
+	 *
+	 * Note:
+	 * - The LNET_EVENT_SEND doesn't guarantee message delivery. It can
+	 *   happen even when the message has not yet been put out on the wire.
+	 * - It's unsafe to assume that in an outgoing GET operation
+	 *   the LNET_EVENT_SEND event would happen before the
+	 *   LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and
+	 *   LNET_EVENT_ACK events in an outgoing PUT operation.
+	 */
+	LNET_EVENT_SEND,
+	/**
+	 * A MD has been unlinked. Note that LNetMDUnlink() does not
+	 * necessarily trigger an LNET_EVENT_UNLINK event.
+	 * \see LNetMDUnlink
+	 */
+	LNET_EVENT_UNLINK,
+} lnet_event_kind_t;
+
+#define LNET_SEQ_BASETYPE       long
+typedef unsigned LNET_SEQ_BASETYPE lnet_seq_t;
+#define LNET_SEQ_GT(a,b)	(((signed LNET_SEQ_BASETYPE)((a) - (b))) > 0)
+
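LNET_SEQ_GT compares sequence numbers through a signed subtraction, so it remains correct across counter wraparound (the same idiom as the kernel's time_after()); for instance, just past wraparound:

	/* (lnet_seq_t)-3 is ULONG_MAX - 2; 2 minus that wraps to 5, so "greater" holds */
	LNET_SEQ_GT((lnet_seq_t)2, (lnet_seq_t)-3);	/* nonzero */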
+/* XXX
+ * Cygwin needs the pragma line; it is not clear whether it's needed
+ * in other places. Checking!
+ */
+#ifdef __CYGWIN__
+#pragma pack(push, 4)
+#endif
+
+/**
+ * Information about an event on a MD.
+ */
+typedef struct {
+	/** The identifier (nid, pid) of the target. */
+	lnet_process_id_t   target;
+	/** The identifier (nid, pid) of the initiator. */
+	lnet_process_id_t   initiator;
+	/**
+	 * The NID of the immediate sender. If the request has been forwarded
+	 * by routers, this is the NID of the last hop; otherwise it's the
+	 * same as the initiator.
+	 */
+	lnet_nid_t	  sender;
+	/** Indicates the type of the event. */
+	lnet_event_kind_t   type;
+	/** The portal table index specified in the request */
+	unsigned int	pt_index;
+	/** A copy of the match bits specified in the request. */
+	__u64	       match_bits;
+	/** The length (in bytes) specified in the request. */
+	unsigned int	rlength;
+	/**
+	 * The length (in bytes) of the data that was manipulated by the
+	 * operation. For truncated operations, the manipulated length will be
+	 * the number of bytes specified by the MD (possibly with an offset,
+	 * see lnet_md_t). For all other operations, the manipulated length
+	 * will be the length of the requested operation, i.e. rlength.
+	 */
+	unsigned int	mlength;
+	/**
+	 * The handle to the MD associated with the event. The handle may be
+	 * invalid if the MD has been unlinked.
+	 */
+	lnet_handle_md_t    md_handle;
+	/**
+	 * A snapshot of the state of the MD immediately after the event has
+	 * been processed. In particular, the threshold field in md will
+	 * reflect the value of the threshold after the operation occurred.
+	 */
+	lnet_md_t	   md;
+	/**
+	 * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT.
+	 * \see LNetPut
+	 */
+	__u64	       hdr_data;
+	/**
+	 * Indicates the completion status of the operation. It's 0 for
+	 * successful operations, otherwise it's an error code.
+	 */
+	int		 status;
+	/**
+	 * Indicates whether the MD has been unlinked. Note that:
+	 * - An event with unlinked set is the last event on the MD.
+	 * - This field is also set for an explicit LNET_EVENT_UNLINK event.
+	 * \see LNetMDUnlink
+	 */
+	int		 unlinked;
+	/**
+	 * The displacement (in bytes) into the memory region that the
+	 * operation used. The offset can be determined by the operation for
+	 * a remote managed MD or by the local MD.
+	 * \see lnet_md_t::options
+	 */
+	unsigned int	offset;
+	/**
+	 * The sequence number for this event. Sequence numbers are unique
+	 * to each event.
+	 */
+	volatile lnet_seq_t sequence;
+} lnet_event_t;
+#ifdef __CYGWIN__
+#pragma pack(pop)
+#endif
+
+/**
+ * Event queue handler function type.
+ *
+ * The EQ handler runs for each event that is deposited into the EQ. The
+ * handler is supplied with a pointer to the event that triggered the
+ * handler invocation.
+ *
+ * The handler must not block, must be reentrant, and must not call any LNet
+ * API functions. It should return as quickly as possible.
+ */
+typedef void (*lnet_eq_handler_t)(lnet_event_t *event);
+#define LNET_EQ_HANDLER_NONE NULL
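
A sketch of a conforming handler, honoring the constraints above (no blocking, no LNet API calls; real work is deferred elsewhere):

	static void my_eq_handler(lnet_event_t *event)
	{
		/* only inspect and record; never block or re-enter LNet here */
		if (event->status != 0)
			CERROR("event type %d failed: %d\n",
			       event->type, event->status);
	}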
+/** @} lnet_eq */
+
+/** \addtogroup lnet_data
+ * @{ */
+
+/**
+ * Specify whether an acknowledgment should be sent by the target when the PUT
+ * operation completes (i.e., when the data has been written to a MD of the
+ * target process).
+ *
+ * \see lnet_md_t::options for the discussion on LNET_MD_ACK_DISABLE by which
+ * acknowledgments can be disabled for a MD.
+ */
+typedef enum {
+	/** Request an acknowledgment */
+	LNET_ACK_REQ,
+	/** Request that no acknowledgment should be generated. */
+	LNET_NOACK_REQ
+} lnet_ack_req_t;
+/** @} lnet_data */
+
+/** @} lnet */
+#endif

+ 40 - 0
drivers/staging/lustre/lnet/Kconfig

@@ -0,0 +1,40 @@
+config LNET
+	tristate "Lustre networking subsystem"
+	depends on LUSTRE_FS
+
+config LNET_MAX_PAYLOAD
+	int "Lustre lnet max transfer payload (default 2MB)"
+	depends on LUSTRE_FS
+	default "1048576"
+	help
+	  This option defines the maximum size of payload in bytes that lnet
+	  can put into its transport.
+
+	  If unsure, use the default.
+
+config LNET_SELFTEST
+	tristate "Lustre networking self testing"
+	depends on LNET
+	help
+	  Choose Y here if you want to do lnet self testing. To compile this
+	  as a module, choose M here: the module will be called lnet_selftest.
+
+	  If unsure, say N.
+
+	  See also http://wiki.lustre.org/
+
+config LNET_XPRT_IB
+	tristate "LNET infiniband support"
+	depends on LNET && INFINIBAND && INFINIBAND_ADDR_TRANS
+	default LNET && INFINIBAND
+	help
+	  This option allows LNET users to use InfiniBand as an
+	  RDMA-enabled transport.
+
+	  To compile this as a kernel module, choose M here and it will be
+	  called ko2iblnd.
+
+	  If unsure, say N.
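
An illustrative .config fragment selecting the options above as modules (the payload value is the default):

	CONFIG_LNET=m
	CONFIG_LNET_MAX_PAYLOAD=1048576
	CONFIG_LNET_SELFTEST=m
	CONFIG_LNET_XPRT_IB=m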

+ 1 - 0
drivers/staging/lustre/lnet/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_LNET) := klnds/ lnet/ selftest/

+ 1 - 0
drivers/staging/lustre/lnet/klnds/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_LNET) += o2iblnd/  socklnd/

+ 5 - 0
drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile

@@ -0,0 +1,5 @@
+obj-$(CONFIG_LNET_XPRT_IB) += ko2iblnd.o
+ko2iblnd-y := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o
+
+
+ccflags-y := -I$(src)/../../include

+ 3256 - 0
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c

@@ -0,0 +1,3256 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/o2iblnd/o2iblnd.c
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include "o2iblnd.h"
+
+lnd_t the_o2iblnd = {
+	.lnd_type       = O2IBLND,
+	.lnd_startup    = kiblnd_startup,
+	.lnd_shutdown   = kiblnd_shutdown,
+	.lnd_ctl	= kiblnd_ctl,
+	.lnd_query      = kiblnd_query,
+	.lnd_send       = kiblnd_send,
+	.lnd_recv       = kiblnd_recv,
+};
+
+kib_data_t	      kiblnd_data;
+
+__u32
+kiblnd_cksum (void *ptr, int nob)
+{
+	char  *c  = ptr;
+	__u32  sum = 0;
+
+	while (nob-- > 0)
+		sum = ((sum << 1) | (sum >> 31)) + *c++;
+
+	/* ensure I don't return 0 (== no checksum) */
+	return (sum == 0) ? 1 : sum;
+}
+
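The helper above is a 1-bit rotate-and-add over each byte of the message; since a wire checksum of 0 means "no checksum" (see kiblnd_unpack_msg() below, which only verifies a nonzero msg_cksum), a computed 0 is remapped to 1 before returning.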
+static char *
+kiblnd_msgtype2str(int type)
+{
+	switch (type) {
+	case IBLND_MSG_CONNREQ:
+		return "CONNREQ";
+
+	case IBLND_MSG_CONNACK:
+		return "CONNACK";
+
+	case IBLND_MSG_NOOP:
+		return "NOOP";
+
+	case IBLND_MSG_IMMEDIATE:
+		return "IMMEDIATE";
+
+	case IBLND_MSG_PUT_REQ:
+		return "PUT_REQ";
+
+	case IBLND_MSG_PUT_NAK:
+		return "PUT_NAK";
+
+	case IBLND_MSG_PUT_ACK:
+		return "PUT_ACK";
+
+	case IBLND_MSG_PUT_DONE:
+		return "PUT_DONE";
+
+	case IBLND_MSG_GET_REQ:
+		return "GET_REQ";
+
+	case IBLND_MSG_GET_DONE:
+		return "GET_DONE";
+
+	default:
+		return "???";
+	}
+}
+
+static int
+kiblnd_msgtype2size(int type)
+{
+	const int hdr_size = offsetof(kib_msg_t, ibm_u);
+
+	switch (type) {
+	case IBLND_MSG_CONNREQ:
+	case IBLND_MSG_CONNACK:
+		return hdr_size + sizeof(kib_connparams_t);
+
+	case IBLND_MSG_NOOP:
+		return hdr_size;
+
+	case IBLND_MSG_IMMEDIATE:
+		return offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]);
+
+	case IBLND_MSG_PUT_REQ:
+		return hdr_size + sizeof(kib_putreq_msg_t);
+
+	case IBLND_MSG_PUT_ACK:
+		return hdr_size + sizeof(kib_putack_msg_t);
+
+	case IBLND_MSG_GET_REQ:
+		return hdr_size + sizeof(kib_get_msg_t);
+
+	case IBLND_MSG_PUT_NAK:
+	case IBLND_MSG_PUT_DONE:
+	case IBLND_MSG_GET_DONE:
+		return hdr_size + sizeof(kib_completion_msg_t);
+	default:
+		return -1;
+	}
+}
+
+static int
+kiblnd_unpack_rd(kib_msg_t *msg, int flip)
+{
+	kib_rdma_desc_t   *rd;
+	int		nob;
+	int		n;
+	int		i;
+
+	LASSERT (msg->ibm_type == IBLND_MSG_GET_REQ ||
+		 msg->ibm_type == IBLND_MSG_PUT_ACK);
+
+	rd = msg->ibm_type == IBLND_MSG_GET_REQ ?
+			      &msg->ibm_u.get.ibgm_rd :
+			      &msg->ibm_u.putack.ibpam_rd;
+
+	if (flip) {
+		__swab32s(&rd->rd_key);
+		__swab32s(&rd->rd_nfrags);
+	}
+
+	n = rd->rd_nfrags;
+
+	if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
+		CERROR("Bad nfrags: %d, should be 0 < n <= %d\n",
+		       n, IBLND_MAX_RDMA_FRAGS);
+		return 1;
+	}
+
+	nob = offsetof (kib_msg_t, ibm_u) +
+	      kiblnd_rd_msg_size(rd, msg->ibm_type, n);
+
+	if (msg->ibm_nob < nob) {
+		CERROR("Short %s: %d(%d)\n",
+		       kiblnd_msgtype2str(msg->ibm_type), msg->ibm_nob, nob);
+		return 1;
+	}
+
+	if (!flip)
+		return 0;
+
+	for (i = 0; i < n; i++) {
+		__swab32s(&rd->rd_frags[i].rf_nob);
+		__swab64s(&rd->rd_frags[i].rf_addr);
+	}
+
+	return 0;
+}
+
+void
+kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg, int version,
+		 int credits, lnet_nid_t dstnid, __u64 dststamp)
+{
+	kib_net_t *net = ni->ni_data;
+
+	/* CAVEAT EMPTOR! all message fields not set here should have been
+	 * initialised previously. */
+	msg->ibm_magic    = IBLND_MSG_MAGIC;
+	msg->ibm_version  = version;
+	/*   ibm_type */
+	msg->ibm_credits  = credits;
+	/*   ibm_nob */
+	msg->ibm_cksum    = 0;
+	msg->ibm_srcnid   = ni->ni_nid;
+	msg->ibm_srcstamp = net->ibn_incarnation;
+	msg->ibm_dstnid   = dstnid;
+	msg->ibm_dststamp = dststamp;
+
+	if (*kiblnd_tunables.kib_cksum) {
+		/* NB ibm_cksum zero while computing cksum */
+		msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
+	}
+}
+
+int
+kiblnd_unpack_msg(kib_msg_t *msg, int nob)
+{
+	const int hdr_size = offsetof(kib_msg_t, ibm_u);
+	__u32     msg_cksum;
+	__u16     version;
+	int       msg_nob;
+	int       flip;
+
+	/* 6 bytes are enough to have received magic + version */
+	if (nob < 6) {
+		CERROR("Short message: %d\n", nob);
+		return -EPROTO;
+	}
+
+	if (msg->ibm_magic == IBLND_MSG_MAGIC) {
+		flip = 0;
+	} else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) {
+		flip = 1;
+	} else {
+		CERROR("Bad magic: %08x\n", msg->ibm_magic);
+		return -EPROTO;
+	}
+
+	version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
+	if (version != IBLND_MSG_VERSION &&
+	    version != IBLND_MSG_VERSION_1) {
+		CERROR("Bad version: %x\n", version);
+		return -EPROTO;
+	}
+
+	if (nob < hdr_size) {
+		CERROR("Short message: %d\n", nob);
+		return -EPROTO;
+	}
+
+	msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
+	if (msg_nob > nob) {
+		CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
+		return -EPROTO;
+	}
+
+	/* checksum must be computed with ibm_cksum zero and BEFORE anything
+	 * gets flipped */
+	msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
+	msg->ibm_cksum = 0;
+	if (msg_cksum != 0 &&
+	    msg_cksum != kiblnd_cksum(msg, msg_nob)) {
+		CERROR("Bad checksum\n");
+		return -EPROTO;
+	}
+
+	msg->ibm_cksum = msg_cksum;
+
+	if (flip) {
+		/* leave magic unflipped as a clue to peer endianness */
+		msg->ibm_version = version;
+		CLASSERT (sizeof(msg->ibm_type) == 1);
+		CLASSERT (sizeof(msg->ibm_credits) == 1);
+		msg->ibm_nob     = msg_nob;
+		__swab64s(&msg->ibm_srcnid);
+		__swab64s(&msg->ibm_srcstamp);
+		__swab64s(&msg->ibm_dstnid);
+		__swab64s(&msg->ibm_dststamp);
+	}
+
+	if (msg->ibm_srcnid == LNET_NID_ANY) {
+		CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
+		return -EPROTO;
+	}
+
+	if (msg_nob < kiblnd_msgtype2size(msg->ibm_type)) {
+		CERROR("Short %s: %d(%d)\n", kiblnd_msgtype2str(msg->ibm_type),
+		       msg_nob, kiblnd_msgtype2size(msg->ibm_type));
+		return -EPROTO;
+	}
+
+	switch (msg->ibm_type) {
+	default:
+		CERROR("Unknown message type %x\n", msg->ibm_type);
+		return -EPROTO;
+
+	case IBLND_MSG_NOOP:
+	case IBLND_MSG_IMMEDIATE:
+	case IBLND_MSG_PUT_REQ:
+		break;
+
+	case IBLND_MSG_PUT_ACK:
+	case IBLND_MSG_GET_REQ:
+		if (kiblnd_unpack_rd(msg, flip))
+			return -EPROTO;
+		break;
+
+	case IBLND_MSG_PUT_NAK:
+	case IBLND_MSG_PUT_DONE:
+	case IBLND_MSG_GET_DONE:
+		if (flip)
+			__swab32s(&msg->ibm_u.completion.ibcm_status);
+		break;
+
+	case IBLND_MSG_CONNREQ:
+	case IBLND_MSG_CONNACK:
+		if (flip) {
+			__swab16s(&msg->ibm_u.connparams.ibcp_queue_depth);
+			__swab16s(&msg->ibm_u.connparams.ibcp_max_frags);
+			__swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
+		}
+		break;
+	}
+	return 0;
+}
+
+int
+kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
+{
+	kib_peer_t	*peer;
+	kib_net_t	*net = ni->ni_data;
+	int		cpt = lnet_cpt_of_nid(nid);
+	unsigned long   flags;
+
+	LASSERT(net != NULL);
+	LASSERT(nid != LNET_NID_ANY);
+
+	LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer));
+	if (peer == NULL) {
+		CERROR("Cannot allocate peer\n");
+		return -ENOMEM;
+	}
+
+	memset(peer, 0, sizeof(*peer));	 /* zero flags etc */
+
+	peer->ibp_ni = ni;
+	peer->ibp_nid = nid;
+	peer->ibp_error = 0;
+	peer->ibp_last_alive = 0;
+	atomic_set(&peer->ibp_refcount, 1);  /* 1 ref for caller */
+
+	INIT_LIST_HEAD(&peer->ibp_list);     /* not in the peer table yet */
+	INIT_LIST_HEAD(&peer->ibp_conns);
+	INIT_LIST_HEAD(&peer->ibp_tx_queue);
+
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	/* always called with a ref on ni, which prevents ni being shutdown */
+	LASSERT (net->ibn_shutdown == 0);
+
+	/* npeers only grows with the global lock held */
+	atomic_inc(&net->ibn_npeers);
+
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	*peerp = peer;
+	return 0;
+}
+
+void
+kiblnd_destroy_peer (kib_peer_t *peer)
+{
+	kib_net_t *net = peer->ibp_ni->ni_data;
+
+	LASSERT (net != NULL);
+	LASSERT (atomic_read(&peer->ibp_refcount) == 0);
+	LASSERT (!kiblnd_peer_active(peer));
+	LASSERT (peer->ibp_connecting == 0);
+	LASSERT (peer->ibp_accepting == 0);
+	LASSERT (list_empty(&peer->ibp_conns));
+	LASSERT (list_empty(&peer->ibp_tx_queue));
+
+	LIBCFS_FREE(peer, sizeof(*peer));
+
+	/* NB a peer's connections keep a reference on their peer until
+	 * they are destroyed, so we can be assured that _all_ state to do
+	 * with this peer has been cleaned up when its refcount drops to
+	 * zero. */
+	atomic_dec(&net->ibn_npeers);
+}
+
+kib_peer_t *
+kiblnd_find_peer_locked (lnet_nid_t nid)
+{
+	/* the caller is responsible for accounting the additional reference
+	 * that this creates */
+	struct list_head       *peer_list = kiblnd_nid2peerlist(nid);
+	struct list_head       *tmp;
+	kib_peer_t       *peer;
+
+	list_for_each (tmp, peer_list) {
+
+		peer = list_entry(tmp, kib_peer_t, ibp_list);
+
+		LASSERT (peer->ibp_connecting > 0 || /* creating conns */
+			 peer->ibp_accepting > 0 ||
+			 !list_empty(&peer->ibp_conns));  /* active conn */
+
+		if (peer->ibp_nid != nid)
+			continue;
+
+		CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n",
+		       peer, libcfs_nid2str(nid),
+		       atomic_read(&peer->ibp_refcount),
+		       peer->ibp_version);
+		return peer;
+	}
+	return NULL;
+}
+
+void
+kiblnd_unlink_peer_locked (kib_peer_t *peer)
+{
+	LASSERT (list_empty(&peer->ibp_conns));
+
+	LASSERT (kiblnd_peer_active(peer));
+	list_del_init(&peer->ibp_list);
+	/* lose peerlist's ref */
+	kiblnd_peer_decref(peer);
+}
+
+int
+kiblnd_get_peer_info (lnet_ni_t *ni, int index,
+		      lnet_nid_t *nidp, int *count)
+{
+	kib_peer_t	    *peer;
+	struct list_head	    *ptmp;
+	int		    i;
+	unsigned long	  flags;
+
+	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
+
+		list_for_each (ptmp, &kiblnd_data.kib_peers[i]) {
+
+			peer = list_entry(ptmp, kib_peer_t, ibp_list);
+			LASSERT (peer->ibp_connecting > 0 ||
+				 peer->ibp_accepting > 0 ||
+				 !list_empty(&peer->ibp_conns));
+
+			if (peer->ibp_ni != ni)
+				continue;
+
+			if (index-- > 0)
+				continue;
+
+			*nidp = peer->ibp_nid;
+			*count = atomic_read(&peer->ibp_refcount);
+
+			read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+					       flags);
+			return 0;
+		}
+	}
+
+	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+	return -ENOENT;
+}
+
+void
+kiblnd_del_peer_locked (kib_peer_t *peer)
+{
+	struct list_head	   *ctmp;
+	struct list_head	   *cnxt;
+	kib_conn_t	   *conn;
+
+	if (list_empty(&peer->ibp_conns)) {
+		kiblnd_unlink_peer_locked(peer);
+	} else {
+		list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
+			conn = list_entry(ctmp, kib_conn_t, ibc_list);
+
+			kiblnd_close_conn_locked(conn, 0);
+		}
+		/* NB closing peer's last conn unlinked it. */
+	}
+	/* NB peer now unlinked; might even be freed if the peer table had the
+	 * last ref on it. */
+}
+
+int
+kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid)
+{
+	LIST_HEAD	 (zombies);
+	struct list_head	    *ptmp;
+	struct list_head	    *pnxt;
+	kib_peer_t	    *peer;
+	int		    lo;
+	int		    hi;
+	int		    i;
+	unsigned long	  flags;
+	int		    rc = -ENOENT;
+
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	if (nid != LNET_NID_ANY) {
+		lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
+	} else {
+		lo = 0;
+		hi = kiblnd_data.kib_peer_hash_size - 1;
+	}
+
+	for (i = lo; i <= hi; i++) {
+		list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
+			peer = list_entry(ptmp, kib_peer_t, ibp_list);
+			LASSERT (peer->ibp_connecting > 0 ||
+				 peer->ibp_accepting > 0 ||
+				 !list_empty(&peer->ibp_conns));
+
+			if (peer->ibp_ni != ni)
+				continue;
+
+			if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
+				continue;
+
+			if (!list_empty(&peer->ibp_tx_queue)) {
+				LASSERT (list_empty(&peer->ibp_conns));
+
+				list_splice_init(&peer->ibp_tx_queue,
+						     &zombies);
+			}
+
+			kiblnd_del_peer_locked(peer);
+			rc = 0;	 /* matched something */
+		}
+	}
+
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	kiblnd_txlist_done(ni, &zombies, -EIO);
+
+	return rc;
+}
+
+kib_conn_t *
+kiblnd_get_conn_by_idx (lnet_ni_t *ni, int index)
+{
+	kib_peer_t	    *peer;
+	struct list_head	    *ptmp;
+	kib_conn_t	    *conn;
+	struct list_head	    *ctmp;
+	int		    i;
+	unsigned long	  flags;
+
+	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
+		list_for_each (ptmp, &kiblnd_data.kib_peers[i]) {
+
+			peer = list_entry(ptmp, kib_peer_t, ibp_list);
+			LASSERT (peer->ibp_connecting > 0 ||
+				 peer->ibp_accepting > 0 ||
+				 !list_empty(&peer->ibp_conns));
+
+			if (peer->ibp_ni != ni)
+				continue;
+
+			list_for_each (ctmp, &peer->ibp_conns) {
+				if (index-- > 0)
+					continue;
+
+				conn = list_entry(ctmp, kib_conn_t,
+						      ibc_list);
+				kiblnd_conn_addref(conn);
+				read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+						       flags);
+				return conn;
+			}
+		}
+	}
+
+	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+	return NULL;
+}
+
+void
+kiblnd_debug_rx (kib_rx_t *rx)
+{
+	CDEBUG(D_CONSOLE, "      %p status %d msg_type %x cred %d\n",
+	       rx, rx->rx_status, rx->rx_msg->ibm_type,
+	       rx->rx_msg->ibm_credits);
+}
+
+void
+kiblnd_debug_tx (kib_tx_t *tx)
+{
+	CDEBUG(D_CONSOLE, "      %p snd %d q %d w %d rc %d dl %lx "
+	       "cookie "LPX64" msg %s%s type %x cred %d\n",
+	       tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting,
+	       tx->tx_status, tx->tx_deadline, tx->tx_cookie,
+	       tx->tx_lntmsg[0] == NULL ? "-" : "!",
+	       tx->tx_lntmsg[1] == NULL ? "-" : "!",
+	       tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits);
+}
+
+void
+kiblnd_debug_conn (kib_conn_t *conn)
+{
+	struct list_head	*tmp;
+	int		i;
+
+	spin_lock(&conn->ibc_lock);
+
+	CDEBUG(D_CONSOLE, "conn[%d] %p [version %x] -> %s: \n",
+	       atomic_read(&conn->ibc_refcount), conn,
+	       conn->ibc_version, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+	CDEBUG(D_CONSOLE, "   state %d nposted %d/%d cred %d o_cred %d r_cred %d\n",
+	       conn->ibc_state, conn->ibc_noops_posted,
+	       conn->ibc_nsends_posted, conn->ibc_credits,
+	       conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
+	CDEBUG(D_CONSOLE, "   comms_err %d\n", conn->ibc_comms_error);
+
+	CDEBUG(D_CONSOLE, "   early_rxs:\n");
+	list_for_each(tmp, &conn->ibc_early_rxs)
+		kiblnd_debug_rx(list_entry(tmp, kib_rx_t, rx_list));
+
+	CDEBUG(D_CONSOLE, "   tx_noops:\n");
+	list_for_each(tmp, &conn->ibc_tx_noops)
+		kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+	CDEBUG(D_CONSOLE, "   tx_queue_nocred:\n");
+	list_for_each(tmp, &conn->ibc_tx_queue_nocred)
+		kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+	CDEBUG(D_CONSOLE, "   tx_queue_rsrvd:\n");
+	list_for_each(tmp, &conn->ibc_tx_queue_rsrvd)
+		kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+	CDEBUG(D_CONSOLE, "   tx_queue:\n");
+	list_for_each(tmp, &conn->ibc_tx_queue)
+		kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+	CDEBUG(D_CONSOLE, "   active_txs:\n");
+	list_for_each(tmp, &conn->ibc_active_txs)
+		kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+	CDEBUG(D_CONSOLE, "   rxs:\n");
+	for (i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++)
+		kiblnd_debug_rx(&conn->ibc_rxs[i]);
+
+	spin_unlock(&conn->ibc_lock);
+}
+
+int
+kiblnd_translate_mtu(int value)
+{
+	switch (value) {
+	default:
+		return -1;
+	case 0:
+		return 0;
+	case 256:
+		return IB_MTU_256;
+	case 512:
+		return IB_MTU_512;
+	case 1024:
+		return IB_MTU_1024;
+	case 2048:
+		return IB_MTU_2048;
+	case 4096:
+		return IB_MTU_4096;
+	}
+}
+
+static void
+kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid)
+{
+	int	   mtu;
+
+	/* XXX There is no path record for iWARP, set by netdev->change_mtu? */
+	if (cmid->route.path_rec == NULL)
+		return;
+
+	mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu);
+	LASSERT (mtu >= 0);
+	if (mtu != 0)
+		cmid->route.path_rec->mtu = mtu;
+}
+
+static int
+kiblnd_get_completion_vector(kib_conn_t *conn, int cpt)
+{
+	cpumask_t	*mask;
+	int		vectors;
+	int		off;
+	int		i;
+
+	vectors = conn->ibc_cmid->device->num_comp_vectors;
+	if (vectors <= 1)
+		return 0;
+
+	mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt);
+
+	/* hash NID to CPU id in this partition... */
+	off = conn->ibc_peer->ibp_nid % cpus_weight(*mask);
+	for_each_cpu_mask(i, *mask) {
+		if (off-- == 0)
+			return i % vectors;
+	}
+
+	LBUG();
+	return 1;
+}
+
+kib_conn_t *
+kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
+		   int state, int version)
+{
+	/* CAVEAT EMPTOR:
+	 * If the new conn is created successfully it takes over the caller's
+	 * ref on 'peer'.  It also "owns" 'cmid' and destroys it when it itself
+	 * is destroyed.  On failure, the caller's ref on 'peer' remains and
+	 * she must dispose of 'cmid'.  (Actually I'd block forever if I tried
+	 * to destroy 'cmid' here since I'm called from the CM which still has
+	 * its ref on 'cmid'). */
+	rwlock_t		*glock = &kiblnd_data.kib_global_lock;
+	kib_net_t	      *net = peer->ibp_ni->ni_data;
+	kib_dev_t	      *dev;
+	struct ib_qp_init_attr *init_qp_attr;
+	struct kib_sched_info	*sched;
+	kib_conn_t		*conn;
+	struct ib_cq		*cq;
+	unsigned long		flags;
+	int			cpt;
+	int			rc;
+	int			i;
+
+	LASSERT(net != NULL);
+	LASSERT(!in_interrupt());
+
+	dev = net->ibn_dev;
+
+	cpt = lnet_cpt_of_nid(peer->ibp_nid);
+	sched = kiblnd_data.kib_scheds[cpt];
+
+	LASSERT(sched->ibs_nthreads > 0);
+
+	LIBCFS_CPT_ALLOC(init_qp_attr, lnet_cpt_table(), cpt,
+			 sizeof(*init_qp_attr));
+	if (init_qp_attr == NULL) {
+		CERROR("Can't allocate qp_attr for %s\n",
+		       libcfs_nid2str(peer->ibp_nid));
+		goto failed_0;
+	}
+
+	LIBCFS_CPT_ALLOC(conn, lnet_cpt_table(), cpt, sizeof(*conn));
+	if (conn == NULL) {
+		CERROR("Can't allocate connection for %s\n",
+		       libcfs_nid2str(peer->ibp_nid));
+		goto failed_1;
+	}
+
+	conn->ibc_state = IBLND_CONN_INIT;
+	conn->ibc_version = version;
+	conn->ibc_peer = peer;		  /* I take the caller's ref */
+	cmid->context = conn;		   /* for future CM callbacks */
+	conn->ibc_cmid = cmid;
+
+	INIT_LIST_HEAD(&conn->ibc_early_rxs);
+	INIT_LIST_HEAD(&conn->ibc_tx_noops);
+	INIT_LIST_HEAD(&conn->ibc_tx_queue);
+	INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd);
+	INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred);
+	INIT_LIST_HEAD(&conn->ibc_active_txs);
+	spin_lock_init(&conn->ibc_lock);
+
+	LIBCFS_CPT_ALLOC(conn->ibc_connvars, lnet_cpt_table(), cpt,
+			 sizeof(*conn->ibc_connvars));
+	if (conn->ibc_connvars == NULL) {
+		CERROR("Can't allocate in-progress connection state\n");
+		goto failed_2;
+	}
+
+	write_lock_irqsave(glock, flags);
+	if (dev->ibd_failover) {
+		write_unlock_irqrestore(glock, flags);
+		CERROR("%s: failover in progress\n", dev->ibd_ifname);
+		goto failed_2;
+	}
+
+	if (dev->ibd_hdev->ibh_ibdev != cmid->device) {
+		/* wakeup failover thread and teardown connection */
+		if (kiblnd_dev_can_failover(dev)) {
+			list_add_tail(&dev->ibd_fail_list,
+				      &kiblnd_data.kib_failed_devs);
+			wake_up(&kiblnd_data.kib_failover_waitq);
+		}
+
+		write_unlock_irqrestore(glock, flags);
+		CERROR("cmid HCA(%s), kib_dev(%s) need failover\n",
+		       cmid->device->name, dev->ibd_ifname);
+		goto failed_2;
+	}
+
+	kiblnd_hdev_addref_locked(dev->ibd_hdev);
+	conn->ibc_hdev = dev->ibd_hdev;
+
+	kiblnd_setup_mtu_locked(cmid);
+
+	write_unlock_irqrestore(glock, flags);
+
+	LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt,
+			 IBLND_RX_MSGS(version) * sizeof(kib_rx_t));
+	if (conn->ibc_rxs == NULL) {
+		CERROR("Cannot allocate RX buffers\n");
+		goto failed_2;
+	}
+
+	rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt,
+				IBLND_RX_MSG_PAGES(version));
+	if (rc != 0)
+		goto failed_2;
+
+	kiblnd_map_rx_descs(conn);
+
+	cq = ib_create_cq(cmid->device,
+			  kiblnd_cq_completion, kiblnd_cq_event, conn,
+			  IBLND_CQ_ENTRIES(version),
+			  kiblnd_get_completion_vector(conn, cpt));
+	if (IS_ERR(cq)) {
+		CERROR("Can't create CQ: %ld, cqe: %d\n",
+		       PTR_ERR(cq), IBLND_CQ_ENTRIES(version));
+		goto failed_2;
+	}
+
+	conn->ibc_cq = cq;
+
+	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+	if (rc != 0) {
+		CERROR("Can't request completion notificiation: %d\n", rc);
+		goto failed_2;
+	}
+
+	init_qp_attr->event_handler = kiblnd_qp_event;
+	init_qp_attr->qp_context = conn;
+	init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(version);
+	init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(version);
+	init_qp_attr->cap.max_send_sge = 1;
+	init_qp_attr->cap.max_recv_sge = 1;
+	init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+	init_qp_attr->qp_type = IB_QPT_RC;
+	init_qp_attr->send_cq = cq;
+	init_qp_attr->recv_cq = cq;
+
+	conn->ibc_sched = sched;
+
+	rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr);
+	if (rc != 0) {
+		CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n",
+		       rc, init_qp_attr->cap.max_send_wr,
+		       init_qp_attr->cap.max_recv_wr);
+		goto failed_2;
+	}
+
+	LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
+
+	/* 1 ref for caller and each rxmsg */
+	atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(version));
+	conn->ibc_nrx = IBLND_RX_MSGS(version);
+
+	/* post receives */
+	for (i = 0; i < IBLND_RX_MSGS(version); i++) {
+		rc = kiblnd_post_rx(&conn->ibc_rxs[i],
+				    IBLND_POSTRX_NO_CREDIT);
+		if (rc != 0) {
+			CERROR("Can't post rxmsg: %d\n", rc);
+
+			/* Make posted receives complete */
+			kiblnd_abort_receives(conn);
+
+			/* correct the # of posted buffers
+			 * NB locking needed since we now race with completion */
+			spin_lock_irqsave(&sched->ibs_lock, flags);
+			conn->ibc_nrx -= IBLND_RX_MSGS(version) - i;
+			spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+			/* cmid will be destroyed by CM(ofed) after cm_callback
+			 * returns, so we can't refer to it anymore
+			 * (by kiblnd_connd()->kiblnd_destroy_conn) */
+			rdma_destroy_qp(conn->ibc_cmid);
+			conn->ibc_cmid = NULL;
+
+			/* Drop my own and unused rxbuffer refcounts */
+			while (i++ <= IBLND_RX_MSGS(version))
+				kiblnd_conn_decref(conn);
+
+			return NULL;
+		}
+	}
+
+	/* Init successful! */
+	LASSERT (state == IBLND_CONN_ACTIVE_CONNECT ||
+		 state == IBLND_CONN_PASSIVE_WAIT);
+	conn->ibc_state = state;
+
+	/* 1 more conn */
+	atomic_inc(&net->ibn_nconns);
+	return conn;
+
+ failed_2:
+	kiblnd_destroy_conn(conn);
+ failed_1:
+	LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
+ failed_0:
+	return NULL;
+}
+
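+/* Final teardown, called once the last ref is dropped: the conn must be
+ * completely disengaged (all queues empty, nothing posted).  See the
+ * CAVEAT EMPTOR in kiblnd_create_conn for who owns 'cmid' and the peer
+ * ref at each state. */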
+void
+kiblnd_destroy_conn (kib_conn_t *conn)
+{
+	struct rdma_cm_id *cmid = conn->ibc_cmid;
+	kib_peer_t	*peer = conn->ibc_peer;
+	int		rc;
+
+	LASSERT (!in_interrupt());
+	LASSERT (atomic_read(&conn->ibc_refcount) == 0);
+	LASSERT (list_empty(&conn->ibc_early_rxs));
+	LASSERT (list_empty(&conn->ibc_tx_noops));
+	LASSERT (list_empty(&conn->ibc_tx_queue));
+	LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
+	LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
+	LASSERT (list_empty(&conn->ibc_active_txs));
+	LASSERT (conn->ibc_noops_posted == 0);
+	LASSERT (conn->ibc_nsends_posted == 0);
+
+	switch (conn->ibc_state) {
+	default:
+		/* conn must be completely disengaged from the network */
+		LBUG();
+
+	case IBLND_CONN_DISCONNECTED:
+		/* connvars should have been freed already */
+		LASSERT (conn->ibc_connvars == NULL);
+		break;
+
+	case IBLND_CONN_INIT:
+		break;
+	}
+
+	/* conn->ibc_cmid might be destroyed by CM already */
+	if (cmid != NULL && cmid->qp != NULL)
+		rdma_destroy_qp(cmid);
+
+	if (conn->ibc_cq != NULL) {
+		rc = ib_destroy_cq(conn->ibc_cq);
+		if (rc != 0)
+			CWARN("Error destroying CQ: %d\n", rc);
+	}
+
+	if (conn->ibc_rx_pages != NULL)
+		kiblnd_unmap_rx_descs(conn);
+
+	if (conn->ibc_rxs != NULL) {
+		LIBCFS_FREE(conn->ibc_rxs,
+			    IBLND_RX_MSGS(conn->ibc_version) * sizeof(kib_rx_t));
+	}
+
+	if (conn->ibc_connvars != NULL)
+		LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+
+	if (conn->ibc_hdev != NULL)
+		kiblnd_hdev_decref(conn->ibc_hdev);
+
+	/* See CAVEAT EMPTOR above in kiblnd_create_conn */
+	if (conn->ibc_state != IBLND_CONN_INIT) {
+		kib_net_t *net = peer->ibp_ni->ni_data;
+
+		kiblnd_peer_decref(peer);
+		rdma_destroy_id(cmid);
+		atomic_dec(&net->ibn_nconns);
+	}
+
+	LIBCFS_FREE(conn, sizeof(*conn));
+}
+
+int
+kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why)
+{
+	kib_conn_t	     *conn;
+	struct list_head	     *ctmp;
+	struct list_head	     *cnxt;
+	int		     count = 0;
+
+	list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
+		conn = list_entry(ctmp, kib_conn_t, ibc_list);
+
+		CDEBUG(D_NET, "Closing conn -> %s, "
+			      "version: %x, reason: %d\n",
+		       libcfs_nid2str(peer->ibp_nid),
+		       conn->ibc_version, why);
+
+		kiblnd_close_conn_locked(conn, why);
+		count++;
+	}
+
+	return count;
+}
+
+int
+kiblnd_close_stale_conns_locked (kib_peer_t *peer,
+				 int version, __u64 incarnation)
+{
+	kib_conn_t	     *conn;
+	struct list_head	     *ctmp;
+	struct list_head	     *cnxt;
+	int		     count = 0;
+
+	list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
+		conn = list_entry(ctmp, kib_conn_t, ibc_list);
+
+		if (conn->ibc_version     == version &&
+		    conn->ibc_incarnation == incarnation)
+			continue;
+
+		CDEBUG(D_NET, "Closing stale conn -> %s version: %x, "
+			      "incarnation:"LPX64"(%x, "LPX64")\n",
+		       libcfs_nid2str(peer->ibp_nid),
+		       conn->ibc_version, conn->ibc_incarnation,
+		       version, incarnation);
+
+		kiblnd_close_conn_locked(conn, -ESTALE);
+		count++;
+	}
+
+	return count;
+}
+
+int
+kiblnd_close_matching_conns (lnet_ni_t *ni, lnet_nid_t nid)
+{
+	kib_peer_t	     *peer;
+	struct list_head	     *ptmp;
+	struct list_head	     *pnxt;
+	int		     lo;
+	int		     hi;
+	int		     i;
+	unsigned long	   flags;
+	int		     count = 0;
+
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	if (nid != LNET_NID_ANY) {
+		lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
+	} else {
+		lo = 0;
+		hi = kiblnd_data.kib_peer_hash_size - 1;
+	}
+
+	for (i = lo; i <= hi; i++) {
+		list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
+
+			peer = list_entry(ptmp, kib_peer_t, ibp_list);
+			LASSERT (peer->ibp_connecting > 0 ||
+				 peer->ibp_accepting > 0 ||
+				 !list_empty(&peer->ibp_conns));
+
+			if (peer->ibp_ni != ni)
+				continue;
+
+			if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
+				continue;
+
+			count += kiblnd_close_peer_conns_locked(peer, 0);
+		}
+	}
+
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	/* wildcards always succeed */
+	if (nid == LNET_NID_ANY)
+		return 0;
+
+	return (count == 0) ? -ENOENT : 0;
+}
+
+int
+kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
+{
+	struct libcfs_ioctl_data *data = arg;
+	int		       rc = -EINVAL;
+
+	switch (cmd) {
+	case IOC_LIBCFS_GET_PEER: {
+		lnet_nid_t   nid = 0;
+		int	  count = 0;
+
+		rc = kiblnd_get_peer_info(ni, data->ioc_count,
+					  &nid, &count);
+		data->ioc_nid    = nid;
+		data->ioc_count  = count;
+		break;
+	}
+
+	case IOC_LIBCFS_DEL_PEER: {
+		rc = kiblnd_del_peer(ni, data->ioc_nid);
+		break;
+	}
+	case IOC_LIBCFS_GET_CONN: {
+		kib_conn_t *conn;
+
+		rc = 0;
+		conn = kiblnd_get_conn_by_idx(ni, data->ioc_count);
+		if (conn == NULL) {
+			rc = -ENOENT;
+			break;
+		}
+
+		LASSERT (conn->ibc_cmid != NULL);
+		data->ioc_nid = conn->ibc_peer->ibp_nid;
+		if (conn->ibc_cmid->route.path_rec == NULL)
+			data->ioc_u32[0] = 0; /* iWarp has no path MTU */
+		else
+			data->ioc_u32[0] =
+			ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu);
+		kiblnd_conn_decref(conn);
+		break;
+	}
+	case IOC_LIBCFS_CLOSE_CONNECTION: {
+		rc = kiblnd_close_matching_conns(ni, data->ioc_nid);
+		break;
+	}
+
+	default:
+		break;
+	}
+
+	return rc;
+}
+
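+/* LND "query" method: report when 'nid' was last known alive; if the peer
+ * isn't in the hash yet, kick off peer creation and connection
+ * establishment with a NULL tx so later queries have real state. */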
+void
+kiblnd_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
+{
+	cfs_time_t	last_alive = 0;
+	cfs_time_t	now = cfs_time_current();
+	rwlock_t	*glock = &kiblnd_data.kib_global_lock;
+	kib_peer_t	*peer;
+	unsigned long	flags;
+
+	read_lock_irqsave(glock, flags);
+
+	peer = kiblnd_find_peer_locked(nid);
+	if (peer != NULL) {
+		LASSERT (peer->ibp_connecting > 0 || /* creating conns */
+			 peer->ibp_accepting > 0 ||
+			 !list_empty(&peer->ibp_conns));  /* active conn */
+		last_alive = peer->ibp_last_alive;
+	}
+
+	read_unlock_irqrestore(glock, flags);
+
+	if (last_alive != 0)
+		*when = last_alive;
+
+	/* peer is not persistent in hash, trigger peer creation
+	 * and connection establishment with a NULL tx */
+	if (peer == NULL)
+		kiblnd_launch_tx(ni, NULL, nid);
+
+	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n",
+	       libcfs_nid2str(nid), peer,
+	       last_alive ? cfs_duration_sec(now - last_alive) : -1);
+	return;
+}
+
+void
+kiblnd_free_pages(kib_pages_t *p)
+{
+	int	npages = p->ibp_npages;
+	int	i;
+
+	for (i = 0; i < npages; i++) {
+		if (p->ibp_pages[i] != NULL)
+			__free_page(p->ibp_pages[i]);
+	}
+
+	LIBCFS_FREE(p, offsetof(kib_pages_t, ibp_pages[npages]));
+}
+
+int
+kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages)
+{
+	kib_pages_t	*p;
+	int		i;
+
+	LIBCFS_CPT_ALLOC(p, lnet_cpt_table(), cpt,
+			 offsetof(kib_pages_t, ibp_pages[npages]));
+	if (p == NULL) {
+		CERROR("Can't allocate descriptor for %d pages\n", npages);
+		return -ENOMEM;
+	}
+
+	memset(p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
+	p->ibp_npages = npages;
+
+	for (i = 0; i < npages; i++) {
+		p->ibp_pages[i] = cfs_page_cpt_alloc(lnet_cpt_table(), cpt,
+						     __GFP_IO);
+		if (p->ibp_pages[i] == NULL) {
+			CERROR("Can't allocate page %d of %d\n", i, npages);
+			kiblnd_free_pages(p);
+			return -ENOMEM;
+		}
+	}
+
+	*pp = p;
+	return 0;
+}
+
+void
+kiblnd_unmap_rx_descs(kib_conn_t *conn)
+{
+	kib_rx_t *rx;
+	int       i;
+
+	LASSERT (conn->ibc_rxs != NULL);
+	LASSERT (conn->ibc_hdev != NULL);
+
+	for (i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) {
+		rx = &conn->ibc_rxs[i];
+
+		LASSERT (rx->rx_nob >= 0); /* not posted */
+
+		kiblnd_dma_unmap_single(conn->ibc_hdev->ibh_ibdev,
+					KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
+							  rx->rx_msgaddr),
+					IBLND_MSG_SIZE, DMA_FROM_DEVICE);
+	}
+
+	kiblnd_free_pages(conn->ibc_rx_pages);
+
+	conn->ibc_rx_pages = NULL;
+}
+
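+/* Lay RX message buffers out back to back in the pre-allocated pages and
+ * DMA-map each one; IBLND_MSG_SIZE divides PAGE_SIZE (see the CLASSERTs
+ * in kiblnd_map_tx_pool), so a buffer never straddles a page boundary. */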
+void
+kiblnd_map_rx_descs(kib_conn_t *conn)
+{
+	kib_rx_t       *rx;
+	struct page    *pg;
+	int	     pg_off;
+	int	     ipg;
+	int	     i;
+
+	for (pg_off = ipg = i = 0;
+	     i < IBLND_RX_MSGS(conn->ibc_version); i++) {
+		pg = conn->ibc_rx_pages->ibp_pages[ipg];
+		rx = &conn->ibc_rxs[i];
+
+		rx->rx_conn = conn;
+		rx->rx_msg = (kib_msg_t *)(((char *)page_address(pg)) + pg_off);
+
+		rx->rx_msgaddr = kiblnd_dma_map_single(conn->ibc_hdev->ibh_ibdev,
+						       rx->rx_msg, IBLND_MSG_SIZE,
+						       DMA_FROM_DEVICE);
+		LASSERT (!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev,
+						   rx->rx_msgaddr));
+		KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr);
+
+		CDEBUG(D_NET,"rx %d: %p "LPX64"("LPX64")\n",
+		       i, rx->rx_msg, rx->rx_msgaddr,
+		       lnet_page2phys(pg) + pg_off);
+
+		pg_off += IBLND_MSG_SIZE;
+		LASSERT (pg_off <= PAGE_SIZE);
+
+		if (pg_off == PAGE_SIZE) {
+			pg_off = 0;
+			ipg++;
+			LASSERT (ipg <= IBLND_RX_MSG_PAGES(conn->ibc_version));
+		}
+	}
+}
+
+static void
+kiblnd_unmap_tx_pool(kib_tx_pool_t *tpo)
+{
+	kib_hca_dev_t  *hdev = tpo->tpo_hdev;
+	kib_tx_t       *tx;
+	int	     i;
+
+	LASSERT (tpo->tpo_pool.po_allocated == 0);
+
+	if (hdev == NULL)
+		return;
+
+	for (i = 0; i < tpo->tpo_pool.po_size; i++) {
+		tx = &tpo->tpo_tx_descs[i];
+		kiblnd_dma_unmap_single(hdev->ibh_ibdev,
+					KIBLND_UNMAP_ADDR(tx, tx_msgunmap,
+							  tx->tx_msgaddr),
+					IBLND_MSG_SIZE, DMA_TO_DEVICE);
+	}
+
+	kiblnd_hdev_decref(hdev);
+	tpo->tpo_hdev = NULL;
+}
+
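+/* Take a ref on the device's current HCA handle, sleeping briefly (about
+ * 10ms per loop) while a failover is in progress so we never map buffers
+ * against a dying HCA. */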
+static kib_hca_dev_t *
+kiblnd_current_hdev(kib_dev_t *dev)
+{
+	kib_hca_dev_t *hdev;
+	unsigned long  flags;
+	int	    i = 0;
+
+	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+	while (dev->ibd_failover) {
+		read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+		if (i++ % 50 == 0)
+			CDEBUG(D_NET, "%s: Wait for failover\n",
+			       dev->ibd_ifname);
+		schedule_timeout(cfs_time_seconds(1) / 100);
+
+		read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+	}
+
+	kiblnd_hdev_addref_locked(dev->ibd_hdev);
+	hdev = dev->ibd_hdev;
+
+	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	return hdev;
+}
+
+static void
+kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
+{
+	kib_pages_t    *txpgs = tpo->tpo_tx_pages;
+	kib_pool_t     *pool  = &tpo->tpo_pool;
+	kib_net_t      *net   = pool->po_owner->ps_net;
+	kib_dev_t      *dev;
+	struct page    *page;
+	kib_tx_t       *tx;
+	int	     page_offset;
+	int	     ipage;
+	int	     i;
+
+	LASSERT (net != NULL);
+
+	dev = net->ibn_dev;
+
+	/* pre-mapped messages are not bigger than 1 page */
+	CLASSERT (IBLND_MSG_SIZE <= PAGE_SIZE);
+
+	/* No fancy arithmetic when we do the buffer calculations */
+	CLASSERT (PAGE_SIZE % IBLND_MSG_SIZE == 0);
+
+	tpo->tpo_hdev = kiblnd_current_hdev(dev);
+
+	for (ipage = page_offset = i = 0; i < pool->po_size; i++) {
+		page = txpgs->ibp_pages[ipage];
+		tx = &tpo->tpo_tx_descs[i];
+
+		tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
+					   page_offset);
+
+		tx->tx_msgaddr = kiblnd_dma_map_single(
+			tpo->tpo_hdev->ibh_ibdev, tx->tx_msg,
+			IBLND_MSG_SIZE, DMA_TO_DEVICE);
+		LASSERT (!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev,
+						   tx->tx_msgaddr));
+		KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr);
+
+		list_add(&tx->tx_list, &pool->po_free_list);
+
+		page_offset += IBLND_MSG_SIZE;
+		LASSERT (page_offset <= PAGE_SIZE);
+
+		if (page_offset == PAGE_SIZE) {
+			page_offset = 0;
+			ipage++;
+			LASSERT (ipage <= txpgs->ibp_npages);
+		}
+	}
+}
+
+struct ib_mr *
+kiblnd_find_dma_mr(kib_hca_dev_t *hdev, __u64 addr, __u64 size)
+{
+	__u64   index;
+
+	LASSERT (hdev->ibh_mrs[0] != NULL);
+
+	if (hdev->ibh_nmrs == 1)
+		return hdev->ibh_mrs[0];
+
+	index = addr >> hdev->ibh_mr_shift;
+
+	if (index <  hdev->ibh_nmrs &&
+	    index == ((addr + size - 1) >> hdev->ibh_mr_shift))
+		return hdev->ibh_mrs[index];
+
+	return NULL;
+}
+
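+/* Find one MR that covers the whole RDMA descriptor.  Returns NULL to
+ * force FMR/PMR mapping when map-on-demand is enabled and rd has at least
+ * that many fragments, or when no single pre-registered MR spans every
+ * fragment. */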
+struct ib_mr *
+kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd)
+{
+	struct ib_mr *prev_mr;
+	struct ib_mr *mr;
+	int	   i;
+
+	LASSERT (hdev->ibh_mrs[0] != NULL);
+
+	if (*kiblnd_tunables.kib_map_on_demand > 0 &&
+	    *kiblnd_tunables.kib_map_on_demand <= rd->rd_nfrags)
+		return NULL;
+
+	if (hdev->ibh_nmrs == 1)
+		return hdev->ibh_mrs[0];
+
+	for (i = 0, mr = prev_mr = NULL;
+	     i < rd->rd_nfrags; i++) {
+		mr = kiblnd_find_dma_mr(hdev,
+					rd->rd_frags[i].rf_addr,
+					rd->rd_frags[i].rf_nob);
+		if (prev_mr == NULL)
+			prev_mr = mr;
+
+		if (mr == NULL || prev_mr != mr) {
+			/* Can't be covered by a single MR */
+			mr = NULL;
+			break;
+		}
+	}
+
+	return mr;
+}
+
+void
+kiblnd_destroy_fmr_pool(kib_fmr_pool_t *pool)
+{
+	LASSERT (pool->fpo_map_count == 0);
+
+	if (pool->fpo_fmr_pool != NULL)
+		ib_destroy_fmr_pool(pool->fpo_fmr_pool);
+
+	if (pool->fpo_hdev != NULL)
+		kiblnd_hdev_decref(pool->fpo_hdev);
+
+	LIBCFS_FREE(pool, sizeof(kib_fmr_pool_t));
+}
+
+void
+kiblnd_destroy_fmr_pool_list(struct list_head *head)
+{
+	kib_fmr_pool_t *pool;
+
+	while (!list_empty(head)) {
+		pool = list_entry(head->next, kib_fmr_pool_t, fpo_list);
+		list_del(&pool->fpo_list);
+		kiblnd_destroy_fmr_pool(pool);
+	}
+}
+
+static int kiblnd_fmr_pool_size(int ncpts)
+{
+	int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts;
+
+	return max(IBLND_FMR_POOL, size);
+}
+
+static int kiblnd_fmr_flush_trigger(int ncpts)
+{
+	int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts;
+
+	return max(IBLND_FMR_POOL_FLUSH, size);
+}
+
+int
+kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t **pp_fpo)
+{
+	/* FMR pool for RDMA */
+	kib_dev_t	       *dev = fps->fps_net->ibn_dev;
+	kib_fmr_pool_t	  *fpo;
+	struct ib_fmr_pool_param param = {
+		.max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
+		.page_shift	= PAGE_SHIFT,
+		.access	    = (IB_ACCESS_LOCAL_WRITE |
+				      IB_ACCESS_REMOTE_WRITE),
+		.pool_size	   = fps->fps_pool_size,
+		.dirty_watermark   = fps->fps_flush_trigger,
+		.flush_function    = NULL,
+		.flush_arg	 = NULL,
+		.cache	     = !!*kiblnd_tunables.kib_fmr_cache};
+	int rc;
+
+	LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo));
+	if (fpo == NULL)
+		return -ENOMEM;
+
+	fpo->fpo_hdev = kiblnd_current_hdev(dev);
+
+	fpo->fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, &param);
+	if (IS_ERR(fpo->fpo_fmr_pool)) {
+		rc = PTR_ERR(fpo->fpo_fmr_pool);
+		CERROR("Failed to create FMR pool: %d\n", rc);
+
+		kiblnd_hdev_decref(fpo->fpo_hdev);
+		LIBCFS_FREE(fpo, sizeof(kib_fmr_pool_t));
+		return rc;
+	}
+
+	fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+	fpo->fpo_owner    = fps;
+	*pp_fpo = fpo;
+
+	return 0;
+}
+
+static void
+kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps, struct list_head *zombies)
+{
+	if (fps->fps_net == NULL) /* initialized? */
+		return;
+
+	spin_lock(&fps->fps_lock);
+
+	while (!list_empty(&fps->fps_pool_list)) {
+		kib_fmr_pool_t *fpo = list_entry(fps->fps_pool_list.next,
+						 kib_fmr_pool_t, fpo_list);
+		fpo->fpo_failed = 1;
+		list_del(&fpo->fpo_list);
+		if (fpo->fpo_map_count == 0)
+			list_add(&fpo->fpo_list, zombies);
+		else
+			list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
+	}
+
+	spin_unlock(&fps->fps_lock);
+}
+
+static void
+kiblnd_fini_fmr_poolset(kib_fmr_poolset_t *fps)
+{
+	if (fps->fps_net != NULL) { /* initialized? */
+		kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list);
+		kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list);
+	}
+}
+
+static int
+kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, kib_net_t *net,
+			int pool_size, int flush_trigger)
+{
+	kib_fmr_pool_t *fpo;
+	int	     rc;
+
+	memset(fps, 0, sizeof(kib_fmr_poolset_t));
+
+	fps->fps_net = net;
+	fps->fps_cpt = cpt;
+	fps->fps_pool_size = pool_size;
+	fps->fps_flush_trigger = flush_trigger;
+	spin_lock_init(&fps->fps_lock);
+	INIT_LIST_HEAD(&fps->fps_pool_list);
+	INIT_LIST_HEAD(&fps->fps_failed_pool_list);
+
+	rc = kiblnd_create_fmr_pool(fps, &fpo);
+	if (rc == 0)
+		list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
+
+	return rc;
+}
+
+static int
+kiblnd_fmr_pool_is_idle(kib_fmr_pool_t *fpo, cfs_time_t now)
+{
+	if (fpo->fpo_map_count != 0) /* still in use */
+		return 0;
+	if (fpo->fpo_failed)
+		return 1;
+	return cfs_time_aftereq(now, fpo->fpo_deadline);
+}
+
+void
+kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
+{
+	LIST_HEAD     (zombies);
+	kib_fmr_pool_t    *fpo = fmr->fmr_pool;
+	kib_fmr_poolset_t *fps = fpo->fpo_owner;
+	cfs_time_t	 now = cfs_time_current();
+	kib_fmr_pool_t    *tmp;
+	int		rc;
+
+	rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
+	LASSERT (rc == 0);
+
+	if (status != 0) {
+		rc = ib_flush_fmr_pool(fpo->fpo_fmr_pool);
+		LASSERT (rc == 0);
+	}
+
+	fmr->fmr_pool = NULL;
+	fmr->fmr_pfmr = NULL;
+
+	spin_lock(&fps->fps_lock);
+	fpo->fpo_map_count--;	/* decref the pool */
+
+	list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) {
+		/* the first pool is persistent */
+		if (fps->fps_pool_list.next == &fpo->fpo_list)
+			continue;
+
+		if (kiblnd_fmr_pool_is_idle(fpo, now)) {
+			list_move(&fpo->fpo_list, &zombies);
+			fps->fps_version++;
+		}
+	}
+	spin_unlock(&fps->fps_lock);
+
+	if (!list_empty(&zombies))
+		kiblnd_destroy_fmr_pool_list(&zombies);
+}
+
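+/* Map 'pages' through an FMR pool of this poolset.  On -EAGAIN the pool
+ * is saturated: retry against a newer pool if fps_version has moved, wait
+ * if another thread is already growing the poolset, otherwise grow it
+ * here and loop back to 'again'. */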
+int
+kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
+		    __u64 iov, kib_fmr_t *fmr)
+{
+	struct ib_pool_fmr *pfmr;
+	kib_fmr_pool_t     *fpo;
+	__u64	       version;
+	int		 rc;
+
+ again:
+	spin_lock(&fps->fps_lock);
+	version = fps->fps_version;
+	list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
+		fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+		fpo->fpo_map_count++;
+		spin_unlock(&fps->fps_lock);
+
+		pfmr = ib_fmr_pool_map_phys(fpo->fpo_fmr_pool,
+					    pages, npages, iov);
+		if (likely(!IS_ERR(pfmr))) {
+			fmr->fmr_pool = fpo;
+			fmr->fmr_pfmr = pfmr;
+			return 0;
+		}
+
+		spin_lock(&fps->fps_lock);
+		fpo->fpo_map_count--;
+		if (PTR_ERR(pfmr) != -EAGAIN) {
+			spin_unlock(&fps->fps_lock);
+			return PTR_ERR(pfmr);
+		}
+
+		/* EAGAIN and ... */
+		if (version != fps->fps_version) {
+			spin_unlock(&fps->fps_lock);
+			goto again;
+		}
+	}
+
+	if (fps->fps_increasing) {
+		spin_unlock(&fps->fps_lock);
+		CDEBUG(D_NET, "Another thread is allocating new "
+		       "FMR pool, waiting for it to complete\n");
+		schedule();
+		goto again;
+	}
+
+	if (cfs_time_before(cfs_time_current(), fps->fps_next_retry)) {
+		/* someone failed recently */
+		spin_unlock(&fps->fps_lock);
+		return -EAGAIN;
+	}
+
+	fps->fps_increasing = 1;
+	spin_unlock(&fps->fps_lock);
+
+	CDEBUG(D_NET, "Allocate new FMR pool\n");
+	rc = kiblnd_create_fmr_pool(fps, &fpo);
+	spin_lock(&fps->fps_lock);
+	fps->fps_increasing = 0;
+	if (rc == 0) {
+		fps->fps_version++;
+		list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
+	} else {
+		fps->fps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
+	}
+	spin_unlock(&fps->fps_lock);
+
+	goto again;
+}
+
+static void
+kiblnd_fini_pool(kib_pool_t *pool)
+{
+	LASSERT (list_empty(&pool->po_free_list));
+	LASSERT (pool->po_allocated == 0);
+
+	CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name);
+}
+
+static void
+kiblnd_init_pool(kib_poolset_t *ps, kib_pool_t *pool, int size)
+{
+	CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name);
+
+	memset(pool, 0, sizeof(kib_pool_t));
+	INIT_LIST_HEAD(&pool->po_free_list);
+	pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+	pool->po_owner    = ps;
+	pool->po_size     = size;
+}
+
+void
+kiblnd_destroy_pool_list(struct list_head *head)
+{
+	kib_pool_t *pool;
+
+	while (!list_empty(head)) {
+		pool = list_entry(head->next, kib_pool_t, po_list);
+		list_del(&pool->po_list);
+
+		LASSERT (pool->po_owner != NULL);
+		pool->po_owner->ps_pool_destroy(pool);
+	}
+}
+
+static void
+kiblnd_fail_poolset(kib_poolset_t *ps, struct list_head *zombies)
+{
+	if (ps->ps_net == NULL) /* initialized? */
+		return;
+
+	spin_lock(&ps->ps_lock);
+	while (!list_empty(&ps->ps_pool_list)) {
+		kib_pool_t *po = list_entry(ps->ps_pool_list.next,
+					    kib_pool_t, po_list);
+		po->po_failed = 1;
+		list_del(&po->po_list);
+		if (po->po_allocated == 0)
+			list_add(&po->po_list, zombies);
+		else
+			list_add(&po->po_list, &ps->ps_failed_pool_list);
+	}
+	spin_unlock(&ps->ps_lock);
+}
+
+static void
+kiblnd_fini_poolset(kib_poolset_t *ps)
+{
+	if (ps->ps_net != NULL) { /* initialized? */
+		kiblnd_destroy_pool_list(&ps->ps_failed_pool_list);
+		kiblnd_destroy_pool_list(&ps->ps_pool_list);
+	}
+}
+
+static int
+kiblnd_init_poolset(kib_poolset_t *ps, int cpt,
+		    kib_net_t *net, char *name, int size,
+		    kib_ps_pool_create_t po_create,
+		    kib_ps_pool_destroy_t po_destroy,
+		    kib_ps_node_init_t nd_init,
+		    kib_ps_node_fini_t nd_fini)
+{
+	kib_pool_t	*pool;
+	int		rc;
+
+	memset(ps, 0, sizeof(kib_poolset_t));
+
+	ps->ps_cpt	    = cpt;
+	ps->ps_net	  = net;
+	ps->ps_pool_create  = po_create;
+	ps->ps_pool_destroy = po_destroy;
+	ps->ps_node_init    = nd_init;
+	ps->ps_node_fini    = nd_fini;
+	ps->ps_pool_size    = size;
+	if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name))
+	    >= sizeof(ps->ps_name))
+		return -E2BIG;
+	spin_lock_init(&ps->ps_lock);
+	INIT_LIST_HEAD(&ps->ps_pool_list);
+	INIT_LIST_HEAD(&ps->ps_failed_pool_list);
+
+	rc = ps->ps_pool_create(ps, size, &pool);
+	if (rc == 0)
+		list_add(&pool->po_list, &ps->ps_pool_list);
+	else
+		CERROR("Failed to create the first pool for %s\n", ps->ps_name);
+
+	return rc;
+}
+
+static int
+kiblnd_pool_is_idle(kib_pool_t *pool, cfs_time_t now)
+{
+	if (pool->po_allocated != 0) /* still in use */
+		return 0;
+	if (pool->po_failed)
+		return 1;
+	return cfs_time_aftereq(now, pool->po_deadline);
+}
+
+void
+kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node)
+{
+	LIST_HEAD  (zombies);
+	kib_poolset_t  *ps = pool->po_owner;
+	kib_pool_t     *tmp;
+	cfs_time_t      now = cfs_time_current();
+
+	spin_lock(&ps->ps_lock);
+
+	if (ps->ps_node_fini != NULL)
+		ps->ps_node_fini(pool, node);
+
+	LASSERT (pool->po_allocated > 0);
+	list_add(node, &pool->po_free_list);
+	pool->po_allocated--;
+
+	list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) {
+		/* the first pool is persistent */
+		if (ps->ps_pool_list.next == &pool->po_list)
+			continue;
+
+		if (kiblnd_pool_is_idle(pool, now))
+			list_move(&pool->po_list, &zombies);
+	}
+	spin_unlock(&ps->ps_lock);
+
+	if (!list_empty(&zombies))
+		kiblnd_destroy_pool_list(&zombies);
+}
+
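+/* Generic pool-node allocator shared by the TX and PMR pools: take a free
+ * node from the first pool that has one, otherwise grow the poolset.
+ * Only one thread grows the poolset at a time; the others reschedule and
+ * retry. */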
+struct list_head *
+kiblnd_pool_alloc_node(kib_poolset_t *ps)
+{
+	struct list_head	    *node;
+	kib_pool_t	    *pool;
+	int		    rc;
+
+ again:
+	spin_lock(&ps->ps_lock);
+	list_for_each_entry(pool, &ps->ps_pool_list, po_list) {
+		if (list_empty(&pool->po_free_list))
+			continue;
+
+		pool->po_allocated++;
+		pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+		node = pool->po_free_list.next;
+		list_del(node);
+
+		if (ps->ps_node_init != NULL) {
+			/* still hold the lock */
+			ps->ps_node_init(pool, node);
+		}
+		spin_unlock(&ps->ps_lock);
+		return node;
+	}
+
+	/* no available tx pool and ... */
+	if (ps->ps_increasing) {
+		/* another thread is allocating a new pool */
+		spin_unlock(&ps->ps_lock);
+		CDEBUG(D_NET, "Another thread is allocating new "
+		       "%s pool, waiting for her to complete\n",
+		       ps->ps_name);
+		schedule();
+		goto again;
+	}
+
+	if (cfs_time_before(cfs_time_current(), ps->ps_next_retry)) {
+		/* someone failed recently */
+		spin_unlock(&ps->ps_lock);
+		return NULL;
+	}
+
+	ps->ps_increasing = 1;
+	spin_unlock(&ps->ps_lock);
+
+	CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name);
+
+	rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);
+
+	spin_lock(&ps->ps_lock);
+	ps->ps_increasing = 0;
+	if (rc == 0) {
+		list_add_tail(&pool->po_list, &ps->ps_pool_list);
+	} else {
+		ps->ps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
+		CERROR("Can't allocate new %s pool because out of memory\n",
+		       ps->ps_name);
+	}
+	spin_unlock(&ps->ps_lock);
+
+	goto again;
+}
+
+void
+kiblnd_pmr_pool_unmap(kib_phys_mr_t *pmr)
+{
+	kib_pmr_pool_t      *ppo = pmr->pmr_pool;
+	struct ib_mr	*mr  = pmr->pmr_mr;
+
+	pmr->pmr_mr = NULL;
+	kiblnd_pool_free_node(&ppo->ppo_pool, &pmr->pmr_list);
+	if (mr != NULL)
+		ib_dereg_mr(mr);
+}
+
+int
+kiblnd_pmr_pool_map(kib_pmr_poolset_t *pps, kib_hca_dev_t *hdev,
+		    kib_rdma_desc_t *rd, __u64 *iova, kib_phys_mr_t **pp_pmr)
+{
+	kib_phys_mr_t *pmr;
+	struct list_head    *node;
+	int	    rc;
+	int	    i;
+
+	node = kiblnd_pool_alloc_node(&pps->pps_poolset);
+	if (node == NULL) {
+		CERROR("Failed to allocate PMR descriptor\n");
+		return -ENOMEM;
+	}
+
+	pmr = container_of(node, kib_phys_mr_t, pmr_list);
+	if (pmr->pmr_pool->ppo_hdev != hdev) {
+		kiblnd_pool_free_node(&pmr->pmr_pool->ppo_pool, node);
+		return -EAGAIN;
+	}
+
+	for (i = 0; i < rd->rd_nfrags; i++) {
+		pmr->pmr_ipb[i].addr = rd->rd_frags[i].rf_addr;
+		pmr->pmr_ipb[i].size = rd->rd_frags[i].rf_nob;
+	}
+
+	pmr->pmr_mr = ib_reg_phys_mr(hdev->ibh_pd,
+				     pmr->pmr_ipb, rd->rd_nfrags,
+				     IB_ACCESS_LOCAL_WRITE |
+				     IB_ACCESS_REMOTE_WRITE,
+				     iova);
+	if (!IS_ERR(pmr->pmr_mr)) {
+		pmr->pmr_iova = *iova;
+		*pp_pmr = pmr;
+		return 0;
+	}
+
+	rc = PTR_ERR(pmr->pmr_mr);
+	CERROR("Failed ib_reg_phys_mr: %d\n", rc);
+
+	pmr->pmr_mr = NULL;
+	kiblnd_pool_free_node(&pmr->pmr_pool->ppo_pool, node);
+
+	return rc;
+}
+
+static void
+kiblnd_destroy_pmr_pool(kib_pool_t *pool)
+{
+	kib_pmr_pool_t *ppo = container_of(pool, kib_pmr_pool_t, ppo_pool);
+	kib_phys_mr_t  *pmr;
+
+	LASSERT (pool->po_allocated == 0);
+
+	while (!list_empty(&pool->po_free_list)) {
+		pmr = list_entry(pool->po_free_list.next,
+				     kib_phys_mr_t, pmr_list);
+
+		LASSERT (pmr->pmr_mr == NULL);
+		list_del(&pmr->pmr_list);
+
+		if (pmr->pmr_ipb != NULL) {
+			LIBCFS_FREE(pmr->pmr_ipb,
+				    IBLND_MAX_RDMA_FRAGS *
+				    sizeof(struct ib_phys_buf));
+		}
+
+		LIBCFS_FREE(pmr, sizeof(kib_phys_mr_t));
+	}
+
+	kiblnd_fini_pool(pool);
+	if (ppo->ppo_hdev != NULL)
+		kiblnd_hdev_decref(ppo->ppo_hdev);
+
+	LIBCFS_FREE(ppo, sizeof(kib_pmr_pool_t));
+}
+
+static inline int kiblnd_pmr_pool_size(int ncpts)
+{
+	int size = *kiblnd_tunables.kib_pmr_pool_size / ncpts;
+
+	return max(IBLND_PMR_POOL, size);
+}
+
+static int
+kiblnd_create_pmr_pool(kib_poolset_t *ps, int size, kib_pool_t **pp_po)
+{
+	struct kib_pmr_pool	*ppo;
+	struct kib_pool		*pool;
+	kib_phys_mr_t		*pmr;
+	int			i;
+
+	LIBCFS_CPT_ALLOC(ppo, lnet_cpt_table(),
+			 ps->ps_cpt, sizeof(kib_pmr_pool_t));
+	if (ppo == NULL) {
+		CERROR("Failed to allocate PMR pool\n");
+		return -ENOMEM;
+	}
+
+	pool = &ppo->ppo_pool;
+	kiblnd_init_pool(ps, pool, size);
+
+	for (i = 0; i < size; i++) {
+		LIBCFS_CPT_ALLOC(pmr, lnet_cpt_table(),
+				 ps->ps_cpt, sizeof(kib_phys_mr_t));
+		if (pmr == NULL)
+			break;
+
+		pmr->pmr_pool = ppo;
+		LIBCFS_CPT_ALLOC(pmr->pmr_ipb, lnet_cpt_table(), ps->ps_cpt,
+				 IBLND_MAX_RDMA_FRAGS * sizeof(*pmr->pmr_ipb));
+		if (pmr->pmr_ipb == NULL)
+			break;
+
+		list_add(&pmr->pmr_list, &pool->po_free_list);
+	}
+
+	if (i < size) {
+		ps->ps_pool_destroy(pool);
+		return -ENOMEM;
+	}
+
+	ppo->ppo_hdev = kiblnd_current_hdev(ps->ps_net->ibn_dev);
+	*pp_po = pool;
+	return 0;
+}
+
+static void
+kiblnd_destroy_tx_pool(kib_pool_t *pool)
+{
+	kib_tx_pool_t  *tpo = container_of(pool, kib_tx_pool_t, tpo_pool);
+	int	     i;
+
+	LASSERT (pool->po_allocated == 0);
+
+	if (tpo->tpo_tx_pages != NULL) {
+		kiblnd_unmap_tx_pool(tpo);
+		kiblnd_free_pages(tpo->tpo_tx_pages);
+	}
+
+	if (tpo->tpo_tx_descs == NULL)
+		goto out;
+
+	for (i = 0; i < pool->po_size; i++) {
+		kib_tx_t *tx = &tpo->tpo_tx_descs[i];
+
+		list_del(&tx->tx_list);
+		if (tx->tx_pages != NULL)
+			LIBCFS_FREE(tx->tx_pages,
+				    LNET_MAX_IOV *
+				    sizeof(*tx->tx_pages));
+		if (tx->tx_frags != NULL)
+			LIBCFS_FREE(tx->tx_frags,
+				    IBLND_MAX_RDMA_FRAGS *
+					    sizeof(*tx->tx_frags));
+		if (tx->tx_wrq != NULL)
+			LIBCFS_FREE(tx->tx_wrq,
+				    (1 + IBLND_MAX_RDMA_FRAGS) *
+				    sizeof(*tx->tx_wrq));
+		if (tx->tx_sge != NULL)
+			LIBCFS_FREE(tx->tx_sge,
+				    (1 + IBLND_MAX_RDMA_FRAGS) *
+				    sizeof(*tx->tx_sge));
+		if (tx->tx_rd != NULL)
+			LIBCFS_FREE(tx->tx_rd,
+				    offsetof(kib_rdma_desc_t,
+					     rd_frags[IBLND_MAX_RDMA_FRAGS]));
+	}
+
+	LIBCFS_FREE(tpo->tpo_tx_descs,
+		    pool->po_size * sizeof(kib_tx_t));
+out:
+	kiblnd_fini_pool(pool);
+	LIBCFS_FREE(tpo, sizeof(kib_tx_pool_t));
+}
+
+static int kiblnd_tx_pool_size(int ncpts)
+{
+	int ntx = *kiblnd_tunables.kib_ntx / ncpts;
+
+	return max(IBLND_TX_POOL, ntx);
+}
+
+static int
+kiblnd_create_tx_pool(kib_poolset_t *ps, int size, kib_pool_t **pp_po)
+{
+	int	    i;
+	int	    npg;
+	kib_pool_t    *pool;
+	kib_tx_pool_t *tpo;
+
+	LIBCFS_CPT_ALLOC(tpo, lnet_cpt_table(), ps->ps_cpt, sizeof(*tpo));
+	if (tpo == NULL) {
+		CERROR("Failed to allocate TX pool\n");
+		return -ENOMEM;
+	}
+
+	pool = &tpo->tpo_pool;
+	kiblnd_init_pool(ps, pool, size);
+	tpo->tpo_tx_descs = NULL;
+	tpo->tpo_tx_pages = NULL;
+
+	npg = (size * IBLND_MSG_SIZE + PAGE_SIZE - 1) / PAGE_SIZE;
+	if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg) != 0) {
+		CERROR("Can't allocate tx pages: %d\n", npg);
+		LIBCFS_FREE(tpo, sizeof(kib_tx_pool_t));
+		return -ENOMEM;
+	}
+
+	LIBCFS_CPT_ALLOC(tpo->tpo_tx_descs, lnet_cpt_table(), ps->ps_cpt,
+			 size * sizeof(kib_tx_t));
+	if (tpo->tpo_tx_descs == NULL) {
+		CERROR("Can't allocate %d tx descriptors\n", size);
+		ps->ps_pool_destroy(pool);
+		return -ENOMEM;
+	}
+
+	memset(tpo->tpo_tx_descs, 0, size * sizeof(kib_tx_t));
+
+	for (i = 0; i < size; i++) {
+		kib_tx_t *tx = &tpo->tpo_tx_descs[i];
+
+		tx->tx_pool = tpo;
+		if (ps->ps_net->ibn_fmr_ps != NULL) {
+			LIBCFS_CPT_ALLOC(tx->tx_pages,
+					 lnet_cpt_table(), ps->ps_cpt,
+					 LNET_MAX_IOV * sizeof(*tx->tx_pages));
+			if (tx->tx_pages == NULL)
+				break;
+		}
+
+		LIBCFS_CPT_ALLOC(tx->tx_frags, lnet_cpt_table(), ps->ps_cpt,
+				 IBLND_MAX_RDMA_FRAGS * sizeof(*tx->tx_frags));
+		if (tx->tx_frags == NULL)
+			break;
+
+		sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS);
+
+		LIBCFS_CPT_ALLOC(tx->tx_wrq, lnet_cpt_table(), ps->ps_cpt,
+				 (1 + IBLND_MAX_RDMA_FRAGS) *
+				 sizeof(*tx->tx_wrq));
+		if (tx->tx_wrq == NULL)
+			break;
+
+		LIBCFS_CPT_ALLOC(tx->tx_sge, lnet_cpt_table(), ps->ps_cpt,
+				 (1 + IBLND_MAX_RDMA_FRAGS) *
+				 sizeof(*tx->tx_sge));
+		if (tx->tx_sge == NULL)
+			break;
+
+		LIBCFS_CPT_ALLOC(tx->tx_rd, lnet_cpt_table(), ps->ps_cpt,
+				 offsetof(kib_rdma_desc_t,
+					  rd_frags[IBLND_MAX_RDMA_FRAGS]));
+		if (tx->tx_rd == NULL)
+			break;
+	}
+
+	if (i == size) {
+		kiblnd_map_tx_pool(tpo);
+		*pp_po = pool;
+		return 0;
+	}
+
+	ps->ps_pool_destroy(pool);
+	return -ENOMEM;
+}
+
+static void
+kiblnd_tx_init(kib_pool_t *pool, struct list_head *node)
+{
+	kib_tx_poolset_t *tps = container_of(pool->po_owner, kib_tx_poolset_t,
+					     tps_poolset);
+	kib_tx_t	 *tx  = list_entry(node, kib_tx_t, tx_list);
+
+	tx->tx_cookie = tps->tps_next_tx_cookie++;
+}
+
+void
+kiblnd_net_fini_pools(kib_net_t *net)
+{
+	int	i;
+
+	cfs_cpt_for_each(i, lnet_cpt_table()) {
+		kib_tx_poolset_t	*tps;
+		kib_fmr_poolset_t	*fps;
+		kib_pmr_poolset_t	*pps;
+
+		if (net->ibn_tx_ps != NULL) {
+			tps = net->ibn_tx_ps[i];
+			kiblnd_fini_poolset(&tps->tps_poolset);
+		}
+
+		if (net->ibn_fmr_ps != NULL) {
+			fps = net->ibn_fmr_ps[i];
+			kiblnd_fini_fmr_poolset(fps);
+		}
+
+		if (net->ibn_pmr_ps != NULL) {
+			pps = net->ibn_pmr_ps[i];
+			kiblnd_fini_poolset(&pps->pps_poolset);
+		}
+	}
+
+	if (net->ibn_tx_ps != NULL) {
+		cfs_percpt_free(net->ibn_tx_ps);
+		net->ibn_tx_ps = NULL;
+	}
+
+	if (net->ibn_fmr_ps != NULL) {
+		cfs_percpt_free(net->ibn_fmr_ps);
+		net->ibn_fmr_ps = NULL;
+	}
+
+	if (net->ibn_pmr_ps != NULL) {
+		cfs_percpt_free(net->ibn_pmr_ps);
+		net->ibn_pmr_ps = NULL;
+	}
+}
+
+int
+kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
+{
+	unsigned long	flags;
+	int		cpt;
+	int		rc;
+	int		i;
+
+	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+	if (*kiblnd_tunables.kib_map_on_demand == 0 &&
+	    net->ibn_dev->ibd_hdev->ibh_nmrs == 1) {
+		read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+		goto create_tx_pool;
+	}
+
+	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	if (*kiblnd_tunables.kib_fmr_pool_size <
+	    *kiblnd_tunables.kib_ntx / 4) {
+		CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
+		       *kiblnd_tunables.kib_fmr_pool_size,
+		       *kiblnd_tunables.kib_ntx / 4);
+		rc = -EINVAL;
+		goto failed;
+	}
+
+	/* TX pool must be created later than FMR/PMR, see LU-2268
+	 * for details */
+	LASSERT(net->ibn_tx_ps == NULL);
+
+	/* premapping can fail if ibd_nmr > 1, so we always create an
+	 * FMR/PMR pool and fall back to map-on-demand if premapping fails */
+
+	net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(),
+					   sizeof(kib_fmr_poolset_t));
+	if (net->ibn_fmr_ps == NULL) {
+		CERROR("Failed to allocate FMR pool array\n");
+		rc = -ENOMEM;
+		goto failed;
+	}
+
+	for (i = 0; i < ncpts; i++) {
+		cpt = (cpts == NULL) ? i : cpts[i];
+		rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net,
+					     kiblnd_fmr_pool_size(ncpts),
+					     kiblnd_fmr_flush_trigger(ncpts));
+		if (rc == -ENOSYS && i == 0) /* no FMR */
+			break; /* create PMR pool */
+
+		if (rc != 0) { /* a real error */
+			CERROR("Can't initialize FMR pool for CPT %d: %d\n",
+			       cpt, rc);
+			goto failed;
+		}
+	}
+
+	if (i > 0) {
+		LASSERT(i == ncpts);
+		goto create_tx_pool;
+	}
+
+	cfs_percpt_free(net->ibn_fmr_ps);
+	net->ibn_fmr_ps = NULL;
+
+	CWARN("Device does not support FMR, failing back to PMR\n");
+
+	if (*kiblnd_tunables.kib_pmr_pool_size <
+	    *kiblnd_tunables.kib_ntx / 4) {
+		CERROR("Can't set pmr pool size (%d) < ntx / 4(%d)\n",
+		       *kiblnd_tunables.kib_pmr_pool_size,
+		       *kiblnd_tunables.kib_ntx / 4);
+		rc = -EINVAL;
+		goto failed;
+	}
+
+	net->ibn_pmr_ps = cfs_percpt_alloc(lnet_cpt_table(),
+					   sizeof(kib_pmr_poolset_t));
+	if (net->ibn_pmr_ps == NULL) {
+		CERROR("Failed to allocate PMR pool array\n");
+		rc = -ENOMEM;
+		goto failed;
+	}
+
+	for (i = 0; i < ncpts; i++) {
+		cpt = (cpts == NULL) ? i : cpts[i];
+		rc = kiblnd_init_poolset(&net->ibn_pmr_ps[cpt]->pps_poolset,
+					 cpt, net, "PMR",
+					 kiblnd_pmr_pool_size(ncpts),
+					 kiblnd_create_pmr_pool,
+					 kiblnd_destroy_pmr_pool, NULL, NULL);
+		if (rc != 0) {
+			CERROR("Can't initialize PMR pool for CPT %d: %d\n",
+			       cpt, rc);
+			goto failed;
+		}
+	}
+
+ create_tx_pool:
+	net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
+					  sizeof(kib_tx_poolset_t));
+	if (net->ibn_tx_ps == NULL) {
+		CERROR("Failed to allocate tx pool array\n");
+		rc = -ENOMEM;
+		goto failed;
+	}
+
+	for (i = 0; i < ncpts; i++) {
+		cpt = (cpts == NULL) ? i : cpts[i];
+		rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset,
+					 cpt, net, "TX",
+					 kiblnd_tx_pool_size(ncpts),
+					 kiblnd_create_tx_pool,
+					 kiblnd_destroy_tx_pool,
+					 kiblnd_tx_init, NULL);
+		if (rc != 0) {
+			CERROR("Can't initialize TX pool for CPT %d: %d\n",
+			       cpt, rc);
+			goto failed;
+		}
+	}
+
+	return 0;
+ failed:
+	kiblnd_net_fini_pools(net);
+	LASSERT(rc != 0);
+	return rc;
+}
+
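+/* Query the HCA attributes and derive ibh_mr_shift, the log2 of the
+ * largest MR the device supports; it sizes the global MR array below.
+ * Only sizes of exactly 2^n or 2^n - 1 are accepted. */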
+static int
+kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
+{
+	struct ib_device_attr *attr;
+	int		    rc;
+
+	/* It's safe to assume a HCA can handle a page size
+	 * matching that of the native system */
+	hdev->ibh_page_shift = PAGE_SHIFT;
+	hdev->ibh_page_size  = 1 << PAGE_SHIFT;
+	hdev->ibh_page_mask  = ~((__u64)hdev->ibh_page_size - 1);
+
+	LIBCFS_ALLOC(attr, sizeof(*attr));
+	if (attr == NULL) {
+		CERROR("Out of memory\n");
+		return -ENOMEM;
+	}
+
+	rc = ib_query_device(hdev->ibh_ibdev, attr);
+	if (rc == 0)
+		hdev->ibh_mr_size = attr->max_mr_size;
+
+	LIBCFS_FREE(attr, sizeof(*attr));
+
+	if (rc != 0) {
+		CERROR("Failed to query IB device: %d\n", rc);
+		return rc;
+	}
+
+	if (hdev->ibh_mr_size == ~0ULL) {
+		hdev->ibh_mr_shift = 64;
+		return 0;
+	}
+
+	for (hdev->ibh_mr_shift = 0;
+	     hdev->ibh_mr_shift < 64; hdev->ibh_mr_shift++) {
+		if (hdev->ibh_mr_size == (1ULL << hdev->ibh_mr_shift) ||
+		    hdev->ibh_mr_size == (1ULL << hdev->ibh_mr_shift) - 1)
+			return 0;
+	}
+
+	CERROR("Invalid mr size: "LPX64"\n", hdev->ibh_mr_size);
+	return -EINVAL;
+}
+
+void
+kiblnd_hdev_cleanup_mrs(kib_hca_dev_t *hdev)
+{
+	int     i;
+
+	if (hdev->ibh_nmrs == 0 || hdev->ibh_mrs == NULL)
+		return;
+
+	for (i = 0; i < hdev->ibh_nmrs; i++) {
+		if (hdev->ibh_mrs[i] == NULL)
+			break;
+
+		ib_dereg_mr(hdev->ibh_mrs[i]);
+	}
+
+	LIBCFS_FREE(hdev->ibh_mrs, sizeof(*hdev->ibh_mrs) * hdev->ibh_nmrs);
+	hdev->ibh_mrs  = NULL;
+	hdev->ibh_nmrs = 0;
+}
+
+void
+kiblnd_hdev_destroy(kib_hca_dev_t *hdev)
+{
+	kiblnd_hdev_cleanup_mrs(hdev);
+
+	if (hdev->ibh_pd != NULL)
+		ib_dealloc_pd(hdev->ibh_pd);
+
+	if (hdev->ibh_cmid != NULL)
+		rdma_destroy_id(hdev->ibh_cmid);
+
+	LIBCFS_FREE(hdev, sizeof(*hdev));
+}
+
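+/* Register DMA memory regions for the HCA.  If one MR can cover the whole
+ * 64-bit space, register a single DMA MR; otherwise carve physical memory
+ * into mr_size chunks and register one phys MR per chunk, refusing to go
+ * beyond 1024 chunks. */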
+int
+kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev)
+{
+	struct ib_mr *mr;
+	int	   i;
+	int	   rc;
+	__u64	 mm_size;
+	__u64	 mr_size;
+	int	   acflags = IB_ACCESS_LOCAL_WRITE |
+				IB_ACCESS_REMOTE_WRITE;
+
+	rc = kiblnd_hdev_get_attr(hdev);
+	if (rc != 0)
+		return rc;
+
+	if (hdev->ibh_mr_shift == 64) {
+		LIBCFS_ALLOC(hdev->ibh_mrs, 1 * sizeof(*hdev->ibh_mrs));
+		if (hdev->ibh_mrs == NULL) {
+			CERROR("Failed to allocate MRs table\n");
+			return -ENOMEM;
+		}
+
+		hdev->ibh_mrs[0] = NULL;
+		hdev->ibh_nmrs   = 1;
+
+		mr = ib_get_dma_mr(hdev->ibh_pd, acflags);
+		if (IS_ERR(mr)) {
+			CERROR("Failed ib_get_dma_mr : %ld\n", PTR_ERR(mr));
+			kiblnd_hdev_cleanup_mrs(hdev);
+			return PTR_ERR(mr);
+		}
+
+		hdev->ibh_mrs[0] = mr;
+
+		goto out;
+	}
+
+	mr_size = (1ULL << hdev->ibh_mr_shift);
+	mm_size = (unsigned long)high_memory - PAGE_OFFSET;
+
+	hdev->ibh_nmrs = (int)((mm_size + mr_size - 1) >> hdev->ibh_mr_shift);
+
+	if (hdev->ibh_mr_shift < 32 || hdev->ibh_nmrs > 1024) {
+		/* that's more than 4TB of memory; assume this code
+		 * will be reworked by then */
+		CERROR("Can't support memory size: x"LPX64
+		       " with MR size: x"LPX64"\n", mm_size, mr_size);
+		return -EINVAL;
+	}
+
+	/* create an array of MRs to cover all memory */
+	LIBCFS_ALLOC(hdev->ibh_mrs, sizeof(*hdev->ibh_mrs) * hdev->ibh_nmrs);
+	if (hdev->ibh_mrs == NULL) {
+		CERROR("Failed to allocate MRs' table\n");
+		return -ENOMEM;
+	}
+
+	memset(hdev->ibh_mrs, 0, sizeof(*hdev->ibh_mrs) * hdev->ibh_nmrs);
+
+	for (i = 0; i < hdev->ibh_nmrs; i++) {
+		struct ib_phys_buf ipb;
+		__u64	      iova;
+
+		ipb.size = hdev->ibh_mr_size;
+		ipb.addr = i * mr_size;
+		iova     = ipb.addr;
+
+		mr = ib_reg_phys_mr(hdev->ibh_pd, &ipb, 1, acflags, &iova);
+		if (IS_ERR(mr)) {
+			CERROR("Failed ib_reg_phys_mr addr "LPX64
+			       " size "LPX64" : %ld\n",
+			       ipb.addr, ipb.size, PTR_ERR(mr));
+			kiblnd_hdev_cleanup_mrs(hdev);
+			return PTR_ERR(mr);
+		}
+
+		LASSERT (iova == ipb.addr);
+
+		hdev->ibh_mrs[i] = mr;
+	}
+
+out:
+	if (hdev->ibh_mr_size != ~0ULL || hdev->ibh_nmrs != 1)
+		LCONSOLE_INFO("Register global MR array, MR size: "
+			      LPX64", array size: %d\n",
+			      hdev->ibh_mr_size, hdev->ibh_nmrs);
+	return 0;
+}
+
+static int
+kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
+{
+	/* DUMMY */
+	return 0;
+}
+
+static int
+kiblnd_dev_need_failover(kib_dev_t *dev)
+{
+	struct rdma_cm_id  *cmid;
+	struct sockaddr_in  srcaddr;
+	struct sockaddr_in  dstaddr;
+	int		 rc;
+
+	if (dev->ibd_hdev == NULL || /* initializing */
+	    dev->ibd_hdev->ibh_cmid == NULL || /* listener is dead */
+	    *kiblnd_tunables.kib_dev_failover > 1) /* debugging */
+		return 1;
+
+	/* XXX: it's UGLY, but I don't have better way to find
+	 * ib-bonding HCA failover because:
+	 *
+	 * a. no reliable CM event for HCA failover...
+	 * b. no OFED API to get ib_device for current net_device...
+	 *
+	 * We have only two choices at this point:
+	 *
+	 * a. rdma_bind_addr(), it will conflict with listener cmid
+	 * b. rdma_resolve_addr() to zero addr */
+	cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP,
+				     IB_QPT_RC);
+	if (IS_ERR(cmid)) {
+		rc = PTR_ERR(cmid);
+		CERROR("Failed to create cmid for failover: %d\n", rc);
+		return rc;
+	}
+
+	memset(&srcaddr, 0, sizeof(srcaddr));
+	srcaddr.sin_family      = AF_INET;
+	srcaddr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip);
+
+	memset(&dstaddr, 0, sizeof(dstaddr));
+	dstaddr.sin_family = AF_INET;
+	rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr,
+			       (struct sockaddr *)&dstaddr, 1);
+	if (rc != 0 || cmid->device == NULL) {
+		CERROR("Failed to bind %s:%u.%u.%u.%u to device(%p): %d\n",
+		       dev->ibd_ifname, HIPQUAD(dev->ibd_ifip),
+		       cmid->device, rc);
+		rdma_destroy_id(cmid);
+		return rc;
+	}
+
+	if (dev->ibd_hdev->ibh_ibdev == cmid->device) {
+		/* don't need device failover */
+		rdma_destroy_id(cmid);
+		return 0;
+	}
+
+	return 1;
+}
+
+int
+kiblnd_dev_failover(kib_dev_t *dev)
+{
+	LIST_HEAD      (zombie_tpo);
+	LIST_HEAD      (zombie_ppo);
+	LIST_HEAD      (zombie_fpo);
+	struct rdma_cm_id  *cmid  = NULL;
+	kib_hca_dev_t      *hdev  = NULL;
+	kib_hca_dev_t      *old;
+	struct ib_pd       *pd;
+	kib_net_t	  *net;
+	struct sockaddr_in  addr;
+	unsigned long       flags;
+	int		 rc = 0;
+	int		    i;
+
+	LASSERT (*kiblnd_tunables.kib_dev_failover > 1 ||
+		 dev->ibd_can_failover ||
+		 dev->ibd_hdev == NULL);
+
+	rc = kiblnd_dev_need_failover(dev);
+	if (rc <= 0)
+		goto out;
+
+	if (dev->ibd_hdev != NULL &&
+	    dev->ibd_hdev->ibh_cmid != NULL) {
+		/* XXX it's not good to close the old listener here, because
+		 * we can fail to create a new one.  But we have to close it
+		 * now, otherwise rdma_bind_addr() will return EADDRINUSE */
+		write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+		cmid = dev->ibd_hdev->ibh_cmid;
+		/* make the next call to kiblnd_dev_need_failover()
+		 * return 1 */
+		dev->ibd_hdev->ibh_cmid  = NULL;
+		write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+		rdma_destroy_id(cmid);
+	}
+
+	cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP,
+				     IB_QPT_RC);
+	if (IS_ERR(cmid)) {
+		rc = PTR_ERR(cmid);
+		CERROR("Failed to create cmid for failover: %d\n", rc);
+		goto out;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family      = AF_INET;
+	addr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip);
+	addr.sin_port	= htons(*kiblnd_tunables.kib_service);
+
+	/* Bind to failover device or port */
+	rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr);
+	if (rc != 0 || cmid->device == NULL) {
+		CERROR("Failed to bind %s:%u.%u.%u.%u to device(%p): %d\n",
+		       dev->ibd_ifname, HIPQUAD(dev->ibd_ifip),
+		       cmid->device, rc);
+		rdma_destroy_id(cmid);
+		goto out;
+	}
+
+	LIBCFS_ALLOC(hdev, sizeof(*hdev));
+	if (hdev == NULL) {
+		CERROR("Failed to allocate kib_hca_dev\n");
+		rdma_destroy_id(cmid);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	atomic_set(&hdev->ibh_ref, 1);
+	hdev->ibh_dev   = dev;
+	hdev->ibh_cmid  = cmid;
+	hdev->ibh_ibdev = cmid->device;
+
+	pd = ib_alloc_pd(cmid->device);
+	if (IS_ERR(pd)) {
+		rc = PTR_ERR(pd);
+		CERROR("Can't allocate PD: %d\n", rc);
+		goto out;
+	}
+
+	hdev->ibh_pd = pd;
+
+	rc = rdma_listen(cmid, 0);
+	if (rc != 0) {
+		CERROR("Can't start new listener: %d\n", rc);
+		goto out;
+	}
+
+	rc = kiblnd_hdev_setup_mrs(hdev);
+	if (rc != 0) {
+		CERROR("Can't setup device: %d\n", rc);
+		goto out;
+	}
+
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	old = dev->ibd_hdev;
+	dev->ibd_hdev = hdev; /* take over the refcount */
+	hdev = old;
+
+	list_for_each_entry(net, &dev->ibd_nets, ibn_list) {
+		cfs_cpt_for_each(i, lnet_cpt_table()) {
+			kiblnd_fail_poolset(&net->ibn_tx_ps[i]->tps_poolset,
+					    &zombie_tpo);
+
+			if (net->ibn_fmr_ps != NULL) {
+				kiblnd_fail_fmr_poolset(net->ibn_fmr_ps[i],
+							&zombie_fpo);
+
+			} else if (net->ibn_pmr_ps != NULL) {
+				kiblnd_fail_poolset(&net->ibn_pmr_ps[i]->
+						    pps_poolset, &zombie_ppo);
+			}
+		}
+	}
+
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+ out:
+	if (!list_empty(&zombie_tpo))
+		kiblnd_destroy_pool_list(&zombie_tpo);
+	if (!list_empty(&zombie_ppo))
+		kiblnd_destroy_pool_list(&zombie_ppo);
+	if (!list_empty(&zombie_fpo))
+		kiblnd_destroy_fmr_pool_list(&zombie_fpo);
+	if (hdev != NULL)
+		kiblnd_hdev_decref(hdev);
+
+	if (rc != 0)
+		dev->ibd_failed_failover++;
+	else
+		dev->ibd_failed_failover = 0;
+
+	return rc;
+}
+
+void
+kiblnd_destroy_dev (kib_dev_t *dev)
+{
+	LASSERT (dev->ibd_nnets == 0);
+	LASSERT (list_empty(&dev->ibd_nets));
+
+	list_del(&dev->ibd_fail_list);
+	list_del(&dev->ibd_list);
+
+	if (dev->ibd_hdev != NULL)
+		kiblnd_hdev_decref(dev->ibd_hdev);
+
+	LIBCFS_FREE(dev, sizeof(*dev));
+}
+
+kib_dev_t *
+kiblnd_create_dev(char *ifname)
+{
+	struct net_device *netdev;
+	kib_dev_t	 *dev;
+	__u32	      netmask;
+	__u32	      ip;
+	int		up;
+	int		rc;
+
+	rc = libcfs_ipif_query(ifname, &up, &ip, &netmask);
+	if (rc != 0) {
+		CERROR("Can't query IPoIB interface %s: %d\n",
+		       ifname, rc);
+		return NULL;
+	}
+
+	if (!up) {
+		CERROR("Can't query IPoIB interface %s: it's down\n", ifname);
+		return NULL;
+	}
+
+	LIBCFS_ALLOC(dev, sizeof(*dev));
+	if (dev == NULL)
+		return NULL;
+
+	memset(dev, 0, sizeof(*dev));
+	netdev = dev_get_by_name(&init_net, ifname);
+	if (netdev == NULL) {
+		dev->ibd_can_failover = 0;
+	} else {
+		dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER);
+		dev_put(netdev);
+	}
+
+	INIT_LIST_HEAD(&dev->ibd_nets);
+	INIT_LIST_HEAD(&dev->ibd_list); /* not yet in kib_devs */
+	INIT_LIST_HEAD(&dev->ibd_fail_list);
+	dev->ibd_ifip = ip;
+	strcpy(&dev->ibd_ifname[0], ifname);
+
+	/* initialize the device */
+	rc = kiblnd_dev_failover(dev);
+	if (rc != 0) {
+		CERROR("Can't initialize device: %d\n", rc);
+		LIBCFS_FREE(dev, sizeof(*dev));
+		return NULL;
+	}
+
+	list_add_tail(&dev->ibd_list, &kiblnd_data.kib_devs);
+	return dev;
+}
+
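+/* Module-wide teardown: flag shutdown, wake every scheduler, connd and
+ * failover thread, then wait for kib_nthreads to drain before freeing
+ * global state. */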
+void
+kiblnd_base_shutdown(void)
+{
+	struct kib_sched_info	*sched;
+	int			i;
+
+	LASSERT (list_empty(&kiblnd_data.kib_devs));
+
+	CDEBUG(D_MALLOC, "before LND base cleanup: kmem %d\n",
+	       atomic_read(&libcfs_kmemory));
+
+	switch (kiblnd_data.kib_init) {
+	default:
+		LBUG();
+
+	case IBLND_INIT_ALL:
+	case IBLND_INIT_DATA:
+		LASSERT (kiblnd_data.kib_peers != NULL);
+		for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
+			LASSERT (list_empty(&kiblnd_data.kib_peers[i]));
+		}
+		LASSERT (list_empty(&kiblnd_data.kib_connd_zombies));
+		LASSERT (list_empty(&kiblnd_data.kib_connd_conns));
+
+		/* flag threads to terminate; wake and wait for them to die */
+		kiblnd_data.kib_shutdown = 1;
+
+		/* NB: we really want to stop scheduler threads net by net
+		 * instead of the whole module; this should be improved
+		 * when LNet supports dynamic configuration */
+		cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds)
+			wake_up_all(&sched->ibs_waitq);
+
+		wake_up_all(&kiblnd_data.kib_connd_waitq);
+		wake_up_all(&kiblnd_data.kib_failover_waitq);
+
+		i = 2;
+		while (atomic_read(&kiblnd_data.kib_nthreads) != 0) {
+			i++;
+			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+			       "Waiting for %d threads to terminate\n",
+			       atomic_read(&kiblnd_data.kib_nthreads));
+			cfs_pause(cfs_time_seconds(1));
+		}
+
+		/* fall through */
+
+	case IBLND_INIT_NOTHING:
+		break;
+	}
+
+	if (kiblnd_data.kib_peers != NULL) {
+		LIBCFS_FREE(kiblnd_data.kib_peers,
+			    sizeof(struct list_head) *
+			    kiblnd_data.kib_peer_hash_size);
+	}
+
+	if (kiblnd_data.kib_scheds != NULL)
+		cfs_percpt_free(kiblnd_data.kib_scheds);
+
+	CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n",
+	       atomic_read(&libcfs_kmemory));
+
+	kiblnd_data.kib_init = IBLND_INIT_NOTHING;
+	module_put(THIS_MODULE);
+}
+
+void
+kiblnd_shutdown (lnet_ni_t *ni)
+{
+	kib_net_t	*net = ni->ni_data;
+	rwlock_t     *g_lock = &kiblnd_data.kib_global_lock;
+	int	       i;
+	unsigned long     flags;
+
+	LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
+
+	if (net == NULL)
+		goto out;
+
+	CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n",
+	       atomic_read(&libcfs_kmemory));
+
+	write_lock_irqsave(g_lock, flags);
+	net->ibn_shutdown = 1;
+	write_unlock_irqrestore(g_lock, flags);
+
+	switch (net->ibn_init) {
+	default:
+		LBUG();
+
+	case IBLND_INIT_ALL:
+		/* nuke all existing peers within this net */
+		kiblnd_del_peer(ni, LNET_NID_ANY);
+
+		/* Wait for all peer state to clean up */
+		i = 2;
+		while (atomic_read(&net->ibn_npeers) != 0) {
+			i++;
+			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
+			       "%s: waiting for %d peers to disconnect\n",
+			       libcfs_nid2str(ni->ni_nid),
+			       atomic_read(&net->ibn_npeers));
+			cfs_pause(cfs_time_seconds(1));
+		}
+
+		kiblnd_net_fini_pools(net);
+
+		write_lock_irqsave(g_lock, flags);
+		LASSERT(net->ibn_dev->ibd_nnets > 0);
+		net->ibn_dev->ibd_nnets--;
+		list_del(&net->ibn_list);
+		write_unlock_irqrestore(g_lock, flags);
+
+		/* fall through */
+
+	case IBLND_INIT_NOTHING:
+		LASSERT (atomic_read(&net->ibn_nconns) == 0);
+
+		if (net->ibn_dev != NULL &&
+		    net->ibn_dev->ibd_nnets == 0)
+			kiblnd_destroy_dev(net->ibn_dev);
+
+		break;
+	}
+
+	CDEBUG(D_MALLOC, "after LND net cleanup: kmem %d\n",
+	       atomic_read(&libcfs_kmemory));
+
+	net->ibn_init = IBLND_INIT_NOTHING;
+	ni->ni_data = NULL;
+
+	LIBCFS_FREE(net, sizeof(*net));
+
+out:
+	if (list_empty(&kiblnd_data.kib_devs))
+		kiblnd_base_shutdown();
+	return;
+}
+
+int
+kiblnd_base_startup(void)
+{
+	struct kib_sched_info	*sched;
+	int			rc;
+	int			i;
+
+	LASSERT (kiblnd_data.kib_init == IBLND_INIT_NOTHING);
+
+	try_module_get(THIS_MODULE);
+	memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */
+
+	rwlock_init(&kiblnd_data.kib_global_lock);
+
+	INIT_LIST_HEAD(&kiblnd_data.kib_devs);
+	INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs);
+
+	kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE;
+	LIBCFS_ALLOC(kiblnd_data.kib_peers,
+		     sizeof(struct list_head) *
+			    kiblnd_data.kib_peer_hash_size);
+	if (kiblnd_data.kib_peers == NULL)
+		goto failed;
+	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
+		INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]);
+
+	spin_lock_init(&kiblnd_data.kib_connd_lock);
+	INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
+	INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
+	init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
+	init_waitqueue_head(&kiblnd_data.kib_failover_waitq);
+
+	kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(),
+						  sizeof(*sched));
+	if (kiblnd_data.kib_scheds == NULL)
+		goto failed;
+
+	cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) {
+		int	nthrs;
+
+		spin_lock_init(&sched->ibs_lock);
+		INIT_LIST_HEAD(&sched->ibs_conns);
+		init_waitqueue_head(&sched->ibs_waitq);
+
+		nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
+		if (*kiblnd_tunables.kib_nscheds > 0) {
+			nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds);
+		} else {
+			/* max to half of CPUs, another half is reserved for
+			 * upper layer modules */
+			nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
+		}
+
+		sched->ibs_nthreads_max = nthrs;
+		sched->ibs_cpt = i;
+	}
+
+	kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR;
+
+	/* lists/ptrs/locks initialised */
+	kiblnd_data.kib_init = IBLND_INIT_DATA;
+	/*****************************************************/
+
+	rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd");
+	if (rc != 0) {
+		CERROR("Can't spawn o2iblnd connd: %d\n", rc);
+		goto failed;
+	}
+
+	if (*kiblnd_tunables.kib_dev_failover != 0)
+		rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
+					 "kiblnd_failover");
+
+	if (rc != 0) {
+		CERROR("Can't spawn o2iblnd failover thread: %d\n", rc);
+		goto failed;
+	}
+
+	/* flag everything initialised */
+	kiblnd_data.kib_init = IBLND_INIT_ALL;
+	/*****************************************************/
+
+	return 0;
+
+ failed:
+	kiblnd_base_shutdown();
+	return -ENETDOWN;
+}
+
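+/* Start scheduler threads for one CPT: on first use spawn roughly half
+ * the CPUs in the partition, capped at IBLND_N_SCHED_HIGH (or exactly
+ * kib_nscheds if that tunable is set); on later calls for a new interface
+ * add at most one more thread, up to ibs_nthreads_max. */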
+int
+kiblnd_start_schedulers(struct kib_sched_info *sched)
+{
+	int	rc = 0;
+	int	nthrs;
+	int	i;
+
+	if (sched->ibs_nthreads == 0) {
+		if (*kiblnd_tunables.kib_nscheds > 0) {
+			nthrs = sched->ibs_nthreads_max;
+		} else {
+			nthrs = cfs_cpt_weight(lnet_cpt_table(),
+					       sched->ibs_cpt);
+			nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
+			nthrs = min(IBLND_N_SCHED_HIGH, nthrs);
+		}
+	} else {
+		LASSERT(sched->ibs_nthreads <= sched->ibs_nthreads_max);
+		/* add one more thread if a new interface appeared */
+		nthrs = (sched->ibs_nthreads < sched->ibs_nthreads_max);
+	}
+
+	for (i = 0; i < nthrs; i++) {
+		long	id;
+		char	name[20];
+		id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i);
+		snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld",
+			 KIB_THREAD_CPT(id), KIB_THREAD_TID(id));
+		rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name);
+		if (rc == 0)
+			continue;
+
+		CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
+		       sched->ibs_nthreads + i, sched->ibs_cpt, rc);
+		break;
+	}
+
+	sched->ibs_nthreads += i;
+	return rc;
+}
+
+int
+kiblnd_dev_start_threads(kib_dev_t *dev, int newdev, __u32 *cpts, int ncpts)
+{
+	int	cpt;
+	int	rc;
+	int	i;
+
+	for (i = 0; i < ncpts; i++) {
+		struct kib_sched_info *sched;
+
+		cpt = (cpts == NULL) ? i : cpts[i];
+		sched = kiblnd_data.kib_scheds[cpt];
+
+		if (!newdev && sched->ibs_nthreads > 0)
+			continue;
+
+		rc = kiblnd_start_schedulers(sched);
+		if (rc != 0) {
+			CERROR("Failed to start scheduler threads for %s\n",
+			       dev->ibd_ifname);
+			return rc;
+		}
+	}
+	return 0;
+}
+
+kib_dev_t *
+kiblnd_dev_search(char *ifname)
+{
+	kib_dev_t	*alias = NULL;
+	kib_dev_t	*dev;
+	char		*colon;
+	char		*colon2;
+
+	colon = strchr(ifname, ':');
+	list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
+		if (strcmp(&dev->ibd_ifname[0], ifname) == 0)
+			return dev;
+
+		if (alias != NULL)
+			continue;
+
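+		/* compare base names: temporarily NUL-terminate both names
+		 * at ':' so an alias (e.g. "ib0:1") matches its parent
+		 * device "ib0", then restore the colons below */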
+		colon2 = strchr(dev->ibd_ifname, ':');
+		if (colon != NULL)
+			*colon = 0;
+		if (colon2 != NULL)
+			*colon2 = 0;
+
+		if (strcmp(&dev->ibd_ifname[0], ifname) == 0)
+			alias = dev;
+
+		if (colon != NULL)
+			*colon = ':';
+		if (colon2 != NULL)
+			*colon2 = ':';
+	}
+	return alias;
+}
+
+int
+kiblnd_startup (lnet_ni_t *ni)
+{
+	char		     *ifname;
+	kib_dev_t		*ibdev = NULL;
+	kib_net_t		*net;
+	struct timeval	    tv;
+	unsigned long	     flags;
+	int		       rc;
+	int			  newdev;
+
+	LASSERT (ni->ni_lnd == &the_o2iblnd);
+
+	if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
+		rc = kiblnd_base_startup();
+		if (rc != 0)
+			return rc;
+	}
+
+	LIBCFS_ALLOC(net, sizeof(*net));
+	ni->ni_data = net;
+	if (net == NULL)
+		goto failed;
+
+	memset(net, 0, sizeof(*net));
+
+	do_gettimeofday(&tv);
+	net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+
+	ni->ni_peertimeout    = *kiblnd_tunables.kib_peertimeout;
+	ni->ni_maxtxcredits   = *kiblnd_tunables.kib_credits;
+	ni->ni_peertxcredits  = *kiblnd_tunables.kib_peertxcredits;
+	ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits;
+
+	if (ni->ni_interfaces[0] != NULL) {
+		/* Use the IPoIB interface specified in 'networks=' */
+
+		CLASSERT (LNET_MAX_INTERFACES > 1);
+		if (ni->ni_interfaces[1] != NULL) {
+			CERROR("Multiple interfaces not supported\n");
+			goto failed;
+		}
+
+		ifname = ni->ni_interfaces[0];
+	} else {
+		ifname = *kiblnd_tunables.kib_default_ipif;
+	}
+
+	if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) {
+		CERROR("IPoIB interface name too long: %s\n", ifname);
+		goto failed;
+	}
+
+	ibdev = kiblnd_dev_search(ifname);
+
+	newdev = ibdev == NULL;
+	/* hmm...create kib_dev even for alias */
+	if (ibdev == NULL || strcmp(&ibdev->ibd_ifname[0], ifname) != 0)
+		ibdev = kiblnd_create_dev(ifname);
+
+	if (ibdev == NULL)
+		goto failed;
+
+	net->ibn_dev = ibdev;
+	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
+
+	rc = kiblnd_dev_start_threads(ibdev, newdev,
+				      ni->ni_cpts, ni->ni_ncpts);
+	if (rc != 0)
+		goto failed;
+
+	rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts);
+	if (rc != 0) {
+		CERROR("Failed to initialize NI pools: %d\n", rc);
+		goto failed;
+	}
+
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+	ibdev->ibd_nnets++;
+	list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	net->ibn_init = IBLND_INIT_ALL;
+
+	return 0;
+
+failed:
+	if (net != NULL && net->ibn_dev == NULL && ibdev != NULL)
+		kiblnd_destroy_dev(ibdev);
+
+	kiblnd_shutdown(ni);
+
+	CDEBUG(D_NET, "kiblnd_startup failed\n");
+	return -ENETDOWN;
+}
+
+void __exit
+kiblnd_module_fini (void)
+{
+	lnet_unregister_lnd(&the_o2iblnd);
+	kiblnd_tunables_fini();
+}
+
+int __init
+kiblnd_module_init (void)
+{
+	int    rc;
+
+	CLASSERT (sizeof(kib_msg_t) <= IBLND_MSG_SIZE);
+	CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
+		  <= IBLND_MSG_SIZE);
+	CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
+		  <= IBLND_MSG_SIZE);
+
+	rc = kiblnd_tunables_init();
+	if (rc != 0)
+		return rc;
+
+	lnet_register_lnd(&the_o2iblnd);
+
+	return 0;
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Kernel OpenIB gen2 LND v2.00");
+MODULE_LICENSE("GPL");
+
+module_init(kiblnd_module_init);
+module_exit(kiblnd_module_fini);

+ 1057 - 0
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h

@@ -0,0 +1,1057 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/o2iblnd/o2iblnd.h
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/uio.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+#include <linux/pci.h>
+
+#include <net/sock.h>
+#include <linux/in.h>
+
+#define DEBUG_SUBSYSTEM S_LND
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-lnet.h>
+#include <linux/lnet/lnet-sysctl.h>
+
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_fmr_pool.h>
+
+#define IBLND_PEER_HASH_SIZE		101	/* # peer lists */
+/* # scheduler loops before reschedule */
+#define IBLND_RESCHED			100
+
+#define IBLND_N_SCHED			2
+#define IBLND_N_SCHED_HIGH		4
+
+typedef struct
+{
+	int	      *kib_dev_failover;     /* HCA failover */
+	unsigned int     *kib_service;	  /* IB service number */
+	int	      *kib_min_reconnect_interval; /* first failed connection retry... */
+	int	      *kib_max_reconnect_interval; /* ...exponentially increasing to this */
+	int	      *kib_cksum;	    /* checksum kib_msg_t? */
+	int	      *kib_timeout;	  /* comms timeout (seconds) */
+	int	      *kib_keepalive;	/* keepalive timeout (seconds) */
+	int	      *kib_ntx;	      /* # tx descs */
+	int	      *kib_credits;	  /* # concurrent sends */
+	int	      *kib_peertxcredits;    /* # concurrent sends to 1 peer */
+	int	      *kib_peerrtrcredits;   /* # per-peer router buffer credits */
+	int	      *kib_peercredits_hiw;  /* # when to eagerly return credits */
+	int	      *kib_peertimeout;      /* seconds to consider peer dead */
+	char	    **kib_default_ipif;     /* default IPoIB interface */
+	int	      *kib_retry_count;
+	int	      *kib_rnr_retry_count;
+	int	      *kib_concurrent_sends; /* send work queue sizing */
+	int		 *kib_ib_mtu;		/* IB MTU */
+	int	      *kib_map_on_demand;    /* map-on-demand if RD has more fragments
+						 * than this value; 0 disables map-on-demand */
+	int	      *kib_pmr_pool_size;    /* # physical MR in pool */
+	int	      *kib_fmr_pool_size;    /* # FMRs in pool */
+	int	      *kib_fmr_flush_trigger; /* When to trigger FMR flush */
+	int	      *kib_fmr_cache;	/* enable FMR pool cache? */
+#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+	ctl_table_header_t *kib_sysctl;  /* sysctl interface */
+#endif
+	int	      *kib_require_priv_port;/* accept only privileged ports */
+	int	      *kib_use_priv_port;    /* use privileged port for active connect */
+	/* # threads on each CPT */
+	int		 *kib_nscheds;
+} kib_tunables_t;
+
+extern kib_tunables_t  kiblnd_tunables;
+
+#define IBLND_MSG_QUEUE_SIZE_V1      8	  /* V1 only: # messages/RDMAs in-flight */
+#define IBLND_CREDIT_HIGHWATER_V1    7	  /* V1 only: when to eagerly return credits */
+
+#define IBLND_CREDITS_DEFAULT	8	  /* default # of peer credits */
+#define IBLND_CREDITS_MAX	  ((typeof(((kib_msg_t*) 0)->ibm_credits)) - 1)  /* Max # of peer credits */
+
+#define IBLND_MSG_QUEUE_SIZE(v)    ((v) == IBLND_MSG_VERSION_1 ? \
+				     IBLND_MSG_QUEUE_SIZE_V1 :   \
+				     *kiblnd_tunables.kib_peertxcredits) /* # messages/RDMAs in-flight */
+#define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \
+				     IBLND_CREDIT_HIGHWATER_V1 : \
+				     *kiblnd_tunables.kib_peercredits_hiw) /* when to eagerly return credits */
+
+#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt)
+
+static inline int
+kiblnd_concurrent_sends_v1(void)
+{
+	if (*kiblnd_tunables.kib_concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
+		return IBLND_MSG_QUEUE_SIZE_V1 * 2;
+
+	if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
+		return IBLND_MSG_QUEUE_SIZE_V1 / 2;
+
+	return *kiblnd_tunables.kib_concurrent_sends;
+}
+
+#define IBLND_CONCURRENT_SENDS(v)  ((v) == IBLND_MSG_VERSION_1 ? \
+				     kiblnd_concurrent_sends_v1() : \
+				     *kiblnd_tunables.kib_concurrent_sends)
+/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
+#define IBLND_OOB_CAPABLE(v)       ((v) != IBLND_MSG_VERSION_1)
+#define IBLND_OOB_MSGS(v)	   (IBLND_OOB_CAPABLE(v) ? 2 : 0)
+
+#define IBLND_MSG_SIZE	      (4<<10)		 /* max size of queued messages (inc hdr) */
+#define IBLND_MAX_RDMA_FRAGS	 LNET_MAX_IOV	   /* max # of fragments supported */
+#define IBLND_CFG_RDMA_FRAGS       (*kiblnd_tunables.kib_map_on_demand != 0 ? \
+				    *kiblnd_tunables.kib_map_on_demand :      \
+				     IBLND_MAX_RDMA_FRAGS)  /* max # of fragments configured by user */
+#define IBLND_RDMA_FRAGS(v)	((v) == IBLND_MSG_VERSION_1 ? \
+				     IBLND_MAX_RDMA_FRAGS : IBLND_CFG_RDMA_FRAGS)
+
+/************************/
+/* derived constants... */
+/* Pools (shared by connections on each CPT) */
+/* These pools can grow at runtime, so there is no need to give a very large initial value */
+#define IBLND_TX_POOL			256
+#define IBLND_PMR_POOL			256
+#define IBLND_FMR_POOL			256
+#define IBLND_FMR_POOL_FLUSH		192
+
+/* TX messages (shared by all connections) */
+#define IBLND_TX_MSGS()	    (*kiblnd_tunables.kib_ntx)
+
+/* RX messages (per connection) */
+#define IBLND_RX_MSGS(v)	    (IBLND_MSG_QUEUE_SIZE(v) * 2 + IBLND_OOB_MSGS(v))
+#define IBLND_RX_MSG_BYTES(v)       (IBLND_RX_MSGS(v) * IBLND_MSG_SIZE)
+#define IBLND_RX_MSG_PAGES(v)      ((IBLND_RX_MSG_BYTES(v) + PAGE_SIZE - 1) / PAGE_SIZE)
+
+/* WRs and CQEs (per connection) */
+#define IBLND_RECV_WRS(v)	    IBLND_RX_MSGS(v)
+#define IBLND_SEND_WRS(v)	  ((IBLND_RDMA_FRAGS(v) + 1) * IBLND_CONCURRENT_SENDS(v))
+#define IBLND_CQ_ENTRIES(v)	 (IBLND_RECV_WRS(v) + IBLND_SEND_WRS(v))
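+/* Worked example (illustrative, assuming version 2 defaults where the peer
+ * credit count is IBLND_CREDITS_DEFAULT == 8 and OOB messages are enabled):
+ * IBLND_RX_MSGS = 8 * 2 + 2 = 18, so 18 receive WRs are posted per conn,
+ * while IBLND_SEND_WRS scales with the configured RDMA fragment count. */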
+
+struct kib_hca_dev;
+
+/* o2iblnd can run over aliased interface */
+#ifdef IFALIASZ
+#define KIB_IFNAME_SIZE	      IFALIASZ
+#else
+#define KIB_IFNAME_SIZE	      256
+#endif
+
+typedef struct
+{
+	struct list_head	   ibd_list;	  /* chain on kib_devs */
+	struct list_head	   ibd_fail_list;     /* chain on kib_failed_devs */
+	__u32		ibd_ifip;	  /* IPoIB interface IP */
+	/** IPoIB interface name */
+	char		 ibd_ifname[KIB_IFNAME_SIZE];
+	int		  ibd_nnets;	 /* # nets extant */
+
+	cfs_time_t	   ibd_next_failover;
+	int		  ibd_failed_failover; /* # failover failures */
+	unsigned int	 ibd_failover;      /* failover in progress */
+	unsigned int	 ibd_can_failover;  /* IPoIB interface is a bonding master */
+	struct list_head	   ibd_nets;
+	struct kib_hca_dev  *ibd_hdev;
+} kib_dev_t;
+
+typedef struct kib_hca_dev
+{
+	struct rdma_cm_id   *ibh_cmid;	  /* listener cmid */
+	struct ib_device    *ibh_ibdev;	 /* IB device */
+	int		  ibh_page_shift;    /* page shift of current HCA */
+	int		  ibh_page_size;     /* page size of current HCA */
+	__u64		ibh_page_mask;     /* page mask of current HCA */
+	int		  ibh_mr_shift;      /* bits shift of max MR size */
+	__u64		ibh_mr_size;       /* size of MR */
+	int		  ibh_nmrs;	  /* # of global MRs */
+	struct ib_mr       **ibh_mrs;	   /* global MR */
+	struct ib_pd	*ibh_pd;	    /* PD */
+	kib_dev_t	   *ibh_dev;	   /* owner */
+	atomic_t	 ibh_ref;	   /* refcount */
+} kib_hca_dev_t;
+
+/** # of seconds to keep pool alive */
+#define IBLND_POOL_DEADLINE     300
+/** # of seconds to retry if allocation failed */
+#define IBLND_POOL_RETRY	1
+
+typedef struct
+{
+	int		     ibp_npages;	     /* # pages */
+	struct page	    *ibp_pages[0];	   /* page array */
+} kib_pages_t;
+
+struct kib_pmr_pool;
+
+typedef struct {
+	struct list_head	      pmr_list;	       /* chain node */
+	struct ib_phys_buf     *pmr_ipb;		/* physical buffer */
+	struct ib_mr	   *pmr_mr;		 /* IB MR */
+	struct kib_pmr_pool    *pmr_pool;	       /* owner of this MR */
+	__u64		   pmr_iova;	       /* Virtual I/O address */
+	int		     pmr_refcount;	   /* reference count */
+} kib_phys_mr_t;
+
+struct kib_pool;
+struct kib_poolset;
+
+typedef int  (*kib_ps_pool_create_t)(struct kib_poolset *ps,
+				     int inc, struct kib_pool **pp_po);
+typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po);
+typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node);
+typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node);
+
+struct kib_net;
+
+#define IBLND_POOL_NAME_LEN     32
+
+typedef struct kib_poolset
+{
+	spinlock_t		ps_lock;		/* serialize */
+	struct kib_net	 *ps_net;		 /* network it belongs to */
+	char		    ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */
+	struct list_head	      ps_pool_list;	   /* list of pools */
+	struct list_head	      ps_failed_pool_list;    /* failed pool list */
+	cfs_time_t	      ps_next_retry;	  /* time stamp for retry if failed to allocate */
+	int		     ps_increasing;	  /* is allocating new pool */
+	int		     ps_pool_size;	   /* new pool size */
+	int			ps_cpt;			/* CPT id */
+
+	kib_ps_pool_create_t    ps_pool_create;	 /* create a new pool */
+	kib_ps_pool_destroy_t   ps_pool_destroy;	/* destroy a pool */
+	kib_ps_node_init_t      ps_node_init;	   /* initialize new allocated node */
+	kib_ps_node_fini_t      ps_node_fini;	   /* finalize node */
+} kib_poolset_t;
+
+typedef struct kib_pool
+{
+	struct list_head	      po_list;		/* chain on pool list */
+	struct list_head	      po_free_list;	   /* pre-allocated node */
+	kib_poolset_t	  *po_owner;	       /* pool_set of this pool */
+	cfs_time_t	      po_deadline;	    /* deadline of this pool */
+	int		     po_allocated;	   /* # of elements in use */
+	int		     po_failed;	      /* pool is created on failed HCA */
+	int		     po_size;		/* # of pre-allocated elements */
+} kib_pool_t;
+
+typedef struct {
+	kib_poolset_t	   tps_poolset;	    /* pool-set */
+	__u64		   tps_next_tx_cookie;     /* cookie of TX */
+} kib_tx_poolset_t;
+
+typedef struct {
+	kib_pool_t	      tpo_pool;	       /* pool */
+	struct kib_hca_dev     *tpo_hdev;	       /* device for this pool */
+	struct kib_tx	  *tpo_tx_descs;	   /* all the tx descriptors */
+	kib_pages_t	    *tpo_tx_pages;	   /* premapped tx msg pages */
+} kib_tx_pool_t;
+
+typedef struct {
+	kib_poolset_t	   pps_poolset;	    /* pool-set */
+} kib_pmr_poolset_t;
+
+typedef struct kib_pmr_pool {
+	struct kib_hca_dev     *ppo_hdev;	       /* device for this pool */
+	kib_pool_t	      ppo_pool;	       /* pool */
+} kib_pmr_pool_t;
+
+typedef struct
+{
+	spinlock_t		fps_lock;		/* serialize */
+	struct kib_net	 *fps_net;		/* IB network */
+	struct list_head	      fps_pool_list;	  /* FMR pool list */
+	struct list_head	      fps_failed_pool_list;   /* FMR pool list */
+	__u64		   fps_version;	    /* validity stamp */
+	int			fps_cpt;		/* CPT id */
+	int			fps_pool_size;
+	int			fps_flush_trigger;
+	/* is allocating new pool */
+	int			fps_increasing;
+	/* time stamp for retry if failed to allocate */
+	cfs_time_t		fps_next_retry;
+} kib_fmr_poolset_t;
+
+typedef struct
+{
+	struct list_head	      fpo_list;	       /* chain on pool list */
+	struct kib_hca_dev     *fpo_hdev;	       /* device for this pool */
+	kib_fmr_poolset_t      *fpo_owner;	      /* owner of this pool */
+	struct ib_fmr_pool     *fpo_fmr_pool;	   /* IB FMR pool */
+	cfs_time_t	      fpo_deadline;	   /* deadline of this pool */
+	int		     fpo_failed;	     /* fmr pool is failed */
+	int		     fpo_map_count;	  /* # of mapped FMR */
+} kib_fmr_pool_t;
+
+typedef struct {
+	struct ib_pool_fmr     *fmr_pfmr;	       /* IB pool fmr */
+	kib_fmr_pool_t	 *fmr_pool;	       /* pool of FMR */
+} kib_fmr_t;
+
+typedef struct kib_net
+{
+	struct list_head	   ibn_list;	  /* chain on kib_dev_t::ibd_nets */
+	__u64		ibn_incarnation;   /* my epoch */
+	int		  ibn_init;	  /* initialisation state */
+	int		  ibn_shutdown;      /* shutting down? */
+
+	atomic_t		ibn_npeers;	/* # peers extant */
+	atomic_t		ibn_nconns;	/* # connections extant */
+
+	kib_tx_poolset_t	**ibn_tx_ps;	/* tx pool-set */
+	kib_fmr_poolset_t	**ibn_fmr_ps;	/* fmr pool-set */
+	kib_pmr_poolset_t	**ibn_pmr_ps;	/* pmr pool-set */
+
+	kib_dev_t		*ibn_dev;	/* underlying IB device */
+} kib_net_t;
+
+#define KIB_THREAD_SHIFT		16
+#define KIB_THREAD_ID(cpt, tid)		((cpt) << KIB_THREAD_SHIFT | (tid))
+#define KIB_THREAD_CPT(id)		((id) >> KIB_THREAD_SHIFT)
+#define KIB_THREAD_TID(id)		((id) & ((1UL << KIB_THREAD_SHIFT) - 1))
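+/* Example (illustrative values): KIB_THREAD_ID(2, 5) == (2 << 16) | 5 ==
+ * 0x20005; KIB_THREAD_CPT() recovers 2 and KIB_THREAD_TID() recovers 5.
+ * This packed id is what kiblnd_start_schedulers() passes to each thread. */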
+
+struct kib_sched_info {
+	/* serialise */
+	spinlock_t		ibs_lock;
+	/* schedulers sleep here */
+	wait_queue_head_t		ibs_waitq;
+	/* conns to check for rx completions */
+	struct list_head		ibs_conns;
+	/* number of scheduler threads */
+	int			ibs_nthreads;
+	/* max allowed scheduler threads */
+	int			ibs_nthreads_max;
+	int			ibs_cpt;	/* CPT id */
+};
+
+typedef struct
+{
+	int			kib_init;	/* initialisation state */
+	int			kib_shutdown;	/* shut down? */
+	struct list_head		kib_devs;	/* IB devices extant */
+	/* list head of failed devices */
+	struct list_head		kib_failed_devs;
+	/* failover thread sleeps here */
+	wait_queue_head_t		kib_failover_waitq;
+	atomic_t		kib_nthreads;	/* # live threads */
+	/* stabilize net/dev/peer/conn ops */
+	rwlock_t		kib_global_lock;
+	/* hash table of all my known peers */
+	struct list_head		*kib_peers;
+	/* size of kib_peers */
+	int			kib_peer_hash_size;
+	/* the connd task (serialisation assertions) */
+	void			*kib_connd;
+	/* connections to setup/teardown */
+	struct list_head		kib_connd_conns;
+	/* connections with zero refcount */
+	struct list_head		kib_connd_zombies;
+	/* connection daemon sleeps here */
+	wait_queue_head_t		kib_connd_waitq;
+	spinlock_t		kib_connd_lock;	/* serialise */
+	struct ib_qp_attr	kib_error_qpa;	/* QP->ERROR */
+	/* percpt data for schedulers */
+	struct kib_sched_info	**kib_scheds;
+} kib_data_t;
+
+#define IBLND_INIT_NOTHING	 0
+#define IBLND_INIT_DATA	    1
+#define IBLND_INIT_ALL	     2
+
+/************************************************************************
+ * IB Wire message format.
+ * These are sent in sender's byte order (i.e. receiver flips).
+ */
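+/* A receiver is expected to detect a byte-flipped peer by comparing
+ * ibm_magic against the byte-swapped IBLND_MSG_MAGIC and, if flipped,
+ * swabbing each field while unpacking (see kiblnd_unpack_msg() below). */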
+
+typedef struct kib_connparams
+{
+	__u16	     ibcp_queue_depth;
+	__u16	     ibcp_max_frags;
+	__u32	     ibcp_max_msg_size;
+} WIRE_ATTR kib_connparams_t;
+
+typedef struct
+{
+	lnet_hdr_t	ibim_hdr;	     /* portals header */
+	char	      ibim_payload[0];      /* piggy-backed payload */
+} WIRE_ATTR kib_immediate_msg_t;
+
+typedef struct
+{
+	__u32	     rf_nob;	       /* # bytes this frag */
+	__u64	     rf_addr;	      /* CAVEAT EMPTOR: misaligned!! */
+} WIRE_ATTR kib_rdma_frag_t;
+
+typedef struct
+{
+	__u32	     rd_key;	       /* local/remote key */
+	__u32	     rd_nfrags;	    /* # fragments */
+	kib_rdma_frag_t   rd_frags[0];	  /* buffer frags */
+} WIRE_ATTR kib_rdma_desc_t;
+
+typedef struct
+{
+	lnet_hdr_t	ibprm_hdr;	    /* portals header */
+	__u64	     ibprm_cookie;	 /* opaque completion cookie */
+} WIRE_ATTR kib_putreq_msg_t;
+
+typedef struct
+{
+	__u64	     ibpam_src_cookie;     /* reflected completion cookie */
+	__u64	     ibpam_dst_cookie;     /* opaque completion cookie */
+	kib_rdma_desc_t   ibpam_rd;	     /* sender's sink buffer */
+} WIRE_ATTR kib_putack_msg_t;
+
+typedef struct
+{
+	lnet_hdr_t	ibgm_hdr;	     /* portals header */
+	__u64	     ibgm_cookie;	  /* opaque completion cookie */
+	kib_rdma_desc_t   ibgm_rd;	      /* rdma descriptor */
+} WIRE_ATTR kib_get_msg_t;
+
+typedef struct
+{
+	__u64	     ibcm_cookie;	  /* opaque completion cookie */
+	__s32	     ibcm_status;	  /* < 0 failure; >= 0 length */
+} WIRE_ATTR kib_completion_msg_t;
+
+typedef struct
+{
+	/* First 2 fields fixed FOR ALL TIME */
+	__u32	     ibm_magic;	    /* I'm an ibnal message */
+	__u16	     ibm_version;	  /* this is my version number */
+
+	__u8	      ibm_type;	     /* msg type */
+	__u8	      ibm_credits;	  /* returned credits */
+	__u32	     ibm_nob;	      /* # bytes in whole message */
+	__u32	     ibm_cksum;	    /* checksum (0 == no checksum) */
+	__u64	     ibm_srcnid;	   /* sender's NID */
+	__u64	     ibm_srcstamp;	 /* sender's incarnation */
+	__u64	     ibm_dstnid;	   /* destination's NID */
+	__u64	     ibm_dststamp;	 /* destination's incarnation */
+
+	union {
+		kib_connparams_t      connparams;
+		kib_immediate_msg_t   immediate;
+		kib_putreq_msg_t      putreq;
+		kib_putack_msg_t      putack;
+		kib_get_msg_t	 get;
+		kib_completion_msg_t  completion;
+	} WIRE_ATTR ibm_u;
+} WIRE_ATTR kib_msg_t;
+
+#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC	/* unique magic */
+
+#define IBLND_MSG_VERSION_1	 0x11
+#define IBLND_MSG_VERSION_2	 0x12
+#define IBLND_MSG_VERSION	   IBLND_MSG_VERSION_2
+
+#define IBLND_MSG_CONNREQ	   0xc0	/* connection request */
+#define IBLND_MSG_CONNACK	   0xc1	/* connection acknowledge */
+#define IBLND_MSG_NOOP	      0xd0	/* nothing (just credits) */
+#define IBLND_MSG_IMMEDIATE	 0xd1	/* immediate */
+#define IBLND_MSG_PUT_REQ	   0xd2	/* putreq (src->sink) */
+#define IBLND_MSG_PUT_NAK	   0xd3	/* completion (sink->src) */
+#define IBLND_MSG_PUT_ACK	   0xd4	/* putack (sink->src) */
+#define IBLND_MSG_PUT_DONE	  0xd5	/* completion (src->sink) */
+#define IBLND_MSG_GET_REQ	   0xd6	/* getreq (sink->src) */
+#define IBLND_MSG_GET_DONE	  0xd7	/* completion (src->sink: all OK) */
+
+typedef struct {
+	__u32	    ibr_magic;	     /* sender's magic */
+	__u16	    ibr_version;	   /* sender's version */
+	__u8	     ibr_why;	       /* reject reason */
+	__u8	     ibr_padding;	   /* padding */
+	__u64	    ibr_incarnation;       /* incarnation of peer */
+	kib_connparams_t ibr_cp;		/* connection parameters */
+} WIRE_ATTR kib_rej_t;
+
+/* connection rejection reasons */
+#define IBLND_REJECT_CONN_RACE       1	  /* You lost connection race */
+#define IBLND_REJECT_NO_RESOURCES    2	  /* Out of memory/conns etc */
+#define IBLND_REJECT_FATAL	   3	  /* Anything else */
+
+#define IBLND_REJECT_CONN_UNCOMPAT   4	  /* incompatible version peer */
+#define IBLND_REJECT_CONN_STALE      5	  /* stale peer */
+
+#define IBLND_REJECT_RDMA_FRAGS      6	  /* Fatal: peer's rdma frags can't match mine */
+#define IBLND_REJECT_MSG_QUEUE_SIZE  7	  /* Fatal: peer's msg queue size can't match mine */
+
+/***********************************************************************/
+
+typedef struct kib_rx			   /* receive message */
+{
+	struct list_head		rx_list;      /* queue for attention */
+	struct kib_conn	  *rx_conn;      /* owning conn */
+	int		       rx_nob;       /* # bytes received (-1 while posted) */
+	enum ib_wc_status	 rx_status;    /* completion status */
+	kib_msg_t		*rx_msg;       /* message buffer (host vaddr) */
+	__u64		     rx_msgaddr;   /* message buffer (I/O addr) */
+	DECLARE_PCI_UNMAP_ADDR   (rx_msgunmap); /* for dma_unmap_single() */
+	struct ib_recv_wr	 rx_wrq;       /* receive work item... */
+	struct ib_sge	     rx_sge;       /* ...and its memory */
+} kib_rx_t;
+
+#define IBLND_POSTRX_DONT_POST    0	     /* don't post */
+#define IBLND_POSTRX_NO_CREDIT    1	     /* post: no credits */
+#define IBLND_POSTRX_PEER_CREDIT  2	     /* post: give peer back 1 credit */
+#define IBLND_POSTRX_RSRVD_CREDIT 3	     /* post: give myself back 1 reserved credit */
+
+typedef struct kib_tx			   /* transmit message */
+{
+	struct list_head		tx_list;      /* queue on idle_txs ibc_tx_queue etc. */
+	kib_tx_pool_t	    *tx_pool;      /* pool I'm from */
+	struct kib_conn	  *tx_conn;      /* owning conn */
+	short		     tx_sending;   /* # tx callbacks outstanding */
+	short		     tx_queued;    /* queued for sending */
+	short		     tx_waiting;   /* waiting for peer */
+	int		       tx_status;    /* LNET completion status */
+	unsigned long	     tx_deadline;  /* completion deadline */
+	__u64		     tx_cookie;    /* completion cookie */
+	lnet_msg_t	       *tx_lntmsg[2]; /* lnet msgs to finalize on completion */
+	kib_msg_t		*tx_msg;       /* message buffer (host vaddr) */
+	__u64		     tx_msgaddr;   /* message buffer (I/O addr) */
+	DECLARE_PCI_UNMAP_ADDR   (tx_msgunmap); /* for dma_unmap_single() */
+	int		       tx_nwrq;      /* # send work items */
+	struct ib_send_wr	*tx_wrq;       /* send work items... */
+	struct ib_sge	    *tx_sge;       /* ...and their memory */
+	kib_rdma_desc_t	  *tx_rd;	/* rdma descriptor */
+	int		       tx_nfrags;    /* # entries in... */
+	struct scatterlist       *tx_frags;     /* dma_map_sg descriptor */
+	__u64		    *tx_pages;     /* rdma phys page addrs */
+	union {
+		kib_phys_mr_t      *pmr;	/* MR for physical buffer */
+		kib_fmr_t	   fmr;	/* FMR */
+	}			 tx_u;
+	int		       tx_dmadir;    /* dma direction */
+} kib_tx_t;
+
+typedef struct kib_connvars
+{
+	/* connection-in-progress variables */
+	kib_msg_t		 cv_msg;
+} kib_connvars_t;
+
+typedef struct kib_conn
+{
+	struct kib_sched_info *ibc_sched;	/* scheduler information */
+	struct kib_peer     *ibc_peer;	  /* owning peer */
+	kib_hca_dev_t       *ibc_hdev;	  /* HCA bound on */
+	struct list_head	   ibc_list;	  /* stash on peer's conn list */
+	struct list_head	   ibc_sched_list;    /* schedule for attention */
+	__u16		ibc_version;       /* version of connection */
+	__u64		ibc_incarnation;   /* which instance of the peer */
+	atomic_t	 ibc_refcount;      /* # users */
+	int		  ibc_state;	 /* what's happening */
+	int		  ibc_nsends_posted; /* # uncompleted sends */
+	int		  ibc_noops_posted;  /* # uncompleted NOOPs */
+	int		  ibc_credits;       /* # credits I have */
+	int		  ibc_outstanding_credits; /* # credits to return */
+	int		  ibc_reserved_credits;/* # ACK/DONE msg credits */
+	int		  ibc_comms_error;   /* set on comms error */
+	unsigned int	     ibc_nrx:16;	/* receive buffers owned */
+	unsigned int	     ibc_scheduled:1;   /* scheduled for attention */
+	unsigned int	     ibc_ready:1;       /* CQ callback fired */
+	/* time of last send */
+	unsigned long	ibc_last_send;
+	/** link chain for kiblnd_check_conns only */
+	struct list_head	   ibc_connd_list;
+	/** rxs completed before ESTABLISHED */
+	struct list_head	   ibc_early_rxs;
+	/** IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */
+	struct list_head	   ibc_tx_noops;
+	struct list_head	   ibc_tx_queue;       /* sends that need a credit */
+	struct list_head	   ibc_tx_queue_nocred;/* sends that don't need a credit */
+	struct list_head	   ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */
+	struct list_head	   ibc_active_txs;     /* active tx awaiting completion */
+	spinlock_t	     ibc_lock;		 /* serialise */
+	kib_rx_t	    *ibc_rxs;	    /* the rx descs */
+	kib_pages_t	 *ibc_rx_pages;       /* premapped rx msg pages */
+
+	struct rdma_cm_id   *ibc_cmid;	   /* CM id */
+	struct ib_cq	*ibc_cq;	     /* completion queue */
+
+	kib_connvars_t      *ibc_connvars;       /* in-progress connection state */
+} kib_conn_t;
+
+#define IBLND_CONN_INIT	       0	 /* being initialised */
+#define IBLND_CONN_ACTIVE_CONNECT     1	 /* active sending req */
+#define IBLND_CONN_PASSIVE_WAIT       2	 /* passive waiting for rtu */
+#define IBLND_CONN_ESTABLISHED	3	 /* connection established */
+#define IBLND_CONN_CLOSING	    4	 /* being closed */
+#define IBLND_CONN_DISCONNECTED       5	 /* disconnected */
+
+typedef struct kib_peer
+{
+	struct list_head	   ibp_list;	   /* stash on global peer list */
+	lnet_nid_t	   ibp_nid;	    /* who's on the other end(s) */
+	lnet_ni_t	   *ibp_ni;	     /* LNet interface */
+	atomic_t	 ibp_refcount;       /* # users */
+	struct list_head	   ibp_conns;	  /* all active connections */
+	struct list_head	   ibp_tx_queue;       /* msgs waiting for a conn */
+	__u16		ibp_version;	/* version of peer */
+	__u64		ibp_incarnation;    /* incarnation of peer */
+	int		  ibp_connecting;     /* current active connection attempts */
+	int		  ibp_accepting;      /* current passive connection attempts */
+	int		  ibp_error;	  /* errno on closing this peer */
+	cfs_time_t	   ibp_last_alive;     /* when (in jiffies) I was last alive */
+} kib_peer_t;
+
+extern kib_data_t      kiblnd_data;
+
+extern void kiblnd_hdev_destroy(kib_hca_dev_t *hdev);
+
+static inline void
+kiblnd_hdev_addref_locked(kib_hca_dev_t *hdev)
+{
+	LASSERT (atomic_read(&hdev->ibh_ref) > 0);
+	atomic_inc(&hdev->ibh_ref);
+}
+
+static inline void
+kiblnd_hdev_decref(kib_hca_dev_t *hdev)
+{
+	LASSERT (atomic_read(&hdev->ibh_ref) > 0);
+	if (atomic_dec_and_test(&hdev->ibh_ref))
+		kiblnd_hdev_destroy(hdev);
+}
+
+static inline int
+kiblnd_dev_can_failover(kib_dev_t *dev)
+{
+	if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */
+		return 0;
+
+	if (*kiblnd_tunables.kib_dev_failover == 0) /* disabled */
+		return 0;
+
+	if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */
+		return 1;
+
+	return dev->ibd_can_failover;
+}
+
+#define kiblnd_conn_addref(conn)				\
+do {							    \
+	CDEBUG(D_NET, "conn[%p] (%d)++\n",		      \
+	       (conn), atomic_read(&(conn)->ibc_refcount)); \
+	atomic_inc(&(conn)->ibc_refcount);		  \
+} while (0)
+
+#define kiblnd_conn_decref(conn)					\
+do {									\
+	unsigned long flags;						\
+									\
+	CDEBUG(D_NET, "conn[%p] (%d)--\n",				\
+	       (conn), atomic_read(&(conn)->ibc_refcount));		\
+	LASSERT_ATOMIC_POS(&(conn)->ibc_refcount);			\
+	if (atomic_dec_and_test(&(conn)->ibc_refcount)) {		\
+		spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);	\
+		list_add_tail(&(conn)->ibc_list,			\
+				  &kiblnd_data.kib_connd_zombies);	\
+		wake_up(&kiblnd_data.kib_connd_waitq);		\
+		spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
+	}								\
+} while (0)
+
+#define kiblnd_peer_addref(peer)				\
+do {							    \
+	CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n",		\
+	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
+	       atomic_read (&(peer)->ibp_refcount));	\
+	atomic_inc(&(peer)->ibp_refcount);		  \
+} while (0)
+
+#define kiblnd_peer_decref(peer)				\
+do {							    \
+	CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n",		\
+	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
+	       atomic_read (&(peer)->ibp_refcount));	\
+	LASSERT_ATOMIC_POS(&(peer)->ibp_refcount);	      \
+	if (atomic_dec_and_test(&(peer)->ibp_refcount))     \
+		kiblnd_destroy_peer(peer);		      \
+} while (0)
+
+static inline struct list_head *
+kiblnd_nid2peerlist (lnet_nid_t nid)
+{
+	unsigned int hash =
+		((unsigned int)nid) % kiblnd_data.kib_peer_hash_size;
+
+	return (&kiblnd_data.kib_peers[hash]);
+}
+
+static inline int
+kiblnd_peer_active (kib_peer_t *peer)
+{
+	/* Am I in the peer hash table? */
+	return (!list_empty(&peer->ibp_list));
+}
+
+static inline kib_conn_t *
+kiblnd_get_conn_locked (kib_peer_t *peer)
+{
+	LASSERT (!list_empty(&peer->ibp_conns));
+
+	/* just return the first connection */
+	return list_entry(peer->ibp_conns.next, kib_conn_t, ibc_list);
+}
+
+static inline int
+kiblnd_send_keepalive(kib_conn_t *conn)
+{
+	return (*kiblnd_tunables.kib_keepalive > 0) &&
+		cfs_time_after(jiffies, conn->ibc_last_send +
+			       *kiblnd_tunables.kib_keepalive*HZ);
+}
+
+static inline int
+kiblnd_need_noop(kib_conn_t *conn)
+{
+	LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+	if (conn->ibc_outstanding_credits <
+	    IBLND_CREDITS_HIGHWATER(conn->ibc_version) &&
+	    !kiblnd_send_keepalive(conn))
+		return 0; /* No need to send NOOP */
+
+	if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
+		if (!list_empty(&conn->ibc_tx_queue_nocred))
+			return 0; /* NOOP can be piggybacked */
+
+		/* No tx to piggyback NOOP onto or no credit to send a tx */
+		return (list_empty(&conn->ibc_tx_queue) ||
+			conn->ibc_credits == 0);
+	}
+
+	if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
+	    !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
+	    conn->ibc_credits == 0)		    /* no credit */
+		return 0;
+
+	if (conn->ibc_credits == 1 &&      /* last credit reserved for */
+	    conn->ibc_outstanding_credits == 0) /* giving back credits */
+		return 0;
+
+	/* No tx to piggyback NOOP onto or no credit to send a tx */
+	return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
+}
+
+static inline void
+kiblnd_abort_receives(kib_conn_t *conn)
+{
+	ib_modify_qp(conn->ibc_cmid->qp,
+		     &kiblnd_data.kib_error_qpa, IB_QP_STATE);
+}
+
+static inline const char *
+kiblnd_queue2str (kib_conn_t *conn, struct list_head *q)
+{
+	if (q == &conn->ibc_tx_queue)
+		return "tx_queue";
+
+	if (q == &conn->ibc_tx_queue_rsrvd)
+		return "tx_queue_rsrvd";
+
+	if (q == &conn->ibc_tx_queue_nocred)
+		return "tx_queue_nocred";
+
+	if (q == &conn->ibc_active_txs)
+		return "active_txs";
+
+	LBUG();
+	return NULL;
+}
+
+/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the
+ * lowest bits of the work request id to stash the work item type. */
+
+#define IBLND_WID_TX    0
+#define IBLND_WID_RDMA  1
+#define IBLND_WID_RX    2
+#define IBLND_WID_MASK  3UL
+
+static inline __u64
+kiblnd_ptr2wreqid (void *ptr, int type)
+{
+	unsigned long lptr = (unsigned long)ptr;
+
+	LASSERT ((lptr & IBLND_WID_MASK) == 0);
+	LASSERT ((type & ~IBLND_WID_MASK) == 0);
+	return (__u64)(lptr | type);
+}
+
+static inline void *
+kiblnd_wreqid2ptr (__u64 wreqid)
+{
+	return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK);
+}
+
+static inline int
+kiblnd_wreqid2type (__u64 wreqid)
+{
+	return (wreqid & IBLND_WID_MASK);
+}
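+/* Typical round trip (sketch; assumes descriptors are at least 4-byte
+ * aligned so the two low bits of the pointer are free):
+ *	wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX);
+ *	... CQ completion fires ...
+ *	if (kiblnd_wreqid2type(wr_id) == IBLND_WID_RX)
+ *		rx = kiblnd_wreqid2ptr(wr_id);
+ */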
+
+static inline void
+kiblnd_set_conn_state (kib_conn_t *conn, int state)
+{
+	conn->ibc_state = state;
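+	/* full memory barrier: make the state change visible to other CPUs
+	 * before the caller takes any further action on this connection */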
+	mb();
+}
+
+static inline void
+kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob)
+{
+	msg->ibm_type = type;
+	msg->ibm_nob  = offsetof(kib_msg_t, ibm_u) + body_nob;
+}
+
+static inline int
+kiblnd_rd_size (kib_rdma_desc_t *rd)
+{
+	int   i;
+	int   size;
+
+	for (i = size = 0; i < rd->rd_nfrags; i++)
+		size += rd->rd_frags[i].rf_nob;
+
+	return size;
+}
+
+static inline __u64
+kiblnd_rd_frag_addr(kib_rdma_desc_t *rd, int index)
+{
+	return rd->rd_frags[index].rf_addr;
+}
+
+static inline __u32
+kiblnd_rd_frag_size(kib_rdma_desc_t *rd, int index)
+{
+	return rd->rd_frags[index].rf_nob;
+}
+
+static inline __u32
+kiblnd_rd_frag_key(kib_rdma_desc_t *rd, int index)
+{
+	return rd->rd_key;
+}
+
+static inline int
+kiblnd_rd_consume_frag(kib_rdma_desc_t *rd, int index, __u32 nob)
+{
+	if (nob < rd->rd_frags[index].rf_nob) {
+		rd->rd_frags[index].rf_addr += nob;
+		rd->rd_frags[index].rf_nob  -= nob;
+	} else {
+		index++;
+	}
+
+	return index;
+}
+
+static inline int
+kiblnd_rd_msg_size(kib_rdma_desc_t *rd, int msgtype, int n)
+{
+	LASSERT (msgtype == IBLND_MSG_GET_REQ ||
+		 msgtype == IBLND_MSG_PUT_ACK);
+
+	return msgtype == IBLND_MSG_GET_REQ ?
+	       offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]) :
+	       offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]);
+}
+
+
+static inline __u64
+kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+	return ib_dma_mapping_error(dev, dma_addr);
+}
+
+static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
+					  void *msg, size_t size,
+					  enum dma_data_direction direction)
+{
+	return ib_dma_map_single(dev, msg, size, direction);
+}
+
+static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
+					   __u64 addr, size_t size,
+					  enum dma_data_direction direction)
+{
+	ib_dma_unmap_single(dev, addr, size, direction);
+}
+
+#define KIBLND_UNMAP_ADDR_SET(p, m, a)  do {} while (0)
+#define KIBLND_UNMAP_ADDR(p, m, a)      (a)
+
+static inline int kiblnd_dma_map_sg(struct ib_device *dev,
+				    struct scatterlist *sg, int nents,
+				    enum dma_data_direction direction)
+{
+	return ib_dma_map_sg(dev, sg, nents, direction);
+}
+
+static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
+				       struct scatterlist *sg, int nents,
+				       enum dma_data_direction direction)
+{
+	ib_dma_unmap_sg(dev, sg, nents, direction);
+}
+
+static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
+					  struct scatterlist *sg)
+{
+	return ib_sg_dma_address(dev, sg);
+}
+
+static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
+					     struct scatterlist *sg)
+{
+	return ib_sg_dma_len(dev, sg);
+}
+
+/* XXX We use KIBLND_CONN_PARAM(e) as a writable buffer. This is not strictly
+ * right because OFED 1.2 defines it as const; to use it we have to add a
+ * (void *) cast to cast away the "const" */
+
+#define KIBLND_CONN_PARAM(e)	    ((e)->param.conn.private_data)
+#define KIBLND_CONN_PARAM_LEN(e)	((e)->param.conn.private_data_len)
+
+
+struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
+				    kib_rdma_desc_t *rd);
+struct ib_mr *kiblnd_find_dma_mr(kib_hca_dev_t *hdev,
+				 __u64 addr, __u64 size);
+void kiblnd_map_rx_descs(kib_conn_t *conn);
+void kiblnd_unmap_rx_descs(kib_conn_t *conn);
+int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx,
+		  kib_rdma_desc_t *rd, int nfrags);
+void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx);
+void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
+struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
+
+int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages,
+			 int npages, __u64 iov, kib_fmr_t *fmr);
+void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
+
+int  kiblnd_pmr_pool_map(kib_pmr_poolset_t *pps, kib_hca_dev_t *hdev,
+			 kib_rdma_desc_t *rd, __u64 *iova, kib_phys_mr_t **pp_pmr);
+void kiblnd_pmr_pool_unmap(kib_phys_mr_t *pmr);
+
+int  kiblnd_startup (lnet_ni_t *ni);
+void kiblnd_shutdown (lnet_ni_t *ni);
+int  kiblnd_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg);
+void kiblnd_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when);
+
+int  kiblnd_tunables_init(void);
+void kiblnd_tunables_fini(void);
+
+int  kiblnd_connd (void *arg);
+int  kiblnd_scheduler(void *arg);
+int  kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name);
+int  kiblnd_failover_thread (void *arg);
+
+int  kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages);
+void kiblnd_free_pages (kib_pages_t *p);
+
+int  kiblnd_cm_callback(struct rdma_cm_id *cmid,
+			struct rdma_cm_event *event);
+int  kiblnd_translate_mtu(int value);
+
+int  kiblnd_dev_failover(kib_dev_t *dev);
+int  kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
+void kiblnd_destroy_peer (kib_peer_t *peer);
+void kiblnd_destroy_dev (kib_dev_t *dev);
+void kiblnd_unlink_peer_locked (kib_peer_t *peer);
+void kiblnd_peer_alive (kib_peer_t *peer);
+kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid);
+void kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error);
+int  kiblnd_close_stale_conns_locked (kib_peer_t *peer,
+				      int version, __u64 incarnation);
+int  kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why);
+
+void kiblnd_connreq_done(kib_conn_t *conn, int status);
+kib_conn_t *kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid,
+				int state, int version);
+void kiblnd_destroy_conn (kib_conn_t *conn);
+void kiblnd_close_conn (kib_conn_t *conn, int error);
+void kiblnd_close_conn_locked (kib_conn_t *conn, int error);
+
+int  kiblnd_init_rdma (kib_conn_t *conn, kib_tx_t *tx, int type,
+		       int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie);
+
+void kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid);
+void kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn);
+void kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn);
+void kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob);
+void kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist,
+			 int status);
+void kiblnd_check_sends (kib_conn_t *conn);
+
+void kiblnd_qp_event(struct ib_event *event, void *arg);
+void kiblnd_cq_event(struct ib_event *event, void *arg);
+void kiblnd_cq_completion(struct ib_cq *cq, void *arg);
+
+void kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg, int version,
+		      int credits, lnet_nid_t dstnid, __u64 dststamp);
+int  kiblnd_unpack_msg(kib_msg_t *msg, int nob);
+int  kiblnd_post_rx (kib_rx_t *rx, int credit);
+
+int  kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
+int  kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
+		 unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
+		 unsigned int offset, unsigned int mlen, unsigned int rlen);

+ 3529 - 0
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c

@@ -0,0 +1,3529 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/o2iblnd/o2iblnd_cb.c
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include "o2iblnd.h"
+
+void
+kiblnd_tx_done (lnet_ni_t *ni, kib_tx_t *tx)
+{
+	lnet_msg_t *lntmsg[2];
+	kib_net_t  *net = ni->ni_data;
+	int	 rc;
+	int	 i;
+
+	LASSERT (net != NULL);
+	LASSERT (!in_interrupt());
+	LASSERT (!tx->tx_queued);	       /* mustn't be queued for sending */
+	LASSERT (tx->tx_sending == 0);	  /* mustn't be awaiting sent callback */
+	LASSERT (!tx->tx_waiting);	      /* mustn't be awaiting peer response */
+	LASSERT (tx->tx_pool != NULL);
+
+	kiblnd_unmap_tx(ni, tx);
+
+	/* tx may have up to 2 lnet msgs to finalise */
+	lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
+	lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
+	rc = tx->tx_status;
+
+	if (tx->tx_conn != NULL) {
+		LASSERT (ni == tx->tx_conn->ibc_peer->ibp_ni);
+
+		kiblnd_conn_decref(tx->tx_conn);
+		tx->tx_conn = NULL;
+	}
+
+	tx->tx_nwrq = 0;
+	tx->tx_status = 0;
+
+	kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
+
+	/* delay finalize until my descs have been freed */
+	for (i = 0; i < 2; i++) {
+		if (lntmsg[i] == NULL)
+			continue;
+
+		lnet_finalize(ni, lntmsg[i], rc);
+	}
+}
+
+void
+kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int status)
+{
+	kib_tx_t *tx;
+
+	while (!list_empty (txlist)) {
+		tx = list_entry (txlist->next, kib_tx_t, tx_list);
+
+		list_del(&tx->tx_list);
+		/* complete now */
+		tx->tx_waiting = 0;
+		tx->tx_status = status;
+		kiblnd_tx_done(ni, tx);
+	}
+}
+
+kib_tx_t *
+kiblnd_get_idle_tx(lnet_ni_t *ni, lnet_nid_t target)
+{
+	kib_net_t		*net = (kib_net_t *)ni->ni_data;
+	struct list_head		*node;
+	kib_tx_t		*tx;
+	kib_tx_poolset_t	*tps;
+
+	tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)];
+	node = kiblnd_pool_alloc_node(&tps->tps_poolset);
+	if (node == NULL)
+		return NULL;
+	tx = container_of(node, kib_tx_t, tx_list);
+
+	LASSERT (tx->tx_nwrq == 0);
+	LASSERT (!tx->tx_queued);
+	LASSERT (tx->tx_sending == 0);
+	LASSERT (!tx->tx_waiting);
+	LASSERT (tx->tx_status == 0);
+	LASSERT (tx->tx_conn == NULL);
+	LASSERT (tx->tx_lntmsg[0] == NULL);
+	LASSERT (tx->tx_lntmsg[1] == NULL);
+	LASSERT (tx->tx_u.pmr == NULL);
+	LASSERT (tx->tx_nfrags == 0);
+
+	return tx;
+}
+
+void
+kiblnd_drop_rx(kib_rx_t *rx)
+{
+	kib_conn_t		*conn	= rx->rx_conn;
+	struct kib_sched_info	*sched	= conn->ibc_sched;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&sched->ibs_lock, flags);
+	LASSERT(conn->ibc_nrx > 0);
+	conn->ibc_nrx--;
+	spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+	kiblnd_conn_decref(conn);
+}
+
+int
+kiblnd_post_rx (kib_rx_t *rx, int credit)
+{
+	kib_conn_t	 *conn = rx->rx_conn;
+	kib_net_t	  *net = conn->ibc_peer->ibp_ni->ni_data;
+	struct ib_recv_wr  *bad_wrq = NULL;
+	struct ib_mr       *mr;
+	int		 rc;
+
+	LASSERT (net != NULL);
+	LASSERT (!in_interrupt());
+	LASSERT (credit == IBLND_POSTRX_NO_CREDIT ||
+		 credit == IBLND_POSTRX_PEER_CREDIT ||
+		 credit == IBLND_POSTRX_RSRVD_CREDIT);
+
+	mr = kiblnd_find_dma_mr(conn->ibc_hdev, rx->rx_msgaddr, IBLND_MSG_SIZE);
+	LASSERT (mr != NULL);
+
+	rx->rx_sge.lkey   = mr->lkey;
+	rx->rx_sge.addr   = rx->rx_msgaddr;
+	rx->rx_sge.length = IBLND_MSG_SIZE;
+
+	rx->rx_wrq.next = NULL;
+	rx->rx_wrq.sg_list = &rx->rx_sge;
+	rx->rx_wrq.num_sge = 1;
+	rx->rx_wrq.wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX);
+
+	LASSERT (conn->ibc_state >= IBLND_CONN_INIT);
+	LASSERT (rx->rx_nob >= 0);	      /* not posted */
+
+	if (conn->ibc_state > IBLND_CONN_ESTABLISHED) {
+		kiblnd_drop_rx(rx);	     /* No more posts for this rx */
+		return 0;
+	}
+
+	rx->rx_nob = -1;			/* flag posted */
+
+	rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq);
+	if (rc != 0) {
+		CERROR("Can't post rx for %s: %d, bad_wrq: %p\n",
+		       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc, bad_wrq);
+		rx->rx_nob = 0;
+	}
+
+	if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */
+		return rc;
+
+	if (rc != 0) {
+		kiblnd_close_conn(conn, rc);
+		kiblnd_drop_rx(rx);	     /* No more posts for this rx */
+		return rc;
+	}
+
+	if (credit == IBLND_POSTRX_NO_CREDIT)
+		return 0;
+
+	spin_lock(&conn->ibc_lock);
+	if (credit == IBLND_POSTRX_PEER_CREDIT)
+		conn->ibc_outstanding_credits++;
+	else
+		conn->ibc_reserved_credits++;
+	spin_unlock(&conn->ibc_lock);
+
+	kiblnd_check_sends(conn);
+	return 0;
+}
+
+kib_tx_t *
+kiblnd_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie)
+{
+	struct list_head   *tmp;
+
+	list_for_each(tmp, &conn->ibc_active_txs) {
+		kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list);
+
+		LASSERT (!tx->tx_queued);
+		LASSERT (tx->tx_sending != 0 || tx->tx_waiting);
+
+		if (tx->tx_cookie != cookie)
+			continue;
+
+		if (tx->tx_waiting &&
+		    tx->tx_msg->ibm_type == txtype)
+			return tx;
+
+		CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
+		      tx->tx_waiting ? "" : "NOT ",
+		      tx->tx_msg->ibm_type, txtype);
+	}
+	return NULL;
+}
+
+void
+kiblnd_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie)
+{
+	kib_tx_t    *tx;
+	lnet_ni_t   *ni = conn->ibc_peer->ibp_ni;
+	int	  idle;
+
+	spin_lock(&conn->ibc_lock);
+
+	tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie);
+	if (tx == NULL) {
+		spin_unlock(&conn->ibc_lock);
+
+		CWARN("Unmatched completion type %x cookie "LPX64" from %s\n",
+		      txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		kiblnd_close_conn(conn, -EPROTO);
+		return;
+	}
+
+	if (tx->tx_status == 0) {	       /* success so far */
+		if (status < 0) {	       /* failed? */
+			tx->tx_status = status;
+		} else if (txtype == IBLND_MSG_GET_REQ) {
+			lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status);
+		}
+	}
+
+	tx->tx_waiting = 0;
+
+	idle = !tx->tx_queued && (tx->tx_sending == 0);
+	if (idle)
+		list_del(&tx->tx_list);
+
+	spin_unlock(&conn->ibc_lock);
+
+	if (idle)
+		kiblnd_tx_done(ni, tx);
+}
+
+void
+kiblnd_send_completion(kib_conn_t *conn, int type, int status, __u64 cookie)
+{
+	lnet_ni_t   *ni = conn->ibc_peer->ibp_ni;
+	kib_tx_t    *tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
+
+	if (tx == NULL) {
+		CERROR("Can't get tx for completion %x for %s\n",
+		       type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		return;
+	}
+
+	tx->tx_msg->ibm_u.completion.ibcm_status = status;
+	tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
+	kiblnd_init_tx_msg(ni, tx, type, sizeof(kib_completion_msg_t));
+
+	kiblnd_queue_tx(tx, conn);
+}
+
+void
+kiblnd_handle_rx (kib_rx_t *rx)
+{
+	kib_msg_t    *msg = rx->rx_msg;
+	kib_conn_t   *conn = rx->rx_conn;
+	lnet_ni_t    *ni = conn->ibc_peer->ibp_ni;
+	int	   credits = msg->ibm_credits;
+	kib_tx_t     *tx;
+	int	   rc = 0;
+	int	   rc2;
+	int	   post_credit;
+
+	LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+	CDEBUG (D_NET, "Received %x[%d] from %s\n",
+		msg->ibm_type, credits,
+		libcfs_nid2str(conn->ibc_peer->ibp_nid));
+
+	if (credits != 0) {
+		/* Have I received credits that will let me send? */
+		spin_lock(&conn->ibc_lock);
+
+		if (conn->ibc_credits + credits >
+		    IBLND_MSG_QUEUE_SIZE(conn->ibc_version)) {
+			rc2 = conn->ibc_credits;
+			spin_unlock(&conn->ibc_lock);
+
+			CERROR("Bad credits from %s: %d + %d > %d\n",
+			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
+			       rc2, credits,
+			       IBLND_MSG_QUEUE_SIZE(conn->ibc_version));
+
+			kiblnd_close_conn(conn, -EPROTO);
+			kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT);
+			return;
+		}
+
+		conn->ibc_credits += credits;
+
+		/* This ensures the credit taken by NOOP can be returned */
+		if (msg->ibm_type == IBLND_MSG_NOOP &&
+		    !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
+			conn->ibc_outstanding_credits++;
+
+		spin_unlock(&conn->ibc_lock);
+		kiblnd_check_sends(conn);
+	}
+
+	switch (msg->ibm_type) {
+	default:
+		CERROR("Bad IBLND message type %x from %s\n",
+		       msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		post_credit = IBLND_POSTRX_NO_CREDIT;
+		rc = -EPROTO;
+		break;
+
+	case IBLND_MSG_NOOP:
+		if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
+			post_credit = IBLND_POSTRX_NO_CREDIT;
+			break;
+		}
+
+		if (credits != 0) /* credit already posted */
+			post_credit = IBLND_POSTRX_NO_CREDIT;
+		else	      /* a keepalive NOOP */
+			post_credit = IBLND_POSTRX_PEER_CREDIT;
+		break;
+
+	case IBLND_MSG_IMMEDIATE:
+		post_credit = IBLND_POSTRX_DONT_POST;
+		rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr,
+				msg->ibm_srcnid, rx, 0);
+		if (rc < 0)		     /* repost on error */
+			post_credit = IBLND_POSTRX_PEER_CREDIT;
+		break;
+
+	case IBLND_MSG_PUT_REQ:
+		post_credit = IBLND_POSTRX_DONT_POST;
+		rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr,
+				msg->ibm_srcnid, rx, 1);
+		if (rc < 0)		     /* repost on error */
+			post_credit = IBLND_POSTRX_PEER_CREDIT;
+		break;
+
+	case IBLND_MSG_PUT_NAK:
+		CWARN ("PUT_NACK from %s\n",
+		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		post_credit = IBLND_POSTRX_RSRVD_CREDIT;
+		kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ,
+					 msg->ibm_u.completion.ibcm_status,
+					 msg->ibm_u.completion.ibcm_cookie);
+		break;
+
+	case IBLND_MSG_PUT_ACK:
+		post_credit = IBLND_POSTRX_RSRVD_CREDIT;
+
+		spin_lock(&conn->ibc_lock);
+		tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ,
+					msg->ibm_u.putack.ibpam_src_cookie);
+		if (tx != NULL)
+			list_del(&tx->tx_list);
+		spin_unlock(&conn->ibc_lock);
+
+		if (tx == NULL) {
+			CERROR("Unmatched PUT_ACK from %s\n",
+			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
+			rc = -EPROTO;
+			break;
+		}
+
+		LASSERT (tx->tx_waiting);
+		/* CAVEAT EMPTOR: I could be racing with tx_complete, but...
+		 * (a) I can overwrite tx_msg since my peer has received it!
+		 * (b) tx_waiting set tells tx_complete() it's not done. */
+
+		tx->tx_nwrq = 0;		/* overwrite PUT_REQ */
+
+		rc2 = kiblnd_init_rdma(conn, tx, IBLND_MSG_PUT_DONE,
+				       kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd),
+				       &msg->ibm_u.putack.ibpam_rd,
+				       msg->ibm_u.putack.ibpam_dst_cookie);
+		if (rc2 < 0)
+			CERROR("Can't setup rdma for PUT to %s: %d\n",
+			       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
+
+		spin_lock(&conn->ibc_lock);
+		tx->tx_waiting = 0;	/* clear waiting and queue atomically */
+		kiblnd_queue_tx_locked(tx, conn);
+		spin_unlock(&conn->ibc_lock);
+		break;
+
+	case IBLND_MSG_PUT_DONE:
+		post_credit = IBLND_POSTRX_PEER_CREDIT;
+		kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK,
+					 msg->ibm_u.completion.ibcm_status,
+					 msg->ibm_u.completion.ibcm_cookie);
+		break;
+
+	case IBLND_MSG_GET_REQ:
+		post_credit = IBLND_POSTRX_DONT_POST;
+		rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr,
+				msg->ibm_srcnid, rx, 1);
+		if (rc < 0)		     /* repost on error */
+			post_credit = IBLND_POSTRX_PEER_CREDIT;
+		break;
+
+	case IBLND_MSG_GET_DONE:
+		post_credit = IBLND_POSTRX_RSRVD_CREDIT;
+		kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ,
+					 msg->ibm_u.completion.ibcm_status,
+					 msg->ibm_u.completion.ibcm_cookie);
+		break;
+	}
+
+	if (rc < 0)			     /* protocol error */
+		kiblnd_close_conn(conn, rc);
+
+	if (post_credit != IBLND_POSTRX_DONT_POST)
+		kiblnd_post_rx(rx, post_credit);
+}
+
+void
+kiblnd_rx_complete (kib_rx_t *rx, int status, int nob)
+{
+	kib_msg_t    *msg = rx->rx_msg;
+	kib_conn_t   *conn = rx->rx_conn;
+	lnet_ni_t    *ni = conn->ibc_peer->ibp_ni;
+	kib_net_t    *net = ni->ni_data;
+	int	   rc;
+	int	   err = -EIO;
+
+	LASSERT (net != NULL);
+	LASSERT (rx->rx_nob < 0);	       /* was posted */
+	rx->rx_nob = 0;			 /* isn't now */
+
+	if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
+		goto ignore;
+
+	if (status != IB_WC_SUCCESS) {
+		CNETERR("Rx from %s failed: %d\n",
+			libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
+		goto failed;
+	}
+
+	LASSERT (nob >= 0);
+	rx->rx_nob = nob;
+
+	rc = kiblnd_unpack_msg(msg, rx->rx_nob);
+	if (rc != 0) {
+		CERROR ("Error %d unpacking rx from %s\n",
+			rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		goto failed;
+	}
+
+	if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
+	    msg->ibm_dstnid != ni->ni_nid ||
+	    msg->ibm_srcstamp != conn->ibc_incarnation ||
+	    msg->ibm_dststamp != net->ibn_incarnation) {
+		CERROR ("Stale rx from %s\n",
+			libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		err = -ESTALE;
+		goto failed;
+	}
+
+	/* set time last known alive */
+	kiblnd_peer_alive(conn->ibc_peer);
+
+	/* racing with connection establishment/teardown! */
+
+	if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
+		rwlock_t  *g_lock = &kiblnd_data.kib_global_lock;
+		unsigned long  flags;
+
+		write_lock_irqsave(g_lock, flags);
+		/* must check holding global lock to eliminate race */
+		if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
+			list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
+			write_unlock_irqrestore(g_lock, flags);
+			return;
+		}
+		write_unlock_irqrestore(g_lock, flags);
+	}
+	kiblnd_handle_rx(rx);
+	return;
+
+ failed:
+	CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
+	kiblnd_close_conn(conn, err);
+ ignore:
+	kiblnd_drop_rx(rx);		     /* Don't re-post rx. */
+}
+
+struct page *
+kiblnd_kvaddr_to_page (unsigned long vaddr)
+{
+	struct page *page;
+
+	if (vaddr >= VMALLOC_START &&
+	    vaddr < VMALLOC_END) {
+		page = vmalloc_to_page ((void *)vaddr);
+		LASSERT (page != NULL);
+		return page;
+	}
+#ifdef CONFIG_HIGHMEM
+	if (vaddr >= PKMAP_BASE &&
+	    vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
+		/* No highmem kernel virtual addresses: highmem pages are
+		 * only used for bulk (kiov) I/O */
+		CERROR("Unexpected highmem kernel address\n");
+		LBUG();
+	}
+#endif
+	page = virt_to_page (vaddr);
+	LASSERT (page != NULL);
+	return page;
+}
+
+static int
+kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
+{
+	kib_hca_dev_t		*hdev;
+	__u64			*pages = tx->tx_pages;
+	kib_fmr_poolset_t	*fps;
+	int			npages;
+	int			size;
+	int			cpt;
+	int			rc;
+	int			i;
+
+	LASSERT(tx->tx_pool != NULL);
+	LASSERT(tx->tx_pool->tpo_pool.po_owner != NULL);
+
+	hdev  = tx->tx_pool->tpo_hdev;
+
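+	/* build a list of device-page-aligned addresses covering every byte
+	 * of every fragment, for the FMR to map as one region */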
+	for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
+		for (size = 0; size <  rd->rd_frags[i].rf_nob;
+			       size += hdev->ibh_page_size) {
+			pages[npages ++] = (rd->rd_frags[i].rf_addr &
+					    hdev->ibh_page_mask) + size;
+		}
+	}
+
+	cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
+
+	fps = net->ibn_fmr_ps[cpt];
+	rc = kiblnd_fmr_pool_map(fps, pages, npages, 0, &tx->tx_u.fmr);
+	if (rc != 0) {
+		CERROR ("Can't map %d pages: %d\n", npages, rc);
+		return rc;
+	}
+
+	/* If rd is not tx_rd, it's going to get sent to a peer, who will need
+	 * the rkey */
+	rd->rd_key = (rd != tx->tx_rd) ? tx->tx_u.fmr.fmr_pfmr->fmr->rkey :
+					 tx->tx_u.fmr.fmr_pfmr->fmr->lkey;
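+	/* the FMR presents the region as a single virtually-contiguous
+	 * fragment; rf_addr keeps only the offset within the first page */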
+	rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
+	rd->rd_frags[0].rf_nob   = nob;
+	rd->rd_nfrags = 1;
+
+	return 0;
+}
+
+static int
+kiblnd_pmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
+{
+	kib_hca_dev_t		*hdev;
+	kib_pmr_poolset_t	*pps;
+	__u64			iova;
+	int			cpt;
+	int			rc;
+
+	LASSERT(tx->tx_pool != NULL);
+	LASSERT(tx->tx_pool->tpo_pool.po_owner != NULL);
+
+	hdev = tx->tx_pool->tpo_hdev;
+
+	iova = rd->rd_frags[0].rf_addr & ~hdev->ibh_page_mask;
+
+	cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
+
+	pps = net->ibn_pmr_ps[cpt];
+	rc = kiblnd_pmr_pool_map(pps, hdev, rd, &iova, &tx->tx_u.pmr);
+	if (rc != 0) {
+		CERROR("Failed to create MR from physical buffers: %d\n", rc);
+		return rc;
+	}
+
+	/* If rd is not tx_rd, it's going to get sent to a peer, who will need
+	 * the rkey */
+	rd->rd_key = (rd != tx->tx_rd) ? tx->tx_u.pmr->pmr_mr->rkey :
+					 tx->tx_u.pmr->pmr_mr->lkey;
+	rd->rd_nfrags = 1;
+	rd->rd_frags[0].rf_addr = iova;
+	rd->rd_frags[0].rf_nob  = nob;
+
+	return 0;
+}
+
+void
+kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx)
+{
+	kib_net_t  *net = ni->ni_data;
+
+	LASSERT(net != NULL);
+
+	if (net->ibn_fmr_ps != NULL && tx->tx_u.fmr.fmr_pfmr != NULL) {
+		kiblnd_fmr_pool_unmap(&tx->tx_u.fmr, tx->tx_status);
+		tx->tx_u.fmr.fmr_pfmr = NULL;
+
+	} else if (net->ibn_pmr_ps != NULL && tx->tx_u.pmr != NULL) {
+		kiblnd_pmr_pool_unmap(tx->tx_u.pmr);
+		tx->tx_u.pmr = NULL;
+	}
+
+	if (tx->tx_nfrags != 0) {
+		kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
+				    tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
+		tx->tx_nfrags = 0;
+	}
+}
+
+int
+kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx,
+	      kib_rdma_desc_t *rd, int nfrags)
+{
+	kib_hca_dev_t      *hdev  = tx->tx_pool->tpo_hdev;
+	kib_net_t	  *net   = ni->ni_data;
+	struct ib_mr       *mr    = NULL;
+	__u32	       nob;
+	int		 i;
+
+	/* If rd is not tx_rd, it's going to get sent to a peer and I'm the
+	 * RDMA sink */
+	tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	tx->tx_nfrags = nfrags;
+
+	rd->rd_nfrags =
+		kiblnd_dma_map_sg(hdev->ibh_ibdev,
+				  tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
+
+	for (i = 0, nob = 0; i < rd->rd_nfrags; i++) {
+		rd->rd_frags[i].rf_nob  = kiblnd_sg_dma_len(
+			hdev->ibh_ibdev, &tx->tx_frags[i]);
+		rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address(
+			hdev->ibh_ibdev, &tx->tx_frags[i]);
+		nob += rd->rd_frags[i].rf_nob;
+	}
+
+	/* looking for pre-mapping MR */
+	mr = kiblnd_find_rd_dma_mr(hdev, rd);
+	if (mr != NULL) {
+		/* found pre-mapping MR */
+		rd->rd_key = (rd != tx->tx_rd) ? mr->rkey : mr->lkey;
+		return 0;
+	}
+
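+	/* no pre-registered global MR covers this RDMA; fall back to an FMR
+	 * or PMR pool mapping */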
+	if (net->ibn_fmr_ps != NULL)
+		return kiblnd_fmr_map_tx(net, tx, rd, nob);
+	else if (net->ibn_pmr_ps != NULL)
+		return kiblnd_pmr_map_tx(net, tx, rd, nob);
+
+	return -EINVAL;
+}
+
+int
+kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
+		    unsigned int niov, struct iovec *iov, int offset, int nob)
+{
+	kib_net_t	  *net = ni->ni_data;
+	struct page	*page;
+	struct scatterlist *sg;
+	unsigned long       vaddr;
+	int		 fragnob;
+	int		 page_offset;
+
+	LASSERT (nob > 0);
+	LASSERT (niov > 0);
+	LASSERT (net != NULL);
+
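+	/* skip any iovs wholly consumed by 'offset' */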
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		niov--;
+		iov++;
+		LASSERT (niov > 0);
+	}
+
+	sg = tx->tx_frags;
+	do {
+		LASSERT (niov > 0);
+
+		vaddr = ((unsigned long)iov->iov_base) + offset;
+		page_offset = vaddr & (PAGE_SIZE - 1);
+		page = kiblnd_kvaddr_to_page(vaddr);
+		if (page == NULL) {
+			CERROR ("Can't find page\n");
+			return -EFAULT;
+		}
+
+		fragnob = min((int)(iov->iov_len - offset), nob);
+		fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
+
+		sg_set_page(sg, page, fragnob, page_offset);
+		sg++;
+
+		if (offset + fragnob < iov->iov_len) {
+			offset += fragnob;
+		} else {
+			offset = 0;
+			iov++;
+			niov--;
+		}
+		nob -= fragnob;
+	} while (nob > 0);
+
+	return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
+}
+
+int
+kiblnd_setup_rd_kiov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
+		      int nkiov, lnet_kiov_t *kiov, int offset, int nob)
+{
+	kib_net_t	  *net = ni->ni_data;
+	struct scatterlist *sg;
+	int		 fragnob;
+
+	CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
+
+	LASSERT (nob > 0);
+	LASSERT (nkiov > 0);
+	LASSERT (net != NULL);
+
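+	/* skip any kiovs wholly consumed by 'offset' */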
+	while (offset >= kiov->kiov_len) {
+		offset -= kiov->kiov_len;
+		nkiov--;
+		kiov++;
+		LASSERT (nkiov > 0);
+	}
+
+	sg = tx->tx_frags;
+	do {
+		LASSERT (nkiov > 0);
+
+		fragnob = min((int)(kiov->kiov_len - offset), nob);
+
+		sg_set_page(sg, kiov->kiov_page, fragnob,
+			    kiov->kiov_offset + offset);
+		sg++;
+
+		offset = 0;
+		kiov++;
+		nkiov--;
+		nob -= fragnob;
+	} while (nob > 0);
+
+	return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
+}
+
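+/* Caller holds conn->ibc_lock.  Returns 0 if the tx was posted (or was a
+ * redundant NOOP and has been completed), -EAGAIN if it must remain queued
+ * for now, or -EIO if posting failed and the connection is being closed. */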
+int
+kiblnd_post_tx_locked (kib_conn_t *conn, kib_tx_t *tx, int credit)
+{
+	kib_msg_t	 *msg = tx->tx_msg;
+	kib_peer_t	*peer = conn->ibc_peer;
+	int		ver = conn->ibc_version;
+	int		rc;
+	int		done;
+	struct ib_send_wr *bad_wrq;
+
+	LASSERT (tx->tx_queued);
+	/* We rely on this for QP sizing */
+	LASSERT (tx->tx_nwrq > 0);
+	LASSERT (tx->tx_nwrq <= 1 + IBLND_RDMA_FRAGS(ver));
+
+	LASSERT (credit == 0 || credit == 1);
+	LASSERT (conn->ibc_outstanding_credits >= 0);
+	LASSERT (conn->ibc_outstanding_credits <= IBLND_MSG_QUEUE_SIZE(ver));
+	LASSERT (conn->ibc_credits >= 0);
+	LASSERT (conn->ibc_credits <= IBLND_MSG_QUEUE_SIZE(ver));
+
+	if (conn->ibc_nsends_posted == IBLND_CONCURRENT_SENDS(ver)) {
+		/* tx completions outstanding... */
+		CDEBUG(D_NET, "%s: posted enough\n",
+		       libcfs_nid2str(peer->ibp_nid));
+		return -EAGAIN;
+	}
+
+	if (credit != 0 && conn->ibc_credits == 0) {   /* no credits */
+		CDEBUG(D_NET, "%s: no credits\n",
+		       libcfs_nid2str(peer->ibp_nid));
+		return -EAGAIN;
+	}
+
+	if (credit != 0 && !IBLND_OOB_CAPABLE(ver) &&
+	    conn->ibc_credits == 1 &&   /* last credit reserved */
+	    msg->ibm_type != IBLND_MSG_NOOP) {      /* for NOOP */
+		CDEBUG(D_NET, "%s: not using last credit\n",
+		       libcfs_nid2str(peer->ibp_nid));
+		return -EAGAIN;
+	}
+
+	/* NB don't drop ibc_lock before bumping tx_sending */
+	list_del(&tx->tx_list);
+	tx->tx_queued = 0;
+
+	if (msg->ibm_type == IBLND_MSG_NOOP &&
+	    (!kiblnd_need_noop(conn) ||     /* redundant NOOP */
+	     (IBLND_OOB_CAPABLE(ver) && /* posted enough NOOP */
+	      conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
+		/* OK to drop when posted enough NOOPs, since
+		 * kiblnd_check_sends will queue NOOP again when
+		 * posted NOOPs complete */
+		spin_unlock(&conn->ibc_lock);
+		kiblnd_tx_done(peer->ibp_ni, tx);
+		spin_lock(&conn->ibc_lock);
+		CDEBUG(D_NET, "%s(%d): redundant or enough NOOP\n",
+		       libcfs_nid2str(peer->ibp_nid),
+		       conn->ibc_noops_posted);
+		return 0;
+	}
+
+	kiblnd_pack_msg(peer->ibp_ni, msg, ver, conn->ibc_outstanding_credits,
+			peer->ibp_nid, conn->ibc_incarnation);
+
+	conn->ibc_credits -= credit;
+	conn->ibc_outstanding_credits = 0;
+	conn->ibc_nsends_posted++;
+	if (msg->ibm_type == IBLND_MSG_NOOP)
+		conn->ibc_noops_posted++;
+
+	/* CAVEAT EMPTOR!  This tx could be the PUT_DONE of an RDMA
+	 * PUT.  If so, it was first queued here as a PUT_REQ, sent and
+	 * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
+	 * and then re-queued here.  It's (just) possible that
+	 * tx_sending is non-zero if we've not done the tx_complete()
+	 * from the first send; hence the ++ rather than = below. */
+	tx->tx_sending++;
+	list_add(&tx->tx_list, &conn->ibc_active_txs);
+
+	/* I'm still holding ibc_lock! */
+	if (conn->ibc_state != IBLND_CONN_ESTABLISHED) {
+		rc = -ECONNABORTED;
+	} else if (tx->tx_pool->tpo_pool.po_failed ||
+		 conn->ibc_hdev != tx->tx_pool->tpo_hdev) {
+		/* close_conn will launch failover */
+		rc = -ENETDOWN;
+	} else {
+		rc = ib_post_send(conn->ibc_cmid->qp,
+				  tx->tx_wrq, &bad_wrq);
+	}
+
+	conn->ibc_last_send = jiffies;
+
+	if (rc == 0)
+		return 0;
+
+	/* NB credits are transferred in the actual
+	 * message, which can only be the last work item */
+	conn->ibc_credits += credit;
+	conn->ibc_outstanding_credits += msg->ibm_credits;
+	conn->ibc_nsends_posted--;
+	if (msg->ibm_type == IBLND_MSG_NOOP)
+		conn->ibc_noops_posted--;
+
+	tx->tx_status = rc;
+	tx->tx_waiting = 0;
+	tx->tx_sending--;
+
+	done = (tx->tx_sending == 0);
+	if (done)
+		list_del(&tx->tx_list);
+
+	spin_unlock(&conn->ibc_lock);
+
+	if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
+		CERROR("Error %d posting transmit to %s\n",
+		       rc, libcfs_nid2str(peer->ibp_nid));
+	else
+		CDEBUG(D_NET, "Error %d posting transmit to %s\n",
+		       rc, libcfs_nid2str(peer->ibp_nid));
+
+	kiblnd_close_conn(conn, rc);
+
+	if (done)
+		kiblnd_tx_done(peer->ibp_ni, tx);
+
+	spin_lock(&conn->ibc_lock);
+
+	return -EIO;
+}
+
+void
+kiblnd_check_sends (kib_conn_t *conn)
+{
+	int	ver = conn->ibc_version;
+	lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+	kib_tx_t  *tx;
+
+	/* Don't send anything until after the connection is established */
+	if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
+		CDEBUG(D_NET, "%s too soon\n",
+		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		return;
+	}
+
+	spin_lock(&conn->ibc_lock);
+
+	LASSERT (conn->ibc_nsends_posted <= IBLND_CONCURRENT_SENDS(ver));
+	LASSERT (!IBLND_OOB_CAPABLE(ver) ||
+		 conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
+	LASSERT (conn->ibc_reserved_credits >= 0);
+
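+	/* promote txs blocked on reserved credits as credits become
+	 * available */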
+	while (conn->ibc_reserved_credits > 0 &&
+	       !list_empty(&conn->ibc_tx_queue_rsrvd)) {
+		tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
+				    kib_tx_t, tx_list);
+		list_del(&tx->tx_list);
+		list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
+		conn->ibc_reserved_credits--;
+	}
+
+	if (kiblnd_need_noop(conn)) {
+		spin_unlock(&conn->ibc_lock);
+
+		tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
+		if (tx != NULL)
+			kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0);
+
+		spin_lock(&conn->ibc_lock);
+		if (tx != NULL)
+			kiblnd_queue_tx_locked(tx, conn);
+	}
+
+	kiblnd_conn_addref(conn); /* 1 ref for me.... (see b21911) */
+
+	for (;;) {
+		int credit;
+
+		if (!list_empty(&conn->ibc_tx_queue_nocred)) {
+			credit = 0;
+			tx = list_entry(conn->ibc_tx_queue_nocred.next,
+					    kib_tx_t, tx_list);
+		} else if (!list_empty(&conn->ibc_tx_noops)) {
+			LASSERT (!IBLND_OOB_CAPABLE(ver));
+			credit = 1;
+			tx = list_entry(conn->ibc_tx_noops.next,
+					kib_tx_t, tx_list);
+		} else if (!list_empty(&conn->ibc_tx_queue)) {
+			credit = 1;
+			tx = list_entry(conn->ibc_tx_queue.next,
+					    kib_tx_t, tx_list);
+		} else
+			break;
+
+		if (kiblnd_post_tx_locked(conn, tx, credit) != 0)
+			break;
+	}
+
+	spin_unlock(&conn->ibc_lock);
+
+	kiblnd_conn_decref(conn); /* ...until here */
+}
+
+void
+kiblnd_tx_complete (kib_tx_t *tx, int status)
+{
+	int	   failed = (status != IB_WC_SUCCESS);
+	kib_conn_t   *conn = tx->tx_conn;
+	int	   idle;
+
+	LASSERT (tx->tx_sending > 0);
+
+	if (failed) {
+		if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
+			CNETERR("Tx -> %s cookie "LPX64
+				" sending %d waiting %d: failed %d\n",
+				libcfs_nid2str(conn->ibc_peer->ibp_nid),
+				tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
+				status);
+
+		kiblnd_close_conn(conn, -EIO);
+	} else {
+		kiblnd_peer_alive(conn->ibc_peer);
+	}
+
+	spin_lock(&conn->ibc_lock);
+
+	/* I could be racing with rdma completion.  Whoever makes 'tx' idle
+	 * gets to free it, which also drops its ref on 'conn'. */
+
+	tx->tx_sending--;
+	conn->ibc_nsends_posted--;
+	if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP)
+		conn->ibc_noops_posted--;
+
+	if (failed) {
+		tx->tx_waiting = 0;	     /* don't wait for peer */
+		tx->tx_status = -EIO;
+	}
+
+	idle = (tx->tx_sending == 0) &&	 /* This is the final callback */
+	       !tx->tx_waiting &&	       /* Not waiting for peer */
+	       !tx->tx_queued;		  /* Not re-queued (PUT_DONE) */
+	if (idle)
+		list_del(&tx->tx_list);
+
+	kiblnd_conn_addref(conn);	       /* 1 ref for me.... */
+
+	spin_unlock(&conn->ibc_lock);
+
+	if (idle)
+		kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
+
+	kiblnd_check_sends(conn);
+
+	kiblnd_conn_decref(conn);	       /* ...until here */
+}
+
+void
+kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob)
+{
+	kib_hca_dev_t     *hdev = tx->tx_pool->tpo_hdev;
+	struct ib_sge     *sge = &tx->tx_sge[tx->tx_nwrq];
+	struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
+	int		nob = offsetof (kib_msg_t, ibm_u) + body_nob;
+	struct ib_mr      *mr;
+
+	LASSERT (tx->tx_nwrq >= 0);
+	LASSERT (tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
+	LASSERT (nob <= IBLND_MSG_SIZE);
+
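+	/* append one SGE covering the message buffer, and one signalled SEND
+	 * work request for it */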
+	kiblnd_init_msg(tx->tx_msg, type, body_nob);
+
+	mr = kiblnd_find_dma_mr(hdev, tx->tx_msgaddr, nob);
+	LASSERT (mr != NULL);
+
+	sge->lkey   = mr->lkey;
+	sge->addr   = tx->tx_msgaddr;
+	sge->length = nob;
+
+	memset(wrq, 0, sizeof(*wrq));
+
+	wrq->next       = NULL;
+	wrq->wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
+	wrq->sg_list    = sge;
+	wrq->num_sge    = 1;
+	wrq->opcode     = IB_WR_SEND;
+	wrq->send_flags = IB_SEND_SIGNALED;
+
+	tx->tx_nwrq++;
+}
+
+int
+kiblnd_init_rdma (kib_conn_t *conn, kib_tx_t *tx, int type,
+		  int resid, kib_rdma_desc_t *dstrd, __u64 dstcookie)
+{
+	kib_msg_t	 *ibmsg = tx->tx_msg;
+	kib_rdma_desc_t   *srcrd = tx->tx_rd;
+	struct ib_sge     *sge = &tx->tx_sge[0];
+	struct ib_send_wr *wrq = &tx->tx_wrq[0];
+	int		rc  = resid;
+	int		srcidx;
+	int		dstidx;
+	int		wrknob;
+
+	LASSERT (!in_interrupt());
+	LASSERT (tx->tx_nwrq == 0);
+	LASSERT (type == IBLND_MSG_GET_DONE ||
+		 type == IBLND_MSG_PUT_DONE);
+
+	srcidx = dstidx = 0;
+
+	while (resid > 0) {
+		if (srcidx >= srcrd->rd_nfrags) {
+			CERROR("Src buffer exhausted: %d frags\n", srcidx);
+			rc = -EPROTO;
+			break;
+		}
+
+		if (dstidx == dstrd->rd_nfrags) {
+			CERROR("Dst buffer exhausted: %d frags\n", dstidx);
+			rc = -EPROTO;
+			break;
+		}
+
+		if (tx->tx_nwrq == IBLND_RDMA_FRAGS(conn->ibc_version)) {
+			CERROR("RDMA too fragmented for %s (%d): "
+			       "%d/%d src %d/%d dst frags\n",
+			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
+			       IBLND_RDMA_FRAGS(conn->ibc_version),
+			       srcidx, srcrd->rd_nfrags,
+			       dstidx, dstrd->rd_nfrags);
+			rc = -EMSGSIZE;
+			break;
+		}
+
+		wrknob = MIN(MIN(kiblnd_rd_frag_size(srcrd, srcidx),
+				 kiblnd_rd_frag_size(dstrd, dstidx)), resid);
+
+		sge = &tx->tx_sge[tx->tx_nwrq];
+		sge->addr   = kiblnd_rd_frag_addr(srcrd, srcidx);
+		sge->lkey   = kiblnd_rd_frag_key(srcrd, srcidx);
+		sge->length = wrknob;
+
+		wrq = &tx->tx_wrq[tx->tx_nwrq];
+
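+		/* chain each RDMA WR to the next; the chain is terminated by
+		 * the completion message WR appended by kiblnd_init_tx_msg()
+		 * below, whose 'next' is NULL */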
+		wrq->next       = wrq + 1;
+		wrq->wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
+		wrq->sg_list    = sge;
+		wrq->num_sge    = 1;
+		wrq->opcode     = IB_WR_RDMA_WRITE;
+		wrq->send_flags = 0;
+
+		wrq->wr.rdma.remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
+		wrq->wr.rdma.rkey	= kiblnd_rd_frag_key(dstrd, dstidx);
+
+		srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob);
+		dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob);
+
+		resid -= wrknob;
+
+		tx->tx_nwrq++;
+		wrq++;
+		sge++;
+	}
+
+	if (rc < 0)			     /* no RDMA if completing with failure */
+		tx->tx_nwrq = 0;
+
+	ibmsg->ibm_u.completion.ibcm_status = rc;
+	ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
+	kiblnd_init_tx_msg(conn->ibc_peer->ibp_ni, tx,
+			   type, sizeof (kib_completion_msg_t));
+
+	return rc;
+}
+
+void
+kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
+{
+	struct list_head   *q;
+
+	LASSERT (tx->tx_nwrq > 0);	      /* work items set up */
+	LASSERT (!tx->tx_queued);	       /* not queued for sending already */
+	LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+	tx->tx_queued = 1;
+	tx->tx_deadline = jiffies + (*kiblnd_tunables.kib_timeout * HZ);
+
+	if (tx->tx_conn == NULL) {
+		kiblnd_conn_addref(conn);
+		tx->tx_conn = conn;
+		LASSERT (tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE);
+	} else {
+		/* PUT_DONE first attached to conn as a PUT_REQ */
+		LASSERT (tx->tx_conn == conn);
+		LASSERT (tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE);
+	}
+
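+	/* choose a queue by credit class: PUT_REQ/GET_REQ wait for a
+	 * reserved credit, completion messages consume no credit, NOOPs go
+	 * out-of-band when the protocol allows it, and IMMEDIATE messages
+	 * use the normal one-credit queue */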
+	switch (tx->tx_msg->ibm_type) {
+	default:
+		LBUG();
+
+	case IBLND_MSG_PUT_REQ:
+	case IBLND_MSG_GET_REQ:
+		q = &conn->ibc_tx_queue_rsrvd;
+		break;
+
+	case IBLND_MSG_PUT_NAK:
+	case IBLND_MSG_PUT_ACK:
+	case IBLND_MSG_PUT_DONE:
+	case IBLND_MSG_GET_DONE:
+		q = &conn->ibc_tx_queue_nocred;
+		break;
+
+	case IBLND_MSG_NOOP:
+		if (IBLND_OOB_CAPABLE(conn->ibc_version))
+			q = &conn->ibc_tx_queue_nocred;
+		else
+			q = &conn->ibc_tx_noops;
+		break;
+
+	case IBLND_MSG_IMMEDIATE:
+		q = &conn->ibc_tx_queue;
+		break;
+	}
+
+	list_add_tail(&tx->tx_list, q);
+}
+
+void
+kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
+{
+	spin_lock(&conn->ibc_lock);
+	kiblnd_queue_tx_locked(tx, conn);
+	spin_unlock(&conn->ibc_lock);
+
+	kiblnd_check_sends(conn);
+}
+
+static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
+			       struct sockaddr_in *srcaddr,
+			       struct sockaddr_in *dstaddr,
+			       int timeout_ms)
+{
+	unsigned short port;
+	int rc;
+
+	/* allow the port to be reused */
+	rc = rdma_set_reuseaddr(cmid, 1);
+	if (rc != 0) {
+		CERROR("Unable to set reuse on cmid: %d\n", rc);
+		return rc;
+	}
+
+	/* look for a free privileged port */
+	for (port = PROT_SOCK - 1; port > 0; port--) {
+		srcaddr->sin_port = htons(port);
+		rc = rdma_resolve_addr(cmid,
+				       (struct sockaddr *)srcaddr,
+				       (struct sockaddr *)dstaddr,
+				       timeout_ms);
+		if (rc == 0) {
+			CDEBUG(D_NET, "bound to port %hu\n", port);
+			return 0;
+		} else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) {
+			CDEBUG(D_NET, "bind to port %hu failed: %d\n",
+			       port, rc);
+		} else {
+			return rc;
+		}
+	}
+
+	CERROR("Failed to bind to a free privileged port\n");
+	return rc;
+}
+
+void
+kiblnd_connect_peer (kib_peer_t *peer)
+{
+	struct rdma_cm_id *cmid;
+	kib_dev_t	 *dev;
+	kib_net_t	 *net = peer->ibp_ni->ni_data;
+	struct sockaddr_in srcaddr;
+	struct sockaddr_in dstaddr;
+	int		rc;
+
+	LASSERT (net != NULL);
+	LASSERT (peer->ibp_connecting > 0);
+
+	cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
+				     IB_QPT_RC);
+
+	if (IS_ERR(cmid)) {
+		CERROR("Can't create CMID for %s: %ld\n",
+		       libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid));
+		rc = PTR_ERR(cmid);
+		goto failed;
+	}
+
+	dev = net->ibn_dev;
+	memset(&srcaddr, 0, sizeof(srcaddr));
+	srcaddr.sin_family = AF_INET;
+	srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
+
+	memset(&dstaddr, 0, sizeof(dstaddr));
+	dstaddr.sin_family = AF_INET;
+	dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
+	dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
+
+	kiblnd_peer_addref(peer);	       /* cmid's ref */
+
+	if (*kiblnd_tunables.kib_use_priv_port) {
+		rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
+					 *kiblnd_tunables.kib_timeout * 1000);
+	} else {
+		rc = rdma_resolve_addr(cmid,
+				       (struct sockaddr *)&srcaddr,
+				       (struct sockaddr *)&dstaddr,
+				       *kiblnd_tunables.kib_timeout * 1000);
+	}
+	if (rc != 0) {
+		/* Can't initiate address resolution */
+		CERROR("Can't resolve addr for %s: %d\n",
+		       libcfs_nid2str(peer->ibp_nid), rc);
+		goto failed2;
+	}
+
+	LASSERT (cmid->device != NULL);
+	CDEBUG(D_NET, "%s: connection bound to %s:%u.%u.%u.%u:%s\n",
+	       libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
+	       HIPQUAD(dev->ibd_ifip), cmid->device->name);
+
+	return;
+
+ failed2:
+	kiblnd_peer_decref(peer);	       /* cmid's ref */
+	rdma_destroy_id(cmid);
+ failed:
+	kiblnd_peer_connect_failed(peer, 1, rc);
+}
+
+void
+kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid)
+{
+	kib_peer_t	*peer;
+	kib_peer_t	*peer2;
+	kib_conn_t	*conn;
+	rwlock_t	*g_lock = &kiblnd_data.kib_global_lock;
+	unsigned long      flags;
+	int		rc;
+
+	/* If I get here, I've committed to send, so I complete the tx with
+	 * failure on any problems */
+
+	LASSERT (tx == NULL || tx->tx_conn == NULL); /* only set when assigned a conn */
+	LASSERT (tx == NULL || tx->tx_nwrq > 0);     /* work items have been set up */
+
+	/* First time, just use a read lock since I expect to find my peer
+	 * connected */
+	read_lock_irqsave(g_lock, flags);
+
+	peer = kiblnd_find_peer_locked(nid);
+	if (peer != NULL && !list_empty(&peer->ibp_conns)) {
+		/* Found a peer with an established connection */
+		conn = kiblnd_get_conn_locked(peer);
+		kiblnd_conn_addref(conn); /* 1 ref for me... */
+
+		read_unlock_irqrestore(g_lock, flags);
+
+		if (tx != NULL)
+			kiblnd_queue_tx(tx, conn);
+		kiblnd_conn_decref(conn); /* ...to here */
+		return;
+	}
+
+	read_unlock(g_lock);
+	/* Re-try with a write lock */
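+	/* NB irqs stay disabled: 'flags' saved by read_lock_irqsave() above
+	 * is restored by a write_unlock_irqrestore() below */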
+	write_lock(g_lock);
+
+	peer = kiblnd_find_peer_locked(nid);
+	if (peer != NULL) {
+		if (list_empty(&peer->ibp_conns)) {
+			/* found a peer, but it's still connecting... */
+			LASSERT (peer->ibp_connecting != 0 ||
+				 peer->ibp_accepting != 0);
+			if (tx != NULL)
+				list_add_tail(&tx->tx_list,
+						  &peer->ibp_tx_queue);
+			write_unlock_irqrestore(g_lock, flags);
+		} else {
+			conn = kiblnd_get_conn_locked(peer);
+			kiblnd_conn_addref(conn); /* 1 ref for me... */
+
+			write_unlock_irqrestore(g_lock, flags);
+
+			if (tx != NULL)
+				kiblnd_queue_tx(tx, conn);
+			kiblnd_conn_decref(conn); /* ...to here */
+		}
+		return;
+	}
+
+	write_unlock_irqrestore(g_lock, flags);
+
+	/* Allocate a peer ready to add to the peer table and retry */
+	rc = kiblnd_create_peer(ni, &peer, nid);
+	if (rc != 0) {
+		CERROR("Can't create peer %s\n", libcfs_nid2str(nid));
+		if (tx != NULL) {
+			tx->tx_status = -EHOSTUNREACH;
+			tx->tx_waiting = 0;
+			kiblnd_tx_done(ni, tx);
+		}
+		return;
+	}
+
+	write_lock_irqsave(g_lock, flags);
+
+	peer2 = kiblnd_find_peer_locked(nid);
+	if (peer2 != NULL) {
+		if (list_empty(&peer2->ibp_conns)) {
+			/* found a peer, but it's still connecting... */
+			LASSERT (peer2->ibp_connecting != 0 ||
+				 peer2->ibp_accepting != 0);
+			if (tx != NULL)
+				list_add_tail(&tx->tx_list,
+						  &peer2->ibp_tx_queue);
+			write_unlock_irqrestore(g_lock, flags);
+		} else {
+			conn = kiblnd_get_conn_locked(peer2);
+			kiblnd_conn_addref(conn); /* 1 ref for me... */
+
+			write_unlock_irqrestore(g_lock, flags);
+
+			if (tx != NULL)
+				kiblnd_queue_tx(tx, conn);
+			kiblnd_conn_decref(conn); /* ...to here */
+		}
+
+		kiblnd_peer_decref(peer);
+		return;
+	}
+
+	/* Brand new peer */
+	LASSERT (peer->ibp_connecting == 0);
+	peer->ibp_connecting = 1;
+
+	/* always called with a ref on ni, which prevents ni being shutdown */
+	LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0);
+
+	if (tx != NULL)
+		list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
+
+	kiblnd_peer_addref(peer);
+	list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
+
+	write_unlock_irqrestore(g_lock, flags);
+
+	kiblnd_connect_peer(peer);
+	kiblnd_peer_decref(peer);
+}
+
+int
+kiblnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
+{
+	lnet_hdr_t       *hdr = &lntmsg->msg_hdr;
+	int	       type = lntmsg->msg_type;
+	lnet_process_id_t target = lntmsg->msg_target;
+	int	       target_is_router = lntmsg->msg_target_is_router;
+	int	       routing = lntmsg->msg_routing;
+	unsigned int      payload_niov = lntmsg->msg_niov;
+	struct iovec     *payload_iov = lntmsg->msg_iov;
+	lnet_kiov_t      *payload_kiov = lntmsg->msg_kiov;
+	unsigned int      payload_offset = lntmsg->msg_offset;
+	unsigned int      payload_nob = lntmsg->msg_len;
+	kib_msg_t	*ibmsg;
+	kib_tx_t	 *tx;
+	int	       nob;
+	int	       rc;
+
+	/* NB 'private' is different depending on what we're sending.... */
+
+	CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
+	       payload_nob, payload_niov, libcfs_id2str(target));
+
+	LASSERT (payload_nob == 0 || payload_niov > 0);
+	LASSERT (payload_niov <= LNET_MAX_IOV);
+
+	/* Thread context */
+	LASSERT (!in_interrupt());
+	/* payload is either all vaddrs or all pages */
+	LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+
+	switch (type) {
+	default:
+		LBUG();
+		return (-EIO);
+
+	case LNET_MSG_ACK:
+		LASSERT (payload_nob == 0);
+		break;
+
+	case LNET_MSG_GET:
+		if (routing || target_is_router)
+			break;		  /* send IMMEDIATE */
+
+		/* is the REPLY message too small for RDMA? */
+		nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
+		if (nob <= IBLND_MSG_SIZE)
+			break;		  /* send IMMEDIATE */
+
+		tx = kiblnd_get_idle_tx(ni, target.nid);
+		if (tx == NULL) {
+			CERROR("Can't allocate txd for GET to %s\n",
+			       libcfs_nid2str(target.nid));
+			return -ENOMEM;
+		}
+
+		ibmsg = tx->tx_msg;
+
+		if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
+			rc = kiblnd_setup_rd_iov(ni, tx,
+						 &ibmsg->ibm_u.get.ibgm_rd,
+						 lntmsg->msg_md->md_niov,
+						 lntmsg->msg_md->md_iov.iov,
+						 0, lntmsg->msg_md->md_length);
+		else
+			rc = kiblnd_setup_rd_kiov(ni, tx,
+						  &ibmsg->ibm_u.get.ibgm_rd,
+						  lntmsg->msg_md->md_niov,
+						  lntmsg->msg_md->md_iov.kiov,
+						  0, lntmsg->msg_md->md_length);
+		if (rc != 0) {
+			CERROR("Can't setup GET sink for %s: %d\n",
+			       libcfs_nid2str(target.nid), rc);
+			kiblnd_tx_done(ni, tx);
+			return -EIO;
+		}
+
+		nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[tx->tx_nfrags]);
+		ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
+		ibmsg->ibm_u.get.ibgm_hdr = *hdr;
+
+		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob);
+
+		tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg);
+		if (tx->tx_lntmsg[1] == NULL) {
+			CERROR("Can't create reply for GET -> %s\n",
+			       libcfs_nid2str(target.nid));
+			kiblnd_tx_done(ni, tx);
+			return -EIO;
+		}
+
+		tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg[0,1] on completion */
+		tx->tx_waiting = 1;	     /* waiting for GET_DONE */
+		kiblnd_launch_tx(ni, tx, target.nid);
+		return 0;
+
+	case LNET_MSG_REPLY:
+	case LNET_MSG_PUT:
+		/* Is the payload small enough not to need RDMA? */
+		nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
+		if (nob <= IBLND_MSG_SIZE)
+			break;		  /* send IMMEDIATE */
+
+		tx = kiblnd_get_idle_tx(ni, target.nid);
+		if (tx == NULL) {
+			CERROR("Can't allocate %s txd for %s\n",
+			       type == LNET_MSG_PUT ? "PUT" : "REPLY",
+			       libcfs_nid2str(target.nid));
+			return -ENOMEM;
+		}
+
+		if (payload_kiov == NULL)
+			rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
+						 payload_niov, payload_iov,
+						 payload_offset, payload_nob);
+		else
+			rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
+						  payload_niov, payload_kiov,
+						  payload_offset, payload_nob);
+		if (rc != 0) {
+			CERROR("Can't setup PUT src for %s: %d\n",
+			       libcfs_nid2str(target.nid), rc);
+			kiblnd_tx_done(ni, tx);
+			return -EIO;
+		}
+
+		ibmsg = tx->tx_msg;
+		ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
+		ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
+		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(kib_putreq_msg_t));
+
+		tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg on completion */
+		tx->tx_waiting = 1;	     /* waiting for PUT_{ACK,NAK} */
+		kiblnd_launch_tx(ni, tx, target.nid);
+		return 0;
+	}
+
+	/* send IMMEDIATE */
+
+	LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob])
+		 <= IBLND_MSG_SIZE);
+
+	tx = kiblnd_get_idle_tx(ni, target.nid);
+	if (tx == NULL) {
+		CERROR ("Can't send %d to %s: tx descs exhausted\n",
+			type, libcfs_nid2str(target.nid));
+		return -ENOMEM;
+	}
+
+	ibmsg = tx->tx_msg;
+	ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
+
+	if (payload_kiov != NULL)
+		lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
+				    offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
+				    payload_niov, payload_kiov,
+				    payload_offset, payload_nob);
+	else
+		lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg,
+				   offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
+				   payload_niov, payload_iov,
+				   payload_offset, payload_nob);
+
+	nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]);
+	kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
+
+	tx->tx_lntmsg[0] = lntmsg;	      /* finalise lntmsg on completion */
+	kiblnd_launch_tx(ni, tx, target.nid);
+	return 0;
+}
+
+void
+kiblnd_reply (lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg)
+{
+	lnet_process_id_t target = lntmsg->msg_target;
+	unsigned int      niov = lntmsg->msg_niov;
+	struct iovec     *iov = lntmsg->msg_iov;
+	lnet_kiov_t      *kiov = lntmsg->msg_kiov;
+	unsigned int      offset = lntmsg->msg_offset;
+	unsigned int      nob = lntmsg->msg_len;
+	kib_tx_t	 *tx;
+	int	       rc;
+
+	tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid);
+	if (tx == NULL) {
+		CERROR("Can't get tx for REPLY to %s\n",
+		       libcfs_nid2str(target.nid));
+		goto failed_0;
+	}
+
+	if (nob == 0)
+		rc = 0;
+	else if (kiov == NULL)
+		rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
+					 niov, iov, offset, nob);
+	else
+		rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
+					  niov, kiov, offset, nob);
+
+	if (rc != 0) {
+		CERROR("Can't setup GET src for %s: %d\n",
+		       libcfs_nid2str(target.nid), rc);
+		goto failed_1;
+	}
+
+	rc = kiblnd_init_rdma(rx->rx_conn, tx,
+			      IBLND_MSG_GET_DONE, nob,
+			      &rx->rx_msg->ibm_u.get.ibgm_rd,
+			      rx->rx_msg->ibm_u.get.ibgm_cookie);
+	if (rc < 0) {
+		CERROR("Can't setup rdma for GET from %s: %d\n",
+		       libcfs_nid2str(target.nid), rc);
+		goto failed_1;
+	}
+
+	if (nob == 0) {
+		/* No RDMA: local completion may happen now! */
+		lnet_finalize(ni, lntmsg, 0);
+	} else {
+		/* RDMA: lnet_finalize(lntmsg) when it
+		 * completes */
+		tx->tx_lntmsg[0] = lntmsg;
+	}
+
+	kiblnd_queue_tx(tx, rx->rx_conn);
+	return;
+
+ failed_1:
+	kiblnd_tx_done(ni, tx);
+ failed_0:
+	lnet_finalize(ni, lntmsg, -EIO);
+}
+
+int
+kiblnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
+	     unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
+	     unsigned int offset, unsigned int mlen, unsigned int rlen)
+{
+	kib_rx_t    *rx = private;
+	kib_msg_t   *rxmsg = rx->rx_msg;
+	kib_conn_t  *conn = rx->rx_conn;
+	kib_tx_t    *tx;
+	kib_msg_t   *txmsg;
+	int	  nob;
+	int	  post_credit = IBLND_POSTRX_PEER_CREDIT;
+	int	  rc = 0;
+
+	LASSERT (mlen <= rlen);
+	LASSERT (!in_interrupt());
+	/* Either all pages or all vaddrs */
+	LASSERT (!(kiov != NULL && iov != NULL));
+
+	switch (rxmsg->ibm_type) {
+	default:
+		LBUG();
+
+	case IBLND_MSG_IMMEDIATE:
+		nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]);
+		if (nob > rx->rx_nob) {
+			CERROR ("Immediate message from %s too big: %d(%d)\n",
+				libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
+				nob, rx->rx_nob);
+			rc = -EPROTO;
+			break;
+		}
+
+		if (kiov != NULL)
+			lnet_copy_flat2kiov(niov, kiov, offset,
+					    IBLND_MSG_SIZE, rxmsg,
+					    offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
+					    mlen);
+		else
+			lnet_copy_flat2iov(niov, iov, offset,
+					   IBLND_MSG_SIZE, rxmsg,
+					   offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
+					   mlen);
+		lnet_finalize (ni, lntmsg, 0);
+		break;
+
+	case IBLND_MSG_PUT_REQ:
+		if (mlen == 0) {
+			lnet_finalize(ni, lntmsg, 0);
+			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
+					       rxmsg->ibm_u.putreq.ibprm_cookie);
+			break;
+		}
+
+		tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
+		if (tx == NULL) {
+			CERROR("Can't allocate tx for %s\n",
+			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
+			/* Not replying will break the connection */
+			rc = -ENOMEM;
+			break;
+		}
+
+		txmsg = tx->tx_msg;
+		if (kiov == NULL)
+			rc = kiblnd_setup_rd_iov(ni, tx,
+						 &txmsg->ibm_u.putack.ibpam_rd,
+						 niov, iov, offset, mlen);
+		else
+			rc = kiblnd_setup_rd_kiov(ni, tx,
+						  &txmsg->ibm_u.putack.ibpam_rd,
+						  niov, kiov, offset, mlen);
+		if (rc != 0) {
+			CERROR("Can't setup PUT sink for %s: %d\n",
+			       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+			kiblnd_tx_done(ni, tx);
+			/* tell peer it's over */
+			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc,
+					       rxmsg->ibm_u.putreq.ibprm_cookie);
+			break;
+		}
+
+		nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[tx->tx_nfrags]);
+		txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
+		txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
+
+		kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob);
+
+		tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg on completion */
+		tx->tx_waiting = 1;	     /* waiting for PUT_DONE */
+		kiblnd_queue_tx(tx, conn);
+
+		/* reposted buffer reserved for PUT_DONE */
+		post_credit = IBLND_POSTRX_NO_CREDIT;
+		break;
+
+	case IBLND_MSG_GET_REQ:
+		if (lntmsg != NULL) {
+			/* Optimized GET; RDMA lntmsg's payload */
+			kiblnd_reply(ni, rx, lntmsg);
+		} else {
+			/* GET didn't match anything */
+			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE,
+					       -ENODATA,
+					       rxmsg->ibm_u.get.ibgm_cookie);
+		}
+		break;
+	}
+
+	kiblnd_post_rx(rx, post_credit);
+	return rc;
+}
+
+int
+kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
+{
+	task_t *task = kthread_run(fn, arg, name);
+
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+
+	atomic_inc(&kiblnd_data.kib_nthreads);
+	return 0;
+}
+
+void
+kiblnd_thread_fini (void)
+{
+	atomic_dec (&kiblnd_data.kib_nthreads);
+}
+
+void
+kiblnd_peer_alive (kib_peer_t *peer)
+{
+	/* This is racy, but everyone's only writing cfs_time_current() */
+	peer->ibp_last_alive = cfs_time_current();
+	mb();
+}
+
+void
+kiblnd_peer_notify (kib_peer_t *peer)
+{
+	int	   error = 0;
+	cfs_time_t    last_alive = 0;
+	unsigned long flags;
+
+	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	if (list_empty(&peer->ibp_conns) &&
+	    peer->ibp_accepting == 0 &&
+	    peer->ibp_connecting == 0 &&
+	    peer->ibp_error != 0) {
+		error = peer->ibp_error;
+		peer->ibp_error = 0;
+
+		last_alive = peer->ibp_last_alive;
+	}
+
+	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	if (error != 0)
+		lnet_notify(peer->ibp_ni,
+			    peer->ibp_nid, 0, last_alive);
+}
+
+void
+kiblnd_close_conn_locked (kib_conn_t *conn, int error)
+{
+	/* This just does the immediate housekeeping.  'error' is zero for a
+	 * normal shutdown which can happen only after the connection has been
+	 * established.  If the connection is established, schedule the
+	 * connection to be finished off by the connd.  Otherwise the connd is
+	 * already dealing with it (either to set it up or tear it down).
+	 * Caller holds kib_global_lock exclusively in irq context */
+	kib_peer_t       *peer = conn->ibc_peer;
+	kib_dev_t	*dev;
+	unsigned long     flags;
+
+	LASSERT (error != 0 || conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+	if (error != 0 && conn->ibc_comms_error == 0)
+		conn->ibc_comms_error = error;
+
+	if (conn->ibc_state != IBLND_CONN_ESTABLISHED)
+		return; /* already being handled  */
+
+	if (error == 0 &&
+	    list_empty(&conn->ibc_tx_noops) &&
+	    list_empty(&conn->ibc_tx_queue) &&
+	    list_empty(&conn->ibc_tx_queue_rsrvd) &&
+	    list_empty(&conn->ibc_tx_queue_nocred) &&
+	    list_empty(&conn->ibc_active_txs)) {
+		CDEBUG(D_NET, "closing conn to %s\n",
+		       libcfs_nid2str(peer->ibp_nid));
+	} else {
+		CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
+		       libcfs_nid2str(peer->ibp_nid), error,
+		       list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
+		       list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
+		       list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
+		       list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
+		       list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
+	}
+
+	dev = ((kib_net_t *)peer->ibp_ni->ni_data)->ibn_dev;
+	list_del(&conn->ibc_list);
+	/* connd (see below) takes over ibc_list's ref */
+
+	if (list_empty (&peer->ibp_conns) &&    /* no more conns */
+	    kiblnd_peer_active(peer)) {	 /* still in peer table */
+		kiblnd_unlink_peer_locked(peer);
+
+		/* set/clear error on last conn */
+		peer->ibp_error = conn->ibc_comms_error;
+	}
+
+	kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING);
+
+	if (error != 0 &&
+	    kiblnd_dev_can_failover(dev)) {
+		list_add_tail(&dev->ibd_fail_list,
+			      &kiblnd_data.kib_failed_devs);
+		wake_up(&kiblnd_data.kib_failover_waitq);
+	}
+
+	spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+
+	list_add_tail(&conn->ibc_list, &kiblnd_data.kib_connd_conns);
+	wake_up(&kiblnd_data.kib_connd_waitq);
+
+	spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+}
+
+void
+kiblnd_close_conn(kib_conn_t *conn, int error)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	kiblnd_close_conn_locked(conn, error);
+
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+}
+
+void
+kiblnd_handle_early_rxs(kib_conn_t *conn)
+{
+	unsigned long    flags;
+	kib_rx_t	*rx;
+
+	LASSERT(!in_interrupt());
+	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+	while (!list_empty(&conn->ibc_early_rxs)) {
+		rx = list_entry(conn->ibc_early_rxs.next,
+				    kib_rx_t, rx_list);
+		list_del(&rx->rx_list);
+		write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+		kiblnd_handle_rx(rx);
+
+		write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+	}
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+}
+
+void
+kiblnd_abort_txs(kib_conn_t *conn, struct list_head *txs)
+{
+	LIST_HEAD       (zombies);
+	struct list_head	  *tmp;
+	struct list_head	  *nxt;
+	kib_tx_t	    *tx;
+
+	spin_lock(&conn->ibc_lock);
+
+	list_for_each_safe (tmp, nxt, txs) {
+		tx = list_entry (tmp, kib_tx_t, tx_list);
+
+		if (txs == &conn->ibc_active_txs) {
+			LASSERT (!tx->tx_queued);
+			LASSERT (tx->tx_waiting ||
+				 tx->tx_sending != 0);
+		} else {
+			LASSERT (tx->tx_queued);
+		}
+
+		tx->tx_status = -ECONNABORTED;
+		tx->tx_waiting = 0;
+
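+		/* txs with sends still in flight are finished off later by
+		 * kiblnd_tx_complete() */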
+		if (tx->tx_sending == 0) {
+			tx->tx_queued = 0;
+			list_del (&tx->tx_list);
+			list_add (&tx->tx_list, &zombies);
+		}
+	}
+
+	spin_unlock(&conn->ibc_lock);
+
+	kiblnd_txlist_done(conn->ibc_peer->ibp_ni, &zombies, -ECONNABORTED);
+}
+
+void
+kiblnd_finalise_conn (kib_conn_t *conn)
+{
+	LASSERT (!in_interrupt());
+	LASSERT (conn->ibc_state > IBLND_CONN_INIT);
+
+	kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED);
+
+	/* abort_receives moves QP state to IB_QPS_ERR.  This is only required
+	 * for connections that didn't get as far as being connected, because
+	 * rdma_disconnect() does this for free. */
+	kiblnd_abort_receives(conn);
+
+	/* Complete all tx descs not waiting for sends to complete.
+	 * NB we should be safe from RDMA now that the QP has changed state */
+
+	kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
+	kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
+	kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
+	kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
+	kiblnd_abort_txs(conn, &conn->ibc_active_txs);
+
+	kiblnd_handle_early_rxs(conn);
+}
+
+void
+kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error)
+{
+	LIST_HEAD    (zombies);
+	unsigned long     flags;
+
+	LASSERT (error != 0);
+	LASSERT (!in_interrupt());
+
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	if (active) {
+		LASSERT (peer->ibp_connecting > 0);
+		peer->ibp_connecting--;
+	} else {
+		LASSERT (peer->ibp_accepting > 0);
+		peer->ibp_accepting--;
+	}
+
+	if (peer->ibp_connecting != 0 ||
+	    peer->ibp_accepting != 0) {
+		/* another connection attempt under way... */
+		write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+					    flags);
+		return;
+	}
+
+	if (list_empty(&peer->ibp_conns)) {
+		/* Take peer's blocked transmits to complete with error */
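+		/* NB list_add() + list_del_init() splices the whole
+		 * ibp_tx_queue onto 'zombies' */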
+		list_add(&zombies, &peer->ibp_tx_queue);
+		list_del_init(&peer->ibp_tx_queue);
+
+		if (kiblnd_peer_active(peer))
+			kiblnd_unlink_peer_locked(peer);
+
+		peer->ibp_error = error;
+	} else {
+		/* Can't have blocked transmits if there are connections */
+		LASSERT (list_empty(&peer->ibp_tx_queue));
+	}
+
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	kiblnd_peer_notify(peer);
+
+	if (list_empty (&zombies))
+		return;
+
+	CNETERR("Deleting messages for %s: connection failed\n",
+		libcfs_nid2str(peer->ibp_nid));
+
+	kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH);
+}
+
+void
+kiblnd_connreq_done(kib_conn_t *conn, int status)
+{
+	kib_peer_t	*peer = conn->ibc_peer;
+	kib_tx_t	  *tx;
+	struct list_head	 txs;
+	unsigned long      flags;
+	int		active;
+
+	active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
+
+	CDEBUG(D_NET, "%s: active(%d), version(%x), status(%d)\n",
+	       libcfs_nid2str(peer->ibp_nid), active,
+	       conn->ibc_version, status);
+
+	LASSERT (!in_interrupt());
+	LASSERT ((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT &&
+		  peer->ibp_connecting > 0) ||
+		 (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT &&
+		  peer->ibp_accepting > 0));
+
+	LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+	conn->ibc_connvars = NULL;
+
+	if (status != 0) {
+		/* failed to establish connection */
+		kiblnd_peer_connect_failed(peer, active, status);
+		kiblnd_finalise_conn(conn);
+		return;
+	}
+
+	/* connection established */
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	conn->ibc_last_send = jiffies;
+	kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
+	kiblnd_peer_alive(peer);
+
+	/* Add conn to peer's list and nuke any dangling conns from a different
+	 * peer instance... */
+	kiblnd_conn_addref(conn);	       /* +1 ref for ibc_list */
+	list_add(&conn->ibc_list, &peer->ibp_conns);
+	if (active)
+		peer->ibp_connecting--;
+	else
+		peer->ibp_accepting--;
+
+	if (peer->ibp_version == 0) {
+		peer->ibp_version     = conn->ibc_version;
+		peer->ibp_incarnation = conn->ibc_incarnation;
+	}
+
+	if (peer->ibp_version     != conn->ibc_version ||
+	    peer->ibp_incarnation != conn->ibc_incarnation) {
+		kiblnd_close_stale_conns_locked(peer, conn->ibc_version,
+						conn->ibc_incarnation);
+		peer->ibp_version     = conn->ibc_version;
+		peer->ibp_incarnation = conn->ibc_incarnation;
+	}
+
+	/* grab pending txs while I have the lock */
+	list_add(&txs, &peer->ibp_tx_queue);
+	list_del_init(&peer->ibp_tx_queue);
+
+	if (!kiblnd_peer_active(peer) ||	/* peer has been deleted */
+	    conn->ibc_comms_error != 0) {       /* error has happened already */
+		lnet_ni_t *ni = peer->ibp_ni;
+
+		/* start to shut down connection */
+		kiblnd_close_conn_locked(conn, -ECONNABORTED);
+		write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+		kiblnd_txlist_done(ni, &txs, -ECONNABORTED);
+
+		return;
+	}
+
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	/* Schedule blocked txs */
+	spin_lock(&conn->ibc_lock);
+	while (!list_empty(&txs)) {
+		tx = list_entry(txs.next, kib_tx_t, tx_list);
+		list_del(&tx->tx_list);
+
+		kiblnd_queue_tx_locked(tx, conn);
+	}
+	spin_unlock(&conn->ibc_lock);
+
+	kiblnd_check_sends(conn);
+
+	/* schedule blocked rxs */
+	kiblnd_handle_early_rxs(conn);
+}
+
+void
+kiblnd_reject(struct rdma_cm_id *cmid, kib_rej_t *rej)
+{
+	int	  rc;
+
+	rc = rdma_reject(cmid, rej, sizeof(*rej));
+
+	if (rc != 0)
+		CWARN("Error %d sending reject\n", rc);
+}
+
+int
+kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob)
+{
+	rwlock_t		*g_lock = &kiblnd_data.kib_global_lock;
+	kib_msg_t	     *reqmsg = priv;
+	kib_msg_t	     *ackmsg;
+	kib_dev_t	     *ibdev;
+	kib_peer_t	    *peer;
+	kib_peer_t	    *peer2;
+	kib_conn_t	    *conn;
+	lnet_ni_t	     *ni  = NULL;
+	kib_net_t	     *net = NULL;
+	lnet_nid_t	     nid;
+	struct rdma_conn_param cp;
+	kib_rej_t	      rej;
+	int		    version = IBLND_MSG_VERSION;
+	unsigned long	  flags;
+	int		    rc;
+	struct sockaddr_in    *peer_addr;
+
+	LASSERT (!in_interrupt());
+
+	/* cmid inherits 'context' from the corresponding listener id */
+	ibdev = (kib_dev_t *)cmid->context;
+	LASSERT (ibdev != NULL);
+
+	memset(&rej, 0, sizeof(rej));
+	rej.ibr_magic		= IBLND_MSG_MAGIC;
+	rej.ibr_why		  = IBLND_REJECT_FATAL;
+	rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
+
+	peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr);
+	if (*kiblnd_tunables.kib_require_priv_port &&
+	    ntohs(peer_addr->sin_port) >= PROT_SOCK) {
+		__u32 ip = ntohl(peer_addr->sin_addr.s_addr);
+		CERROR("Peer's port (%u.%u.%u.%u:%hu) is not privileged\n",
+		       HIPQUAD(ip), ntohs(peer_addr->sin_port));
+		goto failed;
+	}
+
+	if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
+		CERROR("Short connection request\n");
+		goto failed;
+	}
+
+	/* Future protocol version compatibility support!  If the
+	 * o2iblnd-specific protocol changes, or when LNET unifies
+	 * protocols over all LNDs, the initial connection will
+	 * negotiate a protocol version.  I trap this here to avoid
+	 * console errors; the reject tells the peer which protocol I
+	 * speak. */
+	if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
+	    reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
+		goto failed;
+	if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
+	    reqmsg->ibm_version != IBLND_MSG_VERSION &&
+	    reqmsg->ibm_version != IBLND_MSG_VERSION_1)
+		goto failed;
+	if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
+	    reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
+	    reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
+		goto failed;
+
+	rc = kiblnd_unpack_msg(reqmsg, priv_nob);
+	if (rc != 0) {
+		CERROR("Can't parse connection request: %d\n", rc);
+		goto failed;
+	}
+
+	nid = reqmsg->ibm_srcnid;
+	ni  = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
+
+	if (ni != NULL) {
+		net = (kib_net_t *)ni->ni_data;
+		rej.ibr_incarnation = net->ibn_incarnation;
+	}
+
+	if (ni == NULL ||			 /* no matching net */
+	    ni->ni_nid != reqmsg->ibm_dstnid ||   /* right NET, wrong NID! */
+	    net->ibn_dev != ibdev) {	      /* wrong device */
+		CERROR("Can't accept %s on %s (%s:%d:%u.%u.%u.%u): "
+		       "bad dst nid %s\n", libcfs_nid2str(nid),
+		       ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid),
+		       ibdev->ibd_ifname, ibdev->ibd_nnets,
+		       HIPQUAD(ibdev->ibd_ifip),
+		       libcfs_nid2str(reqmsg->ibm_dstnid));
+
+		goto failed;
+	}
+
+	/* check the timestamp as soon as possible */
+	if (reqmsg->ibm_dststamp != 0 &&
+	    reqmsg->ibm_dststamp != net->ibn_incarnation) {
+		CWARN("Stale connection request\n");
+		rej.ibr_why = IBLND_REJECT_CONN_STALE;
+		goto failed;
+	}
+
+	/* I can accept peer's version */
+	version = reqmsg->ibm_version;
+
+	if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
+		CERROR("Unexpected connreq msg type: %x from %s\n",
+		       reqmsg->ibm_type, libcfs_nid2str(nid));
+		goto failed;
+	}
+
+	if (reqmsg->ibm_u.connparams.ibcp_queue_depth !=
+	    IBLND_MSG_QUEUE_SIZE(version)) {
+		CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n",
+		       libcfs_nid2str(nid), reqmsg->ibm_u.connparams.ibcp_queue_depth,
+		       IBLND_MSG_QUEUE_SIZE(version));
+
+		if (version == IBLND_MSG_VERSION)
+			rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE;
+
+		goto failed;
+	}
+
+	if (reqmsg->ibm_u.connparams.ibcp_max_frags !=
+	    IBLND_RDMA_FRAGS(version)) {
+		CERROR("Can't accept %s(version %x): "
+		       "incompatible max_frags %d (%d wanted)\n",
+		       libcfs_nid2str(nid), version,
+		       reqmsg->ibm_u.connparams.ibcp_max_frags,
+		       IBLND_RDMA_FRAGS(version));
+
+		if (version == IBLND_MSG_VERSION)
+			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
+
+		goto failed;
+	}
+
+	if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
+		CERROR("Can't accept %s: message size %d too big (%d max)\n",
+		       libcfs_nid2str(nid),
+		       reqmsg->ibm_u.connparams.ibcp_max_msg_size,
+		       IBLND_MSG_SIZE);
+		goto failed;
+	}
+
+	/* assume 'nid' is a new peer; create  */
+	rc = kiblnd_create_peer(ni, &peer, nid);
+	if (rc != 0) {
+		CERROR("Can't create peer for %s\n", libcfs_nid2str(nid));
+		rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
+		goto failed;
+	}
+
+	write_lock_irqsave(g_lock, flags);
+
+	peer2 = kiblnd_find_peer_locked(nid);
+	if (peer2 != NULL) {
+		if (peer2->ibp_version == 0) {
+			peer2->ibp_version     = version;
+			peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
+		}
+
+		/* not the guy I've talked with */
+		if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp ||
+		    peer2->ibp_version     != version) {
+			kiblnd_close_peer_conns_locked(peer2, -ESTALE);
+			write_unlock_irqrestore(g_lock, flags);
+
+			CWARN("Conn stale %s [old ver: %x, new ver: %x]\n",
+			      libcfs_nid2str(nid), peer2->ibp_version, version);
+
+			kiblnd_peer_decref(peer);
+			rej.ibr_why = IBLND_REJECT_CONN_STALE;
+			goto failed;
+		}
+
+		/* tie-break connection race in favour of the higher NID */
+		if (peer2->ibp_connecting != 0 &&
+		    nid < ni->ni_nid) {
+			write_unlock_irqrestore(g_lock, flags);
+
+			CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid));
+
+			kiblnd_peer_decref(peer);
+			rej.ibr_why = IBLND_REJECT_CONN_RACE;
+			goto failed;
+		}
+
+		peer2->ibp_accepting++;
+		kiblnd_peer_addref(peer2);
+
+		write_unlock_irqrestore(g_lock, flags);
+		kiblnd_peer_decref(peer);
+		peer = peer2;
+	} else {
+		/* Brand new peer */
+		LASSERT (peer->ibp_accepting == 0);
+		LASSERT (peer->ibp_version == 0 &&
+			 peer->ibp_incarnation == 0);
+
+		peer->ibp_accepting   = 1;
+		peer->ibp_version     = version;
+		peer->ibp_incarnation = reqmsg->ibm_srcstamp;
+
+		/* I have a ref on ni that prevents it being shutdown */
+		LASSERT (net->ibn_shutdown == 0);
+
+		kiblnd_peer_addref(peer);
+		list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
+
+		write_unlock_irqrestore(g_lock, flags);
+	}
+
+	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, version);
+	if (conn == NULL) {
+		kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
+		kiblnd_peer_decref(peer);
+		rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
+		goto failed;
+	}
+
+	/* conn now "owns" cmid, so I return success from here on to ensure the
+	 * CM callback doesn't destroy cmid. */
+
+	conn->ibc_incarnation      = reqmsg->ibm_srcstamp;
+	conn->ibc_credits	  = IBLND_MSG_QUEUE_SIZE(version);
+	conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(version);
+	LASSERT (conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(version)
+		 <= IBLND_RX_MSGS(version));
+
+	ackmsg = &conn->ibc_connvars->cv_msg;
+	memset(ackmsg, 0, sizeof(*ackmsg));
+
+	kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
+			sizeof(ackmsg->ibm_u.connparams));
+	ackmsg->ibm_u.connparams.ibcp_queue_depth  = IBLND_MSG_QUEUE_SIZE(version);
+	ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
+	ackmsg->ibm_u.connparams.ibcp_max_frags    = IBLND_RDMA_FRAGS(version);
+
+	kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
+
+	memset(&cp, 0, sizeof(cp));
+	cp.private_data	= ackmsg;
+	cp.private_data_len    = ackmsg->ibm_nob;
+	cp.responder_resources = 0;	     /* No atomic ops or RDMA reads */
+	cp.initiator_depth     = 0;
+	cp.flow_control	= 1;
+	cp.retry_count	 = *kiblnd_tunables.kib_retry_count;
+	cp.rnr_retry_count     = *kiblnd_tunables.kib_rnr_retry_count;
+
+	CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid));
+
+	rc = rdma_accept(cmid, &cp);
+	if (rc != 0) {
+		CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc);
+		rej.ibr_version = version;
+		rej.ibr_why     = IBLND_REJECT_FATAL;
+
+		kiblnd_reject(cmid, &rej);
+		kiblnd_connreq_done(conn, rc);
+		kiblnd_conn_decref(conn);
+	}
+
+	lnet_ni_decref(ni);
+	return 0;
+
+ failed:
+	if (ni != NULL)
+		lnet_ni_decref(ni);
+
+	rej.ibr_version = version;
+	rej.ibr_cp.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version);
+	rej.ibr_cp.ibcp_max_frags   = IBLND_RDMA_FRAGS(version);
+	kiblnd_reject(cmid, &rej);
+
+	return -ECONNREFUSED;
+}
+
+void
+kiblnd_reconnect (kib_conn_t *conn, int version,
+		  __u64 incarnation, int why, kib_connparams_t *cp)
+{
+	kib_peer_t    *peer = conn->ibc_peer;
+	char	  *reason;
+	int	    retry = 0;
+	unsigned long  flags;
+
+	LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
+	LASSERT (peer->ibp_connecting > 0);     /* 'conn' at least */
+
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	/* retry connection if it's still needed and no other connection
+	 * attempts (active or passive) are in progress
+	 * NB: reconnect is still needed even when ibp_tx_queue is
+	 * empty if ibp_version != version because reconnect may be
+	 * initiated by kiblnd_query() */
+	if ((!list_empty(&peer->ibp_tx_queue) ||
+	     peer->ibp_version != version) &&
+	    peer->ibp_connecting == 1 &&
+	    peer->ibp_accepting == 0) {
+		retry = 1;
+		peer->ibp_connecting++;
+
+		peer->ibp_version     = version;
+		peer->ibp_incarnation = incarnation;
+	}
+
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	if (!retry)
+		return;
+
+	switch (why) {
+	default:
+		reason = "Unknown";
+		break;
+
+	case IBLND_REJECT_CONN_STALE:
+		reason = "stale";
+		break;
+
+	case IBLND_REJECT_CONN_RACE:
+		reason = "conn race";
+		break;
+
+	case IBLND_REJECT_CONN_UNCOMPAT:
+		reason = "version negotiation";
+		break;
+	}
+
+	CNETERR("%s: retrying (%s), %x, %x, "
+		"queue_dep: %d, max_frag: %d, msg_size: %d\n",
+		libcfs_nid2str(peer->ibp_nid),
+		reason, IBLND_MSG_VERSION, version,
+		cp != NULL ? cp->ibcp_queue_depth : IBLND_MSG_QUEUE_SIZE(version),
+		cp != NULL ? cp->ibcp_max_frags : IBLND_RDMA_FRAGS(version),
+		cp != NULL ? cp->ibcp_max_msg_size : IBLND_MSG_SIZE);
+
+	kiblnd_connect_peer(peer);
+}
+
+void
+kiblnd_rejected (kib_conn_t *conn, int reason, void *priv, int priv_nob)
+{
+	kib_peer_t    *peer = conn->ibc_peer;
+
+	LASSERT (!in_interrupt());
+	LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
+
+	switch (reason) {
+	case IB_CM_REJ_STALE_CONN:
+		kiblnd_reconnect(conn, IBLND_MSG_VERSION, 0,
+				 IBLND_REJECT_CONN_STALE, NULL);
+		break;
+
+	case IB_CM_REJ_INVALID_SERVICE_ID:
+		CNETERR("%s rejected: no listener at %d\n",
+			libcfs_nid2str(peer->ibp_nid),
+			*kiblnd_tunables.kib_service);
+		break;
+
+	case IB_CM_REJ_CONSUMER_DEFINED:
+		if (priv_nob >= offsetof(kib_rej_t, ibr_padding)) {
+			kib_rej_t	*rej	 = priv;
+			kib_connparams_t *cp	  = NULL;
+			int	       flip	= 0;
+			__u64	     incarnation = -1;
+
+			/* NB. default incarnation is -1 because:
+			 * a) V1 ignores the dst incarnation in the connreq.
+			 * b) V2 provides its incarnation when rejecting me,
+			 *    so the -1 will be overwritten.
+			 *
+			 * If I try to connect to a V1 peer with the V2
+			 * protocol and get rejected, and the peer then
+			 * upgrades to V2, I know nothing about the upgrade
+			 * and retry with V1.  The upgraded V2 peer can tell
+			 * I'm trying to talk to its old instance and rejects
+			 * me (incarnation is -1).
+			 */
+
+			if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) ||
+			    rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) {
+				__swab32s(&rej->ibr_magic);
+				__swab16s(&rej->ibr_version);
+				flip = 1;
+			}
+
+			if (priv_nob >= sizeof(kib_rej_t) &&
+			    rej->ibr_version > IBLND_MSG_VERSION_1) {
+				/* priv_nob is always 148
+				 * (IB_CM_REJ_PRIVATE_DATA_SIZE) in current
+				 * OFED versions, so we still need to check
+				 * the version here */
+				cp = &rej->ibr_cp;
+
+				if (flip) {
+					__swab64s(&rej->ibr_incarnation);
+					__swab16s(&cp->ibcp_queue_depth);
+					__swab16s(&cp->ibcp_max_frags);
+					__swab32s(&cp->ibcp_max_msg_size);
+				}
+
+				incarnation = rej->ibr_incarnation;
+			}
+
+			if (rej->ibr_magic != IBLND_MSG_MAGIC &&
+			    rej->ibr_magic != LNET_PROTO_MAGIC) {
+				CERROR("%s rejected: consumer defined fatal error\n",
+				       libcfs_nid2str(peer->ibp_nid));
+				break;
+			}
+
+			if (rej->ibr_version != IBLND_MSG_VERSION &&
+			    rej->ibr_version != IBLND_MSG_VERSION_1) {
+				CERROR("%s rejected: o2iblnd version %x error\n",
+				       libcfs_nid2str(peer->ibp_nid),
+				       rej->ibr_version);
+				break;
+			}
+
+			if (rej->ibr_why     == IBLND_REJECT_FATAL &&
+			    rej->ibr_version == IBLND_MSG_VERSION_1) {
+				CDEBUG(D_NET, "rejected by old version peer %s: %x\n",
+				       libcfs_nid2str(peer->ibp_nid), rej->ibr_version);
+
+				if (conn->ibc_version != IBLND_MSG_VERSION_1)
+					rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT;
+			}
+
+			switch (rej->ibr_why) {
+			case IBLND_REJECT_CONN_RACE:
+			case IBLND_REJECT_CONN_STALE:
+			case IBLND_REJECT_CONN_UNCOMPAT:
+				kiblnd_reconnect(conn, rej->ibr_version,
+						 incarnation, rej->ibr_why, cp);
+				break;
+
+			case IBLND_REJECT_MSG_QUEUE_SIZE:
+				/* NB cp is only carried by V2 rejections */
+				CERROR("%s rejected: incompatible message queue depth %d (%d wanted)\n",
+				       libcfs_nid2str(peer->ibp_nid),
+				       cp != NULL ? cp->ibcp_queue_depth :
+						    IBLND_MSG_QUEUE_SIZE(rej->ibr_version),
+				       IBLND_MSG_QUEUE_SIZE(conn->ibc_version));
+				break;
+
+			case IBLND_REJECT_RDMA_FRAGS:
+				/* NB cp is only carried by V2 rejections */
+				CERROR("%s rejected: incompatible # of RDMA fragments %d (%d wanted)\n",
+				       libcfs_nid2str(peer->ibp_nid),
+				       cp != NULL ? cp->ibcp_max_frags :
+						    IBLND_RDMA_FRAGS(rej->ibr_version),
+				       IBLND_RDMA_FRAGS(conn->ibc_version));
+				break;
+
+			case IBLND_REJECT_NO_RESOURCES:
+				CERROR("%s rejected: o2iblnd no resources\n",
+				       libcfs_nid2str(peer->ibp_nid));
+				break;
+
+			case IBLND_REJECT_FATAL:
+				CERROR("%s rejected: o2iblnd fatal error\n",
+				       libcfs_nid2str(peer->ibp_nid));
+				break;
+
+			default:
+				CERROR("%s rejected: o2iblnd reason %d\n",
+				       libcfs_nid2str(peer->ibp_nid),
+				       rej->ibr_why);
+				break;
+			}
+			break;
+		}
+		/* fall through */
+	default:
+		CNETERR("%s rejected: reason %d, size %d\n",
+			libcfs_nid2str(peer->ibp_nid), reason, priv_nob);
+		break;
+	}
+
+	kiblnd_connreq_done(conn, -ECONNREFUSED);
+}
+
+void
+kiblnd_check_connreply (kib_conn_t *conn, void *priv, int priv_nob)
+{
+	kib_peer_t    *peer = conn->ibc_peer;
+	lnet_ni_t     *ni   = peer->ibp_ni;
+	kib_net_t     *net  = ni->ni_data;
+	kib_msg_t     *msg  = priv;
+	int            ver  = conn->ibc_version;
+	int            rc   = kiblnd_unpack_msg(msg, priv_nob);
+	unsigned long  flags;
+
+	LASSERT (net != NULL);
+
+	if (rc != 0) {
+		CERROR("Can't unpack connack from %s: %d\n",
+		       libcfs_nid2str(peer->ibp_nid), rc);
+		goto failed;
+	}
+
+	if (msg->ibm_type != IBLND_MSG_CONNACK) {
+		CERROR("Unexpected message %d from %s\n",
+		       msg->ibm_type, libcfs_nid2str(peer->ibp_nid));
+		rc = -EPROTO;
+		goto failed;
+	}
+
+	if (ver != msg->ibm_version) {
+		CERROR("%s replied version %x, which differs from "
+		       "the requested version %x\n",
+		       libcfs_nid2str(peer->ibp_nid), msg->ibm_version, ver);
+		rc = -EPROTO;
+		goto failed;
+	}
+
+	if (msg->ibm_u.connparams.ibcp_queue_depth !=
+	    IBLND_MSG_QUEUE_SIZE(ver)) {
+		CERROR("%s has incompatible queue depth %d (%d wanted)\n",
+		       libcfs_nid2str(peer->ibp_nid),
+		       msg->ibm_u.connparams.ibcp_queue_depth,
+		       IBLND_MSG_QUEUE_SIZE(ver));
+		rc = -EPROTO;
+		goto failed;
+	}
+
+	if (msg->ibm_u.connparams.ibcp_max_frags !=
+	    IBLND_RDMA_FRAGS(ver)) {
+		CERROR("%s has incompatible max_frags %d (%d wanted)\n",
+		       libcfs_nid2str(peer->ibp_nid),
+		       msg->ibm_u.connparams.ibcp_max_frags,
+		       IBLND_RDMA_FRAGS(ver));
+		rc = -EPROTO;
+		goto failed;
+	}
+
+	if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
+		CERROR("%s max message size %d too big (%d max)\n",
+		       libcfs_nid2str(peer->ibp_nid),
+		       msg->ibm_u.connparams.ibcp_max_msg_size,
+		       IBLND_MSG_SIZE);
+		rc = -EPROTO;
+		goto failed;
+	}
+
+	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+	if (msg->ibm_dstnid == ni->ni_nid &&
+	    msg->ibm_dststamp == net->ibn_incarnation)
+		rc = 0;
+	else
+		rc = -ESTALE;
+	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	if (rc != 0) {
+		CERROR("Bad connection reply from %s, rc = %d, "
+		       "version: %x max_frags: %d\n",
+		       libcfs_nid2str(peer->ibp_nid), rc,
+		       msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags);
+		goto failed;
+	}
+
+	conn->ibc_incarnation      = msg->ibm_srcstamp;
+	conn->ibc_credits          =
+	conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(ver);
+	LASSERT (conn->ibc_credits + conn->ibc_reserved_credits +
+		 IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(ver));
+
+	kiblnd_connreq_done(conn, 0);
+	return;
+
+ failed:
+	/* NB My QP has already established itself, so I handle anything going
+	 * wrong here by setting ibc_comms_error.
+	 * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then
+	 * immediately tears it down. */
+
+	LASSERT (rc != 0);
+	conn->ibc_comms_error = rc;
+	kiblnd_connreq_done(conn, 0);
+}
+
+int
+kiblnd_active_connect (struct rdma_cm_id *cmid)
+{
+	kib_peer_t             *peer = (kib_peer_t *)cmid->context;
+	kib_conn_t             *conn;
+	kib_msg_t              *msg;
+	struct rdma_conn_param  cp;
+	int                     version;
+	__u64                   incarnation;
+	unsigned long           flags;
+	int                     rc;
+
+	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	incarnation = peer->ibp_incarnation;
+	version     = (peer->ibp_version == 0) ? IBLND_MSG_VERSION :
+						 peer->ibp_version;
+
+	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT, version);
+	if (conn == NULL) {
+		kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
+		kiblnd_peer_decref(peer); /* lose cmid's ref */
+		return -ENOMEM;
+	}
+
+	/* conn "owns" cmid now, so I return success from here on to ensure the
+	 * CM callback doesn't destroy cmid. conn also takes over cmid's ref
+	 * on peer */
+
+	msg = &conn->ibc_connvars->cv_msg;
+
+	memset(msg, 0, sizeof(*msg));
+	kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
+	msg->ibm_u.connparams.ibcp_queue_depth  = IBLND_MSG_QUEUE_SIZE(version);
+	msg->ibm_u.connparams.ibcp_max_frags    = IBLND_RDMA_FRAGS(version);
+	msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
+
+	kiblnd_pack_msg(peer->ibp_ni, msg, version,
+			0, peer->ibp_nid, incarnation);
+
+	memset(&cp, 0, sizeof(cp));
+	cp.private_data        = msg;
+	cp.private_data_len    = msg->ibm_nob;
+	cp.responder_resources = 0;          /* No atomic ops or RDMA reads */
+	cp.initiator_depth     = 0;
+	cp.flow_control        = 1;
+	cp.retry_count         = *kiblnd_tunables.kib_retry_count;
+	cp.rnr_retry_count     = *kiblnd_tunables.kib_rnr_retry_count;
+
+	LASSERT(cmid->context == (void *)conn);
+	LASSERT(conn->ibc_cmid == cmid);
+
+	rc = rdma_connect(cmid, &cp);
+	if (rc != 0) {
+		CERROR("Can't connect to %s: %d\n",
+		       libcfs_nid2str(peer->ibp_nid), rc);
+		kiblnd_connreq_done(conn, rc);
+		kiblnd_conn_decref(conn);
+	}
+
+	return 0;
+}
+
+int
+kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
+{
+	kib_peer_t  *peer;
+	kib_conn_t  *conn;
+	int          rc;
+
+	switch (event->event) {
+	default:
+		CERROR("Unexpected event: %d, status: %d\n",
+		       event->event, event->status);
+		LBUG();
+
+	case RDMA_CM_EVENT_CONNECT_REQUEST:
+		/* destroy cmid on failure */
+		rc = kiblnd_passive_connect(cmid,
+					    (void *)KIBLND_CONN_PARAM(event),
+					    KIBLND_CONN_PARAM_LEN(event));
+		CDEBUG(D_NET, "connreq: %d\n", rc);
+		return rc;
+
+	case RDMA_CM_EVENT_ADDR_ERROR:
+		peer = (kib_peer_t *)cmid->context;
+		CNETERR("%s: ADDR ERROR %d\n",
+		       libcfs_nid2str(peer->ibp_nid), event->status);
+		kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
+		kiblnd_peer_decref(peer);
+		return -EHOSTUNREACH;      /* rc != 0 destroys cmid */
+
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+		peer = (kib_peer_t *)cmid->context;
+
+		CDEBUG(D_NET, "%s Addr resolved: %d\n",
+		       libcfs_nid2str(peer->ibp_nid), event->status);
+
+		if (event->status != 0) {
+			CNETERR("Can't resolve address for %s: %d\n",
+				libcfs_nid2str(peer->ibp_nid), event->status);
+			rc = event->status;
+		} else {
+			rc = rdma_resolve_route(
+				cmid, *kiblnd_tunables.kib_timeout * 1000);
+			if (rc == 0)
+				return 0;
+			/* Can't initiate route resolution */
+			CERROR("Can't resolve route for %s: %d\n",
+			       libcfs_nid2str(peer->ibp_nid), rc);
+		}
+		kiblnd_peer_connect_failed(peer, 1, rc);
+		kiblnd_peer_decref(peer);
+		return rc;		      /* rc != 0 destroys cmid */
+
+	case RDMA_CM_EVENT_ROUTE_ERROR:
+		peer = (kib_peer_t *)cmid->context;
+		CNETERR("%s: ROUTE ERROR %d\n",
+			libcfs_nid2str(peer->ibp_nid), event->status);
+		kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
+		kiblnd_peer_decref(peer);
+		return -EHOSTUNREACH;	   /* rc != 0 destroys cmid */
+
+	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		peer = (kib_peer_t *)cmid->context;
+		CDEBUG(D_NET, "%s Route resolved: %d\n",
+		       libcfs_nid2str(peer->ibp_nid), event->status);
+
+		if (event->status == 0)
+			return kiblnd_active_connect(cmid);
+
+		CNETERR("Can't resolve route for %s: %d\n",
+		       libcfs_nid2str(peer->ibp_nid), event->status);
+		kiblnd_peer_connect_failed(peer, 1, event->status);
+		kiblnd_peer_decref(peer);
+		return event->status;	   /* rc != 0 destroys cmid */
+
+	case RDMA_CM_EVENT_UNREACHABLE:
+		conn = (kib_conn_t *)cmid->context;
+		LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
+			conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
+		CNETERR("%s: UNREACHABLE %d\n",
+		       libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
+		kiblnd_connreq_done(conn, -ENETDOWN);
+		kiblnd_conn_decref(conn);
+		return 0;
+
+	case RDMA_CM_EVENT_CONNECT_ERROR:
+		conn = (kib_conn_t *)cmid->context;
+		LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
+			conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
+		CNETERR("%s: CONNECT ERROR %d\n",
+			libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
+		kiblnd_connreq_done(conn, -ENOTCONN);
+		kiblnd_conn_decref(conn);
+		return 0;
+
+	case RDMA_CM_EVENT_REJECTED:
+		conn = (kib_conn_t *)cmid->context;
+		switch (conn->ibc_state) {
+		default:
+			LBUG();
+
+		case IBLND_CONN_PASSIVE_WAIT:
+			CERROR ("%s: REJECTED %d\n",
+				libcfs_nid2str(conn->ibc_peer->ibp_nid),
+				event->status);
+			kiblnd_connreq_done(conn, -ECONNRESET);
+			break;
+
+		case IBLND_CONN_ACTIVE_CONNECT:
+			kiblnd_rejected(conn, event->status,
+					(void *)KIBLND_CONN_PARAM(event),
+					KIBLND_CONN_PARAM_LEN(event));
+			break;
+		}
+		kiblnd_conn_decref(conn);
+		return 0;
+
+	case RDMA_CM_EVENT_ESTABLISHED:
+		conn = (kib_conn_t *)cmid->context;
+		switch (conn->ibc_state) {
+		default:
+			LBUG();
+
+		case IBLND_CONN_PASSIVE_WAIT:
+			CDEBUG(D_NET, "ESTABLISHED (passive): %s\n",
+			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
+			kiblnd_connreq_done(conn, 0);
+			break;
+
+		case IBLND_CONN_ACTIVE_CONNECT:
+			CDEBUG(D_NET, "ESTABLISHED(active): %s\n",
+			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
+			kiblnd_check_connreply(conn,
+					       (void *)KIBLND_CONN_PARAM(event),
+					       KIBLND_CONN_PARAM_LEN(event));
+			break;
+		}
+		/* net keeps its ref on conn! */
+		return 0;
+
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+		CDEBUG(D_NET, "Ignore TIMEWAIT_EXIT event\n");
+		return 0;
+	case RDMA_CM_EVENT_DISCONNECTED:
+		conn = (kib_conn_t *)cmid->context;
+		if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
+			CERROR("%s DISCONNECTED\n",
+			       libcfs_nid2str(conn->ibc_peer->ibp_nid));
+			kiblnd_connreq_done(conn, -ECONNRESET);
+		} else {
+			kiblnd_close_conn(conn, 0);
+		}
+		kiblnd_conn_decref(conn);
+		cmid->context = NULL;
+		return 0;
+
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		LCONSOLE_ERROR_MSG(0x131,
+				   "Received notification of device removal\n"
+				   "Please shutdown LNET to allow this to proceed\n");
+		/* Can't remove network from underneath LNET for now, so I have
+		 * to ignore this */
+		return 0;
+
+	case RDMA_CM_EVENT_ADDR_CHANGE:
+		LCONSOLE_INFO("Physical link changed (e.g. HCA/port)\n");
+		return 0;
+	}
+}
+
+static int
+kiblnd_check_txs_locked(kib_conn_t *conn, struct list_head *txs)
+{
+	kib_tx_t         *tx;
+	struct list_head *ttmp;
+
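+	/* NB a tx on ibc_active_txs has already been posted: it is no longer
+	 * queued, and must be waiting for a reply and/or still have sends in
+	 * flight (see the LASSERTs below). */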
+	list_for_each (ttmp, txs) {
+		tx = list_entry (ttmp, kib_tx_t, tx_list);
+
+		if (txs != &conn->ibc_active_txs) {
+			LASSERT (tx->tx_queued);
+		} else {
+			LASSERT (!tx->tx_queued);
+			LASSERT (tx->tx_waiting || tx->tx_sending != 0);
+		}
+
+		if (cfs_time_aftereq (jiffies, tx->tx_deadline)) {
+			CERROR("Timed out tx: %s, %lu seconds\n",
+			       kiblnd_queue2str(conn, txs),
+			       cfs_duration_sec(jiffies - tx->tx_deadline));
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+kiblnd_conn_timed_out_locked(kib_conn_t *conn)
+{
+	return  kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue) ||
+		kiblnd_check_txs_locked(conn, &conn->ibc_tx_noops) ||
+		kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_rsrvd) ||
+		kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_nocred) ||
+		kiblnd_check_txs_locked(conn, &conn->ibc_active_txs);
+}
+
+void
+kiblnd_check_conns (int idx)
+{
+	LIST_HEAD (closes);
+	LIST_HEAD (checksends);
+	struct list_head *peers = &kiblnd_data.kib_peers[idx];
+	struct list_head *ptmp;
+	kib_peer_t       *peer;
+	kib_conn_t       *conn;
+	struct list_head *ctmp;
+	unsigned long     flags;
+
+	/* NB. We expect to have a look at all the peers and not find any
+	 * RDMAs to time out, so we just use a shared lock while we
+	 * take a look... */
+	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+	list_for_each (ptmp, peers) {
+		peer = list_entry (ptmp, kib_peer_t, ibp_list);
+
+		list_for_each (ctmp, &peer->ibp_conns) {
+			int timedout;
+			int sendnoop;
+
+			conn = list_entry(ctmp, kib_conn_t, ibc_list);
+
+			LASSERT (conn->ibc_state == IBLND_CONN_ESTABLISHED);
+
+			spin_lock(&conn->ibc_lock);
+
+			sendnoop = kiblnd_need_noop(conn);
+			timedout = kiblnd_conn_timed_out_locked(conn);
+			if (!sendnoop && !timedout) {
+				spin_unlock(&conn->ibc_lock);
+				continue;
+			}
+
+			if (timedout) {
+				CERROR("Timed out RDMA with %s (%lu): "
+				       "credits: %u, outstanding: %u, reserved: %u\n",
+				       libcfs_nid2str(peer->ibp_nid),
+				       cfs_duration_sec(cfs_time_current() -
+							peer->ibp_last_alive),
+				       conn->ibc_credits,
+				       conn->ibc_outstanding_credits,
+				       conn->ibc_reserved_credits);
+				list_add(&conn->ibc_connd_list, &closes);
+			} else {
+				list_add(&conn->ibc_connd_list, &checksends);
+			}
+			/* +ref for 'closes' or 'checksends' */
+			kiblnd_conn_addref(conn);
+
+			spin_unlock(&conn->ibc_lock);
+		}
+	}
+
+	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	/* Handle timeout by closing the whole
+	 * connection. We can only be sure RDMA activity
+	 * has ceased once the QP has been modified. */
+	while (!list_empty(&closes)) {
+		conn = list_entry(closes.next, kib_conn_t, ibc_connd_list);
+		list_del(&conn->ibc_connd_list);
+		kiblnd_close_conn(conn, -ETIMEDOUT);
+		kiblnd_conn_decref(conn);
+	}
+
+	/* In case we have enough credits to return via a
+	 * NOOP, but there were no non-blocking tx descs
+	 * free to do it last time... */
+	while (!list_empty(&checksends)) {
+		conn = list_entry(checksends.next,
+				  kib_conn_t, ibc_connd_list);
+		list_del(&conn->ibc_connd_list);
+		kiblnd_check_sends(conn);
+		kiblnd_conn_decref(conn);
+	}
+}
+
+void
+kiblnd_disconnect_conn (kib_conn_t *conn)
+{
+	LASSERT (!in_interrupt());
+	LASSERT (current == kiblnd_data.kib_connd);
+	LASSERT (conn->ibc_state == IBLND_CONN_CLOSING);
+
+	rdma_disconnect(conn->ibc_cmid);
+	kiblnd_finalise_conn(conn);
+
+	kiblnd_peer_notify(conn->ibc_peer);
+}
+
+int
+kiblnd_connd (void *arg)
+{
+	wait_queue_t   wait;
+	unsigned long  flags;
+	kib_conn_t    *conn;
+	int            timeout;
+	int            i;
+	int            dropped_lock;
+	int            peer_index = 0;
+	unsigned long  deadline = jiffies;
+
+	cfs_block_allsigs ();
+
+	init_waitqueue_entry_current (&wait);
+	kiblnd_data.kib_connd = current;
+
+	spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+
+	while (!kiblnd_data.kib_shutdown) {
+
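+		/* dropped_lock records whether kib_connd_lock was released
+		 * during this pass; if it was, re-scan the work queues
+		 * before blocking in case new work arrived meanwhile. */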
+		dropped_lock = 0;
+
+		if (!list_empty (&kiblnd_data.kib_connd_zombies)) {
+			conn = list_entry(kiblnd_data.kib_connd_zombies.next,
+					  kib_conn_t, ibc_list);
+			list_del(&conn->ibc_list);
+
+			spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock,
+					       flags);
+			dropped_lock = 1;
+
+			kiblnd_destroy_conn(conn);
+
+			spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+		}
+
+		if (!list_empty(&kiblnd_data.kib_connd_conns)) {
+			conn = list_entry(kiblnd_data.kib_connd_conns.next,
+					  kib_conn_t, ibc_list);
+			list_del(&conn->ibc_list);
+
+			spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock,
+					       flags);
+			dropped_lock = 1;
+
+			kiblnd_disconnect_conn(conn);
+			kiblnd_conn_decref(conn);
+
+			spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+		}
+
+		/* careful with the jiffy wrap... */
+		timeout = (int)(deadline - jiffies);
+		if (timeout <= 0) {
+			const int n = 4;
+			const int p = 1;
+			int       chunk = kiblnd_data.kib_peer_hash_size;
+
+			spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+			dropped_lock = 1;
+
+			/* Time to check for RDMA timeouts on a few more
+			 * peers: I do checks every 'p' seconds on a
+			 * proportion of the peer table and I need to check
+			 * every connection 'n' times within a timeout
+			 * interval, to ensure I detect a timeout on any
+			 * connection within (n+1)/n times the timeout
+			 * interval. */
+
+			if (*kiblnd_tunables.kib_timeout > n * p)
+				chunk = (chunk * n * p) /
+					*kiblnd_tunables.kib_timeout;
+			if (chunk == 0)
+				chunk = 1;
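+
+			/* Worked example (illustrative): with the default
+			 * timeout of 50s and n = 4, p = 1 above, chunk is
+			 * hash_size * 4 / 50, so each 1-second tick scans
+			 * about 1/12.5 of the peer table and every bucket
+			 * is revisited roughly every timeout / n = 12.5s. */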
+
+			for (i = 0; i < chunk; i++) {
+				kiblnd_check_conns(peer_index);
+				peer_index = (peer_index + 1) %
+					     kiblnd_data.kib_peer_hash_size;
+			}
+
+			deadline += p * HZ;
+			spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+		}
+
+		if (dropped_lock)
+			continue;
+
+		/* Nothing to do for 'timeout' */
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
+		spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+
+		waitq_timedwait(&wait, TASK_INTERRUPTIBLE, timeout);
+
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
+		spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+	}
+
+	spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+
+	kiblnd_thread_fini();
+	return 0;
+}
+
+void
+kiblnd_qp_event(struct ib_event *event, void *arg)
+{
+	kib_conn_t *conn = arg;
+
+	switch (event->event) {
+	case IB_EVENT_COMM_EST:
+		CDEBUG(D_NET, "%s established\n",
+		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		return;
+
+	default:
+		CERROR("%s: Async QP event type %d\n",
+		       libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
+		return;
+	}
+}
+
+void
+kiblnd_complete (struct ib_wc *wc)
+{
+	switch (kiblnd_wreqid2type(wc->wr_id)) {
+	default:
+		LBUG();
+
+	case IBLND_WID_RDMA:
+		/* We only get RDMA completion notification if it fails.  All
+		 * subsequent work items, including the final SEND will fail
+		 * too.  However we can't print out any more info about the
+		 * failing RDMA because 'tx' might be back on the idle list or
+		 * even reused already if we didn't manage to post all our work
+		 * items */
+		CNETERR("RDMA (tx: %p) failed: %d\n",
+			kiblnd_wreqid2ptr(wc->wr_id), wc->status);
+		return;
+
+	case IBLND_WID_TX:
+		kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status);
+		return;
+
+	case IBLND_WID_RX:
+		kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status,
+				   wc->byte_len);
+		return;
+	}
+}
+
+void
+kiblnd_cq_completion(struct ib_cq *cq, void *arg)
+{
+	/* NB I'm not allowed to schedule this conn once its refcount has
+	 * reached 0.  Since fundamentally I'm racing with scheduler threads
+	 * consuming my CQ I could be called after all completions have
+	 * occurred.  But in this case, ibc_nrx == 0 && ibc_nsends_posted == 0
+	 * and this CQ is about to be destroyed so I NOOP. */
+	kib_conn_t		*conn = (kib_conn_t *)arg;
+	struct kib_sched_info	*sched = conn->ibc_sched;
+	unsigned long		flags;
+
+	LASSERT(cq == conn->ibc_cq);
+
+	spin_lock_irqsave(&sched->ibs_lock, flags);
+
+	conn->ibc_ready = 1;
+
+	if (!conn->ibc_scheduled &&
+	    (conn->ibc_nrx > 0 ||
+	     conn->ibc_nsends_posted > 0)) {
+		kiblnd_conn_addref(conn); /* +1 ref for sched_conns */
+		conn->ibc_scheduled = 1;
+		list_add_tail(&conn->ibc_sched_list, &sched->ibs_conns);
+
+		if (waitqueue_active(&sched->ibs_waitq))
+			wake_up(&sched->ibs_waitq);
+	}
+
+	spin_unlock_irqrestore(&sched->ibs_lock, flags);
+}
+
+void
+kiblnd_cq_event(struct ib_event *event, void *arg)
+{
+	kib_conn_t *conn = arg;
+
+	CERROR("%s: async CQ event type %d\n",
+	       libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
+}
+
+int
+kiblnd_scheduler(void *arg)
+{
+	long			id = (long)arg;
+	struct kib_sched_info	*sched;
+	kib_conn_t		*conn;
+	wait_queue_t		wait;
+	unsigned long		flags;
+	struct ib_wc		wc;
+	int			did_something;
+	int			busy_loops = 0;
+	int			rc;
+
+	cfs_block_allsigs();
+
+	init_waitqueue_entry_current(&wait);
+
+	sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)];
+
+	rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt);
+	if (rc != 0) {
+		CWARN("Failed to bind on CPT %d, please verify whether "
+		      "all CPUs are healthy and reload modules if necessary, "
+		      "otherwise your system might be at risk of low "
+		      "performance\n", sched->ibs_cpt);
+	}
+
+	spin_lock_irqsave(&sched->ibs_lock, flags);
+
+	while (!kiblnd_data.kib_shutdown) {
+		if (busy_loops++ >= IBLND_RESCHED) {
+			spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+			cond_resched();
+			busy_loops = 0;
+
+			spin_lock_irqsave(&sched->ibs_lock, flags);
+		}
+
+		did_something = 0;
+
+		if (!list_empty(&sched->ibs_conns)) {
+			conn = list_entry(sched->ibs_conns.next,
+					  kib_conn_t, ibc_sched_list);
+			/* take over kib_sched_conns' ref on conn... */
+			LASSERT(conn->ibc_scheduled);
+			list_del(&conn->ibc_sched_list);
+			conn->ibc_ready = 0;
+
+			spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+			rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
+			if (rc == 0) {
+				rc = ib_req_notify_cq(conn->ibc_cq,
+						      IB_CQ_NEXT_COMP);
+				if (rc < 0) {
+					CWARN("%s: ib_req_notify_cq failed: %d, "
+					      "closing connection\n",
+					      libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+					kiblnd_close_conn(conn, -EIO);
+					kiblnd_conn_decref(conn);
+					spin_lock_irqsave(&sched->ibs_lock,
+							  flags);
+					continue;
+				}
+
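+				/* Poll again after re-arming: a completion
+				 * that arrived between the first poll and
+				 * ib_req_notify_cq() will not generate an
+				 * event, so it must be reaped here. */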
+				rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
+			}
+
+			if (rc < 0) {
+				CWARN("%s: ib_poll_cq failed: %d, "
+				      "closing connection\n",
+				      libcfs_nid2str(conn->ibc_peer->ibp_nid),
+				      rc);
+				kiblnd_close_conn(conn, -EIO);
+				kiblnd_conn_decref(conn);
+				spin_lock_irqsave(&sched->ibs_lock, flags);
+				continue;
+			}
+
+			spin_lock_irqsave(&sched->ibs_lock, flags);
+
+			if (rc != 0 || conn->ibc_ready) {
+				/* There may be another completion waiting; get
+				 * another scheduler to check while I handle
+				 * this one... */
+				/* +1 ref for sched_conns */
+				kiblnd_conn_addref(conn);
+				list_add_tail(&conn->ibc_sched_list,
+					      &sched->ibs_conns);
+				if (waitqueue_active(&sched->ibs_waitq))
+					wake_up(&sched->ibs_waitq);
+			} else {
+				conn->ibc_scheduled = 0;
+			}
+
+			if (rc != 0) {
+				spin_unlock_irqrestore(&sched->ibs_lock, flags);
+				kiblnd_complete(&wc);
+
+				spin_lock_irqsave(&sched->ibs_lock, flags);
+			}
+
+			kiblnd_conn_decref(conn); /* ...drop my ref from above */
+			did_something = 1;
+		}
+
+		if (did_something)
+			continue;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue_exclusive(&sched->ibs_waitq, &wait);
+		spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+		waitq_wait(&wait, TASK_INTERRUPTIBLE);
+		busy_loops = 0;
+
+		remove_wait_queue(&sched->ibs_waitq, &wait);
+		set_current_state(TASK_RUNNING);
+		spin_lock_irqsave(&sched->ibs_lock, flags);
+	}
+
+	spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+	kiblnd_thread_fini();
+	return 0;
+}
+
+int
+kiblnd_failover_thread(void *arg)
+{
+	rwlock_t      *glock = &kiblnd_data.kib_global_lock;
+	kib_dev_t     *dev;
+	wait_queue_t   wait;
+	unsigned long  flags;
+	int            rc;
+
+	LASSERT (*kiblnd_tunables.kib_dev_failover != 0);
+
+	cfs_block_allsigs ();
+
+	init_waitqueue_entry_current(&wait);
+	write_lock_irqsave(glock, flags);
+
+	while (!kiblnd_data.kib_shutdown) {
+		int     do_failover = 0;
+		int     long_sleep;
+
+		list_for_each_entry(dev, &kiblnd_data.kib_failed_devs,
+				    ibd_fail_list) {
+			if (cfs_time_before(cfs_time_current(),
+					    dev->ibd_next_failover))
+				continue;
+			do_failover = 1;
+			break;
+		}
+
+		if (do_failover) {
+			list_del_init(&dev->ibd_fail_list);
+			dev->ibd_failover = 1;
+			write_unlock_irqrestore(glock, flags);
+
+			rc = kiblnd_dev_failover(dev);
+
+			write_lock_irqsave(glock, flags);
+
+			LASSERT (dev->ibd_failover);
+			dev->ibd_failover = 0;
+			if (rc >= 0) { /* Device is OK or failover succeeded */
+				dev->ibd_next_failover = cfs_time_shift(3);
+				continue;
+			}
+
+			/* failed to failover, retry later */
+			dev->ibd_next_failover =
+				cfs_time_shift(min(dev->ibd_failed_failover, 10));
+			if (kiblnd_dev_can_failover(dev)) {
+				list_add_tail(&dev->ibd_fail_list,
+					      &kiblnd_data.kib_failed_devs);
+			}
+
+			continue;
+		}
+
+		/* long sleep if no more pending failover */
+		long_sleep = list_empty(&kiblnd_data.kib_failed_devs);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
+		write_unlock_irqrestore(glock, flags);
+
+		rc = schedule_timeout(long_sleep ? cfs_time_seconds(10) :
+						   cfs_time_seconds(1));
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
+		write_lock_irqsave(glock, flags);
+
+		if (!long_sleep || rc != 0)
+			continue;
+
+		/* After a long sleep, routinely check all active devices.
+		 * We need a check like this because if a device has no
+		 * active connections and we issue no SENDs locally, we may
+		 * listen on the wrong HCA forever after a bonding failover */
+		list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
+			if (kiblnd_dev_can_failover(dev)) {
+				list_add_tail(&dev->ibd_fail_list,
+					      &kiblnd_data.kib_failed_devs);
+			}
+		}
+	}
+
+	write_unlock_irqrestore(glock, flags);
+
+	kiblnd_thread_fini();
+	return 0;
+}

+ 493 - 0
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c

@@ -0,0 +1,493 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/o2iblnd/o2iblnd_modparams.c
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include "o2iblnd.h"
+
+static int service = 987;
+CFS_MODULE_PARM(service, "i", int, 0444,
+		"service number (within RDMA_PS_TCP)");
+
+static int cksum = 0;
+CFS_MODULE_PARM(cksum, "i", int, 0644,
+		"set non-zero to enable message (not RDMA) checksums");
+
+static int timeout = 50;
+CFS_MODULE_PARM(timeout, "i", int, 0644,
+		"timeout (seconds)");
+
+/* Number of threads in each scheduler pool (which is per-CPT);
+ * we will estimate a reasonable value based on CPUs if it's set to zero. */
+static int nscheds;
+CFS_MODULE_PARM(nscheds, "i", int, 0444,
+		"number of threads in each scheduler pool");
+
+/* NB: this value is shared by all CPTs, it can grow at runtime */
+static int ntx = 512;
+CFS_MODULE_PARM(ntx, "i", int, 0444,
+		"# of message descriptors allocated for each pool");
+
+/* NB: this value is shared by all CPTs */
+static int credits = 256;
+CFS_MODULE_PARM(credits, "i", int, 0444,
+		"# concurrent sends");
+
+static int peer_credits = 8;
+CFS_MODULE_PARM(peer_credits, "i", int, 0444,
+		"# concurrent sends to 1 peer");
+
+static int peer_credits_hiw = 0;
+CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
+		"when to eagerly return credits");
+
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+		"# per-peer router buffer credits");
+
+static int peer_timeout = 180;
+CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
+		"Seconds without aliveness news to declare peer dead (<=0 to disable)");
+
+static char *ipif_name = "ib0";
+CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
+		"IPoIB interface name");
+
+static int retry_count = 5;
+CFS_MODULE_PARM(retry_count, "i", int, 0644,
+		"Retransmissions when no ACK received");
+
+static int rnr_retry_count = 6;
+CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
+		"RNR retransmissions");
+
+static int keepalive = 100;
+CFS_MODULE_PARM(keepalive, "i", int, 0644,
+		"Idle time in seconds before sending a keepalive");
+
+static int ib_mtu = 0;
+CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
+		"IB MTU 256/512/1024/2048/4096");
+
+static int concurrent_sends = 0;
+CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
+		"send work-queue sizing");
+
+static int map_on_demand = 0;
+CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
+		"map on demand");
+
+/* NB: this value is shared by all CPTs, it can grow at runtime */
+static int fmr_pool_size = 512;
+CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
+		"size of fmr pool on each CPT (>= ntx / 4)");
+
+/* NB: this value is shared by all CPTs, it can grow at runtime */
+static int fmr_flush_trigger = 384;
+CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
+		"# dirty FMRs that triggers pool flush");
+
+static int fmr_cache = 1;
+CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
+		"non-zero to enable FMR caching");
+
+/* NB: this value is shared by all CPTs, it can grow at runtime */
+static int pmr_pool_size = 512;
+CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
+		"size of MR cache pmr pool on each CPT");
+
+/*
+ * 0: disable failover
+ * 1: enable failover if necessary
+ * 2: force to failover (for debug)
+ */
+static int dev_failover = 0;
+CFS_MODULE_PARM(dev_failover, "i", int, 0444,
+	       "HCA failover for bonding (0 off, 1 on, other values reserved)");
+
+static int require_privileged_port = 0;
+CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
+		"require privileged port when accepting connection");
+
+static int use_privileged_port = 1;
+CFS_MODULE_PARM(use_privileged_port, "i", int, 0644,
+		"use privileged port when initiating connection");
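+
+/* NB the CFS_MODULE_PARM settings above surface as ordinary module options,
+ * e.g. a line such as "options ko2iblnd timeout=100" in /etc/modprobe.d
+ * (the option values shown are illustrative only). */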
+
+kib_tunables_t kiblnd_tunables = {
+	.kib_dev_failover      = &dev_failover,
+	.kib_service           = &service,
+	.kib_cksum             = &cksum,
+	.kib_timeout           = &timeout,
+	.kib_keepalive         = &keepalive,
+	.kib_ntx               = &ntx,
+	.kib_credits           = &credits,
+	.kib_peertxcredits     = &peer_credits,
+	.kib_peercredits_hiw   = &peer_credits_hiw,
+	.kib_peerrtrcredits    = &peer_buffer_credits,
+	.kib_peertimeout       = &peer_timeout,
+	.kib_default_ipif      = &ipif_name,
+	.kib_retry_count       = &retry_count,
+	.kib_rnr_retry_count   = &rnr_retry_count,
+	.kib_concurrent_sends  = &concurrent_sends,
+	.kib_ib_mtu            = &ib_mtu,
+	.kib_map_on_demand     = &map_on_demand,
+	.kib_fmr_pool_size     = &fmr_pool_size,
+	.kib_fmr_flush_trigger = &fmr_flush_trigger,
+	.kib_fmr_cache         = &fmr_cache,
+	.kib_pmr_pool_size     = &pmr_pool_size,
+	.kib_require_priv_port = &require_privileged_port,
+	.kib_use_priv_port     = &use_privileged_port,
+	.kib_nscheds           = &nscheds
+};
+
+#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+
+static char ipif_basename_space[32];
+
+enum {
+	O2IBLND_SERVICE  = 1,
+	O2IBLND_CKSUM,
+	O2IBLND_TIMEOUT,
+	O2IBLND_NTX,
+	O2IBLND_CREDITS,
+	O2IBLND_PEER_TXCREDITS,
+	O2IBLND_PEER_CREDITS_HIW,
+	O2IBLND_PEER_RTRCREDITS,
+	O2IBLND_PEER_TIMEOUT,
+	O2IBLND_IPIF_BASENAME,
+	O2IBLND_RETRY_COUNT,
+	O2IBLND_RNR_RETRY_COUNT,
+	O2IBLND_KEEPALIVE,
+	O2IBLND_CONCURRENT_SENDS,
+	O2IBLND_IB_MTU,
+	O2IBLND_MAP_ON_DEMAND,
+	O2IBLND_FMR_POOL_SIZE,
+	O2IBLND_FMR_FLUSH_TRIGGER,
+	O2IBLND_FMR_CACHE,
+	O2IBLND_PMR_POOL_SIZE,
+	O2IBLND_DEV_FAILOVER
+};
+
+static ctl_table_t kiblnd_ctl_table[] = {
+	{
+		.ctl_name = O2IBLND_SERVICE,
+		.procname = "service",
+		.data     = &service,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_CKSUM,
+		.procname = "cksum",
+		.data     = &cksum,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_TIMEOUT,
+		.procname = "timeout",
+		.data     = &timeout,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_NTX,
+		.procname = "ntx",
+		.data     = &ntx,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_CREDITS,
+		.procname = "credits",
+		.data     = &credits,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_PEER_TXCREDITS,
+		.procname = "peer_credits",
+		.data     = &peer_credits,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_PEER_CREDITS_HIW,
+		.procname = "peer_credits_hiw",
+		.data     = &peer_credits_hiw,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_PEER_RTRCREDITS,
+		.procname = "peer_buffer_credits",
+		.data     = &peer_buffer_credits,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_PEER_TIMEOUT,
+		.procname = "peer_timeout",
+		.data     = &peer_timeout,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_IPIF_BASENAME,
+		.procname = "ipif_name",
+		.data     = ipif_basename_space,
+		.maxlen   = sizeof(ipif_basename_space),
+		.mode     = 0444,
+		.proc_handler = &proc_dostring
+	},
+	{
+		.ctl_name = O2IBLND_RETRY_COUNT,
+		.procname = "retry_count",
+		.data     = &retry_count,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_RNR_RETRY_COUNT,
+		.procname = "rnr_retry_count",
+		.data     = &rnr_retry_count,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_KEEPALIVE,
+		.procname = "keepalive",
+		.data     = &keepalive,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_CONCURRENT_SENDS,
+		.procname = "concurrent_sends",
+		.data     = &concurrent_sends,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_IB_MTU,
+		.procname = "ib_mtu",
+		.data     = &ib_mtu,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_MAP_ON_DEMAND,
+		.procname = "map_on_demand",
+		.data     = &map_on_demand,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_FMR_POOL_SIZE,
+		.procname = "fmr_pool_size",
+		.data     = &fmr_pool_size,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
+		.procname = "fmr_flush_trigger",
+		.data     = &fmr_flush_trigger,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_FMR_CACHE,
+		.procname = "fmr_cache",
+		.data     = &fmr_cache,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_PMR_POOL_SIZE,
+		.procname = "pmr_pool_size",
+		.data     = &pmr_pool_size,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{
+		.ctl_name = O2IBLND_DEV_FAILOVER,
+		.procname = "dev_failover",
+		.data     = &dev_failover,
+		.maxlen   = sizeof(int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec
+	},
+	{0}
+};
+
+static ctl_table_t kiblnd_top_ctl_table[] = {
+	{
+		.ctl_name = CTL_O2IBLND,
+		.procname = "o2iblnd",
+		.data     = NULL,
+		.maxlen   = 0,
+		.mode     = 0555,
+		.child    = kiblnd_ctl_table
+	},
+	{0}
+};
+
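+/* Copy a string tunable into its static sysctl buffer, guaranteeing NUL
+ * termination even when the source fills the buffer (plain strncpy does
+ * not). */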
+void
+kiblnd_initstrtunable(char *space, char *str, int size)
+{
+	strncpy(space, str, size);
+	space[size-1] = 0;
+}
+
+void
+kiblnd_sysctl_init (void)
+{
+	kiblnd_initstrtunable(ipif_basename_space, ipif_name,
+			      sizeof(ipif_basename_space));
+
+	kiblnd_tunables.kib_sysctl =
+		cfs_register_sysctl_table(kiblnd_top_ctl_table, 0);
+
+	if (kiblnd_tunables.kib_sysctl == NULL)
+		CWARN("Can't setup /proc tunables\n");
+}
+
+void
+kiblnd_sysctl_fini (void)
+{
+	if (kiblnd_tunables.kib_sysctl != NULL)
+		unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
+}
+
+#else
+
+void
+kiblnd_sysctl_init (void)
+{
+}
+
+void
+kiblnd_sysctl_fini (void)
+{
+}
+
+#endif
+
+int
+kiblnd_tunables_init (void)
+{
+	if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
+		CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
+		       *kiblnd_tunables.kib_ib_mtu);
+		return -EINVAL;
+	}
+
+	if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
+		*kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
+
+	if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
+		*kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
+
+	if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
+		*kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
+
+	if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
+		*kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
+
+	if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
+		*kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
+
+	if (*kiblnd_tunables.kib_map_on_demand < 0 ||
+	    *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
+		*kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
+
+	if (*kiblnd_tunables.kib_map_on_demand == 1)
+		*kiblnd_tunables.kib_map_on_demand = 2; /* doesn't make sense to create a map for only one fragment */
+
+	if (*kiblnd_tunables.kib_concurrent_sends == 0) {
+		if (*kiblnd_tunables.kib_map_on_demand > 0 &&
+		    *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
+			*kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
+		else
+			*kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
+	}
+
+	if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
+		*kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
+
+	if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
+		*kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
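+
+	/* Worked example (illustrative, assuming IBLND_CREDITS_DEFAULT <= 8):
+	 * the defaults above (peer_credits = 8, peer_credits_hiw = 0,
+	 * map_on_demand = 0, concurrent_sends = 0) end up as
+	 * peer_credits_hiw = 4 and concurrent_sends = 8, so the warning
+	 * below does not fire. */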
+
+	if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
+		CWARN("Concurrent sends %d is lower than message queue size: %d, "
+		      "performance may drop slightly.\n",
+		      *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
+	}
+
+	kiblnd_sysctl_init();
+	return 0;
+}
+
+void
+kiblnd_tunables_fini (void)
+{
+	kiblnd_sysctl_fini();
+}

+ 7 - 0
drivers/staging/lustre/lnet/klnds/socklnd/Makefile

@@ -0,0 +1,7 @@
+obj-$(CONFIG_LNET) += ksocklnd.o
+
+ksocklnd-y := socklnd.o socklnd_cb.o socklnd_proto.o socklnd_modparams.o socklnd_lib-linux.o
+
+ccflags-y := -I$(src)/../../include

+ 2902 - 0
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c

@@ -0,0 +1,2902 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/socklnd/socklnd.c
+ *
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include "socklnd.h"
+
+lnd_t		   the_ksocklnd;
+ksock_nal_data_t	ksocknal_data;
+
+ksock_interface_t *
+ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip)
+{
+	ksock_net_t       *net = ni->ni_data;
+	int                i;
+	ksock_interface_t *iface;
+
+	for (i = 0; i < net->ksnn_ninterfaces; i++) {
+		LASSERT(i < LNET_MAX_INTERFACES);
+		iface = &net->ksnn_interfaces[i];
+
+		if (iface->ksni_ipaddr == ip)
+			return (iface);
+	}
+
+	return (NULL);
+}
+
+ksock_route_t *
+ksocknal_create_route (__u32 ipaddr, int port)
+{
+	ksock_route_t *route;
+
+	LIBCFS_ALLOC (route, sizeof (*route));
+	if (route == NULL)
+		return (NULL);
+
+	atomic_set (&route->ksnr_refcount, 1);
+	route->ksnr_peer = NULL;
+	route->ksnr_retry_interval = 0;	 /* OK to connect at any time */
+	route->ksnr_ipaddr = ipaddr;
+	route->ksnr_port = port;
+	route->ksnr_scheduled = 0;
+	route->ksnr_connecting = 0;
+	route->ksnr_connected = 0;
+	route->ksnr_deleted = 0;
+	route->ksnr_conn_count = 0;
+	route->ksnr_share_count = 0;
+
+	return (route);
+}
+
+void
+ksocknal_destroy_route (ksock_route_t *route)
+{
+	LASSERT (atomic_read(&route->ksnr_refcount) == 0);
+
+	if (route->ksnr_peer != NULL)
+		ksocknal_peer_decref(route->ksnr_peer);
+
+	LIBCFS_FREE (route, sizeof (*route));
+}
+
+int
+ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
+{
+	ksock_net_t   *net = ni->ni_data;
+	ksock_peer_t  *peer;
+
+	LASSERT (id.nid != LNET_NID_ANY);
+	LASSERT (id.pid != LNET_PID_ANY);
+	LASSERT (!in_interrupt());
+
+	LIBCFS_ALLOC (peer, sizeof (*peer));
+	if (peer == NULL)
+		return -ENOMEM;
+
+	memset (peer, 0, sizeof (*peer));       /* NULL pointers/clear flags etc */
+
+	peer->ksnp_ni = ni;
+	peer->ksnp_id = id;
+	atomic_set (&peer->ksnp_refcount, 1);   /* 1 ref for caller */
+	peer->ksnp_closing = 0;
+	peer->ksnp_accepting = 0;
+	peer->ksnp_proto = NULL;
+	peer->ksnp_last_alive = 0;
+	peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
+
+	INIT_LIST_HEAD (&peer->ksnp_conns);
+	INIT_LIST_HEAD (&peer->ksnp_routes);
+	INIT_LIST_HEAD (&peer->ksnp_tx_queue);
+	INIT_LIST_HEAD (&peer->ksnp_zc_req_list);
+	spin_lock_init(&peer->ksnp_lock);
+
+	spin_lock_bh(&net->ksnn_lock);
+
+	if (net->ksnn_shutdown) {
+		spin_unlock_bh(&net->ksnn_lock);
+
+		LIBCFS_FREE(peer, sizeof(*peer));
+		CERROR("Can't create peer: network shutdown\n");
+		return -ESHUTDOWN;
+	}
+
+	net->ksnn_npeers++;
+
+	spin_unlock_bh(&net->ksnn_lock);
+
+	*peerp = peer;
+	return 0;
+}
+
+void
+ksocknal_destroy_peer (ksock_peer_t *peer)
+{
+	ksock_net_t    *net = peer->ksnp_ni->ni_data;
+
+	CDEBUG (D_NET, "peer %s %p deleted\n",
+		libcfs_id2str(peer->ksnp_id), peer);
+
+	LASSERT (atomic_read (&peer->ksnp_refcount) == 0);
+	LASSERT (peer->ksnp_accepting == 0);
+	LASSERT (list_empty (&peer->ksnp_conns));
+	LASSERT (list_empty (&peer->ksnp_routes));
+	LASSERT (list_empty (&peer->ksnp_tx_queue));
+	LASSERT (list_empty (&peer->ksnp_zc_req_list));
+
+	LIBCFS_FREE (peer, sizeof (*peer));
+
+	/* NB a peer's connections and routes keep a reference on their peer
+	 * until they are destroyed, so we can be assured that _all_ state to
+	 * do with this peer has been cleaned up when its refcount drops to
+	 * zero. */
+	spin_lock_bh(&net->ksnn_lock);
+	net->ksnn_npeers--;
+	spin_unlock_bh(&net->ksnn_lock);
+}
+
+ksock_peer_t *
+ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id)
+{
+	struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
+	struct list_head *tmp;
+	ksock_peer_t     *peer;
+
+	list_for_each (tmp, peer_list) {
+
+		peer = list_entry (tmp, ksock_peer_t, ksnp_list);
+
+		LASSERT (!peer->ksnp_closing);
+
+		if (peer->ksnp_ni != ni)
+			continue;
+
+		if (peer->ksnp_id.nid != id.nid ||
+		    peer->ksnp_id.pid != id.pid)
+			continue;
+
+		CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
+		       peer, libcfs_id2str(id),
+		       atomic_read(&peer->ksnp_refcount));
+		return (peer);
+	}
+	return (NULL);
+}
+
+ksock_peer_t *
+ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id)
+{
+	ksock_peer_t     *peer;
+
+	read_lock(&ksocknal_data.ksnd_global_lock);
+	peer = ksocknal_find_peer_locked(ni, id);
+	if (peer != NULL)			/* +1 ref for caller? */
+		ksocknal_peer_addref(peer);
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+
+	return (peer);
+}
+
+void
+ksocknal_unlink_peer_locked (ksock_peer_t *peer)
+{
+	int                i;
+	__u32              ip;
+	ksock_interface_t *iface;
+
+	for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
+		LASSERT (i < LNET_MAX_INTERFACES);
+		ip = peer->ksnp_passive_ips[i];
+
+		iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
+		/* All IPs in peer->ksnp_passive_ips[] come from the
+		 * interface list, therefore the call must succeed. */
+		LASSERT (iface != NULL);
+
+		CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
+		       peer, iface, iface->ksni_nroutes);
+		iface->ksni_npeers--;
+	}
+
+	LASSERT (list_empty(&peer->ksnp_conns));
+	LASSERT (list_empty(&peer->ksnp_routes));
+	LASSERT (!peer->ksnp_closing);
+	peer->ksnp_closing = 1;
+	list_del (&peer->ksnp_list);
+	/* lose peerlist's ref */
+	ksocknal_peer_decref(peer);
+}
+
+int
+ksocknal_get_peer_info (lnet_ni_t *ni, int index,
+			lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip,
+			int *port, int *conn_count, int *share_count)
+{
+	ksock_peer_t     *peer;
+	struct list_head *ptmp;
+	ksock_route_t    *route;
+	struct list_head *rtmp;
+	int               i;
+	int               j;
+	int               rc = -ENOENT;
+
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+
+		list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
+			peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+			if (peer->ksnp_ni != ni)
+				continue;
+
+			if (peer->ksnp_n_passive_ips == 0 &&
+			    list_empty(&peer->ksnp_routes)) {
+				if (index-- > 0)
+					continue;
+
+				*id = peer->ksnp_id;
+				*myip = 0;
+				*peer_ip = 0;
+				*port = 0;
+				*conn_count = 0;
+				*share_count = 0;
+				rc = 0;
+				goto out;
+			}
+
+			for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
+				if (index-- > 0)
+					continue;
+
+				*id = peer->ksnp_id;
+				*myip = peer->ksnp_passive_ips[j];
+				*peer_ip = 0;
+				*port = 0;
+				*conn_count = 0;
+				*share_count = 0;
+				rc = 0;
+				goto out;
+			}
+
+			list_for_each (rtmp, &peer->ksnp_routes) {
+				if (index-- > 0)
+					continue;
+
+				route = list_entry(rtmp, ksock_route_t,
+						   ksnr_list);
+
+				*id = peer->ksnp_id;
+				*myip = route->ksnr_myipaddr;
+				*peer_ip = route->ksnr_ipaddr;
+				*port = route->ksnr_port;
+				*conn_count = route->ksnr_conn_count;
+				*share_count = route->ksnr_share_count;
+				rc = 0;
+				goto out;
+			}
+		}
+	}
+ out:
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+	return (rc);
+}
+
+void
+ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
+{
+	ksock_peer_t      *peer = route->ksnr_peer;
+	int                type = conn->ksnc_type;
+	ksock_interface_t *iface;
+
+	conn->ksnc_route = route;
+	ksocknal_route_addref(route);
+
+	if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
+		if (route->ksnr_myipaddr == 0) {
+			/* route wasn't bound locally yet (the initial route) */
+			CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n",
+			       libcfs_id2str(peer->ksnp_id),
+			       HIPQUAD(route->ksnr_ipaddr),
+			       HIPQUAD(conn->ksnc_myipaddr));
+		} else {
+			CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from "
+			       "%u.%u.%u.%u to %u.%u.%u.%u\n",
+			       libcfs_id2str(peer->ksnp_id),
+			       HIPQUAD(route->ksnr_ipaddr),
+			       HIPQUAD(route->ksnr_myipaddr),
+			       HIPQUAD(conn->ksnc_myipaddr));
+
+			iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
+						  route->ksnr_myipaddr);
+			if (iface != NULL)
+				iface->ksni_nroutes--;
+		}
+		route->ksnr_myipaddr = conn->ksnc_myipaddr;
+		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
+					  route->ksnr_myipaddr);
+		if (iface != NULL)
+			iface->ksni_nroutes++;
+	}
+
+	route->ksnr_connected |= (1<<type);
+	route->ksnr_conn_count++;
+
+	/* Successful connection => further attempts can
+	 * proceed immediately */
+	route->ksnr_retry_interval = 0;
+}
+
+void
+ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
+{
+	struct list_head *tmp;
+	ksock_conn_t     *conn;
+	ksock_route_t    *route2;
+
+	LASSERT (!peer->ksnp_closing);
+	LASSERT (route->ksnr_peer == NULL);
+	LASSERT (!route->ksnr_scheduled);
+	LASSERT (!route->ksnr_connecting);
+	LASSERT (route->ksnr_connected == 0);
+
+	/* LASSERT(unique) */
+	list_for_each(tmp, &peer->ksnp_routes) {
+		route2 = list_entry(tmp, ksock_route_t, ksnr_list);
+
+		if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
+			CERROR ("Duplicate route %s %u.%u.%u.%u\n",
+				libcfs_id2str(peer->ksnp_id),
+				HIPQUAD(route->ksnr_ipaddr));
+			LBUG();
+		}
+	}
+
+	route->ksnr_peer = peer;
+	ksocknal_peer_addref(peer);
+	/* peer's routelist takes over my ref on 'route' */
+	list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
+
+	list_for_each(tmp, &peer->ksnp_conns) {
+		conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+		if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
+			continue;
+
+		ksocknal_associate_route_conn_locked(route, conn);
+		/* keep going (typed routes) */
+	}
+}
+
+void
+ksocknal_del_route_locked (ksock_route_t *route)
+{
+	ksock_peer_t      *peer = route->ksnr_peer;
+	ksock_interface_t *iface;
+	ksock_conn_t      *conn;
+	struct list_head  *ctmp;
+	struct list_head  *cnxt;
+
+	LASSERT (!route->ksnr_deleted);
+
+	/* Close associated conns */
+	list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
+		conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
+
+		if (conn->ksnc_route != route)
+			continue;
+
+		ksocknal_close_conn_locked (conn, 0);
+	}
+
+	if (route->ksnr_myipaddr != 0) {
+		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
+					  route->ksnr_myipaddr);
+		if (iface != NULL)
+			iface->ksni_nroutes--;
+	}
+
+	route->ksnr_deleted = 1;
+	list_del (&route->ksnr_list);
+	ksocknal_route_decref(route);	     /* drop peer's ref */
+
+	if (list_empty (&peer->ksnp_routes) &&
+	    list_empty (&peer->ksnp_conns)) {
+		/* I've just removed the last route to a peer with no active
+		 * connections */
+		ksocknal_unlink_peer_locked (peer);
+	}
+}
+
+int
+ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
+{
+	struct list_head *tmp;
+	ksock_peer_t     *peer;
+	ksock_peer_t     *peer2;
+	ksock_route_t    *route;
+	ksock_route_t    *route2;
+	int               rc;
+
+	if (id.nid == LNET_NID_ANY ||
+	    id.pid == LNET_PID_ANY)
+		return (-EINVAL);
+
+	/* Have a brand new peer ready... */
+	rc = ksocknal_create_peer(&peer, ni, id);
+	if (rc != 0)
+		return rc;
+
+	route = ksocknal_create_route (ipaddr, port);
+	if (route == NULL) {
+		ksocknal_peer_decref(peer);
+		return (-ENOMEM);
+	}
+
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	/* always called with a ref on ni, so shutdown can't have started */
+	LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
+
+	peer2 = ksocknal_find_peer_locked (ni, id);
+	if (peer2 != NULL) {
+		ksocknal_peer_decref(peer);
+		peer = peer2;
+	} else {
+		/* peer table takes my ref on peer */
+		list_add_tail (&peer->ksnp_list,
+			       ksocknal_nid2peerlist (id.nid));
+	}
+
+	route2 = NULL;
+	list_for_each (tmp, &peer->ksnp_routes) {
+		route2 = list_entry(tmp, ksock_route_t, ksnr_list);
+
+		if (route2->ksnr_ipaddr == ipaddr)
+			break;
+
+		route2 = NULL;
+	}
+	if (route2 == NULL) {
+		ksocknal_add_route_locked(peer, route);
+		route->ksnr_share_count++;
+	} else {
+		ksocknal_route_decref(route);
+		route2->ksnr_share_count++;
+	}
+
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+	return (0);
+}
+
+void
+ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip)
+{
+	ksock_conn_t     *conn;
+	ksock_route_t    *route;
+	struct list_head *tmp;
+	struct list_head *nxt;
+	int               nshared;
+
+	LASSERT (!peer->ksnp_closing);
+
+	/* Extra ref prevents peer disappearing until I'm done with it */
+	ksocknal_peer_addref(peer);
+
+	list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
+		route = list_entry(tmp, ksock_route_t, ksnr_list);
+
+		/* no match */
+		if (!(ip == 0 || route->ksnr_ipaddr == ip))
+			continue;
+
+		route->ksnr_share_count = 0;
+		/* This deletes associated conns too */
+		ksocknal_del_route_locked (route);
+	}
+
+	nshared = 0;
+	list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
+		route = list_entry(tmp, ksock_route_t, ksnr_list);
+		nshared += route->ksnr_share_count;
+	}
+
+	if (nshared == 0) {
+		/* remove everything else if there are no explicit entries
+		 * left */
+
+		list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
+			route = list_entry(tmp, ksock_route_t, ksnr_list);
+
+			/* we should only be removing auto-entries */
+			LASSERT(route->ksnr_share_count == 0);
+			ksocknal_del_route_locked (route);
+		}
+
+		list_for_each_safe (tmp, nxt, &peer->ksnp_conns) {
+			conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+			ksocknal_close_conn_locked(conn, 0);
+		}
+	}
+
+	ksocknal_peer_decref(peer);
+	/* NB peer unlinks itself when last conn/route is removed */
+}
+
+int
+ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
+{
+	LIST_HEAD     (zombies);
+	struct list_head *ptmp;
+	struct list_head *pnxt;
+	ksock_peer_t     *peer;
+	int               lo;
+	int               hi;
+	int               i;
+	int               rc = -ENOENT;
+
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	if (id.nid != LNET_NID_ANY)
+		lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
+	else {
+		lo = 0;
+		hi = ksocknal_data.ksnd_peer_hash_size - 1;
+	}
+
+	for (i = lo; i <= hi; i++) {
+		list_for_each_safe (ptmp, pnxt,
+				    &ksocknal_data.ksnd_peers[i]) {
+			peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+			if (peer->ksnp_ni != ni)
+				continue;
+
+			if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
+			      (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
+				continue;
+
+			ksocknal_peer_addref(peer);     /* a ref for me... */
+
+			ksocknal_del_peer_locked (peer, ip);
+
+			if (peer->ksnp_closing &&
+			    !list_empty(&peer->ksnp_tx_queue)) {
+				LASSERT (list_empty(&peer->ksnp_conns));
+				LASSERT (list_empty(&peer->ksnp_routes));
+
+				list_splice_init(&peer->ksnp_tx_queue,
+						 &zombies);
+			}
+
+			ksocknal_peer_decref(peer);     /* ...till here */
+
+			rc = 0;		 /* matched! */
+		}
+	}
+
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+	ksocknal_txlist_done(ni, &zombies, 1);
+
+	return (rc);
+}
+
+ksock_conn_t *
+ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
+{
+	ksock_peer_t     *peer;
+	struct list_head *ptmp;
+	ksock_conn_t     *conn;
+	struct list_head *ctmp;
+	int               i;
+
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+		list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
+			peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+			LASSERT (!peer->ksnp_closing);
+
+			if (peer->ksnp_ni != ni)
+				continue;
+
+			list_for_each (ctmp, &peer->ksnp_conns) {
+				if (index-- > 0)
+					continue;
+
+				conn = list_entry (ctmp, ksock_conn_t,
+						   ksnc_list);
+				ksocknal_conn_addref(conn);
+				read_unlock(&ksocknal_data.ksnd_global_lock);
+				return (conn);
+			}
+		}
+	}
+
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+	return (NULL);
+}
+
+ksock_sched_t *
+ksocknal_choose_scheduler_locked(unsigned int cpt)
+{
+	struct ksock_sched_info	*info = ksocknal_data.ksnd_sched_info[cpt];
+	ksock_sched_t		*sched;
+	int			i;
+
+	LASSERT(info->ksi_nthreads > 0);
+
+	sched = &info->ksi_scheds[0];
+	/*
+	 * NB: it's safe so far, but info->ksi_nthreads could be changed
+	 * at runtime when we have dynamic LNet configuration, then we
+	 * need to take care of this.
+	 */
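+	/* pick the scheduler with the fewest attached conns so that new
+	 * connections are spread across this CPT's threads */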
+	for (i = 1; i < info->ksi_nthreads; i++) {
+		if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
+			sched = &info->ksi_scheds[i];
+	}
+
+	return sched;
+}
+
+int
+ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
+{
+	ksock_net_t       *net = ni->ni_data;
+	int		i;
+	int		nip;
+
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	nip = net->ksnn_ninterfaces;
+	LASSERT (nip <= LNET_MAX_INTERFACES);
+
+	/* Only offer interfaces for additional connections if I have
+	 * more than one. */
+	if (nip < 2) {
+		read_unlock(&ksocknal_data.ksnd_global_lock);
+		return 0;
+	}
+
+	for (i = 0; i < nip; i++) {
+		ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
+		LASSERT (ipaddrs[i] != 0);
+	}
+
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+	return (nip);
+}
+
+int
+ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
+{
+	int   best_netmatch = 0;
+	int   best_xor      = 0;
+	int   best	  = -1;
+	int   this_xor;
+	int   this_netmatch;
+	int   i;
+
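+	/* Prefer a peer IP on this interface's subnet; break ties by the
+	 * smallest XOR "distance" from the interface address */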
+	for (i = 0; i < nips; i++) {
+		if (ips[i] == 0)
+			continue;
+
+		this_xor = (ips[i] ^ iface->ksni_ipaddr);
+		this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
+
+		if (!(best < 0 ||
+		      best_netmatch < this_netmatch ||
+		      (best_netmatch == this_netmatch &&
+		       best_xor > this_xor)))
+			continue;
+
+		best = i;
+		best_netmatch = this_netmatch;
+		best_xor = this_xor;
+	}
+
+	LASSERT (best >= 0);
+	return (best);
+}
+
+int
+ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
+{
+	rwlock_t		*global_lock = &ksocknal_data.ksnd_global_lock;
+	ksock_net_t	*net = peer->ksnp_ni->ni_data;
+	ksock_interface_t  *iface;
+	ksock_interface_t  *best_iface;
+	int		 n_ips;
+	int		 i;
+	int		 j;
+	int		 k;
+	__u32	       ip;
+	__u32	       xor;
+	int		 this_netmatch;
+	int		 best_netmatch;
+	int		 best_npeers;
+
+	/* CAVEAT EMPTOR: We do all our interface matching with an
+	 * exclusive hold of global lock at IRQ priority.  We're only
+	 * expecting to be dealing with small numbers of interfaces, so the
+	 * O(n**3)-ness shouldn't matter */
+
+	/* Also note that I'm not going to return more than n_peerips
+	 * interfaces, even if I have more myself */
+
+	write_lock_bh(global_lock);
+
+	LASSERT (n_peerips <= LNET_MAX_INTERFACES);
+	LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
+
+	/* Only match interfaces for additional connections
+	 * if I have > 1 interface */
+	n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
+		MIN(n_peerips, net->ksnn_ninterfaces);
+
+	for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
+		/*	      ^ yes really... */
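+		/* NB the loop condition tests ksnp_n_passive_ips, not i; it
+		 * runs until enough passive IPs have been assigned */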
+
+		/* If we have any new interfaces, first tick off all the
+		 * peer IPs that match old interfaces, then choose new
+		 * interfaces to match the remaining peer IPs.
+		 * We don't forget interfaces we've stopped using; we might
+		 * start using them again... */
+
+		if (i < peer->ksnp_n_passive_ips) {
+			/* Old interface. */
+			ip = peer->ksnp_passive_ips[i];
+			best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
+
+			/* peer passive ips are kept up to date */
+			LASSERT(best_iface != NULL);
+		} else {
+			/* choose a new interface */
+			LASSERT (i == peer->ksnp_n_passive_ips);
+
+			best_iface = NULL;
+			best_netmatch = 0;
+			best_npeers = 0;
+
+			for (j = 0; j < net->ksnn_ninterfaces; j++) {
+				iface = &net->ksnn_interfaces[j];
+				ip = iface->ksni_ipaddr;
+
+				for (k = 0; k < peer->ksnp_n_passive_ips; k++)
+					if (peer->ksnp_passive_ips[k] == ip)
+						break;
+
+				if (k < peer->ksnp_n_passive_ips) /* using it already */
+					continue;
+
+				k = ksocknal_match_peerip(iface, peerips, n_peerips);
+				xor = (ip ^ peerips[k]);
+				this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
+
+				if (!(best_iface == NULL ||
+				      best_netmatch < this_netmatch ||
+				      (best_netmatch == this_netmatch &&
+				       best_npeers > iface->ksni_npeers)))
+					continue;
+
+				best_iface = iface;
+				best_netmatch = this_netmatch;
+				best_npeers = iface->ksni_npeers;
+			}
+
+			best_iface->ksni_npeers++;
+			ip = best_iface->ksni_ipaddr;
+			peer->ksnp_passive_ips[i] = ip;
+			peer->ksnp_n_passive_ips = i+1;
+		}
+
+		LASSERT (best_iface != NULL);
+
+		/* mark the best matching peer IP used */
+		j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
+		peerips[j] = 0;
+	}
+
+	/* Overwrite input peer IP addresses */
+	memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
+
+	write_unlock_bh(global_lock);
+
+	return (n_ips);
+}
+
+void
+ksocknal_create_routes(ksock_peer_t *peer, int port,
+		       __u32 *peer_ipaddrs, int npeer_ipaddrs)
+{
+	ksock_route_t       *newroute = NULL;
+	rwlock_t		*global_lock = &ksocknal_data.ksnd_global_lock;
+	lnet_ni_t	   *ni = peer->ksnp_ni;
+	ksock_net_t	 *net = ni->ni_data;
+	struct list_head	  *rtmp;
+	ksock_route_t       *route;
+	ksock_interface_t   *iface;
+	ksock_interface_t   *best_iface;
+	int		  best_netmatch;
+	int		  this_netmatch;
+	int		  best_nroutes;
+	int		  i;
+	int		  j;
+
+	/* CAVEAT EMPTOR: We do all our interface matching with an
+	 * exclusive hold of global lock at IRQ priority.  We're only
+	 * expecting to be dealing with small numbers of interfaces, so the
+	 * O(n**3)-ness here shouldn't matter */
+
+	write_lock_bh(global_lock);
+
+	if (net->ksnn_ninterfaces < 2) {
+		/* Only create additional connections
+		 * if I have > 1 interface */
+		write_unlock_bh(global_lock);
+		return;
+	}
+
+	LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES);
+
+	for (i = 0; i < npeer_ipaddrs; i++) {
+		if (newroute != NULL) {
+			newroute->ksnr_ipaddr = peer_ipaddrs[i];
+		} else {
+			write_unlock_bh(global_lock);
+
+			newroute = ksocknal_create_route(peer_ipaddrs[i], port);
+			if (newroute == NULL)
+				return;
+
+			write_lock_bh(global_lock);
+		}
+
+		if (peer->ksnp_closing) {
+			/* peer got closed under me */
+			break;
+		}
+
+		/* Already got a route? */
+		route = NULL;
+		list_for_each(rtmp, &peer->ksnp_routes) {
+			route = list_entry(rtmp, ksock_route_t, ksnr_list);
+
+			if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
+				break;
+
+			route = NULL;
+		}
+		if (route != NULL)
+			continue;
+
+		best_iface = NULL;
+		best_nroutes = 0;
+		best_netmatch = 0;
+
+		LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
+
+		/* Select interface to connect from */
+		for (j = 0; j < net->ksnn_ninterfaces; j++) {
+			iface = &net->ksnn_interfaces[j];
+
+			/* Using this interface already? */
+			list_for_each(rtmp, &peer->ksnp_routes) {
+				route = list_entry(rtmp, ksock_route_t,
+						       ksnr_list);
+
+				if (route->ksnr_myipaddr == iface->ksni_ipaddr)
+					break;
+
+				route = NULL;
+			}
+			if (route != NULL)
+				continue;
+
+			this_netmatch = (((iface->ksni_ipaddr ^
+					   newroute->ksnr_ipaddr) &
+					   iface->ksni_netmask) == 0) ? 1 : 0;
+
+			if (!(best_iface == NULL ||
+			      best_netmatch < this_netmatch ||
+			      (best_netmatch == this_netmatch &&
+			       best_nroutes > iface->ksni_nroutes)))
+				continue;
+
+			best_iface = iface;
+			best_netmatch = this_netmatch;
+			best_nroutes = iface->ksni_nroutes;
+		}
+
+		if (best_iface == NULL)
+			continue;
+
+		newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
+		best_iface->ksni_nroutes++;
+
+		ksocknal_add_route_locked(peer, newroute);
+		newroute = NULL;
+	}
+
+	write_unlock_bh(global_lock);
+	if (newroute != NULL)
+		ksocknal_route_decref(newroute);
+}
+
+int
+ksocknal_accept (lnet_ni_t *ni, socket_t *sock)
+{
+	ksock_connreq_t    *cr;
+	int		 rc;
+	__u32	       peer_ip;
+	int		 peer_port;
+
+	rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
+	LASSERT (rc == 0);		      /* we succeeded before */
+
+	LIBCFS_ALLOC(cr, sizeof(*cr));
+	if (cr == NULL) {
+		LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from "
+				   "%u.%u.%u.%u: memory exhausted\n",
+				   HIPQUAD(peer_ip));
+		return -ENOMEM;
+	}
+
+	lnet_ni_addref(ni);
+	cr->ksncr_ni   = ni;
+	cr->ksncr_sock = sock;
+
+	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+
+	list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
+	wake_up(&ksocknal_data.ksnd_connd_waitq);
+
+	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+	return 0;
+}
+
+int
+ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr)
+{
+	ksock_route_t   *route;
+
+	list_for_each_entry (route, &peer->ksnp_routes, ksnr_list) {
+
+		if (route->ksnr_ipaddr == ipaddr)
+			return route->ksnr_connecting;
+	}
+	return 0;
+}
+
+int
+ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
+		      socket_t *sock, int type)
+{
+	rwlock_t		*global_lock = &ksocknal_data.ksnd_global_lock;
+	LIST_HEAD     (zombies);
+	lnet_process_id_t  peerid;
+	struct list_head	*tmp;
+	__u64	      incarnation;
+	ksock_conn_t      *conn;
+	ksock_conn_t      *conn2;
+	ksock_peer_t      *peer = NULL;
+	ksock_peer_t      *peer2;
+	ksock_sched_t     *sched;
+	ksock_hello_msg_t *hello;
+	int		   cpt;
+	ksock_tx_t	*tx;
+	ksock_tx_t	*txtmp;
+	int		rc;
+	int		active;
+	char	      *warn = NULL;
+
+	active = (route != NULL);
+
+	LASSERT (active == (type != SOCKLND_CONN_NONE));
+
+	LIBCFS_ALLOC(conn, sizeof(*conn));
+	if (conn == NULL) {
+		rc = -ENOMEM;
+		goto failed_0;
+	}
+
+	memset (conn, 0, sizeof (*conn));
+
+	conn->ksnc_peer = NULL;
+	conn->ksnc_route = NULL;
+	conn->ksnc_sock = sock;
+	/* 2 ref, 1 for conn, another extra ref prevents socket
+	 * being closed before establishment of connection */
+	atomic_set (&conn->ksnc_sock_refcount, 2);
+	conn->ksnc_type = type;
+	ksocknal_lib_save_callback(sock, conn);
+	atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
+
+	conn->ksnc_rx_ready = 0;
+	conn->ksnc_rx_scheduled = 0;
+
+	INIT_LIST_HEAD (&conn->ksnc_tx_queue);
+	conn->ksnc_tx_ready = 0;
+	conn->ksnc_tx_scheduled = 0;
+	conn->ksnc_tx_carrier = NULL;
+	atomic_set (&conn->ksnc_tx_nob, 0);
+
+	LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t,
+				     kshm_ips[LNET_MAX_INTERFACES]));
+	if (hello == NULL) {
+		rc = -ENOMEM;
+		goto failed_1;
+	}
+
+	/* stash conn's local and remote addrs */
+	rc = ksocknal_lib_get_conn_addrs (conn);
+	if (rc != 0)
+		goto failed_1;
+
+	/* Find out/confirm peer's NID and connection type and get the
+	 * vector of interfaces she's willing to let me connect to.
+	 * Passive connections use the listener timeout since the peer sends
+	 * eagerly */
+
+	if (active) {
+		peer = route->ksnr_peer;
+		LASSERT(ni == peer->ksnp_ni);
+
+		/* Active connection sends HELLO eagerly */
+		hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
+		peerid = peer->ksnp_id;
+
+		write_lock_bh(global_lock);
+		conn->ksnc_proto = peer->ksnp_proto;
+		write_unlock_bh(global_lock);
+
+		if (conn->ksnc_proto == NULL) {
+			conn->ksnc_proto = &ksocknal_protocol_v3x;
+#if SOCKNAL_VERSION_DEBUG
+			if (*ksocknal_tunables.ksnd_protocol == 2)
+				conn->ksnc_proto = &ksocknal_protocol_v2x;
+			else if (*ksocknal_tunables.ksnd_protocol == 1)
+				conn->ksnc_proto = &ksocknal_protocol_v1x;
+#endif
+		}
+
+		rc = ksocknal_send_hello (ni, conn, peerid.nid, hello);
+		if (rc != 0)
+			goto failed_1;
+	} else {
+		peerid.nid = LNET_NID_ANY;
+		peerid.pid = LNET_PID_ANY;
+
+		/* Passive, get protocol from peer */
+		conn->ksnc_proto = NULL;
+	}
+
+	rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation);
+	if (rc < 0)
+		goto failed_1;
+
+	LASSERT (rc == 0 || active);
+	LASSERT (conn->ksnc_proto != NULL);
+	LASSERT (peerid.nid != LNET_NID_ANY);
+
+	cpt = lnet_cpt_of_nid(peerid.nid);
+
+	if (active) {
+		ksocknal_peer_addref(peer);
+		write_lock_bh(global_lock);
+	} else {
+		rc = ksocknal_create_peer(&peer, ni, peerid);
+		if (rc != 0)
+			goto failed_1;
+
+		write_lock_bh(global_lock);
+
+		/* called with a ref on ni, so shutdown can't have started */
+		LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
+
+		peer2 = ksocknal_find_peer_locked(ni, peerid);
+		if (peer2 == NULL) {
+			/* NB this puts an "empty" peer in the peer
+			 * table (which takes my ref) */
+			list_add_tail(&peer->ksnp_list,
+					  ksocknal_nid2peerlist(peerid.nid));
+		} else {
+			ksocknal_peer_decref(peer);
+			peer = peer2;
+		}
+
+		/* +1 ref for me */
+		ksocknal_peer_addref(peer);
+		peer->ksnp_accepting++;
+
+		/* Am I already connecting to this guy?  Resolve in
+		 * favour of higher NID... */
+		if (peerid.nid < ni->ni_nid &&
+		    ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
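+			/* NB rc is positive: this is not a fatal error;
+			 * failed_2 replies with SOCKLND_CONN_NONE so the
+			 * peer retries */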
+			rc = EALREADY;
+			warn = "connection race resolution";
+			goto failed_2;
+		}
+	}
+
+	if (peer->ksnp_closing ||
+	    (active && route->ksnr_deleted)) {
+		/* peer/route got closed under me */
+		rc = -ESTALE;
+		warn = "peer/route removed";
+		goto failed_2;
+	}
+
+	if (peer->ksnp_proto == NULL) {
+		/* Never connected before.
+		 * NB recv_hello may have returned EPROTO to signal my peer
+		 * wants a different protocol than the one I asked for.
+		 */
+		LASSERT (list_empty(&peer->ksnp_conns));
+
+		peer->ksnp_proto = conn->ksnc_proto;
+		peer->ksnp_incarnation = incarnation;
+	}
+
+	if (peer->ksnp_proto != conn->ksnc_proto ||
+	    peer->ksnp_incarnation != incarnation) {
+		/* Peer rebooted or I've got the wrong protocol version */
+		ksocknal_close_peer_conns_locked(peer, 0, 0);
+
+		peer->ksnp_proto = NULL;
+		rc = ESTALE;
+		warn = peer->ksnp_incarnation != incarnation ?
+		       "peer rebooted" :
+		       "wrong proto version";
+		goto failed_2;
+	}
+
+	switch (rc) {
+	default:
+		LBUG();
+	case 0:
+		break;
+	case EALREADY:
+		warn = "lost conn race";
+		goto failed_2;
+	case EPROTO:
+		warn = "retry with different protocol version";
+		goto failed_2;
+	}
+
+	/* Refuse to duplicate an existing connection, unless this is a
+	 * loopback connection */
+	if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
+		list_for_each(tmp, &peer->ksnp_conns) {
+			conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+			if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
+			    conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
+			    conn2->ksnc_type != conn->ksnc_type)
+				continue;
+
+			/* Reply on a passive connection attempt so the peer
+			 * realises we're connected. */
+			LASSERT (rc == 0);
+			if (!active)
+				rc = EALREADY;
+
+			warn = "duplicate";
+			goto failed_2;
+		}
+	}
+
+	/* If the connection created by this route didn't bind to the IP
+	 * address the route connected to, the connection/route matching
+	 * code below probably isn't going to work. */
+	if (active &&
+	    route->ksnr_ipaddr != conn->ksnc_ipaddr) {
+		CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n",
+		       libcfs_id2str(peer->ksnp_id),
+		       HIPQUAD(route->ksnr_ipaddr),
+		       HIPQUAD(conn->ksnc_ipaddr));
+	}
+
+	/* Search for a route corresponding to the new connection and
+	 * create an association.  This allows incoming connections created
+	 * by routes in my peer to match my own route entries so I don't
+	 * continually create duplicate routes. */
+	list_for_each (tmp, &peer->ksnp_routes) {
+		route = list_entry(tmp, ksock_route_t, ksnr_list);
+
+		if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
+			continue;
+
+		ksocknal_associate_route_conn_locked(route, conn);
+		break;
+	}
+
+	conn->ksnc_peer = peer;		 /* conn takes my ref on peer */
+	peer->ksnp_last_alive = cfs_time_current();
+	peer->ksnp_send_keepalive = 0;
+	peer->ksnp_error = 0;
+
+	sched = ksocknal_choose_scheduler_locked(cpt);
+	sched->kss_nconns++;
+	conn->ksnc_scheduler = sched;
+
+	conn->ksnc_tx_last_post = cfs_time_current();
+	/* Set the deadline for the outgoing HELLO to drain */
+	conn->ksnc_tx_bufnob = cfs_sock_wmem_queued(sock);
+	conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+	mb();   /* order with adding to peer's conn list */
+
+	list_add (&conn->ksnc_list, &peer->ksnp_conns);
+	ksocknal_conn_addref(conn);
+
+	ksocknal_new_packet(conn, 0);
+
+	conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
+
+	/* Take packets blocking for this connection. */
+	list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
+		if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) == SOCKNAL_MATCH_NO)
+			continue;
+
+		list_del (&tx->tx_list);
+		ksocknal_queue_tx_locked (tx, conn);
+	}
+
+	write_unlock_bh(global_lock);
+
+	/* We've now got a new connection.  Any errors from here on are just
+	 * like "normal" comms errors and we close the connection normally.
+	 * NB (a) we still have to send the reply HELLO for passive
+	 *	connections,
+	 *    (b) normal I/O on the conn is blocked until I setup and call the
+	 *	socket callbacks.
+	 */
+
+	CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d"
+	       " incarnation:"LPD64" sched[%d:%d]\n",
+	       libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
+	       HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr),
+	       conn->ksnc_port, incarnation, cpt,
+	       (int)(sched - &sched->kss_info->ksi_scheds[0]));
+
+	if (active) {
+		/* additional routes after interface exchange? */
+		ksocknal_create_routes(peer, conn->ksnc_port,
+				       hello->kshm_ips, hello->kshm_nips);
+	} else {
+		hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
+						       hello->kshm_nips);
+		rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
+	}
+
+	LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
+				    kshm_ips[LNET_MAX_INTERFACES]));
+
+	/* setup the socket AFTER I've received hello (it disables
+	 * SO_LINGER).  I might call back to the acceptor who may want
+	 * to send a protocol version response and then close the
+	 * socket; this ensures the socket only tears down after the
+	 * response has been sent. */
+	if (rc == 0)
+		rc = ksocknal_lib_setup_sock(sock);
+
+	write_lock_bh(global_lock);
+
+	/* NB my callbacks block while I hold ksnd_global_lock */
+	ksocknal_lib_set_callback(sock, conn);
+
+	if (!active)
+		peer->ksnp_accepting--;
+
+	write_unlock_bh(global_lock);
+
+	if (rc != 0) {
+		write_lock_bh(global_lock);
+		if (!conn->ksnc_closing) {
+			/* could be closed by another thread */
+			ksocknal_close_conn_locked(conn, rc);
+		}
+		write_unlock_bh(global_lock);
+	} else if (ksocknal_connsock_addref(conn) == 0) {
+		/* Allow I/O to proceed. */
+		ksocknal_read_callback(conn);
+		ksocknal_write_callback(conn);
+		ksocknal_connsock_decref(conn);
+	}
+
+	ksocknal_connsock_decref(conn);
+	ksocknal_conn_decref(conn);
+	return rc;
+
+ failed_2:
+	if (!peer->ksnp_closing &&
+	    list_empty (&peer->ksnp_conns) &&
+	    list_empty (&peer->ksnp_routes)) {
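+		/* list_add() hooks the local 'zombies' head into the tx
+		 * queue and list_del_init() then unhooks the old head: the
+		 * whole queue is stolen so it can be drained after the
+		 * lock is dropped */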
+		list_add(&zombies, &peer->ksnp_tx_queue);
+		list_del_init(&peer->ksnp_tx_queue);
+		ksocknal_unlink_peer_locked(peer);
+	}
+
+	write_unlock_bh(global_lock);
+
+	if (warn != NULL) {
+		if (rc < 0)
+			CERROR("Not creating conn %s type %d: %s\n",
+			       libcfs_id2str(peerid), conn->ksnc_type, warn);
+		else
+			CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
+			      libcfs_id2str(peerid), conn->ksnc_type, warn);
+	}
+
+	if (!active) {
+		if (rc > 0) {
+			/* Request retry by replying with CONN_NONE;
+			 * ksnc_proto has already been set */
+			conn->ksnc_type = SOCKLND_CONN_NONE;
+			hello->kshm_nips = 0;
+			ksocknal_send_hello(ni, conn, peerid.nid, hello);
+		}
+
+		write_lock_bh(global_lock);
+		peer->ksnp_accepting--;
+		write_unlock_bh(global_lock);
+	}
+
+	ksocknal_txlist_done(ni, &zombies, 1);
+	ksocknal_peer_decref(peer);
+
+ failed_1:
+	if (hello != NULL)
+		LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
+					    kshm_ips[LNET_MAX_INTERFACES]));
+
+	LIBCFS_FREE (conn, sizeof(*conn));
+
+ failed_0:
+	libcfs_sock_release(sock);
+	return rc;
+}
+
+void
+ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
+{
+	/* This just does the immediate housekeeping, and queues the
+	 * connection for the reaper to terminate.
+	 * Caller holds ksnd_global_lock exclusively in irq context */
+	ksock_peer_t      *peer = conn->ksnc_peer;
+	ksock_route_t     *route;
+	ksock_conn_t      *conn2;
+	struct list_head	*tmp;
+
+	LASSERT (peer->ksnp_error == 0);
+	LASSERT (!conn->ksnc_closing);
+	conn->ksnc_closing = 1;
+
+	/* ksnd_deathrow_conns takes over peer's ref */
+	list_del (&conn->ksnc_list);
+
+	route = conn->ksnc_route;
+	if (route != NULL) {
+		/* dissociate conn from route... */
+		LASSERT (!route->ksnr_deleted);
+		LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
+
+		conn2 = NULL;
+		list_for_each(tmp, &peer->ksnp_conns) {
+			conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+			if (conn2->ksnc_route == route &&
+			    conn2->ksnc_type == conn->ksnc_type)
+				break;
+
+			conn2 = NULL;
+		}
+		if (conn2 == NULL)
+			route->ksnr_connected &= ~(1 << conn->ksnc_type);
+
+		conn->ksnc_route = NULL;
+
+#if 0	   /* irrelevant with only eager routes */
+		/* make route least favourite */
+		list_del (&route->ksnr_list);
+		list_add_tail (&route->ksnr_list, &peer->ksnp_routes);
+#endif
+		ksocknal_route_decref(route);     /* drop conn's ref on route */
+	}
+
+	if (list_empty (&peer->ksnp_conns)) {
+		/* No more connections to this peer */
+
+		if (!list_empty(&peer->ksnp_tx_queue)) {
+			ksock_tx_t *tx;
+
+			LASSERT (conn->ksnc_proto == &ksocknal_protocol_v3x);
+
+			/* throw them to the last connection...
+			 * these TXs will be sent to /dev/null by the scheduler */
+			list_for_each_entry(tx, &peer->ksnp_tx_queue,
+						tx_list)
+				ksocknal_tx_prep(conn, tx);
+
+			spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
+			list_splice_init(&peer->ksnp_tx_queue,
+					     &conn->ksnc_tx_queue);
+			spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
+		}
+
+		peer->ksnp_proto = NULL;	/* renegotiate protocol version */
+		peer->ksnp_error = error;       /* stash last conn close reason */
+
+		if (list_empty (&peer->ksnp_routes)) {
+			/* I've just closed last conn belonging to a
+			 * peer with no routes to it */
+			ksocknal_unlink_peer_locked (peer);
+		}
+	}
+
+	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+	list_add_tail(&conn->ksnc_list,
+			  &ksocknal_data.ksnd_deathrow_conns);
+	wake_up(&ksocknal_data.ksnd_reaper_waitq);
+
+	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+}
+
+void
+ksocknal_peer_failed (ksock_peer_t *peer)
+{
+	int	notify = 0;
+	cfs_time_t last_alive = 0;
+
+	/* There has been a connection failure or comms error; but I'll only
+	 * tell LNET I think the peer is dead if it's to another kernel and
+	 * there are no connections or connection attempts in existence. */
+
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
+	    list_empty(&peer->ksnp_conns) &&
+	    peer->ksnp_accepting == 0 &&
+	    ksocknal_find_connecting_route_locked(peer) == NULL) {
+		notify = 1;
+		last_alive = peer->ksnp_last_alive;
+	}
+
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+
+	if (notify)
+		lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0,
+			     last_alive);
+}
+
+void
+ksocknal_finalize_zcreq(ksock_conn_t *conn)
+{
+	ksock_peer_t     *peer = conn->ksnc_peer;
+	ksock_tx_t       *tx;
+	ksock_tx_t       *tmp;
+	LIST_HEAD    (zlist);
+
+	/* NB safe to finalize TXs because closing of socket will
+	 * abort all buffered data */
+	LASSERT (conn->ksnc_sock == NULL);
+
+	spin_lock(&peer->ksnp_lock);
+
+	list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
+		if (tx->tx_conn != conn)
+			continue;
+
+		LASSERT (tx->tx_msg.ksm_zc_cookies[0] != 0);
+
+		tx->tx_msg.ksm_zc_cookies[0] = 0;
+		tx->tx_zc_aborted = 1; /* mark it as not-acked */
+		list_del(&tx->tx_zc_list);
+		list_add(&tx->tx_zc_list, &zlist);
+	}
+
+	spin_unlock(&peer->ksnp_lock);
+
+	while (!list_empty(&zlist)) {
+		tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
+
+		list_del(&tx->tx_zc_list);
+		ksocknal_tx_decref(tx);
+	}
+}
+
+void
+ksocknal_terminate_conn (ksock_conn_t *conn)
+{
+	/* This gets called by the reaper (guaranteed thread context) to
+	 * disengage the socket from its callbacks and close it.
+	 * ksnc_refcount will eventually hit zero, and then the reaper will
+	 * destroy it. */
+	ksock_peer_t     *peer = conn->ksnc_peer;
+	ksock_sched_t    *sched = conn->ksnc_scheduler;
+	int	       failed = 0;
+
+	LASSERT(conn->ksnc_closing);
+
+	/* wake up the scheduler to "send" all remaining packets to /dev/null */
+	spin_lock_bh(&sched->kss_lock);
+
+	/* a closing conn is always ready to tx */
+	conn->ksnc_tx_ready = 1;
+
+	if (!conn->ksnc_tx_scheduled &&
+	    !list_empty(&conn->ksnc_tx_queue)){
+		list_add_tail (&conn->ksnc_tx_list,
+			       &sched->kss_tx_conns);
+		conn->ksnc_tx_scheduled = 1;
+		/* extra ref for scheduler */
+		ksocknal_conn_addref(conn);
+
+		wake_up (&sched->kss_waitq);
+	}
+
+	spin_unlock_bh(&sched->kss_lock);
+
+	/* serialise with callbacks */
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
+
+	/* OK, so this conn may not be completely disengaged from its
+	 * scheduler yet, but it _has_ committed to terminate... */
+	conn->ksnc_scheduler->kss_nconns--;
+
+	if (peer->ksnp_error != 0) {
+		/* peer's last conn closed in error */
+		LASSERT (list_empty (&peer->ksnp_conns));
+		failed = 1;
+		peer->ksnp_error = 0;     /* avoid multiple notifications */
+	}
+
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+	if (failed)
+		ksocknal_peer_failed(peer);
+
+	/* The socket is closed on the final put; either here, or in
+	 * ksocknal_{send,recv}msg().  Since we set up the linger2 option
+	 * when the connection was established, this will close the socket
+	 * immediately, aborting anything buffered in it. Any hung
+	 * zero-copy transmits will therefore complete in finite time. */
+	ksocknal_connsock_decref(conn);
+}
+
+void
+ksocknal_queue_zombie_conn (ksock_conn_t *conn)
+{
+	/* Queue the conn for the reaper to destroy */
+
+	LASSERT(atomic_read(&conn->ksnc_conn_refcount) == 0);
+	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+	list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
+	wake_up(&ksocknal_data.ksnd_reaper_waitq);
+
+	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+}
+
+void
+ksocknal_destroy_conn (ksock_conn_t *conn)
+{
+	cfs_time_t      last_rcv;
+
+	/* Final coup-de-grace of the reaper */
+	CDEBUG (D_NET, "connection %p\n", conn);
+
+	LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0);
+	LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0);
+	LASSERT (conn->ksnc_sock == NULL);
+	LASSERT (conn->ksnc_route == NULL);
+	LASSERT (!conn->ksnc_tx_scheduled);
+	LASSERT (!conn->ksnc_rx_scheduled);
+	LASSERT (list_empty(&conn->ksnc_tx_queue));
+
+	/* complete current receive if any */
+	switch (conn->ksnc_rx_state) {
+	case SOCKNAL_RX_LNET_PAYLOAD:
+		last_rcv = conn->ksnc_rx_deadline -
+			   cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
+		CERROR("Completing partial receive from %s[%d]"
+		       ", ip %d.%d.%d.%d:%d, with error, wanted: %d, left: %d, "
+		       "last alive is %ld secs ago\n",
+		       libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
+		       HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
+		       conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
+		       cfs_duration_sec(cfs_time_sub(cfs_time_current(),
+					last_rcv)));
+		lnet_finalize (conn->ksnc_peer->ksnp_ni,
+			       conn->ksnc_cookie, -EIO);
+		break;
+	case SOCKNAL_RX_LNET_HEADER:
+		if (conn->ksnc_rx_started)
+			CERROR("Incomplete receive of lnet header from %s"
+			       ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
+			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
+			       HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
+			       conn->ksnc_proto->pro_version);
+		break;
+	case SOCKNAL_RX_KSM_HEADER:
+		if (conn->ksnc_rx_started)
+			CERROR("Incomplete receive of ksock message from %s"
+			       ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
+			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
+			       HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
+			       conn->ksnc_proto->pro_version);
+		break;
+	case SOCKNAL_RX_SLOP:
+		if (conn->ksnc_rx_started)
+			CERROR("Incomplete receive of slops from %s"
+			       ", ip %d.%d.%d.%d:%d, with error\n",
+			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
+			       HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+		break;
+	default:
+		LBUG ();
+		break;
+	}
+
+	ksocknal_peer_decref(conn->ksnc_peer);
+
+	LIBCFS_FREE (conn, sizeof (*conn));
+}
+
+int
+ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
+{
+	ksock_conn_t       *conn;
+	struct list_head	 *ctmp;
+	struct list_head	 *cnxt;
+	int		 count = 0;
+
+	list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
+		conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
+
+		if (ipaddr == 0 ||
+		    conn->ksnc_ipaddr == ipaddr) {
+			count++;
+			ksocknal_close_conn_locked (conn, why);
+		}
+	}
+
+	return (count);
+}
+
+int
+ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
+{
+	ksock_peer_t     *peer = conn->ksnc_peer;
+	__u32	     ipaddr = conn->ksnc_ipaddr;
+	int	       count;
+
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
+
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+	return (count);
+}
+
+int
+ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
+{
+	ksock_peer_t       *peer;
+	struct list_head	 *ptmp;
+	struct list_head	 *pnxt;
+	int		 lo;
+	int		 hi;
+	int		 i;
+	int		 count = 0;
+
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	if (id.nid != LNET_NID_ANY)
+		lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
+	else {
+		lo = 0;
+		hi = ksocknal_data.ksnd_peer_hash_size - 1;
+	}
+
+	for (i = lo; i <= hi; i++) {
+		list_for_each_safe (ptmp, pnxt,
+					&ksocknal_data.ksnd_peers[i]) {
+
+			peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+			if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
+			      (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
+				continue;
+
+			count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
+		}
+	}
+
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+	/* wildcards always succeed */
+	if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
+		return (0);
+
+	return (count == 0 ? -ENOENT : 0);
+}
+
+void
+ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive)
+{
+	/* The router is telling me she's been notified of a change in
+	 * gateway state.... */
+	lnet_process_id_t  id = {0};
+
+	id.nid = gw_nid;
+	id.pid = LNET_PID_ANY;
+
+	CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
+		alive ? "up" : "down");
+
+	if (!alive) {
+		/* If the gateway crashed, close all open connections... */
+		ksocknal_close_matching_conns (id, 0);
+		return;
+	}
+
+	/* ...otherwise do nothing.  We can only establish new connections
+	 * if we have autoroutes, and these connect on demand. */
+}
+
+void
+ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
+{
+	int		connect = 1;
+	cfs_time_t	 last_alive = 0;
+	cfs_time_t	 now = cfs_time_current();
+	ksock_peer_t      *peer = NULL;
+	rwlock_t		*glock = &ksocknal_data.ksnd_global_lock;
+	lnet_process_id_t  id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID};
+
+	read_lock(glock);
+
+	peer = ksocknal_find_peer_locked(ni, id);
+	if (peer != NULL) {
+		struct list_head       *tmp;
+		ksock_conn_t     *conn;
+		int	       bufnob;
+
+		list_for_each (tmp, &peer->ksnp_conns) {
+			conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+			bufnob = cfs_sock_wmem_queued(conn->ksnc_sock);
+
+			if (bufnob < conn->ksnc_tx_bufnob) {
+				/* something got ACKed */
+				conn->ksnc_tx_deadline =
+					cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+				peer->ksnp_last_alive = now;
+				conn->ksnc_tx_bufnob = bufnob;
+			}
+		}
+
+		last_alive = peer->ksnp_last_alive;
+		if (ksocknal_find_connectable_route_locked(peer) == NULL)
+			connect = 0;
+	}
+
+	read_unlock(glock);
+
+	if (last_alive != 0)
+		*when = last_alive;
+
+	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
+	       libcfs_nid2str(nid), peer,
+	       last_alive ? cfs_duration_sec(now - last_alive) : -1,
+	       connect);
+
+	if (!connect)
+		return;
+
+	ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
+
+	write_lock_bh(glock);
+
+	peer = ksocknal_find_peer_locked(ni, id);
+	if (peer != NULL)
+		ksocknal_launch_all_connections_locked(peer);
+
+	write_unlock_bh(glock);
+	return;
+}
+
+void
+ksocknal_push_peer (ksock_peer_t *peer)
+{
+	int	       index;
+	int	       i;
+	struct list_head       *tmp;
+	ksock_conn_t     *conn;
+
+	for (index = 0; ; index++) {
+		read_lock(&ksocknal_data.ksnd_global_lock);
+
+		i = 0;
+		conn = NULL;
+
+		list_for_each (tmp, &peer->ksnp_conns) {
+			if (i++ == index) {
+				conn = list_entry (tmp, ksock_conn_t,
+						       ksnc_list);
+				ksocknal_conn_addref(conn);
+				break;
+			}
+		}
+
+		read_unlock(&ksocknal_data.ksnd_global_lock);
+
+		if (conn == NULL)
+			break;
+
+		ksocknal_lib_push_conn (conn);
+		ksocknal_conn_decref(conn);
+	}
+}
+
+int
+ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id)
+{
+	ksock_peer_t      *peer;
+	struct list_head	*tmp;
+	int		index;
+	int		i;
+	int		j;
+	int		rc = -ENOENT;
+
+	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+		for (j = 0; ; j++) {
+			read_lock(&ksocknal_data.ksnd_global_lock);
+
+			index = 0;
+			peer = NULL;
+
+			list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
+				peer = list_entry(tmp, ksock_peer_t,
+						      ksnp_list);
+
+				if (!((id.nid == LNET_NID_ANY ||
+				       id.nid == peer->ksnp_id.nid) &&
+				      (id.pid == LNET_PID_ANY ||
+				       id.pid == peer->ksnp_id.pid))) {
+					peer = NULL;
+					continue;
+				}
+
+				if (index++ == j) {
+					ksocknal_peer_addref(peer);
+					break;
+				}
+
+				/* matched but not at index j; don't leave
+				 * an unreferenced peer pointer behind */
+				peer = NULL;
+			}
+
+			read_unlock(&ksocknal_data.ksnd_global_lock);
+
+			if (peer == NULL)	/* all peers in this chain done */
+				break;
+
+			rc = 0;
+			ksocknal_push_peer (peer);
+			ksocknal_peer_decref(peer);
+		}
+
+	}
+
+	return (rc);
+}
+
+int
+ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
+{
+	ksock_net_t       *net = ni->ni_data;
+	ksock_interface_t *iface;
+	int		rc;
+	int		i;
+	int		j;
+	struct list_head	*ptmp;
+	ksock_peer_t      *peer;
+	struct list_head	*rtmp;
+	ksock_route_t     *route;
+
+	if (ipaddress == 0 ||
+	    netmask == 0)
+		return (-EINVAL);
+
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	iface = ksocknal_ip2iface(ni, ipaddress);
+	if (iface != NULL) {
+		/* silently ignore dups */
+		rc = 0;
+	} else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
+		rc = -ENOSPC;
+	} else {
+		iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
+
+		iface->ksni_ipaddr = ipaddress;
+		iface->ksni_netmask = netmask;
+		iface->ksni_nroutes = 0;
+		iface->ksni_npeers = 0;
+
+		for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+			list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
+				peer = list_entry(ptmp, ksock_peer_t,
+						      ksnp_list);
+
+				for (j = 0; j < peer->ksnp_n_passive_ips; j++)
+					if (peer->ksnp_passive_ips[j] == ipaddress)
+						iface->ksni_npeers++;
+
+				list_for_each(rtmp, &peer->ksnp_routes) {
+					route = list_entry(rtmp,
+							       ksock_route_t,
+							       ksnr_list);
+
+					if (route->ksnr_myipaddr == ipaddress)
+						iface->ksni_nroutes++;
+				}
+			}
+		}
+
+		rc = 0;
+		/* NB only new connections will pay attention to the new interface! */
+	}
+
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+	return (rc);
+}
+
+void
+ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
+{
+	struct list_head	 *tmp;
+	struct list_head	 *nxt;
+	ksock_route_t      *route;
+	ksock_conn_t       *conn;
+	int		 i;
+	int		 j;
+
+	for (i = 0; i < peer->ksnp_n_passive_ips; i++)
+		if (peer->ksnp_passive_ips[i] == ipaddr) {
+			for (j = i+1; j < peer->ksnp_n_passive_ips; j++)
+				peer->ksnp_passive_ips[j-1] =
+					peer->ksnp_passive_ips[j];
+			peer->ksnp_n_passive_ips--;
+			break;
+		}
+
+	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
+		route = list_entry (tmp, ksock_route_t, ksnr_list);
+
+		if (route->ksnr_myipaddr != ipaddr)
+			continue;
+
+		if (route->ksnr_share_count != 0) {
+			/* Manually created; keep, but unbind */
+			route->ksnr_myipaddr = 0;
+		} else {
+			ksocknal_del_route_locked(route);
+		}
+	}
+
+	list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
+		conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+		if (conn->ksnc_myipaddr == ipaddr)
+			ksocknal_close_conn_locked (conn, 0);
+	}
+}
+
+int
+ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
+{
+	ksock_net_t       *net = ni->ni_data;
+	int		rc = -ENOENT;
+	struct list_head	*tmp;
+	struct list_head	*nxt;
+	ksock_peer_t      *peer;
+	__u32	      this_ip;
+	int		i;
+	int		j;
+
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	for (i = 0; i < net->ksnn_ninterfaces; i++) {
+		this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
+
+		if (!(ipaddress == 0 ||
+		      ipaddress == this_ip))
+			continue;
+
+		rc = 0;
+
+		for (j = i+1; j < net->ksnn_ninterfaces; j++)
+			net->ksnn_interfaces[j-1] =
+				net->ksnn_interfaces[j];
+
+		net->ksnn_ninterfaces--;
+
+		for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
+			list_for_each_safe(tmp, nxt,
+					       &ksocknal_data.ksnd_peers[j]) {
+				peer = list_entry(tmp, ksock_peer_t,
+						      ksnp_list);
+
+				if (peer->ksnp_ni != ni)
+					continue;
+
+				ksocknal_peer_del_interface_locked(peer, this_ip);
+			}
+		}
+	}
+
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+	return (rc);
+}
+
+int
+ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
+{
+	lnet_process_id_t id = {0};
+	struct libcfs_ioctl_data *data = arg;
+	int rc;
+
+	switch (cmd) {
+	case IOC_LIBCFS_GET_INTERFACE: {
+		ksock_net_t       *net = ni->ni_data;
+		ksock_interface_t *iface;
+
+		read_lock(&ksocknal_data.ksnd_global_lock);
+
+		if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
+			rc = -ENOENT;
+		} else {
+			rc = 0;
+			iface = &net->ksnn_interfaces[data->ioc_count];
+
+			data->ioc_u32[0] = iface->ksni_ipaddr;
+			data->ioc_u32[1] = iface->ksni_netmask;
+			data->ioc_u32[2] = iface->ksni_npeers;
+			data->ioc_u32[3] = iface->ksni_nroutes;
+		}
+
+		read_unlock(&ksocknal_data.ksnd_global_lock);
+		return rc;
+	}
+
+	case IOC_LIBCFS_ADD_INTERFACE:
+		return ksocknal_add_interface(ni,
+					      data->ioc_u32[0], /* IP address */
+					      data->ioc_u32[1]); /* net mask */
+
+	case IOC_LIBCFS_DEL_INTERFACE:
+		return ksocknal_del_interface(ni,
+					      data->ioc_u32[0]); /* IP address */
+
+	case IOC_LIBCFS_GET_PEER: {
+		__u32	    myip = 0;
+		__u32	    ip = 0;
+		int	      port = 0;
+		int	      conn_count = 0;
+		int	      share_count = 0;
+
+		rc = ksocknal_get_peer_info(ni, data->ioc_count,
+					    &id, &myip, &ip, &port,
+					    &conn_count,  &share_count);
+		if (rc != 0)
+			return rc;
+
+		data->ioc_nid    = id.nid;
+		data->ioc_count  = share_count;
+		data->ioc_u32[0] = ip;
+		data->ioc_u32[1] = port;
+		data->ioc_u32[2] = myip;
+		data->ioc_u32[3] = conn_count;
+		data->ioc_u32[4] = id.pid;
+		return 0;
+	}
+
+	case IOC_LIBCFS_ADD_PEER:
+		id.nid = data->ioc_nid;
+		id.pid = LUSTRE_SRV_LNET_PID;
+		return ksocknal_add_peer (ni, id,
+					  data->ioc_u32[0], /* IP */
+					  data->ioc_u32[1]); /* port */
+
+	case IOC_LIBCFS_DEL_PEER:
+		id.nid = data->ioc_nid;
+		id.pid = LNET_PID_ANY;
+		return ksocknal_del_peer (ni, id,
+					  data->ioc_u32[0]); /* IP */
+
+	case IOC_LIBCFS_GET_CONN: {
+		int	   txmem;
+		int	   rxmem;
+		int	   nagle;
+		ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count);
+
+		if (conn == NULL)
+			return -ENOENT;
+
+		ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
+
+		data->ioc_count  = txmem;
+		data->ioc_nid    = conn->ksnc_peer->ksnp_id.nid;
+		data->ioc_flags  = nagle;
+		data->ioc_u32[0] = conn->ksnc_ipaddr;
+		data->ioc_u32[1] = conn->ksnc_port;
+		data->ioc_u32[2] = conn->ksnc_myipaddr;
+		data->ioc_u32[3] = conn->ksnc_type;
+		data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
+		data->ioc_u32[5] = rxmem;
+		data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
+		ksocknal_conn_decref(conn);
+		return 0;
+	}
+
+	case IOC_LIBCFS_CLOSE_CONNECTION:
+		id.nid = data->ioc_nid;
+		id.pid = LNET_PID_ANY;
+		return ksocknal_close_matching_conns (id,
+						      data->ioc_u32[0]);
+
+	case IOC_LIBCFS_REGISTER_MYNID:
+		/* Ignore if this is a noop */
+		if (data->ioc_nid == ni->ni_nid)
+			return 0;
+
+		CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
+		       libcfs_nid2str(data->ioc_nid),
+		       libcfs_nid2str(ni->ni_nid));
+		return -EINVAL;
+
+	case IOC_LIBCFS_PUSH_CONNECTION:
+		id.nid = data->ioc_nid;
+		id.pid = LNET_PID_ANY;
+		return ksocknal_push(ni, id);
+
+	default:
+		return -EINVAL;
+	}
+	/* not reached */
+}
+
+void
+ksocknal_free_buffers (void)
+{
+	LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
+
+	if (ksocknal_data.ksnd_sched_info != NULL) {
+		struct ksock_sched_info	*info;
+		int			i;
+
+		cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
+			if (info->ksi_scheds != NULL) {
+				LIBCFS_FREE(info->ksi_scheds,
+					    info->ksi_nthreads_max *
+					    sizeof(info->ksi_scheds[0]));
+			}
+		}
+		cfs_percpt_free(ksocknal_data.ksnd_sched_info);
+	}
+
+	LIBCFS_FREE (ksocknal_data.ksnd_peers,
+		     sizeof (struct list_head) *
+		     ksocknal_data.ksnd_peer_hash_size);
+
+	spin_lock(&ksocknal_data.ksnd_tx_lock);
+
+	if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
+		struct list_head	zlist;
+		ksock_tx_t	*tx;
+
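+		/* steal the whole idle-noop list onto the local 'zlist'
+		 * head so the txs can be freed outside ksnd_tx_lock */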
+		list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
+		list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
+		spin_unlock(&ksocknal_data.ksnd_tx_lock);
+
+		while (!list_empty(&zlist)) {
+			tx = list_entry(zlist.next, ksock_tx_t, tx_list);
+			list_del(&tx->tx_list);
+			LIBCFS_FREE(tx, tx->tx_desc_size);
+		}
+	} else {
+		spin_unlock(&ksocknal_data.ksnd_tx_lock);
+	}
+}
+
+void
+ksocknal_base_shutdown(void)
+{
+	struct ksock_sched_info *info;
+	ksock_sched_t		*sched;
+	int			i;
+	int			j;
+
+	CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
+	       atomic_read (&libcfs_kmemory));
+	LASSERT (ksocknal_data.ksnd_nnets == 0);
+
+	switch (ksocknal_data.ksnd_init) {
+	default:
+		LASSERT (0);
+
+	case SOCKNAL_INIT_ALL:
+	case SOCKNAL_INIT_DATA:
+		LASSERT (ksocknal_data.ksnd_peers != NULL);
+		for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+			LASSERT (list_empty (&ksocknal_data.ksnd_peers[i]));
+		}
+
+		LASSERT(list_empty(&ksocknal_data.ksnd_nets));
+		LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns));
+		LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns));
+		LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs));
+		LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes));
+
+		if (ksocknal_data.ksnd_sched_info != NULL) {
+			cfs_percpt_for_each(info, i,
+					    ksocknal_data.ksnd_sched_info) {
+				if (info->ksi_scheds == NULL)
+					continue;
+
+				for (j = 0; j < info->ksi_nthreads_max; j++) {
+
+					sched = &info->ksi_scheds[j];
+					LASSERT(list_empty(&sched->kss_tx_conns));
+					LASSERT(list_empty(&sched->kss_rx_conns));
+					LASSERT(list_empty(&sched->kss_zombie_noop_txs));
+					LASSERT(sched->kss_nconns == 0);
+				}
+			}
+		}
+
+		/* flag threads to terminate; wake and wait for them to die */
+		ksocknal_data.ksnd_shuttingdown = 1;
+		wake_up_all(&ksocknal_data.ksnd_connd_waitq);
+		wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
+
+		if (ksocknal_data.ksnd_sched_info != NULL) {
+			cfs_percpt_for_each(info, i,
+					    ksocknal_data.ksnd_sched_info) {
+				if (info->ksi_scheds == NULL)
+					continue;
+
+				for (j = 0; j < info->ksi_nthreads_max; j++) {
+					sched = &info->ksi_scheds[j];
+					wake_up_all(&sched->kss_waitq);
+				}
+			}
+		}
+
+		i = 4;
+		read_lock(&ksocknal_data.ksnd_global_lock);
+		while (ksocknal_data.ksnd_nthreads != 0) {
+			i++;
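+			/* (i & -i) == i only when i is a power of 2, so the
+			 * warning is printed progressively less often */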
+			CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+			       "waiting for %d threads to terminate\n",
+				ksocknal_data.ksnd_nthreads);
+			read_unlock(&ksocknal_data.ksnd_global_lock);
+			cfs_pause(cfs_time_seconds(1));
+			read_lock(&ksocknal_data.ksnd_global_lock);
+		}
+		read_unlock(&ksocknal_data.ksnd_global_lock);
+
+		ksocknal_free_buffers();
+
+		ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
+		break;
+	}
+
+	CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
+	       atomic_read (&libcfs_kmemory));
+
+	module_put(THIS_MODULE);
+}
+
+__u64
+ksocknal_new_incarnation (void)
+{
+	struct timeval tv;
+
+	/* The incarnation number is the time this module loaded and it
+	 * identifies this particular instance of the socknal.  Hopefully
+	 * we won't be able to reboot more frequently than 1MHz for the
+	 * foreseeable future :) */
+
+	do_gettimeofday(&tv);
+
+	return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+}
+
+int
+ksocknal_base_startup(void)
+{
+	struct ksock_sched_info	*info;
+	int			rc;
+	int			i;
+
+	LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
+	LASSERT (ksocknal_data.ksnd_nnets == 0);
+
+	memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
+
+	ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
+	LIBCFS_ALLOC (ksocknal_data.ksnd_peers,
+		      sizeof (struct list_head) *
+		      ksocknal_data.ksnd_peer_hash_size);
+	if (ksocknal_data.ksnd_peers == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
+		INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
+
+	rwlock_init(&ksocknal_data.ksnd_global_lock);
+	INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
+
+	spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
+	INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
+	INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
+	INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
+	init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
+
+	spin_lock_init(&ksocknal_data.ksnd_connd_lock);
+	INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs);
+	INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes);
+	init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
+
+	spin_lock_init(&ksocknal_data.ksnd_tx_lock);
+	INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs);
+
+	/* NB memset above zeros the whole of ksocknal_data */
+
+	/* flag lists/ptrs/locks initialised */
+	ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
+	try_module_get(THIS_MODULE);
+
+	ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
+							 sizeof(*info));
+	if (ksocknal_data.ksnd_sched_info == NULL)
+		goto failed;
+
+	cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
+		ksock_sched_t	*sched;
+		int		nthrs;
+
+		nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
+		if (*ksocknal_tunables.ksnd_nscheds > 0) {
+			nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
+		} else {
+			/* cap at half of the CPUs, assuming the other half
+			 * should be reserved for upper layer modules */
+			nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
+		}
+
+		info->ksi_nthreads_max = nthrs;
+		info->ksi_cpt = i;
+
+		LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i,
+				 info->ksi_nthreads_max * sizeof(*sched));
+		if (info->ksi_scheds == NULL)
+			goto failed;
+
+		for (; nthrs > 0; nthrs--) {
+			sched = &info->ksi_scheds[nthrs - 1];
+
+			sched->kss_info = info;
+			spin_lock_init(&sched->kss_lock);
+			INIT_LIST_HEAD(&sched->kss_rx_conns);
+			INIT_LIST_HEAD(&sched->kss_tx_conns);
+			INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
+			init_waitqueue_head(&sched->kss_waitq);
+		}
+	}
+
+	ksocknal_data.ksnd_connd_starting	 = 0;
+	ksocknal_data.ksnd_connd_failed_stamp     = 0;
+	ksocknal_data.ksnd_connd_starting_stamp   = cfs_time_current_sec();
+	/* must have at least 2 connds to remain responsive to accepts while
+	 * connecting */
+	if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
+		*ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
+
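+	/* NB these tunables are pointers into parameter storage; aliasing
+	 * ksnd_nconnds_max to ksnd_nconnds makes both read the same value,
+	 * so the max can never fall below the configured count */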
+	if (*ksocknal_tunables.ksnd_nconnds_max <
+	    *ksocknal_tunables.ksnd_nconnds) {
+		ksocknal_tunables.ksnd_nconnds_max =
+			ksocknal_tunables.ksnd_nconnds;
+	}
+
+	for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
+		char name[16];
+		spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+		ksocknal_data.ksnd_connd_starting++;
+		spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+
+
+		snprintf(name, sizeof(name), "socknal_cd%02d", i);
+		rc = ksocknal_thread_start(ksocknal_connd,
+					   (void *)((ulong_ptr_t)i), name);
+		if (rc != 0) {
+			spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+			ksocknal_data.ksnd_connd_starting--;
+			spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+			CERROR("Can't spawn socknal connd: %d\n", rc);
+			goto failed;
+		}
+	}
+
+	rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
+	if (rc != 0) {
+		CERROR ("Can't spawn socknal reaper: %d\n", rc);
+		goto failed;
+	}
+
+	/* flag everything initialised */
+	ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
+
+	return 0;
+
+ failed:
+	ksocknal_base_shutdown();
+	return -ENETDOWN;
+}
+
+void
+ksocknal_debug_peerhash (lnet_ni_t *ni)
+{
+	ksock_peer_t	*peer = NULL;
+	struct list_head	*tmp;
+	int		i;
+
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+		list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
+			peer = list_entry (tmp, ksock_peer_t, ksnp_list);
+
+			if (peer->ksnp_ni == ni)
+				break;
+
+			peer = NULL;
+		}
+	}
+
+	if (peer != NULL) {
+		ksock_route_t *route;
+		ksock_conn_t  *conn;
+
+		CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, "
+		       "closing %d, accepting %d, err %d, zcookie "LPU64", "
+		       "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id),
+		       atomic_read(&peer->ksnp_refcount),
+		       peer->ksnp_sharecount, peer->ksnp_closing,
+		       peer->ksnp_accepting, peer->ksnp_error,
+		       peer->ksnp_zc_next_cookie,
+		       !list_empty(&peer->ksnp_tx_queue),
+		       !list_empty(&peer->ksnp_zc_req_list));
+
+		list_for_each (tmp, &peer->ksnp_routes) {
+			route = list_entry(tmp, ksock_route_t, ksnr_list);
+			CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, "
+			       "del %d\n", atomic_read(&route->ksnr_refcount),
+			       route->ksnr_scheduled, route->ksnr_connecting,
+			       route->ksnr_connected, route->ksnr_deleted);
+		}
+
+		list_for_each (tmp, &peer->ksnp_conns) {
+			conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+			CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
+			       atomic_read(&conn->ksnc_conn_refcount),
+			       atomic_read(&conn->ksnc_sock_refcount),
+			       conn->ksnc_type, conn->ksnc_closing);
+		}
+	}
+
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+	return;
+}
+
+void
+ksocknal_shutdown (lnet_ni_t *ni)
+{
+	ksock_net_t      *net = ni->ni_data;
+	int	       i;
+	lnet_process_id_t anyid = {0};
+
+	anyid.nid =  LNET_NID_ANY;
+	anyid.pid =  LNET_PID_ANY;
+
+	LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
+	LASSERT(ksocknal_data.ksnd_nnets > 0);
+
+	spin_lock_bh(&net->ksnn_lock);
+	net->ksnn_shutdown = 1;		 /* prevent new peers */
+	spin_unlock_bh(&net->ksnn_lock);
+
+	/* Delete all peers */
+	ksocknal_del_peer(ni, anyid, 0);
+
+	/* Wait for all peer state to clean up */
+	i = 2;
+	spin_lock_bh(&net->ksnn_lock);
+	while (net->ksnn_npeers != 0) {
+		spin_unlock_bh(&net->ksnn_lock);
+
+		i++;
+		CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+		       "waiting for %d peers to disconnect\n",
+		       net->ksnn_npeers);
+		cfs_pause(cfs_time_seconds(1));
+
+		ksocknal_debug_peerhash(ni);
+
+		spin_lock_bh(&net->ksnn_lock);
+	}
+	spin_unlock_bh(&net->ksnn_lock);
+
+	for (i = 0; i < net->ksnn_ninterfaces; i++) {
+		LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
+		LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0);
+	}
+
+	list_del(&net->ksnn_list);
+	LIBCFS_FREE(net, sizeof(*net));
+
+	ksocknal_data.ksnd_nnets--;
+	if (ksocknal_data.ksnd_nnets == 0)
+		ksocknal_base_shutdown();
+}
+
+int
+ksocknal_enumerate_interfaces(ksock_net_t *net)
+{
+	char      **names;
+	int	 i;
+	int	 j;
+	int	 rc;
+	int	 n;
+
+	n = libcfs_ipif_enumerate(&names);
+	if (n <= 0) {
+		CERROR("Can't enumerate interfaces: %d\n", n);
+		return n;
+	}
+
+	for (i = j = 0; i < n; i++) {
+		int	up;
+		__u32      ip;
+		__u32      mask;
+
+		if (!strcmp(names[i], "lo")) /* skip the loopback IF */
+			continue;
+
+		rc = libcfs_ipif_query(names[i], &up, &ip, &mask);
+		if (rc != 0) {
+			CWARN("Can't get interface %s info: %d\n",
+			      names[i], rc);
+			continue;
+		}
+
+		if (!up) {
+			CWARN("Ignoring interface %s (down)\n",
+			      names[i]);
+			continue;
+		}
+
+		if (j == LNET_MAX_INTERFACES) {
+			CWARN("Ignoring interface %s (too many interfaces)\n",
+			      names[i]);
+			continue;
+		}
+
+		net->ksnn_interfaces[j].ksni_ipaddr = ip;
+		net->ksnn_interfaces[j].ksni_netmask = mask;
+		strncpy(&net->ksnn_interfaces[j].ksni_name[0],
+			names[i], IFNAMSIZ);
+		j++;
+	}
+
+	libcfs_ipif_free_enumeration(names, n);
+
+	if (j == 0)
+		CERROR("Can't find any usable interfaces\n");
+
+	return j;
+}
+
+int
+ksocknal_search_new_ipif(ksock_net_t *net)
+{
+	int	new_ipif = 0;
+	int	i;
+
+	for (i = 0; i < net->ksnn_ninterfaces; i++) {
+		char		*ifnam = &net->ksnn_interfaces[i].ksni_name[0];
+		char		*colon = strchr(ifnam, ':');
+		int		found  = 0;
+		ksock_net_t	*tmp;
+		int		j;
+
+		if (colon != NULL) /* ignore alias device */
+			*colon = 0;
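+		/* NUL-terminating at the ':' makes alias "eth0:1" compare
+		 * equal to its base device "eth0"; the ':' is restored
+		 * before returning */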
+
+		list_for_each_entry(tmp, &ksocknal_data.ksnd_nets,
+					ksnn_list) {
+			for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
+				char *ifnam2 =
+					&tmp->ksnn_interfaces[j].ksni_name[0];
+				char *colon2 = strchr(ifnam2, ':');
+
+				if (colon2 != NULL)
+					*colon2 = 0;
+
+				found = strcmp(ifnam, ifnam2) == 0;
+				if (colon2 != NULL)
+					*colon2 = ':';
+			}
+			if (found)
+				break;
+		}
+
+		new_ipif += !found;
+		if (colon != NULL)
+			*colon = ':';
+	}
+
+	return new_ipif;
+}
+
+int
+ksocknal_start_schedulers(struct ksock_sched_info *info)
+{
+	int	nthrs;
+	int	rc = 0;
+	int	i;
+
+	if (info->ksi_nthreads == 0) {
+		if (*ksocknal_tunables.ksnd_nscheds > 0) {
+			nthrs = info->ksi_nthreads_max;
+		} else {
+			nthrs = cfs_cpt_weight(lnet_cpt_table(),
+					       info->ksi_cpt);
+			nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
+			nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
+		}
+		nthrs = min(nthrs, info->ksi_nthreads_max);
+	} else {
+		LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
+		/* start two more threads if there is a new interface */
+		nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
+	}
+
+	for (i = 0; i < nthrs; i++) {
+		long		id;
+		char		name[20];
+		ksock_sched_t	*sched;
+		id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
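+		/* id packs the CPT and scheduler slot into one long, so the
+		 * thread's single argument identifies its ksock_sched_t */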
+		sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
+		snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
+			 info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
+
+		rc = ksocknal_thread_start(ksocknal_scheduler,
+					   (void *)id, name);
+		if (rc == 0)
+			continue;
+
+		CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
+		       info->ksi_cpt, info->ksi_nthreads + i, rc);
+		break;
+	}
+
+	info->ksi_nthreads += i;
+	return rc;
+}
+
+int
+ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts)
+{
+	int	newif = ksocknal_search_new_ipif(net);
+	int	rc;
+	int	i;
+
+	LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
+
+	for (i = 0; i < ncpts; i++) {
+		struct ksock_sched_info	*info;
+		int cpt = (cpts == NULL) ? i : cpts[i];
+
+		LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
+		info = ksocknal_data.ksnd_sched_info[cpt];
+
+		if (!newif && info->ksi_nthreads > 0)
+			continue;
+
+		rc = ksocknal_start_schedulers(info);
+		if (rc != 0)
+			return rc;
+	}
+	return 0;
+}
+
+int
+ksocknal_startup (lnet_ni_t *ni)
+{
+	ksock_net_t  *net;
+	int	   rc;
+	int	   i;
+
+	LASSERT (ni->ni_lnd == &the_ksocklnd);
+
+	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
+		rc = ksocknal_base_startup();
+		if (rc != 0)
+			return rc;
+	}
+
+	LIBCFS_ALLOC(net, sizeof(*net));
+	if (net == NULL)
+		goto fail_0;
+
+	spin_lock_init(&net->ksnn_lock);
+	net->ksnn_incarnation = ksocknal_new_incarnation();
+	ni->ni_data = net;
+	ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
+	ni->ni_maxtxcredits   = *ksocknal_tunables.ksnd_credits;
+	ni->ni_peertxcredits  = *ksocknal_tunables.ksnd_peertxcredits;
+	ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
+
+	if (ni->ni_interfaces[0] == NULL) {
+		rc = ksocknal_enumerate_interfaces(net);
+		if (rc <= 0)
+			goto fail_1;
+
+		net->ksnn_ninterfaces = 1;
+	} else {
+		for (i = 0; i < LNET_MAX_INTERFACES; i++) {
+			int    up;
+
+			if (ni->ni_interfaces[i] == NULL)
+				break;
+
+			rc = libcfs_ipif_query(
+				ni->ni_interfaces[i], &up,
+				&net->ksnn_interfaces[i].ksni_ipaddr,
+				&net->ksnn_interfaces[i].ksni_netmask);
+
+			if (rc != 0) {
+				CERROR("Can't get interface %s info: %d\n",
+				       ni->ni_interfaces[i], rc);
+				goto fail_1;
+			}
+
+			if (!up) {
+				CERROR("Interface %s is down\n",
+				       ni->ni_interfaces[i]);
+				goto fail_1;
+			}
+
+			strncpy(&net->ksnn_interfaces[i].ksni_name[0],
+				ni->ni_interfaces[i], IFNAMSIZ);
+		}
+		net->ksnn_ninterfaces = i;
+	}
+
+	/* call this before adding the net to ksocknal_data.ksnd_nets */
+	rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
+	if (rc != 0)
+		goto fail_1;
+
+	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
+				net->ksnn_interfaces[0].ksni_ipaddr);
+	list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
+
+	ksocknal_data.ksnd_nnets++;
+
+	return 0;
+
+ fail_1:
+	LIBCFS_FREE(net, sizeof(*net));
+ fail_0:
+	if (ksocknal_data.ksnd_nnets == 0)
+		ksocknal_base_shutdown();
+
+	return -ENETDOWN;
+}
+
+
+void __exit
+ksocknal_module_fini (void)
+{
+	lnet_unregister_lnd(&the_ksocklnd);
+	ksocknal_tunables_fini();
+}
+
+int __init
+ksocknal_module_init (void)
+{
+	int    rc;
+
+	/* check ksnr_connected/connecting field large enough */
+	CLASSERT (SOCKLND_CONN_NTYPES <= 4);
+	CLASSERT (SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN);
+
+	/* initialize the_ksocklnd */
+	the_ksocklnd.lnd_type     = SOCKLND;
+	the_ksocklnd.lnd_startup  = ksocknal_startup;
+	the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
+	the_ksocklnd.lnd_ctl      = ksocknal_ctl;
+	the_ksocklnd.lnd_send     = ksocknal_send;
+	the_ksocklnd.lnd_recv     = ksocknal_recv;
+	the_ksocklnd.lnd_notify   = ksocknal_notify;
+	the_ksocklnd.lnd_query    = ksocknal_query;
+	the_ksocklnd.lnd_accept   = ksocknal_accept;
+
+	rc = ksocknal_tunables_init();
+	if (rc != 0)
+		return rc;
+
+	lnet_register_lnd(&the_ksocklnd);
+
+	return 0;
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0");
+MODULE_LICENSE("GPL");
+
+cfs_module(ksocknal, "3.0.0", ksocknal_module_init, ksocknal_module_fini);

+ 602 - 0
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h

@@ -0,0 +1,602 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ *   Author: Zach Brown <zab@zabbo.net>
+ *   Author: Peter J. Braam <braam@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *   Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org
+ *
+ *   Portals is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Portals is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Portals; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_PORTAL_ALLOC
+#define DEBUG_SUBSYSTEM S_LND
+
+#include "socklnd_lib-linux.h"
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-lnet.h>
+#include <linux/lnet/socklnd.h>
+#include <linux/lnet/lnet-sysctl.h>
+
+#define SOCKNAL_PEER_HASH_SIZE  101	     /* # peer lists */
+#define SOCKNAL_RESCHED	 100	     /* # scheduler loops before reschedule */
+#define SOCKNAL_INSANITY_RECONN 5000	    /* connd keeps retrying reconnection indefinitely */
+#define SOCKNAL_ENOMEM_RETRY    CFS_TICK	/* jiffies between retries */
+
+#define SOCKNAL_SINGLE_FRAG_TX      0	   /* disable multi-fragment sends */
+#define SOCKNAL_SINGLE_FRAG_RX      0	   /* disable multi-fragment receives */
+
+#define SOCKNAL_VERSION_DEBUG       0	   /* enable protocol version debugging */
+
+/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
+ * no risk if we're not running on a CONFIG_HIGHMEM platform. */
+#ifdef CONFIG_HIGHMEM
+# define SOCKNAL_RISK_KMAP_DEADLOCK  0
+#else
+# define SOCKNAL_RISK_KMAP_DEADLOCK  1
+#endif
+
+struct ksock_sched_info;
+
+typedef struct				  /* per scheduler state */
+{
+	spinlock_t		kss_lock;	/* serialise */
+	struct list_head		kss_rx_conns;	/* conn waiting to be read */
+	/* conn waiting to be written */
+	struct list_head		kss_tx_conns;
+	/* zombie noop tx list */
+	struct list_head		kss_zombie_noop_txs;
+	wait_queue_head_t		kss_waitq;	/* where scheduler sleeps */
+	/* # connections assigned to this scheduler */
+	int			kss_nconns;
+	struct ksock_sched_info	*kss_info;	/* owner of it */
+	struct page		*kss_rx_scratch_pgs[LNET_MAX_IOV];
+	struct iovec		kss_scratch_iov[LNET_MAX_IOV];
+} ksock_sched_t;
+
+struct ksock_sched_info {
+	int			ksi_nthreads_max; /* max allowed threads */
+	int			ksi_nthreads;	/* number of threads */
+	int			ksi_cpt;	/* CPT id */
+	ksock_sched_t		*ksi_scheds;	/* array of schedulers */
+};
+
+#define KSOCK_CPT_SHIFT			16
+#define KSOCK_THREAD_ID(cpt, sid)	(((cpt) << KSOCK_CPT_SHIFT) | (sid))
+#define KSOCK_THREAD_CPT(id)		((id) >> KSOCK_CPT_SHIFT)
+#define KSOCK_THREAD_SID(id)		((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
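+
+/* Worked example (illustrative values only): with KSOCK_CPT_SHIFT == 16,
+ * KSOCK_THREAD_ID(2, 5) == 0x20005, KSOCK_THREAD_CPT(0x20005) == 2 and
+ * KSOCK_THREAD_SID(0x20005) == 5; a single long thus encodes both the CPT
+ * and the scheduler index within that CPT's ksi_scheds[] array. */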
+
+typedef struct				  /* in-use interface */
+{
+	__u32		ksni_ipaddr;		/* interface's IP address */
+	__u32		ksni_netmask;		/* interface's network mask */
+	int		ksni_nroutes;		/* # routes using (active) */
+	int		ksni_npeers;		/* # peers using (passive) */
+	char		ksni_name[IFNAMSIZ];	/* interface name */
+} ksock_interface_t;
+
+typedef struct
+{
+	/* "stuck" socket timeout (seconds) */
+	int	      *ksnd_timeout;
+	/* # scheduler threads in each pool while starting */
+	int		 *ksnd_nscheds;
+	int	      *ksnd_nconnds;	 /* # connection daemons */
+	int	      *ksnd_nconnds_max;     /* max # connection daemons */
+	int	      *ksnd_min_reconnectms; /* first connection retry after (ms)... */
+	int	      *ksnd_max_reconnectms; /* ...exponentially increasing to this */
+	int	      *ksnd_eager_ack;       /* make TCP ack eagerly? */
+	int	      *ksnd_typed_conns;     /* drive sockets by type? */
+	int	      *ksnd_min_bulk;	/* smallest "large" message */
+	int	      *ksnd_tx_buffer_size;  /* socket tx buffer size */
+	int	      *ksnd_rx_buffer_size;  /* socket rx buffer size */
+	int	      *ksnd_nagle;	   /* enable Nagle? */
+	int	      *ksnd_round_robin;     /* round robin for multiple interfaces */
+	int	      *ksnd_keepalive;       /* # secs for sending keepalive NOOP */
+	int	      *ksnd_keepalive_idle;  /* # idle secs before 1st probe */
+	int	      *ksnd_keepalive_count; /* # probes */
+	int	      *ksnd_keepalive_intvl; /* time between probes */
+	int	      *ksnd_credits;	 /* # concurrent sends */
+	int	      *ksnd_peertxcredits;   /* # concurrent sends to 1 peer */
+	int	      *ksnd_peerrtrcredits;  /* # per-peer router buffer credits */
+	int	      *ksnd_peertimeout;     /* seconds to consider peer dead */
+	int	      *ksnd_enable_csum;     /* enable checksum */
+	int	      *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
+	int	      *ksnd_nonblk_zcack;    /* always send zc-ack on non-blocking connection */
+	unsigned int     *ksnd_zc_min_payload;  /* minimum zero copy payload size */
+	int	      *ksnd_zc_recv;	 /* enable ZC receive (for Chelsio TOE) */
+	int	      *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to enable ZC receive */
+#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+	ctl_table_header_t *ksnd_sysctl;   /* sysctl interface */
+#endif
+} ksock_tunables_t;
+
+typedef struct
+{
+	__u64		  ksnn_incarnation;	/* my epoch */
+	spinlock_t	  ksnn_lock;		/* serialise */
+	struct list_head	  ksnn_list;		/* chain on global list */
+	int		  ksnn_npeers;		/* # peers */
+	int		  ksnn_shutdown;	/* shutting down? */
+	int		  ksnn_ninterfaces;	/* IP interfaces */
+	ksock_interface_t ksnn_interfaces[LNET_MAX_INTERFACES];
+} ksock_net_t;
+
+/** connd timeout */
+#define SOCKNAL_CONND_TIMEOUT  120
+/** reserved thread for accepting & creating new connd */
+#define SOCKNAL_CONND_RESV     1
+
+typedef struct
+{
+	int			ksnd_init;	/* initialisation state */
+	int			ksnd_nnets;	/* # networks set up */
+	struct list_head		ksnd_nets;	/* list of nets */
+	/* stabilize peer/conn ops */
+	rwlock_t		ksnd_global_lock;
+	/* hash table of all my known peers */
+	struct list_head		*ksnd_peers;
+	int			ksnd_peer_hash_size; /* size of ksnd_peers */
+
+	int			ksnd_nthreads;	/* # live threads */
+	int			ksnd_shuttingdown; /* tell threads to exit */
+	/* schedulers information */
+	struct ksock_sched_info	**ksnd_sched_info;
+
+	atomic_t      ksnd_nactive_txs;    /* #active txs */
+
+	struct list_head	ksnd_deathrow_conns; /* conns to close: reaper_lock*/
+	struct list_head	ksnd_zombie_conns;   /* conns to free: reaper_lock */
+	struct list_head	ksnd_enomem_conns;   /* conns to retry: reaper_lock*/
+	wait_queue_head_t       ksnd_reaper_waitq;   /* reaper sleeps here */
+	cfs_time_t	ksnd_reaper_waketime;/* when reaper will wake */
+	spinlock_t	  ksnd_reaper_lock;	/* serialise */
+
+	int	       ksnd_enomem_tx;      /* test ENOMEM sender */
+	int	       ksnd_stall_tx;       /* test sluggish sender */
+	int	       ksnd_stall_rx;       /* test sluggish receiver */
+
+	struct list_head	ksnd_connd_connreqs; /* incoming connection requests */
+	struct list_head	ksnd_connd_routes;   /* routes waiting to be connected */
+	wait_queue_head_t       ksnd_connd_waitq;    /* connds sleep here */
+	int	       ksnd_connd_connecting;/* # connds connecting */
+	/** time stamp of the last failed connecting attempt */
+	long	      ksnd_connd_failed_stamp;
+	/** # starting connd */
+	unsigned	  ksnd_connd_starting;
+	/** time stamp of the last starting connd */
+	long	      ksnd_connd_starting_stamp;
+	/** # running connd */
+	unsigned	  ksnd_connd_running;
+	spinlock_t	  ksnd_connd_lock;	/* serialise */
+
+	struct list_head	  ksnd_idle_noop_txs;	/* list head for freed noop tx */
+	spinlock_t	  ksnd_tx_lock;		/* serialise, g_lock unsafe */
+
+} ksock_nal_data_t;
+
+#define SOCKNAL_INIT_NOTHING    0
+#define SOCKNAL_INIT_DATA       1
+#define SOCKNAL_INIT_ALL	2
+
+/* A packet just assembled for transmission is represented by 1 or more
+ * struct iovec fragments (the first frag contains the portals header),
+ * followed by 0 or more lnet_kiov_t fragments.
+ *
+ * On the receive side, initially 1 struct iovec fragment is posted for
+ * receive (the header).  Once the header has been received, the payload is
+ * received into either struct iovec or lnet_kiov_t fragments, depending on
+ * what the header matched or whether the message needs forwarding. */
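+
+/* For example (hypothetical sizes): a 1 MB bulk send on a 4 KiB-page machine
+ * could be one iovec carrying the header followed by 256 lnet_kiov_t page
+ * fragments, while a small message may instead use tx_frags.virt with the
+ * header and payload in contiguous iovecs. */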
+
+struct ksock_conn;			      /* forward ref */
+struct ksock_peer;			      /* forward ref */
+struct ksock_route;			     /* forward ref */
+struct ksock_proto;			     /* forward ref */
+
+typedef struct				  /* transmit packet */
+{
+	struct list_head     tx_list;	/* queue on conn for transmission etc */
+	struct list_head     tx_zc_list;     /* queue on peer for ZC request */
+	atomic_t   tx_refcount;    /* tx reference count */
+	int	    tx_nob;	 /* # packet bytes */
+	int	    tx_resid;       /* residual bytes */
+	int	    tx_niov;	/* # packet iovec frags */
+	struct iovec  *tx_iov;	 /* packet iovec frags */
+	int	    tx_nkiov;       /* # packet page frags */
+	unsigned short tx_zc_aborted;  /* aborted ZC request */
+	unsigned short tx_zc_capable:1; /* payload is large enough for ZC */
+	unsigned short tx_zc_checked:1; /* Have I checked if I should ZC? */
+	unsigned short tx_nonblk:1;    /* it's a non-blocking ACK */
+	lnet_kiov_t   *tx_kiov;	/* packet page frags */
+	struct ksock_conn  *tx_conn;	/* owning conn */
+	lnet_msg_t    *tx_lnetmsg;     /* lnet message for lnet_finalize() */
+	cfs_time_t     tx_deadline;    /* when (in jiffies) tx times out */
+	ksock_msg_t    tx_msg;	 /* socklnd message buffer */
+	int	    tx_desc_size;   /* size of this descriptor */
+	union {
+		struct {
+			struct iovec iov;       /* virt hdr */
+			lnet_kiov_t  kiov[0];   /* paged payload */
+		}		  paged;
+		struct {
+			struct iovec iov[1];    /* virt hdr + payload */
+		}		  virt;
+	}		       tx_frags;
+} ksock_tx_t;
+
+#define KSOCK_NOOP_TX_SIZE  ((int)offsetof(ksock_tx_t, tx_frags.paged.kiov[0]))
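+
+/* i.e. the size of a ksock_tx_t with zero page fragments: everything up to,
+ * but not including, tx_frags.paged.kiov[0].  A noop tx carries only the
+ * single header iovec and no paged payload. */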
+
+/* network zero copy callback descriptor embedded in ksock_tx_t */
+
+/* space for the rx frag descriptors; we either read a single contiguous
+ * header, or up to LNET_MAX_IOV frags of payload of either type. */
+typedef union {
+	struct iovec     iov[LNET_MAX_IOV];
+	lnet_kiov_t      kiov[LNET_MAX_IOV];
+} ksock_rxiovspace_t;
+
+#define SOCKNAL_RX_KSM_HEADER   1	       /* reading ksock message header */
+#define SOCKNAL_RX_LNET_HEADER  2	       /* reading lnet message header */
+#define SOCKNAL_RX_PARSE	3	       /* Calling lnet_parse() */
+#define SOCKNAL_RX_PARSE_WAIT   4	       /* waiting to be told to read the body */
+#define SOCKNAL_RX_LNET_PAYLOAD 5	       /* reading lnet payload (to deliver here) */
+#define SOCKNAL_RX_SLOP	 6	       /* skipping body */
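+
+/* Receive-state sketch (see ksocknal_process_receive()): a V2.x connection
+ * first reads SOCKNAL_RX_KSM_HEADER; a KSOCK_MSG_LNET then moves to
+ * SOCKNAL_RX_LNET_HEADER, hands the header to lnet_parse() (RX_PARSE, then
+ * RX_PARSE_WAIT until told to read the body), and finally receives the body
+ * as RX_LNET_PAYLOAD or discards it as RX_SLOP. */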
+
+typedef struct ksock_conn
+{
+	struct ksock_peer  *ksnc_peer;	 /* owning peer */
+	struct ksock_route *ksnc_route;	/* owning route */
+	struct list_head	  ksnc_list;	 /* stash on peer's conn list */
+	socket_t       *ksnc_sock;	 /* actual socket */
+	void	       *ksnc_saved_data_ready; /* socket's original data_ready() callback */
+	void	       *ksnc_saved_write_space; /* socket's original write_space() callback */
+	atomic_t	ksnc_conn_refcount; /* conn refcount */
+	atomic_t	ksnc_sock_refcount; /* sock refcount */
+	ksock_sched_t      *ksnc_scheduler;  /* who schedules this connection */
+	__u32	       ksnc_myipaddr;   /* my IP */
+	__u32	       ksnc_ipaddr;     /* peer's IP */
+	int		 ksnc_port;       /* peer's port */
+	signed int	  ksnc_type:3;     /* type of connection,
+					      * should be signed value */
+	unsigned int	    ksnc_closing:1;  /* being shut down */
+	unsigned int	    ksnc_flip:1;     /* flip or not, only for V2.x */
+	unsigned int	    ksnc_zc_capable:1; /* zero-copy capable */
+	struct ksock_proto *ksnc_proto;      /* protocol for the connection */
+
+	/* reader */
+	struct list_head  ksnc_rx_list;     /* where I enq waiting input or a forwarding descriptor */
+	cfs_time_t	    ksnc_rx_deadline; /* when (in jiffies) receive times out */
+	__u8		  ksnc_rx_started;  /* started receiving a message */
+	__u8		  ksnc_rx_ready;    /* data ready to read */
+	__u8		  ksnc_rx_scheduled;/* being progressed */
+	__u8		  ksnc_rx_state;    /* what is being read */
+	int		   ksnc_rx_nob_left; /* # bytes to next hdr/body */
+	int		   ksnc_rx_nob_wanted; /* bytes actually wanted */
+	int		   ksnc_rx_niov;     /* # iovec frags */
+	struct iovec	 *ksnc_rx_iov;      /* the iovec frags */
+	int		   ksnc_rx_nkiov;    /* # page frags */
+	lnet_kiov_t	  *ksnc_rx_kiov;     /* the page frags */
+	ksock_rxiovspace_t    ksnc_rx_iov_space;/* space for frag descriptors */
+	__u32		 ksnc_rx_csum;     /* partial checksum for incoming data */
+	void		 *ksnc_cookie;      /* rx lnet_finalize passthru arg */
+	ksock_msg_t	   ksnc_msg;	 /* incoming message buffer:
+						 * V2.x message takes the
+						 * whole struct
+						 * V1.x message is a bare
+						 * lnet_hdr_t, it's stored in
+						 * ksnc_msg.ksm_u.lnetmsg */
+
+	/* WRITER */
+	struct list_head	    ksnc_tx_list;     /* where I enq waiting for output space */
+	struct list_head	    ksnc_tx_queue;    /* packets waiting to be sent */
+	ksock_tx_t	   *ksnc_tx_carrier;  /* next TX that can carry a LNet message or ZC-ACK */
+	cfs_time_t	    ksnc_tx_deadline; /* when (in jiffies) tx times out */
+	int		   ksnc_tx_bufnob;     /* send buffer marker */
+	atomic_t	  ksnc_tx_nob;	/* # bytes queued */
+	int		   ksnc_tx_ready;      /* write space */
+	int		   ksnc_tx_scheduled;  /* being progressed */
+	cfs_time_t	    ksnc_tx_last_post;  /* time stamp of the last posted TX */
+} ksock_conn_t;
+
+typedef struct ksock_route
+{
+	struct list_head	    ksnr_list;	/* chain on peer route list */
+	struct list_head	    ksnr_connd_list;  /* chain on ksnr_connd_routes */
+	struct ksock_peer    *ksnr_peer;	/* owning peer */
+	atomic_t	  ksnr_refcount;    /* # users */
+	cfs_time_t	    ksnr_timeout;     /* when (in jiffies) reconnection can happen next */
+	cfs_duration_t	ksnr_retry_interval; /* how long between retries */
+	__u32		 ksnr_myipaddr;    /* my IP */
+	__u32		 ksnr_ipaddr;      /* IP address to connect to */
+	int		   ksnr_port;	/* port to connect to */
+	unsigned int	  ksnr_scheduled:1; /* scheduled for attention */
+	unsigned int	  ksnr_connecting:1;/* connection establishment in progress */
+	unsigned int	  ksnr_connected:4; /* connections established by type */
+	unsigned int	  ksnr_deleted:1;   /* been removed from peer? */
+	unsigned int	  ksnr_share_count; /* created explicitly? */
+	int		   ksnr_conn_count;  /* # conns established by this route */
+} ksock_route_t;
+
+#define SOCKNAL_KEEPALIVE_PING	  1       /* cookie for keepalive ping */
+
+typedef struct ksock_peer
+{
+	struct list_head	    ksnp_list;	/* stash on global peer list */
+	cfs_time_t	    ksnp_last_alive;  /* when (in jiffies) I was last alive */
+	lnet_process_id_t     ksnp_id;       /* who's on the other end(s) */
+	atomic_t	  ksnp_refcount; /* # users */
+	int		   ksnp_sharecount;  /* lconf usage counter */
+	int		   ksnp_closing;  /* being closed */
+	int		   ksnp_accepting;/* # passive connections pending */
+	int		   ksnp_error;    /* errno on closing last conn */
+	__u64		 ksnp_zc_next_cookie;/* ZC completion cookie */
+	__u64		 ksnp_incarnation;   /* latest known peer incarnation */
+	struct ksock_proto   *ksnp_proto;    /* latest known peer protocol */
+	struct list_head	    ksnp_conns;    /* all active connections */
+	struct list_head	    ksnp_routes;   /* routes */
+	struct list_head	    ksnp_tx_queue; /* waiting packets */
+	spinlock_t	      ksnp_lock;	/* serialize, g_lock unsafe */
+	struct list_head	    ksnp_zc_req_list;   /* zero copy requests wait for ACK  */
+	cfs_time_t	    ksnp_send_keepalive; /* time to send keepalive */
+	lnet_ni_t	    *ksnp_ni;       /* which network */
+	int		   ksnp_n_passive_ips; /* # of entries in ksnp_passive_ips */
+	__u32		 ksnp_passive_ips[LNET_MAX_INTERFACES]; /* preferred local interfaces */
+} ksock_peer_t;
+
+typedef struct ksock_connreq
+{
+	struct list_head	    ksncr_list;     /* stash on ksnd_connd_connreqs */
+	lnet_ni_t	    *ksncr_ni;       /* chosen NI */
+	socket_t	 *ksncr_sock;     /* accepted socket */
+} ksock_connreq_t;
+
+extern ksock_nal_data_t ksocknal_data;
+extern ksock_tunables_t ksocknal_tunables;
+
+#define SOCKNAL_MATCH_NO	0	/* TX can't match type of connection */
+#define SOCKNAL_MATCH_YES       1	/* TX matches type of connection */
+#define SOCKNAL_MATCH_MAY       2	/* TX can be sent on the connection, but not preferred */
+
+typedef struct ksock_proto
+{
+	int	   pro_version;					      /* version number of protocol */
+	int	 (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *);     /* handshake function */
+	int	 (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int);/* handshake function */
+	void	(*pro_pack)(ksock_tx_t *);				  /* message pack */
+	void	(*pro_unpack)(ksock_msg_t *);			       /* message unpack */
+	ksock_tx_t *(*pro_queue_tx_msg)(ksock_conn_t *, ksock_tx_t *);	  /* queue tx on the connection */
+	int	 (*pro_queue_tx_zcack)(ksock_conn_t *, ksock_tx_t *, __u64); /* queue ZC ack on the connection */
+	int	 (*pro_handle_zcreq)(ksock_conn_t *, __u64, int);	    /* handle ZC request */
+	int	 (*pro_handle_zcack)(ksock_conn_t *, __u64, __u64);	  /* handle ZC ACK */
+	int	 (*pro_match_tx)(ksock_conn_t *, ksock_tx_t *, int);	 /* msg type matches the connection type:
+										 * return value:
+										 *   return MATCH_NO  : no
+										 *   return MATCH_YES : matching type
+										 *   return MATCH_MAY : can be backup */
+} ksock_proto_t;
+
+extern ksock_proto_t ksocknal_protocol_v1x;
+extern ksock_proto_t ksocknal_protocol_v2x;
+extern ksock_proto_t ksocknal_protocol_v3x;
+
+#define KSOCK_PROTO_V1_MAJOR    LNET_PROTO_TCP_VERSION_MAJOR
+#define KSOCK_PROTO_V1_MINOR    LNET_PROTO_TCP_VERSION_MINOR
+#define KSOCK_PROTO_V1	  KSOCK_PROTO_V1_MAJOR
+
+#ifndef CPU_MASK_NONE
+#define CPU_MASK_NONE   0UL
+#endif
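+
+/* ksocknal_route_mask() returns the set of connection types a route must
+ * establish.  Callers test (ksocknal_route_mask() & ~route->ksnr_connected)
+ * != 0 to decide whether some connection type is still missing; see
+ * ksocknal_find_connectable_route_locked(). */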
+
+static inline int
+ksocknal_route_mask(void)
+{
+	if (!*ksocknal_tunables.ksnd_typed_conns)
+		return (1 << SOCKLND_CONN_ANY);
+
+	return ((1 << SOCKLND_CONN_CONTROL) |
+		(1 << SOCKLND_CONN_BULK_IN) |
+		(1 << SOCKLND_CONN_BULK_OUT));
+}
+
+static inline struct list_head *
+ksocknal_nid2peerlist (lnet_nid_t nid)
+{
+	unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
+
+	return (&ksocknal_data.ksnd_peers [hash]);
+}
+
+static inline void
+ksocknal_conn_addref (ksock_conn_t *conn)
+{
+	LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
+	atomic_inc(&conn->ksnc_conn_refcount);
+}
+
+extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn);
+extern void ksocknal_finalize_zcreq(ksock_conn_t *conn);
+
+static inline void
+ksocknal_conn_decref (ksock_conn_t *conn)
+{
+	LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
+	if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
+		ksocknal_queue_zombie_conn(conn);
+}
+
+static inline int
+ksocknal_connsock_addref (ksock_conn_t *conn)
+{
+	int   rc = -ESHUTDOWN;
+
+	read_lock(&ksocknal_data.ksnd_global_lock);
+	if (!conn->ksnc_closing) {
+		LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
+		atomic_inc(&conn->ksnc_sock_refcount);
+		rc = 0;
+	}
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+
+	return (rc);
+}
+
+static inline void
+ksocknal_connsock_decref (ksock_conn_t *conn)
+{
+	LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0);
+	if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
+		LASSERT (conn->ksnc_closing);
+		libcfs_sock_release(conn->ksnc_sock);
+		conn->ksnc_sock = NULL;
+		ksocknal_finalize_zcreq(conn);
+	}
+}
+
+static inline void
+ksocknal_tx_addref (ksock_tx_t *tx)
+{
+	LASSERT (atomic_read(&tx->tx_refcount) > 0);
+	atomic_inc(&tx->tx_refcount);
+}
+
+extern void ksocknal_tx_prep (ksock_conn_t *, ksock_tx_t *tx);
+extern void ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx);
+
+static inline void
+ksocknal_tx_decref (ksock_tx_t *tx)
+{
+	LASSERT (atomic_read(&tx->tx_refcount) > 0);
+	if (atomic_dec_and_test(&tx->tx_refcount))
+		ksocknal_tx_done(NULL, tx);
+}
+
+static inline void
+ksocknal_route_addref (ksock_route_t *route)
+{
+	LASSERT (atomic_read(&route->ksnr_refcount) > 0);
+	atomic_inc(&route->ksnr_refcount);
+}
+
+extern void ksocknal_destroy_route (ksock_route_t *route);
+
+static inline void
+ksocknal_route_decref (ksock_route_t *route)
+{
+	LASSERT (atomic_read (&route->ksnr_refcount) > 0);
+	if (atomic_dec_and_test(&route->ksnr_refcount))
+		ksocknal_destroy_route (route);
+}
+
+static inline void
+ksocknal_peer_addref (ksock_peer_t *peer)
+{
+	LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
+	atomic_inc(&peer->ksnp_refcount);
+}
+
+extern void ksocknal_destroy_peer (ksock_peer_t *peer);
+
+static inline void
+ksocknal_peer_decref (ksock_peer_t *peer)
+{
+	LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
+	if (atomic_dec_and_test(&peer->ksnp_refcount))
+		ksocknal_destroy_peer (peer);
+}
+
+int ksocknal_startup (lnet_ni_t *ni);
+void ksocknal_shutdown (lnet_ni_t *ni);
+int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
+int ksocknal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
+int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+		  int delayed, unsigned int niov,
+		  struct iovec *iov, lnet_kiov_t *kiov,
+		  unsigned int offset, unsigned int mlen, unsigned int rlen);
+int ksocknal_accept(lnet_ni_t *ni, socket_t *sock);
+
+extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
+extern ksock_peer_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id);
+extern ksock_peer_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id);
+extern void ksocknal_peer_failed (ksock_peer_t *peer);
+extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
+				 socket_t *sock, int type);
+extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
+extern void ksocknal_terminate_conn (ksock_conn_t *conn);
+extern void ksocknal_destroy_conn (ksock_conn_t *conn);
+extern int  ksocknal_close_peer_conns_locked (ksock_peer_t *peer,
+					      __u32 ipaddr, int why);
+extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
+extern int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr);
+extern ksock_conn_t *ksocknal_find_conn_locked(ksock_peer_t *peer,
+					       ksock_tx_t *tx, int nonblk);
+
+extern int  ksocknal_launch_packet(lnet_ni_t *ni, ksock_tx_t *tx,
+				   lnet_process_id_t id);
+extern ksock_tx_t *ksocknal_alloc_tx(int type, int size);
+extern void ksocknal_free_tx (ksock_tx_t *tx);
+extern ksock_tx_t *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk);
+extern void ksocknal_next_tx_carrier(ksock_conn_t *conn);
+extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
+extern void ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist,
+				  int error);
+extern void ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive);
+extern void ksocknal_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when);
+extern int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
+extern void ksocknal_thread_fini (void);
+extern void ksocknal_launch_all_connections_locked (ksock_peer_t *peer);
+extern ksock_route_t *ksocknal_find_connectable_route_locked (ksock_peer_t *peer);
+extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer);
+extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
+extern int ksocknal_scheduler (void *arg);
+extern int ksocknal_connd (void *arg);
+extern int ksocknal_reaper (void *arg);
+extern int ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+				lnet_nid_t peer_nid, ksock_hello_msg_t *hello);
+extern int ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+				ksock_hello_msg_t *hello, lnet_process_id_t *id,
+				__u64 *incarnation);
+extern void ksocknal_read_callback(ksock_conn_t *conn);
+extern void ksocknal_write_callback(ksock_conn_t *conn);
+
+extern int ksocknal_lib_zc_capable(ksock_conn_t *conn);
+extern void ksocknal_lib_save_callback(socket_t *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_set_callback(socket_t *sock,  ksock_conn_t *conn);
+extern void ksocknal_lib_reset_callback(socket_t *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_push_conn (ksock_conn_t *conn);
+extern int ksocknal_lib_get_conn_addrs (ksock_conn_t *conn);
+extern int ksocknal_lib_setup_sock (socket_t *so);
+extern int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx);
+extern int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx);
+extern void ksocknal_lib_eager_ack (ksock_conn_t *conn);
+extern int ksocknal_lib_recv_iov (ksock_conn_t *conn);
+extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn);
+extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem,
+					   int *rxmem, int *nagle);
+
+extern int ksocknal_tunables_init(void);
+extern void ksocknal_tunables_fini(void);
+extern int ksocknal_lib_tunables_init(void);
+extern void ksocknal_lib_tunables_fini(void);
+
+extern void ksocknal_lib_csum_tx(ksock_tx_t *tx);
+
+extern int ksocknal_lib_memory_pressure(ksock_conn_t *conn);
+extern int ksocknal_lib_bind_thread_to_cpu(int id);

+ 2664 - 0
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c

@@ -0,0 +1,2664 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ *   Author: Zach Brown <zab@zabbo.net>
+ *   Author: Peter J. Braam <braam@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *   Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ *   Portals is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Portals is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Portals; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socklnd.h"
+
+ksock_tx_t *
+ksocknal_alloc_tx(int type, int size)
+{
+	ksock_tx_t *tx = NULL;
+
+	if (type == KSOCK_MSG_NOOP) {
+		LASSERT(size == KSOCK_NOOP_TX_SIZE);
+
+		/* searching for a noop tx in free list */
+		spin_lock(&ksocknal_data.ksnd_tx_lock);
+
+		if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
+			tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
+					ksock_tx_t, tx_list);
+			LASSERT(tx->tx_desc_size == size);
+			list_del(&tx->tx_list);
+		}
+
+		spin_unlock(&ksocknal_data.ksnd_tx_lock);
+	}
+
+	if (tx == NULL)
+		LIBCFS_ALLOC(tx, size);
+
+	if (tx == NULL)
+		return NULL;
+
+	atomic_set(&tx->tx_refcount, 1);
+	tx->tx_zc_aborted = 0;
+	tx->tx_zc_capable = 0;
+	tx->tx_zc_checked = 0;
+	tx->tx_desc_size  = size;
+
+	atomic_inc(&ksocknal_data.ksnd_nactive_txs);
+
+	return tx;
+}
+
+ksock_tx_t *
+ksocknal_alloc_tx_noop(__u64 cookie, int nonblk)
+{
+	ksock_tx_t *tx;
+
+	tx = ksocknal_alloc_tx(KSOCK_MSG_NOOP, KSOCK_NOOP_TX_SIZE);
+	if (tx == NULL) {
+		CERROR("Can't allocate noop tx desc\n");
+		return NULL;
+	}
+
+	tx->tx_conn     = NULL;
+	tx->tx_lnetmsg  = NULL;
+	tx->tx_kiov     = NULL;
+	tx->tx_nkiov    = 0;
+	tx->tx_iov      = tx->tx_frags.virt.iov;
+	tx->tx_niov     = 1;
+	tx->tx_nonblk   = nonblk;
+
+	socklnd_init_msg(&tx->tx_msg, KSOCK_MSG_NOOP);
+	tx->tx_msg.ksm_zc_cookies[1] = cookie;
+
+	return tx;
+}
+
+
+void
+ksocknal_free_tx (ksock_tx_t *tx)
+{
+	atomic_dec(&ksocknal_data.ksnd_nactive_txs);
+
+	if (tx->tx_lnetmsg == NULL && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) {
+		/* it's a noop tx */
+		spin_lock(&ksocknal_data.ksnd_tx_lock);
+
+		list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs);
+
+		spin_unlock(&ksocknal_data.ksnd_tx_lock);
+	} else {
+		LIBCFS_FREE(tx, tx->tx_desc_size);
+	}
+}
+
+int
+ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+	struct iovec  *iov = tx->tx_iov;
+	int    nob;
+	int    rc;
+
+	LASSERT (tx->tx_niov > 0);
+
+	/* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */
+	rc = ksocknal_lib_send_iov(conn, tx);
+
+	if (rc <= 0)			    /* sent nothing? */
+		return (rc);
+
+	nob = rc;
+	LASSERT (nob <= tx->tx_resid);
+	tx->tx_resid -= nob;
+
+	/* "consume" iov */
+	do {
+		LASSERT (tx->tx_niov > 0);
+
+		if (nob < (int) iov->iov_len) {
+			iov->iov_base = (void *)((char *)iov->iov_base + nob);
+			iov->iov_len -= nob;
+			return (rc);
+		}
+
+		nob -= iov->iov_len;
+		tx->tx_iov = ++iov;
+		tx->tx_niov--;
+	} while (nob != 0);
+
+	return (rc);
+}
+
+int
+ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+	lnet_kiov_t    *kiov = tx->tx_kiov;
+	int     nob;
+	int     rc;
+
+	LASSERT (tx->tx_niov == 0);
+	LASSERT (tx->tx_nkiov > 0);
+
+	/* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */
+	rc = ksocknal_lib_send_kiov(conn, tx);
+
+	if (rc <= 0)			    /* sent nothing? */
+		return (rc);
+
+	nob = rc;
+	LASSERT (nob <= tx->tx_resid);
+	tx->tx_resid -= nob;
+
+	/* "consume" kiov */
+	do {
+		LASSERT(tx->tx_nkiov > 0);
+
+		if (nob < (int)kiov->kiov_len) {
+			kiov->kiov_offset += nob;
+			kiov->kiov_len -= nob;
+			return rc;
+		}
+
+		nob -= (int)kiov->kiov_len;
+		tx->tx_kiov = ++kiov;
+		tx->tx_nkiov--;
+	} while (nob != 0);
+
+	return (rc);
+}
+
+int
+ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+	int      rc;
+	int      bufnob;
+
+	if (ksocknal_data.ksnd_stall_tx != 0)
+		cfs_pause(cfs_time_seconds(ksocknal_data.ksnd_stall_tx));
+
+	LASSERT (tx->tx_resid != 0);
+
+	rc = ksocknal_connsock_addref(conn);
+	if (rc != 0) {
+		LASSERT (conn->ksnc_closing);
+		return (-ESHUTDOWN);
+	}
+
+	do {
+		if (ksocknal_data.ksnd_enomem_tx > 0) {
+			/* testing... */
+			ksocknal_data.ksnd_enomem_tx--;
+			rc = -EAGAIN;
+		} else if (tx->tx_niov != 0) {
+			rc = ksocknal_send_iov (conn, tx);
+		} else {
+			rc = ksocknal_send_kiov (conn, tx);
+		}
+
+		bufnob = cfs_sock_wmem_queued(conn->ksnc_sock);
+		if (rc > 0)		     /* sent something? */
+			conn->ksnc_tx_bufnob += rc; /* account it */
+
+		if (bufnob < conn->ksnc_tx_bufnob) {
+			/* allocated send buffer bytes < computed; infer
+			 * something got ACKed */
+			conn->ksnc_tx_deadline =
+				cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+			conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+			conn->ksnc_tx_bufnob = bufnob;
+			mb();
+		}
+
+		if (rc <= 0) { /* Didn't write anything? */
+
+			if (rc == 0) /* some stacks return 0 instead of -EAGAIN */
+				rc = -EAGAIN;
+
+			/* Check if EAGAIN is due to memory pressure */
+			if (rc == -EAGAIN && ksocknal_lib_memory_pressure(conn))
+				rc = -ENOMEM;
+
+			break;
+		}
+
+		/* socket's wmem_queued now includes 'rc' bytes */
+		atomic_sub (rc, &conn->ksnc_tx_nob);
+		rc = 0;
+
+	} while (tx->tx_resid != 0);
+
+	ksocknal_connsock_decref(conn);
+	return (rc);
+}
+
+int
+ksocknal_recv_iov (ksock_conn_t *conn)
+{
+	struct iovec *iov = conn->ksnc_rx_iov;
+	int     nob;
+	int     rc;
+
+	LASSERT (conn->ksnc_rx_niov > 0);
+
+	/* Never touch conn->ksnc_rx_iov or change connection
+	 * status inside ksocknal_lib_recv_iov */
+	rc = ksocknal_lib_recv_iov(conn);
+
+	if (rc <= 0)
+		return (rc);
+
+	/* received something... */
+	nob = rc;
+
+	conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+	conn->ksnc_rx_deadline =
+		cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+	mb();		       /* order with setting rx_started */
+	conn->ksnc_rx_started = 1;
+
+	conn->ksnc_rx_nob_wanted -= nob;
+	conn->ksnc_rx_nob_left -= nob;
+
+	do {
+		LASSERT (conn->ksnc_rx_niov > 0);
+
+		if (nob < (int)iov->iov_len) {
+			iov->iov_len -= nob;
+			iov->iov_base = (void *)((char *)iov->iov_base + nob);
+			return (-EAGAIN);
+		}
+
+		nob -= iov->iov_len;
+		conn->ksnc_rx_iov = ++iov;
+		conn->ksnc_rx_niov--;
+	} while (nob != 0);
+
+	return (rc);
+}
+
+int
+ksocknal_recv_kiov (ksock_conn_t *conn)
+{
+	lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
+	int     nob;
+	int     rc;
+
+	LASSERT (conn->ksnc_rx_nkiov > 0);
+
+	/* Never touch conn->ksnc_rx_kiov or change connection
+	 * status inside ksocknal_lib_recv_iov */
+	rc = ksocknal_lib_recv_kiov(conn);
+
+	if (rc <= 0)
+		return (rc);
+
+	/* received something... */
+	nob = rc;
+
+	conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+	conn->ksnc_rx_deadline =
+		cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+	mb();		       /* order with setting rx_started */
+	conn->ksnc_rx_started = 1;
+
+	conn->ksnc_rx_nob_wanted -= nob;
+	conn->ksnc_rx_nob_left -= nob;
+
+	do {
+		LASSERT (conn->ksnc_rx_nkiov > 0);
+
+		if (nob < (int) kiov->kiov_len) {
+			kiov->kiov_offset += nob;
+			kiov->kiov_len -= nob;
+			return -EAGAIN;
+		}
+
+		nob -= kiov->kiov_len;
+		conn->ksnc_rx_kiov = ++kiov;
+		conn->ksnc_rx_nkiov--;
+	} while (nob != 0);
+
+	return 1;
+}
+
+int
+ksocknal_receive (ksock_conn_t *conn)
+{
+	/* Return 1 on success, 0 on EOF, < 0 on error.
+	 * Caller checks ksnc_rx_nob_wanted to determine
+	 * progress/completion. */
+	int     rc;
+	ENTRY;
+
+	if (ksocknal_data.ksnd_stall_rx != 0)
+		cfs_pause(cfs_time_seconds(ksocknal_data.ksnd_stall_rx));
+
+	rc = ksocknal_connsock_addref(conn);
+	if (rc != 0) {
+		LASSERT (conn->ksnc_closing);
+		return (-ESHUTDOWN);
+	}
+
+	for (;;) {
+		if (conn->ksnc_rx_niov != 0)
+			rc = ksocknal_recv_iov (conn);
+		else
+			rc = ksocknal_recv_kiov (conn);
+
+		if (rc <= 0) {
+			/* error/EOF or partial receive */
+			if (rc == -EAGAIN) {
+				rc = 1;
+			} else if (rc == 0 && conn->ksnc_rx_started) {
+				/* EOF in the middle of a message */
+				rc = -EPROTO;
+			}
+			break;
+		}
+
+		/* Completed a fragment */
+
+		if (conn->ksnc_rx_nob_wanted == 0) {
+			rc = 1;
+			break;
+		}
+	}
+
+	ksocknal_connsock_decref(conn);
+	RETURN (rc);
+}
+
+void
+ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx)
+{
+	lnet_msg_t  *lnetmsg = tx->tx_lnetmsg;
+	int	  rc = (tx->tx_resid == 0 && !tx->tx_zc_aborted) ? 0 : -EIO;
+	ENTRY;
+
+	LASSERT(ni != NULL || tx->tx_conn != NULL);
+
+	if (tx->tx_conn != NULL)
+		ksocknal_conn_decref(tx->tx_conn);
+
+	if (ni == NULL && tx->tx_conn != NULL)
+		ni = tx->tx_conn->ksnc_peer->ksnp_ni;
+
+	ksocknal_free_tx (tx);
+	if (lnetmsg != NULL) /* KSOCK_MSG_NOOP txs go without an lnetmsg */
+		lnet_finalize (ni, lnetmsg, rc);
+
+	EXIT;
+}
+
+void
+ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error)
+{
+	ksock_tx_t *tx;
+
+	while (!list_empty (txlist)) {
+		tx = list_entry (txlist->next, ksock_tx_t, tx_list);
+
+		if (error && tx->tx_lnetmsg != NULL) {
+			CNETERR("Deleting packet type %d len %d %s->%s\n",
+				le32_to_cpu (tx->tx_lnetmsg->msg_hdr.type),
+				le32_to_cpu (tx->tx_lnetmsg->msg_hdr.payload_length),
+				libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)),
+				libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid)));
+		} else if (error) {
+			CNETERR("Deleting noop packet\n");
+		}
+
+		list_del (&tx->tx_list);
+
+		LASSERT (atomic_read(&tx->tx_refcount) == 1);
+		ksocknal_tx_done (ni, tx);
+	}
+}
+
+static void
+ksocknal_check_zc_req(ksock_tx_t *tx)
+{
+	ksock_conn_t   *conn = tx->tx_conn;
+	ksock_peer_t   *peer = conn->ksnc_peer;
+
+	/* Set tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx
+	 * to ksnp_zc_req_list if some fragment of this message should be sent
+	 * zero-copy.  Our peer will send an ACK containing this cookie when
+	 * she has received this message to tell us we can signal completion.
+	 * tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on
+	 * ksnp_zc_req_list. */
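+	/* Hypothetical trace: tx A is assigned cookie 7 and queued on
+	 * ksnp_zc_req_list; when the peer's ZC-ACK carrying cookie 7 arrives,
+	 * ksocknal_handle_zcack() matches it and drops the extra ref taken by
+	 * ksocknal_tx_addref() below, letting the tx complete. */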
+	LASSERT (tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
+	LASSERT (tx->tx_zc_capable);
+
+	tx->tx_zc_checked = 1;
+
+	if (conn->ksnc_proto == &ksocknal_protocol_v1x ||
+	    !conn->ksnc_zc_capable)
+		return;
+
+	/* assign cookie and queue tx to pending list, it will be released when
+	 * a matching ack is received. See ksocknal_handle_zcack() */
+
+	ksocknal_tx_addref(tx);
+
+	spin_lock(&peer->ksnp_lock);
+
+	/* ZC_REQ is going to be pinned to the peer */
+	tx->tx_deadline =
+		cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+
+	LASSERT (tx->tx_msg.ksm_zc_cookies[0] == 0);
+
+	tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++;
+
+	if (peer->ksnp_zc_next_cookie == 0)
+		peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
+
+	list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
+
+	spin_unlock(&peer->ksnp_lock);
+}
+
+static void
+ksocknal_uncheck_zc_req(ksock_tx_t *tx)
+{
+	ksock_peer_t   *peer = tx->tx_conn->ksnc_peer;
+
+	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
+	LASSERT(tx->tx_zc_capable);
+
+	tx->tx_zc_checked = 0;
+
+	spin_lock(&peer->ksnp_lock);
+
+	if (tx->tx_msg.ksm_zc_cookies[0] == 0) {
+		/* Not waiting for an ACK */
+		spin_unlock(&peer->ksnp_lock);
+		return;
+	}
+
+	tx->tx_msg.ksm_zc_cookies[0] = 0;
+	list_del(&tx->tx_zc_list);
+
+	spin_unlock(&peer->ksnp_lock);
+
+	ksocknal_tx_decref(tx);
+}
+
+int
+ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+	int	    rc;
+
+	if (tx->tx_zc_capable && !tx->tx_zc_checked)
+		ksocknal_check_zc_req(tx);
+
+	rc = ksocknal_transmit (conn, tx);
+
+	CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
+
+	if (tx->tx_resid == 0) {
+		/* Sent everything OK */
+		LASSERT (rc == 0);
+
+		return (0);
+	}
+
+	if (rc == -EAGAIN)
+		return (rc);
+
+	if (rc == -ENOMEM) {
+		static int counter;
+
+		counter++;   /* exponential backoff warnings */
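+		/* (counter & -counter) == counter iff counter is a power of
+		 * two, so we warn on the 1st, 2nd, 4th, 8th, ... occurrence */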
+		if ((counter & (-counter)) == counter)
+			CWARN("%u ENOMEM tx %p (%u allocated)\n",
+			      counter, conn, atomic_read(&libcfs_kmemory));
+
+		/* Queue on ksnd_enomem_conns for retry after a timeout */
+		spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+		/* enomem list takes over scheduler's ref... */
+		LASSERT (conn->ksnc_tx_scheduled);
+		list_add_tail(&conn->ksnc_tx_list,
+				  &ksocknal_data.ksnd_enomem_conns);
+		if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(),
+						   SOCKNAL_ENOMEM_RETRY),
+				   ksocknal_data.ksnd_reaper_waketime))
+			wake_up (&ksocknal_data.ksnd_reaper_waitq);
+
+		spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+		return (rc);
+	}
+
+	/* Actual error */
+	LASSERT (rc < 0);
+
+	if (!conn->ksnc_closing) {
+		switch (rc) {
+		case -ECONNRESET:
+			LCONSOLE_WARN("Host %u.%u.%u.%u reset our connection "
+				      "while we were sending data; it may have "
+				      "rebooted.\n",
+				      HIPQUAD(conn->ksnc_ipaddr));
+			break;
+		default:
+			LCONSOLE_WARN("There was an unexpected network error "
+				      "while writing to %u.%u.%u.%u: %d.\n",
+				      HIPQUAD(conn->ksnc_ipaddr), rc);
+			break;
+		}
+		CDEBUG(D_NET, "[%p] Error %d on write to %s"
+		       " ip %d.%d.%d.%d:%d\n", conn, rc,
+		       libcfs_id2str(conn->ksnc_peer->ksnp_id),
+		       HIPQUAD(conn->ksnc_ipaddr),
+		       conn->ksnc_port);
+	}
+
+	if (tx->tx_zc_checked)
+		ksocknal_uncheck_zc_req(tx);
+
+	/* it's not an error if conn is being closed */
+	ksocknal_close_conn_and_siblings (conn,
+					  (conn->ksnc_closing) ? 0 : rc);
+
+	return (rc);
+}
+
+void
+ksocknal_launch_connection_locked (ksock_route_t *route)
+{
+
+	/* called holding write lock on ksnd_global_lock */
+
+	LASSERT (!route->ksnr_scheduled);
+	LASSERT (!route->ksnr_connecting);
+	LASSERT ((ksocknal_route_mask() & ~route->ksnr_connected) != 0);
+
+	route->ksnr_scheduled = 1;	      /* scheduling conn for connd */
+	ksocknal_route_addref(route);	   /* extra ref for connd */
+
+	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+
+	list_add_tail(&route->ksnr_connd_list,
+			  &ksocknal_data.ksnd_connd_routes);
+	wake_up(&ksocknal_data.ksnd_connd_waitq);
+
+	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+}
+
+void
+ksocknal_launch_all_connections_locked (ksock_peer_t *peer)
+{
+	ksock_route_t *route;
+
+	/* called holding write lock on ksnd_global_lock */
+	for (;;) {
+		/* launch any/all connections that need it */
+		route = ksocknal_find_connectable_route_locked(peer);
+		if (route == NULL)
+			return;
+
+		ksocknal_launch_connection_locked(route);
+	}
+}
+
+ksock_conn_t *
+ksocknal_find_conn_locked(ksock_peer_t *peer, ksock_tx_t *tx, int nonblk)
+{
+	struct list_head       *tmp;
+	ksock_conn_t     *conn;
+	ksock_conn_t     *typed = NULL;
+	ksock_conn_t     *fallback = NULL;
+	int	       tnob     = 0;
+	int	       fnob     = 0;
+
+	list_for_each (tmp, &peer->ksnp_conns) {
+		ksock_conn_t *c  = list_entry(tmp, ksock_conn_t, ksnc_list);
+		int	   nob = atomic_read(&c->ksnc_tx_nob) +
+				    cfs_sock_wmem_queued(c->ksnc_sock);
+		int	   rc;
+
+		LASSERT (!c->ksnc_closing);
+		LASSERT (c->ksnc_proto != NULL &&
+			 c->ksnc_proto->pro_match_tx != NULL);
+
+		rc = c->ksnc_proto->pro_match_tx(c, tx, nonblk);
+
+		switch (rc) {
+		default:
+			LBUG();
+		case SOCKNAL_MATCH_NO: /* protocol rejected the tx */
+			continue;
+
+		case SOCKNAL_MATCH_YES: /* typed connection */
+			if (typed == NULL || tnob > nob ||
+			    (tnob == nob && *ksocknal_tunables.ksnd_round_robin &&
+			     cfs_time_after(typed->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
+				typed = c;
+				tnob  = nob;
+			}
+			break;
+
+		case SOCKNAL_MATCH_MAY: /* fallback connection */
+			if (fallback == NULL || fnob > nob ||
+			    (fnob == nob && *ksocknal_tunables.ksnd_round_robin &&
+			     cfs_time_after(fallback->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
+				fallback = c;
+				fnob     = nob;
+			}
+			break;
+		}
+	}
+
+	/* prefer the typed selection */
+	conn = (typed != NULL) ? typed : fallback;
+
+	if (conn != NULL)
+		conn->ksnc_tx_last_post = cfs_time_current();
+
+	return conn;
+}
+
+void
+ksocknal_tx_prep(ksock_conn_t *conn, ksock_tx_t *tx)
+{
+	conn->ksnc_proto->pro_pack(tx);
+
+	atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
+	ksocknal_conn_addref(conn); /* +1 ref for tx */
+	tx->tx_conn = conn;
+}
+
+void
+ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
+{
+	ksock_sched_t *sched = conn->ksnc_scheduler;
+	ksock_msg_t   *msg = &tx->tx_msg;
+	ksock_tx_t    *ztx = NULL;
+	int	    bufnob = 0;
+
+	/* called holding global lock (read or irq-write) and caller may
+	 * not have dropped this lock between finding conn and calling me,
+	 * so we don't need the {get,put}connsock dance to deref
+	 * ksnc_sock... */
+	LASSERT(!conn->ksnc_closing);
+
+	CDEBUG (D_NET, "Sending to %s ip %d.%d.%d.%d:%d\n",
+		libcfs_id2str(conn->ksnc_peer->ksnp_id),
+		HIPQUAD(conn->ksnc_ipaddr),
+		conn->ksnc_port);
+
+	ksocknal_tx_prep(conn, tx);
+
+	/* Ensure the frags we've been given EXACTLY match the number of
+	 * bytes we want to send.  Many TCP/IP stacks disregard any total
+	 * size parameters passed to them and just look at the frags.
+	 *
+	 * We always expect at least 1 mapped fragment containing the
+	 * complete ksocknal message header. */
+	LASSERT (lnet_iov_nob (tx->tx_niov, tx->tx_iov) +
+		 lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) ==
+		 (unsigned int)tx->tx_nob);
+	LASSERT (tx->tx_niov >= 1);
+	LASSERT (tx->tx_resid == tx->tx_nob);
+
+	CDEBUG (D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n",
+		tx, (tx->tx_lnetmsg != NULL) ? tx->tx_lnetmsg->msg_hdr.type:
+					       KSOCK_MSG_NOOP,
+		tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
+
+	/*
+	 * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__
+	 * but they're used inside spinlocks a lot.
+	 */
+	bufnob = cfs_sock_wmem_queued(conn->ksnc_sock);
+	spin_lock_bh(&sched->kss_lock);
+
+	if (list_empty(&conn->ksnc_tx_queue) && bufnob == 0) {
+		/* First packet starts the timeout */
+		conn->ksnc_tx_deadline =
+			cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+		if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */
+			conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+		conn->ksnc_tx_bufnob = 0;
+		mb(); /* order with adding to tx_queue */
+	}
+
+	if (msg->ksm_type == KSOCK_MSG_NOOP) {
+		/* The packet is a noop ZC-ACK; try to piggyback the ack
+		 * cookie on a normal packet so we don't need to send it
+		 * separately */
+		LASSERT (msg->ksm_zc_cookies[1] != 0);
+		LASSERT (conn->ksnc_proto->pro_queue_tx_zcack != NULL);
+
+		if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0))
+			ztx = tx; /* ZC-ACK piggybacked on ztx; release tx later */
+
+	} else {
+		/* It's a normal packet - can it piggyback a noop zc-ack that
+		 * has been queued already? */
+		LASSERT (msg->ksm_zc_cookies[1] == 0);
+		LASSERT (conn->ksnc_proto->pro_queue_tx_msg != NULL);
+
+		ztx = conn->ksnc_proto->pro_queue_tx_msg(conn, tx);
+		/* ztx will be released later */
+	}
+
+	if (ztx != NULL) {
+		atomic_sub (ztx->tx_nob, &conn->ksnc_tx_nob);
+		list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs);
+	}
+
+	if (conn->ksnc_tx_ready &&      /* able to send */
+	    !conn->ksnc_tx_scheduled) { /* not scheduled to send */
+		/* +1 ref for scheduler */
+		ksocknal_conn_addref(conn);
+		list_add_tail (&conn->ksnc_tx_list,
+				   &sched->kss_tx_conns);
+		conn->ksnc_tx_scheduled = 1;
+		wake_up (&sched->kss_waitq);
+	}
+
+	spin_unlock_bh(&sched->kss_lock);
+}
+
+
+ksock_route_t *
+ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
+{
+	cfs_time_t     now = cfs_time_current();
+	struct list_head    *tmp;
+	ksock_route_t *route;
+
+	list_for_each (tmp, &peer->ksnp_routes) {
+		route = list_entry (tmp, ksock_route_t, ksnr_list);
+
+		LASSERT (!route->ksnr_connecting || route->ksnr_scheduled);
+
+		if (route->ksnr_scheduled)      /* connections being established */
+			continue;
+
+		/* all route types connected? */
+		if ((ksocknal_route_mask() & ~route->ksnr_connected) == 0)
+			continue;
+
+		if (!(route->ksnr_retry_interval == 0 || /* first attempt */
+		      cfs_time_aftereq(now, route->ksnr_timeout))) {
+			CDEBUG(D_NET,
+			       "Too soon to retry route %u.%u.%u.%u "
+			       "(cnted %d, interval %ld, %ld secs later)\n",
+			       HIPQUAD(route->ksnr_ipaddr),
+			       route->ksnr_connected,
+			       route->ksnr_retry_interval,
+			       cfs_duration_sec(route->ksnr_timeout - now));
+			continue;
+		}
+
+		return (route);
+	}
+
+	return (NULL);
+}
+
+ksock_route_t *
+ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
+{
+	struct list_head	*tmp;
+	ksock_route_t     *route;
+
+	list_for_each (tmp, &peer->ksnp_routes) {
+		route = list_entry (tmp, ksock_route_t, ksnr_list);
+
+		LASSERT (!route->ksnr_connecting || route->ksnr_scheduled);
+
+		if (route->ksnr_scheduled)
+			return (route);
+	}
+
+	return (NULL);
+}
+
+int
+ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id)
+{
+	ksock_peer_t     *peer;
+	ksock_conn_t     *conn;
+	rwlock_t     *g_lock;
+	int	       retry;
+	int	       rc;
+
+	LASSERT (tx->tx_conn == NULL);
+
+	g_lock = &ksocknal_data.ksnd_global_lock;
+
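+	/* First try under the read lock; if no usable conn/route is found,
+	 * retake as a write lock, create the peer if necessary, and retry
+	 * once (retry flips to 1 on the second pass). */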
+	for (retry = 0;; retry = 1) {
+		read_lock(g_lock);
+		peer = ksocknal_find_peer_locked(ni, id);
+		if (peer != NULL) {
+			if (ksocknal_find_connectable_route_locked(peer) == NULL) {
+				conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
+				if (conn != NULL) {
+					/* I've got no routes that need to be
+					 * connecting and I do have an actual
+					 * connection... */
+					ksocknal_queue_tx_locked (tx, conn);
+					read_unlock(g_lock);
+					return (0);
+				}
+			}
+		}
+
+		/* I'll need a write lock... */
+		read_unlock(g_lock);
+
+		write_lock_bh(g_lock);
+
+		peer = ksocknal_find_peer_locked(ni, id);
+		if (peer != NULL)
+			break;
+
+		write_unlock_bh(g_lock);
+
+		if ((id.pid & LNET_PID_USERFLAG) != 0) {
+			CERROR("Refusing to create a connection to "
+			       "userspace process %s\n", libcfs_id2str(id));
+			return -EHOSTUNREACH;
+		}
+
+		if (retry) {
+			CERROR("Can't find peer %s\n", libcfs_id2str(id));
+			return -EHOSTUNREACH;
+		}
+
+		rc = ksocknal_add_peer(ni, id,
+				       LNET_NIDADDR(id.nid),
+				       lnet_acceptor_port());
+		if (rc != 0) {
+			CERROR("Can't add peer %s: %d\n",
+			       libcfs_id2str(id), rc);
+			return rc;
+		}
+	}
+
+	ksocknal_launch_all_connections_locked(peer);
+
+	conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
+	if (conn != NULL) {
+		/* Connection exists; queue message on it */
+		ksocknal_queue_tx_locked (tx, conn);
+		write_unlock_bh(g_lock);
+		return (0);
+	}
+
+	if (peer->ksnp_accepting > 0 ||
+	    ksocknal_find_connecting_route_locked (peer) != NULL) {
+		/* the message is going to be pinned to the peer */
+		tx->tx_deadline =
+			cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+
+		/* Queue the message until a connection is established */
+		list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
+		write_unlock_bh(g_lock);
+		return 0;
+	}
+
+	write_unlock_bh(g_lock);
+
+	/* NB Routes may be ignored if connections to them failed recently */
+	CNETERR("No usable routes to %s\n", libcfs_id2str(id));
+	return (-EHOSTUNREACH);
+}
+
+int
+ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
+{
+	int	       mpflag = 0;
+	int	       type = lntmsg->msg_type;
+	lnet_process_id_t target = lntmsg->msg_target;
+	unsigned int      payload_niov = lntmsg->msg_niov;
+	struct iovec     *payload_iov = lntmsg->msg_iov;
+	lnet_kiov_t      *payload_kiov = lntmsg->msg_kiov;
+	unsigned int      payload_offset = lntmsg->msg_offset;
+	unsigned int      payload_nob = lntmsg->msg_len;
+	ksock_tx_t       *tx;
+	int	       desc_size;
+	int	       rc;
+
+	/* NB 'private' is different depending on what we're sending.
+	 * Just ignore it... */
+
+	CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
+	       payload_nob, payload_niov, libcfs_id2str(target));
+
+	LASSERT (payload_nob == 0 || payload_niov > 0);
+	LASSERT (payload_niov <= LNET_MAX_IOV);
+	/* payload is either all vaddrs or all pages */
+	LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+	LASSERT (!in_interrupt ());
+
+	if (payload_iov != NULL)
+		desc_size = offsetof(ksock_tx_t,
+				     tx_frags.virt.iov[1 + payload_niov]);
+	else
+		desc_size = offsetof(ksock_tx_t,
+				     tx_frags.paged.kiov[payload_niov]);
+
+	if (lntmsg->msg_vmflush)
+		mpflag = cfs_memory_pressure_get_and_set();
+	tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size);
+	if (tx == NULL) {
+		CERROR("Can't allocate tx desc type %d size %d\n",
+		       type, desc_size);
+		if (lntmsg->msg_vmflush)
+			cfs_memory_pressure_restore(mpflag);
+		return (-ENOMEM);
+	}
+
+	tx->tx_conn = NULL;		     /* set when assigned a conn */
+	tx->tx_lnetmsg = lntmsg;
+
+	if (payload_iov != NULL) {
+		tx->tx_kiov = NULL;
+		tx->tx_nkiov = 0;
+		tx->tx_iov = tx->tx_frags.virt.iov;
+		tx->tx_niov = 1 +
+			      lnet_extract_iov(payload_niov, &tx->tx_iov[1],
+					       payload_niov, payload_iov,
+					       payload_offset, payload_nob);
+	} else {
+		tx->tx_niov = 1;
+		tx->tx_iov = &tx->tx_frags.paged.iov;
+		tx->tx_kiov = tx->tx_frags.paged.kiov;
+		tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov,
+						 payload_niov, payload_kiov,
+						 payload_offset, payload_nob);
+
+		if (payload_nob >= *ksocknal_tunables.ksnd_zc_min_payload)
+			tx->tx_zc_capable = 1;
+	}
+
+	socklnd_init_msg(&tx->tx_msg, KSOCK_MSG_LNET);
+
+	/* The first fragment will be set later in pro_pack */
+	rc = ksocknal_launch_packet(ni, tx, target);
+	if (lntmsg->msg_vmflush)
+		cfs_memory_pressure_restore(mpflag);
+	if (rc == 0)
+		return (0);
+
+	ksocknal_free_tx(tx);
+	return (-EIO);
+}
+
+int
+ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name)
+{
+	task_t *task = kthread_run(fn, arg, name);
+
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+	ksocknal_data.ksnd_nthreads++;
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+	return 0;
+}
+
+void
+ksocknal_thread_fini (void)
+{
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+	ksocknal_data.ksnd_nthreads--;
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+}
+
+int
+ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip)
+{
+	static char ksocknal_slop_buffer[4096];
+
+	int	    nob;
+	unsigned int   niov;
+	int	    skipped;
+
+	LASSERT(conn->ksnc_proto != NULL);
+
+	if ((*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0) {
+		/* Remind the socket to ack eagerly... */
+		ksocknal_lib_eager_ack(conn);
+	}
+
+	if (nob_to_skip == 0) {	 /* right at next packet boundary now */
+		conn->ksnc_rx_started = 0;
+		mb();		       /* racing with timeout thread */
+
+		switch (conn->ksnc_proto->pro_version) {
+		case  KSOCK_PROTO_V2:
+		case  KSOCK_PROTO_V3:
+			conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
+			conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+			conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg;
+
+			conn->ksnc_rx_nob_wanted = offsetof(ksock_msg_t, ksm_u);
+			conn->ksnc_rx_nob_left = offsetof(ksock_msg_t, ksm_u);
+			conn->ksnc_rx_iov[0].iov_len  = offsetof(ksock_msg_t, ksm_u);
+			break;
+
+		case KSOCK_PROTO_V1:
+			/* Receiving bare lnet_hdr_t */
+			conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
+			conn->ksnc_rx_nob_wanted = sizeof(lnet_hdr_t);
+			conn->ksnc_rx_nob_left = sizeof(lnet_hdr_t);
+
+			conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+			conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg;
+			conn->ksnc_rx_iov[0].iov_len  = sizeof (lnet_hdr_t);
+			break;
+
+		default:
+			LBUG ();
+		}
+		conn->ksnc_rx_niov = 1;
+
+		conn->ksnc_rx_kiov = NULL;
+		conn->ksnc_rx_nkiov = 0;
+		conn->ksnc_rx_csum = ~0;
+		return (1);
+	}
+
+	/* Set up to skip as much as possible now.  If there's more left
+	 * (ran out of iov entries) we'll get called again */
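+	/* Each rx iov entry below points at the same static 4096-byte slop
+	 * buffer; when nob_to_skip exceeds what the available iov entries
+	 * can cover, the remainder is skipped on a later call. */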
+
+	conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
+	conn->ksnc_rx_nob_left = nob_to_skip;
+	conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+	skipped = 0;
+	niov = 0;
+
+	do {
+		nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer));
+
+		conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer;
+		conn->ksnc_rx_iov[niov].iov_len  = nob;
+		niov++;
+		skipped += nob;
+		nob_to_skip -= nob;
+
+	} while (nob_to_skip != 0 &&    /* mustn't overflow conn's rx iov */
+		 niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec));
+
+	conn->ksnc_rx_niov = niov;
+	conn->ksnc_rx_kiov = NULL;
+	conn->ksnc_rx_nkiov = 0;
+	conn->ksnc_rx_nob_wanted = skipped;
+	return (0);
+}
+
+int
+ksocknal_process_receive (ksock_conn_t *conn)
+{
+	lnet_hdr_t	*lhdr;
+	lnet_process_id_t *id;
+	int		rc;
+
+	LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
+
+	/* NB: sched lock NOT held */
+	/* SOCKNAL_RX_LNET_HEADER is here for backward compatibility */
+	LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER ||
+		 conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD ||
+		 conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER ||
+		 conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
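+	/* Receive state machine (sketch): KSM_HEADER -> LNET_HEADER ->
+	 * PARSE (lnet_parse() calls back into ksocknal_recv(), which
+	 * advances the conn to LNET_PAYLOAD) -> SLOP, then back to
+	 * ksocknal_new_packet() for the next message. */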
+ again:
+	if (conn->ksnc_rx_nob_wanted != 0) {
+		rc = ksocknal_receive(conn);
+
+		if (rc <= 0) {
+			LASSERT (rc != -EAGAIN);
+
+			if (rc == 0)
+				CDEBUG (D_NET, "[%p] EOF from %s"
+					" ip %d.%d.%d.%d:%d\n", conn,
+					libcfs_id2str(conn->ksnc_peer->ksnp_id),
+					HIPQUAD(conn->ksnc_ipaddr),
+					conn->ksnc_port);
+			else if (!conn->ksnc_closing)
+				CERROR ("[%p] Error %d on read from %s"
+					" ip %d.%d.%d.%d:%d\n",
+					conn, rc,
+					libcfs_id2str(conn->ksnc_peer->ksnp_id),
+					HIPQUAD(conn->ksnc_ipaddr),
+					conn->ksnc_port);
+
+			/* it's not an error if conn is being closed */
+			ksocknal_close_conn_and_siblings (conn,
+							  (conn->ksnc_closing) ? 0 : rc);
+			return (rc == 0 ? -ESHUTDOWN : rc);
+		}
+
+		if (conn->ksnc_rx_nob_wanted != 0) {
+			/* short read */
+			return (-EAGAIN);
+		}
+	}
+	switch (conn->ksnc_rx_state) {
+	case SOCKNAL_RX_KSM_HEADER:
+		if (conn->ksnc_flip) {
+			__swab32s(&conn->ksnc_msg.ksm_type);
+			__swab32s(&conn->ksnc_msg.ksm_csum);
+			__swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]);
+			__swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]);
+		}
+
+		if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
+		    conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
+			CERROR("%s: Unknown message type: %x\n",
+			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
+			       conn->ksnc_msg.ksm_type);
+			ksocknal_new_packet(conn, 0);
+			ksocknal_close_conn_and_siblings(conn, -EPROTO);
+			return (-EPROTO);
+		}
+
+		if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
+		    conn->ksnc_msg.ksm_csum != 0 &&     /* has checksum */
+		    conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
+			/* NOOP Checksum error */
+			CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
+			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
+			       conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
+			ksocknal_new_packet(conn, 0);
+			ksocknal_close_conn_and_siblings(conn, -EPROTO);
+			return (-EIO);
+		}
+
+		if (conn->ksnc_msg.ksm_zc_cookies[1] != 0) {
+			__u64 cookie = 0;
+
+			LASSERT (conn->ksnc_proto != &ksocknal_protocol_v1x);
+
+			if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP)
+				cookie = conn->ksnc_msg.ksm_zc_cookies[0];
+
+			rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie,
+					       conn->ksnc_msg.ksm_zc_cookies[1]);
+
+			if (rc != 0) {
+				CERROR("%s: Unknown ZC-ACK cookie: "LPU64", "LPU64"\n",
+				       libcfs_id2str(conn->ksnc_peer->ksnp_id),
+				       cookie, conn->ksnc_msg.ksm_zc_cookies[1]);
+				ksocknal_new_packet(conn, 0);
+				ksocknal_close_conn_and_siblings(conn, -EPROTO);
+				return (rc);
+			}
+		}
+
+		if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) {
+			ksocknal_new_packet (conn, 0);
+			return 0;       /* NOOP is done; just return */
+		}
+
+		conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
+		conn->ksnc_rx_nob_wanted = sizeof(ksock_lnet_msg_t);
+		conn->ksnc_rx_nob_left = sizeof(ksock_lnet_msg_t);
+
+		conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+		conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg;
+		conn->ksnc_rx_iov[0].iov_len  = sizeof(ksock_lnet_msg_t);
+
+		conn->ksnc_rx_niov = 1;
+		conn->ksnc_rx_kiov = NULL;
+		conn->ksnc_rx_nkiov = 0;
+
+		goto again;     /* read lnet header now */
+
+	case SOCKNAL_RX_LNET_HEADER:
+		/* unpack message header */
+		conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
+
+		if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) {
+			/* Userspace peer */
+			lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
+			id   = &conn->ksnc_peer->ksnp_id;
+
+			/* Substitute process ID assigned at connection time */
+			lhdr->src_pid = cpu_to_le32(id->pid);
+			lhdr->src_nid = cpu_to_le64(id->nid);
+		}
+
+		conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
+		ksocknal_conn_addref(conn);     /* ++ref while parsing */
+
+		rc = lnet_parse(conn->ksnc_peer->ksnp_ni,
+				&conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr,
+				conn->ksnc_peer->ksnp_id.nid, conn, 0);
+		if (rc < 0) {
+			/* I just received garbage: give up on this conn */
+			ksocknal_new_packet(conn, 0);
+			ksocknal_close_conn_and_siblings (conn, rc);
+			ksocknal_conn_decref(conn);
+			return (-EPROTO);
+		}
+
+		/* I'm racing with ksocknal_recv() */
+		LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
+			 conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD);
+
+		if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD)
+			return 0;
+
+		/* ksocknal_recv() got called */
+		goto again;
+
+	case SOCKNAL_RX_LNET_PAYLOAD:
+		/* payload all received */
+		rc = 0;
+
+		if (conn->ksnc_rx_nob_left == 0 &&   /* not truncating */
+		    conn->ksnc_msg.ksm_csum != 0 &&  /* has checksum */
+		    conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
+			CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
+			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
+			       conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
+			rc = -EIO;
+		}
+
+		if (rc == 0 && conn->ksnc_msg.ksm_zc_cookies[0] != 0) {
+			LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
+
+			lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
+			id   = &conn->ksnc_peer->ksnp_id;
+
+			rc = conn->ksnc_proto->pro_handle_zcreq(conn,
+					conn->ksnc_msg.ksm_zc_cookies[0],
+					*ksocknal_tunables.ksnd_nonblk_zcack ||
+					le64_to_cpu(lhdr->src_nid) != id->nid);
+		}
+
+		lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc);
+
+		if (rc != 0) {
+			ksocknal_new_packet(conn, 0);
+			ksocknal_close_conn_and_siblings (conn, rc);
+			return (-EPROTO);
+		}
+		/* Fall through */
+
+	case SOCKNAL_RX_SLOP:
+		/* starting new packet? */
+		if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left))
+			return 0;       /* come back later */
+		goto again;	     /* try to finish reading slop now */
+
+	default:
+		break;
+	}
+
+	/* Not Reached */
+	LBUG ();
+	return (-EINVAL);		       /* keep gcc happy */
+}
+
+int
+ksocknal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
+	       unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
+	       unsigned int offset, unsigned int mlen, unsigned int rlen)
+{
+	ksock_conn_t  *conn = (ksock_conn_t *)private;
+	ksock_sched_t *sched = conn->ksnc_scheduler;
+
+	LASSERT (mlen <= rlen);
+	LASSERT (niov <= LNET_MAX_IOV);
+
+	conn->ksnc_cookie = msg;
+	conn->ksnc_rx_nob_wanted = mlen;
+	conn->ksnc_rx_nob_left   = rlen;
+
+	if (mlen == 0 || iov != NULL) {
+		conn->ksnc_rx_nkiov = 0;
+		conn->ksnc_rx_kiov = NULL;
+		conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
+		conn->ksnc_rx_niov =
+			lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
+					 niov, iov, offset, mlen);
+	} else {
+		conn->ksnc_rx_niov = 0;
+		conn->ksnc_rx_iov  = NULL;
+		conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
+		conn->ksnc_rx_nkiov =
+			lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
+					  niov, kiov, offset, mlen);
+	}
+
+	LASSERT (mlen ==
+		 lnet_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
+		 lnet_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
+
+	LASSERT (conn->ksnc_rx_scheduled);
+
+	spin_lock_bh(&sched->kss_lock);
+
+	switch (conn->ksnc_rx_state) {
+	case SOCKNAL_RX_PARSE_WAIT:
+		list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
+		wake_up (&sched->kss_waitq);
+		LASSERT (conn->ksnc_rx_ready);
+		break;
+
+	case SOCKNAL_RX_PARSE:
+		/* scheduler hasn't noticed I'm parsing yet */
+		break;
+	}
+
+	conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD;
+
+	spin_unlock_bh(&sched->kss_lock);
+	ksocknal_conn_decref(conn);
+	return 0;
+}
+
+static inline int
+ksocknal_sched_cansleep(ksock_sched_t *sched)
+{
+	int	   rc;
+
+	spin_lock_bh(&sched->kss_lock);
+
+	rc = (!ksocknal_data.ksnd_shuttingdown &&
+	      list_empty(&sched->kss_rx_conns) &&
+	      list_empty(&sched->kss_tx_conns));
+
+	spin_unlock_bh(&sched->kss_lock);
+	return rc;
+}
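+/* Used as the scheduler's wait condition: the thread may sleep only while
+ * we are not shutting down and no connection is queued for rx or tx. */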
+
+int ksocknal_scheduler(void *arg)
+{
+	struct ksock_sched_info	*info;
+	ksock_sched_t		*sched;
+	ksock_conn_t		*conn;
+	ksock_tx_t		*tx;
+	int			rc;
+	int			nloops = 0;
+	long			id = (long)arg;
+
+	info = ksocknal_data.ksnd_sched_info[KSOCK_THREAD_CPT(id)];
+	sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
+
+	cfs_block_allsigs();
+
+	rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt);
+	if (rc != 0) {
+		CERROR("Can't set CPT affinity to %d: %d\n",
+		       info->ksi_cpt, rc);
+	}
+
+	spin_lock_bh(&sched->kss_lock);
+
+	while (!ksocknal_data.ksnd_shuttingdown) {
+		int did_something = 0;
+
+		/* Ensure I progress everything semi-fairly */
+
+		if (!list_empty (&sched->kss_rx_conns)) {
+			conn = list_entry(sched->kss_rx_conns.next,
+					      ksock_conn_t, ksnc_rx_list);
+			list_del(&conn->ksnc_rx_list);
+
+			LASSERT(conn->ksnc_rx_scheduled);
+			LASSERT(conn->ksnc_rx_ready);
+
+			/* clear rx_ready in case receive isn't complete.
+			 * Do it BEFORE we call process_recv, since
+			 * data_ready can set it any time after we release
+			 * kss_lock. */
+			conn->ksnc_rx_ready = 0;
+			spin_unlock_bh(&sched->kss_lock);
+
+			rc = ksocknal_process_receive(conn);
+
+			spin_lock_bh(&sched->kss_lock);
+
+			/* I'm the only one that can clear this flag */
+			LASSERT(conn->ksnc_rx_scheduled);
+
+			/* Did process_receive get everything it wanted? */
+			if (rc == 0)
+				conn->ksnc_rx_ready = 1;
+
+			if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) {
+				/* Conn blocked waiting for ksocknal_recv()
+				 * I change its state (under lock) to signal
+				 * it can be rescheduled */
+				conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT;
+			} else if (conn->ksnc_rx_ready) {
+				/* reschedule for rx */
+				list_add_tail (&conn->ksnc_rx_list,
+						   &sched->kss_rx_conns);
+			} else {
+				conn->ksnc_rx_scheduled = 0;
+				/* drop my ref */
+				ksocknal_conn_decref(conn);
+			}
+
+			did_something = 1;
+		}
+
+		if (!list_empty (&sched->kss_tx_conns)) {
+			LIST_HEAD    (zlist);
+
+			if (!list_empty(&sched->kss_zombie_noop_txs)) {
+				list_add(&zlist,
+					     &sched->kss_zombie_noop_txs);
+				list_del_init(&sched->kss_zombie_noop_txs);
+			}
+
+			conn = list_entry(sched->kss_tx_conns.next,
+					      ksock_conn_t, ksnc_tx_list);
+			list_del (&conn->ksnc_tx_list);
+
+			LASSERT(conn->ksnc_tx_scheduled);
+			LASSERT(conn->ksnc_tx_ready);
+			LASSERT(!list_empty(&conn->ksnc_tx_queue));
+
+			tx = list_entry(conn->ksnc_tx_queue.next,
+					    ksock_tx_t, tx_list);
+
+			if (conn->ksnc_tx_carrier == tx)
+				ksocknal_next_tx_carrier(conn);
+
+			/* dequeue now so empty list => more to send */
+			list_del(&tx->tx_list);
+
+			/* Clear tx_ready in case send isn't complete.  Do
+			 * it BEFORE we call process_transmit, since
+			 * write_space can set it any time after we release
+			 * kss_lock. */
+			conn->ksnc_tx_ready = 0;
+			spin_unlock_bh(&sched->kss_lock);
+
+			if (!list_empty(&zlist)) {
+				/* free zombie noop txs; it's fast because
+				 * noop txs just go back on the freelist */
+				ksocknal_txlist_done(NULL, &zlist, 0);
+			}
+
+			rc = ksocknal_process_transmit(conn, tx);
+
+			if (rc == -ENOMEM || rc == -EAGAIN) {
+				/* Incomplete send: replace tx on HEAD of tx_queue */
+				spin_lock_bh(&sched->kss_lock);
+				list_add(&tx->tx_list,
+					     &conn->ksnc_tx_queue);
+			} else {
+				/* Complete send; tx -ref */
+				ksocknal_tx_decref(tx);
+
+				spin_lock_bh(&sched->kss_lock);
+				/* assume space for more */
+				conn->ksnc_tx_ready = 1;
+			}
+
+			if (rc == -ENOMEM) {
+				/* Do nothing; after a short timeout, this
+				 * conn will be reposted on kss_tx_conns. */
+			} else if (conn->ksnc_tx_ready &&
+				   !list_empty (&conn->ksnc_tx_queue)) {
+				/* reschedule for tx */
+				list_add_tail (&conn->ksnc_tx_list,
+						   &sched->kss_tx_conns);
+			} else {
+				conn->ksnc_tx_scheduled = 0;
+				/* drop my ref */
+				ksocknal_conn_decref(conn);
+			}
+
+			did_something = 1;
+		}
+		if (!did_something ||	   /* nothing to do */
+		    ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
+			spin_unlock_bh(&sched->kss_lock);
+
+			nloops = 0;
+
+			if (!did_something) {   /* wait for something to do */
+				cfs_wait_event_interruptible_exclusive(
+					sched->kss_waitq,
+					!ksocknal_sched_cansleep(sched), rc);
+				LASSERT (rc == 0);
+			} else {
+				cond_resched();
+			}
+
+			spin_lock_bh(&sched->kss_lock);
+		}
+	}
+
+	spin_unlock_bh(&sched->kss_lock);
+	ksocknal_thread_fini();
+	return 0;
+}
+
+/*
+ * Add connection to kss_rx_conns of scheduler
+ * and wakeup the scheduler.
+ */
+void ksocknal_read_callback (ksock_conn_t *conn)
+{
+	ksock_sched_t *sched;
+	ENTRY;
+
+	sched = conn->ksnc_scheduler;
+
+	spin_lock_bh(&sched->kss_lock);
+
+	conn->ksnc_rx_ready = 1;
+
+	if (!conn->ksnc_rx_scheduled) {  /* not being progressed */
+		list_add_tail(&conn->ksnc_rx_list,
+				  &sched->kss_rx_conns);
+		conn->ksnc_rx_scheduled = 1;
+		/* extra ref for scheduler */
+		ksocknal_conn_addref(conn);
+
+		wake_up (&sched->kss_waitq);
+	}
+	spin_unlock_bh(&sched->kss_lock);
+
+	EXIT;
+}
+
+/*
+ * Add connection to kss_tx_conns of scheduler
+ * and wakeup the scheduler.
+ */
+void ksocknal_write_callback (ksock_conn_t *conn)
+{
+	ksock_sched_t *sched;
+	ENTRY;
+
+	sched = conn->ksnc_scheduler;
+
+	spin_lock_bh(&sched->kss_lock);
+
+	conn->ksnc_tx_ready = 1;
+
+	if (!conn->ksnc_tx_scheduled && /* not being progressed */
+	    !list_empty(&conn->ksnc_tx_queue)) { /* packets to send */
+		list_add_tail (&conn->ksnc_tx_list,
+				   &sched->kss_tx_conns);
+		conn->ksnc_tx_scheduled = 1;
+		/* extra ref for scheduler */
+		ksocknal_conn_addref(conn);
+
+		wake_up (&sched->kss_waitq);
+	}
+
+	spin_unlock_bh(&sched->kss_lock);
+
+	EXIT;
+}
+
+ksock_proto_t *
+ksocknal_parse_proto_version (ksock_hello_msg_t *hello)
+{
+	__u32   version = 0;
+
+	if (hello->kshm_magic == LNET_PROTO_MAGIC)
+		version = hello->kshm_version;
+	else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC))
+		version = __swab32(hello->kshm_version);
+
+	if (version != 0) {
+#if SOCKNAL_VERSION_DEBUG
+		if (*ksocknal_tunables.ksnd_protocol == 1)
+			return NULL;
+
+		if (*ksocknal_tunables.ksnd_protocol == 2 &&
+		    version == KSOCK_PROTO_V3)
+			return NULL;
+#endif
+		if (version == KSOCK_PROTO_V2)
+			return &ksocknal_protocol_v2x;
+
+		if (version == KSOCK_PROTO_V3)
+			return &ksocknal_protocol_v3x;
+
+		return NULL;
+	}
+
+	if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
+		lnet_magicversion_t *hmv = (lnet_magicversion_t *)hello;
+
+		CLASSERT (sizeof (lnet_magicversion_t) ==
+			  offsetof (ksock_hello_msg_t, kshm_src_nid));
+
+		if (hmv->version_major == cpu_to_le16 (KSOCK_PROTO_V1_MAJOR) &&
+		    hmv->version_minor == cpu_to_le16 (KSOCK_PROTO_V1_MINOR))
+			return &ksocknal_protocol_v1x;
+	}
+
+	return NULL;
+}
+
+int
+ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+		     lnet_nid_t peer_nid, ksock_hello_msg_t *hello)
+{
+	/* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
+	ksock_net_t	 *net = (ksock_net_t *)ni->ni_data;
+
+	LASSERT (hello->kshm_nips <= LNET_MAX_INTERFACES);
+
+	/* rely on caller to hold a ref on the socket so it won't disappear */
+	LASSERT (conn->ksnc_proto != NULL);
+
+	hello->kshm_src_nid	 = ni->ni_nid;
+	hello->kshm_dst_nid	 = peer_nid;
+	hello->kshm_src_pid	 = the_lnet.ln_pid;
+
+	hello->kshm_src_incarnation = net->ksnn_incarnation;
+	hello->kshm_ctype	   = conn->ksnc_type;
+
+	return conn->ksnc_proto->pro_send_hello(conn, hello);
+}
+
+int
+ksocknal_invert_type(int type)
+{
+	switch (type)
+	{
+	case SOCKLND_CONN_ANY:
+	case SOCKLND_CONN_CONTROL:
+		return (type);
+	case SOCKLND_CONN_BULK_IN:
+		return SOCKLND_CONN_BULK_OUT;
+	case SOCKLND_CONN_BULK_OUT:
+		return SOCKLND_CONN_BULK_IN;
+	default:
+		return (SOCKLND_CONN_NONE);
+	}
+}
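+/* kshm_ctype in a hello is the connection type from the sender's point of
+ * view, so the passive side inverts it: the peer's BULK_OUT is our BULK_IN
+ * (see ksocknal_recv_hello() below). */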
+
+int
+ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+		     ksock_hello_msg_t *hello, lnet_process_id_t *peerid,
+		     __u64 *incarnation)
+{
+	/* Return < 0        fatal error
+	 *         0         success
+	 *         EALREADY  lost connection race
+	 *         EPROTO    protocol version mismatch
+	 */
+	socket_t	*sock = conn->ksnc_sock;
+	int		  active = (conn->ksnc_proto != NULL);
+	int		  timeout;
+	int		  proto_match;
+	int		  rc;
+	ksock_proto_t       *proto;
+	lnet_process_id_t    recv_id;
+
+	/* socket type set on active connections - not set on passive */
+	LASSERT (!active == !(conn->ksnc_type != SOCKLND_CONN_NONE));
+
+	timeout = active ? *ksocknal_tunables.ksnd_timeout :
+			    lnet_acceptor_timeout();
+
+	rc = libcfs_sock_read(sock, &hello->kshm_magic, sizeof (hello->kshm_magic), timeout);
+	if (rc != 0) {
+		CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
+			rc, HIPQUAD(conn->ksnc_ipaddr));
+		LASSERT (rc < 0);
+		return rc;
+	}
+
+	if (hello->kshm_magic != LNET_PROTO_MAGIC &&
+	    hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) &&
+	    hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) {
+		/* Unexpected magic! */
+		CERROR ("Bad magic(1) %#08x (%#08x expected) from "
+			"%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic),
+			LNET_PROTO_TCP_MAGIC,
+			HIPQUAD(conn->ksnc_ipaddr));
+		return -EPROTO;
+	}
+
+	rc = libcfs_sock_read(sock, &hello->kshm_version,
+			      sizeof(hello->kshm_version), timeout);
+	if (rc != 0) {
+		CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
+			rc, HIPQUAD(conn->ksnc_ipaddr));
+		LASSERT (rc < 0);
+		return rc;
+	}
+
+	proto = ksocknal_parse_proto_version(hello);
+	if (proto == NULL) {
+		if (!active) {
+			/* unknown protocol from peer, tell peer my protocol */
+			conn->ksnc_proto = &ksocknal_protocol_v3x;
+#if SOCKNAL_VERSION_DEBUG
+			if (*ksocknal_tunables.ksnd_protocol == 2)
+				conn->ksnc_proto = &ksocknal_protocol_v2x;
+			else if (*ksocknal_tunables.ksnd_protocol == 1)
+				conn->ksnc_proto = &ksocknal_protocol_v1x;
+#endif
+			hello->kshm_nips = 0;
+			ksocknal_send_hello(ni, conn, ni->ni_nid, hello);
+		}
+
+		CERROR ("Unknown protocol version (%d.x expected)"
+			" from %u.%u.%u.%u\n",
+			conn->ksnc_proto->pro_version,
+			HIPQUAD(conn->ksnc_ipaddr));
+
+		return -EPROTO;
+	}
+
+	proto_match = (conn->ksnc_proto == proto);
+	conn->ksnc_proto = proto;
+
+	/* receive the rest of the hello message anyway */
+	rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout);
+	if (rc != 0) {
+		CERROR("Error %d reading or checking hello from %u.%u.%u.%u\n",
+		       rc, HIPQUAD(conn->ksnc_ipaddr));
+		LASSERT (rc < 0);
+		return rc;
+	}
+
+	*incarnation = hello->kshm_src_incarnation;
+
+	if (hello->kshm_src_nid == LNET_NID_ANY) {
+		CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY "
+		       "from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr));
+		return -EPROTO;
+	}
+
+	if (!active &&
+	    conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
+		/* Userspace NAL assigns peer process ID from socket */
+		recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
+		recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), conn->ksnc_ipaddr);
+	} else {
+		recv_id.nid = hello->kshm_src_nid;
+		recv_id.pid = hello->kshm_src_pid;
+	}
+
+	if (!active) {
+		*peerid = recv_id;
+
+		/* peer determines type */
+		conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
+		if (conn->ksnc_type == SOCKLND_CONN_NONE) {
+			CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n",
+				hello->kshm_ctype, libcfs_id2str(*peerid),
+				HIPQUAD(conn->ksnc_ipaddr));
+			return -EPROTO;
+		}
+
+		return 0;
+	}
+
+	if (peerid->pid != recv_id.pid ||
+	    peerid->nid != recv_id.nid) {
+		LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host"
+				   " %u.%u.%u.%u, but they claimed they were "
+				   "%s; please check your Lustre "
+				   "configuration.\n",
+				   libcfs_id2str(*peerid),
+				   HIPQUAD(conn->ksnc_ipaddr),
+				   libcfs_id2str(recv_id));
+		return -EPROTO;
+	}
+
+	if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
+		/* Possible protocol mismatch or I lost the connection race */
+		return proto_match ? EALREADY : EPROTO;
+	}
+
+	if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
+		CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n",
+			conn->ksnc_type, libcfs_id2str(*peerid),
+			HIPQUAD(conn->ksnc_ipaddr),
+			hello->kshm_ctype);
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+int
+ksocknal_connect (ksock_route_t *route)
+{
+	LIST_HEAD    (zombies);
+	ksock_peer_t     *peer = route->ksnr_peer;
+	int	       type;
+	int	       wanted;
+	socket_t     *sock;
+	cfs_time_t	deadline;
+	int	       retry_later = 0;
+	int	       rc = 0;
+
+	deadline = cfs_time_add(cfs_time_current(),
+				cfs_time_seconds(*ksocknal_tunables.ksnd_timeout));
+
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	LASSERT (route->ksnr_scheduled);
+	LASSERT (!route->ksnr_connecting);
+
+	route->ksnr_connecting = 1;
+
+	for (;;) {
+		wanted = ksocknal_route_mask() & ~route->ksnr_connected;
+
+		/* stop connecting if peer/route got closed under me, or
+		 * route got connected while queued */
+		if (peer->ksnp_closing || route->ksnr_deleted ||
+		    wanted == 0) {
+			retry_later = 0;
+			break;
+		}
+
+		/* reschedule if peer is connecting to me */
+		if (peer->ksnp_accepting > 0) {
+			CDEBUG(D_NET,
+			       "peer %s(%d) already connecting to me, retry later.\n",
+			       libcfs_nid2str(peer->ksnp_id.nid), peer->ksnp_accepting);
+			retry_later = 1;
+		}
+
+		if (retry_later) /* needs reschedule */
+			break;
+
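+		/* establish any missing connection types in a fixed
+		 * priority order: ANY, then CONTROL, BULK_IN, BULK_OUT */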
+		if ((wanted & (1 << SOCKLND_CONN_ANY)) != 0) {
+			type = SOCKLND_CONN_ANY;
+		} else if ((wanted & (1 << SOCKLND_CONN_CONTROL)) != 0) {
+			type = SOCKLND_CONN_CONTROL;
+		} else if ((wanted & (1 << SOCKLND_CONN_BULK_IN)) != 0) {
+			type = SOCKLND_CONN_BULK_IN;
+		} else {
+			LASSERT ((wanted & (1 << SOCKLND_CONN_BULK_OUT)) != 0);
+			type = SOCKLND_CONN_BULK_OUT;
+		}
+
+		write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+		if (cfs_time_aftereq(cfs_time_current(), deadline)) {
+			rc = -ETIMEDOUT;
+			lnet_connect_console_error(rc, peer->ksnp_id.nid,
+						   route->ksnr_ipaddr,
+						   route->ksnr_port);
+			goto failed;
+		}
+
+		rc = lnet_connect(&sock, peer->ksnp_id.nid,
+				  route->ksnr_myipaddr,
+				  route->ksnr_ipaddr, route->ksnr_port);
+		if (rc != 0)
+			goto failed;
+
+		rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
+		if (rc < 0) {
+			lnet_connect_console_error(rc, peer->ksnp_id.nid,
+						   route->ksnr_ipaddr,
+						   route->ksnr_port);
+			goto failed;
+		}
+
+		/* A +ve RC means I have to retry because I lost the connection
+		 * race or I have to renegotiate protocol version */
+		retry_later = (rc != 0);
+		if (retry_later)
+			CDEBUG(D_NET, "peer %s: conn race, retry later.\n",
+			       libcfs_nid2str(peer->ksnp_id.nid));
+
+		write_lock_bh(&ksocknal_data.ksnd_global_lock);
+	}
+
+	route->ksnr_scheduled = 0;
+	route->ksnr_connecting = 0;
+
+	if (retry_later) {
+		/* re-queue for attention; this frees me up to handle
+		 * the peer's incoming connection request */
+
+		if (rc == EALREADY ||
+		    (rc == 0 && peer->ksnp_accepting > 0)) {
+			/* We want to introduce a delay before the next
+			 * attempt to connect if we lost the conn race,
+			 * but the race is usually resolved quickly, so
+			 * min_reconnectms should be a good heuristic */
+			route->ksnr_retry_interval =
+				cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000;
+			route->ksnr_timeout = cfs_time_add(cfs_time_current(),
+							   route->ksnr_retry_interval);
+		}
+
+		ksocknal_launch_connection_locked(route);
+	}
+
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+	return retry_later;
+
+ failed:
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	route->ksnr_scheduled = 0;
+	route->ksnr_connecting = 0;
+
+	/* This is a retry rather than a new connection */
+	route->ksnr_retry_interval *= 2;
+	route->ksnr_retry_interval =
+		MAX(route->ksnr_retry_interval,
+		    cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000);
+	route->ksnr_retry_interval =
+		MIN(route->ksnr_retry_interval,
+		    cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms)/1000);
+
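+	/* e.g. with min_reconnectms = 1000 and max_reconnectms = 60000
+	 * (illustrative values), successive failed attempts retry after
+	 * 1, 2, 4, ... seconds, capped at 60 seconds */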
+	LASSERT (route->ksnr_retry_interval != 0);
+	route->ksnr_timeout = cfs_time_add(cfs_time_current(),
+					   route->ksnr_retry_interval);
+
+	if (!list_empty(&peer->ksnp_tx_queue) &&
+	    peer->ksnp_accepting == 0 &&
+	    ksocknal_find_connecting_route_locked(peer) == NULL) {
+		ksock_conn_t *conn;
+
+		/* ksnp_tx_queue is queued on a conn on successful
+		 * connection for V1.x and V2.x */
+		if (!list_empty (&peer->ksnp_conns)) {
+			conn = list_entry(peer->ksnp_conns.next,
+					      ksock_conn_t, ksnc_list);
+			LASSERT (conn->ksnc_proto == &ksocknal_protocol_v3x);
+		}
+
+		/* take all the blocked packets while I've got the lock and
+		 * complete below... */
+		list_splice_init(&peer->ksnp_tx_queue, &zombies);
+	}
+
+#if 0	   /* irrelevant with only eager routes */
+	if (!route->ksnr_deleted) {
+		/* make this route least-favourite for re-selection */
+		list_del(&route->ksnr_list);
+		list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
+	}
+#endif
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+	ksocknal_peer_failed(peer);
+	ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1);
+	return 0;
+}
+
+/*
+ * Check whether we need to create more connds.
+ * It will try to create a new thread if necessary; @timeout can be
+ * updated if thread creation fails, so the caller won't keep retrying
+ * while running out of resources.
+ */
+static int
+ksocknal_connd_check_start(long sec, long *timeout)
+{
+	char name[16];
+	int rc;
+	int total = ksocknal_data.ksnd_connd_starting +
+		    ksocknal_data.ksnd_connd_running;
+
+	if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
+		/* still initializing */
+		return 0;
+	}
+
+	if (total >= *ksocknal_tunables.ksnd_nconnds_max ||
+	    total > ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV) {
+		/* can't create more connds, or we still have enough
+		 * threads to handle more connection requests */
+		return 0;
+	}
+
+	if (list_empty(&ksocknal_data.ksnd_connd_routes)) {
+		/* no pending connecting request */
+		return 0;
+	}
+
+	if (sec - ksocknal_data.ksnd_connd_failed_stamp <= 1) {
+		/* may run out of resource, retry later */
+		*timeout = cfs_time_seconds(1);
+		return 0;
+	}
+
+	if (ksocknal_data.ksnd_connd_starting > 0) {
+		/* serialize starting to avoid flood */
+		return 0;
+	}
+
+	ksocknal_data.ksnd_connd_starting_stamp = sec;
+	ksocknal_data.ksnd_connd_starting++;
+	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+
+	/* NB: total is the next id */
+	snprintf(name, sizeof(name), "socknal_cd%02d", total);
+	rc = ksocknal_thread_start(ksocknal_connd, NULL, name);
+
+	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+	if (rc == 0)
+		return 1;
+
+	/* we tried ... */
+	LASSERT(ksocknal_data.ksnd_connd_starting > 0);
+	ksocknal_data.ksnd_connd_starting--;
+	ksocknal_data.ksnd_connd_failed_stamp = cfs_time_current_sec();
+
+	return 1;
+}
+
+/*
+ * Check whether the current thread can exit; it will return 1 if there
+ * are too many threads and no thread has been created in the past 120
+ * seconds.
+ * Also, this function may update @timeout to make the caller come back
+ * to recheck these conditions.
+ */
+static int
+ksocknal_connd_check_stop(long sec, long *timeout)
+{
+	int val;
+
+	if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
+		/* still initializing */
+		return 0;
+	}
+
+	if (ksocknal_data.ksnd_connd_starting > 0) {
+		/* in progress of starting new thread */
+		return 0;
+	}
+
+	if (ksocknal_data.ksnd_connd_running <=
+	    *ksocknal_tunables.ksnd_nconnds) { /* can't shrink */
+		return 0;
+	}
+
+	/* created thread in past 120 seconds? */
+	val = (int)(ksocknal_data.ksnd_connd_starting_stamp +
+		    SOCKNAL_CONND_TIMEOUT - sec);
+
+	*timeout = (val > 0) ? cfs_time_seconds(val) :
+			       cfs_time_seconds(SOCKNAL_CONND_TIMEOUT);
+	if (val > 0)
+		return 0;
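+	/* e.g. if the last connd was started 30s ago, val = 120 - 30 = 90,
+	 * so we stay alive and ask to be rechecked in 90 seconds */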
+
+	/* no thread created in the past 120 seconds */
+
+	return ksocknal_data.ksnd_connd_running >
+	       ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV;
+}
+
+/* Go through connd_routes queue looking for a route that we can process
+ * right now, @timeout_p can be updated if we need to come back later */
+static ksock_route_t *
+ksocknal_connd_get_route_locked(signed long *timeout_p)
+{
+	ksock_route_t *route;
+	cfs_time_t     now;
+
+	now = cfs_time_current();
+
+	/* connd_routes can contain both pending and ordinary routes */
+	list_for_each_entry (route, &ksocknal_data.ksnd_connd_routes,
+				 ksnr_connd_list) {
+
+		if (route->ksnr_retry_interval == 0 ||
+		    cfs_time_aftereq(now, route->ksnr_timeout))
+			return route;
+
+		if (*timeout_p == MAX_SCHEDULE_TIMEOUT ||
+		    (int)*timeout_p > (int)(route->ksnr_timeout - now))
+			*timeout_p = (int)(route->ksnr_timeout - now);
+	}
+
+	return NULL;
+}
+
+int
+ksocknal_connd (void *arg)
+{
+	spinlock_t    *connd_lock = &ksocknal_data.ksnd_connd_lock;
+	ksock_connreq_t   *cr;
+	wait_queue_t     wait;
+	int		nloops = 0;
+	int		cons_retry = 0;
+
+	cfs_block_allsigs ();
+
+	init_waitqueue_entry_current (&wait);
+
+	spin_lock_bh(connd_lock);
+
+	LASSERT(ksocknal_data.ksnd_connd_starting > 0);
+	ksocknal_data.ksnd_connd_starting--;
+	ksocknal_data.ksnd_connd_running++;
+
+	while (!ksocknal_data.ksnd_shuttingdown) {
+		ksock_route_t *route = NULL;
+		long sec = cfs_time_current_sec();
+		long timeout = MAX_SCHEDULE_TIMEOUT;
+		int  dropped_lock = 0;
+
+		if (ksocknal_connd_check_stop(sec, &timeout)) {
+			/* wakeup another one to check stop */
+			wake_up(&ksocknal_data.ksnd_connd_waitq);
+			break;
+		}
+
+		if (ksocknal_connd_check_start(sec, &timeout)) {
+			/* created new thread */
+			dropped_lock = 1;
+		}
+
+		if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
+			/* Connection accepted by the listener */
+			cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next,
+					ksock_connreq_t, ksncr_list);
+
+			list_del(&cr->ksncr_list);
+			spin_unlock_bh(connd_lock);
+			dropped_lock = 1;
+
+			ksocknal_create_conn(cr->ksncr_ni, NULL,
+					     cr->ksncr_sock, SOCKLND_CONN_NONE);
+			lnet_ni_decref(cr->ksncr_ni);
+			LIBCFS_FREE(cr, sizeof(*cr));
+
+			spin_lock_bh(connd_lock);
+		}
+
+		/* Only handle an outgoing connection request if there
+		 * are threads left to handle incoming connections and
+		 * to create new connds */
+		if (ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV <
+		    ksocknal_data.ksnd_connd_running) {
+			route = ksocknal_connd_get_route_locked(&timeout);
+		}
+		if (route != NULL) {
+			list_del (&route->ksnr_connd_list);
+			ksocknal_data.ksnd_connd_connecting++;
+			spin_unlock_bh(connd_lock);
+			dropped_lock = 1;
+
+			if (ksocknal_connect(route)) {
+				/* consecutive retry */
+				if (cons_retry++ > SOCKNAL_INSANITY_RECONN) {
+					CWARN("massive consecutive "
+					      "re-connecting to %u.%u.%u.%u\n",
+					      HIPQUAD(route->ksnr_ipaddr));
+					cons_retry = 0;
+				}
+			} else {
+				cons_retry = 0;
+			}
+
+			ksocknal_route_decref(route);
+
+			spin_lock_bh(connd_lock);
+			ksocknal_data.ksnd_connd_connecting--;
+		}
+
+		if (dropped_lock) {
+			if (++nloops < SOCKNAL_RESCHED)
+				continue;
+			spin_unlock_bh(connd_lock);
+			nloops = 0;
+			cond_resched();
+			spin_lock_bh(connd_lock);
+			continue;
+		}
+
+		/* Nothing to do for 'timeout' */
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq, &wait);
+		spin_unlock_bh(connd_lock);
+
+		nloops = 0;
+		waitq_timedwait(&wait, TASK_INTERRUPTIBLE, timeout);
+
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait);
+		spin_lock_bh(connd_lock);
+	}
+	ksocknal_data.ksnd_connd_running--;
+	spin_unlock_bh(connd_lock);
+
+	ksocknal_thread_fini();
+	return 0;
+}
+
+ksock_conn_t *
+ksocknal_find_timed_out_conn (ksock_peer_t *peer)
+{
+	/* We're called with a shared lock on ksnd_global_lock */
+	ksock_conn_t      *conn;
+	struct list_head	*ctmp;
+
+	list_for_each (ctmp, &peer->ksnp_conns) {
+		int     error;
+		conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
+
+		/* Don't need the {get,put}connsock dance to deref ksnc_sock */
+		LASSERT (!conn->ksnc_closing);
+
+		/* SOCK_ERROR will reset the socket's error code on
+		 * some platforms (like Darwin 8.x) */
+		error = cfs_sock_error(conn->ksnc_sock);
+		if (error != 0) {
+			ksocknal_conn_addref(conn);
+
+			switch (error) {
+			case ECONNRESET:
+				CNETERR("A connection with %s "
+					"(%u.%u.%u.%u:%d) was reset; "
+					"it may have rebooted.\n",
+					libcfs_id2str(peer->ksnp_id),
+					HIPQUAD(conn->ksnc_ipaddr),
+					conn->ksnc_port);
+				break;
+			case ETIMEDOUT:
+				CNETERR("A connection with %s "
+					"(%u.%u.%u.%u:%d) timed out; the "
+					"network or node may be down.\n",
+					libcfs_id2str(peer->ksnp_id),
+					HIPQUAD(conn->ksnc_ipaddr),
+					conn->ksnc_port);
+				break;
+			default:
+				CNETERR("An unexpected network error %d "
+					"occurred with %s "
+					"(%u.%u.%u.%u:%d)\n", error,
+					libcfs_id2str(peer->ksnp_id),
+					HIPQUAD(conn->ksnc_ipaddr),
+					conn->ksnc_port);
+				break;
+			}
+
+			return (conn);
+		}
+
+		if (conn->ksnc_rx_started &&
+		    cfs_time_aftereq(cfs_time_current(),
+				     conn->ksnc_rx_deadline)) {
+			/* Timed out incomplete incoming message */
+			ksocknal_conn_addref(conn);
+			CNETERR("Timeout receiving from %s (%u.%u.%u.%u:%d), "
+				"state %d wanted %d left %d\n",
+				libcfs_id2str(peer->ksnp_id),
+				HIPQUAD(conn->ksnc_ipaddr),
+				conn->ksnc_port,
+				conn->ksnc_rx_state,
+				conn->ksnc_rx_nob_wanted,
+				conn->ksnc_rx_nob_left);
+			return (conn);
+		}
+
+		if ((!list_empty(&conn->ksnc_tx_queue) ||
+		     cfs_sock_wmem_queued(conn->ksnc_sock) != 0) &&
+		    cfs_time_aftereq(cfs_time_current(),
+				     conn->ksnc_tx_deadline)) {
+			/* Timed out messages queued for sending or
+			 * buffered in the socket's send buffer */
+			ksocknal_conn_addref(conn);
+			CNETERR("Timeout sending data to %s (%u.%u.%u.%u:%d); "
+				"the network or that node may be down.\n",
+				libcfs_id2str(peer->ksnp_id),
+				HIPQUAD(conn->ksnc_ipaddr),
+				conn->ksnc_port);
+			return (conn);
+		}
+	}
+
+	return (NULL);
+}
+
+static inline void
+ksocknal_flush_stale_txs(ksock_peer_t *peer)
+{
+	ksock_tx_t	*tx;
+	LIST_HEAD      (stale_txs);
+
+	write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+	while (!list_empty (&peer->ksnp_tx_queue)) {
+		tx = list_entry (peer->ksnp_tx_queue.next,
+				     ksock_tx_t, tx_list);
+
+		if (!cfs_time_aftereq(cfs_time_current(),
+				      tx->tx_deadline))
+			break;
+
+		list_del (&tx->tx_list);
+		list_add_tail (&tx->tx_list, &stale_txs);
+	}
+
+	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+	ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
+}
+
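+/* Returns 0 if no keepalive is needed (ksnd_global_lock is held
+ * throughout); otherwise the lock is dropped and re-taken, and the
+ * result is 1 for a launched keepalive or a -ve error, so the caller
+ * must restart its peer-table scan (see ksocknal_check_peer_timeouts). */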
+int
+ksocknal_send_keepalive_locked(ksock_peer_t *peer)
+{
+	ksock_sched_t  *sched;
+	ksock_conn_t   *conn;
+	ksock_tx_t     *tx;
+
+	if (list_empty(&peer->ksnp_conns)) /* last_alive will be updated by create_conn */
+		return 0;
+
+	if (peer->ksnp_proto != &ksocknal_protocol_v3x)
+		return 0;
+
+	if (*ksocknal_tunables.ksnd_keepalive <= 0 ||
+	    cfs_time_before(cfs_time_current(),
+			    cfs_time_add(peer->ksnp_last_alive,
+					 cfs_time_seconds(*ksocknal_tunables.ksnd_keepalive))))
+		return 0;
+
+	if (cfs_time_before(cfs_time_current(),
+			    peer->ksnp_send_keepalive))
+		return 0;
+
+	/* retry 10 secs later, so we don't put pressure on this
+	 * peer if we failed to send a keepalive this time */
+	peer->ksnp_send_keepalive = cfs_time_shift(10);
+
+	conn = ksocknal_find_conn_locked(peer, NULL, 1);
+	if (conn != NULL) {
+		sched = conn->ksnc_scheduler;
+
+		spin_lock_bh(&sched->kss_lock);
+		if (!list_empty(&conn->ksnc_tx_queue)) {
+			spin_unlock_bh(&sched->kss_lock);
+			/* there is a queued ACK, no need for a keepalive */
+			return 0;
+		}
+
+		spin_unlock_bh(&sched->kss_lock);
+	}
+
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+
+	/* cookie = 1 is reserved for keepalive PING */
+	tx = ksocknal_alloc_tx_noop(1, 1);
+	if (tx == NULL) {
+		read_lock(&ksocknal_data.ksnd_global_lock);
+		return -ENOMEM;
+	}
+
+	if (ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id) == 0) {
+		read_lock(&ksocknal_data.ksnd_global_lock);
+		return 1;
+	}
+
+	ksocknal_free_tx(tx);
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	return -EIO;
+}
+
+
+void
+ksocknal_check_peer_timeouts (int idx)
+{
+	struct list_head       *peers = &ksocknal_data.ksnd_peers[idx];
+	ksock_peer_t     *peer;
+	ksock_conn_t     *conn;
+	ksock_tx_t       *tx;
+
+ again:
+	/* NB. We expect to have a look at all the peers and not find any
+	 * connections to time out, so we just use a shared lock while we
+	 * take a look... */
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	list_for_each_entry(peer, peers, ksnp_list) {
+		cfs_time_t  deadline = 0;
+		int	 resid = 0;
+		int	 n     = 0;
+
+		if (ksocknal_send_keepalive_locked(peer) != 0) {
+			read_unlock(&ksocknal_data.ksnd_global_lock);
+			goto again;
+		}
+
+		conn = ksocknal_find_timed_out_conn (peer);
+
+		if (conn != NULL) {
+			read_unlock(&ksocknal_data.ksnd_global_lock);
+
+			ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
+
+			/* NB we won't find this one again, but we can't
+			 * just proceed with the next peer, since we dropped
+			 * ksnd_global_lock and it might be dead already! */
+			ksocknal_conn_decref(conn);
+			goto again;
+		}
+
+		/* we can't process stale txs right here because we're
+		 * holding only shared lock */
+		if (!list_empty (&peer->ksnp_tx_queue)) {
+			ksock_tx_t *tx =
+				list_entry (peer->ksnp_tx_queue.next,
+						ksock_tx_t, tx_list);
+
+			if (cfs_time_aftereq(cfs_time_current(),
+					     tx->tx_deadline)) {
+
+				ksocknal_peer_addref(peer);
+				read_unlock(&ksocknal_data.ksnd_global_lock);
+
+				ksocknal_flush_stale_txs(peer);
+
+				ksocknal_peer_decref(peer);
+				goto again;
+			}
+		}
+
+		if (list_empty(&peer->ksnp_zc_req_list))
+			continue;
+
+		spin_lock(&peer->ksnp_lock);
+		list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
+			if (!cfs_time_aftereq(cfs_time_current(),
+					      tx->tx_deadline))
+				break;
+			/* ignore the TX if connection is being closed */
+			if (tx->tx_conn->ksnc_closing)
+				continue;
+			n++;
+		}
+
+		if (n == 0) {
+			spin_unlock(&peer->ksnp_lock);
+			continue;
+		}
+
+		tx = list_entry(peer->ksnp_zc_req_list.next,
+				    ksock_tx_t, tx_zc_list);
+		deadline = tx->tx_deadline;
+		resid    = tx->tx_resid;
+		conn     = tx->tx_conn;
+		ksocknal_conn_addref(conn);
+
+		spin_unlock(&peer->ksnp_lock);
+		read_unlock(&ksocknal_data.ksnd_global_lock);
+
+		CERROR("Total %d stale ZC_REQs for peer %s detected; the "
+		       "oldest(%p) timed out %ld secs ago, "
+		       "resid: %d, wmem: %d\n",
+		       n, libcfs_nid2str(peer->ksnp_id.nid), tx,
+		       cfs_duration_sec(cfs_time_current() - deadline),
+		       resid, cfs_sock_wmem_queued(conn->ksnc_sock));
+
+		ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
+		ksocknal_conn_decref(conn);
+		goto again;
+	}
+
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+}
+
+int
+ksocknal_reaper (void *arg)
+{
+	wait_queue_t     wait;
+	ksock_conn_t      *conn;
+	ksock_sched_t     *sched;
+	struct list_head	 enomem_conns;
+	int		nenomem_conns;
+	cfs_duration_t     timeout;
+	int		i;
+	int		peer_index = 0;
+	cfs_time_t	 deadline = cfs_time_current();
+
+	cfs_block_allsigs ();
+
+	INIT_LIST_HEAD(&enomem_conns);
+	init_waitqueue_entry_current (&wait);
+
+	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+	while (!ksocknal_data.ksnd_shuttingdown) {
+
+		if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) {
+			conn = list_entry(ksocknal_data.ksnd_deathrow_conns.next,
+					  ksock_conn_t, ksnc_list);
+			list_del (&conn->ksnc_list);
+
+			spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+			ksocknal_terminate_conn(conn);
+			ksocknal_conn_decref(conn);
+
+			spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+			continue;
+		}
+
+		if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) {
+			conn = list_entry(ksocknal_data.ksnd_zombie_conns.next,
+					  ksock_conn_t, ksnc_list);
+			list_del (&conn->ksnc_list);
+
+			spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+			ksocknal_destroy_conn(conn);
+
+			spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+			continue;
+		}
+
+		if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) {
+			list_add(&enomem_conns,
+				     &ksocknal_data.ksnd_enomem_conns);
+			list_del_init(&ksocknal_data.ksnd_enomem_conns);
+		}
+
+		spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+		/* reschedule all the connections that stalled with ENOMEM... */
+		nenomem_conns = 0;
+		while (!list_empty (&enomem_conns)) {
+			conn = list_entry (enomem_conns.next,
+					       ksock_conn_t, ksnc_tx_list);
+			list_del (&conn->ksnc_tx_list);
+
+			sched = conn->ksnc_scheduler;
+
+			spin_lock_bh(&sched->kss_lock);
+
+			LASSERT(conn->ksnc_tx_scheduled);
+			conn->ksnc_tx_ready = 1;
+			list_add_tail(&conn->ksnc_tx_list,
+					  &sched->kss_tx_conns);
+			wake_up(&sched->kss_waitq);
+
+			spin_unlock_bh(&sched->kss_lock);
+			nenomem_conns++;
+		}
+
+		/* careful with the jiffy wrap... */
+		while ((timeout = cfs_time_sub(deadline,
+					       cfs_time_current())) <= 0) {
+			const int n = 4;
+			const int p = 1;
+			int       chunk = ksocknal_data.ksnd_peer_hash_size;
+
+			/* Time to check for timeouts on a few more peers: I do
+			 * checks every 'p' seconds on a proportion of the peer
+			 * table and I need to check every connection 'n' times
+			 * within a timeout interval, to ensure I detect a
+			 * timeout on any connection within (n+1)/n times the
+			 * timeout interval. */
+
+			if (*ksocknal_tunables.ksnd_timeout > n * p)
+				chunk = (chunk * n * p) /
+					*ksocknal_tunables.ksnd_timeout;
+			if (chunk == 0)
+				chunk = 1;
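+			/* Illustrative arithmetic (assuming a peer hash
+			 * size of 101 and the default 50s timeout):
+			 * chunk = (101 * 4 * 1) / 50 = 8, so 8 buckets
+			 * are checked per second and the whole table is
+			 * covered roughly every 13s, i.e. about n times
+			 * per timeout interval. */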
+
+			for (i = 0; i < chunk; i++) {
+				ksocknal_check_peer_timeouts (peer_index);
+				peer_index = (peer_index + 1) %
+					     ksocknal_data.ksnd_peer_hash_size;
+			}
+
+			deadline = cfs_time_add(deadline, cfs_time_seconds(p));
+		}
+
+		if (nenomem_conns != 0) {
+			/* Reduce my timeout if I rescheduled ENOMEM conns.
+			 * This also prevents me getting woken immediately
+			 * if any go back on my enomem list. */
+			timeout = SOCKNAL_ENOMEM_RETRY;
+		}
+		ksocknal_data.ksnd_reaper_waketime =
+			cfs_time_add(cfs_time_current(), timeout);
+
+		set_current_state (TASK_INTERRUPTIBLE);
+		add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
+
+		if (!ksocknal_data.ksnd_shuttingdown &&
+		    list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
+		    list_empty (&ksocknal_data.ksnd_zombie_conns))
+			waitq_timedwait (&wait, TASK_INTERRUPTIBLE,
+					     timeout);
+
+		set_current_state (TASK_RUNNING);
+		remove_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
+
+		spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+	}
+
+	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+	ksocknal_thread_fini();
+	return 0;
+}

+ 1088 - 0
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c

@@ -0,0 +1,1088 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#include "socklnd.h"
+
+# if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+
+
+enum {
+	SOCKLND_TIMEOUT = 1,
+	SOCKLND_CREDITS,
+	SOCKLND_PEER_TXCREDITS,
+	SOCKLND_PEER_RTRCREDITS,
+	SOCKLND_PEER_TIMEOUT,
+	SOCKLND_NCONNDS,
+	SOCKLND_RECONNECTS_MIN,
+	SOCKLND_RECONNECTS_MAX,
+	SOCKLND_EAGER_ACK,
+	SOCKLND_ZERO_COPY,
+	SOCKLND_TYPED,
+	SOCKLND_BULK_MIN,
+	SOCKLND_RX_BUFFER_SIZE,
+	SOCKLND_TX_BUFFER_SIZE,
+	SOCKLND_NAGLE,
+	SOCKLND_IRQ_AFFINITY,
+	SOCKLND_ROUND_ROBIN,
+	SOCKLND_KEEPALIVE,
+	SOCKLND_KEEPALIVE_IDLE,
+	SOCKLND_KEEPALIVE_COUNT,
+	SOCKLND_KEEPALIVE_INTVL,
+	SOCKLND_BACKOFF_INIT,
+	SOCKLND_BACKOFF_MAX,
+	SOCKLND_PROTOCOL,
+	SOCKLND_ZERO_COPY_RECV,
+	SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS
+};
+
+static ctl_table_t ksocknal_ctl_table[] = {
+	{
+		.ctl_name = SOCKLND_TIMEOUT,
+		.procname = "timeout",
+		.data     = &ksocknal_tunables.ksnd_timeout,
+		.maxlen   = sizeof (int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_CREDITS,
+		.procname = "credits",
+		.data     = &ksocknal_tunables.ksnd_credits,
+		.maxlen   = sizeof (int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	 {
+		.ctl_name = SOCKLND_PEER_TXCREDITS,
+		.procname = "peer_credits",
+		.data     = &ksocknal_tunables.ksnd_peertxcredits,
+		.maxlen   = sizeof (int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	 {
+		.ctl_name = SOCKLND_PEER_RTRCREDITS,
+		.procname = "peer_buffer_credits",
+		.data     = &ksocknal_tunables.ksnd_peerrtrcredits,
+		.maxlen   = sizeof (int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_PEER_TIMEOUT,
+		.procname = "peer_timeout",
+		.data     = &ksocknal_tunables.ksnd_peertimeout,
+		.maxlen   = sizeof (int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_NCONNDS,
+		.procname = "nconnds",
+		.data     = &ksocknal_tunables.ksnd_nconnds,
+		.maxlen   = sizeof (int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_RECONNECTS_MIN,
+		.procname = "min_reconnectms",
+		.data     = &ksocknal_tunables.ksnd_min_reconnectms,
+		.maxlen   = sizeof (int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_RECONNECTS_MAX,
+		.procname = "max_reconnectms",
+		.data     = &ksocknal_tunables.ksnd_max_reconnectms,
+		.maxlen   = sizeof (int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_EAGER_ACK,
+		.procname = "eager_ack",
+		.data     = &ksocknal_tunables.ksnd_eager_ack,
+		.maxlen   = sizeof (int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_ZERO_COPY,
+		.procname = "zero_copy",
+		.data     = &ksocknal_tunables.ksnd_zc_min_payload,
+		.maxlen   = sizeof (int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_ZERO_COPY_RECV,
+		.procname = "zero_copy_recv",
+		.data     = &ksocknal_tunables.ksnd_zc_recv,
+		.maxlen   = sizeof (int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+
+	{
+		.ctl_name = SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS,
+		.procname = "zero_copy_recv_min_nfrags",
+		.data     = &ksocknal_tunables.ksnd_zc_recv_min_nfrags,
+		.maxlen   = sizeof (int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_TYPED,
+		.procname = "typed",
+		.data     = &ksocknal_tunables.ksnd_typed_conns,
+		.maxlen   = sizeof (int),
+		.mode     = 0444,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_BULK_MIN,
+		.procname = "min_bulk",
+		.data     = &ksocknal_tunables.ksnd_min_bulk,
+		.maxlen   = sizeof (int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_RX_BUFFER_SIZE,
+		.procname = "rx_buffer_size",
+		.data     = &ksocknal_tunables.ksnd_rx_buffer_size,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_TX_BUFFER_SIZE,
+		.procname = "tx_buffer_size",
+		.data     = &ksocknal_tunables.ksnd_tx_buffer_size,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_NAGLE,
+		.procname = "nagle",
+		.data     = &ksocknal_tunables.ksnd_nagle,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_ROUND_ROBIN,
+		.procname = "round_robin",
+		.data     = &ksocknal_tunables.ksnd_round_robin,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_KEEPALIVE,
+		.procname = "keepalive",
+		.data     = &ksocknal_tunables.ksnd_keepalive,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_KEEPALIVE_IDLE,
+		.procname = "keepalive_idle",
+		.data     = &ksocknal_tunables.ksnd_keepalive_idle,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_KEEPALIVE_COUNT,
+		.procname = "keepalive_count",
+		.data     = &ksocknal_tunables.ksnd_keepalive_count,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = SOCKLND_KEEPALIVE_INTVL,
+		.procname = "keepalive_intvl",
+		.data     = &ksocknal_tunables.ksnd_keepalive_intvl,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+#if SOCKNAL_VERSION_DEBUG
+	{
+		.ctl_name = SOCKLND_PROTOCOL,
+		.procname = "protocol",
+		.data     = &ksocknal_tunables.ksnd_protocol,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+#endif
+	{0}
+};
+
+
+ctl_table_t ksocknal_top_ctl_table[] = {
+	{
+		.ctl_name = CTL_SOCKLND,
+		.procname = "socknal",
+		.data     = NULL,
+		.maxlen   = 0,
+		.mode     = 0555,
+		.child    = ksocknal_ctl_table
+	},
+	{ 0 }
+};
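+/* Once registered (see ksocknal_lib_tunables_init() below), these entries
+ * should appear under /proc/sys/socknal/, e.g. /proc/sys/socknal/timeout;
+ * mode 0644 entries are writable at runtime, 0444 ones are read-only. */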
+
+int
+ksocknal_lib_tunables_init(void)
+{
+	if (!*ksocknal_tunables.ksnd_typed_conns) {
+		int rc = -EINVAL;
+#if SOCKNAL_VERSION_DEBUG
+		if (*ksocknal_tunables.ksnd_protocol < 3)
+			rc = 0;
+#endif
+		if (rc != 0) {
+			CERROR("Protocol V3.x MUST have typed connections\n");
+			return rc;
+		}
+	}
+
+	if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
+		*ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
+	if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
+		*ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
+
+	ksocknal_tunables.ksnd_sysctl =
+		cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
+
+	if (ksocknal_tunables.ksnd_sysctl == NULL)
+		CWARN("Can't setup /proc tunables\n");
+
+	return 0;
+}
+
+void
+ksocknal_lib_tunables_fini(void)
+{
+	if (ksocknal_tunables.ksnd_sysctl != NULL)
+		unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
+}
+#else
+int
+ksocknal_lib_tunables_init(void)
+{
+	return 0;
+}
+
+void
+ksocknal_lib_tunables_fini(void)
+{
+}
+#endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
+
+int
+ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
+{
+	int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
+				     &conn->ksnc_ipaddr,
+				     &conn->ksnc_port);
+
+	/* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
+	LASSERT (!conn->ksnc_closing);
+
+	if (rc != 0) {
+		CERROR ("Error %d getting sock peer IP\n", rc);
+		return rc;
+	}
+
+	rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
+				 &conn->ksnc_myipaddr, NULL);
+	if (rc != 0) {
+		CERROR ("Error %d getting sock local IP\n", rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+int
+ksocknal_lib_zc_capable(ksock_conn_t *conn)
+{
+	int  caps = conn->ksnc_sock->sk->sk_route_caps;
+
+	if (conn->ksnc_proto == &ksocknal_protocol_v1x)
+		return 0;
+
+	/* ZC if the socket supports scatter/gather and doesn't need software
+	 * checksums */
+	return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0);
+}
+
+int
+ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+	struct socket *sock = conn->ksnc_sock;
+	int	    nob;
+	int	    rc;
+
+	if (*ksocknal_tunables.ksnd_enable_csum	&& /* checksum enabled */
+	    conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection  */
+	    tx->tx_nob == tx->tx_resid		 && /* first sending    */
+	    tx->tx_msg.ksm_csum == 0)		     /* not checksummed  */
+		ksocknal_lib_csum_tx(tx);
+
+	/* NB we can't trust socket ops to either consume our iovs
+	 * or leave them alone. */
+
+	{
+#if SOCKNAL_SINGLE_FRAG_TX
+		struct iovec    scratch;
+		struct iovec   *scratchiov = &scratch;
+		unsigned int    niov = 1;
+#else
+		struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+		unsigned int    niov = tx->tx_niov;
+#endif
+		struct msghdr msg = {
+			.msg_name       = NULL,
+			.msg_namelen    = 0,
+			.msg_iov	= scratchiov,
+			.msg_iovlen     = niov,
+			.msg_control    = NULL,
+			.msg_controllen = 0,
+			.msg_flags      = MSG_DONTWAIT
+		};
+		mm_segment_t oldmm = get_fs();
+		int  i;
+
+		for (nob = i = 0; i < niov; i++) {
+			scratchiov[i] = tx->tx_iov[i];
+			nob += scratchiov[i].iov_len;
+		}
+
+		if (!list_empty(&conn->ksnc_tx_queue) ||
+		    nob < tx->tx_resid)
+			msg.msg_flags |= MSG_MORE;
+
+		set_fs (KERNEL_DS);
+		rc = sock_sendmsg(sock, &msg, nob);
+		set_fs (oldmm);
+	}
+	return rc;
+}
+
+int
+ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+	struct socket *sock = conn->ksnc_sock;
+	lnet_kiov_t   *kiov = tx->tx_kiov;
+	int	    rc;
+	int	    nob;
+
+	/* Not NOOP message */
+	LASSERT (tx->tx_lnetmsg != NULL);
+
+	/* NB we can't trust socket ops to either consume our iovs
+	 * or leave them alone. */
+	if (tx->tx_msg.ksm_zc_cookies[0] != 0) {
+		/* Zero copy is enabled */
+		struct sock   *sk = sock->sk;
+		struct page   *page = kiov->kiov_page;
+		int	    offset = kiov->kiov_offset;
+		int	    fragsize = kiov->kiov_len;
+		int	    msgflg = MSG_DONTWAIT;
+
+		CDEBUG(D_NET, "page %p + offset %x for %d\n",
+			       page, offset, kiov->kiov_len);
+
+		if (!list_empty(&conn->ksnc_tx_queue) ||
+		    fragsize < tx->tx_resid)
+			msgflg |= MSG_MORE;
+
+		if (sk->sk_prot->sendpage != NULL) {
+			rc = sk->sk_prot->sendpage(sk, page,
+						   offset, fragsize, msgflg);
+		} else {
+			rc = cfs_tcp_sendpage(sk, page, offset, fragsize,
+					      msgflg);
+		}
+	} else {
+#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
+		struct iovec  scratch;
+		struct iovec *scratchiov = &scratch;
+		unsigned int  niov = 1;
+#else
+#ifdef CONFIG_HIGHMEM
+#warning "XXX risk of kmap deadlock on multiple frags..."
+#endif
+		struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+		unsigned int  niov = tx->tx_nkiov;
+#endif
+		struct msghdr msg = {
+			.msg_name       = NULL,
+			.msg_namelen    = 0,
+			.msg_iov	= scratchiov,
+			.msg_iovlen     = niov,
+			.msg_control    = NULL,
+			.msg_controllen = 0,
+			.msg_flags      = MSG_DONTWAIT
+		};
+		mm_segment_t  oldmm = get_fs();
+		int	   i;
+
+		for (nob = i = 0; i < niov; i++) {
+			scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+						 kiov[i].kiov_offset;
+			nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+		}
+
+		if (!list_empty(&conn->ksnc_tx_queue) ||
+		    nob < tx->tx_resid)
+			msg.msg_flags |= MSG_MORE;
+
+		set_fs (KERNEL_DS);
+		rc = sock_sendmsg(sock, &msg, nob);
+		set_fs (oldmm);
+
+		for (i = 0; i < niov; i++)
+			kunmap(kiov[i].kiov_page);
+	}
+	return rc;
+}
+
+void
+ksocknal_lib_eager_ack (ksock_conn_t *conn)
+{
+	int	    opt = 1;
+	mm_segment_t   oldmm = get_fs();
+	struct socket *sock = conn->ksnc_sock;
+
+	/* Remind the socket to ACK eagerly.  If I don't, the socket might
+	 * think I'm about to send something it could piggy-back the ACK
+	 * on, introducing delay in completing zero-copy sends in my
+	 * peer. */
+
+	set_fs(KERNEL_DS);
+	sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK,
+			       (char *)&opt, sizeof (opt));
+	set_fs(oldmm);
+}
+
+int
+ksocknal_lib_recv_iov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX
+	struct iovec  scratch;
+	struct iovec *scratchiov = &scratch;
+	unsigned int  niov = 1;
+#else
+	struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+	unsigned int  niov = conn->ksnc_rx_niov;
+#endif
+	struct iovec *iov = conn->ksnc_rx_iov;
+	struct msghdr msg = {
+		.msg_name       = NULL,
+		.msg_namelen    = 0,
+		.msg_iov        = scratchiov,
+		.msg_iovlen     = niov,
+		.msg_control    = NULL,
+		.msg_controllen = 0,
+		.msg_flags      = 0
+	};
+	mm_segment_t oldmm = get_fs();
+	int	  nob;
+	int	  i;
+	int	  rc;
+	int	  fragnob;
+	int	  sum;
+	__u32	saved_csum;
+
+	/* NB we can't trust socket ops to either consume our iovs
+	 * or leave them alone. */
+	LASSERT (niov > 0);
+
+	for (nob = i = 0; i < niov; i++) {
+		scratchiov[i] = iov[i];
+		nob += scratchiov[i].iov_len;
+	}
+	LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+
+	set_fs (KERNEL_DS);
+	rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
+	/* NB this is just a boolean..........................^ */
+	set_fs (oldmm);
+
+	saved_csum = 0;
+	if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
+		saved_csum = conn->ksnc_msg.ksm_csum;
+		conn->ksnc_msg.ksm_csum = 0;
+	}
+
+	if (saved_csum != 0) {
+		/* accumulate checksum */
+		for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
+			LASSERT (i < niov);
+
+			fragnob = iov[i].iov_len;
+			if (fragnob > sum)
+				fragnob = sum;
+
+			conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
+							   iov[i].iov_base, fragnob);
+		}
+		conn->ksnc_msg.ksm_csum = saved_csum;
+	}
+
+	return rc;
+}
+
+static void
+ksocknal_lib_kiov_vunmap(void *addr)
+{
+	if (addr == NULL)
+		return;
+
+	vunmap(addr);
+}
+
+static void *
+ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
+		       struct iovec *iov, struct page **pages)
+{
+	void	     *addr;
+	int	       nob;
+	int	       i;
+
+	if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL)
+		return NULL;
+
+	LASSERT (niov <= LNET_MAX_IOV);
+
+	if (niov < 2 ||
+	    niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
+		return NULL;
+
+	for (nob = i = 0; i < niov; i++) {
+		if ((kiov[i].kiov_offset != 0 && i > 0) ||
+		    (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_CACHE_SIZE && i < niov - 1))
+			return NULL;
+
+		pages[i] = kiov[i].kiov_page;
+		nob += kiov[i].kiov_len;
+	}
+
+	addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
+	if (addr == NULL)
+		return NULL;
+
+	iov->iov_base = addr + kiov[0].kiov_offset;
+	iov->iov_len = nob;
+
+	return addr;
+}
+
+int
+ksocknal_lib_recv_kiov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
+	struct iovec   scratch;
+	struct iovec  *scratchiov = &scratch;
+	struct page  **pages      = NULL;
+	unsigned int   niov       = 1;
+#else
+#ifdef CONFIG_HIGHMEM
+#warning "XXX risk of kmap deadlock on multiple frags..."
+#endif
+	struct iovec  *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+	struct page  **pages      = conn->ksnc_scheduler->kss_rx_scratch_pgs;
+	unsigned int   niov       = conn->ksnc_rx_nkiov;
+#endif
+	lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
+	struct msghdr msg = {
+		.msg_name       = NULL,
+		.msg_namelen    = 0,
+		.msg_iov        = scratchiov,
+		.msg_control    = NULL,
+		.msg_controllen = 0,
+		.msg_flags      = 0
+	};
+	mm_segment_t oldmm = get_fs();
+	int	  nob;
+	int	  i;
+	int	  rc;
+	void	*base;
+	void	*addr;
+	int	  sum;
+	int	  fragnob;
+
+	/* NB we can't trust socket ops to either consume our iovs
+	 * or leave them alone. */
+	if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
+		nob = scratchiov[0].iov_len;
+		msg.msg_iovlen = 1;
+
+	} else {
+		for (nob = i = 0; i < niov; i++) {
+			nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+			scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+						 kiov[i].kiov_offset;
+		}
+		msg.msg_iovlen = niov;
+	}
+
+	LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+
+	set_fs (KERNEL_DS);
+	rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
+	/* NB this is just a boolean.......................^ */
+	set_fs (oldmm);
+
+	if (conn->ksnc_msg.ksm_csum != 0) {
+		for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
+			LASSERT (i < niov);
+
+			/* Dang! have to kmap again because I have nowhere to stash the
+			 * mapped address.  But by doing it while the page is still
+			 * mapped, the kernel just bumps the map count and returns me
+			 * the address it stashed. */
+			base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
+			fragnob = kiov[i].kiov_len;
+			if (fragnob > sum)
+				fragnob = sum;
+
+			conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
+							   base, fragnob);
+
+			kunmap(kiov[i].kiov_page);
+		}
+	}
+
+	if (addr != NULL) {
+		ksocknal_lib_kiov_vunmap(addr);
+	} else {
+		for (i = 0; i < niov; i++)
+			kunmap(kiov[i].kiov_page);
+	}
+
+	return (rc);
+}
+
+void
+ksocknal_lib_csum_tx(ksock_tx_t *tx)
+{
+	int	  i;
+	__u32	csum;
+	void	*base;
+
+	LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg);
+	LASSERT(tx->tx_conn != NULL);
+	LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
+
+	tx->tx_msg.ksm_csum = 0;
+
+	csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base,
+			     tx->tx_iov[0].iov_len);
+
+	if (tx->tx_kiov != NULL) {
+		for (i = 0; i < tx->tx_nkiov; i++) {
+			base = kmap(tx->tx_kiov[i].kiov_page) +
+			       tx->tx_kiov[i].kiov_offset;
+
+			csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
+
+			kunmap(tx->tx_kiov[i].kiov_page);
+		}
+	} else {
+		for (i = 1; i < tx->tx_niov; i++)
+			csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base,
+					     tx->tx_iov[i].iov_len);
+	}
+
+	if (*ksocknal_tunables.ksnd_inject_csum_error) {
+		csum++;
+		*ksocknal_tunables.ksnd_inject_csum_error = 0;
+	}
+
+	tx->tx_msg.ksm_csum = csum;
+}
+
+int
+ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
+{
+	mm_segment_t   oldmm = get_fs ();
+	struct socket *sock = conn->ksnc_sock;
+	int	    len;
+	int	    rc;
+
+	rc = ksocknal_connsock_addref(conn);
+	if (rc != 0) {
+		LASSERT (conn->ksnc_closing);
+		*txmem = *rxmem = *nagle = 0;
+		return (-ESHUTDOWN);
+	}
+
+	rc = libcfs_sock_getbuf(sock, txmem, rxmem);
+	if (rc == 0) {
+		len = sizeof(*nagle);
+		set_fs(KERNEL_DS);
+		rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY,
+					   (char *)nagle, &len);
+		set_fs(oldmm);
+	}
+
+	ksocknal_connsock_decref(conn);
+
+	if (rc == 0)
+		*nagle = !*nagle;
+	else
+		*txmem = *rxmem = *nagle = 0;
+
+	return (rc);
+}
+
+int
+ksocknal_lib_setup_sock (struct socket *sock)
+{
+	mm_segment_t    oldmm = get_fs ();
+	int	     rc;
+	int	     option;
+	int	     keep_idle;
+	int	     keep_intvl;
+	int	     keep_count;
+	int	     do_keepalive;
+	struct linger   linger;
+
+	sock->sk->sk_allocation = GFP_NOFS;
+
+	/* Ensure this socket aborts active sends immediately when we close
+	 * it. */
+
+	linger.l_onoff = 0;
+	linger.l_linger = 0;
+
+	set_fs (KERNEL_DS);
+	rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER,
+			      (char *)&linger, sizeof (linger));
+	set_fs (oldmm);
+	if (rc != 0) {
+		CERROR ("Can't set SO_LINGER: %d\n", rc);
+		return (rc);
+	}
+
+	option = -1;
+	set_fs (KERNEL_DS);
+	rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2,
+				    (char *)&option, sizeof (option));
+	set_fs (oldmm);
+	if (rc != 0) {
+		CERROR ("Can't set TCP_LINGER2: %d\n", rc);
+		return (rc);
+	}
+
+	if (!*ksocknal_tunables.ksnd_nagle) {
+		option = 1;
+
+		set_fs (KERNEL_DS);
+		rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY,
+					    (char *)&option, sizeof (option));
+		set_fs (oldmm);
+		if (rc != 0) {
+			CERROR ("Can't disable nagle: %d\n", rc);
+			return (rc);
+		}
+	}
+
+	rc = libcfs_sock_setbuf(sock,
+				*ksocknal_tunables.ksnd_tx_buffer_size,
+				*ksocknal_tunables.ksnd_rx_buffer_size);
+	if (rc != 0) {
+		CERROR ("Can't set tx buffer %d, rx buffer %d: %d\n",
+			*ksocknal_tunables.ksnd_tx_buffer_size,
+			*ksocknal_tunables.ksnd_rx_buffer_size, rc);
+		return (rc);
+	}
+
+	/* TCP_BACKOFF_* sockopt tunables are not supported in stock kernels */
+
+	/* snapshot tunables */
+	keep_idle  = *ksocknal_tunables.ksnd_keepalive_idle;
+	keep_count = *ksocknal_tunables.ksnd_keepalive_count;
+	keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
+
+	do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
+
+	option = (do_keepalive ? 1 : 0);
+	set_fs (KERNEL_DS);
+	rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE,
+			      (char *)&option, sizeof (option));
+	set_fs (oldmm);
+	if (rc != 0) {
+		CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
+		return (rc);
+	}
+
+	if (!do_keepalive)
+		return (0);
+
+	set_fs (KERNEL_DS);
+	rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE,
+				    (char *)&keep_idle, sizeof (keep_idle));
+	set_fs (oldmm);
+	if (rc != 0) {
+		CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
+		return (rc);
+	}
+
+	set_fs (KERNEL_DS);
+	rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL,
+				    (char *)&keep_intvl, sizeof (keep_intvl));
+	set_fs (oldmm);
+	if (rc != 0) {
+		CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
+		return (rc);
+	}
+
+	set_fs (KERNEL_DS);
+	rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT,
+				    (char *)&keep_count, sizeof (keep_count));
+	set_fs (oldmm);
+	if (rc != 0) {
+		CERROR ("Can't set TCP_KEEPCNT: %d\n", rc);
+		return (rc);
+	}
+
+	return (0);
+}
+
+void
+ksocknal_lib_push_conn (ksock_conn_t *conn)
+{
+	struct sock    *sk;
+	struct tcp_sock *tp;
+	int	     nonagle;
+	int	     val = 1;
+	int	     rc;
+	mm_segment_t    oldmm;
+
+	rc = ksocknal_connsock_addref(conn);
+	if (rc != 0)			    /* being shut down */
+		return;
+
+	sk = conn->ksnc_sock->sk;
+	tp = tcp_sk(sk);
+
+	lock_sock (sk);
+	nonagle = tp->nonagle;
+	tp->nonagle = 1;
+	release_sock (sk);
+
+	oldmm = get_fs ();
+	set_fs (KERNEL_DS);
+
+	rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
+				      (char *)&val, sizeof (val));
+	LASSERT (rc == 0);
+
+	set_fs (oldmm);
+
+	lock_sock (sk);
+	tp->nonagle = nonagle;
+	release_sock (sk);
+
+	ksocknal_connsock_decref(conn);
+}
+
+extern void ksocknal_read_callback (ksock_conn_t *conn);
+extern void ksocknal_write_callback (ksock_conn_t *conn);
+/*
+ * socket call back in Linux
+ */
+static void
+ksocknal_data_ready (struct sock *sk, int n)
+{
+	ksock_conn_t  *conn;
+	ENTRY;
+
+	/* interleave correctly with closing sockets... */
+	LASSERT(!in_irq());
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	conn = sk->sk_user_data;
+	if (conn == NULL) {	     /* raced with ksocknal_terminate_conn */
+		LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
+		sk->sk_data_ready (sk, n);
+	} else
+		ksocknal_read_callback(conn);
+
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+
+	EXIT;
+}
+
+static void
+ksocknal_write_space (struct sock *sk)
+{
+	ksock_conn_t  *conn;
+	int	    wspace;
+	int	    min_wspace;
+
+	/* interleave correctly with closing sockets... */
+	LASSERT(!in_irq());
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	conn = sk->sk_user_data;
+	wspace = SOCKNAL_WSPACE(sk);
+	min_wspace = SOCKNAL_MIN_WSPACE(sk);
+
+	CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
+	       sk, wspace, min_wspace, conn,
+	       (conn == NULL) ? "" : (conn->ksnc_tx_ready ?
+				      " ready" : " blocked"),
+	       (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
+				      " scheduled" : " idle"),
+	       (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
+				      " empty" : " queued"));
+
+	if (conn == NULL) {	     /* raced with ksocknal_terminate_conn */
+		LASSERT (sk->sk_write_space != &ksocknal_write_space);
+		sk->sk_write_space (sk);
+
+		read_unlock(&ksocknal_data.ksnd_global_lock);
+		return;
+	}
+
+	if (wspace >= min_wspace) {	      /* got enough space */
+		ksocknal_write_callback(conn);
+
+		/* Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the
+		 * ENOMEM check in ksocknal_transmit is race-free (think about
+		 * it). */
+
+		clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
+	}
+
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+}
+
+void
+ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
+{
+	conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
+	conn->ksnc_saved_write_space = sock->sk->sk_write_space;
+}
+
+void
+ksocknal_lib_set_callback(struct socket *sock,  ksock_conn_t *conn)
+{
+	sock->sk->sk_user_data = conn;
+	sock->sk->sk_data_ready = ksocknal_data_ready;
+	sock->sk->sk_write_space = ksocknal_write_space;
+	return;
+}
+
+void
+ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
+{
+	/* Remove conn's network callbacks.
+	 * NB I _have_ to restore the callback, rather than storing a noop,
+	 * since the socket could survive past this module being unloaded!! */
+	sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
+	sock->sk->sk_write_space = conn->ksnc_saved_write_space;
+
+	/* A callback could be in progress already; they hold a read lock
+	 * on ksnd_global_lock (to serialise with me) and NOOP if
+	 * sk_user_data is NULL. */
+	sock->sk->sk_user_data = NULL;
+
+	return;
+}
+
+int
+ksocknal_lib_memory_pressure(ksock_conn_t *conn)
+{
+	int	    rc = 0;
+	ksock_sched_t *sched;
+
+	sched = conn->ksnc_scheduler;
+	spin_lock_bh(&sched->kss_lock);
+
+	if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
+	    !conn->ksnc_tx_ready) {
+		/* SOCK_NOSPACE is set when the socket fills
+		 * and cleared in the write_space callback
+		 * (which also sets ksnc_tx_ready).  If
+		 * SOCK_NOSPACE and ksnc_tx_ready are BOTH
+		 * zero, I didn't fill the socket and
+		 * write_space won't reschedule me, so I
+		 * return -ENOMEM to get my caller to retry
+		 * after a timeout */
+		rc = -ENOMEM;
+	}
+
+	spin_unlock_bh(&sched->kss_lock);
+
+	return rc;
+}
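
A minimal user-space sketch of the fragment-walking checksum loop used by ksocknal_lib_recv_iov() above, assuming a trivial byte sum in place of the real ksocknal_csum(); demo_csum, csum_partial_recv and the ~0u seed are illustrative choices, not part of the source:

#include <stdio.h>
#include <stddef.h>
#include <sys/uio.h>

/* Hypothetical stand-in for ksocknal_csum(): a plain byte sum. */
static unsigned int demo_csum(unsigned int sum, const unsigned char *p, size_t len)
{
	while (len-- > 0)
		sum += *p++;
	return sum;
}

/* Checksum the first 'rc' bytes actually received, walking the iov
 * fragments the way ksocknal_lib_recv_iov() does: each fragment is
 * clamped to the bytes still unaccounted for. */
static unsigned int csum_partial_recv(const struct iovec *iov, int rc)
{
	unsigned int csum = ~0u;	/* demo seed */
	int i, sum, fragnob;

	for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
		fragnob = iov[i].iov_len;
		if (fragnob > sum)
			fragnob = sum;
		csum = demo_csum(csum, iov[i].iov_base, fragnob);
	}
	return csum;
}

int main(void)
{
	unsigned char a[4] = "abcd", b[4] = "efgh";
	struct iovec iov[2] = { { a, 4 }, { b, 4 } };

	/* Only 6 of 8 posted bytes arrived: all of frag 0, 2 bytes of frag 1. */
	printf("csum = %u\n", csum_partial_recv(iov, 6));
	return 0;
}

The point is the clamping: when the socket delivers fewer bytes than were posted, only the received prefix of the final fragment contributes to the running checksum.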

+ 91 - 0
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.h

@@ -0,0 +1,91 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_PORTAL_ALLOC
+
+#ifndef __LINUX_SOCKNAL_LIB_H__
+#define __LINUX_SOCKNAL_LIB_H__
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/uio.h>
+#include <linux/if.h>
+
+#include <asm/uaccess.h>
+#include <asm/irq.h>
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+#include <asm/uaccess.h>
+#include <asm/div64.h>
+#include <linux/syscalls.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/libcfs/linux/portals_compat25.h>
+
+#include <linux/crc32.h>
+static inline __u32 ksocknal_csum(__u32 crc, unsigned char const *p, size_t len)
+{
+#if 1
+	return crc32_le(crc, p, len);
+#else
+	while (len-- > 0)
+		crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff);
+	return crc;
+#endif
+}
+
+#define SOCKNAL_WSPACE(sk)       sk_stream_wspace(sk)
+#define SOCKNAL_MIN_WSPACE(sk)   sk_stream_min_wspace(sk)
+
+/* assume one thread for each connection type */
+#define SOCKNAL_NSCHEDS		3
+#define SOCKNAL_NSCHEDS_HIGH	(SOCKNAL_NSCHEDS << 1)
+
+#endif
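
The disabled #else branch of ksocknal_csum() is self-contained and easy to exercise on its own; a sketch (the live path is crc32_le(); the ~0 seed below mirrors how ksocknal_lib_csum_tx() seeds the CRC):

#include <stdio.h>
#include <stddef.h>

/* The #else fallback from ksocknal_csum() above: a bytewise rolling sum
 * kept in the low byte, with a counter bumped into the upper bytes. */
static unsigned int fallback_csum(unsigned int crc, const unsigned char *p, size_t len)
{
	while (len-- > 0)
		crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff);
	return crc;
}

int main(void)
{
	const unsigned char msg[] = "hello";

	printf("0x%08x\n", fallback_csum(~0u, msg, sizeof(msg) - 1));
	return 0;
}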

+ 198 - 0
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c

@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ *   Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ *   Portals is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Portals is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Portals; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socklnd.h"
+
+static int sock_timeout = 50;
+CFS_MODULE_PARM(sock_timeout, "i", int, 0644,
+		"dead socket timeout (seconds)");
+
+static int credits = 256;
+CFS_MODULE_PARM(credits, "i", int, 0444,
+		"# concurrent sends");
+
+static int peer_credits = 8;
+CFS_MODULE_PARM(peer_credits, "i", int, 0444,
+		"# concurrent sends to 1 peer");
+
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+		"# per-peer router buffer credits");
+
+static int peer_timeout = 180;
+CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
+		"Seconds without aliveness news to declare peer dead (<=0 to disable)");
+
+/* Number of scheduler daemons in each thread pool (one pool per CPU
+ * partition); if unset, a reasonable value is estimated from the CPU count. */
+static unsigned int nscheds;
+CFS_MODULE_PARM(nscheds, "i", int, 0444,
+		"# scheduler daemons in each pool while starting");
+
+static int nconnds = 4;
+CFS_MODULE_PARM(nconnds, "i", int, 0444,
+		"# connection daemons while starting");
+
+static int nconnds_max = 64;
+CFS_MODULE_PARM(nconnds_max, "i", int, 0444,
+		"max # connection daemons");
+
+static int min_reconnectms = 1000;
+CFS_MODULE_PARM(min_reconnectms, "i", int, 0644,
+		"min connection retry interval (mS)");
+
+static int max_reconnectms = 60000;
+CFS_MODULE_PARM(max_reconnectms, "i", int, 0644,
+		"max connection retry interval (mS)");
+
+#define DEFAULT_EAGER_ACK 0
+static int eager_ack = DEFAULT_EAGER_ACK;
+CFS_MODULE_PARM(eager_ack, "i", int, 0644,
+		"send tcp ack packets eagerly");
+
+static int typed_conns = 1;
+CFS_MODULE_PARM(typed_conns, "i", int, 0444,
+		"use different sockets for bulk");
+
+static int min_bulk = (1<<10);
+CFS_MODULE_PARM(min_bulk, "i", int, 0644,
+		"smallest 'large' message");
+
+#define DEFAULT_BUFFER_SIZE 0
+static int tx_buffer_size = DEFAULT_BUFFER_SIZE;
+CFS_MODULE_PARM(tx_buffer_size, "i", int, 0644,
+		"socket tx buffer size (0 for system default)");
+
+static int rx_buffer_size = DEFAULT_BUFFER_SIZE;
+CFS_MODULE_PARM(rx_buffer_size, "i", int, 0644,
+		"socket rx buffer size (0 for system default)");
+
+static int nagle = 0;
+CFS_MODULE_PARM(nagle, "i", int, 0644,
+		"enable NAGLE?");
+
+static int round_robin = 1;
+CFS_MODULE_PARM(round_robin, "i", int, 0644,
+		"Round robin for multiple interfaces");
+
+static int keepalive = 30;
+CFS_MODULE_PARM(keepalive, "i", int, 0644,
+		"# seconds before send keepalive");
+
+static int keepalive_idle = 30;
+CFS_MODULE_PARM(keepalive_idle, "i", int, 0644,
+		"# idle seconds before probe");
+
+#define DEFAULT_KEEPALIVE_COUNT  5
+static int keepalive_count = DEFAULT_KEEPALIVE_COUNT;
+CFS_MODULE_PARM(keepalive_count, "i", int, 0644,
+		"# missed probes == dead");
+
+static int keepalive_intvl = 5;
+CFS_MODULE_PARM(keepalive_intvl, "i", int, 0644,
+		"seconds between probes");
+
+static int enable_csum = 0;
+CFS_MODULE_PARM(enable_csum, "i", int, 0644,
+		"enable check sum");
+
+static int inject_csum_error = 0;
+CFS_MODULE_PARM(inject_csum_error, "i", int, 0644,
+		"set non-zero to inject a checksum error");
+
+static int nonblk_zcack = 1;
+CFS_MODULE_PARM(nonblk_zcack, "i", int, 0644,
+		"always send ZC-ACK on non-blocking connection");
+
+static unsigned int zc_min_payload = (16 << 10);
+CFS_MODULE_PARM(zc_min_payload, "i", int, 0644,
+		"minimum payload size to zero copy");
+
+static unsigned int zc_recv = 0;
+CFS_MODULE_PARM(zc_recv, "i", int, 0644,
+		"enable ZC recv for Chelsio driver");
+
+static unsigned int zc_recv_min_nfrags = 16;
+CFS_MODULE_PARM(zc_recv_min_nfrags, "i", int, 0644,
+		"minimum # of fragments to enable ZC recv");
+
+#if SOCKNAL_VERSION_DEBUG
+static int protocol = 3;
+CFS_MODULE_PARM(protocol, "i", int, 0644,
+		"protocol version");
+#endif
+
+ksock_tunables_t ksocknal_tunables;
+
+int ksocknal_tunables_init(void)
+{
+
+	/* initialize ksocknal_tunables structure */
+	ksocknal_tunables.ksnd_timeout            = &sock_timeout;
+	ksocknal_tunables.ksnd_nscheds            = &nscheds;
+	ksocknal_tunables.ksnd_nconnds            = &nconnds;
+	ksocknal_tunables.ksnd_nconnds_max        = &nconnds_max;
+	ksocknal_tunables.ksnd_min_reconnectms    = &min_reconnectms;
+	ksocknal_tunables.ksnd_max_reconnectms    = &max_reconnectms;
+	ksocknal_tunables.ksnd_eager_ack          = &eager_ack;
+	ksocknal_tunables.ksnd_typed_conns        = &typed_conns;
+	ksocknal_tunables.ksnd_min_bulk           = &min_bulk;
+	ksocknal_tunables.ksnd_tx_buffer_size     = &tx_buffer_size;
+	ksocknal_tunables.ksnd_rx_buffer_size     = &rx_buffer_size;
+	ksocknal_tunables.ksnd_nagle              = &nagle;
+	ksocknal_tunables.ksnd_round_robin        = &round_robin;
+	ksocknal_tunables.ksnd_keepalive          = &keepalive;
+	ksocknal_tunables.ksnd_keepalive_idle     = &keepalive_idle;
+	ksocknal_tunables.ksnd_keepalive_count    = &keepalive_count;
+	ksocknal_tunables.ksnd_keepalive_intvl    = &keepalive_intvl;
+	ksocknal_tunables.ksnd_credits            = &credits;
+	ksocknal_tunables.ksnd_peertxcredits      = &peer_credits;
+	ksocknal_tunables.ksnd_peerrtrcredits     = &peer_buffer_credits;
+	ksocknal_tunables.ksnd_peertimeout        = &peer_timeout;
+	ksocknal_tunables.ksnd_enable_csum        = &enable_csum;
+	ksocknal_tunables.ksnd_inject_csum_error  = &inject_csum_error;
+	ksocknal_tunables.ksnd_nonblk_zcack       = &nonblk_zcack;
+	ksocknal_tunables.ksnd_zc_min_payload     = &zc_min_payload;
+	ksocknal_tunables.ksnd_zc_recv            = &zc_recv;
+	ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags;
+
+#if SOCKNAL_VERSION_DEBUG
+	ksocknal_tunables.ksnd_protocol           = &protocol;
+#endif
+
+#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+	ksocknal_tunables.ksnd_sysctl             = NULL;
+#endif
+
+	if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10))
+		*ksocknal_tunables.ksnd_zc_min_payload = (2 << 10);
+
+	/* initialize platform-specific tunables */
+	return ksocknal_lib_tunables_init();
+}
+
+void ksocknal_tunables_fini(void)
+{
+	ksocknal_lib_tunables_fini();
+}
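
CFS_MODULE_PARM is a libcfs portability wrapper. For a single tunable, roughly the same effect with stock kernel primitives would look like the sketch below; this is an approximation for illustration, not the macro's exact expansion, and the module name param_demo is hypothetical:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

/* Roughly what CFS_MODULE_PARM(keepalive_idle, "i", int, 0644, "...") sets
 * up: a writable integer exposed under
 * /sys/module/param_demo/parameters/keepalive_idle, plus a description. */
static int keepalive_idle = 30;
module_param(keepalive_idle, int, 0644);
MODULE_PARM_DESC(keepalive_idle, "# idle seconds before probe");

static int __init param_demo_init(void)
{
	pr_info("param_demo: keepalive_idle=%d\n", keepalive_idle);
	return 0;
}

static void __exit param_demo_exit(void)
{
}

module_init(param_demo_init);
module_exit(param_demo_exit);
MODULE_LICENSE("GPL");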

+ 797 - 0
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c

@@ -0,0 +1,797 @@
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ *   Author: Zach Brown <zab@zabbo.net>
+ *   Author: Peter J. Braam <braam@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *   Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ *   Portals is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Portals is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Portals; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socklnd.h"
+
+/*
+ * Protocol entries:
+ *   pro_send_hello()     : send hello message
+ *   pro_recv_hello()     : receive hello message
+ *   pro_pack()           : pack message header
+ *   pro_unpack()         : unpack message header
+ *   pro_queue_tx_zcack() : called holding BH lock: kss_lock
+ *                          return 1 if ACK is piggybacked, otherwise return 0
+ *   pro_queue_tx_msg()   : called holding BH lock: kss_lock
+ *                          return the ACK piggybacked on my message, or NULL
+ *   pro_handle_zcreq()   : handler of incoming ZC-REQ
+ *   pro_handle_zcack()   : handler of incoming ZC-ACK
+ *   pro_match_tx()       : called holding glock
+ */
+
+static ksock_tx_t *
+ksocknal_queue_tx_msg_v1(ksock_conn_t *conn, ksock_tx_t *tx_msg)
+{
+	/* V1.x, just enqueue it */
+	list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
+	return NULL;
+}
+
+void
+ksocknal_next_tx_carrier(ksock_conn_t *conn)
+{
+	ksock_tx_t     *tx = conn->ksnc_tx_carrier;
+
+	/* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
+	LASSERT (!list_empty(&conn->ksnc_tx_queue));
+	LASSERT (tx != NULL);
+
+	/* Next TX that can carry ZC-ACK or LNet message */
+	if (tx->tx_list.next == &conn->ksnc_tx_queue) {
+		/* no more packets queued */
+		conn->ksnc_tx_carrier = NULL;
+	} else {
+		conn->ksnc_tx_carrier = list_entry(tx->tx_list.next,
+						       ksock_tx_t, tx_list);
+		LASSERT (conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
+	}
+}
+
+static int
+ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn,
+			   ksock_tx_t *tx_ack, __u64 cookie)
+{
+	ksock_tx_t *tx = conn->ksnc_tx_carrier;
+
+	LASSERT (tx_ack == NULL ||
+		 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
+
+	/*
+	 * Enqueue or piggyback tx_ack / cookie:
+	 * . If no tx can piggyback the cookie of tx_ack (or cookie), just
+	 *   enqueue tx_ack (if tx_ack != NULL) and return 0.
+	 * . If some tx can piggyback the cookie of tx_ack (or cookie),
+	 *   piggyback the cookie and return 1.
+	 */
+	if (tx == NULL) {
+		if (tx_ack != NULL) {
+			list_add_tail(&tx_ack->tx_list,
+					  &conn->ksnc_tx_queue);
+			conn->ksnc_tx_carrier = tx_ack;
+		}
+		return 0;
+	}
+
+	if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
+		/* tx is noop zc-ack, can't piggyback zc-ack cookie */
+		if (tx_ack != NULL)
+			list_add_tail(&tx_ack->tx_list,
+					  &conn->ksnc_tx_queue);
+		return 0;
+	}
+
+	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
+	LASSERT(tx->tx_msg.ksm_zc_cookies[1] == 0);
+
+	if (tx_ack != NULL)
+		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
+
+	/* piggyback the zc-ack cookie */
+	tx->tx_msg.ksm_zc_cookies[1] = cookie;
+	/* move on to the next TX which can carry cookie */
+	ksocknal_next_tx_carrier(conn);
+
+	return 1;
+}
+
+static ksock_tx_t *
+ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg)
+{
+	ksock_tx_t  *tx  = conn->ksnc_tx_carrier;
+
+	/*
+	 * Enqueue tx_msg:
+	 * . If there is no NOOP on the connection, just enqueue
+	 *   tx_msg and return NULL
+	 * . If there is NOOP on the connection, piggyback the cookie
+	 *   and replace the NOOP tx, and return the NOOP tx.
+	 */
+	if (tx == NULL) { /* nothing on queue */
+		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
+		conn->ksnc_tx_carrier = tx_msg;
+		return NULL;
+	}
+
+	if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
+		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
+		return NULL;
+	}
+
+	LASSERT (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
+
+	/* There is a NOOP zc-ack whose cookie can be piggybacked */
+	tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
+	ksocknal_next_tx_carrier(conn);
+
+	/* use new_tx to replace the noop zc-ack packet */
+	list_add(&tx_msg->tx_list, &tx->tx_list);
+	list_del(&tx->tx_list);
+
+	return tx;
+}
+
+static int
+ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn,
+			   ksock_tx_t *tx_ack, __u64 cookie)
+{
+	ksock_tx_t *tx;
+
+	if (conn->ksnc_type != SOCKLND_CONN_ACK)
+		return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);
+
+	/* non-blocking ZC-ACK (to router) */
+	LASSERT (tx_ack == NULL ||
+		 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
+
+	if ((tx = conn->ksnc_tx_carrier) == NULL) {
+		if (tx_ack != NULL) {
+			list_add_tail(&tx_ack->tx_list,
+					  &conn->ksnc_tx_queue);
+			conn->ksnc_tx_carrier = tx_ack;
+		}
+		return 0;
+	}
+
+	/* conn->ksnc_tx_carrier != NULL */
+
+	if (tx_ack != NULL)
+		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
+
+	if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
+		return 1;
+
+	if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
+		/* replace the keepalive PING with a real ACK */
+		LASSERT (tx->tx_msg.ksm_zc_cookies[0] == 0);
+		tx->tx_msg.ksm_zc_cookies[1] = cookie;
+		return 1;
+	}
+
+	if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
+	    cookie == tx->tx_msg.ksm_zc_cookies[1]) {
+		CWARN("%s: duplicated ZC cookie: "LPU64"\n",
+		      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
+		return 1; /* XXX return error in the future */
+	}
+
+	if (tx->tx_msg.ksm_zc_cookies[0] == 0) {
+		/* NOOP tx carries only one ZC-ACK cookie so far; it can take at least one more */
+		if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
+			tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
+			tx->tx_msg.ksm_zc_cookies[1] = cookie;
+		} else {
+			tx->tx_msg.ksm_zc_cookies[0] = cookie;
+		}
+
+		if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
+			/* not likely to carry more ACKs, skip it to simplify logic */
+			ksocknal_next_tx_carrier(conn);
+		}
+
+		return 1;
+	}
+
+	/* takes two or more cookies already */
+
+	if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
+		__u64   tmp = 0;
+
+		/* two separate cookies: (a+2, a) or (a+1, a) */
+		LASSERT (tx->tx_msg.ksm_zc_cookies[0] -
+			 tx->tx_msg.ksm_zc_cookies[1] <= 2);
+
+		if (tx->tx_msg.ksm_zc_cookies[0] -
+		    tx->tx_msg.ksm_zc_cookies[1] == 2) {
+			if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
+				tmp = cookie;
+		} else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
+			tmp = tx->tx_msg.ksm_zc_cookies[1];
+		} else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
+			tmp = tx->tx_msg.ksm_zc_cookies[0];
+		}
+
+		if (tmp != 0) {
+			/* range of cookies */
+			tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
+			tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
+			return 1;
+		}
+
+	} else {
+		/* ksm_zc_cookies[0] < ksm_zc_cookies[1]: it is a range of cookies */
+		if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
+		    cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
+			CWARN("%s: duplicated ZC cookie: "LPU64"\n",
+			      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
+			return 1; /* XXX: return error in the future */
+		}
+
+		if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
+			tx->tx_msg.ksm_zc_cookies[1] = cookie;
+			return 1;
+		}
+
+		if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
+			tx->tx_msg.ksm_zc_cookies[0] = cookie;
+			return 1;
+		}
+	}
+
+	/* failed to piggyback ZC-ACK */
+	if (tx_ack != NULL) {
+		list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
+		/* the next tx can piggyback at least 1 ACK */
+		ksocknal_next_tx_carrier(conn);
+	}
+
+	return 0;
+}
+
+static int
+ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
+{
+	int nob;
+
+#if SOCKNAL_VERSION_DEBUG
+	if (!*ksocknal_tunables.ksnd_typed_conns)
+		return SOCKNAL_MATCH_YES;
+#endif
+
+	if (tx == NULL || tx->tx_lnetmsg == NULL) {
+		/* noop packet */
+		nob = offsetof(ksock_msg_t, ksm_u);
+	} else {
+		nob = tx->tx_lnetmsg->msg_len +
+		      ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
+		       sizeof(lnet_hdr_t) : sizeof(ksock_msg_t));
+	}
+
+	/* default checking for typed connection */
+	switch (conn->ksnc_type) {
+	default:
+		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
+		LBUG();
+	case SOCKLND_CONN_ANY:
+		return SOCKNAL_MATCH_YES;
+
+	case SOCKLND_CONN_BULK_IN:
+		return SOCKNAL_MATCH_MAY;
+
+	case SOCKLND_CONN_BULK_OUT:
+		if (nob < *ksocknal_tunables.ksnd_min_bulk)
+			return SOCKNAL_MATCH_MAY;
+		else
+			return SOCKNAL_MATCH_YES;
+
+	case SOCKLND_CONN_CONTROL:
+		if (nob >= *ksocknal_tunables.ksnd_min_bulk)
+			return SOCKNAL_MATCH_MAY;
+		else
+			return SOCKNAL_MATCH_YES;
+	}
+}
+
+static int
+ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
+{
+	int nob;
+
+	if (tx == NULL || tx->tx_lnetmsg == NULL)
+		nob = offsetof(ksock_msg_t, ksm_u);
+	else
+		nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t);
+
+	switch (conn->ksnc_type) {
+	default:
+		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
+		LBUG();
+	case SOCKLND_CONN_ANY:
+		return SOCKNAL_MATCH_NO;
+
+	case SOCKLND_CONN_ACK:
+		if (nonblk)
+			return SOCKNAL_MATCH_YES;
+		else if (tx == NULL || tx->tx_lnetmsg == NULL)
+			return SOCKNAL_MATCH_MAY;
+		else
+			return SOCKNAL_MATCH_NO;
+
+	case SOCKLND_CONN_BULK_OUT:
+		if (nonblk)
+			return SOCKNAL_MATCH_NO;
+		else if (nob < *ksocknal_tunables.ksnd_min_bulk)
+			return SOCKNAL_MATCH_MAY;
+		else
+			return SOCKNAL_MATCH_YES;
+
+	case SOCKLND_CONN_CONTROL:
+		if (nonblk)
+			return SOCKNAL_MATCH_NO;
+		else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
+			return SOCKNAL_MATCH_MAY;
+		else
+			return SOCKNAL_MATCH_YES;
+	}
+}
+
+/* (Sink) handle incoming ZC request from sender */
+static int
+ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote)
+{
+	ksock_peer_t   *peer = c->ksnc_peer;
+	ksock_conn_t   *conn;
+	ksock_tx_t     *tx;
+	int	     rc;
+
+	read_lock(&ksocknal_data.ksnd_global_lock);
+
+	conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
+	if (conn != NULL) {
+		ksock_sched_t *sched = conn->ksnc_scheduler;
+
+		LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL);
+
+		spin_lock_bh(&sched->kss_lock);
+
+		rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
+
+		spin_unlock_bh(&sched->kss_lock);
+
+		if (rc) { /* piggybacked */
+			read_unlock(&ksocknal_data.ksnd_global_lock);
+			return 0;
+		}
+	}
+
+	read_unlock(&ksocknal_data.ksnd_global_lock);
+
+	/* ACK connection is not ready, or can't piggyback the ACK */
+	tx = ksocknal_alloc_tx_noop(cookie, !!remote);
+	if (tx == NULL)
+		return -ENOMEM;
+
+	if ((rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) == 0)
+		return 0;
+
+	ksocknal_free_tx(tx);
+	return rc;
+}
+
+/* (Sender) handle ZC_ACK from sink */
+static int
+ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2)
+{
+	ksock_peer_t      *peer = conn->ksnc_peer;
+	ksock_tx_t	*tx;
+	ksock_tx_t	*tmp;
+	LIST_HEAD     (zlist);
+	int		count;
+
+	if (cookie1 == 0)
+		cookie1 = cookie2;
+
+	count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);
+
+	if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
+	    conn->ksnc_proto == &ksocknal_protocol_v3x) {
+		/* keepalive PING for V3.x, just ignore it */
+		return count == 1 ? 0 : -EPROTO;
+	}
+
+	spin_lock(&peer->ksnp_lock);
+
+	list_for_each_entry_safe(tx, tmp,
+				     &peer->ksnp_zc_req_list, tx_zc_list) {
+		__u64 c = tx->tx_msg.ksm_zc_cookies[0];
+
+		if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
+			tx->tx_msg.ksm_zc_cookies[0] = 0;
+			list_del(&tx->tx_zc_list);
+			list_add(&tx->tx_zc_list, &zlist);
+
+			if (--count == 0)
+				break;
+		}
+	}
+
+	spin_unlock(&peer->ksnp_lock);
+
+	while (!list_empty(&zlist)) {
+		tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
+		list_del(&tx->tx_zc_list);
+		ksocknal_tx_decref(tx);
+	}
+
+	return count == 0 ? 0 : -EPROTO;
+}
+
+static int
+ksocknal_send_hello_v1 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
+{
+	socket_t	*sock = conn->ksnc_sock;
+	lnet_hdr_t	  *hdr;
+	lnet_magicversion_t *hmv;
+	int		  rc;
+	int		  i;
+
+	CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid));
+
+	LIBCFS_ALLOC(hdr, sizeof(*hdr));
+	if (hdr == NULL) {
+		CERROR("Can't allocate lnet_hdr_t\n");
+		return -ENOMEM;
+	}
+
+	hmv = (lnet_magicversion_t *)&hdr->dest_nid;
+
+	/* Re-organize V2.x message header to V1.x (lnet_hdr_t)
+	 * header and send out */
+	hmv->magic	 = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
+	hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR);
+	hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR);
+
+	if (the_lnet.ln_testprotocompat != 0) {
+		/* single-shot proto check */
+		LNET_LOCK();
+		if ((the_lnet.ln_testprotocompat & 1) != 0) {
+			hmv->version_major++;   /* just different! */
+			the_lnet.ln_testprotocompat &= ~1;
+		}
+		if ((the_lnet.ln_testprotocompat & 2) != 0) {
+			hmv->magic = LNET_PROTO_MAGIC;
+			the_lnet.ln_testprotocompat &= ~2;
+		}
+		LNET_UNLOCK();
+	}
+
+	hdr->src_nid               = cpu_to_le64 (hello->kshm_src_nid);
+	hdr->src_pid               = cpu_to_le32 (hello->kshm_src_pid);
+	hdr->type                  = cpu_to_le32 (LNET_MSG_HELLO);
+	hdr->payload_length        = cpu_to_le32 (hello->kshm_nips * sizeof(__u32));
+	hdr->msg.hello.type        = cpu_to_le32 (hello->kshm_ctype);
+	hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation);
+
+	rc = libcfs_sock_write(sock, hdr, sizeof(*hdr),lnet_acceptor_timeout());
+
+	if (rc != 0) {
+		CNETERR("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
+			rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+		goto out;
+	}
+
+	if (hello->kshm_nips == 0)
+		goto out;
+
+	for (i = 0; i < (int) hello->kshm_nips; i++) {
+		hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]);
+	}
+
+	rc = libcfs_sock_write(sock, hello->kshm_ips,
+			       hello->kshm_nips * sizeof(__u32),
+			       lnet_acceptor_timeout());
+	if (rc != 0) {
+		CNETERR("Error %d sending HELLO payload (%d)"
+			" to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips,
+			HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+	}
+out:
+	LIBCFS_FREE(hdr, sizeof(*hdr));
+
+	return rc;
+}
+
+static int
+ksocknal_send_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
+{
+	socket_t   *sock = conn->ksnc_sock;
+	int	     rc;
+
+	hello->kshm_magic   = LNET_PROTO_MAGIC;
+	hello->kshm_version = conn->ksnc_proto->pro_version;
+
+	if (the_lnet.ln_testprotocompat != 0) {
+		/* single-shot proto check */
+		LNET_LOCK();
+		if ((the_lnet.ln_testprotocompat & 1) != 0) {
+			hello->kshm_version++;   /* just different! */
+			the_lnet.ln_testprotocompat &= ~1;
+		}
+		LNET_UNLOCK();
+	}
+
+	rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips),
+			       lnet_acceptor_timeout());
+
+	if (rc != 0) {
+		CNETERR("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
+			rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+		return rc;
+	}
+
+	if (hello->kshm_nips == 0)
+		return 0;
+
+	rc = libcfs_sock_write(sock, hello->kshm_ips,
+			       hello->kshm_nips * sizeof(__u32),
+			       lnet_acceptor_timeout());
+	if (rc != 0) {
+		CNETERR("Error %d sending HELLO payload (%d)"
+			" to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips,
+			HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+	}
+
+	return rc;
+}
+
+static int
+ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,int timeout)
+{
+	socket_t	*sock = conn->ksnc_sock;
+	lnet_hdr_t	  *hdr;
+	int		  rc;
+	int		  i;
+
+	LIBCFS_ALLOC(hdr, sizeof(*hdr));
+	if (hdr == NULL) {
+		CERROR("Can't allocate lnet_hdr_t\n");
+		return -ENOMEM;
+	}
+
+	rc = libcfs_sock_read(sock, &hdr->src_nid,
+			      sizeof (*hdr) - offsetof (lnet_hdr_t, src_nid),
+			      timeout);
+	if (rc != 0) {
+		CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
+			rc, HIPQUAD(conn->ksnc_ipaddr));
+		LASSERT (rc < 0 && rc != -EALREADY);
+		goto out;
+	}
+
+	/* ...and check we got what we expected */
+	if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) {
+		CERROR ("Expecting a HELLO hdr,"
+			" but got type %d from %u.%u.%u.%u\n",
+			le32_to_cpu (hdr->type),
+			HIPQUAD(conn->ksnc_ipaddr));
+		rc = -EPROTO;
+		goto out;
+	}
+
+	hello->kshm_src_nid         = le64_to_cpu (hdr->src_nid);
+	hello->kshm_src_pid         = le32_to_cpu (hdr->src_pid);
+	hello->kshm_src_incarnation = le64_to_cpu (hdr->msg.hello.incarnation);
+	hello->kshm_ctype           = le32_to_cpu (hdr->msg.hello.type);
+	hello->kshm_nips            = le32_to_cpu (hdr->payload_length) /
+				      sizeof (__u32);
+
+	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
+		CERROR("Bad nips %d from ip %u.%u.%u.%u\n",
+		       hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr));
+		rc = -EPROTO;
+		goto out;
+	}
+
+	if (hello->kshm_nips == 0)
+		goto out;
+
+	rc = libcfs_sock_read(sock, hello->kshm_ips,
+			      hello->kshm_nips * sizeof(__u32), timeout);
+	if (rc != 0) {
+		CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n",
+			rc, HIPQUAD(conn->ksnc_ipaddr));
+		LASSERT (rc < 0 && rc != -EALREADY);
+		goto out;
+	}
+
+	for (i = 0; i < (int) hello->kshm_nips; i++) {
+		hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
+
+		if (hello->kshm_ips[i] == 0) {
+			CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n",
+			       i, HIPQUAD(conn->ksnc_ipaddr));
+			rc = -EPROTO;
+			break;
+		}
+	}
+out:
+	LIBCFS_FREE(hdr, sizeof(*hdr));
+
+	return rc;
+}
+
+static int
+ksocknal_recv_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout)
+{
+	socket_t      *sock = conn->ksnc_sock;
+	int		rc;
+	int		i;
+
+	if (hello->kshm_magic == LNET_PROTO_MAGIC)
+		conn->ksnc_flip = 0;
+	else
+		conn->ksnc_flip = 1;
+
+	rc = libcfs_sock_read(sock, &hello->kshm_src_nid,
+			      offsetof(ksock_hello_msg_t, kshm_ips) -
+				       offsetof(ksock_hello_msg_t, kshm_src_nid),
+			      timeout);
+	if (rc != 0) {
+		CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
+			rc, HIPQUAD(conn->ksnc_ipaddr));
+		LASSERT (rc < 0 && rc != -EALREADY);
+		return rc;
+	}
+
+	if (conn->ksnc_flip) {
+		__swab32s(&hello->kshm_src_pid);
+		__swab64s(&hello->kshm_src_nid);
+		__swab32s(&hello->kshm_dst_pid);
+		__swab64s(&hello->kshm_dst_nid);
+		__swab64s(&hello->kshm_src_incarnation);
+		__swab64s(&hello->kshm_dst_incarnation);
+		__swab32s(&hello->kshm_ctype);
+		__swab32s(&hello->kshm_nips);
+	}
+
+	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
+		CERROR("Bad nips %d from ip %u.%u.%u.%u\n",
+		       hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr));
+		return -EPROTO;
+	}
+
+	if (hello->kshm_nips == 0)
+		return 0;
+
+	rc = libcfs_sock_read(sock, hello->kshm_ips,
+			      hello->kshm_nips * sizeof(__u32), timeout);
+	if (rc != 0) {
+		CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n",
+			rc, HIPQUAD(conn->ksnc_ipaddr));
+		LASSERT (rc < 0 && rc != -EALREADY);
+		return rc;
+	}
+
+	for (i = 0; i < (int) hello->kshm_nips; i++) {
+		if (conn->ksnc_flip)
+			__swab32s(&hello->kshm_ips[i]);
+
+		if (hello->kshm_ips[i] == 0) {
+			CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n",
+			       i, HIPQUAD(conn->ksnc_ipaddr));
+			return -EPROTO;
+		}
+	}
+
+	return 0;
+}
+
+static void
+ksocknal_pack_msg_v1(ksock_tx_t *tx)
+{
+	/* V1.x has no KSOCK_MSG_NOOP */
+	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
+	LASSERT(tx->tx_lnetmsg != NULL);
+
+	tx->tx_iov[0].iov_base = (void *)&tx->tx_lnetmsg->msg_hdr;
+	tx->tx_iov[0].iov_len  = sizeof(lnet_hdr_t);
+
+	tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
+}
+
+static void
+ksocknal_pack_msg_v2(ksock_tx_t *tx)
+{
+	tx->tx_iov[0].iov_base = (void *)&tx->tx_msg;
+
+	if (tx->tx_lnetmsg != NULL) {
+		LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
+
+		tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
+		tx->tx_iov[0].iov_len = sizeof(ksock_msg_t);
+		tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
+	} else {
+		LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
+
+		tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
+		tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t,  ksm_u.lnetmsg.ksnm_hdr);
+	}
+	/* Don't checksum before start sending, because packet can be piggybacked with ACK */
+}
+
+static void
+ksocknal_unpack_msg_v1(ksock_msg_t *msg)
+{
+	msg->ksm_csum          = 0;
+	msg->ksm_type          = KSOCK_MSG_LNET;
+	msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0;
+}
+
+static void
+ksocknal_unpack_msg_v2(ksock_msg_t *msg)
+{
+	return;  /* Do nothing */
+}
+
+ksock_proto_t  ksocknal_protocol_v1x =
+{
+	.pro_version        = KSOCK_PROTO_V1,
+	.pro_send_hello     = ksocknal_send_hello_v1,
+	.pro_recv_hello     = ksocknal_recv_hello_v1,
+	.pro_pack           = ksocknal_pack_msg_v1,
+	.pro_unpack         = ksocknal_unpack_msg_v1,
+	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v1,
+	.pro_handle_zcreq   = NULL,
+	.pro_handle_zcack   = NULL,
+	.pro_queue_tx_zcack = NULL,
+	.pro_match_tx       = ksocknal_match_tx
+};
+
+ksock_proto_t  ksocknal_protocol_v2x =
+{
+	.pro_version        = KSOCK_PROTO_V2,
+	.pro_send_hello     = ksocknal_send_hello_v2,
+	.pro_recv_hello     = ksocknal_recv_hello_v2,
+	.pro_pack           = ksocknal_pack_msg_v2,
+	.pro_unpack         = ksocknal_unpack_msg_v2,
+	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v2,
+	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
+	.pro_handle_zcreq   = ksocknal_handle_zcreq,
+	.pro_handle_zcack   = ksocknal_handle_zcack,
+	.pro_match_tx       = ksocknal_match_tx
+};
+
+ksock_proto_t  ksocknal_protocol_v3x =
+{
+	.pro_version        = KSOCK_PROTO_V3,
+	.pro_send_hello     = ksocknal_send_hello_v2,
+	.pro_recv_hello     = ksocknal_recv_hello_v2,
+	.pro_pack           = ksocknal_pack_msg_v2,
+	.pro_unpack         = ksocknal_unpack_msg_v2,
+	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v2,
+	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
+	.pro_handle_zcreq   = ksocknal_handle_zcreq,
+	.pro_handle_zcack   = ksocknal_handle_zcack,
+	.pro_match_tx       = ksocknal_match_tx_v3
+};
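
The two-slot cookie encoding that ksocknal_queue_tx_zcack_v3() packs into a NOOP is dense, so a user-space model of just the merge rules may help: c[0] == 0 means a single cookie held in c[1], c[0] > c[1] means two separate cookies, and c[0] < c[1] means the inclusive range [c[0], c[1]]. Duplicate detection and the SOCKNAL_KEEPALIVE_PING special case are omitted, and try_piggyback is a hypothetical name:

#include <stdio.h>
#include <stdint.h>

/* Try to fold 'cookie' into the two-slot encoding; 1 = piggybacked,
 * 0 = this NOOP cannot take it (the kernel then queues a fresh one). */
static int try_piggyback(uint64_t c[2], uint64_t cookie)
{
	uint64_t tmp = 0;

	if (c[0] == 0) {			/* one cookie so far */
		if (c[1] > cookie) {
			c[0] = c[1];
			c[1] = cookie;
		} else {
			c[0] = cookie;
		}
		return 1;
	}

	if (c[0] > c[1]) {			/* two separate cookies */
		if (c[0] - c[1] > 2)		/* kernel moves the carrier on */
			return 0;
		if (c[0] - c[1] == 2) {		/* (a+2, a): only a+1 fits */
			if (cookie == c[1] + 1)
				tmp = cookie;
		} else if (cookie == c[1] - 1) {
			tmp = c[1];		/* (a+1, a) + (a-1) */
		} else if (cookie == c[0] + 1) {
			tmp = c[0];		/* (a+1, a) + (a+2) */
		}
		if (tmp == 0)
			return 0;
		c[0] = tmp - 1;			/* collapse to range [tmp-1, tmp+1] */
		c[1] = tmp + 1;
		return 1;
	}

	/* c[0] < c[1]: an inclusive range; grow it at either end */
	if (cookie == c[1] + 1) {
		c[1] = cookie;
		return 1;
	}
	if (cookie == c[0] - 1) {
		c[0] = cookie;
		return 1;
	}
	return 0;
}

int main(void)
{
	uint64_t c[2] = { 0, 10 };		/* NOOP carrying cookie 10 */
	uint64_t in[] = { 12, 11, 13, 9, 20 };
	size_t i;

	for (i = 0; i < sizeof(in) / sizeof(in[0]); i++)
		printf("cookie %llu -> %s  [%llu, %llu]\n",
		       (unsigned long long)in[i],
		       try_piggyback(c, in[i]) ? "merged" : "full",
		       (unsigned long long)c[0], (unsigned long long)c[1]);
	return 0;
}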

+ 8 - 0
drivers/staging/lustre/lnet/lnet/Makefile

@@ -0,0 +1,8 @@
+obj-$(CONFIG_LNET) += lnet.o
+
+lnet-y := api-errno.o api-ni.o config.o lib-me.o lib-msg.o lib-eq.o	\
+	  lib-md.o lib-ptl.o lib-move.o module.o lo.o router.o		\
+	  router_proc.o acceptor.o peer.o
+
+
+ccflags-y := -I$(src)/../include

+ 527 - 0
drivers/staging/lustre/lnet/lnet/acceptor.c

@@ -0,0 +1,527 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+static int   accept_port    = 988;
+static int   accept_backlog = 127;
+static int   accept_timeout = 5;
+
+struct {
+	int			pta_shutdown;
+	socket_t		*pta_sock;
+	struct completion	pta_signal;
+} lnet_acceptor_state;
+
+int
+lnet_acceptor_port(void)
+{
+	return accept_port;
+}
+EXPORT_SYMBOL(lnet_acceptor_port);
+
+static inline int
+lnet_accept_magic(__u32 magic, __u32 constant)
+{
+	return (magic == constant ||
+		magic == __swab32(constant));
+}
+
+static char *accept = "secure";
+
+CFS_MODULE_PARM(accept, "s", charp, 0444,
+		"Accept connections (secure|all|none)");
+CFS_MODULE_PARM(accept_port, "i", int, 0444,
+		"Acceptor's port (same on all nodes)");
+CFS_MODULE_PARM(accept_backlog, "i", int, 0444,
+		"Acceptor's listen backlog");
+CFS_MODULE_PARM(accept_timeout, "i", int, 0644,
+		"Acceptor's timeout (seconds)");
+
+static char *accept_type = NULL;
+
+int
+lnet_acceptor_get_tunables(void)
+{
+	/* Userland acceptor uses 'accept_type' instead of 'accept', due to
+	 * conflict with 'accept(2)', but kernel acceptor still uses 'accept'
+	 * for compatibility. Hence the trick. */
+	accept_type = accept;
+	return 0;
+}
+
+int
+lnet_acceptor_timeout(void)
+{
+	return accept_timeout;
+}
+EXPORT_SYMBOL(lnet_acceptor_timeout);
+
+void
+lnet_connect_console_error (int rc, lnet_nid_t peer_nid,
+			   __u32 peer_ip, int peer_port)
+{
+	switch (rc) {
+	/* "normal" errors */
+	case -ECONNREFUSED:
+		CNETERR("Connection to %s at host %u.%u.%u.%u on port %d was "
+			"refused: check that Lustre is running on that node.\n",
+			libcfs_nid2str(peer_nid),
+			HIPQUAD(peer_ip), peer_port);
+		break;
+	case -EHOSTUNREACH:
+	case -ENETUNREACH:
+		CNETERR("Connection to %s at host %u.%u.%u.%u "
+			"was unreachable: the network or that node may "
+			"be down, or Lustre may be misconfigured.\n",
+			libcfs_nid2str(peer_nid), HIPQUAD(peer_ip));
+		break;
+	case -ETIMEDOUT:
+		CNETERR("Connection to %s at host %u.%u.%u.%u on "
+			"port %d took too long: that node may be hung "
+			"or experiencing high load.\n",
+			libcfs_nid2str(peer_nid),
+			HIPQUAD(peer_ip), peer_port);
+		break;
+	case -ECONNRESET:
+		LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %u.%u.%u.%u"
+				   " on port %d was reset: "
+				   "is it running a compatible version of "
+				   "Lustre and is %s one of its NIDs?\n",
+				   libcfs_nid2str(peer_nid),
+				   HIPQUAD(peer_ip), peer_port,
+				   libcfs_nid2str(peer_nid));
+		break;
+	case -EPROTO:
+		LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at "
+				   "host %u.%u.%u.%u on port %d: is it running "
+				   "a compatible version of Lustre?\n",
+				   libcfs_nid2str(peer_nid),
+				   HIPQUAD(peer_ip), peer_port);
+		break;
+	case -EADDRINUSE:
+		LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to "
+				   "connect to %s at host %u.%u.%u.%u on port "
+				   "%d\n", libcfs_nid2str(peer_nid),
+				   HIPQUAD(peer_ip), peer_port);
+		break;
+	default:
+		LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s"
+				   " at host %u.%u.%u.%u on port %d\n", rc,
+				   libcfs_nid2str(peer_nid),
+				   HIPQUAD(peer_ip), peer_port);
+		break;
+	}
+}
+EXPORT_SYMBOL(lnet_connect_console_error);
+
+int
+lnet_connect(socket_t **sockp, lnet_nid_t peer_nid,
+	    __u32 local_ip, __u32 peer_ip, int peer_port)
+{
+	lnet_acceptor_connreq_t cr;
+	socket_t	   *sock;
+	int		     rc;
+	int		     port;
+	int		     fatal;
+
+	CLASSERT (sizeof(cr) <= 16);	    /* not too big to be on the stack */
+
+	for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
+	     port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
+	     --port) {
+		/* Iterate through reserved ports. */
+
+		rc = libcfs_sock_connect(&sock, &fatal,
+					 local_ip, port,
+					 peer_ip, peer_port);
+		if (rc != 0) {
+			if (fatal)
+				goto failed;
+			continue;
+		}
+
+		CLASSERT (LNET_PROTO_ACCEPTOR_VERSION == 1);
+
+		cr.acr_magic   = LNET_PROTO_ACCEPTOR_MAGIC;
+		cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
+		cr.acr_nid     = peer_nid;
+
+		if (the_lnet.ln_testprotocompat != 0) {
+			/* single-shot proto check */
+			lnet_net_lock(LNET_LOCK_EX);
+			if ((the_lnet.ln_testprotocompat & 4) != 0) {
+				cr.acr_version++;
+				the_lnet.ln_testprotocompat &= ~4;
+			}
+			if ((the_lnet.ln_testprotocompat & 8) != 0) {
+				cr.acr_magic = LNET_PROTO_MAGIC;
+				the_lnet.ln_testprotocompat &= ~8;
+			}
+			lnet_net_unlock(LNET_LOCK_EX);
+		}
+
+		rc = libcfs_sock_write(sock, &cr, sizeof(cr),
+				       accept_timeout);
+		if (rc != 0)
+			goto failed_sock;
+
+		*sockp = sock;
+		return 0;
+	}
+
+	rc = -EADDRINUSE;
+	goto failed;
+
+ failed_sock:
+	libcfs_sock_release(sock);
+ failed:
+	lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
+	return rc;
+}
+EXPORT_SYMBOL(lnet_connect);
+
+
+/* Below is the code common for both kernel and MT user-space */
+
+int
+lnet_accept(socket_t *sock, __u32 magic)
+{
+	lnet_acceptor_connreq_t cr;
+	__u32		   peer_ip;
+	int		     peer_port;
+	int		     rc;
+	int		     flip;
+	lnet_ni_t	      *ni;
+	char		   *str;
+
+	LASSERT (sizeof(cr) <= 16);	     /* not too big for the stack */
+
+	rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
+	LASSERT (rc == 0);		      /* we succeeded before */
+
+	if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
+
+		if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
+			/* future version compatibility!
+			 * When LNET unifies protocols over all LNDs, the first
+			 * thing sent will be a version query.  I send back
+			 * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" */
+
+			memset (&cr, 0, sizeof(cr));
+			cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
+			cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
+			rc = libcfs_sock_write(sock, &cr, sizeof(cr),
+					       accept_timeout);
+
+			if (rc != 0)
+				CERROR("Error sending magic+version in response "
+				       "to LNET magic from %u.%u.%u.%u: %d\n",
+				       HIPQUAD(peer_ip), rc);
+			return -EPROTO;
+		}
+
+		if (magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC))
+			str = "'old' socknal/tcpnal";
+		else if (lnet_accept_magic(magic, LNET_PROTO_RA_MAGIC))
+			str = "'old' ranal";
+		else
+			str = "unrecognised";
+
+		LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %u.%u.%u.%u"
+				   " magic %08x: %s acceptor protocol\n",
+				   HIPQUAD(peer_ip), magic, str);
+		return -EPROTO;
+	}
+
+	flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
+
+	rc = libcfs_sock_read(sock, &cr.acr_version,
+			      sizeof(cr.acr_version),
+			      accept_timeout);
+	if (rc != 0) {
+		CERROR("Error %d reading connection request version from "
+		       "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
+		return -EIO;
+	}
+
+	if (flip)
+		__swab32s(&cr.acr_version);
+
+	if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) {
+		/* future version compatibility!
+		 * An acceptor-specific protocol rev will first send a version
+		 * query.  I send back my current version to tell her I'm
+		 * "old". */
+		int peer_version = cr.acr_version;
+
+		memset (&cr, 0, sizeof(cr));
+		cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
+		cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
+
+		rc = libcfs_sock_write(sock, &cr, sizeof(cr),
+				       accept_timeout);
+
+		if (rc != 0)
+			CERROR("Error sending magic+version in response "
+			       "to version %d from %u.%u.%u.%u: %d\n",
+			       peer_version, HIPQUAD(peer_ip), rc);
+		return -EPROTO;
+	}
+
+	rc = libcfs_sock_read(sock, &cr.acr_nid,
+			      sizeof(cr) -
+			      offsetof(lnet_acceptor_connreq_t, acr_nid),
+			      accept_timeout);
+	if (rc != 0) {
+		CERROR("Error %d reading connection request from "
+		       "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
+		return -EIO;
+	}
+
+	if (flip)
+		__swab64s(&cr.acr_nid);
+
+	ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
+	if (ni == NULL ||	       /* no matching net */
+	    ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
+		if (ni != NULL)
+			lnet_ni_decref(ni);
+		LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %u.%u.%u.%u"
+				   " for %s: No matching NI\n",
+				   HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
+		return -EPERM;
+	}
+
+	if (ni->ni_lnd->lnd_accept == NULL) {
+		/* This catches a request for the loopback LND */
+		lnet_ni_decref(ni);
+		LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %u.%u.%u.%u"
+				  " for %s: NI does not accept IP connections\n",
+				  HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
+		return -EPERM;
+	}
+
+	CDEBUG(D_NET, "Accept %s from %u.%u.%u.%u\n",
+	       libcfs_nid2str(cr.acr_nid), HIPQUAD(peer_ip));
+
+	rc = ni->ni_lnd->lnd_accept(ni, sock);
+
+	lnet_ni_decref(ni);
+	return rc;
+}
+
+int
+lnet_acceptor(void *arg)
+{
+	socket_t  *newsock;
+	int	    rc;
+	__u32	  magic;
+	__u32	  peer_ip;
+	int	    peer_port;
+	int	    secure = (int)((long_ptr_t)arg);
+
+	LASSERT (lnet_acceptor_state.pta_sock == NULL);
+
+	cfs_block_allsigs();
+
+	rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock,
+				0, accept_port, accept_backlog);
+	if (rc != 0) {
+		if (rc == -EADDRINUSE)
+			LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
+					   " %d: port already in use\n",
+					   accept_port);
+		else
+			LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port "
+					   "%d: unexpected error %d\n",
+					   accept_port, rc);
+
+		lnet_acceptor_state.pta_sock = NULL;
+	} else {
+		LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
+	}
+
+	/* set init status and unblock parent */
+	lnet_acceptor_state.pta_shutdown = rc;
+	complete(&lnet_acceptor_state.pta_signal);
+
+	if (rc != 0)
+		return rc;
+
+	while (!lnet_acceptor_state.pta_shutdown) {
+
+		rc = libcfs_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
+		if (rc != 0) {
+			if (rc != -EAGAIN) {
+				CWARN("Accept error %d: pausing...\n", rc);
+				cfs_pause(cfs_time_seconds(1));
+			}
+			continue;
+		}
+
+		/* maybe we were woken up by libcfs_sock_abort_accept() */
+		if (lnet_acceptor_state.pta_shutdown) {
+			libcfs_sock_release(newsock);
+			break;
+		}
+
+		rc = libcfs_sock_getaddr(newsock, 1, &peer_ip, &peer_port);
+		if (rc != 0) {
+			CERROR("Can't determine new connection's address\n");
+			goto failed;
+		}
+
+		if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
+			CERROR("Refusing connection from %u.%u.%u.%u: "
+			       "insecure port %d\n",
+			       HIPQUAD(peer_ip), peer_port);
+			goto failed;
+		}
+
+		rc = libcfs_sock_read(newsock, &magic, sizeof(magic),
+				      accept_timeout);
+		if (rc != 0) {
+			CERROR("Error %d reading connection request from "
+			       "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
+			goto failed;
+		}
+
+		rc = lnet_accept(newsock, magic);
+		if (rc != 0)
+			goto failed;
+
+		continue;
+
+	failed:
+		libcfs_sock_release(newsock);
+	}
+
+	libcfs_sock_release(lnet_acceptor_state.pta_sock);
+	lnet_acceptor_state.pta_sock = NULL;
+
+	CDEBUG(D_NET, "Acceptor stopping\n");
+
+	/* unblock lnet_acceptor_stop() */
+	complete(&lnet_acceptor_state.pta_signal);
+	return 0;
+}
+
+static inline int
+accept2secure(const char *acc, long *sec)
+{
+	if (!strcmp(acc, "secure")) {
+		*sec = 1;
+		return 1;
+	} else if (!strcmp(acc, "all")) {
+		*sec = 0;
+		return 1;
+	} else if (!strcmp(acc, "none")) {
+		return 0;
+	} else {
+		LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
+				   acc);
+		return -EINVAL;
+	}
+}
+
+int
+lnet_acceptor_start(void)
+{
+	int  rc;
+	long rc2;
+	long secure;
+
+	LASSERT (lnet_acceptor_state.pta_sock == NULL);
+
+	rc = lnet_acceptor_get_tunables();
+	if (rc != 0)
+		return rc;
+
+	init_completion(&lnet_acceptor_state.pta_signal);
+	rc = accept2secure(accept_type, &secure);
+	if (rc <= 0) {
+		fini_completion(&lnet_acceptor_state.pta_signal);
+		return rc;
+	}
+
+	if (lnet_count_acceptor_nis() == 0)  /* not required */
+		return 0;
+
+	rc2 = PTR_ERR(kthread_run(lnet_acceptor,
+				  (void *)(ulong_ptr_t)secure,
+				  "acceptor_%03ld", secure));
+	if (IS_ERR_VALUE(rc2)) {
+		CERROR("Can't start acceptor thread: %ld\n", rc2);
+		fini_completion(&lnet_acceptor_state.pta_signal);
+
+		return -ESRCH;
+	}
+
+	/* wait for acceptor to startup */
+	wait_for_completion(&lnet_acceptor_state.pta_signal);
+
+	if (!lnet_acceptor_state.pta_shutdown) {
+		/* started OK */
+		LASSERT(lnet_acceptor_state.pta_sock != NULL);
+		return 0;
+	}
+
+	LASSERT(lnet_acceptor_state.pta_sock == NULL);
+	fini_completion(&lnet_acceptor_state.pta_signal);
+
+	return -ENETDOWN;
+}
+
+void
+lnet_acceptor_stop(void)
+{
+	if (lnet_acceptor_state.pta_sock == NULL) /* not running */
+		return;
+
+	lnet_acceptor_state.pta_shutdown = 1;
+	libcfs_sock_abort_accept(lnet_acceptor_state.pta_sock);
+
+	/* block until acceptor signals exit */
+	wait_for_completion(&lnet_acceptor_state.pta_signal);
+
+	fini_completion(&lnet_acceptor_state.pta_signal);
+}
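
The start/stop protocol above is a compact thread-handshake pattern: the parent spawns the acceptor and blocks on pta_signal, the thread reuses pta_shutdown to publish its init status before completing, and stop sets the flag, aborts the blocking accept, and waits on the same completion. A condensed sketch of the pattern with hypothetical names (do_setup(), serve_one() stand in for the real work):

	struct svc {
		int		  shutdown;	/* doubles as init status */
		struct completion signal;
	};

	static int svc_thread(void *arg)
	{
		struct svc *s = arg;
		int rc = do_setup();		/* e.g. open listen socket */

		s->shutdown = rc;		/* publish init status... */
		complete(&s->signal);		/* ...and unblock the parent */
		if (rc != 0)
			return rc;

		while (!s->shutdown)
			serve_one();		/* e.g. accept + dispatch */

		complete(&s->signal);		/* unblock svc_stop() */
		return 0;
	}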

+ 39 - 0
drivers/staging/lustre/lnet/lnet/api-errno.c

@@ -0,0 +1,39 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/api-errno.c
+ *
+ * Instantiate the string table of errors
+ */
+
+/* If you change these, you must update the number table in portals/errno.h */

+ 1941 - 0
drivers/staging/lustre/lnet/lnet/api-ni.c

@@ -0,0 +1,1941 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+#include <linux/log2.h>
+
+#define D_LNI D_CONSOLE
+
+lnet_t      the_lnet;			   /* THE state of the network */
+EXPORT_SYMBOL(the_lnet);
+
+
+static char *ip2nets = "";
+CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
+		"LNET network <- IP table");
+
+static char *networks = "";
+CFS_MODULE_PARM(networks, "s", charp, 0444,
+		"local networks");
+
+static char *routes = "";
+CFS_MODULE_PARM(routes, "s", charp, 0444,
+		"routes to non-local networks");
+
+static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
+CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
+		"size of remote network hash table");
+
+char *
+lnet_get_routes(void)
+{
+	return routes;
+}
+
+char *
+lnet_get_networks(void)
+{
+	char   *nets;
+	int     rc;
+
+	if (*networks != 0 && *ip2nets != 0) {
+		LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
+				   "'ip2nets' but not both at once\n");
+		return NULL;
+	}
+
+	if (*ip2nets != 0) {
+		rc = lnet_parse_ip2nets(&nets, ip2nets);
+		return (rc == 0) ? nets : NULL;
+	}
+
+	if (*networks != 0)
+		return networks;
+
+	return "tcp";
+}
+
+void
+lnet_init_locks(void)
+{
+	spin_lock_init(&the_lnet.ln_eq_wait_lock);
+	init_waitqueue_head(&the_lnet.ln_eq_waitq);
+	mutex_init(&the_lnet.ln_lnd_mutex);
+	mutex_init(&the_lnet.ln_api_mutex);
+}
+
+void
+lnet_fini_locks(void)
+{
+}
+
+
+static int
+lnet_create_remote_nets_table(void)
+{
+	int		i;
+	struct list_head	*hash;
+
+	LASSERT(the_lnet.ln_remote_nets_hash == NULL);
+	LASSERT(the_lnet.ln_remote_nets_hbits > 0);
+	LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
+	if (hash == NULL) {
+		CERROR("Failed to create remote nets hash table\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&hash[i]);
+	the_lnet.ln_remote_nets_hash = hash;
+	return 0;
+}
+
+static void
+lnet_destroy_remote_nets_table(void)
+{
+	int		i;
+	struct list_head	*hash;
+
+	if (the_lnet.ln_remote_nets_hash == NULL)
+		return;
+
+	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
+		LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
+
+	LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
+		    LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
+	the_lnet.ln_remote_nets_hash = NULL;
+}
+
+static void
+lnet_destroy_locks(void)
+{
+	if (the_lnet.ln_res_lock != NULL) {
+		cfs_percpt_lock_free(the_lnet.ln_res_lock);
+		the_lnet.ln_res_lock = NULL;
+	}
+
+	if (the_lnet.ln_net_lock != NULL) {
+		cfs_percpt_lock_free(the_lnet.ln_net_lock);
+		the_lnet.ln_net_lock = NULL;
+	}
+
+	lnet_fini_locks();
+}
+
+static int
+lnet_create_locks(void)
+{
+	lnet_init_locks();
+
+	the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
+	if (the_lnet.ln_res_lock == NULL)
+		goto failed;
+
+	the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
+	if (the_lnet.ln_net_lock == NULL)
+		goto failed;
+
+	return 0;
+
+ failed:
+	lnet_destroy_locks();
+	return -ENOMEM;
+}
+
+void lnet_assert_wire_constants (void)
+{
+	/* Wire protocol assertions generated by 'wirecheck'
+	 * running on Linux robert.bartonsoftware.com 2.6.8-1.521
+	 * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
+	 * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
+
+	/* Constants... */
+	CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
+	CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
+	CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
+	CLASSERT (LNET_MSG_ACK == 0);
+	CLASSERT (LNET_MSG_PUT == 1);
+	CLASSERT (LNET_MSG_GET == 2);
+	CLASSERT (LNET_MSG_REPLY == 3);
+	CLASSERT (LNET_MSG_HELLO == 4);
+
+	/* Checks for struct ptl_handle_wire_t */
+	CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
+	CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
+	CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
+	CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
+	CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
+
+	/* Checks for struct lnet_magicversion_t */
+	CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
+	CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
+	CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
+	CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
+	CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
+	CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
+	CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
+
+	/* Checks for struct lnet_hdr_t */
+	CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
+	CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
+	CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
+	CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
+	CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
+	CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
+	CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
+
+	/* Ack */
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
+
+	/* Put */
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
+
+	/* Get */
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
+
+	/* Reply */
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
+
+	/* Hello */
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
+	CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
+	CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
+}
+
+lnd_t *
+lnet_find_lnd_by_type (int type)
+{
+	lnd_t	      *lnd;
+	struct list_head	 *tmp;
+
+	/* holding lnd mutex */
+	list_for_each (tmp, &the_lnet.ln_lnds) {
+		lnd = list_entry(tmp, lnd_t, lnd_list);
+
+		if ((int)lnd->lnd_type == type)
+			return lnd;
+	}
+
+	return NULL;
+}
+
+void
+lnet_register_lnd (lnd_t *lnd)
+{
+	LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
+
+	LASSERT (the_lnet.ln_init);
+	LASSERT (libcfs_isknown_lnd(lnd->lnd_type));
+	LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
+
+	list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds);
+	lnd->lnd_refcount = 0;
+
+	CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
+
+	LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+}
+EXPORT_SYMBOL(lnet_register_lnd);
+
+void
+lnet_unregister_lnd (lnd_t *lnd)
+{
+	LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
+
+	LASSERT (the_lnet.ln_init);
+	LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
+	LASSERT (lnd->lnd_refcount == 0);
+
+	list_del (&lnd->lnd_list);
+	CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
+
+	LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+}
+EXPORT_SYMBOL(lnet_unregister_lnd);
+
+void
+lnet_counters_get(lnet_counters_t *counters)
+{
+	lnet_counters_t *ctr;
+	int		i;
+
+	memset(counters, 0, sizeof(*counters));
+
+	lnet_net_lock(LNET_LOCK_EX);
+
+	cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
+		counters->msgs_max     += ctr->msgs_max;
+		counters->msgs_alloc   += ctr->msgs_alloc;
+		counters->errors       += ctr->errors;
+		counters->send_count   += ctr->send_count;
+		counters->recv_count   += ctr->recv_count;
+		counters->route_count  += ctr->route_count;
+		counters->drop_count   += ctr->drop_count;
+		counters->send_length  += ctr->send_length;
+		counters->recv_length  += ctr->recv_length;
+		counters->route_length += ctr->route_length;
+		counters->drop_length  += ctr->drop_length;
+	}
+	lnet_net_unlock(LNET_LOCK_EX);
+}
+EXPORT_SYMBOL(lnet_counters_get);
+
+void
+lnet_counters_reset(void)
+{
+	lnet_counters_t *counters;
+	int		i;
+
+	lnet_net_lock(LNET_LOCK_EX);
+
+	cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
+		memset(counters, 0, sizeof(lnet_counters_t));
+
+	lnet_net_unlock(LNET_LOCK_EX);
+}
+EXPORT_SYMBOL(lnet_counters_reset);
+
+#ifdef LNET_USE_LIB_FREELIST
+
+int
+lnet_freelist_init (lnet_freelist_t *fl, int n, int size)
+{
+	char *space;
+
+	LASSERT (n > 0);
+
+	size += offsetof (lnet_freeobj_t, fo_contents);
+
+	LIBCFS_ALLOC(space, n * size);
+	if (space == NULL)
+		return (-ENOMEM);
+
+	INIT_LIST_HEAD (&fl->fl_list);
+	fl->fl_objs = space;
+	fl->fl_nobjs = n;
+	fl->fl_objsize = size;
+
+	do
+	{
+		memset (space, 0, size);
+		list_add ((struct list_head *)space, &fl->fl_list);
+		space += size;
+	} while (--n != 0);
+
+	return (0);
+}
+
+void
+lnet_freelist_fini (lnet_freelist_t *fl)
+{
+	struct list_head       *el;
+	int	       count;
+
+	if (fl->fl_nobjs == 0)
+		return;
+
+	count = 0;
+	for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
+		count++;
+
+	LASSERT (count == fl->fl_nobjs);
+
+	LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
+	memset (fl, 0, sizeof (*fl));
+}
+
+#endif /* LNET_USE_LIB_FREELIST */
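
lnet_freelist_init() carves all n objects out of a single allocation and threads the free list through a list_head embedded at the head of each object. A userspace sketch of the same trick with a plain singly-linked list (carve() and struct free_obj are illustrative, not from this patch):

	#include <stdlib.h>

	struct free_obj {
		struct free_obj *next;	/* payload follows in the same slot */
	};

	/* objsz must be >= sizeof(struct free_obj) */
	static struct free_obj *carve(int n, size_t objsz)
	{
		char *space = calloc(n, objsz);	/* one allocation for all */
		struct free_obj *head = NULL;

		if (space == NULL)
			return NULL;

		while (n-- > 0) {	/* thread each slot onto the list */
			struct free_obj *o =
				(struct free_obj *)(space + n * objsz);

			o->next = head;
			head = o;
		}
		return head;		/* slot 0, i.e. the allocation base */
	}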
+
+__u64
+lnet_create_interface_cookie (void)
+{
+	/* NB the interface cookie in wire handles guards against delayed
+	 * replies and ACKs appearing valid after reboot. Initialisation time,
+	 * even if it's only implemented to millisecond resolution, is probably
+	 * easily good enough. */
+	struct timeval tv;
+	__u64	  cookie;
+	do_gettimeofday(&tv);
+	cookie = tv.tv_sec;
+	cookie *= 1000000;
+	cookie += tv.tv_usec;
+	return cookie;
+}
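
The cookie is simply the initialisation time flattened to microseconds. A userspace analogue, for illustration only:

	#include <sys/time.h>

	/* Two boots in the same microsecond would collide, but that is the
	 * same "probably easily good enough" trade-off made above. */
	static unsigned long long make_cookie(void)
	{
		struct timeval tv;

		gettimeofday(&tv, NULL);
		return (unsigned long long)tv.tv_sec * 1000000ULL + tv.tv_usec;
	}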
+
+static char *
+lnet_res_type2str(int type)
+{
+	switch (type) {
+	default:
+		LBUG();
+	case LNET_COOKIE_TYPE_MD:
+		return "MD";
+	case LNET_COOKIE_TYPE_ME:
+		return "ME";
+	case LNET_COOKIE_TYPE_EQ:
+		return "EQ";
+	}
+}
+
+void
+lnet_res_container_cleanup(struct lnet_res_container *rec)
+{
+	int	count = 0;
+
+	if (rec->rec_type == 0) /* not set yet, it's uninitialized */
+		return;
+
+	while (!list_empty(&rec->rec_active)) {
+		struct list_head *e = rec->rec_active.next;
+
+		list_del_init(e);
+		if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
+			lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
+
+		} else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
+			lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
+
+		} else { /* NB: Active MEs should be attached on portals */
+			LBUG();
+		}
+		count++;
+	}
+
+	if (count > 0) {
+		/* Found live MDs/MEs/EQs; the user really should unlink/free
+		 * all of them before finalizing LNet, but if they didn't,
+		 * we have to recycle the garbage for them */
+		CERROR("%d active elements on exit of %s container\n",
+		       count, lnet_res_type2str(rec->rec_type));
+	}
+
+#ifdef LNET_USE_LIB_FREELIST
+	lnet_freelist_fini(&rec->rec_freelist);
+#endif
+	if (rec->rec_lh_hash != NULL) {
+		LIBCFS_FREE(rec->rec_lh_hash,
+			    LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
+		rec->rec_lh_hash = NULL;
+	}
+
+	rec->rec_type = 0; /* mark it as finalized */
+}
+
+int
+lnet_res_container_setup(struct lnet_res_container *rec,
+			 int cpt, int type, int objnum, int objsz)
+{
+	int	rc = 0;
+	int	i;
+
+	LASSERT(rec->rec_type == 0);
+
+	rec->rec_type = type;
+	INIT_LIST_HEAD(&rec->rec_active);
+
+#ifdef LNET_USE_LIB_FREELIST
+	memset(&rec->rec_freelist, 0, sizeof(rec->rec_freelist));
+	rc = lnet_freelist_init(&rec->rec_freelist, objnum, objsz);
+	if (rc != 0)
+		goto out;
+#endif
+	rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
+
+	/* Arbitrary choice of hash table size */
+	LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
+			 LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
+	if (rec->rec_lh_hash == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < LNET_LH_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
+
+	return 0;
+
+out:
+	CERROR("Failed to setup %s resource container\n",
+	       lnet_res_type2str(type));
+	lnet_res_container_cleanup(rec);
+	return rc;
+}
+
+static void
+lnet_res_containers_destroy(struct lnet_res_container **recs)
+{
+	struct lnet_res_container	*rec;
+	int				i;
+
+	cfs_percpt_for_each(rec, i, recs)
+		lnet_res_container_cleanup(rec);
+
+	cfs_percpt_free(recs);
+}
+
+static struct lnet_res_container **
+lnet_res_containers_create(int type, int objnum, int objsz)
+{
+	struct lnet_res_container	**recs;
+	struct lnet_res_container	*rec;
+	int				rc;
+	int				i;
+
+	recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
+	if (recs == NULL) {
+		CERROR("Failed to allocate %s resource containers\n",
+		       lnet_res_type2str(type));
+		return NULL;
+	}
+
+	cfs_percpt_for_each(rec, i, recs) {
+		rc = lnet_res_container_setup(rec, i, type, objnum, objsz);
+		if (rc != 0) {
+			lnet_res_containers_destroy(recs);
+			return NULL;
+		}
+	}
+
+	return recs;
+}
+
+lnet_libhandle_t *
+lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
+{
+	/* ALWAYS called with lnet_res_lock held */
+	struct list_head		*head;
+	lnet_libhandle_t	*lh;
+	unsigned int		hash;
+
+	if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
+		return NULL;
+
+	hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
+	head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
+
+	list_for_each_entry(lh, head, lh_hash_chain) {
+		if (lh->lh_cookie == cookie)
+			return lh;
+	}
+
+	return NULL;
+}
+
+void
+lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
+{
+	/* ALWAYS called with lnet_res_lock held */
+	unsigned int	ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
+	unsigned int	hash;
+
+	lh->lh_cookie = rec->rec_lh_cookie;
+	rec->rec_lh_cookie += 1 << ibits;
+
+	hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
+
+	list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
+}
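
Together, lnet_res_lh_lookup() and lnet_res_lh_initialize() fix the cookie layout: the type sits in the low LNET_COOKIE_TYPE_BITS, the CPT in the next LNET_CPT_BITS, and a per-container sequence number above that (each new cookie advances the sequence by 1 << ibits). A standalone decode sketch, using assumed widths of 2 type bits and 8 CPT bits rather than the real header constants:

	#include <stdio.h>

	#define TYPE_BITS 2	/* assumed stand-in for LNET_COOKIE_TYPE_BITS */
	#define CPT_BITS  8	/* assumed stand-in for LNET_CPT_BITS */

	static void cookie_decode(unsigned long long cookie)
	{
		unsigned long long type = cookie & ((1ULL << TYPE_BITS) - 1);
		unsigned long long cpt  = (cookie >> TYPE_BITS) &
					  ((1ULL << CPT_BITS) - 1);
		unsigned long long seq  = cookie >> (TYPE_BITS + CPT_BITS);

		/* seq is what the lookup masks with LNET_LH_HASH_MASK */
		printf("type %llu cpt %llu seq %llu\n", type, cpt, seq);
	}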
+
+
+int lnet_unprepare(void);
+
+int
+lnet_prepare(lnet_pid_t requested_pid)
+{
+	/* Prepare to bring up the network */
+	struct lnet_res_container **recs;
+	int			  rc = 0;
+
+	LASSERT (the_lnet.ln_refcount == 0);
+
+	the_lnet.ln_routing = 0;
+
+	LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
+	the_lnet.ln_pid = requested_pid;
+
+	INIT_LIST_HEAD(&the_lnet.ln_test_peers);
+	INIT_LIST_HEAD(&the_lnet.ln_nis);
+	INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
+	INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
+	INIT_LIST_HEAD(&the_lnet.ln_routers);
+
+	rc = lnet_create_remote_nets_table();
+	if (rc != 0)
+		goto failed;
+
+	the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
+
+	the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
+						sizeof(lnet_counters_t));
+	if (the_lnet.ln_counters == NULL) {
+		CERROR("Failed to allocate counters for LNet\n");
+		rc = -ENOMEM;
+		goto failed;
+	}
+
+	rc = lnet_peer_tables_create();
+	if (rc != 0)
+		goto failed;
+
+	rc = lnet_msg_containers_create();
+	if (rc != 0)
+		goto failed;
+
+	rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
+				      LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS,
+				      sizeof(lnet_eq_t));
+	if (rc != 0)
+		goto failed;
+
+	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES,
+					  sizeof(lnet_me_t));
+	if (recs == NULL)
+		goto failed;
+
+	the_lnet.ln_me_containers = recs;
+
+	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS,
+					  sizeof(lnet_libmd_t));
+	if (recs == NULL)
+		goto failed;
+
+	the_lnet.ln_md_containers = recs;
+
+	rc = lnet_portals_create();
+	if (rc != 0) {
+		CERROR("Failed to create portals for LNet: %d\n", rc);
+		goto failed;
+	}
+
+	return 0;
+
+ failed:
+	lnet_unprepare();
+	return rc;
+}
+
+int
+lnet_unprepare (void)
+{
+	/* NB no LNET_LOCK since this is the last reference.  All LND instances
+	 * have shut down already, so it is safe to unlink and free all
+	 * descriptors, even those that appear committed to a network op (eg MD
+	 * with non-zero pending count) */
+
+	lnet_fail_nid(LNET_NID_ANY, 0);
+
+	LASSERT(the_lnet.ln_refcount == 0);
+	LASSERT(list_empty(&the_lnet.ln_test_peers));
+	LASSERT(list_empty(&the_lnet.ln_nis));
+	LASSERT(list_empty(&the_lnet.ln_nis_cpt));
+	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
+
+	lnet_portals_destroy();
+
+	if (the_lnet.ln_md_containers != NULL) {
+		lnet_res_containers_destroy(the_lnet.ln_md_containers);
+		the_lnet.ln_md_containers = NULL;
+	}
+
+	if (the_lnet.ln_me_containers != NULL) {
+		lnet_res_containers_destroy(the_lnet.ln_me_containers);
+		the_lnet.ln_me_containers = NULL;
+	}
+
+	lnet_res_container_cleanup(&the_lnet.ln_eq_container);
+
+	lnet_msg_containers_destroy();
+	lnet_peer_tables_destroy();
+	lnet_rtrpools_free();
+
+	if (the_lnet.ln_counters != NULL) {
+		cfs_percpt_free(the_lnet.ln_counters);
+		the_lnet.ln_counters = NULL;
+	}
+	lnet_destroy_remote_nets_table();
+
+	return 0;
+}
+
+lnet_ni_t  *
+lnet_net2ni_locked(__u32 net, int cpt)
+{
+	struct list_head	*tmp;
+	lnet_ni_t	*ni;
+
+	LASSERT(cpt != LNET_LOCK_EX);
+
+	list_for_each(tmp, &the_lnet.ln_nis) {
+		ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+		if (LNET_NIDNET(ni->ni_nid) == net) {
+			lnet_ni_addref_locked(ni, cpt);
+			return ni;
+		}
+	}
+
+	return NULL;
+}
+
+lnet_ni_t *
+lnet_net2ni(__u32 net)
+{
+	lnet_ni_t *ni;
+
+	lnet_net_lock(0);
+	ni = lnet_net2ni_locked(net, 0);
+	lnet_net_unlock(0);
+
+	return ni;
+}
+EXPORT_SYMBOL(lnet_net2ni);
+
+static unsigned int
+lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
+{
+	__u64		key = nid;
+	unsigned int	val;
+
+	LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
+
+	if (number == 1)
+		return 0;
+
+	val = cfs_hash_long(key, LNET_CPT_BITS);
+	/* NB: LNET_CPT_NUMBER doesn't have to be a power of 2 */
+	if (val < number)
+		return val;
+
+	return (unsigned int)(key + val + (val >> 1)) % number;
+}
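
The fallback arm exists because the CPT count need not be a power of two: cfs_hash_long() returns LNET_CPT_BITS bits, so a value landing at or beyond `number` is folded back with a modulo that also mixes the key in. A self-contained check of how that fold spreads keys over three partitions; hash() below is a stand-in for cfs_hash_long(), not the real function:

	#include <stdio.h>

	static unsigned int hash(unsigned long long key)
	{
		key ^= key >> 33;
		key *= 0xff51afd7ed558ccdULL;
		return (unsigned int)(key & 0xff);	/* pretend 8 CPT bits */
	}

	int main(void)
	{
		unsigned int counts[3] = { 0, 0, 0 };
		unsigned long long nid;

		for (nid = 0; nid < 1024; nid++) {
			unsigned int val = hash(nid);
			unsigned int cpt = val < 3 ? val :
			    (unsigned int)(nid + val + (val >> 1)) % 3;

			counts[cpt]++;
		}
		printf("%u %u %u\n", counts[0], counts[1], counts[2]);
		return 0;
	}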
+
+int
+lnet_cpt_of_nid_locked(lnet_nid_t nid)
+{
+	struct lnet_ni *ni;
+
+	/* must be called while holding lnet_net_lock */
+	if (LNET_CPT_NUMBER == 1)
+		return 0; /* the only one */
+
+	/* taking lnet_net_lock(any) would be OK */
+	if (!list_empty(&the_lnet.ln_nis_cpt)) {
+		list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
+			if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
+				continue;
+
+			LASSERT(ni->ni_cpts != NULL);
+			return ni->ni_cpts[lnet_nid_cpt_hash
+					   (nid, ni->ni_ncpts)];
+		}
+	}
+
+	return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
+}
+
+int
+lnet_cpt_of_nid(lnet_nid_t nid)
+{
+	int	cpt;
+	int	cpt2;
+
+	if (LNET_CPT_NUMBER == 1)
+		return 0; /* the only one */
+
+	if (list_empty(&the_lnet.ln_nis_cpt))
+		return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
+
+	cpt = lnet_net_lock_current();
+	cpt2 = lnet_cpt_of_nid_locked(nid);
+	lnet_net_unlock(cpt);
+
+	return cpt2;
+}
+EXPORT_SYMBOL(lnet_cpt_of_nid);
+
+int
+lnet_islocalnet(__u32 net)
+{
+	struct lnet_ni	*ni;
+	int		cpt;
+
+	cpt = lnet_net_lock_current();
+
+	ni = lnet_net2ni_locked(net, cpt);
+	if (ni != NULL)
+		lnet_ni_decref_locked(ni, cpt);
+
+	lnet_net_unlock(cpt);
+
+	return ni != NULL;
+}
+
+lnet_ni_t  *
+lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
+{
+	struct lnet_ni	*ni;
+	struct list_head	*tmp;
+
+	LASSERT(cpt != LNET_LOCK_EX);
+
+	list_for_each(tmp, &the_lnet.ln_nis) {
+		ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+		if (ni->ni_nid == nid) {
+			lnet_ni_addref_locked(ni, cpt);
+			return ni;
+		}
+	}
+
+	return NULL;
+}
+
+int
+lnet_islocalnid(lnet_nid_t nid)
+{
+	struct lnet_ni	*ni;
+	int		cpt;
+
+	cpt = lnet_net_lock_current();
+	ni = lnet_nid2ni_locked(nid, cpt);
+	if (ni != NULL)
+		lnet_ni_decref_locked(ni, cpt);
+	lnet_net_unlock(cpt);
+
+	return ni != NULL;
+}
+
+int
+lnet_count_acceptor_nis (void)
+{
+	/* Return the # of NIs that need the acceptor. */
+	int		count = 0;
+	struct list_head	*tmp;
+	struct lnet_ni	*ni;
+	int		cpt;
+
+	cpt = lnet_net_lock_current();
+	list_for_each(tmp, &the_lnet.ln_nis) {
+		ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+		if (ni->ni_lnd->lnd_accept != NULL)
+			count++;
+	}
+
+	lnet_net_unlock(cpt);
+
+	return count;
+}
+
+static int
+lnet_ni_tq_credits(lnet_ni_t *ni)
+{
+	int	credits;
+
+	LASSERT(ni->ni_ncpts >= 1);
+
+	if (ni->ni_ncpts == 1)
+		return ni->ni_maxtxcredits;
+
+	credits = ni->ni_maxtxcredits / ni->ni_ncpts;
+	credits = max(credits, 8 * ni->ni_peertxcredits);
+	credits = min(credits, ni->ni_maxtxcredits);
+
+	return credits;
+}
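
The split divides the NI's total send credits evenly across its CPTs, then clamps: no queue falls below 8x the per-peer credits (so a single queue can still keep several peers busy), and none exceeds the NI total. A worked example with illustrative numbers, not defaults from this patch:

	/* ni_maxtxcredits = 256, ni_ncpts = 8, ni_peertxcredits = 8
	 *
	 *	credits = 256 / 8        = 32
	 *	credits = max(32, 8 * 8) = 64	(raised by the floor)
	 *	credits = min(64, 256)   = 64	(still under the NI total)
	 *
	 * Each of the 8 queues gets 64 credits; note the queues may then sum
	 * past ni_maxtxcredits (8 * 64 = 512), so the floor is a deliberate
	 * over-commit, not a hard partition. */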
+
+void
+lnet_shutdown_lndnis (void)
+{
+	int		i;
+	int		islo;
+	lnet_ni_t	 *ni;
+
+	/* NB called holding the global mutex */
+
+	/* All quiet on the API front */
+	LASSERT(!the_lnet.ln_shutdown);
+	LASSERT(the_lnet.ln_refcount == 0);
+	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
+
+	lnet_net_lock(LNET_LOCK_EX);
+	the_lnet.ln_shutdown = 1;	/* flag shutdown */
+
+	/* Unlink NIs from the global table */
+	while (!list_empty(&the_lnet.ln_nis)) {
+		ni = list_entry(the_lnet.ln_nis.next,
+				    lnet_ni_t, ni_list);
+		/* move it to zombie list and nobody can find it anymore */
+		list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
+		lnet_ni_decref_locked(ni, 0);	/* drop ln_nis' ref */
+
+		if (!list_empty(&ni->ni_cptlist)) {
+			list_del_init(&ni->ni_cptlist);
+			lnet_ni_decref_locked(ni, 0);
+		}
+	}
+
+	/* Drop the cached eqwait NI. */
+	if (the_lnet.ln_eq_waitni != NULL) {
+		lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0);
+		the_lnet.ln_eq_waitni = NULL;
+	}
+
+	/* Drop the cached loopback NI. */
+	if (the_lnet.ln_loni != NULL) {
+		lnet_ni_decref_locked(the_lnet.ln_loni, 0);
+		the_lnet.ln_loni = NULL;
+	}
+
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	/* Clear lazy portals and drop delayed messages which hold refs
+	 * on their lnet_msg_t::msg_rxpeer */
+	for (i = 0; i < the_lnet.ln_nportals; i++)
+		LNetClearLazyPortal(i);
+
+	/* Clear the peer table and wait for all peers to go (they hold refs on
+	 * their NIs) */
+	lnet_peer_tables_cleanup();
+
+	lnet_net_lock(LNET_LOCK_EX);
+	/* Now wait for the NIs we just nuked to show up on ln_nis_zombie
+	 * and shut them down in guaranteed thread context */
+	i = 2;
+	while (!list_empty(&the_lnet.ln_nis_zombie)) {
+		int	*ref;
+		int	j;
+
+		ni = list_entry(the_lnet.ln_nis_zombie.next,
+				    lnet_ni_t, ni_list);
+		list_del_init(&ni->ni_list);
+		cfs_percpt_for_each(ref, j, ni->ni_refs) {
+			if (*ref == 0)
+				continue;
+			/* still busy, add it back to zombie list */
+			list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
+			break;
+		}
+
+		while (!list_empty(&ni->ni_list)) {
+			lnet_net_unlock(LNET_LOCK_EX);
+			++i;
+			if ((i & (-i)) == i) {
+				CDEBUG(D_WARNING,
+				       "Waiting for zombie LNI %s\n",
+				       libcfs_nid2str(ni->ni_nid));
+			}
+			cfs_pause(cfs_time_seconds(1));
+			lnet_net_lock(LNET_LOCK_EX);
+			continue;
+		}
+
+		ni->ni_lnd->lnd_refcount--;
+		lnet_net_unlock(LNET_LOCK_EX);
+
+		islo = ni->ni_lnd->lnd_type == LOLND;
+
+		LASSERT (!in_interrupt ());
+		(ni->ni_lnd->lnd_shutdown)(ni);
+
+		/* can't deref lnd anymore now; it might have unregistered
+		 * itself...  */
+
+		if (!islo)
+			CDEBUG(D_LNI, "Removed LNI %s\n",
+			       libcfs_nid2str(ni->ni_nid));
+
+		lnet_ni_free(ni);
+		lnet_net_lock(LNET_LOCK_EX);
+	}
+
+	the_lnet.ln_shutdown = 0;
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	if (the_lnet.ln_network_tokens != NULL) {
+		LIBCFS_FREE(the_lnet.ln_network_tokens,
+			    the_lnet.ln_network_tokens_nob);
+		the_lnet.ln_network_tokens = NULL;
+	}
+}
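
One idiom in the zombie loop deserves a note: (i & (-i)) == i isolates the lowest set bit of i, so the test is true exactly when i is a power of two, which rate-limits the "Waiting for zombie LNI" message to exponentially spaced retries. A tiny demonstration:

	#include <stdio.h>

	int main(void)
	{
		int i;

		for (i = 1; i <= 20; i++)
			if ((i & (-i)) == i)	/* power of two? */
				printf("would log at i = %d\n", i);
		return 0;	/* prints 1, 2, 4, 8, 16 */
	}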
+
+int
+lnet_startup_lndnis (void)
+{
+	lnd_t			*lnd;
+	struct lnet_ni		*ni;
+	struct lnet_tx_queue	*tq;
+	struct list_head		nilist;
+	int			i;
+	int		rc = 0;
+	int		lnd_type;
+	int		nicount = 0;
+	char	      *nets = lnet_get_networks();
+
+	INIT_LIST_HEAD(&nilist);
+
+	if (nets == NULL)
+		goto failed;
+
+	rc = lnet_parse_networks(&nilist, nets);
+	if (rc != 0)
+		goto failed;
+
+	while (!list_empty(&nilist)) {
+		ni = list_entry(nilist.next, lnet_ni_t, ni_list);
+		lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+
+		LASSERT (libcfs_isknown_lnd(lnd_type));
+
+		if (lnd_type == CIBLND    ||
+		    lnd_type == OPENIBLND ||
+		    lnd_type == IIBLND    ||
+		    lnd_type == VIBLND) {
+			CERROR("LND %s obsoleted\n",
+			       libcfs_lnd2str(lnd_type));
+			goto failed;
+		}
+
+		LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
+		lnd = lnet_find_lnd_by_type(lnd_type);
+
+		if (lnd == NULL) {
+			LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+			rc = request_module("%s",
+						libcfs_lnd2modname(lnd_type));
+			LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
+
+			lnd = lnet_find_lnd_by_type(lnd_type);
+			if (lnd == NULL) {
+				LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+				CERROR("Can't load LND %s, module %s, rc=%d\n",
+				       libcfs_lnd2str(lnd_type),
+				       libcfs_lnd2modname(lnd_type), rc);
+				goto failed;
+			}
+		}
+
+		lnet_net_lock(LNET_LOCK_EX);
+		lnd->lnd_refcount++;
+		lnet_net_unlock(LNET_LOCK_EX);
+
+		ni->ni_lnd = lnd;
+
+		rc = (lnd->lnd_startup)(ni);
+
+		LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+
+		if (rc != 0) {
+			LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
+					   "\n",
+					   rc, libcfs_lnd2str(lnd->lnd_type));
+			lnet_net_lock(LNET_LOCK_EX);
+			lnd->lnd_refcount--;
+			lnet_net_unlock(LNET_LOCK_EX);
+			goto failed;
+		}
+
+		LASSERT (ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
+
+		list_del(&ni->ni_list);
+
+		lnet_net_lock(LNET_LOCK_EX);
+		/* refcount for ln_nis */
+		lnet_ni_addref_locked(ni, 0);
+		list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
+		if (ni->ni_cpts != NULL) {
+			list_add_tail(&ni->ni_cptlist,
+					  &the_lnet.ln_nis_cpt);
+			lnet_ni_addref_locked(ni, 0);
+		}
+
+		lnet_net_unlock(LNET_LOCK_EX);
+
+		if (lnd->lnd_type == LOLND) {
+			lnet_ni_addref(ni);
+			LASSERT (the_lnet.ln_loni == NULL);
+			the_lnet.ln_loni = ni;
+			continue;
+		}
+
+		if (ni->ni_peertxcredits == 0 ||
+		    ni->ni_maxtxcredits == 0) {
+			LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
+					   libcfs_lnd2str(lnd->lnd_type),
+					   ni->ni_peertxcredits == 0 ?
+					   "" : "per-peer ");
+			goto failed;
+		}
+
+		cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
+			tq->tq_credits_min =
+			tq->tq_credits_max =
+			tq->tq_credits = lnet_ni_tq_credits(ni);
+		}
+
+		CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
+		       libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
+		       lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
+		       ni->ni_peerrtrcredits, ni->ni_peertimeout);
+
+		nicount++;
+	}
+
+	if (the_lnet.ln_eq_waitni != NULL && nicount > 1) {
+		lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type;
+		LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
+				   "\n",
+				   libcfs_lnd2str(lnd_type));
+		goto failed;
+	}
+
+	return 0;
+
+ failed:
+	lnet_shutdown_lndnis();
+
+	while (!list_empty(&nilist)) {
+		ni = list_entry(nilist.next, lnet_ni_t, ni_list);
+		list_del(&ni->ni_list);
+		lnet_ni_free(ni);
+	}
+
+	return -ENETDOWN;
+}
+
+/**
+ * Initialize LNet library.
+ *
+ * Only userspace programs need to call this function - it's called
+ * automatically in the kernel at module loading time. The caller must call
+ * LNetFini() after a call to LNetInit() if and only if the latter returned 0.
+ * It must be called exactly once.
+ *
+ * \return 0 on success, and -ve on failures.
+ */
+int
+LNetInit(void)
+{
+	int	rc;
+
+	lnet_assert_wire_constants();
+	LASSERT(!the_lnet.ln_init);
+
+	memset(&the_lnet, 0, sizeof(the_lnet));
+
+	/* refer to global cfs_cpt_table for now */
+	the_lnet.ln_cpt_table	= cfs_cpt_table;
+	the_lnet.ln_cpt_number	= cfs_cpt_number(cfs_cpt_table);
+
+	LASSERT(the_lnet.ln_cpt_number > 0);
+	if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
+		/* we are under risk of consuming all lh_cookie */
+		CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
+		       "please change setting of CPT-table and retry\n",
+		       the_lnet.ln_cpt_number, LNET_CPT_MAX);
+		return -1;
+	}
+
+	while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
+		the_lnet.ln_cpt_bits++;
+
+	rc = lnet_create_locks();
+	if (rc != 0) {
+		CERROR("Can't create LNet global locks: %d\n", rc);
+		return -1;
+	}
+
+	the_lnet.ln_refcount = 0;
+	the_lnet.ln_init = 1;
+	LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
+	INIT_LIST_HEAD(&the_lnet.ln_lnds);
+	INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
+	INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
+
+	/* The hash table size is the number of bits it takes to express the
+	 * set ln_num_routes, minus 1 (better to under-estimate than
+	 * over-estimate so we don't waste memory). */
+	if (rnet_htable_size <= 0)
+		rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
+	else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
+		rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
+	the_lnet.ln_remote_nets_hbits = max_t(int, 1,
+					   order_base_2(rnet_htable_size) - 1);
+
+	/* All LNDs apart from the LOLND are in separate modules.  They
+	 * register themselves when their module loads, and unregister
+	 * themselves when their module is unloaded. */
+	lnet_register_lnd(&the_lolnd);
+	return 0;
+}
+EXPORT_SYMBOL(LNetInit);
+
+/**
+ * Finalize LNet library.
+ *
+ * Only userspace programs need to call this function. It can be called
+ * at most once.
+ *
+ * \pre LNetInit() called with success.
+ * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
+ */
+void
+LNetFini(void)
+{
+	LASSERT(the_lnet.ln_init);
+	LASSERT(the_lnet.ln_refcount == 0);
+
+	while (!list_empty(&the_lnet.ln_lnds))
+		lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
+						   lnd_t, lnd_list));
+	lnet_destroy_locks();
+
+	the_lnet.ln_init = 0;
+}
+EXPORT_SYMBOL(LNetFini);
+
+/**
+ * Set LNet PID and start LNet interfaces, routing, and forwarding.
+ *
+ * Userspace programs should call this after a successful call to LNetInit().
+ * Users must call this function at least once before any other functions.
+ * For each successful call there must be a corresponding call to
+ * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
+ * ignored.
+ *
+ * The PID used by LNet may be different from the one requested.
+ * See LNetGetId().
+ *
+ * \param requested_pid PID requested by the caller.
+ *
+ * \return >= 0 on success, and < 0 error code on failures.
+ */
+int
+LNetNIInit(lnet_pid_t requested_pid)
+{
+	int	 im_a_router = 0;
+	int	 rc;
+
+	LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
+
+	LASSERT (the_lnet.ln_init);
+	CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
+
+	if (the_lnet.ln_refcount > 0) {
+		rc = the_lnet.ln_refcount++;
+		goto out;
+	}
+
+	lnet_get_tunables();
+
+	if (requested_pid == LNET_PID_ANY) {
+		/* Don't instantiate LNET just for me */
+		rc = -ENETDOWN;
+		goto failed0;
+	}
+
+	rc = lnet_prepare(requested_pid);
+	if (rc != 0)
+		goto failed0;
+
+	rc = lnet_startup_lndnis();
+	if (rc != 0)
+		goto failed1;
+
+	rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
+	if (rc != 0)
+		goto failed2;
+
+	rc = lnet_check_routes();
+	if (rc != 0)
+		goto failed2;
+
+	rc = lnet_rtrpools_alloc(im_a_router);
+	if (rc != 0)
+		goto failed2;
+
+	rc = lnet_acceptor_start();
+	if (rc != 0)
+		goto failed2;
+
+	the_lnet.ln_refcount = 1;
+	/* Now I may use my own API functions... */
+
+	/* NB router checker needs the_lnet.ln_ping_info in
+	 * lnet_router_checker -> lnet_update_ni_status_locked */
+	rc = lnet_ping_target_init();
+	if (rc != 0)
+		goto failed3;
+
+	rc = lnet_router_checker_start();
+	if (rc != 0)
+		goto failed4;
+
+	lnet_proc_init();
+	goto out;
+
+ failed4:
+	lnet_ping_target_fini();
+ failed3:
+	the_lnet.ln_refcount = 0;
+	lnet_acceptor_stop();
+ failed2:
+	lnet_destroy_routes();
+	lnet_shutdown_lndnis();
+ failed1:
+	lnet_unprepare();
+ failed0:
+	LASSERT (rc < 0);
+ out:
+	LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
+	return rc;
+}
+EXPORT_SYMBOL(LNetNIInit);
+
+/**
+ * Stop LNet interfaces, routing, and forwarding.
+ *
+ * Users must call this function once for each successful call to LNetNIInit().
+ * Once the LNetNIFini() operation has been started, the results of pending
+ * API operations are undefined.
+ *
+ * \return always 0 for current implementation.
+ */
+int
+LNetNIFini()
+{
+	LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
+
+	LASSERT (the_lnet.ln_init);
+	LASSERT (the_lnet.ln_refcount > 0);
+
+	if (the_lnet.ln_refcount != 1) {
+		the_lnet.ln_refcount--;
+	} else {
+		LASSERT (!the_lnet.ln_niinit_self);
+
+		lnet_proc_fini();
+		lnet_router_checker_stop();
+		lnet_ping_target_fini();
+
+		/* Teardown fns that use my own API functions BEFORE here */
+		the_lnet.ln_refcount = 0;
+
+		lnet_acceptor_stop();
+		lnet_destroy_routes();
+		lnet_shutdown_lndnis();
+		lnet_unprepare();
+	}
+
+	LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(LNetNIFini);
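
Taken together, the contracts documented above pair up as follows; a hedged usage sketch of a userspace-style caller, not code from this patch:

	static int run_lnet_user(void)
	{
		int rc = LNetInit();		/* exactly once */

		if (rc != 0)
			return rc;

		rc = LNetNIInit(12345);		/* requested PID; the PID
						 * actually used may differ,
						 * see LNetGetId() */
		if (rc < 0) {
			LNetFini();
			return rc;
		}

		/* ... LNet is usable here ... */

		rc = LNetNIFini();		/* one per successful NIInit */
		LNetFini();			/* iff LNetInit() returned 0 */
		return rc;
	}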
+
+/**
+ * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and
+ * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet
+ * internal ioctl handler.
+ *
+ * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it.
+ *
+ * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer.
+ * The data will be printed to system console. Don't use it excessively.
+ * \param arg A pointer to lnet_process_id_t, process ID of the peer.
+ *
+ * \return Always return 0 when called by users directly (i.e., not via ioctl).
+ */
+int
+LNetCtl(unsigned int cmd, void *arg)
+{
+	struct libcfs_ioctl_data *data = arg;
+	lnet_process_id_t	 id = {0};
+	lnet_ni_t		*ni;
+	int		       rc;
+
+	LASSERT (the_lnet.ln_init);
+	LASSERT (the_lnet.ln_refcount > 0);
+
+	switch (cmd) {
+	case IOC_LIBCFS_GET_NI:
+		rc = LNetGetId(data->ioc_count, &id);
+		data->ioc_nid = id.nid;
+		return rc;
+
+	case IOC_LIBCFS_FAIL_NID:
+		return lnet_fail_nid(data->ioc_nid, data->ioc_count);
+
+	case IOC_LIBCFS_ADD_ROUTE:
+		rc = lnet_add_route(data->ioc_net, data->ioc_count,
+				    data->ioc_nid);
+		return (rc != 0) ? rc : lnet_check_routes();
+
+	case IOC_LIBCFS_DEL_ROUTE:
+		return lnet_del_route(data->ioc_net, data->ioc_nid);
+
+	case IOC_LIBCFS_GET_ROUTE:
+		return lnet_get_route(data->ioc_count,
+				      &data->ioc_net, &data->ioc_count,
+				      &data->ioc_nid, &data->ioc_flags);
+	case IOC_LIBCFS_NOTIFY_ROUTER:
+		return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
+				   cfs_time_current() -
+				   cfs_time_seconds(cfs_time_current_sec() -
+						    (time_t)data->ioc_u64[0]));
+
+	case IOC_LIBCFS_PORTALS_COMPATIBILITY:
+		/* This can be removed once lustre stops calling it */
+		return 0;
+
+	case IOC_LIBCFS_LNET_DIST:
+		rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
+		if (rc < 0 && rc != -EHOSTUNREACH)
+			return rc;
+
+		data->ioc_u32[0] = rc;
+		return 0;
+
+	case IOC_LIBCFS_TESTPROTOCOMPAT:
+		lnet_net_lock(LNET_LOCK_EX);
+		the_lnet.ln_testprotocompat = data->ioc_flags;
+		lnet_net_unlock(LNET_LOCK_EX);
+		return 0;
+
+	case IOC_LIBCFS_PING:
+		id.nid = data->ioc_nid;
+		id.pid = data->ioc_u32[0];
+		rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
+			       (lnet_process_id_t *)data->ioc_pbuf1,
+			       data->ioc_plen1/sizeof(lnet_process_id_t));
+		if (rc < 0)
+			return rc;
+		data->ioc_count = rc;
+		return 0;
+
+	case IOC_LIBCFS_DEBUG_PEER: {
+		/* CAVEAT EMPTOR: this one designed for calling directly; not
+		 * via an ioctl */
+		id = *((lnet_process_id_t *) arg);
+
+		lnet_debug_peer(id.nid);
+
+		ni = lnet_net2ni(LNET_NIDNET(id.nid));
+		if (ni == NULL) {
+			CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id));
+		} else {
+			if (ni->ni_lnd->lnd_ctl == NULL) {
+				CDEBUG(D_WARNING, "No ctl for %s\n",
+				       libcfs_id2str(id));
+			} else {
+				(void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
+			}
+
+			lnet_ni_decref(ni);
+		}
+		return 0;
+	}
+
+	default:
+		ni = lnet_net2ni(data->ioc_net);
+		if (ni == NULL)
+			return -EINVAL;
+
+		if (ni->ni_lnd->lnd_ctl == NULL)
+			rc = -EINVAL;
+		else
+			rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
+
+		lnet_ni_decref(ni);
+		return rc;
+	}
+	/* not reached */
+}
+EXPORT_SYMBOL(LNetCtl);
+
+/**
+ * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
+ * all interfaces share the same PID, as requested by LNetNIInit().
+ *
+ * \param index Index of the interface to look up.
+ * \param id On successful return, this location will hold the
+ * lnet_process_id_t ID of the interface.
+ *
+ * \retval 0 If an interface exists at \a index.
+ * \retval -ENOENT If no interface has been found.
+ */
+int
+LNetGetId(unsigned int index, lnet_process_id_t *id)
+{
+	struct lnet_ni	*ni;
+	struct list_head	*tmp;
+	int		cpt;
+	int		rc = -ENOENT;
+
+	LASSERT(the_lnet.ln_init);
+	LASSERT(the_lnet.ln_refcount > 0);
+
+	cpt = lnet_net_lock_current();
+
+	list_for_each(tmp, &the_lnet.ln_nis) {
+		if (index-- != 0)
+			continue;
+
+		ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+		id->nid = ni->ni_nid;
+		id->pid = the_lnet.ln_pid;
+		rc = 0;
+		break;
+	}
+
+	lnet_net_unlock(cpt);
+	return rc;
+}
+EXPORT_SYMBOL(LNetGetId);
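
The -ENOENT contract makes LNetGetId() a natural enumerator; lnet_create_ping_info() below uses exactly this loop to size the ping buffer. A minimal sketch:

	static int count_local_nis(void)
	{
		lnet_process_id_t id;
		int n;

		for (n = 0; LNetGetId(n, &id) == 0; n++)
			CDEBUG(D_NET, "NI[%d] = %s\n", n, libcfs_id2str(id));

		return n;
	}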
+
+/**
+ * Print a string representation of handle \a h into buffer \a str of
+ * \a len bytes.
+ */
+void
+LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
+{
+	snprintf(str, len, LPX64, h.cookie);
+}
+EXPORT_SYMBOL(LNetSnprintHandle);
+
+static int
+lnet_create_ping_info(void)
+{
+	int	       i;
+	int	       n;
+	int	       rc;
+	unsigned int      infosz;
+	lnet_ni_t	*ni;
+	lnet_process_id_t id;
+	lnet_ping_info_t *pinfo;
+
+	for (n = 0; ; n++) {
+		rc = LNetGetId(n, &id);
+		if (rc == -ENOENT)
+			break;
+
+		LASSERT (rc == 0);
+	}
+
+	infosz = offsetof(lnet_ping_info_t, pi_ni[n]);
+	LIBCFS_ALLOC(pinfo, infosz);
+	if (pinfo == NULL) {
+		CERROR("Can't allocate ping info[%d]\n", n);
+		return -ENOMEM;
+	}
+
+	pinfo->pi_nnis    = n;
+	pinfo->pi_pid     = the_lnet.ln_pid;
+	pinfo->pi_magic   = LNET_PROTO_PING_MAGIC;
+	pinfo->pi_features = LNET_PING_FEAT_NI_STATUS;
+
+	for (i = 0; i < n; i++) {
+		lnet_ni_status_t *ns = &pinfo->pi_ni[i];
+
+		rc = LNetGetId(i, &id);
+		LASSERT (rc == 0);
+
+		ns->ns_nid    = id.nid;
+		ns->ns_status = LNET_NI_STATUS_UP;
+
+		lnet_net_lock(0);
+
+		ni = lnet_nid2ni_locked(id.nid, 0);
+		LASSERT(ni != NULL);
+
+		lnet_ni_lock(ni);
+		LASSERT(ni->ni_status == NULL);
+		ni->ni_status = ns;
+		lnet_ni_unlock(ni);
+
+		lnet_ni_decref_locked(ni, 0);
+		lnet_net_unlock(0);
+	}
+
+	the_lnet.ln_ping_info = pinfo;
+	return 0;
+}
+
+static void
+lnet_destroy_ping_info(void)
+{
+	struct lnet_ni	*ni;
+
+	lnet_net_lock(0);
+
+	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
+		lnet_ni_lock(ni);
+		ni->ni_status = NULL;
+		lnet_ni_unlock(ni);
+	}
+
+	lnet_net_unlock(0);
+
+	LIBCFS_FREE(the_lnet.ln_ping_info,
+		    offsetof(lnet_ping_info_t,
+			     pi_ni[the_lnet.ln_ping_info->pi_nnis]));
+	the_lnet.ln_ping_info = NULL;
+	return;
+}
+
+int
+lnet_ping_target_init(void)
+{
+	lnet_md_t	 md = {0};
+	lnet_handle_me_t  meh;
+	lnet_process_id_t id;
+	int	       rc;
+	int	       rc2;
+	int	       infosz;
+
+	rc = lnet_create_ping_info();
+	if (rc != 0)
+		return rc;
+
+	/* We can have a tiny EQ since we only need to see the unlink event on
+	 * teardown, which by definition is the last one! */
+	rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq);
+	if (rc != 0) {
+		CERROR("Can't allocate ping EQ: %d\n", rc);
+		goto failed_0;
+	}
+
+	memset(&id, 0, sizeof(lnet_process_id_t));
+	id.nid = LNET_NID_ANY;
+	id.pid = LNET_PID_ANY;
+
+	rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
+			  LNET_PROTO_PING_MATCHBITS, 0,
+			  LNET_UNLINK, LNET_INS_AFTER,
+			  &meh);
+	if (rc != 0) {
+		CERROR("Can't create ping ME: %d\n", rc);
+		goto failed_1;
+	}
+
+	/* initialize md content */
+	infosz = offsetof(lnet_ping_info_t,
+			  pi_ni[the_lnet.ln_ping_info->pi_nnis]);
+	md.start     = the_lnet.ln_ping_info;
+	md.length    = infosz;
+	md.threshold = LNET_MD_THRESH_INF;
+	md.max_size  = 0;
+	md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
+		       LNET_MD_MANAGE_REMOTE;
+	md.user_ptr  = NULL;
+	md.eq_handle = the_lnet.ln_ping_target_eq;
+
+	rc = LNetMDAttach(meh, md,
+			  LNET_RETAIN,
+			  &the_lnet.ln_ping_target_md);
+	if (rc != 0) {
+		CERROR("Can't attach ping MD: %d\n", rc);
+		goto failed_2;
+	}
+
+	return 0;
+
+ failed_2:
+	rc2 = LNetMEUnlink(meh);
+	LASSERT (rc2 == 0);
+ failed_1:
+	rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
+	LASSERT (rc2 == 0);
+ failed_0:
+	lnet_destroy_ping_info();
+	return rc;
+}
+
+void
+lnet_ping_target_fini(void)
+{
+	lnet_event_t    event;
+	int	     rc;
+	int	     which;
+	int	     timeout_ms = 1000;
+	sigset_t    blocked = cfs_block_allsigs();
+
+	LNetMDUnlink(the_lnet.ln_ping_target_md);
+	/* NB md could be busy; this just starts the unlink */
+
+	for (;;) {
+		rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1,
+				timeout_ms, &event, &which);
+
+		/* I expect overflow... */
+		LASSERT (rc >= 0 || rc == -EOVERFLOW);
+
+		if (rc == 0) {
+			/* timed out: provide a diagnostic */
+			CWARN("Still waiting for ping MD to unlink\n");
+			timeout_ms *= 2;
+			continue;
+		}
+
+		/* Got a valid event */
+		if (event.unlinked)
+			break;
+	}
+
+	rc = LNetEQFree(the_lnet.ln_ping_target_eq);
+	LASSERT (rc == 0);
+	lnet_destroy_ping_info();
+	cfs_restore_sigs(blocked);
+}
+
+int
+lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids)
+{
+	lnet_handle_eq_t     eqh;
+	lnet_handle_md_t     mdh;
+	lnet_event_t	 event;
+	lnet_md_t	    md = {0};
+	int		  which;
+	int		  unlinked = 0;
+	int		  replied = 0;
+	const int	    a_long_time = 60000; /* ms */
+	int		  infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]);
+	lnet_ping_info_t    *info;
+	lnet_process_id_t    tmpid;
+	int		  i;
+	int		  nob;
+	int		  rc;
+	int		  rc2;
+	sigset_t	 blocked;
+
+	if (n_ids <= 0 ||
+	    id.nid == LNET_NID_ANY ||
+	    timeout_ms > 500000 ||	      /* arbitrary limit! */
+	    n_ids > 20)			 /* arbitrary limit! */
+		return -EINVAL;
+
+	if (id.pid == LNET_PID_ANY)
+		id.pid = LUSTRE_SRV_LNET_PID;
+
+	LIBCFS_ALLOC(info, infosz);
+	if (info == NULL)
+		return -ENOMEM;
+
+	/* NB 2 events max (including any unlink event) */
+	rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
+	if (rc != 0) {
+		CERROR("Can't allocate EQ: %d\n", rc);
+		goto out_0;
+	}
+
+	/* initialize md content */
+	md.start     = info;
+	md.length    = infosz;
+	md.threshold = 2; /*GET/REPLY*/
+	md.max_size  = 0;
+	md.options   = LNET_MD_TRUNCATE;
+	md.user_ptr  = NULL;
+	md.eq_handle = eqh;
+
+	rc = LNetMDBind(md, LNET_UNLINK, &mdh);
+	if (rc != 0) {
+		CERROR("Can't bind MD: %d\n", rc);
+		goto out_1;
+	}
+
+	rc = LNetGet(LNET_NID_ANY, mdh, id,
+		     LNET_RESERVED_PORTAL,
+		     LNET_PROTO_PING_MATCHBITS, 0);
+
+	if (rc != 0) {
+		/* Don't CERROR; this could be deliberate! */
+
+		rc2 = LNetMDUnlink(mdh);
+		LASSERT (rc2 == 0);
+
+		/* NB must wait for the UNLINK event below... */
+		unlinked = 1;
+		timeout_ms = a_long_time;
+	}
+
+	do {
+		/* MUST block for unlink to complete */
+		if (unlinked)
+			blocked = cfs_block_allsigs();
+
+		rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
+
+		if (unlinked)
+			cfs_restore_sigs(blocked);
+
+		CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
+		       (rc2 <= 0) ? -1 : event.type,
+		       (rc2 <= 0) ? -1 : event.status,
+		       (rc2 > 0 && event.unlinked) ? " unlinked" : "");
+
+		LASSERT (rc2 != -EOVERFLOW);     /* can't miss anything */
+
+		if (rc2 <= 0 || event.status != 0) {
+			/* timeout or error */
+			if (!replied && rc == 0)
+				rc = (rc2 < 0) ? rc2 :
+				     (rc2 == 0) ? -ETIMEDOUT :
+				     event.status;
+
+			if (!unlinked) {
+				/* Ensure completion in finite time... */
+				LNetMDUnlink(mdh);
+				/* No assertion (racing with network) */
+				unlinked = 1;
+				timeout_ms = a_long_time;
+			} else if (rc2 == 0) {
+				/* timed out waiting for unlink */
+				CWARN("ping %s: late network completion\n",
+				      libcfs_id2str(id));
+			}
+		} else if (event.type == LNET_EVENT_REPLY) {
+			replied = 1;
+			rc = event.mlength;
+		}
+
+	} while (rc2 <= 0 || !event.unlinked);
+
+	if (!replied) {
+		if (rc >= 0)
+			CWARN("%s: Unexpected rc >= 0 but no reply!\n",
+			      libcfs_id2str(id));
+		rc = -EIO;
+		goto out_1;
+	}
+
+	nob = rc;
+	LASSERT (nob >= 0 && nob <= infosz);
+
+	rc = -EPROTO;			   /* if I can't parse... */
+
+	if (nob < 8) {
+		/* can't check magic/version */
+		CERROR("%s: ping info too short %d\n",
+		       libcfs_id2str(id), nob);
+		goto out_1;
+	}
+
+	if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
+		lnet_swap_pinginfo(info);
+	} else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
+		CERROR("%s: Unexpected magic %08x\n",
+		       libcfs_id2str(id), info->pi_magic);
+		goto out_1;
+	}
+
+	if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
+		CERROR("%s: ping w/o NI status: 0x%x\n",
+		       libcfs_id2str(id), info->pi_features);
+		goto out_1;
+	}
+
+	if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) {
+		CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
+		       nob, (int)offsetof(lnet_ping_info_t, pi_ni[0]));
+		goto out_1;
+	}
+
+	if (info->pi_nnis < n_ids)
+		n_ids = info->pi_nnis;
+
+	if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) {
+		CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
+		       nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids]));
+		goto out_1;
+	}
+
+	rc = -EFAULT;			   /* If I SEGV... */
+
+	for (i = 0; i < n_ids; i++) {
+		tmpid.pid = info->pi_pid;
+		tmpid.nid = info->pi_ni[i].ns_nid;
+		if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
+			goto out_1;
+	}
+	rc = info->pi_nnis;
+
+ out_1:
+	rc2 = LNetEQFree(eqh);
+	if (rc2 != 0)
+		CERROR("rc2 %d\n", rc2);
+	LASSERT (rc2 == 0);
+
+ out_0:
+	LIBCFS_FREE(info, infosz);
+	return rc;
+}

+ 1264 - 0
drivers/staging/lustre/lnet/lnet/config.c

@@ -0,0 +1,1264 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+typedef struct {			    /* tmp struct for parsing routes */
+	struct list_head	 ltb_list;	/* stash on lists */
+	int		ltb_size;	/* allocated size */
+	char	       ltb_text[0];     /* text buffer */
+} lnet_text_buf_t;
+
+static int lnet_tbnob = 0;			/* track text buf allocation */
+#define LNET_MAX_TEXTBUF_NOB     (64<<10)	/* bound allocation */
+#define LNET_SINGLE_TEXTBUF_NOB  (4<<10)
+
+void
+lnet_syntax(char *name, char *str, int offset, int width)
+{
+	static char dots[LNET_SINGLE_TEXTBUF_NOB];
+	static char dashes[LNET_SINGLE_TEXTBUF_NOB];
+
+	memset(dots, '.', sizeof(dots));
+	dots[sizeof(dots)-1] = 0;
+	memset(dashes, '-', sizeof(dashes));
+	dashes[sizeof(dashes)-1] = 0;
+
+	LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str);
+	LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n",
+			   (int)strlen(name), dots, offset, dots,
+			    (width < 1) ? 0 : width - 1, dashes);
+}
+
+int
+lnet_issep (char c)
+{
+	switch (c) {
+	case '\n':
+	case '\r':
+	case ';':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+int
+lnet_net_unique(__u32 net, struct list_head *nilist)
+{
+	struct list_head       *tmp;
+	lnet_ni_t	*ni;
+
+	list_for_each (tmp, nilist) {
+		ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+		if (LNET_NIDNET(ni->ni_nid) == net)
+			return 0;
+	}
+
+	return 1;
+}
+
+void
+lnet_ni_free(struct lnet_ni *ni)
+{
+	if (ni->ni_refs != NULL)
+		cfs_percpt_free(ni->ni_refs);
+
+	if (ni->ni_tx_queues != NULL)
+		cfs_percpt_free(ni->ni_tx_queues);
+
+	if (ni->ni_cpts != NULL)
+		cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
+
+	LIBCFS_FREE(ni, sizeof(*ni));
+}
+
+lnet_ni_t *
+lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
+{
+	struct lnet_tx_queue	*tq;
+	struct lnet_ni		*ni;
+	int			rc;
+	int			i;
+
+	if (!lnet_net_unique(net, nilist)) {
+		LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
+				   libcfs_net2str(net));
+		return NULL;
+	}
+
+	LIBCFS_ALLOC(ni, sizeof(*ni));
+	if (ni == NULL) {
+		CERROR("Out of memory creating network %s\n",
+		       libcfs_net2str(net));
+		return NULL;
+	}
+
+	spin_lock_init(&ni->ni_lock);
+	INIT_LIST_HEAD(&ni->ni_cptlist);
+	ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
+				       sizeof(*ni->ni_refs[0]));
+	if (ni->ni_refs == NULL)
+		goto failed;
+
+	ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(),
+					    sizeof(*ni->ni_tx_queues[0]));
+	if (ni->ni_tx_queues == NULL)
+		goto failed;
+
+	cfs_percpt_for_each(tq, i, ni->ni_tx_queues)
+		INIT_LIST_HEAD(&tq->tq_delayed);
+
+	if (el == NULL) {
+		ni->ni_cpts  = NULL;
+		ni->ni_ncpts = LNET_CPT_NUMBER;
+	} else {
+		rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
+		if (rc <= 0) {
+			CERROR("Failed to set CPTs for NI %s: %d\n",
+			       libcfs_net2str(net), rc);
+			goto failed;
+		}
+
+		LASSERT(rc <= LNET_CPT_NUMBER);
+		if (rc == LNET_CPT_NUMBER) {
+			LIBCFS_FREE(ni->ni_cpts, rc * sizeof(ni->ni_cpts[0]));
+			ni->ni_cpts = NULL;
+		}
+
+		ni->ni_ncpts = rc;
+	}
+
+	/* LND will fill in the address part of the NID */
+	ni->ni_nid = LNET_MKNID(net, 0);
+	ni->ni_last_alive = cfs_time_current_sec();
+	list_add_tail(&ni->ni_list, nilist);
+	return ni;
+ failed:
+	lnet_ni_free(ni);
+	return NULL;
+}
+
+int
+lnet_parse_networks(struct list_head *nilist, char *networks)
+{
+	struct cfs_expr_list *el = NULL;
+	int		tokensize = strlen(networks) + 1;
+	char		*tokens;
+	char		*str;
+	char		*tmp;
+	struct lnet_ni	*ni;
+	__u32		net;
+	int		nnets = 0;
+
+	if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) {
+		/* _WAY_ conservative */
+		LCONSOLE_ERROR_MSG(0x112, "Can't parse networks: string too "
+				   "long\n");
+		return -EINVAL;
+	}
+
+	LIBCFS_ALLOC(tokens, tokensize);
+	if (tokens == NULL) {
+		CERROR("Can't allocate net tokens\n");
+		return -ENOMEM;
+	}
+
+	the_lnet.ln_network_tokens = tokens;
+	the_lnet.ln_network_tokens_nob = tokensize;
+	memcpy (tokens, networks, tokensize);
+	str = tmp = tokens;
+
+	/* Add in the loopback network */
+	ni = lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, nilist);
+	if (ni == NULL)
+		goto failed;
+
+	while (str != NULL && *str != 0) {
+		char	*comma = strchr(str, ',');
+		char	*bracket = strchr(str, '(');
+		char	*square = strchr(str, '[');
+		char	*iface;
+		int	niface;
+		int	rc;
+
+		/* NB we don't check interface conflicts here; that's the
+		 * LND's responsibility (if it cares at all) */
+
+		if (square != NULL && (comma == NULL || square < comma)) {
+			/* e.g. o2ib0(ib0)[1,2]: the numbers between the
+			 * square brackets are the CPTs this NI should be
+			 * bound to */
+			if (bracket != NULL && bracket > square) {
+				tmp = square;
+				goto failed_syntax;
+			}
+
+			tmp = strchr(square, ']');
+			if (tmp == NULL) {
+				tmp = square;
+				goto failed_syntax;
+			}
+
+			rc = cfs_expr_list_parse(square, tmp - square + 1,
+						 0, LNET_CPT_NUMBER - 1, &el);
+			if (rc != 0) {
+				tmp = square;
+				goto failed_syntax;
+			}
+
+			while (square <= tmp)
+				*square++ = ' ';
+		}
+
+		if (bracket == NULL ||
+		    (comma != NULL && comma < bracket)) {
+
+			/* no interface list specified */
+
+			if (comma != NULL)
+				*comma++ = 0;
+			net = libcfs_str2net(cfs_trimwhite(str));
+
+			if (net == LNET_NIDNET(LNET_NID_ANY)) {
+				LCONSOLE_ERROR_MSG(0x113, "Unrecognised network"
+						   " type\n");
+				tmp = str;
+				goto failed_syntax;
+			}
+
+			if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
+			    lnet_ni_alloc(net, el, nilist) == NULL)
+				goto failed;
+
+			if (el != NULL) {
+				cfs_expr_list_free(el);
+				el = NULL;
+			}
+
+			str = comma;
+			continue;
+		}
+
+		*bracket = 0;
+		net = libcfs_str2net(cfs_trimwhite(str));
+		if (net == LNET_NIDNET(LNET_NID_ANY)) {
+			tmp = str;
+			goto failed_syntax;
+		}
+
+		nnets++;
+		ni = lnet_ni_alloc(net, el, nilist);
+		if (ni == NULL)
+			goto failed;
+
+		if (el != NULL) {
+			cfs_expr_list_free(el);
+			el = NULL;
+		}
+
+		niface = 0;
+		iface = bracket + 1;
+
+		bracket = strchr(iface, ')');
+		if (bracket == NULL) {
+			tmp = iface;
+			goto failed_syntax;
+		}
+
+		*bracket = 0;
+		do {
+			comma = strchr(iface, ',');
+			if (comma != NULL)
+				*comma++ = 0;
+
+			iface = cfs_trimwhite(iface);
+			if (*iface == 0) {
+				tmp = iface;
+				goto failed_syntax;
+			}
+
+			if (niface == LNET_MAX_INTERFACES) {
+				LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
+						   "for net %s\n",
+						   libcfs_net2str(net));
+				goto failed;
+			}
+
+			ni->ni_interfaces[niface++] = iface;
+			iface = comma;
+		} while (iface != NULL);
+
+		str = bracket + 1;
+		comma = strchr(bracket + 1, ',');
+		if (comma != NULL) {
+			*comma = 0;
+			str = cfs_trimwhite(str);
+			if (*str != 0) {
+				tmp = str;
+				goto failed_syntax;
+			}
+			str = comma + 1;
+			continue;
+		}
+
+		str = cfs_trimwhite(str);
+		if (*str != 0) {
+			tmp = str;
+			goto failed_syntax;
+		}
+	}
+
+	LASSERT(!list_empty(nilist));
+	return 0;
+
+ failed_syntax:
+	lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
+ failed:
+	while (!list_empty(nilist)) {
+		ni = list_entry(nilist->next, lnet_ni_t, ni_list);
+
+		list_del(&ni->ni_list);
+		lnet_ni_free(ni);
+	}
+
+	if (el != NULL)
+		cfs_expr_list_free(el);
+
+	LIBCFS_FREE(tokens, tokensize);
+	the_lnet.ln_network_tokens = NULL;
+
+	return -EINVAL;
+}
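+
+/*
+ * Illustrative examples (not part of the original patch) of "networks="
+ * strings this parser accepts:
+ *
+ *   networks="tcp"                    one TCP NI, any interface
+ *   networks="tcp0(eth0,eth1)"        one TCP NI bound to eth0 and eth1
+ *   networks="o2ib0(ib0)[0,1],tcp0"   an o2ib NI restricted to CPTs 0-1,
+ *                                     plus a TCP NI
+ *
+ * The loopback NI is always added implicitly, and each network may be
+ * specified at most once (see lnet_net_unique() above).
+ */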
+
+lnet_text_buf_t *
+lnet_new_text_buf (int str_len)
+{
+	lnet_text_buf_t *ltb;
+	int	      nob;
+
+	/* NB allocate space for the terminating 0 */
+	nob = offsetof(lnet_text_buf_t, ltb_text[str_len + 1]);
+	if (nob > LNET_SINGLE_TEXTBUF_NOB) {
+		/* _way_ conservative for "route net gateway..." */
+		CERROR("text buffer too big\n");
+		return NULL;
+	}
+
+	if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) {
+		CERROR("Too many text buffers\n");
+		return NULL;
+	}
+
+	LIBCFS_ALLOC(ltb, nob);
+	if (ltb == NULL)
+		return NULL;
+
+	ltb->ltb_size = nob;
+	ltb->ltb_text[0] = 0;
+	lnet_tbnob += nob;
+	return ltb;
+}
+
+void
+lnet_free_text_buf (lnet_text_buf_t *ltb)
+{
+	lnet_tbnob -= ltb->ltb_size;
+	LIBCFS_FREE(ltb, ltb->ltb_size);
+}
+
+void
+lnet_free_text_bufs(struct list_head *tbs)
+{
+	lnet_text_buf_t  *ltb;
+
+	while (!list_empty(tbs)) {
+		ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list);
+
+		list_del(&ltb->ltb_list);
+		lnet_free_text_buf(ltb);
+	}
+}
+
+void
+lnet_print_text_bufs(struct list_head *tbs)
+{
+	struct list_head	*tmp;
+	lnet_text_buf_t   *ltb;
+
+	list_for_each (tmp, tbs) {
+		ltb = list_entry(tmp, lnet_text_buf_t, ltb_list);
+
+		CDEBUG(D_WARNING, "%s\n", ltb->ltb_text);
+	}
+
+	CDEBUG(D_WARNING, "%d allocated\n", lnet_tbnob);
+}
+
+int
+lnet_str2tbs_sep (struct list_head *tbs, char *str)
+{
+	struct list_head	pending;
+	char	     *sep;
+	int	       nob;
+	int	       i;
+	lnet_text_buf_t  *ltb;
+
+	INIT_LIST_HEAD(&pending);
+
+	/* Split 'str' into separate commands */
+	for (;;) {
+		/* skip leading whitespace */
+		while (cfs_iswhite(*str))
+			str++;
+
+		/* scan for separator or comment */
+		for (sep = str; *sep != 0; sep++)
+			if (lnet_issep(*sep) || *sep == '#')
+				break;
+
+		nob = (int)(sep - str);
+		if (nob > 0) {
+			ltb = lnet_new_text_buf(nob);
+			if (ltb == NULL) {
+				lnet_free_text_bufs(&pending);
+				return -1;
+			}
+
+			for (i = 0; i < nob; i++)
+				if (cfs_iswhite(str[i]))
+					ltb->ltb_text[i] = ' ';
+				else
+					ltb->ltb_text[i] = str[i];
+
+			ltb->ltb_text[nob] = 0;
+
+			list_add_tail(&ltb->ltb_list, &pending);
+		}
+
+		if (*sep == '#') {
+			/* scan for separator */
+			do {
+				sep++;
+			} while (*sep != 0 && !lnet_issep(*sep));
+		}
+
+		if (*sep == 0)
+			break;
+
+		str = sep + 1;
+	}
+
+	list_splice(&pending, tbs->prev);
+	return 0;
+}
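+
+/*
+ * Illustrative example (not part of the original patch): the input
+ *
+ *   "o2ib0 192.168.0.1@tcp; # fallback\n tcp1 10.0.0.2@tcp"
+ *
+ * is split at ';' and '\n' into the two text buffers
+ * "o2ib0 192.168.0.1@tcp" and "tcp1 10.0.0.2@tcp"; the "# fallback"
+ * comment is discarded, leading whitespace is skipped, and tabs and other
+ * whitespace characters are replaced with plain spaces.
+ */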
+
+int
+lnet_expand1tb (struct list_head *list,
+	       char *str, char *sep1, char *sep2,
+	       char *item, int itemlen)
+{
+	int	      len1 = (int)(sep1 - str);
+	int	      len2 = strlen(sep2 + 1);
+	lnet_text_buf_t *ltb;
+
+	LASSERT (*sep1 == '[');
+	LASSERT (*sep2 == ']');
+
+	ltb = lnet_new_text_buf(len1 + itemlen + len2);
+	if (ltb == NULL)
+		return -ENOMEM;
+
+	memcpy(ltb->ltb_text, str, len1);
+	memcpy(&ltb->ltb_text[len1], item, itemlen);
+	memcpy(&ltb->ltb_text[len1+itemlen], sep2 + 1, len2);
+	ltb->ltb_text[len1 + itemlen + len2] = 0;
+
+	list_add_tail(&ltb->ltb_list, list);
+	return 0;
+}
+
+int
+lnet_str2tbs_expand (struct list_head *tbs, char *str)
+{
+	char	      num[16];
+	struct list_head	pending;
+	char	     *sep;
+	char	     *sep2;
+	char	     *parsed;
+	char	     *enditem;
+	int	       lo;
+	int	       hi;
+	int	       stride;
+	int	       i;
+	int	       nob;
+	int	       scanned;
+
+	INIT_LIST_HEAD(&pending);
+
+	sep = strchr(str, '[');
+	if (sep == NULL)			/* nothing to expand */
+		return 0;
+
+	sep2 = strchr(sep, ']');
+	if (sep2 == NULL)
+		goto failed;
+
+	for (parsed = sep; parsed < sep2; parsed = enditem) {
+
+		enditem = ++parsed;
+		while (enditem < sep2 && *enditem != ',')
+			enditem++;
+
+		if (enditem == parsed)		/* no empty items */
+			goto failed;
+
+		if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi, &stride, &scanned) < 3) {
+
+			if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) {
+
+				/* simple string enumeration */
+				if (lnet_expand1tb(&pending, str, sep, sep2,
+						   parsed, (int)(enditem - parsed)) != 0)
+					goto failed;
+
+				continue;
+			}
+
+			stride = 1;
+		}
+
+		/* range expansion */
+
+		if (enditem != parsed + scanned) /* no trailing junk */
+			goto failed;
+
+		if (hi < 0 || lo < 0 || stride < 0 || hi < lo ||
+		    (hi - lo) % stride != 0)
+			goto failed;
+
+		for (i = lo; i <= hi; i += stride) {
+
+			snprintf(num, sizeof(num), "%d", i);
+			nob = strlen(num);
+			if (nob + 1 == sizeof(num))
+				goto failed;
+
+			if (lnet_expand1tb(&pending, str, sep, sep2,
+					   num, nob) != 0)
+				goto failed;
+		}
+	}
+
+	list_splice(&pending, tbs->prev);
+	return 1;
+
+ failed:
+	lnet_free_text_bufs(&pending);
+	return -1;
+}
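+
+/*
+ * Illustrative examples (not part of the original patch) of the bracket
+ * expansion performed above:
+ *
+ *   "10.10.[2,3].1@tcp"  ->  "10.10.2.1@tcp", "10.10.3.1@tcp"
+ *   "192.168.0.[1-5/2]"  ->  "192.168.0.1", "192.168.0.3", "192.168.0.5"
+ *
+ * Only the first [...] group is expanded per call; the caller re-feeds
+ * each result until no brackets remain (return value 0).
+ */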
+
+int
+lnet_parse_hops (char *str, unsigned int *hops)
+{
+	int     len = strlen(str);
+	int     nob = len;
+
+	return (sscanf(str, "%u%n", hops, &nob) >= 1 &&
+		nob == len &&
+		*hops > 0 && *hops < 256);
+}
+
+
+int
+lnet_parse_route (char *str, int *im_a_router)
+{
+	/* static scratch buffer OK (single threaded) */
+	static char       cmd[LNET_SINGLE_TEXTBUF_NOB];
+
+	struct list_head	nets;
+	struct list_head	gateways;
+	struct list_head       *tmp1;
+	struct list_head       *tmp2;
+	__u32	     net;
+	lnet_nid_t	nid;
+	lnet_text_buf_t  *ltb;
+	int	       rc;
+	char	     *sep;
+	char	     *token = str;
+	int	       ntokens = 0;
+	int	       myrc = -1;
+	unsigned int      hops;
+	int	       got_hops = 0;
+
+	INIT_LIST_HEAD(&gateways);
+	INIT_LIST_HEAD(&nets);
+
+	/* save a copy of the string for error messages */
+	strncpy(cmd, str, sizeof(cmd) - 1);
+	cmd[sizeof(cmd) - 1] = 0;
+
+	sep = str;
+	for (;;) {
+		/* scan for token start */
+		while (cfs_iswhite(*sep))
+			sep++;
+		if (*sep == 0) {
+			if (ntokens < (got_hops ? 3 : 2))
+				goto token_error;
+			break;
+		}
+
+		ntokens++;
+		token = sep++;
+
+		/* scan for token end */
+		while (*sep != 0 && !cfs_iswhite(*sep))
+			sep++;
+		if (*sep != 0)
+			*sep++ = 0;
+
+		if (ntokens == 1) {
+			tmp2 = &nets;		/* expanding nets */
+		} else if (ntokens == 2 &&
+			   lnet_parse_hops(token, &hops)) {
+			got_hops = 1;	   /* got a hop count */
+			continue;
+		} else {
+			tmp2 = &gateways;	/* expanding gateways */
+		}
+
+		ltb = lnet_new_text_buf(strlen(token));
+		if (ltb == NULL)
+			goto out;
+
+		strcpy(ltb->ltb_text, token);
+		tmp1 = &ltb->ltb_list;
+		list_add_tail(tmp1, tmp2);
+
+		while (tmp1 != tmp2) {
+			ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list);
+
+			rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text);
+			if (rc < 0)
+				goto token_error;
+
+			tmp1 = tmp1->next;
+
+			if (rc > 0) {		/* expanded! */
+				list_del(&ltb->ltb_list);
+				lnet_free_text_buf(ltb);
+				continue;
+			}
+
+			if (ntokens == 1) {
+				net = libcfs_str2net(ltb->ltb_text);
+				if (net == LNET_NIDNET(LNET_NID_ANY) ||
+				    LNET_NETTYP(net) == LOLND)
+					goto token_error;
+			} else {
+				nid = libcfs_str2nid(ltb->ltb_text);
+				if (nid == LNET_NID_ANY ||
+				    LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
+					goto token_error;
+			}
+		}
+	}
+
+	if (!got_hops)
+		hops = 1;
+
+	LASSERT (!list_empty(&nets));
+	LASSERT (!list_empty(&gateways));
+
+	list_for_each (tmp1, &nets) {
+		ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list);
+		net = libcfs_str2net(ltb->ltb_text);
+		LASSERT (net != LNET_NIDNET(LNET_NID_ANY));
+
+		list_for_each (tmp2, &gateways) {
+			ltb = list_entry(tmp2, lnet_text_buf_t, ltb_list);
+			nid = libcfs_str2nid(ltb->ltb_text);
+			LASSERT (nid != LNET_NID_ANY);
+
+			if (lnet_islocalnid(nid)) {
+				*im_a_router = 1;
+				continue;
+			}
+
+			rc = lnet_add_route (net, hops, nid);
+			if (rc != 0) {
+				CERROR("Can't create route "
+				       "to %s via %s\n",
+				       libcfs_net2str(net),
+				       libcfs_nid2str(nid));
+				goto out;
+			}
+		}
+	}
+
+	myrc = 0;
+	goto out;
+
+ token_error:
+	lnet_syntax("routes", cmd, (int)(token - str), strlen(token));
+ out:
+	lnet_free_text_bufs(&nets);
+	lnet_free_text_bufs(&gateways);
+	return myrc;
+}
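+
+/*
+ * Illustrative example (not part of the original patch): a route entry is
+ * "<net> [hops] <gateway NID(s)>", so
+ *
+ *   routes="o2ib0 1 192.168.0.[1-8]@tcp0"
+ *
+ * expands to eight gateways and declares net o2ib0 reachable through each
+ * of them in one hop.  If this node is itself one of the gateways,
+ * *im_a_router is set instead of adding a route to ourselves.
+ */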
+
+int
+lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router)
+{
+	lnet_text_buf_t   *ltb;
+
+	while (!list_empty(tbs)) {
+		ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list);
+
+		if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) {
+			lnet_free_text_bufs(tbs);
+			return -EINVAL;
+		}
+
+		list_del(&ltb->ltb_list);
+		lnet_free_text_buf(ltb);
+	}
+
+	return 0;
+}
+
+int
+lnet_parse_routes (char *routes, int *im_a_router)
+{
+	struct list_head	tbs;
+	int	       rc = 0;
+
+	*im_a_router = 0;
+
+	INIT_LIST_HEAD(&tbs);
+
+	if (lnet_str2tbs_sep(&tbs, routes) < 0) {
+		CERROR("Error parsing routes\n");
+		rc = -EINVAL;
+	} else {
+		rc = lnet_parse_route_tbs(&tbs, im_a_router);
+	}
+
+	LASSERT (lnet_tbnob == 0);
+	return rc;
+}
+
+int
+lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip)
+{
+	LIST_HEAD(list);
+	int		rc;
+	int		i;
+
+	rc = cfs_ip_addr_parse(token, len, &list);
+	if (rc != 0)
+		return rc;
+
+	for (rc = i = 0; !rc && i < nip; i++)
+		rc = cfs_ip_addr_match(ipaddrs[i], &list);
+
+	cfs_ip_addr_free(&list);
+
+	return rc;
+}
+
+int
+lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip)
+{
+	static char tokens[LNET_SINGLE_TEXTBUF_NOB];
+
+	int   matched = 0;
+	int   ntokens = 0;
+	int   len;
+	char *net = NULL;
+	char *sep;
+	char *token;
+	int   rc;
+
+	LASSERT (strlen(net_entry) < sizeof(tokens));
+
+	/* work on a copy of the string */
+	strcpy(tokens, net_entry);
+	sep = tokens;
+	for (;;) {
+		/* scan for token start */
+		while (cfs_iswhite(*sep))
+			sep++;
+		if (*sep == 0)
+			break;
+
+		token = sep++;
+
+		/* scan for token end */
+		while (*sep != 0 && !cfs_iswhite(*sep))
+			sep++;
+		if (*sep != 0)
+			*sep++ = 0;
+
+		if (ntokens++ == 0) {
+			net = token;
+			continue;
+		}
+
+		len = strlen(token);
+
+		rc = lnet_match_network_token(token, len, ipaddrs, nip);
+		if (rc < 0) {
+			lnet_syntax("ip2nets", net_entry,
+				    (int)(token - tokens), len);
+			return rc;
+		}
+
+		matched |= (rc != 0);
+	}
+
+	if (!matched)
+		return 0;
+
+	strcpy(net_entry, net);		 /* replace with matched net */
+	return 1;
+}
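+
+/*
+ * Illustrative example (not part of the original patch): an ip2nets entry
+ * is a net spec followed by IP address patterns, e.g.
+ *
+ *   "tcp(eth0) 192.168.0.*; o2ib 132.6.[1-3].[2-8/2]"
+ *
+ * For each entry the first token is the network and the remaining tokens
+ * are matched against this node's IP addresses; on a match the entry is
+ * replaced by its net spec alone, e.g. "tcp(eth0)".
+ */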
+
+__u32
+lnet_netspec2net(char *netspec)
+{
+	char   *bracket = strchr(netspec, '(');
+	__u32   net;
+
+	if (bracket != NULL)
+		*bracket = 0;
+
+	net = libcfs_str2net(netspec);
+
+	if (bracket != NULL)
+		*bracket = '(';
+
+	return net;
+}
+
+int
+lnet_splitnets(char *source, struct list_head *nets)
+{
+	int	       offset = 0;
+	int	       offset2;
+	int	       len;
+	lnet_text_buf_t  *tb;
+	lnet_text_buf_t  *tb2;
+	struct list_head       *t;
+	char	     *sep;
+	char	     *bracket;
+	__u32	     net;
+
+	LASSERT (!list_empty(nets));
+	LASSERT (nets->next == nets->prev);     /* single entry */
+
+	tb = list_entry(nets->next, lnet_text_buf_t, ltb_list);
+
+	for (;;) {
+		sep = strchr(tb->ltb_text, ',');
+		bracket = strchr(tb->ltb_text, '(');
+
+		if (sep != NULL &&
+		    bracket != NULL &&
+		    bracket < sep) {
+			/* netspec lists interfaces... */
+
+			offset2 = offset + (int)(bracket - tb->ltb_text);
+			len = strlen(bracket);
+
+			bracket = strchr(bracket + 1, ')');
+
+			if (bracket == NULL ||
+			    !(bracket[1] == ',' || bracket[1] == 0)) {
+				lnet_syntax("ip2nets", source, offset2, len);
+				return -EINVAL;
+			}
+
+			sep = (bracket[1] == 0) ? NULL : bracket + 1;
+		}
+
+		if (sep != NULL)
+			*sep++ = 0;
+
+		net = lnet_netspec2net(tb->ltb_text);
+		if (net == LNET_NIDNET(LNET_NID_ANY)) {
+			lnet_syntax("ip2nets", source, offset,
+				    strlen(tb->ltb_text));
+			return -EINVAL;
+		}
+
+		list_for_each(t, nets) {
+			tb2 = list_entry(t, lnet_text_buf_t, ltb_list);
+
+			if (tb2 == tb)
+				continue;
+
+			if (net == lnet_netspec2net(tb2->ltb_text)) {
+				/* duplicate network */
+				lnet_syntax("ip2nets", source, offset,
+					    strlen(tb->ltb_text));
+				return -EINVAL;
+			}
+		}
+
+		if (sep == NULL)
+			return 0;
+
+		offset += (int)(sep - tb->ltb_text);
+		tb2 = lnet_new_text_buf(strlen(sep));
+		if (tb2 == NULL)
+			return -ENOMEM;
+
+		strcpy(tb2->ltb_text, sep);
+		list_add_tail(&tb2->ltb_list, nets);
+
+		tb = tb2;
+	}
+}
+
+int
+lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
+{
+	static char	networks[LNET_SINGLE_TEXTBUF_NOB];
+	static char	source[LNET_SINGLE_TEXTBUF_NOB];
+
+	struct list_head	  raw_entries;
+	struct list_head	  matched_nets;
+	struct list_head	  current_nets;
+	struct list_head	 *t;
+	struct list_head	 *t2;
+	lnet_text_buf_t    *tb;
+	lnet_text_buf_t    *tb2;
+	__u32	       net1;
+	__u32	       net2;
+	int		 len;
+	int		 count;
+	int		 dup;
+	int		 rc;
+
+	INIT_LIST_HEAD(&raw_entries);
+	if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) {
+		CERROR("Error parsing ip2nets\n");
+		LASSERT (lnet_tbnob == 0);
+		return -EINVAL;
+	}
+
+	INIT_LIST_HEAD(&matched_nets);
+	INIT_LIST_HEAD(&current_nets);
+	networks[0] = 0;
+	count = 0;
+	len = 0;
+	rc = 0;
+
+	while (!list_empty(&raw_entries)) {
+		tb = list_entry(raw_entries.next, lnet_text_buf_t,
+				    ltb_list);
+
+		strncpy(source, tb->ltb_text, sizeof(source)-1);
+		source[sizeof(source)-1] = 0;
+
+		/* on a match, replace ltb_text with the matched network(s) */
+		rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip);
+		if (rc < 0)
+			break;
+
+		list_del(&tb->ltb_list);
+
+		if (rc == 0) {		  /* no match */
+			lnet_free_text_buf(tb);
+			continue;
+		}
+
+		/* split into separate networks */
+		INIT_LIST_HEAD(&current_nets);
+		list_add(&tb->ltb_list, &current_nets);
+		rc = lnet_splitnets(source, &current_nets);
+		if (rc < 0)
+			break;
+
+		dup = 0;
+		list_for_each (t, &current_nets) {
+			tb = list_entry(t, lnet_text_buf_t, ltb_list);
+			net1 = lnet_netspec2net(tb->ltb_text);
+			LASSERT (net1 != LNET_NIDNET(LNET_NID_ANY));
+
+			list_for_each(t2, &matched_nets) {
+				tb2 = list_entry(t2, lnet_text_buf_t,
+						     ltb_list);
+				net2 = lnet_netspec2net(tb2->ltb_text);
+				LASSERT (net2 != LNET_NIDNET(LNET_NID_ANY));
+
+				if (net1 == net2) {
+					dup = 1;
+					break;
+				}
+			}
+
+			if (dup)
+				break;
+		}
+
+		if (dup) {
+			lnet_free_text_bufs(&current_nets);
+			continue;
+		}
+
+		list_for_each_safe(t, t2, &current_nets) {
+			tb = list_entry(t, lnet_text_buf_t, ltb_list);
+
+			list_del(&tb->ltb_list);
+			list_add_tail(&tb->ltb_list, &matched_nets);
+
+			len += snprintf(networks + len, sizeof(networks) - len,
+					"%s%s", (len == 0) ? "" : ",",
+					tb->ltb_text);
+
+			if (len >= sizeof(networks)) {
+				CERROR("Too many matched networks\n");
+				rc = -E2BIG;
+				goto out;
+			}
+		}
+
+		count++;
+	}
+
+ out:
+	lnet_free_text_bufs(&raw_entries);
+	lnet_free_text_bufs(&matched_nets);
+	lnet_free_text_bufs(&current_nets);
+	LASSERT (lnet_tbnob == 0);
+
+	if (rc < 0)
+		return rc;
+
+	*networksp = networks;
+	return count;
+}
+
+void
+lnet_ipaddr_free_enumeration(__u32 *ipaddrs, int nip)
+{
+	LIBCFS_FREE(ipaddrs, nip * sizeof(*ipaddrs));
+}
+
+int
+lnet_ipaddr_enumerate (__u32 **ipaddrsp)
+{
+	int	up;
+	__u32      netmask;
+	__u32     *ipaddrs;
+	__u32     *ipaddrs2;
+	int	nip;
+	char     **ifnames;
+	int	nif = libcfs_ipif_enumerate(&ifnames);
+	int	i;
+	int	rc;
+
+	if (nif <= 0)
+		return nif;
+
+	LIBCFS_ALLOC(ipaddrs, nif * sizeof(*ipaddrs));
+	if (ipaddrs == NULL) {
+		CERROR("Can't allocate ipaddrs[%d]\n", nif);
+		libcfs_ipif_free_enumeration(ifnames, nif);
+		return -ENOMEM;
+	}
+
+	for (i = nip = 0; i < nif; i++) {
+		if (!strcmp(ifnames[i], "lo"))
+			continue;
+
+		rc = libcfs_ipif_query(ifnames[i], &up,
+				       &ipaddrs[nip], &netmask);
+		if (rc != 0) {
+			CWARN("Can't query interface %s: %d\n",
+			      ifnames[i], rc);
+			continue;
+		}
+
+		if (!up) {
+			CWARN("Ignoring interface %s: it's down\n",
+			      ifnames[i]);
+			continue;
+		}
+
+		nip++;
+	}
+
+	libcfs_ipif_free_enumeration(ifnames, nif);
+
+	if (nip == nif) {
+		*ipaddrsp = ipaddrs;
+	} else {
+		if (nip > 0) {
+			LIBCFS_ALLOC(ipaddrs2, nip * sizeof(*ipaddrs2));
+			if (ipaddrs2 == NULL) {
+				CERROR("Can't allocate ipaddrs[%d]\n", nip);
+				nip = -ENOMEM;
+			} else {
+				memcpy(ipaddrs2, ipaddrs,
+				       nip * sizeof(*ipaddrs));
+				*ipaddrsp = ipaddrs2;
+				rc = nip;
+			}
+		}
+		lnet_ipaddr_free_enumeration(ipaddrs, nif);
+	}
+	return nip;
+}
+
+int
+lnet_parse_ip2nets (char **networksp, char *ip2nets)
+{
+	__u32     *ipaddrs;
+	int	nip = lnet_ipaddr_enumerate(&ipaddrs);
+	int	rc;
+
+	if (nip < 0) {
+		LCONSOLE_ERROR_MSG(0x117, "Error %d enumerating local IP "
+				   "interfaces for ip2nets to match\n", nip);
+		return nip;
+	}
+
+	if (nip == 0) {
+		LCONSOLE_ERROR_MSG(0x118, "No local IP interfaces "
+				   "for ip2nets to match\n");
+		return -ENOENT;
+	}
+
+	rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
+	lnet_ipaddr_free_enumeration(ipaddrs, nip);
+
+	if (rc < 0) {
+		LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc);
+		return rc;
+	}
+
+	if (rc == 0) {
+		LCONSOLE_ERROR_MSG(0x11a, "ip2nets does not match "
+				   "any local IP interfaces\n");
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+int
+lnet_set_ip_niaddr (lnet_ni_t *ni)
+{
+	__u32  net = LNET_NIDNET(ni->ni_nid);
+	char **names;
+	int    n;
+	__u32  ip;
+	__u32  netmask;
+	int    up;
+	int    i;
+	int    rc;
+
+	/* Convenience for LNDs that use the IP address of a local interface as
+	 * the local address part of their NID */
+
+	if (ni->ni_interfaces[0] != NULL) {
+
+		CLASSERT (LNET_MAX_INTERFACES > 1);
+
+		if (ni->ni_interfaces[1] != NULL) {
+			CERROR("Net %s doesn't support multiple interfaces\n",
+			       libcfs_net2str(net));
+			return -EPERM;
+		}
+
+		rc = libcfs_ipif_query(ni->ni_interfaces[0],
+				       &up, &ip, &netmask);
+		if (rc != 0) {
+			CERROR("Net %s can't query interface %s: %d\n",
+			       libcfs_net2str(net), ni->ni_interfaces[0], rc);
+			return -EPERM;
+		}
+
+		if (!up) {
+			CERROR("Net %s can't use interface %s: it's down\n",
+			       libcfs_net2str(net), ni->ni_interfaces[0]);
+			return -ENETDOWN;
+		}
+
+		ni->ni_nid = LNET_MKNID(net, ip);
+		return 0;
+	}
+
+	n = libcfs_ipif_enumerate(&names);
+	if (n <= 0) {
+		CERROR("Net %s can't enumerate interfaces: %d\n",
+		       libcfs_net2str(net), n);
+		return 0;
+	}
+
+	for (i = 0; i < n; i++) {
+		if (!strcmp(names[i], "lo")) /* skip the loopback IF */
+			continue;
+
+		rc = libcfs_ipif_query(names[i], &up, &ip, &netmask);
+
+		if (rc != 0) {
+			CWARN("Net %s can't query interface %s: %d\n",
+			      libcfs_net2str(net), names[i], rc);
+			continue;
+		}
+
+		if (!up) {
+			CWARN("Net %s ignoring interface %s (down)\n",
+			      libcfs_net2str(net), names[i]);
+			continue;
+		}
+
+		libcfs_ipif_free_enumeration(names, n);
+		ni->ni_nid = LNET_MKNID(net, ip);
+		return 0;
+	}
+
+	CERROR("Net %s can't find any interfaces\n", libcfs_net2str(net));
+	libcfs_ipif_free_enumeration(names, n);
+	return -ENOENT;
+}
+EXPORT_SYMBOL(lnet_set_ip_niaddr);

+ 447 - 0
drivers/staging/lustre/lnet/lnet/lib-eq.c

@@ -0,0 +1,447 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-eq.c
+ *
+ * Library level Event queue management routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+/**
+ * Create an event queue that has room for \a count number of events.
+ *
+ * The event queue is circular and older events will be overwritten by new
+ * ones if they are not removed in time by the user using the functions
+ * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to
+ * determine the appropriate size of the event queue to prevent this loss
+ * of events. Note that when an EQ handler is specified in \a callback, no
+ * event loss can happen, since the handler is run for each event deposited
+ * into the EQ.
+ *
+ * \param count The number of events to be stored in the event queue. It
+ * will be rounded up to the next power of two.
+ * \param callback A handler function that runs when an event is deposited
+ * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to
+ * indicate that no event handler is desired.
+ * \param handle On successful return, this location will hold a handle for
+ * the newly created EQ.
+ *
+ * \retval 0       On success.
+ * \retval -EINVAL If a parameter is not valid.
+ * \retval -ENOMEM If memory for the EQ can't be allocated.
+ *
+ * \see lnet_eq_handler_t for the discussion on EQ handler semantics.
+ */
+int
+LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback,
+	    lnet_handle_eq_t *handle)
+{
+	lnet_eq_t     *eq;
+
+	LASSERT (the_lnet.ln_init);
+	LASSERT (the_lnet.ln_refcount > 0);
+
+	/* We need count to be a power of 2 so that when eq_{enq,deq}_seq
+	 * overflow they don't skip entries, and the queue keeps the same
+	 * apparent capacity at all times */
+
+	count = cfs_power2_roundup(count);
+
+	if (callback != LNET_EQ_HANDLER_NONE && count != 0) {
+		CWARN("EQ callback is guaranteed to get every event, "
+		      "do you still want to set eqcount %d for polling "
+		      "event which will have locking overhead? "
+		      "Please contact with developer to confirm\n", count);
+	}
+
+	/* count can be 0 if the caller only needs the callback; that
+	 * eliminates the overhead of enqueueing events */
+	if (count == 0 && callback == LNET_EQ_HANDLER_NONE)
+		return -EINVAL;
+
+	eq = lnet_eq_alloc();
+	if (eq == NULL)
+		return -ENOMEM;
+
+	if (count != 0) {
+		LIBCFS_ALLOC(eq->eq_events, count * sizeof(lnet_event_t));
+		if (eq->eq_events == NULL)
+			goto failed;
+		/* NB the allocator has set all event sequence numbers to 0,
+		 * so all of them are earlier than eq_deq_seq */
+	}
+
+	eq->eq_deq_seq = 1;
+	eq->eq_enq_seq = 1;
+	eq->eq_size = count;
+	eq->eq_callback = callback;
+
+	eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(),
+				       sizeof(*eq->eq_refs[0]));
+	if (eq->eq_refs == NULL)
+		goto failed;
+
+	/* MUST hold both exclusive lnet_res_lock */
+	lnet_res_lock(LNET_LOCK_EX);
+	/* NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
+	 * both EQ lookup and poll event with only lnet_eq_wait_lock */
+	lnet_eq_wait_lock();
+
+	lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh);
+	list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active);
+
+	lnet_eq_wait_unlock();
+	lnet_res_unlock(LNET_LOCK_EX);
+
+	lnet_eq2handle(handle, eq);
+	return 0;
+
+failed:
+	if (eq->eq_events != NULL)
+		LIBCFS_FREE(eq->eq_events, count * sizeof(lnet_event_t));
+
+	if (eq->eq_refs != NULL)
+		cfs_percpt_free(eq->eq_refs);
+
+	lnet_eq_free(eq);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(LNetEQAlloc);
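+
+/* A usage sketch (illustrative only, not part of the original patch):
+ * pure callback mode, i.e. count == 0, so no events are ever queued or
+ * dropped. */
+#if 0
+static void
+my_eq_callback(lnet_event_t *ev)
+{
+	/* runs once for every event deposited into the EQ */
+	CDEBUG(D_NET, "event type %d, status %d\n", ev->type, ev->status);
+}
+
+static int
+my_eq_setup(lnet_handle_eq_t *eqh)
+{
+	return LNetEQAlloc(0, my_eq_callback, eqh);
+}
+#endif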
+
+/**
+ * Release the resources associated with an event queue if it's idle;
+ * otherwise do nothing and it's up to the user to try again.
+ *
+ * \param eqh A handle for the event queue to be released.
+ *
+ * \retval 0 If the EQ is not in use and freed.
+ * \retval -ENOENT If \a eqh does not point to a valid EQ.
+ * \retval -EBUSY  If the EQ is still in use by some MDs.
+ */
+int
+LNetEQFree(lnet_handle_eq_t eqh)
+{
+	struct lnet_eq	*eq;
+	lnet_event_t	*events = NULL;
+	int		**refs = NULL;
+	int		*ref;
+	int		rc = 0;
+	int		size = 0;
+	int		i;
+
+	LASSERT(the_lnet.ln_init);
+	LASSERT(the_lnet.ln_refcount > 0);
+
+	lnet_res_lock(LNET_LOCK_EX);
+	/* NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
+	 * both EQ lookup and poll event with only lnet_eq_wait_lock */
+	lnet_eq_wait_lock();
+
+	eq = lnet_handle2eq(&eqh);
+	if (eq == NULL) {
+		rc = -ENOENT;
+		goto out;
+	}
+
+	cfs_percpt_for_each(ref, i, eq->eq_refs) {
+		LASSERT(*ref >= 0);
+		if (*ref == 0)
+			continue;
+
+		CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n",
+		       i, *ref);
+		rc = -EBUSY;
+		goto out;
+	}
+
+	/* stash for free after lock dropped */
+	events	= eq->eq_events;
+	size	= eq->eq_size;
+	refs	= eq->eq_refs;
+
+	lnet_res_lh_invalidate(&eq->eq_lh);
+	list_del(&eq->eq_list);
+	lnet_eq_free_locked(eq);
+ out:
+	lnet_eq_wait_unlock();
+	lnet_res_unlock(LNET_LOCK_EX);
+
+	if (events != NULL)
+		LIBCFS_FREE(events, size * sizeof(lnet_event_t));
+	if (refs != NULL)
+		cfs_percpt_free(refs);
+
+	return rc;
+}
+EXPORT_SYMBOL(LNetEQFree);
+
+void
+lnet_eq_enqueue_event(lnet_eq_t *eq, lnet_event_t *ev)
+{
+	/* MUST be called with the resource lock held, but without
+	 * lnet_eq_wait_lock */
+	int index;
+
+	if (eq->eq_size == 0) {
+		LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE);
+		eq->eq_callback(ev);
+		return;
+	}
+
+	lnet_eq_wait_lock();
+	ev->sequence = eq->eq_enq_seq++;
+
+	LASSERT(eq->eq_size == LOWEST_BIT_SET(eq->eq_size));
+	index = ev->sequence & (eq->eq_size - 1);
+
+	eq->eq_events[index] = *ev;
+
+	if (eq->eq_callback != LNET_EQ_HANDLER_NONE)
+		eq->eq_callback(ev);
+
+	/* Wake anyone waiting in LNetEQPoll() */
+	if (waitqueue_active(&the_lnet.ln_eq_waitq))
+		wake_up_all(&the_lnet.ln_eq_waitq);
+	lnet_eq_wait_unlock();
+}
+
+int
+lnet_eq_dequeue_event(lnet_eq_t *eq, lnet_event_t *ev)
+{
+	int		new_index = eq->eq_deq_seq & (eq->eq_size - 1);
+	lnet_event_t	*new_event = &eq->eq_events[new_index];
+	int		rc;
+	ENTRY;
+
+	/* must be called with lnet_eq_wait_lock held */
+	if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence))
+		RETURN(0);
+
+	/* We've got a new event... */
+	*ev = *new_event;
+
+	CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
+	       new_event, eq->eq_deq_seq, eq->eq_size);
+
+	/* ...but did it overwrite an event we've not seen yet? */
+	if (eq->eq_deq_seq == new_event->sequence) {
+		rc = 1;
+	} else {
+		/* don't complain with CERROR: some EQs are sized small
+		 * anyway; if it's important, the caller should complain */
+		CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n",
+		       eq->eq_deq_seq, new_event->sequence);
+		rc = -EOVERFLOW;
+	}
+
+	eq->eq_deq_seq = new_event->sequence + 1;
+	RETURN(rc);
+}
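+
+/*
+ * Worked example (illustrative): with eq_size == 8, sequence N lands in
+ * slot N & 7.  If eq_deq_seq == 3 but slot 3 now holds sequence 11, the
+ * queue has wrapped and events 3..10 are lost: the caller gets the
+ * sequence-11 event with rc == -EOVERFLOW and eq_deq_seq jumps to 12.
+ */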
+
+/**
+ * A nonblocking function that can be used to get the next event in an EQ.
+ * If an event handler is associated with the EQ, the handler will run before
+ * this function returns successfully. The event is removed from the queue.
+ *
+ * \param eventq A handle for the event queue.
+ * \param event On successful return (1 or -EOVERFLOW), this location will
+ * hold the next event in the EQ.
+ *
+ * \retval 0	  No pending event in the EQ.
+ * \retval 1	  Indicates success.
+ * \retval -ENOENT    If \a eventq does not point to a valid EQ.
+ * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
+ * at least one event between this event and the last event obtained from the
+ * EQ has been dropped due to limited space in the EQ.
+ */
+int
+LNetEQGet (lnet_handle_eq_t eventq, lnet_event_t *event)
+{
+	int which;
+
+	return LNetEQPoll(&eventq, 1, 0,
+			 event, &which);
+}
+EXPORT_SYMBOL(LNetEQGet);
+
+/**
+ * Block the calling process until there is an event in the EQ.
+ * If an event handler is associated with the EQ, the handler will run before
+ * this function returns successfully. This function returns the next event
+ * in the EQ and removes it from the EQ.
+ *
+ * \param eventq A handle for the event queue.
+ * \param event On successful return (1 or -EOVERFLOW), this location will
+ * hold the next event in the EQ.
+ *
+ * \retval 1	  Indicates success.
+ * \retval -ENOENT    If \a eventq does not point to a valid EQ.
+ * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
+ * at least one event between this event and the last event obtained from the
+ * EQ has been dropped due to limited space in the EQ.
+ */
+int
+LNetEQWait (lnet_handle_eq_t eventq, lnet_event_t *event)
+{
+	int which;
+
+	return LNetEQPoll(&eventq, 1, LNET_TIME_FOREVER,
+			 event, &which);
+}
+EXPORT_SYMBOL(LNetEQWait);
+
+
+static int
+lnet_eq_wait_locked(int *timeout_ms)
+{
+	int		tms = *timeout_ms;
+	int		wait;
+	wait_queue_t  wl;
+	cfs_time_t      now;
+
+	if (tms == 0)
+		return -1; /* don't want to wait and no new event */
+
+	init_waitqueue_entry_current(&wl);
+	set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&the_lnet.ln_eq_waitq, &wl);
+
+	lnet_eq_wait_unlock();
+
+	if (tms < 0) {
+		waitq_wait(&wl, TASK_INTERRUPTIBLE);
+
+	} else {
+		struct timeval tv;
+
+		now = cfs_time_current();
+		waitq_timedwait(&wl, TASK_INTERRUPTIBLE,
+				    cfs_time_seconds(tms) / 1000);
+		cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), &tv);
+		tms -= (int)(tv.tv_sec * 1000 + tv.tv_usec / 1000);
+		if (tms < 0) /* no more wait but may have new event */
+			tms = 0;
+	}
+
+	wait = tms != 0; /* might need to call here again */
+	*timeout_ms = tms;
+
+	lnet_eq_wait_lock();
+	remove_wait_queue(&the_lnet.ln_eq_waitq, &wl);
+
+	return wait;
+}
+
+
+
+/**
+ * Block the calling process until there's an event from a set of EQs or
+ * timeout happens.
+ *
+ * If an event handler is associated with the EQ, the handler will run before
+ * this function returns successfully, in which case the corresponding event
+ * is consumed.
+ *
+ * LNetEQPoll() provides a timeout to allow applications to poll, block for a
+ * fixed period, or block indefinitely.
+ *
+ * \param eventqs,neq An array of EQ handles, and size of the array.
+ * \param timeout_ms Time in milliseconds to wait for an event to occur on
+ * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an
+ * infinite timeout.
+ * \param event,which On successful return (1 or -EOVERFLOW), \a event will
+ * hold the next event in the EQs, and \a which will contain the index of the
+ * EQ from which the event was taken.
+ *
+ * \retval 0	  No pending event in the EQs after timeout.
+ * \retval 1	  Indicates success.
+ * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
+ * at least one event between this event and the last event obtained from the
+ * EQ indicated by \a which has been dropped due to limited space in the EQ.
+ * \retval -ENOENT    If there's an invalid handle in \a eventqs.
+ */
+int
+LNetEQPoll(lnet_handle_eq_t *eventqs, int neq, int timeout_ms,
+	   lnet_event_t *event, int *which)
+{
+	int	wait = 1;
+	int	rc;
+	int	i;
+	ENTRY;
+
+	LASSERT (the_lnet.ln_init);
+	LASSERT (the_lnet.ln_refcount > 0);
+
+	if (neq < 1)
+		RETURN(-ENOENT);
+
+	lnet_eq_wait_lock();
+
+	for (;;) {
+		for (i = 0; i < neq; i++) {
+			lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]);
+
+			if (eq == NULL) {
+				lnet_eq_wait_unlock();
+				RETURN(-ENOENT);
+			}
+
+			rc = lnet_eq_dequeue_event(eq, event);
+			if (rc != 0) {
+				lnet_eq_wait_unlock();
+				*which = i;
+				RETURN(rc);
+			}
+		}
+
+		if (wait == 0)
+			break;
+
+		/*
+		 * return value of lnet_eq_wait_locked:
+		 * -1 : did nothing, and there is definitely no new event
+		 *  1 : slept and woke up for a new event
+		 *  0 : doesn't want to wait any more, but there might be a
+		 *      new event, so dequeue must be called again
+		 */
+		wait = lnet_eq_wait_locked(&timeout_ms);
+		if (wait < 0) /* no new event */
+			break;
+	}
+
+	lnet_eq_wait_unlock();
+	RETURN(0);
+}
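+
+/* A usage sketch (illustrative only, not part of the original patch):
+ * poll a single EQ with a 10 second timeout. */
+#if 0
+static int
+my_wait_for_event(lnet_handle_eq_t eqh, lnet_event_t *ev)
+{
+	int which;
+
+	/* 1: got an event; -EOVERFLOW: got one, but older events were
+	 * dropped; 0: timed out with nothing pending */
+	return LNetEQPoll(&eqh, 1, 10000, ev, &which);
+}
+#endif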

+ 451 - 0
drivers/staging/lustre/lnet/lnet/lib-md.c

@@ -0,0 +1,451 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-md.c
+ *
+ * Memory Descriptor management routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+/* must be called with lnet_res_lock held */
+void
+lnet_md_unlink(lnet_libmd_t *md)
+{
+	if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) == 0) {
+		/* first unlink attempt... */
+		lnet_me_t *me = md->md_me;
+
+		md->md_flags |= LNET_MD_FLAG_ZOMBIE;
+
+		/* Disassociate from ME (if any), and unlink it if it was created
+		 * with LNET_UNLINK */
+		if (me != NULL) {
+			/* detach MD from portal */
+			lnet_ptl_detach_md(me, md);
+			if (me->me_unlink == LNET_UNLINK)
+				lnet_me_unlink(me);
+		}
+
+		/* ensure all future handle lookups fail */
+		lnet_res_lh_invalidate(&md->md_lh);
+	}
+
+	if (md->md_refcount != 0) {
+		CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
+		return;
+	}
+
+	CDEBUG(D_NET, "Unlinking md %p\n", md);
+
+	if (md->md_eq != NULL) {
+		int	cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
+
+		LASSERT(*md->md_eq->eq_refs[cpt] > 0);
+		(*md->md_eq->eq_refs[cpt])--;
+	}
+
+	LASSERT(!list_empty(&md->md_list));
+	list_del_init(&md->md_list);
+	lnet_md_free_locked(md);
+}
+
+static int
+lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink)
+{
+	int	  i;
+	unsigned int niov;
+	int	  total_length = 0;
+
+	lmd->md_me = NULL;
+	lmd->md_start = umd->start;
+	lmd->md_offset = 0;
+	lmd->md_max_size = umd->max_size;
+	lmd->md_options = umd->options;
+	lmd->md_user_ptr = umd->user_ptr;
+	lmd->md_eq = NULL;
+	lmd->md_threshold = umd->threshold;
+	lmd->md_refcount = 0;
+	lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
+
+	if ((umd->options & LNET_MD_IOVEC) != 0) {
+
+		if ((umd->options & LNET_MD_KIOV) != 0) /* Can't specify both */
+			return -EINVAL;
+
+		lmd->md_niov = niov = umd->length;
+		memcpy(lmd->md_iov.iov, umd->start,
+		       niov * sizeof (lmd->md_iov.iov[0]));
+
+		for (i = 0; i < (int)niov; i++) {
+			/* We take the base address on trust */
+			if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
+				return -EINVAL;
+
+			total_length += lmd->md_iov.iov[i].iov_len;
+		}
+
+		lmd->md_length = total_length;
+
+		if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
+		    (umd->max_size < 0 ||
+		     umd->max_size > total_length)) /* illegal max_size */
+			return -EINVAL;
+
+	} else if ((umd->options & LNET_MD_KIOV) != 0) {
+		lmd->md_niov = niov = umd->length;
+		memcpy(lmd->md_iov.kiov, umd->start,
+		       niov * sizeof (lmd->md_iov.kiov[0]));
+
+		for (i = 0; i < (int)niov; i++) {
+			/* We take the page pointer on trust */
+			if (lmd->md_iov.kiov[i].kiov_offset +
+			    lmd->md_iov.kiov[i].kiov_len > PAGE_CACHE_SIZE )
+				return -EINVAL; /* invalid length */
+
+			total_length += lmd->md_iov.kiov[i].kiov_len;
+		}
+
+		lmd->md_length = total_length;
+
+		if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
+		    (umd->max_size < 0 ||
+		     umd->max_size > total_length)) /* illegal max_size */
+			return -EINVAL;
+	} else {   /* contiguous */
+		lmd->md_length = umd->length;
+		lmd->md_niov = niov = 1;
+		lmd->md_iov.iov[0].iov_base = umd->start;
+		lmd->md_iov.iov[0].iov_len = umd->length;
+
+		if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
+		    (umd->max_size < 0 ||
+		     umd->max_size > (int)umd->length)) /* illegal max_size */
+			return -EINVAL;
+	}
+
+	return 0;
+}
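+
+/* An illustrative sketch (not part of the original patch) of the simplest
+ * user-visible MD that lnet_md_build() accepts: a contiguous buffer, with
+ * neither LNET_MD_IOVEC nor LNET_MD_KIOV set. */
+#if 0
+static void
+my_fill_umd(lnet_md_t *umd, void *buf, unsigned int len,
+	    lnet_handle_eq_t eqh)
+{
+	umd->start	= buf;
+	umd->length	= len;
+	umd->threshold	= LNET_MD_THRESH_INF;	/* never auto-unlink */
+	umd->max_size	= 0;
+	umd->options	= LNET_MD_OP_PUT;	/* accept incoming PUTs */
+	umd->user_ptr	= NULL;
+	umd->eq_handle	= eqh;
+}
+#endif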
+
+/* must be called with resource lock held */
+static int
+lnet_md_link(lnet_libmd_t *md, lnet_handle_eq_t eq_handle, int cpt)
+{
+	struct lnet_res_container *container = the_lnet.ln_md_containers[cpt];
+
+	/* NB we are passed an allocated, but inactive md.
+	 * if we return success, caller may lnet_md_unlink() it.
+	 * otherwise caller may only lnet_md_free() it.
+	 */
+	/* This implementation doesn't know how to create START events or
+	 * disable END events.  Best to LASSERT our caller is compliant so
+	 * we find out quickly...  */
+	/* TODO - re-evaluate what should be here in light of the removal
+	 * of the start and end events; maybe we shouldn't even allow
+	 * LNET_EQ_NONE!
+	 * LASSERT(eq == NULL);
+	 */
+	if (!LNetHandleIsInvalid(eq_handle)) {
+		md->md_eq = lnet_handle2eq(&eq_handle);
+
+		if (md->md_eq == NULL)
+			return -ENOENT;
+
+		(*md->md_eq->eq_refs[cpt])++;
+	}
+
+	lnet_res_lh_initialize(container, &md->md_lh);
+
+	LASSERT(list_empty(&md->md_list));
+	list_add(&md->md_list, &container->rec_active);
+
+	return 0;
+}
+
+/* must be called with lnet_res_lock held */
+void
+lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd)
+{
+	/* NB this doesn't copy out all the iov entries so when a
+	 * discontiguous MD is copied out, the target gets to know the
+	 * original iov pointer (in start) and the number of entries it had
+	 * and that's all.
+	 */
+	umd->start = lmd->md_start;
+	umd->length = ((lmd->md_options & (LNET_MD_IOVEC | LNET_MD_KIOV)) == 0) ?
+		      lmd->md_length : lmd->md_niov;
+	umd->threshold = lmd->md_threshold;
+	umd->max_size = lmd->md_max_size;
+	umd->options = lmd->md_options;
+	umd->user_ptr = lmd->md_user_ptr;
+	lnet_eq2handle(&umd->eq_handle, lmd->md_eq);
+}
+
+int
+lnet_md_validate(lnet_md_t *umd)
+{
+	if (umd->start == NULL && umd->length != 0) {
+		CERROR("MD start pointer can not be NULL with length %u\n",
+		       umd->length);
+		return -EINVAL;
+	}
+
+	if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 &&
+	    umd->length > LNET_MAX_IOV) {
+		CERROR("Invalid option: too many fragments %u, %d max\n",
+		       umd->length, LNET_MAX_IOV);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * Create a memory descriptor and attach it to a ME
+ *
+ * \param meh A handle for a ME to associate the new MD with.
+ * \param umd Provides initial values for the user-visible parts of a MD.
+ * Other than its use for initialization, there is no linkage between this
+ * structure and the MD maintained by the LNet.
+ * \param unlink A flag to indicate whether the MD is automatically unlinked
+ * when it becomes inactive, either because the operation threshold drops to
+ * zero or because the available memory becomes less than \a umd.max_size.
+ * (Note that the check for unlinking a MD only occurs after the completion
+ * of a successful operation on the MD.) The value LNET_UNLINK enables auto
+ * unlinking; the value LNET_RETAIN disables it.
+ * \param handle On successful returns, a handle to the newly created MD is
+ * saved here. This handle can be used later in LNetMDUnlink().
+ *
+ * \retval 0       On success.
+ * \retval -EINVAL If \a umd is not valid.
+ * \retval -ENOMEM If new MD cannot be allocated.
+ * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
+ * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
+ * calling LNetInvalidateHandle() on it.
+ * \retval -EBUSY  If the ME pointed to by \a meh is already associated with
+ * a MD.
+ */
+int
+LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
+	     lnet_unlink_t unlink, lnet_handle_md_t *handle)
+{
+	LIST_HEAD(matches);
+	LIST_HEAD(drops);
+	struct lnet_me		*me;
+	struct lnet_libmd	*md;
+	int			cpt;
+	int			rc;
+
+	LASSERT (the_lnet.ln_init);
+	LASSERT (the_lnet.ln_refcount > 0);
+
+	if (lnet_md_validate(&umd) != 0)
+		return -EINVAL;
+
+	if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0) {
+		CERROR("Invalid option: no MD_OP set\n");
+		return -EINVAL;
+	}
+
+	md = lnet_md_alloc(&umd);
+	if (md == NULL)
+		return -ENOMEM;
+
+	rc = lnet_md_build(md, &umd, unlink);
+	cpt = lnet_cpt_of_cookie(meh.cookie);
+
+	lnet_res_lock(cpt);
+	if (rc != 0)
+		goto failed;
+
+	me = lnet_handle2me(&meh);
+	if (me == NULL)
+		rc = -ENOENT;
+	else if (me->me_md != NULL)
+		rc = -EBUSY;
+	else
+		rc = lnet_md_link(md, umd.eq_handle, cpt);
+
+	if (rc != 0)
+		goto failed;
+
+	/* attach this MD to portal of ME and check if it matches any
+	 * blocked msgs on this portal */
+	lnet_ptl_attach_md(me, md, &matches, &drops);
+
+	lnet_md2handle(handle, md);
+
+	lnet_res_unlock(cpt);
+
+	lnet_drop_delayed_msg_list(&drops, "Bad match");
+	lnet_recv_delayed_msg_list(&matches);
+
+	return 0;
+
+ failed:
+	lnet_md_free_locked(md);
+
+	lnet_res_unlock(cpt);
+	return rc;
+}
+EXPORT_SYMBOL(LNetMDAttach);
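+
+/* A usage sketch (illustrative only, not part of the original patch):
+ * post a single-use wildcard buffer on a portal by pairing LNetMEAttach()
+ * with LNetMDAttach(). */
+#if 0
+static int
+my_post_buffer(unsigned int portal, void *buf, unsigned int len,
+	       lnet_handle_eq_t eqh, lnet_handle_md_t *mdh)
+{
+	lnet_process_id_t anyid = { LNET_NID_ANY, LNET_PID_ANY };
+	lnet_handle_me_t  meh;
+	lnet_md_t	  umd;
+	int		  rc;
+
+	rc = LNetMEAttach(portal, anyid, 0, ~0ULL, LNET_UNLINK,
+			  LNET_INS_AFTER, &meh);
+	if (rc != 0)
+		return rc;
+
+	memset(&umd, 0, sizeof(umd));
+	umd.start     = buf;
+	umd.length    = len;
+	umd.threshold = 1;		/* one PUT, then auto-unlink */
+	umd.options   = LNET_MD_OP_PUT;
+	umd.eq_handle = eqh;
+
+	return LNetMDAttach(meh, umd, LNET_UNLINK, mdh);
+}
+#endif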
+
+/**
+ * Create a "free floating" memory descriptor - a MD that is not associated
+ * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations.
+ *
+ * \param umd,unlink See the discussion for LNetMDAttach().
+ * \param handle On successful returns, a handle to the newly created MD is
+ * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(),
+ * and LNetGet() operations.
+ *
+ * \retval 0       On success.
+ * \retval -EINVAL If \a umd is not valid.
+ * \retval -ENOMEM If new MD cannot be allocated.
+ * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that
+ * it's OK to supply a NULL \a umd.eq_handle by calling
+ * LNetInvalidateHandle() on it.
+ */
+int
+LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle)
+{
+	lnet_libmd_t	*md;
+	int		cpt;
+	int		rc;
+
+	LASSERT (the_lnet.ln_init);
+	LASSERT (the_lnet.ln_refcount > 0);
+
+	if (lnet_md_validate(&umd) != 0)
+		return -EINVAL;
+
+	if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) != 0) {
+		CERROR("Invalid option: GET|PUT illegal on active MDs\n");
+		return -EINVAL;
+	}
+
+	md = lnet_md_alloc(&umd);
+	if (md == NULL)
+		return -ENOMEM;
+
+	rc = lnet_md_build(md, &umd, unlink);
+
+	cpt = lnet_res_lock_current();
+	if (rc != 0)
+		goto failed;
+
+	rc = lnet_md_link(md, umd.eq_handle, cpt);
+	if (rc != 0)
+		goto failed;
+
+	lnet_md2handle(handle, md);
+
+	lnet_res_unlock(cpt);
+	return 0;
+
+ failed:
+	lnet_md_free_locked(md);
+
+	lnet_res_unlock(cpt);
+	return rc;
+}
+EXPORT_SYMBOL(LNetMDBind);
+
+/**
+ * Unlink the memory descriptor from any ME it may be linked to and release
+ * the internal resources associated with it.
+ *
+ * This function does not free the memory region associated with the MD;
+ * i.e., the memory the user allocated for this MD. If the ME associated with
+ * this MD is not NULL and was created with auto unlink enabled, the ME is
+ * unlinked as well (see LNetMEAttach()).
+ *
+ * Explicitly unlinking a MD via this function call has the same behavior as
+ * a MD that has been automatically unlinked, except that no LNET_EVENT_UNLINK
+ * is generated in the latter case.
+ *
+ * An unlinked event can be reported in two ways:
+ * - If there's no pending operations on the MD, it's unlinked immediately
+ *   and an LNET_EVENT_UNLINK event is logged before this function returns.
+ * - Otherwise, the MD is only marked for deletion when this function
+ *   returns, and the unlinked event will be piggybacked on the event of
+ *   the completion of the last operation by setting the unlinked field of
+ *   the event. No dedicated LNET_EVENT_UNLINK event is generated.
+ *
+ * Note that in both cases the unlinked field of the event is always set; no
+ * more event will happen on the MD after such an event is logged.
+ *
+ * \param mdh A handle for the MD to be unlinked.
+ *
+ * \retval 0       On success.
+ * \retval -ENOENT If \a mdh does not point to a valid MD object.
+ */
+int
+LNetMDUnlink (lnet_handle_md_t mdh)
+{
+	lnet_event_t	ev;
+	lnet_libmd_t	*md;
+	int		cpt;
+
+	LASSERT(the_lnet.ln_init);
+	LASSERT(the_lnet.ln_refcount > 0);
+
+	cpt = lnet_cpt_of_cookie(mdh.cookie);
+	lnet_res_lock(cpt);
+
+	md = lnet_handle2md(&mdh);
+	if (md == NULL) {
+		lnet_res_unlock(cpt);
+		return -ENOENT;
+	}
+
+	/* If the MD is busy, lnet_md_unlink just marks it for deletion, and
+	 * when the NAL is done, the completion event flags that the MD was
+	 * unlinked.  Otherwise, we enqueue an event now... */
+
+	if (md->md_eq != NULL &&
+	    md->md_refcount == 0) {
+		lnet_build_unlink_event(md, &ev);
+		lnet_eq_enqueue_event(md->md_eq, &ev);
+	}
+
+	lnet_md_unlink(md);
+
+	lnet_res_unlock(cpt);
+	return 0;
+}
+EXPORT_SYMBOL(LNetMDUnlink);

+ 297 - 0
drivers/staging/lustre/lnet/lnet/lib-me.c

@@ -0,0 +1,297 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-me.c
+ *
+ * Match Entry management routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+/**
+ * Create and attach a match entry to the match list of \a portal. The new
+ * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach()
+ * can be used to attach a MD to an empty ME.
+ *
+ * \param portal The portal table index where the ME should be attached.
+ * \param match_id Specifies the match criteria for the process ID of
+ * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be
+ * used to wildcard either of the identifiers in the lnet_process_id_t
+ * structure.
+ * \param match_bits,ignore_bits Specify the match criteria to apply
+ * to the match bits in the incoming request. The ignore bits are used
+ * to mask out insignificant bits in the incoming match bits. The resulting
+ * bits are then compared to the ME's match bits to determine if the
+ * incoming request meets the match criteria.
+ * \param unlink Indicates whether the ME should be unlinked when the memory
+ * descriptor associated with it is unlinked (Note that the check for
+ * unlinking a ME only occurs when the memory descriptor is unlinked.).
+ * Valid values are LNET_RETAIN and LNET_UNLINK.
+ * \param pos Indicates whether the new ME should be prepended or
+ * appended to the match list. Allowed constants: LNET_INS_BEFORE,
+ * LNET_INS_AFTER.
+ * \param handle On successful returns, a handle to the newly created ME
+ * object is saved here. This handle can be used later in LNetMEInsert(),
+ * LNetMEUnlink(), or LNetMDAttach() functions.
+ *
+ * \retval 0       On success.
+ * \retval -EINVAL If \a portal is invalid.
+ * \retval -ENOMEM If new ME object cannot be allocated.
+ */
+int
+LNetMEAttach(unsigned int portal,
+	     lnet_process_id_t match_id,
+	     __u64 match_bits, __u64 ignore_bits,
+	     lnet_unlink_t unlink, lnet_ins_pos_t pos,
+	     lnet_handle_me_t *handle)
+{
+	struct lnet_match_table *mtable;
+	struct lnet_me		*me;
+	struct list_head		*head;
+
+	LASSERT(the_lnet.ln_init);
+	LASSERT(the_lnet.ln_refcount > 0);
+
+	if ((int)portal >= the_lnet.ln_nportals)
+		return -EINVAL;
+
+	mtable = lnet_mt_of_attach(portal, match_id,
+				   match_bits, ignore_bits, pos);
+	if (mtable == NULL) /* can't match portal type */
+		return -EPERM;
+
+	me = lnet_me_alloc();
+	if (me == NULL)
+		return -ENOMEM;
+
+	lnet_res_lock(mtable->mt_cpt);
+
+	me->me_portal = portal;
+	me->me_match_id = match_id;
+	me->me_match_bits = match_bits;
+	me->me_ignore_bits = ignore_bits;
+	me->me_unlink = unlink;
+	me->me_md = NULL;
+
+	lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt],
+			       &me->me_lh);
+	if (ignore_bits != 0)
+		head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
+	else
+		head = lnet_mt_match_head(mtable, match_id, match_bits);
+
+	me->me_pos = head - &mtable->mt_mhash[0];
+	if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL)
+		list_add_tail(&me->me_list, head);
+	else
+		list_add(&me->me_list, head);
+
+	lnet_me2handle(handle, me);
+
+	lnet_res_unlock(mtable->mt_cpt);
+	return 0;
+}
+EXPORT_SYMBOL(LNetMEAttach);
+
+/**
+ * Create a match entry and insert it before or after the ME pointed to by
+ * \a current_meh. The new ME is empty, i.e. not associated with a memory
+ * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME.
+ *
+ * This function is identical to LNetMEAttach() except for the position
+ * where the new ME is inserted.
+ *
+ * \param current_meh A handle for a ME. The new ME will be inserted
+ * immediately before or immediately after this ME.
+ * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion
+ * for LNetMEAttach().
+ *
+ * \retval 0       On success.
+ * \retval -ENOMEM If new ME object cannot be allocated.
+ * \retval -ENOENT If \a current_meh does not point to a valid match entry.
+ */
+int
+LNetMEInsert(lnet_handle_me_t current_meh,
+	     lnet_process_id_t match_id,
+	     __u64 match_bits, __u64 ignore_bits,
+	     lnet_unlink_t unlink, lnet_ins_pos_t pos,
+	     lnet_handle_me_t *handle)
+{
+	struct lnet_me		*current_me;
+	struct lnet_me		*new_me;
+	struct lnet_portal	*ptl;
+	int			cpt;
+
+	LASSERT(the_lnet.ln_init);
+	LASSERT(the_lnet.ln_refcount > 0);
+
+	if (pos == LNET_INS_LOCAL)
+		return -EPERM;
+
+	new_me = lnet_me_alloc();
+	if (new_me == NULL)
+		return -ENOMEM;
+
+	cpt = lnet_cpt_of_cookie(current_meh.cookie);
+
+	lnet_res_lock(cpt);
+
+	current_me = lnet_handle2me(&current_meh);
+	if (current_me == NULL) {
+		lnet_me_free_locked(new_me);
+
+		lnet_res_unlock(cpt);
+		return -ENOENT;
+	}
+
+	LASSERT(current_me->me_portal < the_lnet.ln_nportals);
+
+	ptl = the_lnet.ln_portals[current_me->me_portal];
+	if (lnet_ptl_is_unique(ptl)) {
+		/* insertion makes no sense on a unique portal */
+		lnet_me_free_locked(new_me);
+		lnet_res_unlock(cpt);
+		return -EPERM;
+	}
+
+	new_me->me_pos = current_me->me_pos;
+	new_me->me_portal = current_me->me_portal;
+	new_me->me_match_id = match_id;
+	new_me->me_match_bits = match_bits;
+	new_me->me_ignore_bits = ignore_bits;
+	new_me->me_unlink = unlink;
+	new_me->me_md = NULL;
+
+	lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh);
+
+	if (pos == LNET_INS_AFTER)
+		list_add(&new_me->me_list, &current_me->me_list);
+	else
+		list_add_tail(&new_me->me_list, &current_me->me_list);
+
+	lnet_me2handle(handle, new_me);
+
+	lnet_res_unlock(cpt);
+
+	return 0;
+}
+EXPORT_SYMBOL(LNetMEInsert);
+
+/**
+ * Unlink a match entry from its match list.
+ *
+ * This operation also releases any resources associated with the ME. If a
+ * memory descriptor is attached to the ME, then it will be unlinked as well
+ * and an unlink event will be generated. It is an error to use the ME handle
+ * after calling LNetMEUnlink().
+ *
+ * \param meh A handle for the ME to be unlinked.
+ *
+ * \retval 0       On success.
+ * \retval -ENOENT If \a meh does not point to a valid ME.
+ * \see LNetMDUnlink() for the discussion on delivering unlink event.
+ */
+int
+LNetMEUnlink(lnet_handle_me_t meh)
+{
+	lnet_me_t	*me;
+	lnet_libmd_t	*md;
+	lnet_event_t	ev;
+	int		cpt;
+
+	LASSERT(the_lnet.ln_init);
+	LASSERT(the_lnet.ln_refcount > 0);
+
+	cpt = lnet_cpt_of_cookie(meh.cookie);
+	lnet_res_lock(cpt);
+
+	me = lnet_handle2me(&meh);
+	if (me == NULL) {
+		lnet_res_unlock(cpt);
+		return -ENOENT;
+	}
+
+	md = me->me_md;
+	if (md != NULL &&
+	    md->md_eq != NULL &&
+	    md->md_refcount == 0) {
+		lnet_build_unlink_event(md, &ev);
+		lnet_eq_enqueue_event(md->md_eq, &ev);
+	}
+
+	lnet_me_unlink(me);
+
+	lnet_res_unlock(cpt);
+	return 0;
+}
+EXPORT_SYMBOL(LNetMEUnlink);
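+
+/*
+ * Hypothetical teardown sketch (assumed names, not code from this
+ * patch): an ME attached with LNET_RETAIN survives message delivery
+ * and must eventually be torn down explicitly:
+ *
+ *	rc = LNetMEUnlink(meh);
+ *	if (rc == -ENOENT)
+ *		CERROR("stale ME handle\n");
+ *
+ * As the code above shows, this also unlinks any attached MD and, if
+ * that MD has an EQ and no active operations, queues an unlink event.
+ */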
+
+/* call with lnet_res_lock please */
+void
+lnet_me_unlink(lnet_me_t *me)
+{
+	list_del(&me->me_list);
+
+	if (me->me_md != NULL) {
+		lnet_libmd_t *md = me->me_md;
+
+		/* detach MD from portal of this ME */
+		lnet_ptl_detach_md(me, md);
+		lnet_md_unlink(md);
+	}
+
+	lnet_res_lh_invalidate(&me->me_lh);
+	lnet_me_free_locked(me);
+}
+
+#if 0
+static void
+lib_me_dump(lnet_me_t *me)
+{
+	CWARN("Match Entry %p ("LPX64")\n", me,
+	      me->me_lh.lh_cookie);
+
+	CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
+	      me->me_match_bits, me->me_ignore_bits);
+
+	CWARN("\tMD\t= %p\n", me->md);
+	CWARN("\tprev\t= %p\n",
+	      list_entry(me->me_list.prev, lnet_me_t, me_list));
+	CWARN("\tnext\t= %p\n",
+	      list_entry(me->me_list.next, lnet_me_t, me_list));
+}
+#endif

+ 2441 - 0
drivers/staging/lustre/lnet/lnet/lib-move.c

@@ -0,0 +1,2441 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-move.c
+ *
+ * Data movement routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+static int local_nid_dist_zero = 1;
+CFS_MODULE_PARM(local_nid_dist_zero, "i", int, 0444,
+		"Reserved");
+
+int
+lnet_fail_nid (lnet_nid_t nid, unsigned int threshold)
+{
+	lnet_test_peer_t  *tp;
+	struct list_head	*el;
+	struct list_head	*next;
+	struct list_head	 cull;
+
+	LASSERT (the_lnet.ln_init);
+
+	/* NB: use lnet_net_lock(0) to serialize operations on test peers */
+	if (threshold != 0) {
+		/* Adding a new entry */
+		LIBCFS_ALLOC(tp, sizeof(*tp));
+		if (tp == NULL)
+			return -ENOMEM;
+
+		tp->tp_nid = nid;
+		tp->tp_threshold = threshold;
+
+		lnet_net_lock(0);
+		list_add_tail(&tp->tp_list, &the_lnet.ln_test_peers);
+		lnet_net_unlock(0);
+		return 0;
+	}
+
+	/* removing entries */
+	INIT_LIST_HEAD(&cull);
+
+	lnet_net_lock(0);
+
+	list_for_each_safe (el, next, &the_lnet.ln_test_peers) {
+		tp = list_entry (el, lnet_test_peer_t, tp_list);
+
+		if (tp->tp_threshold == 0 ||    /* needs culling anyway */
+		    nid == LNET_NID_ANY ||       /* removing all entries */
+		    tp->tp_nid == nid)	  /* matched this one */
+		{
+			list_del (&tp->tp_list);
+			list_add (&tp->tp_list, &cull);
+		}
+	}
+
+	lnet_net_unlock(0);
+
+	while (!list_empty (&cull)) {
+		tp = list_entry (cull.next, lnet_test_peer_t, tp_list);
+
+		list_del (&tp->tp_list);
+		LIBCFS_FREE(tp, sizeof (*tp));
+	}
+	return 0;
+}
+
+static int
+fail_peer (lnet_nid_t nid, int outgoing)
+{
+	lnet_test_peer_t *tp;
+	struct list_head       *el;
+	struct list_head       *next;
+	struct list_head	cull;
+	int	       fail = 0;
+
+	INIT_LIST_HEAD (&cull);
+
+	/* NB: use lnet_net_lock(0) to serialize operations on test peers */
+	lnet_net_lock(0);
+
+	list_for_each_safe (el, next, &the_lnet.ln_test_peers) {
+		tp = list_entry (el, lnet_test_peer_t, tp_list);
+
+		if (tp->tp_threshold == 0) {
+			/* zombie entry */
+			if (outgoing) {
+				/* only cull zombies on outgoing tests,
+				 * since we may be at interrupt priority on
+				 * incoming messages. */
+				list_del (&tp->tp_list);
+				list_add (&tp->tp_list, &cull);
+			}
+			continue;
+		}
+
+		if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */
+		    nid == tp->tp_nid) {	/* fail this peer */
+			fail = 1;
+
+			if (tp->tp_threshold != LNET_MD_THRESH_INF) {
+				tp->tp_threshold--;
+				if (outgoing &&
+				    tp->tp_threshold == 0) {
+					/* see above */
+					list_del (&tp->tp_list);
+					list_add (&tp->tp_list, &cull);
+				}
+			}
+			break;
+		}
+	}
+
+	lnet_net_unlock(0);
+
+	while (!list_empty (&cull)) {
+		tp = list_entry (cull.next, lnet_test_peer_t, tp_list);
+		list_del (&tp->tp_list);
+
+		LIBCFS_FREE(tp, sizeof (*tp));
+	}
+
+	return (fail);
+}
+
+unsigned int
+lnet_iov_nob (unsigned int niov, struct iovec *iov)
+{
+	unsigned int nob = 0;
+
+	while (niov-- > 0)
+		nob += (iov++)->iov_len;
+
+	return (nob);
+}
+EXPORT_SYMBOL(lnet_iov_nob);
+
+void
+lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov, unsigned int doffset,
+		   unsigned int nsiov, struct iovec *siov, unsigned int soffset,
+		   unsigned int nob)
+{
+	/* NB diov, siov are READ-ONLY */
+	unsigned int  this_nob;
+
+	if (nob == 0)
+		return;
+
+	/* skip complete frags before 'doffset' */
+	LASSERT (ndiov > 0);
+	while (doffset >= diov->iov_len) {
+		doffset -= diov->iov_len;
+		diov++;
+		ndiov--;
+		LASSERT (ndiov > 0);
+	}
+
+	/* skip complete frags before 'soffset' */
+	LASSERT (nsiov > 0);
+	while (soffset >= siov->iov_len) {
+		soffset -= siov->iov_len;
+		siov++;
+		nsiov--;
+		LASSERT (nsiov > 0);
+	}
+
+	do {
+		LASSERT (ndiov > 0);
+		LASSERT (nsiov > 0);
+		this_nob = MIN(diov->iov_len - doffset,
+			       siov->iov_len - soffset);
+		this_nob = MIN(this_nob, nob);
+
+		memcpy ((char *)diov->iov_base + doffset,
+			(char *)siov->iov_base + soffset, this_nob);
+		nob -= this_nob;
+
+		if (diov->iov_len > doffset + this_nob) {
+			doffset += this_nob;
+		} else {
+			diov++;
+			ndiov--;
+			doffset = 0;
+		}
+
+		if (siov->iov_len > soffset + this_nob) {
+			soffset += this_nob;
+		} else {
+			siov++;
+			nsiov--;
+			soffset = 0;
+		}
+	} while (nob > 0);
+}
+EXPORT_SYMBOL(lnet_copy_iov2iov);
+
+int
+lnet_extract_iov (int dst_niov, struct iovec *dst,
+		  int src_niov, struct iovec *src,
+		  unsigned int offset, unsigned int len)
+{
+	/* Initialise 'dst' to the subset of 'src' starting at 'offset',
+	 * for exactly 'len' bytes, and return the number of entries.
+	 * NB not destructive to 'src' */
+	unsigned int    frag_len;
+	unsigned int    niov;
+
+	if (len == 0)			   /* no data => */
+		return (0);		     /* no frags */
+
+	LASSERT (src_niov > 0);
+	while (offset >= src->iov_len) {      /* skip initial frags */
+		offset -= src->iov_len;
+		src_niov--;
+		src++;
+		LASSERT (src_niov > 0);
+	}
+
+	niov = 1;
+	for (;;) {
+		LASSERT (src_niov > 0);
+		LASSERT ((int)niov <= dst_niov);
+
+		frag_len = src->iov_len - offset;
+		dst->iov_base = ((char *)src->iov_base) + offset;
+
+		if (len <= frag_len) {
+			dst->iov_len = len;
+			return (niov);
+		}
+
+		dst->iov_len = frag_len;
+
+		len -= frag_len;
+		dst++;
+		src++;
+		niov++;
+		src_niov--;
+		offset = 0;
+	}
+}
+EXPORT_SYMBOL(lnet_extract_iov);
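+
+/*
+ * Worked example (illustrative values, not original code): with two
+ * source frags of 1000 bytes each, offset = 1500 and len = 300,
+ * lnet_extract_iov() skips the whole first frag, leaving offset = 500
+ * in the second; the remaining 300 bytes fit in that frag, so dst[0]
+ * becomes { src[1].iov_base + 500, 300 } and the function returns 1.
+ */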
+
+unsigned int
+lnet_kiov_nob (unsigned int niov, lnet_kiov_t *kiov)
+{
+	unsigned int  nob = 0;
+
+	while (niov-- > 0)
+		nob += (kiov++)->kiov_len;
+
+	return (nob);
+}
+EXPORT_SYMBOL(lnet_kiov_nob);
+
+void
+lnet_copy_kiov2kiov (unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset,
+		     unsigned int nsiov, lnet_kiov_t *siov, unsigned int soffset,
+		     unsigned int nob)
+{
+	/* NB diov, siov are READ-ONLY */
+	unsigned int    this_nob;
+	char	   *daddr = NULL;
+	char	   *saddr = NULL;
+
+	if (nob == 0)
+		return;
+
+	LASSERT (!in_interrupt ());
+
+	LASSERT (ndiov > 0);
+	while (doffset >= diov->kiov_len) {
+		doffset -= diov->kiov_len;
+		diov++;
+		ndiov--;
+		LASSERT (ndiov > 0);
+	}
+
+	LASSERT (nsiov > 0);
+	while (soffset >= siov->kiov_len) {
+		soffset -= siov->kiov_len;
+		siov++;
+		nsiov--;
+		LASSERT (nsiov > 0);
+	}
+
+	do {
+		LASSERT (ndiov > 0);
+		LASSERT (nsiov > 0);
+		this_nob = MIN(diov->kiov_len - doffset,
+			       siov->kiov_len - soffset);
+		this_nob = MIN(this_nob, nob);
+
+		if (daddr == NULL)
+			daddr = ((char *)kmap(diov->kiov_page)) +
+				diov->kiov_offset + doffset;
+		if (saddr == NULL)
+			saddr = ((char *)kmap(siov->kiov_page)) +
+				siov->kiov_offset + soffset;
+
+		/* Vanishing risk of kmap deadlock when mapping 2 pages.
+		 * However in practice at least one of the kiovs will be mapped
+		 * kernel pages and the map/unmap will be NOOPs */
+
+		memcpy (daddr, saddr, this_nob);
+		nob -= this_nob;
+
+		if (diov->kiov_len > doffset + this_nob) {
+			daddr += this_nob;
+			doffset += this_nob;
+		} else {
+			kunmap(diov->kiov_page);
+			daddr = NULL;
+			diov++;
+			ndiov--;
+			doffset = 0;
+		}
+
+		if (siov->kiov_len > soffset + this_nob) {
+			saddr += this_nob;
+			soffset += this_nob;
+		} else {
+			kunmap(siov->kiov_page);
+			saddr = NULL;
+			siov++;
+			nsiov--;
+			soffset = 0;
+		}
+	} while (nob > 0);
+
+	if (daddr != NULL)
+		kunmap(diov->kiov_page);
+	if (saddr != NULL)
+		kunmap(siov->kiov_page);
+}
+EXPORT_SYMBOL(lnet_copy_kiov2kiov);
+
+void
+lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, unsigned int iovoffset,
+		    unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset,
+		    unsigned int nob)
+{
+	/* NB iov, kiov are READ-ONLY */
+	unsigned int    this_nob;
+	char	   *addr = NULL;
+
+	if (nob == 0)
+		return;
+
+	LASSERT (!in_interrupt ());
+
+	LASSERT (niov > 0);
+	while (iovoffset >= iov->iov_len) {
+		iovoffset -= iov->iov_len;
+		iov++;
+		niov--;
+		LASSERT (niov > 0);
+	}
+
+	LASSERT (nkiov > 0);
+	while (kiovoffset >= kiov->kiov_len) {
+		kiovoffset -= kiov->kiov_len;
+		kiov++;
+		nkiov--;
+		LASSERT (nkiov > 0);
+	}
+
+	do {
+		LASSERT (niov > 0);
+		LASSERT (nkiov > 0);
+		this_nob = MIN(iov->iov_len - iovoffset,
+			       kiov->kiov_len - kiovoffset);
+		this_nob = MIN(this_nob, nob);
+
+		if (addr == NULL)
+			addr = ((char *)kmap(kiov->kiov_page)) +
+				kiov->kiov_offset + kiovoffset;
+
+		memcpy ((char *)iov->iov_base + iovoffset, addr, this_nob);
+		nob -= this_nob;
+
+		if (iov->iov_len > iovoffset + this_nob) {
+			iovoffset += this_nob;
+		} else {
+			iov++;
+			niov--;
+			iovoffset = 0;
+		}
+
+		if (kiov->kiov_len > kiovoffset + this_nob) {
+			addr += this_nob;
+			kiovoffset += this_nob;
+		} else {
+			kunmap(kiov->kiov_page);
+			addr = NULL;
+			kiov++;
+			nkiov--;
+			kiovoffset = 0;
+		}
+
+	} while (nob > 0);
+
+	if (addr != NULL)
+		kunmap(kiov->kiov_page);
+}
+EXPORT_SYMBOL(lnet_copy_kiov2iov);
+
+void
+lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset,
+		    unsigned int niov, struct iovec *iov, unsigned int iovoffset,
+		    unsigned int nob)
+{
+	/* NB kiov, iov are READ-ONLY */
+	unsigned int    this_nob;
+	char	   *addr = NULL;
+
+	if (nob == 0)
+		return;
+
+	LASSERT (!in_interrupt ());
+
+	LASSERT (nkiov > 0);
+	while (kiovoffset >= kiov->kiov_len) {
+		kiovoffset -= kiov->kiov_len;
+		kiov++;
+		nkiov--;
+		LASSERT (nkiov > 0);
+	}
+
+	LASSERT (niov > 0);
+	while (iovoffset >= iov->iov_len) {
+		iovoffset -= iov->iov_len;
+		iov++;
+		niov--;
+		LASSERT (niov > 0);
+	}
+
+	do {
+		LASSERT (nkiov > 0);
+		LASSERT (niov > 0);
+		this_nob = MIN(kiov->kiov_len - kiovoffset,
+			       iov->iov_len - iovoffset);
+		this_nob = MIN(this_nob, nob);
+
+		if (addr == NULL)
+			addr = ((char *)kmap(kiov->kiov_page)) +
+				kiov->kiov_offset + kiovoffset;
+
+		memcpy (addr, (char *)iov->iov_base + iovoffset, this_nob);
+		nob -= this_nob;
+
+		if (kiov->kiov_len > kiovoffset + this_nob) {
+			addr += this_nob;
+			kiovoffset += this_nob;
+		} else {
+			kunmap(kiov->kiov_page);
+			addr = NULL;
+			kiov++;
+			nkiov--;
+			kiovoffset = 0;
+		}
+
+		if (iov->iov_len > iovoffset + this_nob) {
+			iovoffset += this_nob;
+		} else {
+			iov++;
+			niov--;
+			iovoffset = 0;
+		}
+	} while (nob > 0);
+
+	if (addr != NULL)
+		kunmap(kiov->kiov_page);
+}
+EXPORT_SYMBOL(lnet_copy_iov2kiov);
+
+int
+lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst,
+		   int src_niov, lnet_kiov_t *src,
+		   unsigned int offset, unsigned int len)
+{
+	/* Initialise 'dst' to the subset of 'src' starting at 'offset',
+	 * for exactly 'len' bytes, and return the number of entries.
+	 * NB not destructive to 'src' */
+	unsigned int    frag_len;
+	unsigned int    niov;
+
+	if (len == 0)			   /* no data => */
+		return (0);		     /* no frags */
+
+	LASSERT (src_niov > 0);
+	while (offset >= src->kiov_len) {      /* skip initial frags */
+		offset -= src->kiov_len;
+		src_niov--;
+		src++;
+		LASSERT (src_niov > 0);
+	}
+
+	niov = 1;
+	for (;;) {
+		LASSERT (src_niov > 0);
+		LASSERT ((int)niov <= dst_niov);
+
+		frag_len = src->kiov_len - offset;
+		dst->kiov_page = src->kiov_page;
+		dst->kiov_offset = src->kiov_offset + offset;
+
+		if (len <= frag_len) {
+			dst->kiov_len = len;
+			LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_CACHE_SIZE);
+			return (niov);
+		}
+
+		dst->kiov_len = frag_len;
+		LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_CACHE_SIZE);
+
+		len -= frag_len;
+		dst++;
+		src++;
+		niov++;
+		src_niov--;
+		offset = 0;
+	}
+}
+EXPORT_SYMBOL(lnet_extract_kiov);
+
+void
+lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
+	     unsigned int offset, unsigned int mlen, unsigned int rlen)
+{
+	unsigned int  niov = 0;
+	struct iovec *iov = NULL;
+	lnet_kiov_t  *kiov = NULL;
+	int	   rc;
+
+	LASSERT (!in_interrupt ());
+	LASSERT (mlen == 0 || msg != NULL);
+
+	if (msg != NULL) {
+		LASSERT(msg->msg_receiving);
+		LASSERT(!msg->msg_sending);
+		LASSERT(rlen == msg->msg_len);
+		LASSERT(mlen <= msg->msg_len);
+		LASSERT(msg->msg_offset == offset);
+		LASSERT(msg->msg_wanted == mlen);
+
+		msg->msg_receiving = 0;
+
+		if (mlen != 0) {
+			niov = msg->msg_niov;
+			iov  = msg->msg_iov;
+			kiov = msg->msg_kiov;
+
+			LASSERT (niov > 0);
+			LASSERT ((iov == NULL) != (kiov == NULL));
+		}
+	}
+
+	rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed,
+				    niov, iov, kiov, offset, mlen, rlen);
+	if (rc < 0)
+		lnet_finalize(ni, msg, rc);
+}
+
+void
+lnet_setpayloadbuffer(lnet_msg_t *msg)
+{
+	lnet_libmd_t *md = msg->msg_md;
+
+	LASSERT (msg->msg_len > 0);
+	LASSERT (!msg->msg_routing);
+	LASSERT (md != NULL);
+	LASSERT (msg->msg_niov == 0);
+	LASSERT (msg->msg_iov == NULL);
+	LASSERT (msg->msg_kiov == NULL);
+
+	msg->msg_niov = md->md_niov;
+	if ((md->md_options & LNET_MD_KIOV) != 0)
+		msg->msg_kiov = md->md_iov.kiov;
+	else
+		msg->msg_iov = md->md_iov.iov;
+}
+
+void
+lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target,
+	       unsigned int offset, unsigned int len)
+{
+	msg->msg_type = type;
+	msg->msg_target = target;
+	msg->msg_len = len;
+	msg->msg_offset = offset;
+
+	if (len != 0)
+		lnet_setpayloadbuffer(msg);
+
+	memset (&msg->msg_hdr, 0, sizeof (msg->msg_hdr));
+	msg->msg_hdr.type	   = cpu_to_le32(type);
+	msg->msg_hdr.dest_nid       = cpu_to_le64(target.nid);
+	msg->msg_hdr.dest_pid       = cpu_to_le32(target.pid);
+	/* src_nid will be set later */
+	msg->msg_hdr.src_pid	= cpu_to_le32(the_lnet.ln_pid);
+	msg->msg_hdr.payload_length = cpu_to_le32(len);
+}
+
+void
+lnet_ni_send(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+	void   *priv = msg->msg_private;
+	int     rc;
+
+	LASSERT (!in_interrupt ());
+	LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
+		 (msg->msg_txcredit && msg->msg_peertxcredit));
+
+	rc = (ni->ni_lnd->lnd_send)(ni, priv, msg);
+	if (rc < 0)
+		lnet_finalize(ni, msg, rc);
+}
+
+int
+lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+	int	rc;
+
+	LASSERT(!msg->msg_sending);
+	LASSERT(msg->msg_receiving);
+	LASSERT(!msg->msg_rx_ready_delay);
+	LASSERT(ni->ni_lnd->lnd_eager_recv != NULL);
+
+	msg->msg_rx_ready_delay = 1;
+	rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
+					  &msg->msg_private);
+	if (rc != 0) {
+		CERROR("recv from %s / send to %s aborted: "
+		       "eager_recv failed %d\n",
+		       libcfs_nid2str(msg->msg_rxpeer->lp_nid),
+		       libcfs_id2str(msg->msg_target), rc);
+		LASSERT(rc < 0); /* required by my callers */
+	}
+
+	return rc;
+}
+
+/* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */
+void
+lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp)
+{
+	cfs_time_t last_alive = 0;
+
+	LASSERT(lnet_peer_aliveness_enabled(lp));
+	LASSERT(ni->ni_lnd->lnd_query != NULL);
+
+	lnet_net_unlock(lp->lp_cpt);
+	(ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
+	lnet_net_lock(lp->lp_cpt);
+
+	lp->lp_last_query = cfs_time_current();
+
+	if (last_alive != 0) /* NI has updated timestamp */
+		lp->lp_last_alive = last_alive;
+}
+
+/* NB: always called with lnet_net_lock held */
+static inline int
+lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
+{
+	int	alive;
+	cfs_time_t deadline;
+
+	LASSERT (lnet_peer_aliveness_enabled(lp));
+
+	/* Trust lnet_notify() if it has more recent aliveness news, but
+	 * ignore the initial assumed death (see lnet_peers_start_down()).
+	 */
+	if (!lp->lp_alive && lp->lp_alive_count > 0 &&
+	    cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
+		return 0;
+
+	deadline = cfs_time_add(lp->lp_last_alive,
+				cfs_time_seconds(lp->lp_ni->ni_peertimeout));
+	alive = cfs_time_after(deadline, now);
+
+	/* Update obsolete lp_alive except for routers assumed to be dead
+	 * initially, because the router checker will update aliveness in
+	 * that case, and moreover the lp_last_alive set at peer creation
+	 * is itself only an assumption.
+	 */
+	if (alive && !lp->lp_alive &&
+	    !(lnet_isrouter(lp) && lp->lp_alive_count == 0))
+		lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
+
+	return alive;
+}
+
+/* NB: returns 1 when alive, 0 when dead, negative when error;
+ *     may drop the lnet_net_lock */
+int
+lnet_peer_alive_locked (lnet_peer_t *lp)
+{
+	cfs_time_t now = cfs_time_current();
+
+	if (!lnet_peer_aliveness_enabled(lp))
+		return -ENODEV;
+
+	if (lnet_peer_is_alive(lp, now))
+		return 1;
+
+	/* Peer appears dead, but we should avoid frequent NI queries (at
+	 * most once per lnet_queryinterval seconds). */
+	if (lp->lp_last_query != 0) {
+		static const int lnet_queryinterval = 1;
+
+		cfs_time_t next_query =
+			   cfs_time_add(lp->lp_last_query,
+					cfs_time_seconds(lnet_queryinterval));
+
+		if (cfs_time_before(now, next_query)) {
+			if (lp->lp_alive)
+				CWARN("Unexpected aliveness of peer %s: "
+				      "%d < %d (%d/%d)\n",
+				      libcfs_nid2str(lp->lp_nid),
+				      (int)now, (int)next_query,
+				      lnet_queryinterval,
+				      lp->lp_ni->ni_peertimeout);
+			return 0;
+		}
+	}
+
+	/* query NI for latest aliveness news */
+	lnet_ni_query_locked(lp->lp_ni, lp);
+
+	if (lnet_peer_is_alive(lp, now))
+		return 1;
+
+	lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
+	return 0;
+}
+
+int
+lnet_post_send_locked(lnet_msg_t *msg, int do_send)
+{
+	/* lnet_send is going to lnet_net_unlock immediately after this,
+	 * so it sets do_send FALSE and I don't do the unlock/send/lock bit.
+	 * I return EAGAIN if msg blocked, EHOSTUNREACH if msg_txpeer
+	 * appears dead, and 0 if sent or OK to send */
+	struct lnet_peer	*lp = msg->msg_txpeer;
+	struct lnet_ni		*ni = lp->lp_ni;
+	struct lnet_tx_queue	*tq;
+	int			cpt;
+
+	/* non-lnet_send() callers have checked before */
+	LASSERT(!do_send || msg->msg_tx_delayed);
+	LASSERT(!msg->msg_receiving);
+	LASSERT(msg->msg_tx_committed);
+
+	cpt = msg->msg_tx_cpt;
+	tq = ni->ni_tx_queues[cpt];
+
+	/* NB 'lp' is always the next hop */
+	if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
+	    lnet_peer_alive_locked(lp) == 0) {
+		the_lnet.ln_counters[cpt]->drop_count++;
+		the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
+		lnet_net_unlock(cpt);
+
+		CNETERR("Dropping message for %s: peer not alive\n",
+			libcfs_id2str(msg->msg_target));
+		if (do_send)
+			lnet_finalize(ni, msg, -EHOSTUNREACH);
+
+		lnet_net_lock(cpt);
+		return EHOSTUNREACH;
+	}
+
+	if (!msg->msg_peertxcredit) {
+		LASSERT ((lp->lp_txcredits < 0) ==
+			 !list_empty(&lp->lp_txq));
+
+		msg->msg_peertxcredit = 1;
+		lp->lp_txqnob += msg->msg_len + sizeof(lnet_hdr_t);
+		lp->lp_txcredits--;
+
+		if (lp->lp_txcredits < lp->lp_mintxcredits)
+			lp->lp_mintxcredits = lp->lp_txcredits;
+
+		if (lp->lp_txcredits < 0) {
+			msg->msg_tx_delayed = 1;
+			list_add_tail(&msg->msg_list, &lp->lp_txq);
+			return EAGAIN;
+		}
+	}
+
+	if (!msg->msg_txcredit) {
+		LASSERT((tq->tq_credits < 0) ==
+			!list_empty(&tq->tq_delayed));
+
+		msg->msg_txcredit = 1;
+		tq->tq_credits--;
+
+		if (tq->tq_credits < tq->tq_credits_min)
+			tq->tq_credits_min = tq->tq_credits;
+
+		if (tq->tq_credits < 0) {
+			msg->msg_tx_delayed = 1;
+			list_add_tail(&msg->msg_list, &tq->tq_delayed);
+			return EAGAIN;
+		}
+	}
+
+	if (do_send) {
+		lnet_net_unlock(cpt);
+		lnet_ni_send(ni, msg);
+		lnet_net_lock(cpt);
+	}
+	return 0;
+}
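+
+/*
+ * Worked example of the credit accounting above (hypothetical
+ * numbers): with lp_txcredits = 1 and tq_credits = 0, a message first
+ * consumes the peer credit (lp_txcredits drops to 0), then blocks on
+ * the NI credit (tq_credits drops to -1), is queued on tq_delayed and
+ * EAGAIN is returned; lnet_return_tx_credits_locked() resends it once
+ * another message returns its NI credit.
+ */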
+
+lnet_rtrbufpool_t *
+lnet_msg2bufpool(lnet_msg_t *msg)
+{
+	lnet_rtrbufpool_t	*rbp;
+	int			cpt;
+
+	LASSERT(msg->msg_rx_committed);
+
+	cpt = msg->msg_rx_cpt;
+	rbp = &the_lnet.ln_rtrpools[cpt][0];
+
+	LASSERT(msg->msg_len <= LNET_MTU);
+	while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_CACHE_SIZE) {
+		rbp++;
+		LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]);
+	}
+
+	return rbp;
+}
+
+int
+lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv)
+{
+	/* lnet_parse is going to lnet_net_unlock immediately after this, so it
+	 * sets do_recv FALSE and I don't do the unlock/send/lock bit.  I
+	 * return EAGAIN if msg blocked and 0 if received or OK to receive */
+	lnet_peer_t	 *lp = msg->msg_rxpeer;
+	lnet_rtrbufpool_t   *rbp;
+	lnet_rtrbuf_t       *rb;
+
+	LASSERT (msg->msg_iov == NULL);
+	LASSERT (msg->msg_kiov == NULL);
+	LASSERT (msg->msg_niov == 0);
+	LASSERT (msg->msg_routing);
+	LASSERT (msg->msg_receiving);
+	LASSERT (!msg->msg_sending);
+
+	/* non-lnet_parse callers only receive delayed messages */
+	LASSERT(!do_recv || msg->msg_rx_delayed);
+
+	if (!msg->msg_peerrtrcredit) {
+		LASSERT ((lp->lp_rtrcredits < 0) ==
+			 !list_empty(&lp->lp_rtrq));
+
+		msg->msg_peerrtrcredit = 1;
+		lp->lp_rtrcredits--;
+		if (lp->lp_rtrcredits < lp->lp_minrtrcredits)
+			lp->lp_minrtrcredits = lp->lp_rtrcredits;
+
+		if (lp->lp_rtrcredits < 0) {
+			/* must have checked eager_recv before here */
+			LASSERT(msg->msg_rx_ready_delay);
+			msg->msg_rx_delayed = 1;
+			list_add_tail(&msg->msg_list, &lp->lp_rtrq);
+			return EAGAIN;
+		}
+	}
+
+	rbp = lnet_msg2bufpool(msg);
+
+	if (!msg->msg_rtrcredit) {
+		LASSERT ((rbp->rbp_credits < 0) ==
+			 !list_empty(&rbp->rbp_msgs));
+
+		msg->msg_rtrcredit = 1;
+		rbp->rbp_credits--;
+		if (rbp->rbp_credits < rbp->rbp_mincredits)
+			rbp->rbp_mincredits = rbp->rbp_credits;
+
+		if (rbp->rbp_credits < 0) {
+			/* must have checked eager_recv before here */
+			LASSERT(msg->msg_rx_ready_delay);
+			msg->msg_rx_delayed = 1;
+			list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
+			return EAGAIN;
+		}
+	}
+
+	LASSERT (!list_empty(&rbp->rbp_bufs));
+	rb = list_entry(rbp->rbp_bufs.next, lnet_rtrbuf_t, rb_list);
+	list_del(&rb->rb_list);
+
+	msg->msg_niov = rbp->rbp_npages;
+	msg->msg_kiov = &rb->rb_kiov[0];
+
+	if (do_recv) {
+		int cpt = msg->msg_rx_cpt;
+
+		lnet_net_unlock(cpt);
+		lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
+			     0, msg->msg_len, msg->msg_len);
+		lnet_net_lock(cpt);
+	}
+	return 0;
+}
+
+void
+lnet_return_tx_credits_locked(lnet_msg_t *msg)
+{
+	lnet_peer_t	*txpeer = msg->msg_txpeer;
+	lnet_msg_t	*msg2;
+
+	if (msg->msg_txcredit) {
+		struct lnet_ni	     *ni = txpeer->lp_ni;
+		struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
+
+		/* give back NI txcredits */
+		msg->msg_txcredit = 0;
+
+		LASSERT((tq->tq_credits < 0) ==
+			!list_empty(&tq->tq_delayed));
+
+		tq->tq_credits++;
+		if (tq->tq_credits <= 0) {
+			msg2 = list_entry(tq->tq_delayed.next,
+					      lnet_msg_t, msg_list);
+			list_del(&msg2->msg_list);
+
+			LASSERT(msg2->msg_txpeer->lp_ni == ni);
+			LASSERT(msg2->msg_tx_delayed);
+
+			(void) lnet_post_send_locked(msg2, 1);
+		}
+	}
+
+	if (msg->msg_peertxcredit) {
+		/* give back peer txcredits */
+		msg->msg_peertxcredit = 0;
+
+		LASSERT((txpeer->lp_txcredits < 0) ==
+			!list_empty(&txpeer->lp_txq));
+
+		txpeer->lp_txqnob -= msg->msg_len + sizeof(lnet_hdr_t);
+		LASSERT (txpeer->lp_txqnob >= 0);
+
+		txpeer->lp_txcredits++;
+		if (txpeer->lp_txcredits <= 0) {
+			msg2 = list_entry(txpeer->lp_txq.next,
+					      lnet_msg_t, msg_list);
+			list_del(&msg2->msg_list);
+
+			LASSERT(msg2->msg_txpeer == txpeer);
+			LASSERT(msg2->msg_tx_delayed);
+
+			(void) lnet_post_send_locked(msg2, 1);
+		}
+	}
+
+	if (txpeer != NULL) {
+		msg->msg_txpeer = NULL;
+		lnet_peer_decref_locked(txpeer);
+	}
+}
+
+void
+lnet_return_rx_credits_locked(lnet_msg_t *msg)
+{
+	lnet_peer_t	*rxpeer = msg->msg_rxpeer;
+	lnet_msg_t	*msg2;
+
+	if (msg->msg_rtrcredit) {
+		/* give back global router credits */
+		lnet_rtrbuf_t     *rb;
+		lnet_rtrbufpool_t *rbp;
+
+		/* NB If a msg ever blocks for a buffer in rbp_msgs, it stays
+		 * there until it gets one allocated, or aborts the wait
+		 * itself */
+		LASSERT (msg->msg_kiov != NULL);
+
+		rb = list_entry(msg->msg_kiov, lnet_rtrbuf_t, rb_kiov[0]);
+		rbp = rb->rb_pool;
+		LASSERT (rbp == lnet_msg2bufpool(msg));
+
+		msg->msg_kiov = NULL;
+		msg->msg_rtrcredit = 0;
+
+		LASSERT((rbp->rbp_credits < 0) ==
+			!list_empty(&rbp->rbp_msgs));
+		LASSERT((rbp->rbp_credits > 0) ==
+			!list_empty(&rbp->rbp_bufs));
+
+		list_add(&rb->rb_list, &rbp->rbp_bufs);
+		rbp->rbp_credits++;
+		if (rbp->rbp_credits <= 0) {
+			msg2 = list_entry(rbp->rbp_msgs.next,
+					      lnet_msg_t, msg_list);
+			list_del(&msg2->msg_list);
+
+			(void) lnet_post_routed_recv_locked(msg2, 1);
+		}
+	}
+
+	if (msg->msg_peerrtrcredit) {
+		/* give back peer router credits */
+		msg->msg_peerrtrcredit = 0;
+
+		LASSERT((rxpeer->lp_rtrcredits < 0) ==
+			!list_empty(&rxpeer->lp_rtrq));
+
+		rxpeer->lp_rtrcredits++;
+		if (rxpeer->lp_rtrcredits <= 0) {
+			msg2 = list_entry(rxpeer->lp_rtrq.next,
+					      lnet_msg_t, msg_list);
+			list_del(&msg2->msg_list);
+
+			(void) lnet_post_routed_recv_locked(msg2, 1);
+		}
+	}
+	if (rxpeer != NULL) {
+		msg->msg_rxpeer = NULL;
+		lnet_peer_decref_locked(rxpeer);
+	}
+}
+
+static int
+lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2)
+{
+	lnet_peer_t *p1 = r1->lr_gateway;
+	lnet_peer_t *p2 = r2->lr_gateway;
+
+	if (r1->lr_hops < r2->lr_hops)
+		return 1;
+
+	if (r1->lr_hops > r2->lr_hops)
+		return -1;
+
+	if (p1->lp_txqnob < p2->lp_txqnob)
+		return 1;
+
+	if (p1->lp_txqnob > p2->lp_txqnob)
+		return -1;
+
+	if (p1->lp_txcredits > p2->lp_txcredits)
+		return 1;
+
+	if (p1->lp_txcredits < p2->lp_txcredits)
+		return -1;
+
+	if (r1->lr_seq - r2->lr_seq <= 0)
+		return 1;
+
+	return -1;
+}
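+
+/*
+ * Illustrative ordering (a summary, not original code): a 1-hop route
+ * always beats a 2-hop route; between equal-hop routes the gateway
+ * with fewer queued bytes (lp_txqnob) wins, then the one with more
+ * free lp_txcredits; lr_seq breaks the final tie so that equally good
+ * gateways end up being used round-robin (see lnet_find_route_locked()
+ * below, which bumps lr_seq on the chosen route).
+ */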
+
+static lnet_peer_t *
+lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
+{
+	lnet_remotenet_t	*rnet;
+	lnet_route_t		*rtr;
+	lnet_route_t		*rtr_best;
+	lnet_route_t		*rtr_last;
+	struct lnet_peer	*lp_best;
+	struct lnet_peer	*lp;
+	int			rc;
+
+	/* If @rtr_nid is not LNET_NID_ANY, return the gateway with
+	 * rtr_nid nid, otherwise find the best gateway I can use */
+
+	rnet = lnet_find_net_locked(LNET_NIDNET(target));
+	if (rnet == NULL)
+		return NULL;
+
+	lp_best = NULL;
+	rtr_best = rtr_last = NULL;
+	list_for_each_entry(rtr, &rnet->lrn_routes, lr_list) {
+		lp = rtr->lr_gateway;
+
+		if (!lp->lp_alive || /* gateway is down */
+		    ((lp->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0 &&
+		     rtr->lr_downis != 0)) /* NI to target is down */
+			continue;
+
+		if (ni != NULL && lp->lp_ni != ni)
+			continue;
+
+		if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
+			return lp;
+
+		if (lp_best == NULL) {
+			rtr_best = rtr_last = rtr;
+			lp_best = lp;
+			continue;
+		}
+
+		/* no protection on the fields below, but it's harmless */
+		if (rtr_last->lr_seq - rtr->lr_seq < 0)
+			rtr_last = rtr;
+
+		rc = lnet_compare_routes(rtr, rtr_best);
+		if (rc < 0)
+			continue;
+
+		rtr_best = rtr;
+		lp_best = lp;
+	}
+
+	/* set the sequence number on the best router to the latest
+	 * sequence + 1 so we can round-robin all routers; it's racy and
+	 * inaccurate but harmless and functional */
+	if (rtr_best != NULL)
+		rtr_best->lr_seq = rtr_last->lr_seq + 1;
+	return lp_best;
+}
+
+int
+lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
+{
+	lnet_nid_t		dst_nid = msg->msg_target.nid;
+	struct lnet_ni		*src_ni;
+	struct lnet_ni		*local_ni;
+	struct lnet_peer	*lp;
+	int			cpt;
+	int			cpt2;
+	int			rc;
+
+	/* NB: rtr_nid is set to LNET_NID_ANY for all current use-cases,
+	 * but we might want to use pre-determined router for ACK/REPLY
+	 * in the future */
+	/* NB: ni != NULL == interface pre-determined (ACK/REPLY) */
+	LASSERT (msg->msg_txpeer == NULL);
+	LASSERT (!msg->msg_sending);
+	LASSERT (!msg->msg_target_is_router);
+	LASSERT (!msg->msg_receiving);
+
+	msg->msg_sending = 1;
+
+	LASSERT(!msg->msg_tx_committed);
+	cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
+ again:
+	lnet_net_lock(cpt);
+
+	if (the_lnet.ln_shutdown) {
+		lnet_net_unlock(cpt);
+		return -ESHUTDOWN;
+	}
+
+	if (src_nid == LNET_NID_ANY) {
+		src_ni = NULL;
+	} else {
+		src_ni = lnet_nid2ni_locked(src_nid, cpt);
+		if (src_ni == NULL) {
+			lnet_net_unlock(cpt);
+			LCONSOLE_WARN("Can't send to %s: src %s is not a "
+				      "local nid\n", libcfs_nid2str(dst_nid),
+				      libcfs_nid2str(src_nid));
+			return -EINVAL;
+		}
+		LASSERT (!msg->msg_routing);
+	}
+
+	/* Is this for someone on a local network? */
+	local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt);
+
+	if (local_ni != NULL) {
+		if (src_ni == NULL) {
+			src_ni = local_ni;
+			src_nid = src_ni->ni_nid;
+		} else if (src_ni == local_ni) {
+			lnet_ni_decref_locked(local_ni, cpt);
+		} else {
+			lnet_ni_decref_locked(local_ni, cpt);
+			lnet_ni_decref_locked(src_ni, cpt);
+			lnet_net_unlock(cpt);
+			LCONSOLE_WARN("No route to %s via from %s\n",
+				      libcfs_nid2str(dst_nid),
+				      libcfs_nid2str(src_nid));
+			return -EINVAL;
+		}
+
+		LASSERT(src_nid != LNET_NID_ANY);
+		lnet_msg_commit(msg, cpt);
+
+		if (!msg->msg_routing)
+			msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
+
+		if (src_ni == the_lnet.ln_loni) {
+			/* No send credit hassles with LOLND */
+			lnet_net_unlock(cpt);
+			lnet_ni_send(src_ni, msg);
+
+			lnet_net_lock(cpt);
+			lnet_ni_decref_locked(src_ni, cpt);
+			lnet_net_unlock(cpt);
+			return 0;
+		}
+
+		rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
+		/* lp has ref on src_ni; lose mine */
+		lnet_ni_decref_locked(src_ni, cpt);
+		if (rc != 0) {
+			lnet_net_unlock(cpt);
+			LCONSOLE_WARN("Error %d finding peer %s\n", rc,
+				      libcfs_nid2str(dst_nid));
+			/* ENOMEM or shutting down */
+			return rc;
+		}
+		LASSERT (lp->lp_ni == src_ni);
+	} else {
+		/* sending to a remote network */
+		lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
+		if (lp == NULL) {
+			if (src_ni != NULL)
+				lnet_ni_decref_locked(src_ni, cpt);
+			lnet_net_unlock(cpt);
+
+			LCONSOLE_WARN("No route to %s via %s "
+				      "(all routers down)\n",
+				      libcfs_id2str(msg->msg_target),
+				      libcfs_nid2str(src_nid));
+			return -EHOSTUNREACH;
+		}
+
+		/* rtr_nid is LNET_NID_ANY or the NID of a pre-determined
+		 * router; it's possible that rtr_nid isn't LNET_NID_ANY and
+		 * lp isn't the pre-determined router, which can happen if
+		 * the router table was changed while the lock was dropped */
+		if (rtr_nid != lp->lp_nid) {
+			cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
+			if (cpt2 != cpt) {
+				if (src_ni != NULL)
+					lnet_ni_decref_locked(src_ni, cpt);
+				lnet_net_unlock(cpt);
+
+				rtr_nid = lp->lp_nid;
+				cpt = cpt2;
+				goto again;
+			}
+		}
+
+		CDEBUG(D_NET, "Best route to %s via %s for %s %d\n",
+		       libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid),
+		       lnet_msgtyp2str(msg->msg_type), msg->msg_len);
+
+		if (src_ni == NULL) {
+			src_ni = lp->lp_ni;
+			src_nid = src_ni->ni_nid;
+		} else {
+			LASSERT (src_ni == lp->lp_ni);
+			lnet_ni_decref_locked(src_ni, cpt);
+		}
+
+		lnet_peer_addref_locked(lp);
+
+		LASSERT(src_nid != LNET_NID_ANY);
+		lnet_msg_commit(msg, cpt);
+
+		if (!msg->msg_routing) {
+			/* I'm the source and now I know which NI to send on */
+			msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
+		}
+
+		msg->msg_target_is_router = 1;
+		msg->msg_target.nid = lp->lp_nid;
+		msg->msg_target.pid = LUSTRE_SRV_LNET_PID;
+	}
+
+	/* 'lp' is our best choice of peer */
+
+	LASSERT (!msg->msg_peertxcredit);
+	LASSERT (!msg->msg_txcredit);
+	LASSERT (msg->msg_txpeer == NULL);
+
+	msg->msg_txpeer = lp;		   /* msg takes my ref on lp */
+
+	rc = lnet_post_send_locked(msg, 0);
+	lnet_net_unlock(cpt);
+
+	if (rc == EHOSTUNREACH)
+		return -EHOSTUNREACH;
+
+	if (rc == 0)
+		lnet_ni_send(src_ni, msg);
+
+	return 0;
+}
+
+static void
+lnet_drop_message(lnet_ni_t *ni, int cpt, void *private, unsigned int nob)
+{
+	lnet_net_lock(cpt);
+	the_lnet.ln_counters[cpt]->drop_count++;
+	the_lnet.ln_counters[cpt]->drop_length += nob;
+	lnet_net_unlock(cpt);
+
+	lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob);
+}
+
+static void
+lnet_recv_put(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+	lnet_hdr_t	*hdr = &msg->msg_hdr;
+
+	if (msg->msg_wanted != 0)
+		lnet_setpayloadbuffer(msg);
+
+	lnet_build_msg_event(msg, LNET_EVENT_PUT);
+
+	/* Must I ACK?  If so I'll grab the ack_wmd out of the header and put
+	 * it back into the ACK during lnet_finalize() */
+	msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
+			(msg->msg_md->md_options & LNET_MD_ACK_DISABLE) == 0);
+
+	lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed,
+		     msg->msg_offset, msg->msg_wanted, hdr->payload_length);
+}
+
+static int
+lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+	lnet_hdr_t		*hdr = &msg->msg_hdr;
+	struct lnet_match_info	info;
+	int			rc;
+
+	/* Convert put fields to host byte order */
+	hdr->msg.put.match_bits	= le64_to_cpu(hdr->msg.put.match_bits);
+	hdr->msg.put.ptl_index	= le32_to_cpu(hdr->msg.put.ptl_index);
+	hdr->msg.put.offset	= le32_to_cpu(hdr->msg.put.offset);
+
+	info.mi_id.nid	= hdr->src_nid;
+	info.mi_id.pid	= hdr->src_pid;
+	info.mi_opc	= LNET_MD_OP_PUT;
+	info.mi_portal	= hdr->msg.put.ptl_index;
+	info.mi_rlength	= hdr->payload_length;
+	info.mi_roffset	= hdr->msg.put.offset;
+	info.mi_mbits	= hdr->msg.put.match_bits;
+
+	msg->msg_rx_ready_delay = ni->ni_lnd->lnd_eager_recv == NULL;
+
+ again:
+	rc = lnet_ptl_match_md(&info, msg);
+	switch (rc) {
+	default:
+		LBUG();
+
+	case LNET_MATCHMD_OK:
+		lnet_recv_put(ni, msg);
+		return 0;
+
+	case LNET_MATCHMD_NONE:
+		if (msg->msg_rx_delayed) /* attached on delayed list */
+			return 0;
+
+		rc = lnet_ni_eager_recv(ni, msg);
+		if (rc == 0)
+			goto again;
+		/* fall through */
+
+	case LNET_MATCHMD_DROP:
+		CNETERR("Dropping PUT from %s portal %d match "LPU64
+			" offset %d length %d: %d\n",
+			libcfs_id2str(info.mi_id), info.mi_portal,
+			info.mi_mbits, info.mi_roffset, info.mi_rlength, rc);
+
+		return ENOENT;	/* +ve: OK but no match */
+	}
+}
+
+static int
+lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get)
+{
+	struct lnet_match_info	info;
+	lnet_hdr_t		*hdr = &msg->msg_hdr;
+	lnet_handle_wire_t	reply_wmd;
+	int			rc;
+
+	/* Convert get fields to host byte order */
+	hdr->msg.get.match_bits	  = le64_to_cpu(hdr->msg.get.match_bits);
+	hdr->msg.get.ptl_index	  = le32_to_cpu(hdr->msg.get.ptl_index);
+	hdr->msg.get.sink_length  = le32_to_cpu(hdr->msg.get.sink_length);
+	hdr->msg.get.src_offset	  = le32_to_cpu(hdr->msg.get.src_offset);
+
+	info.mi_id.nid	= hdr->src_nid;
+	info.mi_id.pid	= hdr->src_pid;
+	info.mi_opc	= LNET_MD_OP_GET;
+	info.mi_portal	= hdr->msg.get.ptl_index;
+	info.mi_rlength	= hdr->msg.get.sink_length;
+	info.mi_roffset	= hdr->msg.get.src_offset;
+	info.mi_mbits	= hdr->msg.get.match_bits;
+
+	rc = lnet_ptl_match_md(&info, msg);
+	if (rc == LNET_MATCHMD_DROP) {
+		CNETERR("Dropping GET from %s portal %d match "LPU64
+			" offset %d length %d\n",
+			libcfs_id2str(info.mi_id), info.mi_portal,
+			info.mi_mbits, info.mi_roffset, info.mi_rlength);
+		return ENOENT;	/* +ve: OK but no match */
+	}
+
+	LASSERT(rc == LNET_MATCHMD_OK);
+
+	lnet_build_msg_event(msg, LNET_EVENT_GET);
+
+	reply_wmd = hdr->msg.get.return_wmd;
+
+	lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id,
+		       msg->msg_offset, msg->msg_wanted);
+
+	msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
+
+	if (rdma_get) {
+		/* The LND completes the REPLY from her recv procedure */
+		lnet_ni_recv(ni, msg->msg_private, msg, 0,
+			     msg->msg_offset, msg->msg_len, msg->msg_len);
+		return 0;
+	}
+
+	lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0);
+	msg->msg_receiving = 0;
+
+	rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
+	if (rc < 0) {
+		/* didn't get as far as lnet_ni_send() */
+		CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
+		       libcfs_nid2str(ni->ni_nid),
+		       libcfs_id2str(info.mi_id), rc);
+
+		lnet_finalize(ni, msg, rc);
+	}
+
+	return 0;
+}
+
+static int
+lnet_parse_reply(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+	void	     *private = msg->msg_private;
+	lnet_hdr_t       *hdr = &msg->msg_hdr;
+	lnet_process_id_t src = {0};
+	lnet_libmd_t     *md;
+	int	       rlength;
+	int	       mlength;
+	int			cpt;
+
+	cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie);
+	lnet_res_lock(cpt);
+
+	src.nid = hdr->src_nid;
+	src.pid = hdr->src_pid;
+
+	/* NB handles only looked up by creator (no flips) */
+	md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
+	if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
+		CNETERR("%s: Dropping REPLY from %s for %s "
+			"MD "LPX64"."LPX64"\n",
+			libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+			(md == NULL) ? "invalid" : "inactive",
+			hdr->msg.reply.dst_wmd.wh_interface_cookie,
+			hdr->msg.reply.dst_wmd.wh_object_cookie);
+		if (md != NULL && md->md_me != NULL)
+			CERROR("REPLY MD also attached to portal %d\n",
+			       md->md_me->me_portal);
+
+		lnet_res_unlock(cpt);
+		return ENOENT;		  /* +ve: OK but no match */
+	}
+
+	LASSERT (md->md_offset == 0);
+
+	rlength = hdr->payload_length;
+	mlength = MIN(rlength, (int)md->md_length);
+
+	if (mlength < rlength &&
+	    (md->md_options & LNET_MD_TRUNCATE) == 0) {
+		CNETERR("%s: Dropping REPLY from %s length %d "
+			"for MD "LPX64" would overflow (%d)\n",
+			libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+			rlength, hdr->msg.reply.dst_wmd.wh_object_cookie,
+			mlength);
+		lnet_res_unlock(cpt);
+		return ENOENT;	  /* +ve: OK but no match */
+	}
+
+	CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md "LPX64"\n",
+	       libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+	       mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie);
+
+	lnet_msg_attach_md(msg, md, 0, mlength);
+
+	if (mlength != 0)
+		lnet_setpayloadbuffer(msg);
+
+	lnet_res_unlock(cpt);
+
+	lnet_build_msg_event(msg, LNET_EVENT_REPLY);
+
+	lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength);
+	return 0;
+}
+
+static int
+lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+	lnet_hdr_t       *hdr = &msg->msg_hdr;
+	lnet_process_id_t src = {0};
+	lnet_libmd_t     *md;
+	int			cpt;
+
+	src.nid = hdr->src_nid;
+	src.pid = hdr->src_pid;
+
+	/* Convert ack fields to host byte order */
+	hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits);
+	hdr->msg.ack.mlength = le32_to_cpu(hdr->msg.ack.mlength);
+
+	cpt = lnet_cpt_of_cookie(hdr->msg.ack.dst_wmd.wh_object_cookie);
+	lnet_res_lock(cpt);
+
+	/* NB handles only looked up by creator (no flips) */
+	md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
+	if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
+		/* Don't moan; this is expected */
+		CDEBUG(D_NET,
+		       "%s: Dropping ACK from %s to %s MD "LPX64"."LPX64"\n",
+		       libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+		       (md == NULL) ? "invalid" : "inactive",
+		       hdr->msg.ack.dst_wmd.wh_interface_cookie,
+		       hdr->msg.ack.dst_wmd.wh_object_cookie);
+		if (md != NULL && md->md_me != NULL)
+			CERROR("Source MD also attached to portal %d\n",
+			       md->md_me->me_portal);
+
+		lnet_res_unlock(cpt);
+		return ENOENT;		  /* +ve! */
+	}
+
+	CDEBUG(D_NET, "%s: ACK from %s into md "LPX64"\n",
+	       libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+	       hdr->msg.ack.dst_wmd.wh_object_cookie);
+
+	lnet_msg_attach_md(msg, md, 0, 0);
+
+	lnet_res_unlock(cpt);
+
+	lnet_build_msg_event(msg, LNET_EVENT_ACK);
+
+	lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len);
+	return 0;
+}
+
+static int
+lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+	int	rc = 0;
+
+	if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
+	    lnet_msg2bufpool(msg)->rbp_credits <= 0) {
+		if (ni->ni_lnd->lnd_eager_recv == NULL) {
+			msg->msg_rx_ready_delay = 1;
+		} else {
+			lnet_net_unlock(msg->msg_rx_cpt);
+			rc = lnet_ni_eager_recv(ni, msg);
+			lnet_net_lock(msg->msg_rx_cpt);
+		}
+	}
+
+	if (rc == 0)
+		rc = lnet_post_routed_recv_locked(msg, 0);
+	return rc;
+}
+
+char *
+lnet_msgtyp2str (int type)
+{
+	switch (type) {
+	case LNET_MSG_ACK:
+		return ("ACK");
+	case LNET_MSG_PUT:
+		return ("PUT");
+	case LNET_MSG_GET:
+		return ("GET");
+	case LNET_MSG_REPLY:
+		return ("REPLY");
+	case LNET_MSG_HELLO:
+		return ("HELLO");
+	default:
+		return ("<UNKNOWN>");
+	}
+}
+EXPORT_SYMBOL(lnet_msgtyp2str);
+
+void
+lnet_print_hdr(lnet_hdr_t *hdr)
+{
+	lnet_process_id_t src = {0};
+	lnet_process_id_t dst = {0};
+	char *type_str = lnet_msgtyp2str (hdr->type);
+
+	src.nid = hdr->src_nid;
+	src.pid = hdr->src_pid;
+
+	dst.nid = hdr->dest_nid;
+	dst.pid = hdr->dest_pid;
+
+	CWARN("P3 Header at %p of type %s\n", hdr, type_str);
+	CWARN("    From %s\n", libcfs_id2str(src));
+	CWARN("    To   %s\n", libcfs_id2str(dst));
+
+	switch (hdr->type) {
+	default:
+		break;
+
+	case LNET_MSG_PUT:
+		CWARN("    Ptl index %d, ack md "LPX64"."LPX64", "
+		      "match bits "LPU64"\n",
+		      hdr->msg.put.ptl_index,
+		      hdr->msg.put.ack_wmd.wh_interface_cookie,
+		      hdr->msg.put.ack_wmd.wh_object_cookie,
+		      hdr->msg.put.match_bits);
+		CWARN("    Length %d, offset %d, hdr data "LPX64"\n",
+		      hdr->payload_length, hdr->msg.put.offset,
+		      hdr->msg.put.hdr_data);
+		break;
+
+	case LNET_MSG_GET:
+		CWARN("    Ptl index %d, return md "LPX64"."LPX64", "
+		      "match bits "LPU64"\n", hdr->msg.get.ptl_index,
+		      hdr->msg.get.return_wmd.wh_interface_cookie,
+		      hdr->msg.get.return_wmd.wh_object_cookie,
+		      hdr->msg.get.match_bits);
+		CWARN("    Length %d, src offset %d\n",
+		      hdr->msg.get.sink_length,
+		      hdr->msg.get.src_offset);
+		break;
+
+	case LNET_MSG_ACK:
+		CWARN("    dst md "LPX64"."LPX64", "
+		      "manipulated length %d\n",
+		      hdr->msg.ack.dst_wmd.wh_interface_cookie,
+		      hdr->msg.ack.dst_wmd.wh_object_cookie,
+		      hdr->msg.ack.mlength);
+		break;
+
+	case LNET_MSG_REPLY:
+		CWARN("    dst md "LPX64"."LPX64", "
+		      "length %d\n",
+		      hdr->msg.reply.dst_wmd.wh_interface_cookie,
+		      hdr->msg.reply.dst_wmd.wh_object_cookie,
+		      hdr->payload_length);
+	}
+}
+
+int
+lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
+	   void *private, int rdma_req)
+{
+	int		rc = 0;
+	int		cpt;
+	int		for_me;
+	struct lnet_msg	*msg;
+	lnet_pid_t     dest_pid;
+	lnet_nid_t     dest_nid;
+	lnet_nid_t     src_nid;
+	__u32	  payload_length;
+	__u32	  type;
+
+	LASSERT (!in_interrupt ());
+
+	type = le32_to_cpu(hdr->type);
+	src_nid = le64_to_cpu(hdr->src_nid);
+	dest_nid = le64_to_cpu(hdr->dest_nid);
+	dest_pid = le32_to_cpu(hdr->dest_pid);
+	payload_length = le32_to_cpu(hdr->payload_length);
+
+	for_me = (ni->ni_nid == dest_nid);
+	cpt = lnet_cpt_of_nid(from_nid);
+
+	switch (type) {
+	case LNET_MSG_ACK:
+	case LNET_MSG_GET:
+		if (payload_length > 0) {
+			CERROR("%s, src %s: bad %s payload %d (0 expected)\n",
+			       libcfs_nid2str(from_nid),
+			       libcfs_nid2str(src_nid),
+			       lnet_msgtyp2str(type), payload_length);
+			return -EPROTO;
+		}
+		break;
+
+	case LNET_MSG_PUT:
+	case LNET_MSG_REPLY:
+		if (payload_length > (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
+			CERROR("%s, src %s: bad %s payload %d "
+			       "(%d max expected)\n",
+			       libcfs_nid2str(from_nid),
+			       libcfs_nid2str(src_nid),
+			       lnet_msgtyp2str(type),
+			       payload_length,
+			       for_me ? LNET_MAX_PAYLOAD : LNET_MTU);
+			return -EPROTO;
+		}
+		break;
+
+	default:
+		CERROR("%s, src %s: Bad message type 0x%x\n",
+		       libcfs_nid2str(from_nid),
+		       libcfs_nid2str(src_nid), type);
+		return -EPROTO;
+	}
+
+	if (the_lnet.ln_routing &&
+	    ni->ni_last_alive != cfs_time_current_sec()) {
+		lnet_ni_lock(ni);
+
+		/* NB: so far this is the only place to set NI status to "up" */
+		ni->ni_last_alive = cfs_time_current_sec();
+		if (ni->ni_status != NULL &&
+		    ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
+			ni->ni_status->ns_status = LNET_NI_STATUS_UP;
+		lnet_ni_unlock(ni);
+	}
+
+	/* Regard a bad destination NID as a protocol error.  Senders should
+	 * know what they're doing; if they don't they're misconfigured, buggy
+	 * or malicious so we chop them off at the knees :) */
+
+	if (!for_me) {
+		if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) {
+			/* should have gone direct */
+			CERROR ("%s, src %s: Bad dest nid %s "
+				"(should have been sent direct)\n",
+				libcfs_nid2str(from_nid),
+				libcfs_nid2str(src_nid),
+				libcfs_nid2str(dest_nid));
+			return -EPROTO;
+		}
+
+		if (lnet_islocalnid(dest_nid)) {
+			/* dest is another local NI; sender should have used
+			 * this node's NID on its own network */
+			CERROR ("%s, src %s: Bad dest nid %s "
+				"(it's my nid but on a different network)\n",
+				libcfs_nid2str(from_nid),
+				libcfs_nid2str(src_nid),
+				libcfs_nid2str(dest_nid));
+			return -EPROTO;
+		}
+
+		if (rdma_req && type == LNET_MSG_GET) {
+			CERROR ("%s, src %s: Bad optimized GET for %s "
+				"(final destination must be me)\n",
+				libcfs_nid2str(from_nid),
+				libcfs_nid2str(src_nid),
+				libcfs_nid2str(dest_nid));
+			return -EPROTO;
+		}
+
+		if (!the_lnet.ln_routing) {
+			CERROR ("%s, src %s: Dropping message for %s "
+				"(routing not enabled)\n",
+				libcfs_nid2str(from_nid),
+				libcfs_nid2str(src_nid),
+				libcfs_nid2str(dest_nid));
+			goto drop;
+		}
+	}
+
+	/* Message looks OK; we're not going to return an error, so we MUST
+	 * call back lnd_recv() come what may... */
+
+	if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */
+	    fail_peer (src_nid, 0))	     /* shall we now? */
+	{
+		CERROR("%s, src %s: Dropping %s to simulate failure\n",
+		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
+		       lnet_msgtyp2str(type));
+		goto drop;
+	}
+
+	msg = lnet_msg_alloc();
+	if (msg == NULL) {
+		CERROR("%s, src %s: Dropping %s (out of memory)\n",
+		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
+		       lnet_msgtyp2str(type));
+		goto drop;
+	}
+
+	/* msg zeroed in lnet_msg_alloc; i.e. flags all clear, pointers NULL etc */
+
+	msg->msg_type = type;
+	msg->msg_private = private;
+	msg->msg_receiving = 1;
+	msg->msg_len = msg->msg_wanted = payload_length;
+	msg->msg_offset = 0;
+	msg->msg_hdr = *hdr;
+	/* for building message event */
+	msg->msg_from = from_nid;
+	if (!for_me) {
+		msg->msg_target.pid	= dest_pid;
+		msg->msg_target.nid	= dest_nid;
+		msg->msg_routing	= 1;
+
+	} else {
+		/* convert common msg->hdr fields to host byteorder */
+		msg->msg_hdr.type	= type;
+		msg->msg_hdr.src_nid	= src_nid;
+		msg->msg_hdr.src_pid	= le32_to_cpu(msg->msg_hdr.src_pid);
+		msg->msg_hdr.dest_nid	= dest_nid;
+		msg->msg_hdr.dest_pid	= dest_pid;
+		msg->msg_hdr.payload_length = payload_length;
+	}
+
+	lnet_net_lock(cpt);
+	rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt);
+	if (rc != 0) {
+		lnet_net_unlock(cpt);
+		CERROR("%s, src %s: Dropping %s "
+		       "(error %d looking up sender)\n",
+		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
+		       lnet_msgtyp2str(type), rc);
+		lnet_msg_free(msg);
+		goto drop;
+	}
+
+	lnet_msg_commit(msg, cpt);
+
+	if (!for_me) {
+		rc = lnet_parse_forward_locked(ni, msg);
+		lnet_net_unlock(cpt);
+
+		if (rc < 0)
+			goto free_drop;
+		if (rc == 0) {
+			lnet_ni_recv(ni, msg->msg_private, msg, 0,
+				     0, payload_length, payload_length);
+		}
+		return 0;
+	}
+
+	lnet_net_unlock(cpt);
+
+	switch (type) {
+	case LNET_MSG_ACK:
+		rc = lnet_parse_ack(ni, msg);
+		break;
+	case LNET_MSG_PUT:
+		rc = lnet_parse_put(ni, msg);
+		break;
+	case LNET_MSG_GET:
+		rc = lnet_parse_get(ni, msg, rdma_req);
+		break;
+	case LNET_MSG_REPLY:
+		rc = lnet_parse_reply(ni, msg);
+		break;
+	default:
+		LASSERT(0);
+		rc = -EPROTO;
+		goto free_drop;  /* prevent an unused label if !kernel */
+	}
+
+	if (rc == 0)
+		return 0;
+
+	LASSERT (rc == ENOENT);
+
+ free_drop:
+	LASSERT(msg->msg_md == NULL);
+	lnet_finalize(ni, msg, rc);
+
+ drop:
+	lnet_drop_message(ni, cpt, private, payload_length);
+	return 0;
+}
+EXPORT_SYMBOL(lnet_parse);
+
+void
+lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
+{
+	while (!list_empty(head)) {
+		lnet_process_id_t	id = {0};
+		lnet_msg_t		*msg;
+
+		msg = list_entry(head->next, lnet_msg_t, msg_list);
+		list_del(&msg->msg_list);
+
+		id.nid = msg->msg_hdr.src_nid;
+		id.pid = msg->msg_hdr.src_pid;
+
+		LASSERT(msg->msg_md == NULL);
+		LASSERT(msg->msg_rx_delayed);
+		LASSERT(msg->msg_rxpeer != NULL);
+		LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
+
+		CWARN("Dropping delayed PUT from %s portal %d match "LPU64
+		      " offset %d length %d: %s\n",
+		      libcfs_id2str(id),
+		      msg->msg_hdr.msg.put.ptl_index,
+		      msg->msg_hdr.msg.put.match_bits,
+		      msg->msg_hdr.msg.put.offset,
+		      msg->msg_hdr.payload_length, reason);
+
+		/* NB I can't drop msg's ref on msg_rxpeer until after I've
+		 * called lnet_drop_message(), so I just hang onto msg as well
+		 * until that's done */
+
+		lnet_drop_message(msg->msg_rxpeer->lp_ni,
+				  msg->msg_rxpeer->lp_cpt,
+				  msg->msg_private, msg->msg_len);
+		/*
+		 * NB: the message will not generate an event because it has
+		 * no attached MD, but we should still give an error code so
+		 * lnet_msg_decommit() can skip counter operations and other
+		 * checks.
+		 */
+		lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
+	}
+}
+
+void
+lnet_recv_delayed_msg_list(struct list_head *head)
+{
+	while (!list_empty(head)) {
+		lnet_msg_t	  *msg;
+		lnet_process_id_t  id;
+
+		msg = list_entry(head->next, lnet_msg_t, msg_list);
+		list_del(&msg->msg_list);
+
+		/* md won't disappear under me, since each msg
+		 * holds a ref on it */
+
+		id.nid = msg->msg_hdr.src_nid;
+		id.pid = msg->msg_hdr.src_pid;
+
+		LASSERT(msg->msg_rx_delayed);
+		LASSERT(msg->msg_md != NULL);
+		LASSERT(msg->msg_rxpeer != NULL);
+		LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
+
+		CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
+		       "match "LPU64" offset %d length %d.\n",
+			libcfs_id2str(id), msg->msg_hdr.msg.put.ptl_index,
+			msg->msg_hdr.msg.put.match_bits,
+			msg->msg_hdr.msg.put.offset,
+			msg->msg_hdr.payload_length);
+
+		lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
+	}
+}
+
+/**
+ * Initiate an asynchronous PUT operation.
+ *
+ * There are several events associated with a PUT: completion of the send on
+ * the initiator node (LNET_EVENT_SEND), and when the send completes
+ * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating
+ * that the operation was accepted by the target. The event LNET_EVENT_PUT is
+ * used at the target node to indicate the completion of incoming data
+ * delivery.
+ *
+ * The local events will be logged in the EQ associated with the MD pointed to
+ * by \a mdh handle. Using a MD without an associated EQ results in these
+ * events being discarded. In this case, the caller must have another
+ * mechanism (e.g., a higher level protocol) for determining when it is safe
+ * to modify the memory region associated with the MD.
+ *
+ * Note that LNet does not guarantee the order of LNET_EVENT_SEND and
+ * LNET_EVENT_ACK, though intuitively ACK should happen after SEND.
+ *
+ * \param self Indicates the NID of a local interface through which to send
+ * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself.
+ * \param mdh A handle for the MD that describes the memory to be sent. The MD
+ * must be "free floating" (See LNetMDBind()).
+ * \param ack Controls whether an acknowledgment is requested.
+ * Acknowledgments are only sent when they are requested by the initiating
+ * process and the target MD enables them.
+ * \param target A process identifier for the target process.
+ * \param portal The index in the \a target's portal table.
+ * \param match_bits The match bits to use for MD selection at the target
+ * process.
+ * \param offset The offset into the target MD (only used when the target
+ * MD has the LNET_MD_MANAGE_REMOTE option set).
+ * \param hdr_data 64 bits of user data that can be included in the message
+ * header. This data is written to an event queue entry at the target if an
+ * EQ is present on the matching MD.
+ *
+ * \retval  0      Success, and only in this case events will be generated
+ * and logged to EQ (if it exists).
+ * \retval -EIO    Simulated failure.
+ * \retval -ENOMEM Memory allocation failure.
+ * \retval -ENOENT Invalid MD object.
+ *
+ * \see lnet_event_t::hdr_data and lnet_event_kind_t.
+ */
+int
+LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack,
+	lnet_process_id_t target, unsigned int portal,
+	__u64 match_bits, unsigned int offset,
+	__u64 hdr_data)
+{
+	struct lnet_msg		*msg;
+	struct lnet_libmd	*md;
+	int			cpt;
+	int			rc;
+
+	LASSERT (the_lnet.ln_init);
+	LASSERT (the_lnet.ln_refcount > 0);
+
+	if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */
+	    fail_peer (target.nid, 1))	  /* shall we now? */
+	{
+		CERROR("Dropping PUT to %s: simulated failure\n",
+		       libcfs_id2str(target));
+		return -EIO;
+	}
+
+	msg = lnet_msg_alloc();
+	if (msg == NULL) {
+		CERROR("Dropping PUT to %s: ENOMEM on lnet_msg_t\n",
+		       libcfs_id2str(target));
+		return -ENOMEM;
+	}
+	msg->msg_vmflush = !!memory_pressure_get();
+
+	cpt = lnet_cpt_of_cookie(mdh.cookie);
+	lnet_res_lock(cpt);
+
+	md = lnet_handle2md(&mdh);
+	if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
+		CERROR("Dropping PUT ("LPU64":%d:%s): MD (%d) invalid\n",
+		       match_bits, portal, libcfs_id2str(target),
+		       md == NULL ? -1 : md->md_threshold);
+		if (md != NULL && md->md_me != NULL)
+			CERROR("Source MD also attached to portal %d\n",
+			       md->md_me->me_portal);
+		lnet_res_unlock(cpt);
+
+		lnet_msg_free(msg);
+		return -ENOENT;
+	}
+
+	CDEBUG(D_NET, "LNetPut -> %s\n", libcfs_id2str(target));
+
+	lnet_msg_attach_md(msg, md, 0, 0);
+
+	lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length);
+
+	msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits);
+	msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal);
+	msg->msg_hdr.msg.put.offset = cpu_to_le32(offset);
+	msg->msg_hdr.msg.put.hdr_data = hdr_data;
+
+	/* NB handles only looked up by creator (no flips) */
+	if (ack == LNET_ACK_REQ) {
+		msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
+			the_lnet.ln_interface_cookie;
+		msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
+			md->md_lh.lh_cookie;
+	} else {
+		msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
+			LNET_WIRE_HANDLE_COOKIE_NONE;
+		msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
+			LNET_WIRE_HANDLE_COOKIE_NONE;
+	}
+
+	lnet_res_unlock(cpt);
+
+	lnet_build_msg_event(msg, LNET_EVENT_SEND);
+
+	rc = lnet_send(self, msg, LNET_NID_ANY);
+	if (rc != 0) {
+		CNETERR( "Error sending PUT to %s: %d\n",
+		       libcfs_id2str(target), rc);
+		lnet_finalize (NULL, msg, rc);
+	}
+
+	/* completion will be signalled by an event */
+	return 0;
+}
+EXPORT_SYMBOL(LNetPut);
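
For illustration, a minimal PUT issued against this API might look like the
sketch below. The peer NID and portal/match-bits values (peer_nid, MY_PORTAL,
MY_MATCH_BITS) are hypothetical placeholders defined by the caller's
protocol, and mdh is assumed to come from a prior LNetMDBind() on an MD with
an attached EQ:

	lnet_process_id_t target;
	int rc;

	target.nid = peer_nid;			/* hypothetical peer NID */
	target.pid = LUSTRE_SRV_LNET_PID;

	/* request an ACK and carry 64 bits of protocol data in the header */
	rc = LNetPut(LNET_NID_ANY, mdh, LNET_ACK_REQ, target,
		     MY_PORTAL, MY_MATCH_BITS, 0, 0x1234ULL);
	if (rc != 0)
		CERROR("LNetPut to %s failed: %d\n",
		       libcfs_id2str(target), rc);
	/* on rc == 0, completion is signalled by LNET_EVENT_SEND (and
	 * LNET_EVENT_ACK once the target has processed the PUT) on the EQ */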
+
+lnet_msg_t *
+lnet_create_reply_msg(lnet_ni_t *ni, lnet_msg_t *getmsg)
+{
+	/* The LND can DMA direct to the GET md (i.e. no REPLY msg).  This
+	 * returns a msg for the LND to pass to lnet_finalize() when the sink
+	 * data has been received.
+	 *
+	 * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
+	 * lnet_finalize() is called on it, so the LND must call this first */
+
+	struct lnet_msg		*msg = lnet_msg_alloc();
+	struct lnet_libmd	*getmd = getmsg->msg_md;
+	lnet_process_id_t	peer_id = getmsg->msg_target;
+	int			cpt;
+
+	LASSERT(!getmsg->msg_target_is_router);
+	LASSERT(!getmsg->msg_routing);
+
+	cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie);
+	lnet_res_lock(cpt);
+
+	LASSERT(getmd->md_refcount > 0);
+
+	if (msg == NULL) {
+		CERROR("%s: Dropping REPLY from %s: can't allocate msg\n",
+		       libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id));
+		lnet_res_unlock(cpt);
+		goto drop;
+	}
+
+	if (getmd->md_threshold == 0) {
+		CERROR("%s: Dropping REPLY from %s for inactive MD %p\n",
+		       libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id),
+		       getmd);
+		lnet_res_unlock(cpt);
+		goto drop;
+	}
+
+	LASSERT(getmd->md_offset == 0);
+
+	CDEBUG(D_NET, "%s: Reply from %s md %p\n",
+	       libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd);
+
+	/* setup information for lnet_build_msg_event */
+	msg->msg_from = peer_id.nid;
+	msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */
+	msg->msg_hdr.src_nid = peer_id.nid;
+	msg->msg_hdr.payload_length = getmd->md_length;
+	msg->msg_receiving = 1; /* required by lnet_msg_attach_md */
+
+	lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
+	lnet_res_unlock(cpt);
+
+	cpt = lnet_cpt_of_nid(peer_id.nid);
+
+	lnet_net_lock(cpt);
+	lnet_msg_commit(msg, cpt);
+	lnet_net_unlock(cpt);
+
+	lnet_build_msg_event(msg, LNET_EVENT_REPLY);
+
+	return msg;
+
+ drop:
+	cpt = lnet_cpt_of_nid(peer_id.nid);
+
+	lnet_net_lock(cpt);
+	the_lnet.ln_counters[cpt]->drop_count++;
+	the_lnet.ln_counters[cpt]->drop_length += getmd->md_length;
+	lnet_net_unlock(cpt);
+
+	if (msg != NULL)
+		lnet_msg_free(msg);
+
+	return NULL;
+}
+EXPORT_SYMBOL(lnet_create_reply_msg);
+
+void
+lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *reply, unsigned int len)
+{
+	/* Set the REPLY length, now that the RDMA that elides the REPLY
+	 * message has completed and the length is known. */
+	LASSERT(reply != NULL);
+	LASSERT(reply->msg_type == LNET_MSG_GET);
+	LASSERT(reply->msg_ev.type == LNET_EVENT_REPLY);
+
+	/* NB I trusted my peer to RDMA.  If she tells me she's written beyond
+	 * the end of my buffer, I might as well be dead. */
+	LASSERT(len <= reply->msg_ev.mlength);
+
+	reply->msg_ev.mlength = len;
+}
+EXPORT_SYMBOL(lnet_set_reply_msg_len);
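
Together with lnet_create_reply_msg() above, this is how an LND that DMAs
GET data straight into the sink buffer completes an "optimized" GET. A
sketch of such a completion path (nob, the byte count actually written, is
an illustrative name):

	lnet_msg_t *reply = lnet_create_reply_msg(ni, getmsg);

	/* finalizing getmsg frees it, so the REPLY msg was created first */
	lnet_finalize(ni, getmsg, reply == NULL ? -ENOMEM : 0);

	if (reply != NULL) {
		lnet_set_reply_msg_len(ni, reply, nob);
		lnet_finalize(ni, reply, 0);	/* delivers LNET_EVENT_REPLY */
	}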
+
+/**
+ * Initiate an asynchronous GET operation.
+ *
+ * On the initiator node, an LNET_EVENT_SEND is logged when the GET request
+ * is sent, and an LNET_EVENT_REPLY is logged when the data returned from
+ * the target node in the REPLY has been written to the local MD.
+ *
+ * On the target node, an LNET_EVENT_GET is logged when the GET request
+ * arrives and is accepted into an MD.
+ *
+ * \param self,target,portal,match_bits,offset See the discussion in LNetPut().
+ * \param mdh A handle for the MD that describes the memory into which the
+ * requested data will be received. The MD must be "free floating"
+ * (See LNetMDBind()).
+ *
+ * \retval  0      Success, and only in this case events will be generated
+ * and logged to the EQ of the MD (if one exists).
+ * \retval -EIO    Simulated failure.
+ * \retval -ENOMEM Memory allocation failure.
+ * \retval -ENOENT Invalid MD object.
+ */
+int
+LNetGet(lnet_nid_t self, lnet_handle_md_t mdh,
+	lnet_process_id_t target, unsigned int portal,
+	__u64 match_bits, unsigned int offset)
+{
+	struct lnet_msg		*msg;
+	struct lnet_libmd	*md;
+	int			cpt;
+	int			rc;
+
+	LASSERT(the_lnet.ln_init);
+	LASSERT(the_lnet.ln_refcount > 0);
+
+	if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
+	    fail_peer(target.nid, 1)) {		     /* shall we now? */
+		CERROR("Dropping GET to %s: simulated failure\n",
+		       libcfs_id2str(target));
+		return -EIO;
+	}
+
+	msg = lnet_msg_alloc();
+	if (msg == NULL) {
+		CERROR("Dropping GET to %s: ENOMEM on lnet_msg_t\n",
+		       libcfs_id2str(target));
+		return -ENOMEM;
+	}
+
+	cpt = lnet_cpt_of_cookie(mdh.cookie);
+	lnet_res_lock(cpt);
+
+	md = lnet_handle2md(&mdh);
+	if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
+		CERROR("Dropping GET ("LPU64":%d:%s): MD (%d) invalid\n",
+		       match_bits, portal, libcfs_id2str(target),
+		       md == NULL ? -1 : md->md_threshold);
+		if (md != NULL && md->md_me != NULL)
+			CERROR("REPLY MD also attached to portal %d\n",
+			       md->md_me->me_portal);
+
+		lnet_res_unlock(cpt);
+
+		lnet_msg_free(msg);
+
+		return -ENOENT;
+	}
+
+	CDEBUG(D_NET, "LNetGet -> %s\n", libcfs_id2str(target));
+
+	lnet_msg_attach_md(msg, md, 0, 0);
+
+	lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0);
+
+	msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits);
+	msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal);
+	msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset);
+	msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length);
+
+	/* NB handles only looked up by creator (no flips) */
+	msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie =
+		the_lnet.ln_interface_cookie;
+	msg->msg_hdr.msg.get.return_wmd.wh_object_cookie =
+		md->md_lh.lh_cookie;
+
+	lnet_res_unlock(cpt);
+
+	lnet_build_msg_event(msg, LNET_EVENT_SEND);
+
+	rc = lnet_send(self, msg, LNET_NID_ANY);
+	if (rc < 0) {
+		CNETERR( "Error sending GET to %s: %d\n",
+		       libcfs_id2str(target), rc);
+		lnet_finalize (NULL, msg, rc);
+	}
+
+	/* completion will be signalled by an event */
+	return 0;
+}
+EXPORT_SYMBOL(LNetGet);
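
A matching caller sketch for the GET side, with the same hypothetical
placeholders as in the LNetPut() example and mdh again bound via
LNetMDBind():

	rc = LNetGet(LNET_NID_ANY, mdh, target, MY_PORTAL, MY_MATCH_BITS, 0);
	if (rc != 0)
		CERROR("LNetGet from %s failed: %d\n",
		       libcfs_id2str(target), rc);
	/* on rc == 0, LNET_EVENT_SEND is followed by LNET_EVENT_REPLY once
	 * the remote data has landed in the MD */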
+
+/**
+ * Calculate distance to node at \a dstnid.
+ *
+ * \param dstnid Target NID.
+ * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid
+ * is saved here.
+ * \param orderp If not NULL, order of the route to reach \a dstnid is saved
+ * here.
+ *
+ * \retval 0 If \a dstnid belongs to a local interface, and the reserved
+ * option local_nid_dist_zero is set, which is the default.
+ * \retval positives Distance to the target NID, i.e. the number of hops
+ * plus one.
+ * \retval -EHOSTUNREACH If \a dstnid is not reachable.
+ */
+int
+LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
+{
+	struct list_head		*e;
+	struct lnet_ni		*ni;
+	lnet_remotenet_t	*rnet;
+	__u32			dstnet = LNET_NIDNET(dstnid);
+	int			hops;
+	int			cpt;
+	__u32			order = 2;
+	struct list_head		*rn_list;
+
+	/* if !local_nid_dist_zero, I don't return a distance of 0 ever
+	 * (when lustre sees a distance of 0, it substitutes 0@lo), so I
+	 * keep order 0 free for 0@lo and order 1 free for a local NID
+	 * match */
+
+	LASSERT(the_lnet.ln_init);
+	LASSERT(the_lnet.ln_refcount > 0);
+
+	cpt = lnet_net_lock_current();
+
+	list_for_each(e, &the_lnet.ln_nis) {
+		ni = list_entry(e, lnet_ni_t, ni_list);
+
+		if (ni->ni_nid == dstnid) {
+			if (srcnidp != NULL)
+				*srcnidp = dstnid;
+			if (orderp != NULL) {
+				if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND)
+					*orderp = 0;
+				else
+					*orderp = 1;
+			}
+			lnet_net_unlock(cpt);
+
+			return local_nid_dist_zero ? 0 : 1;
+		}
+
+		if (LNET_NIDNET(ni->ni_nid) == dstnet) {
+			if (srcnidp != NULL)
+				*srcnidp = ni->ni_nid;
+			if (orderp != NULL)
+				*orderp = order;
+			lnet_net_unlock(cpt);
+			return 1;
+		}
+
+		order++;
+	}
+
+	rn_list = lnet_net2rnethash(dstnet);
+	list_for_each(e, rn_list) {
+		rnet = list_entry(e, lnet_remotenet_t, lrn_list);
+
+		if (rnet->lrn_net == dstnet) {
+			lnet_route_t *route;
+			lnet_route_t *shortest = NULL;
+
+			LASSERT(!list_empty(&rnet->lrn_routes));
+
+			list_for_each_entry(route, &rnet->lrn_routes,
+						lr_list) {
+				if (shortest == NULL ||
+				    route->lr_hops < shortest->lr_hops)
+					shortest = route;
+			}
+
+			LASSERT(shortest != NULL);
+			hops = shortest->lr_hops;
+			if (srcnidp != NULL)
+				*srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
+			if (orderp != NULL)
+				*orderp = order;
+			lnet_net_unlock(cpt);
+			return hops + 1;
+		}
+		order++;
+	}
+
+	lnet_net_unlock(cpt);
+	return -EHOSTUNREACH;
+}
+EXPORT_SYMBOL(LNetDist);
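
A sketch of how a caller interprets the three classes of return value
(dstnid is a NID the caller wants to reach):

	lnet_nid_t src;
	__u32 order;
	int dist;

	dist = LNetDist(dstnid, &src, &order);
	if (dist < 0)
		CERROR("%s is unreachable\n", libcfs_nid2str(dstnid));
	else if (dist == 0)
		CDEBUG(D_NET, "%s is a local interface\n",
		       libcfs_nid2str(dstnid));
	else		/* dist - 1 router hops away, reached through src */
		CDEBUG(D_NET, "%s reachable via %s, distance %d order %u\n",
		       libcfs_nid2str(dstnid), libcfs_nid2str(src),
		       dist, order);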
+
+/**
+ * Set the number of asynchronous messages expected from a target process.
+ *
+ * This function is only meaningful for userspace callers. It's a no-op when
+ * called from the kernel.
+ *
+ * Asynchronous messages are those that can come from a target when the
+ * userspace process is not waiting for IO to complete; e.g., AST callbacks
+ * from Lustre servers. Specifying the expected number of such messages
+ * allows them to be eagerly received even when the user process is not
+ * running in LNet; otherwise network errors may occur.
+ *
+ * \param id Process ID of the target process.
+ * \param nasync Number of asynchronous messages expected from the target.
+ *
+ * \return 0 on success, and an error code otherwise.
+ */
+int
+LNetSetAsync(lnet_process_id_t id, int nasync)
+{
+	return 0;
+}
+EXPORT_SYMBOL(LNetSetAsync);

+ 650 - 0
drivers/staging/lustre/lnet/lnet/lib-msg.c

@@ -0,0 +1,650 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-msg.c
+ *
+ * Message decoding, parsing and finalizing routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+void
+lnet_build_unlink_event(lnet_libmd_t *md, lnet_event_t *ev)
+{
+	ENTRY;
+
+	memset(ev, 0, sizeof(*ev));
+
+	ev->status   = 0;
+	ev->unlinked = 1;
+	ev->type     = LNET_EVENT_UNLINK;
+	lnet_md_deconstruct(md, &ev->md);
+	lnet_md2handle(&ev->md_handle, md);
+	EXIT;
+}
+
+/*
+ * Don't need any lock, must be called after lnet_commit_md
+ */
+void
+lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type)
+{
+	lnet_hdr_t	*hdr = &msg->msg_hdr;
+	lnet_event_t	*ev  = &msg->msg_ev;
+
+	LASSERT(!msg->msg_routing);
+
+	ev->type = ev_type;
+
+	if (ev_type == LNET_EVENT_SEND) {
+		/* event for active message */
+		ev->target.nid    = le64_to_cpu(hdr->dest_nid);
+		ev->target.pid    = le32_to_cpu(hdr->dest_pid);
+		ev->initiator.nid = LNET_NID_ANY;
+		ev->initiator.pid = the_lnet.ln_pid;
+		ev->sender	  = LNET_NID_ANY;
+
+	} else {
+		/* event for passive message */
+		ev->target.pid    = hdr->dest_pid;
+		ev->target.nid    = hdr->dest_nid;
+		ev->initiator.pid = hdr->src_pid;
+		ev->initiator.nid = hdr->src_nid;
+		ev->rlength       = hdr->payload_length;
+		ev->sender	  = msg->msg_from;
+		ev->mlength	  = msg->msg_wanted;
+		ev->offset	  = msg->msg_offset;
+	}
+
+	switch (ev_type) {
+	default:
+		LBUG();
+
+	case LNET_EVENT_PUT: /* passive PUT */
+		ev->pt_index   = hdr->msg.put.ptl_index;
+		ev->match_bits = hdr->msg.put.match_bits;
+		ev->hdr_data   = hdr->msg.put.hdr_data;
+		return;
+
+	case LNET_EVENT_GET: /* passive GET */
+		ev->pt_index   = hdr->msg.get.ptl_index;
+		ev->match_bits = hdr->msg.get.match_bits;
+		ev->hdr_data   = 0;
+		return;
+
+	case LNET_EVENT_ACK: /* ACK */
+		ev->match_bits = hdr->msg.ack.match_bits;
+		ev->mlength    = hdr->msg.ack.mlength;
+		return;
+
+	case LNET_EVENT_REPLY: /* REPLY */
+		return;
+
+	case LNET_EVENT_SEND: /* active message */
+		if (msg->msg_type == LNET_MSG_PUT) {
+			ev->pt_index   = le32_to_cpu(hdr->msg.put.ptl_index);
+			ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits);
+			ev->offset     = le32_to_cpu(hdr->msg.put.offset);
+			ev->mlength    =
+			ev->rlength    = le32_to_cpu(hdr->payload_length);
+			ev->hdr_data   = le64_to_cpu(hdr->msg.put.hdr_data);
+
+		} else {
+			LASSERT(msg->msg_type == LNET_MSG_GET);
+			ev->pt_index   = le32_to_cpu(hdr->msg.get.ptl_index);
+			ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits);
+			ev->mlength    =
+			ev->rlength    = le32_to_cpu(hdr->msg.get.sink_length);
+			ev->offset     = le32_to_cpu(hdr->msg.get.src_offset);
+			ev->hdr_data   = 0;
+		}
+		return;
+	}
+}
+
+void
+lnet_msg_commit(lnet_msg_t *msg, int cpt)
+{
+	struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
+	lnet_counters_t		  *counters  = the_lnet.ln_counters[cpt];
+
+	/* routed message can be committed for both receiving and sending */
+	LASSERT(!msg->msg_tx_committed);
+
+	if (msg->msg_sending) {
+		LASSERT(!msg->msg_receiving);
+
+		msg->msg_tx_cpt = cpt;
+		msg->msg_tx_committed = 1;
+		if (msg->msg_rx_committed) { /* routed message REPLY */
+			LASSERT(msg->msg_onactivelist);
+			return;
+		}
+	} else {
+		LASSERT(!msg->msg_sending);
+		msg->msg_rx_cpt = cpt;
+		msg->msg_rx_committed = 1;
+	}
+
+	LASSERT(!msg->msg_onactivelist);
+	msg->msg_onactivelist = 1;
+	list_add(&msg->msg_activelist, &container->msc_active);
+
+	counters->msgs_alloc++;
+	if (counters->msgs_alloc > counters->msgs_max)
+		counters->msgs_max = counters->msgs_alloc;
+}
+
+static void
+lnet_msg_decommit_tx(lnet_msg_t *msg, int status)
+{
+	lnet_counters_t	*counters;
+	lnet_event_t	*ev = &msg->msg_ev;
+
+	LASSERT(msg->msg_tx_committed);
+	if (status != 0)
+		goto out;
+
+	counters = the_lnet.ln_counters[msg->msg_tx_cpt];
+	switch (ev->type) {
+	default: /* routed message */
+		LASSERT(msg->msg_routing);
+		LASSERT(msg->msg_rx_committed);
+		LASSERT(ev->type == 0);
+
+		counters->route_length += msg->msg_len;
+		counters->route_count++;
+		goto out;
+
+	case LNET_EVENT_PUT:
+		/* should have been decommitted */
+		LASSERT(!msg->msg_rx_committed);
+		/* overwritten while sending ACK */
+		LASSERT(msg->msg_type == LNET_MSG_ACK);
+		msg->msg_type = LNET_MSG_PUT; /* fix type */
+		break;
+
+	case LNET_EVENT_SEND:
+		LASSERT(!msg->msg_rx_committed);
+		if (msg->msg_type == LNET_MSG_PUT)
+			counters->send_length += msg->msg_len;
+		break;
+
+	case LNET_EVENT_GET:
+		LASSERT(msg->msg_rx_committed);
+		/* overwritten while sending reply, we should never be
+		 * here for optimized GET */
+		LASSERT(msg->msg_type == LNET_MSG_REPLY);
+		msg->msg_type = LNET_MSG_GET; /* fix type */
+		break;
+	}
+
+	counters->send_count++;
+ out:
+	lnet_return_tx_credits_locked(msg);
+	msg->msg_tx_committed = 0;
+}
+
+static void
+lnet_msg_decommit_rx(lnet_msg_t *msg, int status)
+{
+	lnet_counters_t	*counters;
+	lnet_event_t	*ev = &msg->msg_ev;
+
+	LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */
+	LASSERT(msg->msg_rx_committed);
+
+	if (status != 0)
+		goto out;
+
+	counters = the_lnet.ln_counters[msg->msg_rx_cpt];
+	switch (ev->type) {
+	default:
+		LASSERT(ev->type == 0);
+		LASSERT(msg->msg_routing);
+		goto out;
+
+	case LNET_EVENT_ACK:
+		LASSERT(msg->msg_type == LNET_MSG_ACK);
+		break;
+
+	case LNET_EVENT_GET:
+		/* type is "REPLY" if it's an optimized GET on passive side,
+		 * because optimized GET will never be committed for sending,
+		 * so message type wouldn't be changed back to "GET" by
+		 * lnet_msg_decommit_tx(), see details in lnet_parse_get() */
+		LASSERT(msg->msg_type == LNET_MSG_REPLY ||
+			msg->msg_type == LNET_MSG_GET);
+		counters->send_length += msg->msg_wanted;
+		break;
+
+	case LNET_EVENT_PUT:
+		LASSERT(msg->msg_type == LNET_MSG_PUT);
+		break;
+
+	case LNET_EVENT_REPLY:
+		/* type is "GET" if it's an optimized GET on active side,
+		 * see details in lnet_create_reply_msg() */
+		LASSERT(msg->msg_type == LNET_MSG_GET ||
+			msg->msg_type == LNET_MSG_REPLY);
+		break;
+	}
+
+	counters->recv_count++;
+	if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
+		counters->recv_length += msg->msg_wanted;
+
+ out:
+	lnet_return_rx_credits_locked(msg);
+	msg->msg_rx_committed = 0;
+}
+
+void
+lnet_msg_decommit(lnet_msg_t *msg, int cpt, int status)
+{
+	int	cpt2 = cpt;
+
+	LASSERT(msg->msg_tx_committed || msg->msg_rx_committed);
+	LASSERT(msg->msg_onactivelist);
+
+	if (msg->msg_tx_committed) { /* always decommit for sending first */
+		LASSERT(cpt == msg->msg_tx_cpt);
+		lnet_msg_decommit_tx(msg, status);
+	}
+
+	if (msg->msg_rx_committed) {
+		/* forwarding msg committed for both receiving and sending */
+		if (cpt != msg->msg_rx_cpt) {
+			lnet_net_unlock(cpt);
+			cpt2 = msg->msg_rx_cpt;
+			lnet_net_lock(cpt2);
+		}
+		lnet_msg_decommit_rx(msg, status);
+	}
+
+	list_del(&msg->msg_activelist);
+	msg->msg_onactivelist = 0;
+
+	the_lnet.ln_counters[cpt2]->msgs_alloc--;
+
+	if (cpt2 != cpt) {
+		lnet_net_unlock(cpt2);
+		lnet_net_lock(cpt);
+	}
+}
+
+void
+lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md,
+		   unsigned int offset, unsigned int mlen)
+{
+	/* NB: @offset and @mlen are only useful for receiving */
+	/* Here, we attach the MD to the lnet_msg, mark it busy, and
+	 * decrement its threshold. Come what may, the lnet_msg "owns"
+	 * the MD until a call to lnet_msg_detach_md() or lnet_finalize()
+	 * signals completion. */
+	LASSERT(!msg->msg_routing);
+
+	msg->msg_md = md;
+	if (msg->msg_receiving) { /* committed for receiving */
+		msg->msg_offset = offset;
+		msg->msg_wanted = mlen;
+	}
+
+	md->md_refcount++;
+	if (md->md_threshold != LNET_MD_THRESH_INF) {
+		LASSERT(md->md_threshold > 0);
+		md->md_threshold--;
+	}
+
+	/* build umd in event */
+	lnet_md2handle(&msg->msg_ev.md_handle, md);
+	lnet_md_deconstruct(md, &msg->msg_ev.md);
+}
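
For example, a target-side MD created with a threshold of 2 can be matched
by exactly two incoming operations before lnet_md_exhausted() reports it
inactive; an MD with LNET_MD_THRESH_INF is never decremented here and
remains usable until it is explicitly unlinked.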
+
+void
+lnet_msg_detach_md(lnet_msg_t *msg, int status)
+{
+	lnet_libmd_t	*md = msg->msg_md;
+	int		unlink;
+
+	/* Now it's safe to drop my caller's ref */
+	md->md_refcount--;
+	LASSERT(md->md_refcount >= 0);
+
+	unlink = lnet_md_unlinkable(md);
+	if (md->md_eq != NULL) {
+		msg->msg_ev.status   = status;
+		msg->msg_ev.unlinked = unlink;
+		lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev);
+	}
+
+	if (unlink)
+		lnet_md_unlink(md);
+
+	msg->msg_md = NULL;
+}
+
+static int
+lnet_complete_msg_locked(lnet_msg_t *msg, int cpt)
+{
+	lnet_handle_wire_t ack_wmd;
+	int		rc;
+	int		status = msg->msg_ev.status;
+
+	LASSERT(msg->msg_onactivelist);
+
+	if (status == 0 && msg->msg_ack) {
+		/* Only send an ACK if the PUT completed successfully */
+
+		lnet_msg_decommit(msg, cpt, 0);
+
+		msg->msg_ack = 0;
+		lnet_net_unlock(cpt);
+
+		LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
+		LASSERT(!msg->msg_routing);
+
+		ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
+
+		lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);
+
+		msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
+		msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
+		msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);
+
+		/* NB: we probably want to use NID of msg::msg_from as 3rd
+		 * parameter (router NID) if it's routed message */
+		rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY);
+
+		lnet_net_lock(cpt);
+		/*
+		 * NB: the message is committed for sending; we should return
+		 * on success because the LND will finalize it later.
+		 *
+		 * Also, there is a possibility that the message is committed
+		 * for sending yet fails before being delivered to the LND,
+		 * e.g. on ENOMEM. In that case we can't fall through either,
+		 * because the CPT for sending can differ from the CPT for
+		 * receiving, so we should return to lnet_finalize() to make
+		 * sure we are locking the correct partition.
+		 */
+		return rc;
+
+	} else if (status == 0 &&	/* OK so far */
+		   (msg->msg_routing && !msg->msg_sending)) {
+		/* not forwarded */
+		LASSERT(!msg->msg_receiving);	/* called back recv already */
+		lnet_net_unlock(cpt);
+
+		rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY);
+
+		lnet_net_lock(cpt);
+		/*
+		 * NB: the message is committed for sending; we should return
+		 * on success because the LND will finalize it later.
+		 *
+		 * Also, there is a possibility that the message is committed
+		 * for sending yet fails before being delivered to the LND,
+		 * e.g. on ENOMEM. In that case we can't fall through either:
+		 * - The rule is that a message must be decommitted for
+		 *   sending first if it's committed for both sending and
+		 *   receiving.
+		 * - The CPT for sending can differ from the CPT for
+		 *   receiving, so we should return to lnet_finalize() to
+		 *   make sure we are locking the correct partition.
+		 */
+		return rc;
+	}
+
+	lnet_msg_decommit(msg, cpt, status);
+	lnet_msg_free_locked(msg);
+	return 0;
+}
+
+void
+lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status)
+{
+	struct lnet_msg_container	*container;
+	int				my_slot;
+	int				cpt;
+	int				rc;
+	int				i;
+
+	LASSERT(!in_interrupt());
+
+	if (msg == NULL)
+		return;
+#if 0
+	CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n",
+	       lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target),
+	       msg->msg_target_is_router ? "t" : "",
+	       msg->msg_routing ? "X" : "",
+	       msg->msg_ack ? "A" : "",
+	       msg->msg_sending ? "S" : "",
+	       msg->msg_receiving ? "R" : "",
+	       msg->msg_delayed ? "d" : "",
+	       msg->msg_txcredit ? "C" : "",
+	       msg->msg_peertxcredit ? "c" : "",
+	       msg->msg_rtrcredit ? "F" : "",
+	       msg->msg_peerrtrcredit ? "f" : "",
+	       msg->msg_onactivelist ? "!" : "",
+	       msg->msg_txpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid),
+	       msg->msg_rxpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid));
+#endif
+	msg->msg_ev.status = status;
+
+	if (msg->msg_md != NULL) {
+		cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie);
+
+		lnet_res_lock(cpt);
+		lnet_msg_detach_md(msg, status);
+		lnet_res_unlock(cpt);
+	}
+
+ again:
+	rc = 0;
+	if (!msg->msg_tx_committed && !msg->msg_rx_committed) {
+		/* not committed to the network yet */
+		LASSERT(!msg->msg_onactivelist);
+		lnet_msg_free(msg);
+		return;
+	}
+
+	/*
+	 * NB: a routed message can be committed for both receiving and
+	 * sending; we should finalize in LIFO order to keep the counters
+	 * correct (finalize sending first, then finalize receiving).
+	 */
+	cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt;
+	lnet_net_lock(cpt);
+
+	container = the_lnet.ln_msg_containers[cpt];
+	list_add_tail(&msg->msg_list, &container->msc_finalizing);
+
+	/* Recursion breaker.  Don't complete the message here if I am (or
+	 * enough other threads are) already completing messages */
+
+	my_slot = -1;
+	for (i = 0; i < container->msc_nfinalizers; i++) {
+		if (container->msc_finalizers[i] == current)
+			break;
+
+		if (my_slot < 0 && container->msc_finalizers[i] == NULL)
+			my_slot = i;
+	}
+
+	if (i < container->msc_nfinalizers || my_slot < 0) {
+		lnet_net_unlock(cpt);
+		return;
+	}
+
+	container->msc_finalizers[my_slot] = current;
+
+	while (!list_empty(&container->msc_finalizing)) {
+		msg = list_entry(container->msc_finalizing.next,
+				     lnet_msg_t, msg_list);
+
+		list_del(&msg->msg_list);
+
+		/* NB drops and regains the lnet lock if it actually does
+		 * anything, so my finalizing friends can chomp along too */
+		rc = lnet_complete_msg_locked(msg, cpt);
+		if (rc != 0)
+			break;
+	}
+
+	container->msc_finalizers[my_slot] = NULL;
+	lnet_net_unlock(cpt);
+
+	if (rc != 0)
+		goto again;
+}
+EXPORT_SYMBOL(lnet_finalize);
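
The slot dance above is a recursion breaker: completing a message can itself
send a message (an ACK, or a routed forward) whose immediate failure
re-enters lnet_finalize(). Rather than recursing, re-entrant calls only
enqueue; a bounded number of threads (msc_nfinalizers) claim slots and drain
the queue. Stripped of the locking, the pattern is roughly this sketch
(helper names are illustrative):

	list_add_tail(&msg->msg_list, &finalizing_queue);

	if (thread_is_finalizing(current) || !claim_free_slot(current))
		return;		/* an active finalizer will process it */

	while ((msg = dequeue(&finalizing_queue)) != NULL)
		complete_msg(msg);	/* may re-enter: it only enqueues */

	release_slot(current);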
+
+void
+lnet_msg_container_cleanup(struct lnet_msg_container *container)
+{
+	int     count = 0;
+
+	if (container->msc_init == 0)
+		return;
+
+	while (!list_empty(&container->msc_active)) {
+		lnet_msg_t *msg = list_entry(container->msc_active.next,
+						 lnet_msg_t, msg_activelist);
+
+		LASSERT(msg->msg_onactivelist);
+		msg->msg_onactivelist = 0;
+		list_del(&msg->msg_activelist);
+		lnet_msg_free(msg);
+		count++;
+	}
+
+	if (count > 0)
+		CERROR("%d active msg on exit\n", count);
+
+	if (container->msc_finalizers != NULL) {
+		LIBCFS_FREE(container->msc_finalizers,
+			    container->msc_nfinalizers *
+			    sizeof(*container->msc_finalizers));
+		container->msc_finalizers = NULL;
+	}
+#ifdef LNET_USE_LIB_FREELIST
+	lnet_freelist_fini(&container->msc_freelist);
+#endif
+	container->msc_init = 0;
+}
+
+int
+lnet_msg_container_setup(struct lnet_msg_container *container, int cpt)
+{
+	int	rc;
+
+	container->msc_init = 1;
+
+	INIT_LIST_HEAD(&container->msc_active);
+	INIT_LIST_HEAD(&container->msc_finalizing);
+
+#ifdef LNET_USE_LIB_FREELIST
+	memset(&container->msc_freelist, 0, sizeof(lnet_freelist_t));
+
+	rc = lnet_freelist_init(&container->msc_freelist,
+				LNET_FL_MAX_MSGS, sizeof(lnet_msg_t));
+	if (rc != 0) {
+		CERROR("Failed to init freelist for message container\n");
+		lnet_msg_container_cleanup(container);
+		return rc;
+	}
+#else
+	rc = 0;
+#endif
+	/* number of CPUs */
+	container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt);
+
+	LIBCFS_CPT_ALLOC(container->msc_finalizers, lnet_cpt_table(), cpt,
+			 container->msc_nfinalizers *
+			 sizeof(*container->msc_finalizers));
+
+	if (container->msc_finalizers == NULL) {
+		CERROR("Failed to allocate message finalizers\n");
+		lnet_msg_container_cleanup(container);
+		return -ENOMEM;
+	}
+
+	return rc;
+}
+
+void
+lnet_msg_containers_destroy(void)
+{
+	struct lnet_msg_container *container;
+	int     i;
+
+	if (the_lnet.ln_msg_containers == NULL)
+		return;
+
+	cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers)
+		lnet_msg_container_cleanup(container);
+
+	cfs_percpt_free(the_lnet.ln_msg_containers);
+	the_lnet.ln_msg_containers = NULL;
+}
+
+int
+lnet_msg_containers_create(void)
+{
+	struct lnet_msg_container *container;
+	int	rc;
+	int	i;
+
+	the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(),
+						      sizeof(*container));
+
+	if (the_lnet.ln_msg_containers == NULL) {
+		CERROR("Failed to allocate cpu-partition data for network\n");
+		return -ENOMEM;
+	}
+
+	cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) {
+		rc = lnet_msg_container_setup(container, i);
+		if (rc != 0) {
+			lnet_msg_containers_destroy();
+			return rc;
+		}
+	}
+
+	return 0;
+}

+ 938 - 0
drivers/staging/lustre/lnet/lnet/lib-ptl.c

@@ -0,0 +1,938 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-ptl.c
+ *
+ * portal & match routines
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+/* NB: add /proc interfaces in upcoming patches */
+int	portal_rotor	= LNET_PTL_ROTOR_HASH_RT;
+CFS_MODULE_PARM(portal_rotor, "i", int, 0644,
+		"redirect PUTs to different cpu-partitions");
+
+static int
+lnet_ptl_match_type(unsigned int index, lnet_process_id_t match_id,
+		    __u64 mbits, __u64 ignore_bits)
+{
+	struct lnet_portal	*ptl = the_lnet.ln_portals[index];
+	int			unique;
+
+	unique = ignore_bits == 0 &&
+		 match_id.nid != LNET_NID_ANY &&
+		 match_id.pid != LNET_PID_ANY;
+
+	LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));
+
+	/* prefer to check w/o any lock */
+	if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
+		goto match;
+
+	/* unset, new portal */
+	lnet_ptl_lock(ptl);
+	/* check again with lock */
+	if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
+		lnet_ptl_unlock(ptl);
+		goto match;
+	}
+
+	/* still not set */
+	if (unique)
+		lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
+	else
+		lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
+
+	lnet_ptl_unlock(ptl);
+
+	return 1;
+
+ match:
+	if ((lnet_ptl_is_unique(ptl) && !unique) ||
+	    (lnet_ptl_is_wildcard(ptl) && unique))
+		return 0;
+	return 1;
+}
+
+static void
+lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt)
+{
+	struct lnet_match_table	*mtable = ptl->ptl_mtables[cpt];
+	int			i;
+
+	/* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
+	LASSERT(lnet_ptl_is_wildcard(ptl));
+
+	mtable->mt_enabled = 1;
+
+	ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt;
+	for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) {
+		LASSERT(ptl->ptl_mt_maps[i] != cpt);
+		if (ptl->ptl_mt_maps[i] < cpt)
+			break;
+
+		/* swap to order */
+		ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i];
+		ptl->ptl_mt_maps[i] = cpt;
+	}
+
+	ptl->ptl_mt_nmaps++;
+}
+
+static void
+lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt)
+{
+	struct lnet_match_table	*mtable = ptl->ptl_mtables[cpt];
+	int			i;
+
+	/* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
+	LASSERT(lnet_ptl_is_wildcard(ptl));
+
+	if (LNET_CPT_NUMBER == 1)
+		return; /* never disable the only match-table */
+
+	mtable->mt_enabled = 0;
+
+	LASSERT(ptl->ptl_mt_nmaps > 0 &&
+		ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER);
+
+	/* remove it from mt_maps */
+	ptl->ptl_mt_nmaps--;
+	for (i = 0; i < ptl->ptl_mt_nmaps; i++) {
+		if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */
+			ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1];
+	}
+}
+
+static int
+lnet_try_match_md(lnet_libmd_t *md,
+		  struct lnet_match_info *info, struct lnet_msg *msg)
+{
+	/* ALWAYS called holding the lnet_res_lock, and must not call
+	 * lnet_res_unlock; lnet_match_blocked_msg() relies on this to
+	 * avoid races */
+	unsigned int	offset;
+	unsigned int	mlength;
+	lnet_me_t	*me = md->md_me;
+
+	/* MD exhausted */
+	if (lnet_md_exhausted(md))
+		return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED;
+
+	/* mismatched MD op */
+	if ((md->md_options & info->mi_opc) == 0)
+		return LNET_MATCHMD_NONE;
+
+	/* mismatched ME nid/pid? */
+	if (me->me_match_id.nid != LNET_NID_ANY &&
+	    me->me_match_id.nid != info->mi_id.nid)
+		return LNET_MATCHMD_NONE;
+
+	if (me->me_match_id.pid != LNET_PID_ANY &&
+	    me->me_match_id.pid != info->mi_id.pid)
+		return LNET_MATCHMD_NONE;
+
+	/* mismatched ME matchbits? */
+	if (((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) != 0)
+		return LNET_MATCHMD_NONE;
+
+	/* Hurrah! This _is_ a match; check it out... */
+
+	if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0)
+		offset = md->md_offset;
+	else
+		offset = info->mi_roffset;
+
+	if ((md->md_options & LNET_MD_MAX_SIZE) != 0) {
+		mlength = md->md_max_size;
+		LASSERT(md->md_offset + mlength <= md->md_length);
+	} else {
+		mlength = md->md_length - offset;
+	}
+
+	if (info->mi_rlength <= mlength) {	/* fits in allowed space */
+		mlength = info->mi_rlength;
+	} else if ((md->md_options & LNET_MD_TRUNCATE) == 0) {
+		/* this packet _really_ is too big */
+		CERROR("Matching packet from %s, match "LPU64
+		       " length %d too big: %d left, %d allowed\n",
+		       libcfs_id2str(info->mi_id), info->mi_mbits,
+		       info->mi_rlength, md->md_length - offset, mlength);
+
+		return LNET_MATCHMD_DROP;
+	}
+
+	/* Commit to this ME/MD */
+	CDEBUG(D_NET, "Incoming %s index %x from %s of "
+	       "length %d/%d into md "LPX64" [%d] + %d\n",
+	       (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
+	       info->mi_portal, libcfs_id2str(info->mi_id), mlength,
+	       info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
+
+	lnet_msg_attach_md(msg, md, offset, mlength);
+	md->md_offset = offset + mlength;
+
+	if (!lnet_md_exhausted(md))
+		return LNET_MATCHMD_OK;
+
+	/* Auto-unlink NOW, so the ME gets unlinked if required.
+	 * We bumped md->md_refcount above so the MD just gets flagged
+	 * for unlink when it is finalized. */
+	if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0)
+		lnet_md_unlink(md);
+
+	return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED;
+}
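
A concrete instance of the length logic above: if the matched MD has
md_length = 4096 with an effective offset of 0 and the incoming PUT carries
rlength = 8192, the message is dropped unless the MD sets LNET_MD_TRUNCATE,
in which case mlength is clamped to 4096 while the event still reports
rlength = 8192, letting the receiver detect the truncation.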
+
+static struct lnet_match_table *
+lnet_match2mt(struct lnet_portal *ptl, lnet_process_id_t id, __u64 mbits)
+{
+	if (LNET_CPT_NUMBER == 1)
+		return ptl->ptl_mtables[0]; /* the only one */
+
+	/* if it's a unique portal, return match-table hashed by NID */
+	return lnet_ptl_is_unique(ptl) ?
+	       ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
+}
+
+struct lnet_match_table *
+lnet_mt_of_attach(unsigned int index, lnet_process_id_t id,
+		  __u64 mbits, __u64 ignore_bits, lnet_ins_pos_t pos)
+{
+	struct lnet_portal	*ptl;
+	struct lnet_match_table	*mtable;
+
+	/* NB: called w/o lock */
+	LASSERT(index < the_lnet.ln_nportals);
+
+	if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
+		return NULL;
+
+	ptl = the_lnet.ln_portals[index];
+
+	mtable = lnet_match2mt(ptl, id, mbits);
+	if (mtable != NULL) /* unique portal or only one match-table */
+		return mtable;
+
+	/* it's a wildcard portal */
+	switch (pos) {
+	default:
+		return NULL;
+	case LNET_INS_BEFORE:
+	case LNET_INS_AFTER:
+		/* posted by a thread with no CPT affinity; always hash to a
+		 * specific match-table to avoid buffer stealing, which is
+		 * heavy */
+		return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER];
+	case LNET_INS_LOCAL:
+		/* posted by cpu-affinity thread */
+		return ptl->ptl_mtables[lnet_cpt_current()];
+	}
+}
+
+static struct lnet_match_table *
+lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
+{
+	struct lnet_match_table	*mtable;
+	struct lnet_portal	*ptl;
+	int			nmaps;
+	int			rotor;
+	int			routed;
+	int			cpt;
+
+	/* NB: called w/o lock */
+	LASSERT(info->mi_portal < the_lnet.ln_nportals);
+	ptl = the_lnet.ln_portals[info->mi_portal];
+
+	LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl));
+
+	mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits);
+	if (mtable != NULL)
+		return mtable;
+
+	/* it's a wildcard portal */
+	routed = LNET_NIDNET(msg->msg_hdr.src_nid) !=
+		 LNET_NIDNET(msg->msg_hdr.dest_nid);
+
+	if (portal_rotor == LNET_PTL_ROTOR_OFF ||
+	    (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) {
+		cpt = lnet_cpt_current();
+		if (ptl->ptl_mtables[cpt]->mt_enabled)
+			return ptl->ptl_mtables[cpt];
+	}
+
+	rotor = ptl->ptl_rotor++; /* get round-robin factor */
+	if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
+		cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
+	else
+		cpt = rotor % LNET_CPT_NUMBER;
+
+	if (!ptl->ptl_mtables[cpt]->mt_enabled) {
+		/* is there any active entry for this portal? */
+		nmaps = ptl->ptl_mt_nmaps;
+		/* map to an active mtable to avoid heavy "stealing" */
+		if (nmaps != 0) {
+			/* NB: there is a possibility that ptl_mt_maps is
+			 * being changed because we are not under the
+			 * protection of lnet_ptl_lock, but it shouldn't
+			 * hurt anything */
+			cpt = ptl->ptl_mt_maps[rotor % nmaps];
+		}
+	}
+
+	return ptl->ptl_mtables[cpt];
+}
+
+static int
+lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos)
+{
+	__u64	*bmap;
+	int	i;
+
+	if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
+		return 0;
+
+	if (pos < 0) { /* check all bits */
+		for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) {
+			if (mtable->mt_exhausted[i] != (__u64)(-1))
+				return 0;
+		}
+		return 1;
+	}
+
+	LASSERT(pos <= LNET_MT_HASH_IGNORE);
+	/* mtable::mt_mhash[pos] is marked as exhausted or not */
+	bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
+	pos &= (1 << LNET_MT_BITS_U64) - 1;
+
+	return ((*bmap) & (1ULL << pos)) != 0;
+}
+
+static void
+lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted)
+{
+	__u64	*bmap;
+
+	LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]));
+	LASSERT(pos <= LNET_MT_HASH_IGNORE);
+
+	/* set mtable::mt_mhash[pos] as exhausted/non-exhausted */
+	bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
+	pos &= (1 << LNET_MT_BITS_U64) - 1;
+
+	if (!exhausted)
+		*bmap &= ~(1ULL << pos);
+	else
+		*bmap |= 1ULL << pos;
+}
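
As a worked example of the bit arithmetic (assuming LNET_MT_BITS_U64 is 6,
i.e. 64-bit words): hash position 70 selects word 70 >> 6 = 1 and bit
70 & 63 = 6, so marking it exhausted sets bit 6 of mt_exhausted[1].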
+
+struct list_head *
+lnet_mt_match_head(struct lnet_match_table *mtable,
+		   lnet_process_id_t id, __u64 mbits)
+{
+	struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
+
+	if (lnet_ptl_is_wildcard(ptl)) {
+		return &mtable->mt_mhash[mbits & LNET_MT_HASH_MASK];
+	} else {
+		unsigned long hash = mbits + id.nid + id.pid;
+
+		LASSERT(lnet_ptl_is_unique(ptl));
+		hash = cfs_hash_long(hash, LNET_MT_HASH_BITS);
+		return &mtable->mt_mhash[hash];
+	}
+}
+
+int
+lnet_mt_match_md(struct lnet_match_table *mtable,
+		 struct lnet_match_info *info, struct lnet_msg *msg)
+{
+	struct list_head		*head;
+	lnet_me_t		*me;
+	lnet_me_t		*tmp;
+	int			exhausted = 0;
+	int			rc;
+
+	/* any ME with ignore bits? */
+	if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
+		head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
+	else
+		head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
+ again:
+	/* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */
+	if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
+		exhausted = LNET_MATCHMD_EXHAUSTED;
+
+	list_for_each_entry_safe(me, tmp, head, me_list) {
+		/* ME attached but MD not attached yet */
+		if (me->me_md == NULL)
+			continue;
+
+		LASSERT(me == me->me_md->md_me);
+
+		rc = lnet_try_match_md(me->me_md, info, msg);
+		if ((rc & LNET_MATCHMD_EXHAUSTED) == 0)
+			exhausted = 0; /* mlist is not empty */
+
+		if ((rc & LNET_MATCHMD_FINISH) != 0) {
+			/* don't return EXHAUSTED bit because we don't know
+			 * whether the mlist is empty or not */
+			return rc & ~LNET_MATCHMD_EXHAUSTED;
+		}
+	}
+
+	if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */
+		lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1);
+		if (!lnet_mt_test_exhausted(mtable, -1))
+			exhausted = 0;
+	}
+
+	if (exhausted == 0 && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) {
+		head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
+		goto again; /* re-check MEs w/o ignore-bits */
+	}
+
+	if (info->mi_opc == LNET_MD_OP_GET ||
+	    !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal]))
+		return LNET_MATCHMD_DROP | exhausted;
+
+	return LNET_MATCHMD_NONE | exhausted;
+}
+
+static int
+lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg)
+{
+	int	rc;
+
+	/* message arrived before any buffer posting on this portal,
+	 * simply delay or drop this message */
+	if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)))
+		return 0;
+
+	lnet_ptl_lock(ptl);
+	/* check it again with hold of lock */
+	if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) {
+		lnet_ptl_unlock(ptl);
+		return 0;
+	}
+
+	if (lnet_ptl_is_lazy(ptl)) {
+		if (msg->msg_rx_ready_delay) {
+			msg->msg_rx_delayed = 1;
+			list_add_tail(&msg->msg_list,
+					  &ptl->ptl_msg_delayed);
+		}
+		rc = LNET_MATCHMD_NONE;
+	} else {
+		rc = LNET_MATCHMD_DROP;
+	}
+
+	lnet_ptl_unlock(ptl);
+	return rc;
+}
+
+static int
+lnet_ptl_match_delay(struct lnet_portal *ptl,
+		     struct lnet_match_info *info, struct lnet_msg *msg)
+{
+	int	first = ptl->ptl_mt_maps[0]; /* read w/o lock */
+	int	rc = 0;
+	int	i;
+
+	/* steal a buffer from other CPTs, and delay the message if there is
+	 * nothing to steal; this function is more expensive than a regular
+	 * match, but we don't expect it to happen often */
+	LASSERT(lnet_ptl_is_wildcard(ptl));
+
+	for (i = 0; i < LNET_CPT_NUMBER; i++) {
+		struct lnet_match_table *mtable;
+		int			cpt;
+
+		cpt = (first + i) % LNET_CPT_NUMBER;
+		mtable = ptl->ptl_mtables[cpt];
+		if (i != 0 && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled)
+			continue;
+
+		lnet_res_lock(cpt);
+		lnet_ptl_lock(ptl);
+
+		if (i == 0) { /* the first try, attach on stealing list */
+			list_add_tail(&msg->msg_list,
+					  &ptl->ptl_msg_stealing);
+		}
+
+		if (!list_empty(&msg->msg_list)) { /* on stealing list */
+			rc = lnet_mt_match_md(mtable, info, msg);
+
+			if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 &&
+			    mtable->mt_enabled)
+				lnet_ptl_disable_mt(ptl, cpt);
+
+			if ((rc & LNET_MATCHMD_FINISH) != 0)
+				list_del_init(&msg->msg_list);
+
+		} else {
+			/* could be matched by lnet_ptl_attach_md()
+			 * which is called by another thread */
+			rc = msg->msg_md == NULL ?
+			     LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
+		}
+
+		if (!list_empty(&msg->msg_list) && /* not matched yet */
+		    (i == LNET_CPT_NUMBER - 1 || /* the last CPT */
+		     ptl->ptl_mt_nmaps == 0 ||   /* no active CPT */
+		     (ptl->ptl_mt_nmaps == 1 &&  /* the only active CPT */
+		      ptl->ptl_mt_maps[0] == cpt))) {
+			/* nothing to steal, delay or drop */
+			list_del_init(&msg->msg_list);
+
+			if (lnet_ptl_is_lazy(ptl)) {
+				msg->msg_rx_delayed = 1;
+				list_add_tail(&msg->msg_list,
+						  &ptl->ptl_msg_delayed);
+				rc = LNET_MATCHMD_NONE;
+			} else {
+				rc = LNET_MATCHMD_DROP;
+			}
+		}
+
+		lnet_ptl_unlock(ptl);
+		lnet_res_unlock(cpt);
+
+		if ((rc & LNET_MATCHMD_FINISH) != 0 || msg->msg_rx_delayed)
+			break;
+	}
+
+	return rc;
+}
+
+int
+lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
+{
+	struct lnet_match_table	*mtable;
+	struct lnet_portal	*ptl;
+	int			rc;
+
+	CDEBUG(D_NET, "Request from %s of length %d into portal %d "
+	       "MB="LPX64"\n", libcfs_id2str(info->mi_id),
+	       info->mi_rlength, info->mi_portal, info->mi_mbits);
+
+	if (info->mi_portal >= the_lnet.ln_nportals) {
+		CERROR("Invalid portal %d not in [0-%d]\n",
+		       info->mi_portal, the_lnet.ln_nportals);
+		return LNET_MATCHMD_DROP;
+	}
+
+	ptl = the_lnet.ln_portals[info->mi_portal];
+	rc = lnet_ptl_match_early(ptl, msg);
+	if (rc != 0) /* matched or delayed early message */
+		return rc;
+
+	mtable = lnet_mt_of_match(info, msg);
+	lnet_res_lock(mtable->mt_cpt);
+
+	if (the_lnet.ln_shutdown) {
+		rc = LNET_MATCHMD_DROP;
+		goto out1;
+	}
+
+	rc = lnet_mt_match_md(mtable, info, msg);
+	if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 && mtable->mt_enabled) {
+		lnet_ptl_lock(ptl);
+		lnet_ptl_disable_mt(ptl, mtable->mt_cpt);
+		lnet_ptl_unlock(ptl);
+	}
+
+	if ((rc & LNET_MATCHMD_FINISH) != 0)	/* matched or dropping */
+		goto out1;
+
+	if (!msg->msg_rx_ready_delay)
+		goto out1;
+
+	LASSERT(lnet_ptl_is_lazy(ptl));
+	LASSERT(!msg->msg_rx_delayed);
+
+	/* NB: we don't expect "delay" to happen often */
+	if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) {
+		lnet_ptl_lock(ptl);
+
+		msg->msg_rx_delayed = 1;
+		list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);
+
+		lnet_ptl_unlock(ptl);
+		lnet_res_unlock(mtable->mt_cpt);
+
+	} else  {
+		lnet_res_unlock(mtable->mt_cpt);
+		rc = lnet_ptl_match_delay(ptl, info, msg);
+	}
+
+	if (msg->msg_rx_delayed) {
+		CDEBUG(D_NET,
+		       "Delaying %s from %s ptl %d MB "LPX64" off %d len %d\n",
+		       info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
+		       libcfs_id2str(info->mi_id), info->mi_portal,
+		       info->mi_mbits, info->mi_roffset, info->mi_rlength);
+	}
+	goto out0;
+ out1:
+	lnet_res_unlock(mtable->mt_cpt);
+ out0:
+	/* EXHAUSTED bit is only meaningful for internal functions */
+	return rc & ~LNET_MATCHMD_EXHAUSTED;
+}
+
+void
+lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md)
+{
+	LASSERT(me->me_md == md && md->md_me == me);
+
+	me->me_md = NULL;
+	md->md_me = NULL;
+}
+
+/* called with lnet_res_lock held */
+void
+lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
+		   struct list_head *matches, struct list_head *drops)
+{
+	struct lnet_portal	*ptl = the_lnet.ln_portals[me->me_portal];
+	struct lnet_match_table	*mtable;
+	struct list_head		*head;
+	lnet_msg_t		*tmp;
+	lnet_msg_t		*msg;
+	int			exhausted = 0;
+	int			cpt;
+
+	LASSERT(md->md_refcount == 0); /* a brand new MD */
+
+	me->me_md = md;
+	md->md_me = me;
+
+	cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
+	mtable = ptl->ptl_mtables[cpt];
+
+	if (list_empty(&ptl->ptl_msg_stealing) &&
+	    list_empty(&ptl->ptl_msg_delayed) &&
+	    !lnet_mt_test_exhausted(mtable, me->me_pos))
+		return;
+
+	lnet_ptl_lock(ptl);
+	head = &ptl->ptl_msg_stealing;
+ again:
+	list_for_each_entry_safe(msg, tmp, head, msg_list) {
+		struct lnet_match_info	info;
+		lnet_hdr_t		*hdr;
+		int			rc;
+
+		LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
+
+		hdr   = &msg->msg_hdr;
+		info.mi_id.nid	= hdr->src_nid;
+		info.mi_id.pid	= hdr->src_pid;
+		info.mi_opc	= LNET_MD_OP_PUT;
+		info.mi_portal	= hdr->msg.put.ptl_index;
+		info.mi_rlength	= hdr->payload_length;
+		info.mi_roffset	= hdr->msg.put.offset;
+		info.mi_mbits	= hdr->msg.put.match_bits;
+
+		rc = lnet_try_match_md(md, &info, msg);
+
+		exhausted = (rc & LNET_MATCHMD_EXHAUSTED) != 0;
+		if ((rc & LNET_MATCHMD_NONE) != 0) {
+			if (exhausted)
+				break;
+			continue;
+		}
+
+		/* Hurrah! This _is_ a match */
+		LASSERT((rc & LNET_MATCHMD_FINISH) != 0);
+		list_del_init(&msg->msg_list);
+
+		if (head == &ptl->ptl_msg_stealing) {
+			if (exhausted)
+				break;
+			/* stealing thread will handle the message */
+			continue;
+		}
+
+		if ((rc & LNET_MATCHMD_OK) != 0) {
+			list_add_tail(&msg->msg_list, matches);
+
+			CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
+			       "match "LPU64" offset %d length %d.\n",
+			       libcfs_id2str(info.mi_id),
+			       info.mi_portal, info.mi_mbits,
+			       info.mi_roffset, info.mi_rlength);
+		} else {
+			list_add_tail(&msg->msg_list, drops);
+		}
+
+		if (exhausted)
+			break;
+	}
+
+	if (!exhausted && head == &ptl->ptl_msg_stealing) {
+		head = &ptl->ptl_msg_delayed;
+		goto again;
+	}
+
+	if (lnet_ptl_is_wildcard(ptl) && !exhausted) {
+		lnet_mt_set_exhausted(mtable, me->me_pos, 0);
+		if (!mtable->mt_enabled)
+			lnet_ptl_enable_mt(ptl, cpt);
+	}
+
+	lnet_ptl_unlock(ptl);
+}
+
+void
+lnet_ptl_cleanup(struct lnet_portal *ptl)
+{
+	struct lnet_match_table	*mtable;
+	int			i;
+
+	if (ptl->ptl_mtables == NULL) /* uninitialized portal */
+		return;
+
+	LASSERT(list_empty(&ptl->ptl_msg_delayed));
+	LASSERT(list_empty(&ptl->ptl_msg_stealing));
+	cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
+		struct list_head	*mhash;
+		lnet_me_t	*me;
+		int		j;
+
+		if (mtable->mt_mhash == NULL) /* uninitialized match-table */
+			continue;
+
+		mhash = mtable->mt_mhash;
+		/* cleanup ME */
+		for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) {
+			while (!list_empty(&mhash[j])) {
+				me = list_entry(mhash[j].next,
+						    lnet_me_t, me_list);
+				CERROR("Active ME %p on exit\n", me);
+				list_del(&me->me_list);
+				lnet_me_free(me);
+			}
+		}
+		/* the extra entry is for MEs with ignore bits */
+		LIBCFS_FREE(mhash, sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1));
+	}
+
+	cfs_percpt_free(ptl->ptl_mtables);
+	ptl->ptl_mtables = NULL;
+}
+
+int
+lnet_ptl_setup(struct lnet_portal *ptl, int index)
+{
+	struct lnet_match_table	*mtable;
+	struct list_head		*mhash;
+	int			i;
+	int			j;
+
+	ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(),
+					    sizeof(struct lnet_match_table));
+	if (ptl->ptl_mtables == NULL) {
+		CERROR("Failed to create match table for portal %d\n", index);
+		return -ENOMEM;
+	}
+
+	ptl->ptl_index = index;
+	INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
+	INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
+	spin_lock_init(&ptl->ptl_lock);
+	cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
+		/* the extra entry is for MEs with ignore bits */
+		LIBCFS_CPT_ALLOC(mhash, lnet_cpt_table(), i,
+				 sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1));
+		if (mhash == NULL) {
+			CERROR("Failed to create match hash for portal %d\n",
+			       index);
+			goto failed;
+		}
+
+		memset(&mtable->mt_exhausted[0], -1,
+		       sizeof(mtable->mt_exhausted[0]) *
+		       LNET_MT_EXHAUSTED_BMAP);
+		mtable->mt_mhash = mhash;
+		for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++)
+			INIT_LIST_HEAD(&mhash[j]);
+
+		mtable->mt_portal = index;
+		mtable->mt_cpt = i;
+	}
+
+	return 0;
+ failed:
+	lnet_ptl_cleanup(ptl);
+	return -ENOMEM;
+}
+
+void
+lnet_portals_destroy(void)
+{
+	int	i;
+
+	if (the_lnet.ln_portals == NULL)
+		return;
+
+	for (i = 0; i < the_lnet.ln_nportals; i++)
+		lnet_ptl_cleanup(the_lnet.ln_portals[i]);
+
+	cfs_array_free(the_lnet.ln_portals);
+	the_lnet.ln_portals = NULL;
+}
+
+int
+lnet_portals_create(void)
+{
+	int	size;
+	int	i;
+
+	size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]);
+
+	the_lnet.ln_nportals = MAX_PORTALS;
+	the_lnet.ln_portals = cfs_array_alloc(the_lnet.ln_nportals, size);
+	if (the_lnet.ln_portals == NULL) {
+		CERROR("Failed to allocate portals table\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < the_lnet.ln_nportals; i++) {
+		if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
+			lnet_portals_destroy();
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Turn on the lazy portal attribute. Use with caution!
+ *
+ * This portal attribute only affects incoming PUT requests to the portal,
+ * and is off by default. By default, if there's no matching MD for an
+ * incoming PUT request, it is simply dropped. With the lazy attribute on,
+ * such requests are queued indefinitely until either a matching MD is
+ * posted to the portal or the lazy attribute is turned off.
+ *
+ * While this prevents dropped requests, it should be regarded as the last
+ * line of defense - i.e. users must keep a close watch on the number of
+ * active buffers on a lazy portal, and post more buffers as soon as that
+ * number drops too low. This is because delayed requests usually have
+ * detrimental effects on the underlying network connections. A few delayed
+ * requests often suffice to bring an underlying connection to a complete
+ * halt, due to flow control mechanisms.
+ *
+ * There's also a DOS attack risk. If users don't post match-all MDs on a
+ * lazy portal, a malicious peer can easily stop a service by sending some
+ * PUT requests with match bits that won't match any MD. A routed server is
+ * especially vulnerable since the connections to its neighbor routers are
+ * shared among all clients.
+ *
+ * \param portal Index of the portal to enable the lazy attribute on.
+ *
+ * \retval 0       On success.
+ * \retval -EINVAL If \a portal is not a valid index.
+ */
+int
+LNetSetLazyPortal(int portal)
+{
+	struct lnet_portal *ptl;
+
+	if (portal < 0 || portal >= the_lnet.ln_nportals)
+		return -EINVAL;
+
+	CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
+	ptl = the_lnet.ln_portals[portal];
+
+	lnet_res_lock(LNET_LOCK_EX);
+	lnet_ptl_lock(ptl);
+
+	lnet_ptl_setopt(ptl, LNET_PTL_LAZY);
+
+	lnet_ptl_unlock(ptl);
+	lnet_res_unlock(LNET_LOCK_EX);
+
+	return 0;
+}
+EXPORT_SYMBOL(LNetSetLazyPortal);
+
+/**
+ * Turn off the lazy portal attribute. Delayed requests on the portal,
+ * if any, will be all dropped when this function returns.
+ *
+ * \param portal Index of the portal to disable the lazy attribute on.
+ *
+ * \retval 0       On success.
+ * \retval -EINVAL If \a portal is not a valid index.
+ */
+int
+LNetClearLazyPortal(int portal)
+{
+	struct lnet_portal	*ptl;
+	LIST_HEAD(zombies);
+
+	if (portal < 0 || portal >= the_lnet.ln_nportals)
+		return -EINVAL;
+
+	ptl = the_lnet.ln_portals[portal];
+
+	lnet_res_lock(LNET_LOCK_EX);
+	lnet_ptl_lock(ptl);
+
+	if (!lnet_ptl_is_lazy(ptl)) {
+		lnet_ptl_unlock(ptl);
+		lnet_res_unlock(LNET_LOCK_EX);
+		return 0;
+	}
+
+	if (the_lnet.ln_shutdown)
+		CWARN("Active lazy portal %d on exit\n", portal);
+	else
+		CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
+
+	/* grab all the blocked messages atomically */
+	list_splice_init(&ptl->ptl_msg_delayed, &zombies);
+
+	lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
+
+	lnet_ptl_unlock(ptl);
+	lnet_res_unlock(LNET_LOCK_EX);
+
+	lnet_drop_delayed_msg_list(&zombies, "Clearing lazy portal attr");
+
+	return 0;
+}
+EXPORT_SYMBOL(LNetClearLazyPortal);
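
Taken together, a service that posts its request buffers asynchronously
would bracket its lifetime roughly as follows (MY_REQUEST_PORTAL is a
hypothetical portal index):

	rc = LNetSetLazyPortal(MY_REQUEST_PORTAL);
	LASSERT(rc == 0);

	/* ... run the service, keeping enough match-all MDs posted ... */

	LNetClearLazyPortal(MY_REQUEST_PORTAL);	/* drops anything queued */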

+ 120 - 0
drivers/staging/lustre/lnet/lnet/lo.c

@@ -0,0 +1,120 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+int
+lolnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
+{
+	LASSERT(!lntmsg->msg_routing);
+	LASSERT(!lntmsg->msg_target_is_router);
+
+	return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0);
+}
+
+int
+lolnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+	    int delayed, unsigned int niov,
+	    struct iovec *iov, lnet_kiov_t *kiov,
+	    unsigned int offset, unsigned int mlen, unsigned int rlen)
+{
+	lnet_msg_t *sendmsg = private;
+
+	if (lntmsg != NULL) {		   /* not discarding */
+		if (sendmsg->msg_iov != NULL) {
+			if (iov != NULL)
+				lnet_copy_iov2iov(niov, iov, offset,
+						  sendmsg->msg_niov,
+						  sendmsg->msg_iov,
+						  sendmsg->msg_offset, mlen);
+			else
+				lnet_copy_iov2kiov(niov, kiov, offset,
+						   sendmsg->msg_niov,
+						   sendmsg->msg_iov,
+						   sendmsg->msg_offset, mlen);
+		} else {
+			if (iov != NULL)
+				lnet_copy_kiov2iov(niov, iov, offset,
+						   sendmsg->msg_niov,
+						   sendmsg->msg_kiov,
+						   sendmsg->msg_offset, mlen);
+			else
+				lnet_copy_kiov2kiov(niov, kiov, offset,
+						    sendmsg->msg_niov,
+						    sendmsg->msg_kiov,
+						    sendmsg->msg_offset, mlen);
+		}
+
+		lnet_finalize(ni, lntmsg, 0);
+	}
+
+	lnet_finalize(ni, sendmsg, 0);
+	return 0;
+}
+
+static int lolnd_instanced;
+
+void
+lolnd_shutdown(lnet_ni_t *ni)
+{
+	CDEBUG(D_NET, "shutdown\n");
+	LASSERT(lolnd_instanced);
+
+	lolnd_instanced = 0;
+}
+
+int
+lolnd_startup(lnet_ni_t *ni)
+{
+	LASSERT(ni->ni_lnd == &the_lolnd);
+	LASSERT(!lolnd_instanced);
+	lolnd_instanced = 1;
+
+	return 0;
+}
+
+lnd_t the_lolnd = {
+	/* .lnd_list       = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
+	/* .lnd_refcount   = */ 0,
+	/* .lnd_type       = */ LOLND,
+	/* .lnd_startup    = */ lolnd_startup,
+	/* .lnd_shutdown   = */ lolnd_shutdown,
+	/* .lnd_ctl        = */ NULL,
+	/* .lnd_send       = */ lolnd_send,
+	/* .lnd_recv       = */ lolnd_recv,
+	/* .lnd_eager_recv = */ NULL,
+	/* .lnd_notify     = */ NULL,
+	/* .lnd_accept     = */ NULL
+};
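
the_lolnd doubles as the smallest possible example of the LND interface:
fill in an lnd_t and register it with the core. A sketch of the registration
pairing, assuming the lnet_register_lnd()/lnet_unregister_lnd() helpers
elsewhere in this tree (the loopback LND itself is registered internally
during LNet startup):

	lnet_register_lnd(&the_lolnd);		/* make the LND available */
	/* ... NIs using this LND can now be brought up ... */
	lnet_unregister_lnd(&the_lolnd);	/* requires no live instances */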

+ 154 - 0
drivers/staging/lustre/lnet/lnet/module.c

@@ -0,0 +1,154 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+static int config_on_load = 0;
+CFS_MODULE_PARM(config_on_load, "i", int, 0444,
+		"configure network at module load");
+
+static struct mutex lnet_config_mutex;
+
+int
+lnet_configure (void *arg)
+{
+	/* 'arg' is only there so this can be passed to kthread_run() */
+	int    rc = 0;
+
+	LNET_MUTEX_LOCK(&lnet_config_mutex);
+
+	if (!the_lnet.ln_niinit_self) {
+		rc = LNetNIInit(LUSTRE_SRV_LNET_PID);
+		if (rc >= 0) {
+			the_lnet.ln_niinit_self = 1;
+			rc = 0;
+		}
+	}
+
+	LNET_MUTEX_UNLOCK(&lnet_config_mutex);
+	return rc;
+}
+
+int
+lnet_unconfigure (void)
+{
+	int   refcount;
+
+	LNET_MUTEX_LOCK(&lnet_config_mutex);
+
+	if (the_lnet.ln_niinit_self) {
+		the_lnet.ln_niinit_self = 0;
+		LNetNIFini();
+	}
+
+	LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
+	refcount = the_lnet.ln_refcount;
+	LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
+
+	LNET_MUTEX_UNLOCK(&lnet_config_mutex);
+	return (refcount == 0) ? 0 : -EBUSY;
+}
+
+int
+lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data)
+{
+	int   rc;
+
+	switch (cmd) {
+	case IOC_LIBCFS_CONFIGURE:
+		return lnet_configure(NULL);
+
+	case IOC_LIBCFS_UNCONFIGURE:
+		return lnet_unconfigure();
+
+	default:
+		/* Passing LNET_PID_ANY only gives me a ref if the net is up
+		 * already; I'll need it to ensure the net can't go down while
+		 * I'm called into it */
+		rc = LNetNIInit(LNET_PID_ANY);
+		if (rc >= 0) {
+			rc = LNetCtl(cmd, data);
+			LNetNIFini();
+		}
+		return rc;
+	}
+}
+
+DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl);
+
+int
+init_lnet(void)
+{
+	int		  rc;
+	ENTRY;
+
+	mutex_init(&lnet_config_mutex);
+
+	rc = LNetInit();
+	if (rc != 0) {
+		CERROR("LNetInit: error %d\n", rc);
+		RETURN(rc);
+	}
+
+	rc = libcfs_register_ioctl(&lnet_ioctl_handler);
+	LASSERT (rc == 0);
+
+	if (config_on_load) {
+		/* Have to schedule a separate thread to avoid deadlocking
+		 * in modload */
+		(void) kthread_run(lnet_configure, NULL, "lnet_initd");
+	}
+
+	RETURN(0);
+}
+
+void
+fini_lnet(void)
+{
+	int rc;
+
+	rc = libcfs_deregister_ioctl(&lnet_ioctl_handler);
+	LASSERT (rc == 0);
+
+	LNetFini();
+}
+
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+
+cfs_module(lnet, "1.0.0", init_lnet, fini_lnet);

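Reviewer note: lnet_configure()/lnet_unconfigure() are deliberately idempotent — a mutex plus the ln_niinit_self flag lets module load, the IOC_LIBCFS_CONFIGURE ioctl, and repeated calls share one bring-up path. A rough user-space analogue using pthreads (configure, unconfigure and net_init are illustrative names only):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t cfg_mutex = PTHREAD_MUTEX_INITIALIZER;
	static int initialized_self;

	static int net_init(void)
	{
		puts("net up");
		return 0;
	}

	static void net_fini(void)
	{
		puts("net down");
	}

	static int configure(void)
	{
		int rc = 0;

		pthread_mutex_lock(&cfg_mutex);
		if (!initialized_self) {
			rc = net_init();
			if (rc == 0)
				initialized_self = 1;
		}
		pthread_mutex_unlock(&cfg_mutex);
		return rc;
	}

	static void unconfigure(void)
	{
		pthread_mutex_lock(&cfg_mutex);
		if (initialized_self) {
			initialized_self = 0;
			net_fini();
		}
		pthread_mutex_unlock(&cfg_mutex);
	}

	int main(void)
	{
		configure();
		configure();	/* no-op: the flag is already set */
		unconfigure();
		return 0;
	}

This is also why init_lnet() can hand lnet_configure() to kthread_run() without racing a concurrent ioctl: whichever caller takes the mutex first does the work, and the other sees the flag and returns.
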
+ 337 - 0
drivers/staging/lustre/lnet/lnet/peer.c

@@ -0,0 +1,337 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/peer.c
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+int
+lnet_peer_tables_create(void)
+{
+	struct lnet_peer_table	*ptable;
+	struct list_head		*hash;
+	int			i;
+	int			j;
+
+	the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
+						   sizeof(*ptable));
+	if (the_lnet.ln_peer_tables == NULL) {
+		CERROR("Failed to allocate cpu-partition peer tables\n");
+		return -ENOMEM;
+	}
+
+	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+		INIT_LIST_HEAD(&ptable->pt_deathrow);
+
+		LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
+				 LNET_PEER_HASH_SIZE * sizeof(*hash));
+		if (hash == NULL) {
+			CERROR("Failed to create peer hash table\n");
+			lnet_peer_tables_destroy();
+			return -ENOMEM;
+		}
+
+		for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
+			INIT_LIST_HEAD(&hash[j]);
+		ptable->pt_hash = hash; /* sign of initialization */
+	}
+
+	return 0;
+}
+
+void
+lnet_peer_tables_destroy(void)
+{
+	struct lnet_peer_table	*ptable;
+	struct list_head		*hash;
+	int			i;
+	int			j;
+
+	if (the_lnet.ln_peer_tables == NULL)
+		return;
+
+	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+		hash = ptable->pt_hash;
+		if (hash == NULL) /* not initialized */
+			break;
+
+		LASSERT(list_empty(&ptable->pt_deathrow));
+
+		ptable->pt_hash = NULL;
+		for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
+			LASSERT(list_empty(&hash[j]));
+
+		LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
+	}
+
+	cfs_percpt_free(the_lnet.ln_peer_tables);
+	the_lnet.ln_peer_tables = NULL;
+}
+
+void
+lnet_peer_tables_cleanup(void)
+{
+	struct lnet_peer_table	*ptable;
+	int			i;
+	int			j;
+
+	LASSERT(the_lnet.ln_shutdown);	/* i.e. no new peers */
+
+	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+		lnet_net_lock(i);
+
+		for (j = 0; j < LNET_PEER_HASH_SIZE; j++) {
+			struct list_head *peers = &ptable->pt_hash[j];
+
+			while (!list_empty(peers)) {
+				lnet_peer_t *lp = list_entry(peers->next,
+								 lnet_peer_t,
+								 lp_hashlist);
+				list_del_init(&lp->lp_hashlist);
+				/* lose hash table's ref */
+				lnet_peer_decref_locked(lp);
+			}
+		}
+
+		lnet_net_unlock(i);
+	}
+
+	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+		LIST_HEAD	(deathrow);
+		lnet_peer_t	*lp;
+
+		lnet_net_lock(i);
+
+		for (j = 3; ptable->pt_number != 0; j++) {
+			lnet_net_unlock(i);
+
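+			/* log on power-of-two iterations only, to avoid spew */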
+			if ((j & (j - 1)) == 0) {
+				CDEBUG(D_WARNING,
+				       "Waiting for %d peers on peer table\n",
+				       ptable->pt_number);
+			}
+			cfs_pause(cfs_time_seconds(1) / 2);
+			lnet_net_lock(i);
+		}
+		list_splice_init(&ptable->pt_deathrow, &deathrow);
+
+		lnet_net_unlock(i);
+
+		while (!list_empty(&deathrow)) {
+			lp = list_entry(deathrow.next,
+					    lnet_peer_t, lp_hashlist);
+			list_del(&lp->lp_hashlist);
+			LIBCFS_FREE(lp, sizeof(*lp));
+		}
+	}
+}
+
+void
+lnet_destroy_peer_locked(lnet_peer_t *lp)
+{
+	struct lnet_peer_table *ptable;
+
+	LASSERT(lp->lp_refcount == 0);
+	LASSERT(lp->lp_rtr_refcount == 0);
+	LASSERT(list_empty(&lp->lp_txq));
+	LASSERT(list_empty(&lp->lp_hashlist));
+	LASSERT(lp->lp_txqnob == 0);
+
+	ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
+	LASSERT(ptable->pt_number > 0);
+	ptable->pt_number--;
+
+	lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
+	lp->lp_ni = NULL;
+
+	list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
+}
+
+lnet_peer_t *
+lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
+{
+	struct list_head	*peers;
+	lnet_peer_t	*lp;
+
+	LASSERT(!the_lnet.ln_shutdown);
+
+	peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
+	list_for_each_entry(lp, peers, lp_hashlist) {
+		if (lp->lp_nid == nid) {
+			lnet_peer_addref_locked(lp);
+			return lp;
+		}
+	}
+
+	return NULL;
+}
+
+int
+lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt)
+{
+	struct lnet_peer_table	*ptable;
+	lnet_peer_t		*lp = NULL;
+	lnet_peer_t		*lp2;
+	int			cpt2;
+	int			rc = 0;
+
+	*lpp = NULL;
+	if (the_lnet.ln_shutdown) /* it's shutting down */
+		return -ESHUTDOWN;
+
+	/* cpt can be LNET_LOCK_EX if it's called from router functions */
+	cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
+
+	ptable = the_lnet.ln_peer_tables[cpt2];
+	lp = lnet_find_peer_locked(ptable, nid);
+	if (lp != NULL) {
+		*lpp = lp;
+		return 0;
+	}
+
+	if (!list_empty(&ptable->pt_deathrow)) {
+		lp = list_entry(ptable->pt_deathrow.next,
+				    lnet_peer_t, lp_hashlist);
+		list_del(&lp->lp_hashlist);
+	}
+
+	/*
+	 * take an extra refcount in case another thread has shut down LNet
+	 * and destroyed the locks and peer table before I finish the
+	 * allocation
+	 */
+	ptable->pt_number++;
+	lnet_net_unlock(cpt);
+
+	if (lp != NULL)
+		memset(lp, 0, sizeof(*lp));
+	else
+		LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp));
+
+	if (lp == NULL) {
+		rc = -ENOMEM;
+		lnet_net_lock(cpt);
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&lp->lp_txq);
+	INIT_LIST_HEAD(&lp->lp_rtrq);
+	INIT_LIST_HEAD(&lp->lp_routes);
+
+	lp->lp_notify = 0;
+	lp->lp_notifylnd = 0;
+	lp->lp_notifying = 0;
+	lp->lp_alive_count = 0;
+	lp->lp_timestamp = 0;
+	lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
+	lp->lp_last_alive = cfs_time_current(); /* assumes alive */
+	lp->lp_last_query = 0; /* haven't asked NI yet */
+	lp->lp_ping_timestamp = 0;
+	lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
+	lp->lp_nid = nid;
+	lp->lp_cpt = cpt2;
+	lp->lp_refcount = 2;	/* 1 for caller; 1 for hash */
+	lp->lp_rtr_refcount = 0;
+
+	lnet_net_lock(cpt);
+
+	if (the_lnet.ln_shutdown) {
+		rc = -ESHUTDOWN;
+		goto out;
+	}
+
+	lp2 = lnet_find_peer_locked(ptable, nid);
+	if (lp2 != NULL) {
+		*lpp = lp2;
+		goto out;
+	}
+
+	lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
+	if (lp->lp_ni == NULL) {
+		rc = -EHOSTUNREACH;
+		goto out;
+	}
+
+	lp->lp_txcredits    =
+	lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
+	lp->lp_rtrcredits    =
+	lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
+
+	list_add_tail(&lp->lp_hashlist,
+			  &ptable->pt_hash[lnet_nid2peerhash(nid)]);
+	ptable->pt_version++;
+	*lpp = lp;
+
+	return 0;
+out:
+	if (lp != NULL)
+		list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
+	ptable->pt_number--;
+	return rc;
+}
+
+void
+lnet_debug_peer(lnet_nid_t nid)
+{
+	char		*aliveness = "NA";
+	lnet_peer_t	*lp;
+	int		rc;
+	int		cpt;
+
+	cpt = lnet_cpt_of_nid(nid);
+	lnet_net_lock(cpt);
+
+	rc = lnet_nid2peer_locked(&lp, nid, cpt);
+	if (rc != 0) {
+		lnet_net_unlock(cpt);
+		CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
+		return;
+	}
+
+	if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
+		aliveness = lp->lp_alive ? "up" : "down";
+
+	CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
+	       libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
+	       aliveness, lp->lp_ni->ni_peertxcredits,
+	       lp->lp_rtrcredits, lp->lp_minrtrcredits,
+	       lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
+
+	lnet_peer_decref_locked(lp);
+
+	lnet_net_unlock(cpt);
+}

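Reviewer note: the heart of peer.c is the drop-lock/allocate/re-check dance in lnet_nid2peer_locked(), needed because allocation can sleep while the net lock is held. Stripped of the CPT and deathrow machinery, the shape is roughly the following sketch (hypothetical names, not the real structures):

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct peer {
		struct peer *next;
		int	     nid;
	};

	static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct peer *peers;

	static struct peer *find_locked(int nid)
	{
		struct peer *p;

		for (p = peers; p != NULL; p = p->next)
			if (p->nid == nid)
				return p;
		return NULL;
	}

	/* Called with tbl_lock held; returns with it held. */
	static struct peer *nid2peer_locked(int nid)
	{
		struct peer *p, *p2;

		p = find_locked(nid);
		if (p != NULL)
			return p;

		/* allocation may block, so drop the lock around it */
		pthread_mutex_unlock(&tbl_lock);
		p = malloc(sizeof(*p));
		pthread_mutex_lock(&tbl_lock);
		if (p == NULL)
			return NULL;

		/* re-check: a racing thread may have inserted the peer
		 * while the lock was dropped */
		p2 = find_locked(nid);
		if (p2 != NULL) {
			free(p);
			return p2;
		}

		p->nid = nid;
		p->next = peers;
		peers = p;
		return p;
	}

	int main(void)
	{
		pthread_mutex_lock(&tbl_lock);
		printf("peer %s\n", nid2peer_locked(42) ? "created" : "failed");
		pthread_mutex_unlock(&tbl_lock);
		return 0;
	}

The kernel version additionally bumps pt_number before dropping the lock, so shutdown can see an allocation in flight, and prefers recycling a peer from pt_deathrow over calling the allocator.
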
+ 1693 - 0
drivers/staging/lustre/lnet/lnet/router.c

@@ -0,0 +1,1693 @@
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ *   This file is part of Portals
+ *   http://sourceforge.net/projects/sandiaportals/
+ *
+ *   Portals is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Portals is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Portals; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+#if  defined(LNET_ROUTER)
+
+#define LNET_NRB_TINY_MIN	512	/* min value for each CPT */
+#define LNET_NRB_TINY		(LNET_NRB_TINY_MIN * 4)
+#define LNET_NRB_SMALL_MIN	4096	/* min value for each CPT */
+#define LNET_NRB_SMALL		(LNET_NRB_SMALL_MIN * 4)
+#define LNET_NRB_LARGE_MIN	256	/* min value for each CPT */
+#define LNET_NRB_LARGE		(LNET_NRB_LARGE_MIN * 4)
+
+static char *forwarding = "";
+CFS_MODULE_PARM(forwarding, "s", charp, 0444,
+		"Explicitly enable/disable forwarding between networks");
+
+static int tiny_router_buffers;
+CFS_MODULE_PARM(tiny_router_buffers, "i", int, 0444,
+		"# of 0 payload messages to buffer in the router");
+static int small_router_buffers;
+CFS_MODULE_PARM(small_router_buffers, "i", int, 0444,
+		"# of small (1 page) messages to buffer in the router");
+static int large_router_buffers;
+CFS_MODULE_PARM(large_router_buffers, "i", int, 0444,
+		"# of large messages to buffer in the router");
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+		"# router buffer credits per peer");
+
+static int auto_down = 1;
+CFS_MODULE_PARM(auto_down, "i", int, 0444,
+		"Automatically mark peers down on comms error");
+
+int
+lnet_peer_buffer_credits(lnet_ni_t *ni)
+{
+	/* NI option overrides LNet default */
+	if (ni->ni_peerrtrcredits > 0)
+		return ni->ni_peerrtrcredits;
+	if (peer_buffer_credits > 0)
+		return peer_buffer_credits;
+
+	/* As an approximation, allow this peer the same number of router
+	 * buffers as it is allowed outstanding sends */
+	return ni->ni_peertxcredits;
+}
+
+/* forward ref's */
+static int lnet_router_checker(void *);
+#else
+
+int
+lnet_peer_buffer_credits(lnet_ni_t *ni)
+{
+	return 0;
+}
+
+#endif
+
+static int check_routers_before_use = 0;
+CFS_MODULE_PARM(check_routers_before_use, "i", int, 0444,
+		"Assume routers are down and ping them before use");
+
+static int avoid_asym_router_failure = 1;
+CFS_MODULE_PARM(avoid_asym_router_failure, "i", int, 0644,
+		"Avoid asymmetrical router failures (0 to disable)");
+
+static int dead_router_check_interval = 60;
+CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0644,
+		"Seconds between dead router health checks (<= 0 to disable)");
+
+static int live_router_check_interval = 60;
+CFS_MODULE_PARM(live_router_check_interval, "i", int, 0644,
+		"Seconds between live router health checks (<= 0 to disable)");
+
+static int router_ping_timeout = 50;
+CFS_MODULE_PARM(router_ping_timeout, "i", int, 0644,
+		"Seconds to wait for the reply to a router health query");
+
+int
+lnet_peers_start_down(void)
+{
+	return check_routers_before_use;
+}
+
+void
+lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when)
+{
+	if (cfs_time_before(when, lp->lp_timestamp)) { /* out of date information */
+		CDEBUG(D_NET, "Out of date\n");
+		return;
+	}
+
+	lp->lp_timestamp = when;		/* update timestamp */
+	lp->lp_ping_deadline = 0;	       /* disable ping timeout */
+
+	if (lp->lp_alive_count != 0 &&	  /* got old news */
+	    (!lp->lp_alive) == (!alive)) {      /* new date for old news */
+		CDEBUG(D_NET, "Old news\n");
+		return;
+	}
+
+	/* Flag that notification is outstanding */
+
+	lp->lp_alive_count++;
+	lp->lp_alive = !(!alive);	       /* 1 bit! */
+	lp->lp_notify = 1;
+	lp->lp_notifylnd |= notifylnd;
+	if (lp->lp_alive)
+		lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */
+
+	CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
+}
+
+void
+lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp)
+{
+	int	alive;
+	int	notifylnd;
+
+	/* Notify only in 1 thread at any time to ensure ordered notification.
+	 * NB individual events can be missed; the only guarantee is that you
+	 * always get the most recent news */
+
+	if (lp->lp_notifying)
+		return;
+
+	lp->lp_notifying = 1;
+
+	while (lp->lp_notify) {
+		alive     = lp->lp_alive;
+		notifylnd = lp->lp_notifylnd;
+
+		lp->lp_notifylnd = 0;
+		lp->lp_notify    = 0;
+
+		if (notifylnd && ni->ni_lnd->lnd_notify != NULL) {
+			lnet_net_unlock(lp->lp_cpt);
+
+			/* A new notification could happen now; I'll handle it
+			 * when control returns to me */
+
+			(ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive);
+
+			lnet_net_lock(lp->lp_cpt);
+		}
+	}
+
+	lp->lp_notifying = 0;
+}
+
+
+static void
+lnet_rtr_addref_locked(lnet_peer_t *lp)
+{
+	LASSERT(lp->lp_refcount > 0);
+	LASSERT(lp->lp_rtr_refcount >= 0);
+
+	/* lnet_net_lock must be exclusively locked */
+	lp->lp_rtr_refcount++;
+	if (lp->lp_rtr_refcount == 1) {
+		struct list_head *pos;
+
+		/* a simple insertion sort */
+		list_for_each_prev(pos, &the_lnet.ln_routers) {
+			lnet_peer_t *rtr = list_entry(pos, lnet_peer_t,
+							  lp_rtr_list);
+
+			if (rtr->lp_nid < lp->lp_nid)
+				break;
+		}
+
+		list_add(&lp->lp_rtr_list, pos);
+		/* addref for the_lnet.ln_routers */
+		lnet_peer_addref_locked(lp);
+		the_lnet.ln_routers_version++;
+	}
+}
+
+static void
+lnet_rtr_decref_locked(lnet_peer_t *lp)
+{
+	LASSERT(lp->lp_refcount > 0);
+	LASSERT(lp->lp_rtr_refcount > 0);
+
+	/* lnet_net_lock must be exclusively locked */
+	lp->lp_rtr_refcount--;
+	if (lp->lp_rtr_refcount == 0) {
+		LASSERT(list_empty(&lp->lp_routes));
+
+		if (lp->lp_rcd != NULL) {
+			list_add(&lp->lp_rcd->rcd_list,
+				     &the_lnet.ln_rcd_deathrow);
+			lp->lp_rcd = NULL;
+		}
+
+		list_del(&lp->lp_rtr_list);
+		/* decref for the_lnet.ln_routers */
+		lnet_peer_decref_locked(lp);
+		the_lnet.ln_routers_version++;
+	}
+}
+
+lnet_remotenet_t *
+lnet_find_net_locked (__u32 net)
+{
+	lnet_remotenet_t	*rnet;
+	struct list_head		*tmp;
+	struct list_head		*rn_list;
+
+	LASSERT(!the_lnet.ln_shutdown);
+
+	rn_list = lnet_net2rnethash(net);
+	list_for_each(tmp, rn_list) {
+		rnet = list_entry(tmp, lnet_remotenet_t, lrn_list);
+
+		if (rnet->lrn_net == net)
+			return rnet;
+	}
+	return NULL;
+}
+
+static void lnet_shuffle_seed(void)
+{
+	static int seeded = 0;
+	int lnd_type, seed[2];
+	struct timeval tv;
+	lnet_ni_t *ni;
+	struct list_head *tmp;
+
+	if (seeded)
+		return;
+
+	cfs_get_random_bytes(seed, sizeof(seed));
+
+	/* Nodes with small feet have little entropy;
+	 * the NID for this node gives the most entropy in the low bits */
+	list_for_each(tmp, &the_lnet.ln_nis) {
+		ni = list_entry(tmp, lnet_ni_t, ni_list);
+		lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+
+		if (lnd_type != LOLND)
+			seed[0] ^= (LNET_NIDADDR(ni->ni_nid) | lnd_type);
+	}
+
+	do_gettimeofday(&tv);
+	cfs_srand(tv.tv_sec ^ seed[0], tv.tv_usec ^ seed[1]);
+	seeded = 1;
+	return;
+}
+
+/* NB expects LNET_LOCK held */
+void
+lnet_add_route_to_rnet (lnet_remotenet_t *rnet, lnet_route_t *route)
+{
+	unsigned int      len = 0;
+	unsigned int      offset = 0;
+	struct list_head       *e;
+
+	lnet_shuffle_seed();
+
+	list_for_each (e, &rnet->lrn_routes) {
+		len++;
+	}
+
+	/* len+1 positions to add a new entry, also prevents division by 0 */
+	offset = cfs_rand() % (len + 1);
+	list_for_each (e, &rnet->lrn_routes) {
+		if (offset == 0)
+			break;
+		offset--;
+	}
+	list_add(&route->lr_list, e);
+	list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes);
+
+	the_lnet.ln_remote_nets_version++;
+	lnet_rtr_addref_locked(route->lr_gateway);
+}
+
+int
+lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
+{
+	struct list_head	  *e;
+	lnet_remotenet_t    *rnet;
+	lnet_remotenet_t    *rnet2;
+	lnet_route_t	*route;
+	lnet_ni_t	   *ni;
+	int		  add_route;
+	int		  rc;
+
+	CDEBUG(D_NET, "Add route: net %s hops %u gw %s\n",
+	       libcfs_net2str(net), hops, libcfs_nid2str(gateway));
+
+	if (gateway == LNET_NID_ANY ||
+	    LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
+	    net == LNET_NIDNET(LNET_NID_ANY) ||
+	    LNET_NETTYP(net) == LOLND ||
+	    LNET_NIDNET(gateway) == net ||
+	    hops < 1 || hops > 255)
+		return (-EINVAL);
+
+	if (lnet_islocalnet(net))	       /* it's a local network */
+		return 0;		       /* ignore the route entry */
+
+	/* Assume net, route, all new */
+	LIBCFS_ALLOC(route, sizeof(*route));
+	LIBCFS_ALLOC(rnet, sizeof(*rnet));
+	if (route == NULL || rnet == NULL) {
+		CERROR("Out of memory creating route %s %d %s\n",
+		       libcfs_net2str(net), hops, libcfs_nid2str(gateway));
+		if (route != NULL)
+			LIBCFS_FREE(route, sizeof(*route));
+		if (rnet != NULL)
+			LIBCFS_FREE(rnet, sizeof(*rnet));
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&rnet->lrn_routes);
+	rnet->lrn_net = net;
+	route->lr_hops = hops;
+	route->lr_net = net;
+
+	lnet_net_lock(LNET_LOCK_EX);
+
+	rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX);
+	if (rc != 0) {
+		lnet_net_unlock(LNET_LOCK_EX);
+
+		LIBCFS_FREE(route, sizeof(*route));
+		LIBCFS_FREE(rnet, sizeof(*rnet));
+
+		if (rc == -EHOSTUNREACH) { /* gateway is not on a local net */
+			return 0;	/* ignore the route entry */
+		} else {
+			CERROR("Error %d creating route %s %d %s\n", rc,
+			       libcfs_net2str(net), hops,
+			       libcfs_nid2str(gateway));
+		}
+		return rc;
+	}
+
+	LASSERT (!the_lnet.ln_shutdown);
+
+	rnet2 = lnet_find_net_locked(net);
+	if (rnet2 == NULL) {
+		/* new network */
+		list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
+		rnet2 = rnet;
+	}
+
+	/* Search for a duplicate route (it's a NOOP if it is) */
+	add_route = 1;
+	list_for_each (e, &rnet2->lrn_routes) {
+		lnet_route_t *route2 = list_entry(e, lnet_route_t, lr_list);
+
+		if (route2->lr_gateway == route->lr_gateway) {
+			add_route = 0;
+			break;
+		}
+
+		/* our lookups must be true */
+		LASSERT (route2->lr_gateway->lp_nid != gateway);
+	}
+
+	if (add_route) {
+		lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
+		lnet_add_route_to_rnet(rnet2, route);
+
+		ni = route->lr_gateway->lp_ni;
+		lnet_net_unlock(LNET_LOCK_EX);
+
+		/* XXX Assume alive */
+		if (ni->ni_lnd->lnd_notify != NULL)
+			(ni->ni_lnd->lnd_notify)(ni, gateway, 1);
+
+		lnet_net_lock(LNET_LOCK_EX);
+	}
+
+	/* -1 for notify or !add_route */
+	lnet_peer_decref_locked(route->lr_gateway);
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	if (!add_route)
+		LIBCFS_FREE(route, sizeof(*route));
+
+	if (rnet != rnet2)
+		LIBCFS_FREE(rnet, sizeof(*rnet));
+
+	return 0;
+}
+
+int
+lnet_check_routes(void)
+{
+	lnet_remotenet_t	*rnet;
+	lnet_route_t		*route;
+	lnet_route_t		*route2;
+	struct list_head		*e1;
+	struct list_head		*e2;
+	int			cpt;
+	struct list_head		*rn_list;
+	int			i;
+
+	cpt = lnet_net_lock_current();
+
+	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+		rn_list = &the_lnet.ln_remote_nets_hash[i];
+		list_for_each(e1, rn_list) {
+			rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
+
+			route2 = NULL;
+			list_for_each(e2, &rnet->lrn_routes) {
+				lnet_nid_t	nid1;
+				lnet_nid_t	nid2;
+				int		net;
+
+				route = list_entry(e2, lnet_route_t,
+						       lr_list);
+
+				if (route2 == NULL) {
+					route2 = route;
+					continue;
+				}
+
+				if (route->lr_gateway->lp_ni ==
+				    route2->lr_gateway->lp_ni)
+					continue;
+
+				nid1 = route->lr_gateway->lp_nid;
+				nid2 = route2->lr_gateway->lp_nid;
+				net = rnet->lrn_net;
+
+				lnet_net_unlock(cpt);
+
+				CERROR("Routes to %s via %s and %s not "
+				       "supported\n",
+				       libcfs_net2str(net),
+				       libcfs_nid2str(nid1),
+				       libcfs_nid2str(nid2));
+				return -EINVAL;
+			}
+		}
+	}
+
+	lnet_net_unlock(cpt);
+	return 0;
+}
+
+int
+lnet_del_route(__u32 net, lnet_nid_t gw_nid)
+{
+	struct lnet_peer	*gateway;
+	lnet_remotenet_t	*rnet;
+	lnet_route_t		*route;
+	struct list_head		*e1;
+	struct list_head		*e2;
+	int			rc = -ENOENT;
+	struct list_head		*rn_list;
+	int			idx = 0;
+
+	CDEBUG(D_NET, "Del route: net %s : gw %s\n",
+	       libcfs_net2str(net), libcfs_nid2str(gw_nid));
+
+	/* NB Caller may specify either all routes via the given gateway
+	 * or a specific route entry (actual NIDs) */
+
+	lnet_net_lock(LNET_LOCK_EX);
+	if (net == LNET_NIDNET(LNET_NID_ANY))
+		rn_list = &the_lnet.ln_remote_nets_hash[0];
+	else
+		rn_list = lnet_net2rnethash(net);
+
+ again:
+	list_for_each(e1, rn_list) {
+		rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
+
+		if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
+			net == rnet->lrn_net))
+			continue;
+
+		list_for_each(e2, &rnet->lrn_routes) {
+			route = list_entry(e2, lnet_route_t, lr_list);
+
+			gateway = route->lr_gateway;
+			if (!(gw_nid == LNET_NID_ANY ||
+			      gw_nid == gateway->lp_nid))
+				continue;
+
+			list_del(&route->lr_list);
+			list_del(&route->lr_gwlist);
+			the_lnet.ln_remote_nets_version++;
+
+			if (list_empty(&rnet->lrn_routes))
+				list_del(&rnet->lrn_list);
+			else
+				rnet = NULL;
+
+			lnet_rtr_decref_locked(gateway);
+			lnet_peer_decref_locked(gateway);
+
+			lnet_net_unlock(LNET_LOCK_EX);
+
+			LIBCFS_FREE(route, sizeof(*route));
+
+			if (rnet != NULL)
+				LIBCFS_FREE(rnet, sizeof(*rnet));
+
+			rc = 0;
+			lnet_net_lock(LNET_LOCK_EX);
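+			/* the lists may have changed while the lock was
+			 * dropped, so restart the scan */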
+			goto again;
+		}
+	}
+
+	if (net == LNET_NIDNET(LNET_NID_ANY) &&
+	    ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
+		rn_list = &the_lnet.ln_remote_nets_hash[idx];
+		goto again;
+	}
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	return rc;
+}
+
+void
+lnet_destroy_routes (void)
+{
+	lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
+}
+
+int
+lnet_get_route(int idx, __u32 *net, __u32 *hops,
+	       lnet_nid_t *gateway, __u32 *alive)
+{
+	struct list_head		*e1;
+	struct list_head		*e2;
+	lnet_remotenet_t	*rnet;
+	lnet_route_t		*route;
+	int			cpt;
+	int			i;
+	struct list_head		*rn_list;
+
+	cpt = lnet_net_lock_current();
+
+	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+		rn_list = &the_lnet.ln_remote_nets_hash[i];
+		list_for_each(e1, rn_list) {
+			rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
+
+			list_for_each(e2, &rnet->lrn_routes) {
+				route = list_entry(e2, lnet_route_t,
+						       lr_list);
+
+				if (idx-- == 0) {
+					*net     = rnet->lrn_net;
+					*hops    = route->lr_hops;
+					*gateway = route->lr_gateway->lp_nid;
+					*alive   = route->lr_gateway->lp_alive;
+					lnet_net_unlock(cpt);
+					return 0;
+				}
+			}
+		}
+	}
+
+	lnet_net_unlock(cpt);
+	return -ENOENT;
+}
+
+void
+lnet_swap_pinginfo(lnet_ping_info_t *info)
+{
+	int	       i;
+	lnet_ni_status_t *stat;
+
+	__swab32s(&info->pi_magic);
+	__swab32s(&info->pi_features);
+	__swab32s(&info->pi_pid);
+	__swab32s(&info->pi_nnis);
+	for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
+		stat = &info->pi_ni[i];
+		__swab64s(&stat->ns_nid);
+		__swab32s(&stat->ns_status);
+	}
+	return;
+}
+
+/**
+ * parse router-checker pinginfo, record number of down NIs for remote
+ * networks on that router.
+ */
+static void
+lnet_parse_rc_info(lnet_rc_data_t *rcd)
+{
+	lnet_ping_info_t	*info = rcd->rcd_pinginfo;
+	struct lnet_peer	*gw   = rcd->rcd_gateway;
+	lnet_route_t		*rtr;
+
+	if (!gw->lp_alive)
+		return;
+
+	if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
+		lnet_swap_pinginfo(info);
+
+	/* NB always racing with network! */
+	if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
+		CDEBUG(D_NET, "%s: Unexpected magic %08x\n",
+		       libcfs_nid2str(gw->lp_nid), info->pi_magic);
+		gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
+		return;
+	}
+
+	gw->lp_ping_feats = info->pi_features;
+	if ((gw->lp_ping_feats & LNET_PING_FEAT_MASK) == 0) {
+		CDEBUG(D_NET, "%s: Unexpected features 0x%x\n",
+		       libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats);
+		return; /* nothing I can understand */
+	}
+
+	if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0)
+		return; /* can't carry NI status info */
+
+	list_for_each_entry(rtr, &gw->lp_routes, lr_gwlist) {
+		int	ptl_status = LNET_NI_STATUS_INVALID;
+		int	down = 0;
+		int	up = 0;
+		int	i;
+
+		for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
+			lnet_ni_status_t *stat = &info->pi_ni[i];
+			lnet_nid_t	 nid = stat->ns_nid;
+
+			if (nid == LNET_NID_ANY) {
+				CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n",
+				       libcfs_nid2str(gw->lp_nid));
+				gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
+				return;
+			}
+
+			if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
+				continue;
+
+			if (stat->ns_status == LNET_NI_STATUS_DOWN) {
+				if (LNET_NETTYP(LNET_NIDNET(nid)) != PTLLND)
+					down++;
+				else if (ptl_status != LNET_NI_STATUS_UP)
+					ptl_status = LNET_NI_STATUS_DOWN;
+				continue;
+			}
+
+			if (stat->ns_status == LNET_NI_STATUS_UP) {
+				if (LNET_NIDNET(nid) == rtr->lr_net) {
+					up = 1;
+					break;
+				}
+				/* ptl NIs are considered down only when
+				 * they're all down */
+				if (LNET_NETTYP(LNET_NIDNET(nid)) == PTLLND)
+					ptl_status = LNET_NI_STATUS_UP;
+				continue;
+			}
+
+			CDEBUG(D_NET, "%s: Unexpected status 0x%x\n",
+			       libcfs_nid2str(gw->lp_nid), stat->ns_status);
+			gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
+			return;
+		}
+
+		if (up) { /* ignore downed NIs if NI for dest network is up */
+			rtr->lr_downis = 0;
+			continue;
+		}
+		rtr->lr_downis = down + (ptl_status == LNET_NI_STATUS_DOWN);
+	}
+}
+
+static void
+lnet_router_checker_event(lnet_event_t *event)
+{
+	lnet_rc_data_t		*rcd = event->md.user_ptr;
+	struct lnet_peer	*lp;
+
+	LASSERT(rcd != NULL);
+
+	if (event->unlinked) {
+		LNetInvalidateHandle(&rcd->rcd_mdh);
+		return;
+	}
+
+	LASSERT(event->type == LNET_EVENT_SEND ||
+		event->type == LNET_EVENT_REPLY);
+
+	lp = rcd->rcd_gateway;
+	LASSERT(lp != NULL);
+
+	 /* NB: this is called while holding lnet_res_lock; a few places
+	  * need to hold both locks at the same time, so take care of
+	  * lock ordering */
+	lnet_net_lock(lp->lp_cpt);
+	if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) {
+		/* ignore if no longer a router or rcd is replaced */
+		goto out;
+	}
+
+	if (event->type == LNET_EVENT_SEND) {
+		lp->lp_ping_notsent = 0;
+		if (event->status == 0)
+			goto out;
+	}
+
+	/* LNET_EVENT_REPLY */
+	/* A successful REPLY means the router is up.  If _any_ comms
+	 * to the router fail I assume it's down (this will happen if
+	 * we ping alive routers to try to detect router death before
+	 * apps get burned). */
+
+	lnet_notify_locked(lp, 1, (event->status == 0), cfs_time_current());
+	/* The router checker will wake up very shortly and do the
+	 * actual notification.
+	 * XXX If 'lp' stops being a router before then, it will still
+	 * have the notification pending!!! */
+
+	if (avoid_asym_router_failure && event->status == 0)
+		lnet_parse_rc_info(rcd);
+
+ out:
+	lnet_net_unlock(lp->lp_cpt);
+}
+
+void
+lnet_wait_known_routerstate(void)
+{
+	lnet_peer_t	 *rtr;
+	struct list_head	  *entry;
+	int		  all_known;
+
+	LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
+
+	for (;;) {
+		int	cpt = lnet_net_lock_current();
+
+		all_known = 1;
+		list_for_each (entry, &the_lnet.ln_routers) {
+			rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
+
+			if (rtr->lp_alive_count == 0) {
+				all_known = 0;
+				break;
+			}
+		}
+
+		lnet_net_unlock(cpt);
+
+		if (all_known)
+			return;
+
+		cfs_pause(cfs_time_seconds(1));
+	}
+}
+
+void
+lnet_update_ni_status_locked(void)
+{
+	lnet_ni_t	*ni;
+	long		now;
+	int		timeout;
+
+	LASSERT(the_lnet.ln_routing);
+
+	timeout = router_ping_timeout +
+		  MAX(live_router_check_interval, dead_router_check_interval);
+
+	now = cfs_time_current_sec();
+	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
+		if (ni->ni_lnd->lnd_type == LOLND)
+			continue;
+
+		if (now < ni->ni_last_alive + timeout)
+			continue;
+
+		lnet_ni_lock(ni);
+		/* re-check with lock */
+		if (now < ni->ni_last_alive + timeout) {
+			lnet_ni_unlock(ni);
+			continue;
+		}
+
+		LASSERT(ni->ni_status != NULL);
+
+		if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) {
+			CDEBUG(D_NET, "NI(%s:%d) status changed to down\n",
+			       libcfs_nid2str(ni->ni_nid), timeout);
+			/* NB: so far, this is the only place to set
+			 * NI status to "down" */
+			ni->ni_status->ns_status = LNET_NI_STATUS_DOWN;
+		}
+		lnet_ni_unlock(ni);
+	}
+}
+
+void
+lnet_destroy_rc_data(lnet_rc_data_t *rcd)
+{
+	LASSERT(list_empty(&rcd->rcd_list));
+	/* detached from network */
+	LASSERT(LNetHandleIsInvalid(rcd->rcd_mdh));
+
+	if (rcd->rcd_gateway != NULL) {
+		int cpt = rcd->rcd_gateway->lp_cpt;
+
+		lnet_net_lock(cpt);
+		lnet_peer_decref_locked(rcd->rcd_gateway);
+		lnet_net_unlock(cpt);
+	}
+
+	if (rcd->rcd_pinginfo != NULL)
+		LIBCFS_FREE(rcd->rcd_pinginfo, LNET_PINGINFO_SIZE);
+
+	LIBCFS_FREE(rcd, sizeof(*rcd));
+}
+
+lnet_rc_data_t *
+lnet_create_rc_data_locked(lnet_peer_t *gateway)
+{
+	lnet_rc_data_t		*rcd = NULL;
+	lnet_ping_info_t	*pi;
+	int			rc;
+	int			i;
+
+	lnet_net_unlock(gateway->lp_cpt);
+
+	LIBCFS_ALLOC(rcd, sizeof(*rcd));
+	if (rcd == NULL)
+		goto out;
+
+	LNetInvalidateHandle(&rcd->rcd_mdh);
+	INIT_LIST_HEAD(&rcd->rcd_list);
+
+	LIBCFS_ALLOC(pi, LNET_PINGINFO_SIZE);
+	if (pi == NULL)
+		goto out;
+
+	memset(pi, 0, LNET_PINGINFO_SIZE);
+	for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
+		pi->pi_ni[i].ns_nid = LNET_NID_ANY;
+		pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
+	}
+	rcd->rcd_pinginfo = pi;
+
+	LASSERT (!LNetHandleIsInvalid(the_lnet.ln_rc_eqh));
+	rc = LNetMDBind((lnet_md_t){.start     = pi,
+				    .user_ptr  = rcd,
+				    .length    = LNET_PINGINFO_SIZE,
+				    .threshold = LNET_MD_THRESH_INF,
+				    .options   = LNET_MD_TRUNCATE,
+				    .eq_handle = the_lnet.ln_rc_eqh},
+			LNET_UNLINK,
+			&rcd->rcd_mdh);
+	if (rc < 0) {
+		CERROR("Can't bind MD: %d\n", rc);
+		goto out;
+	}
+	LASSERT(rc == 0);
+
+	lnet_net_lock(gateway->lp_cpt);
+	/* router table changed or someone has created rcd for this gateway */
+	if (!lnet_isrouter(gateway) || gateway->lp_rcd != NULL) {
+		lnet_net_unlock(gateway->lp_cpt);
+		goto out;
+	}
+
+	lnet_peer_addref_locked(gateway);
+	rcd->rcd_gateway = gateway;
+	gateway->lp_rcd = rcd;
+	gateway->lp_ping_notsent = 0;
+
+	return rcd;
+
+ out:
+	if (rcd != NULL) {
+		if (!LNetHandleIsInvalid(rcd->rcd_mdh)) {
+			rc = LNetMDUnlink(rcd->rcd_mdh);
+			LASSERT(rc == 0);
+		}
+		lnet_destroy_rc_data(rcd);
+	}
+
+	lnet_net_lock(gateway->lp_cpt);
+	return gateway->lp_rcd;
+}
+
+static int
+lnet_router_check_interval (lnet_peer_t *rtr)
+{
+	int secs;
+
+	secs = rtr->lp_alive ? live_router_check_interval :
+			       dead_router_check_interval;
+	if (secs < 0)
+		secs = 0;
+
+	return secs;
+}
+
+static void
+lnet_ping_router_locked (lnet_peer_t *rtr)
+{
+	lnet_rc_data_t *rcd = NULL;
+	cfs_time_t      now = cfs_time_current();
+	int	     secs;
+
+	lnet_peer_addref_locked(rtr);
+
+	if (rtr->lp_ping_deadline != 0 && /* ping timed out? */
+	    cfs_time_after(now, rtr->lp_ping_deadline))
+		lnet_notify_locked(rtr, 1, 0, now);
+
+	/* Run any outstanding notifications */
+	lnet_ni_notify_locked(rtr->lp_ni, rtr);
+
+	if (!lnet_isrouter(rtr) ||
+	    the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
+		/* router table changed or router checker is shutting down */
+		lnet_peer_decref_locked(rtr);
+		return;
+	}
+
+	rcd = rtr->lp_rcd != NULL ?
+	      rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
+
+	if (rcd == NULL) {
+		/* drop the ref taken at entry, as every other exit does */
+		lnet_peer_decref_locked(rtr);
+		return;
+	}
+
+	secs = lnet_router_check_interval(rtr);
+
+	CDEBUG(D_NET,
+	       "rtr %s %d: deadline %lu ping_notsent %d alive %d "
+	       "alive_count %d lp_ping_timestamp %lu\n",
+	       libcfs_nid2str(rtr->lp_nid), secs,
+	       rtr->lp_ping_deadline, rtr->lp_ping_notsent,
+	       rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
+
+	if (secs != 0 && !rtr->lp_ping_notsent &&
+	    cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp,
+					     cfs_time_seconds(secs)))) {
+		int	       rc;
+		lnet_process_id_t id;
+		lnet_handle_md_t  mdh;
+
+		id.nid = rtr->lp_nid;
+		id.pid = LUSTRE_SRV_LNET_PID;
+		CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
+
+		rtr->lp_ping_notsent   = 1;
+		rtr->lp_ping_timestamp = now;
+
+		mdh = rcd->rcd_mdh;
+
+		if (rtr->lp_ping_deadline == 0) {
+			rtr->lp_ping_deadline =
+				cfs_time_shift(router_ping_timeout);
+		}
+
+		lnet_net_unlock(rtr->lp_cpt);
+
+		rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
+			     LNET_PROTO_PING_MATCHBITS, 0);
+
+		lnet_net_lock(rtr->lp_cpt);
+		if (rc != 0)
+			rtr->lp_ping_notsent = 0; /* no event pending */
+	}
+
+	lnet_peer_decref_locked(rtr);
+	return;
+}
+
+int
+lnet_router_checker_start(void)
+{
+	int	  rc;
+	int	  eqsz;
+
+	LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
+
+	if (check_routers_before_use &&
+	    dead_router_check_interval <= 0) {
+		LCONSOLE_ERROR_MSG(0x10a,
+				   "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n");
+		return -EINVAL;
+	}
+
+	if (!the_lnet.ln_routing &&
+	    live_router_check_interval <= 0 &&
+	    dead_router_check_interval <= 0)
+		return 0;
+
+	sema_init(&the_lnet.ln_rc_signal, 0);
+	/* EQ size doesn't matter; the callback is guaranteed to get every
+	 * event */
+	eqsz = 0;
+	rc = LNetEQAlloc(eqsz, lnet_router_checker_event,
+			 &the_lnet.ln_rc_eqh);
+	if (rc != 0) {
+		CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
+		return -ENOMEM;
+	}
+
+	the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
+	rc = PTR_ERR(kthread_run(lnet_router_checker,
+				 NULL, "router_checker"));
+	if (IS_ERR_VALUE(rc)) {
+		CERROR("Can't start router checker thread: %d\n", rc);
+		/* block until event callback signals exit */
+		down(&the_lnet.ln_rc_signal);
+		rc = LNetEQFree(the_lnet.ln_rc_eqh);
+		LASSERT(rc == 0);
+		the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
+		return -ENOMEM;
+	}
+
+	if (check_routers_before_use) {
+		/* Note that a helpful side-effect of pinging all known routers
+		 * at startup is that it makes them drop stale connections they
+		 * may have to a previous instance of me. */
+		lnet_wait_known_routerstate();
+	}
+
+	return 0;
+}
+
+void
+lnet_router_checker_stop (void)
+{
+	int rc;
+
+	if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
+		return;
+
+	LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
+	the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING;
+
+	/* block until event callback signals exit */
+	down(&the_lnet.ln_rc_signal);
+	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
+
+	rc = LNetEQFree(the_lnet.ln_rc_eqh);
+	LASSERT (rc == 0);
+	return;
+}
+
+static void
+lnet_prune_rc_data(int wait_unlink)
+{
+	lnet_rc_data_t		*rcd;
+	lnet_rc_data_t		*tmp;
+	lnet_peer_t		*lp;
+	struct list_head		head;
+	int			i = 2;
+
+	if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
+		   list_empty(&the_lnet.ln_rcd_deathrow) &&
+		   list_empty(&the_lnet.ln_rcd_zombie)))
+		return;
+
+	INIT_LIST_HEAD(&head);
+
+	lnet_net_lock(LNET_LOCK_EX);
+
+	if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
+		/* router checker is stopping, prune all */
+		list_for_each_entry(lp, &the_lnet.ln_routers,
+					lp_rtr_list) {
+			if (lp->lp_rcd == NULL)
+				continue;
+
+			LASSERT(list_empty(&lp->lp_rcd->rcd_list));
+			list_add(&lp->lp_rcd->rcd_list,
+				     &the_lnet.ln_rcd_deathrow);
+			lp->lp_rcd = NULL;
+		}
+	}
+
+	/* unlink all RCDs on deathrow list */
+	list_splice_init(&the_lnet.ln_rcd_deathrow, &head);
+
+	if (!list_empty(&head)) {
+		lnet_net_unlock(LNET_LOCK_EX);
+
+		list_for_each_entry(rcd, &head, rcd_list)
+			LNetMDUnlink(rcd->rcd_mdh);
+
+		lnet_net_lock(LNET_LOCK_EX);
+	}
+
+	list_splice_init(&head, &the_lnet.ln_rcd_zombie);
+
+	/* release all zombie RCDs */
+	while (!list_empty(&the_lnet.ln_rcd_zombie)) {
+		list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie,
+					     rcd_list) {
+			if (LNetHandleIsInvalid(rcd->rcd_mdh))
+				list_move(&rcd->rcd_list, &head);
+		}
+
+		wait_unlink = wait_unlink &&
+			      !list_empty(&the_lnet.ln_rcd_zombie);
+
+		lnet_net_unlock(LNET_LOCK_EX);
+
+		while (!list_empty(&head)) {
+			rcd = list_entry(head.next,
+					     lnet_rc_data_t, rcd_list);
+			list_del_init(&rcd->rcd_list);
+			lnet_destroy_rc_data(rcd);
+		}
+
+		if (!wait_unlink)
+			return;
+
+		i++;
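+		/* (i & -i) == i only for powers of two, so the warning
+		 * fires with decreasing frequency */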
+		CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
+		       "Waiting for rc buffers to unlink\n");
+		cfs_pause(cfs_time_seconds(1) / 4);
+
+		lnet_net_lock(LNET_LOCK_EX);
+	}
+
+	lnet_net_unlock(LNET_LOCK_EX);
+}
+
+
+#if  defined(LNET_ROUTER)
+
+static int
+lnet_router_checker(void *arg)
+{
+	lnet_peer_t       *rtr;
+	struct list_head	*entry;
+
+	cfs_block_allsigs();
+
+	LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
+
+	while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
+		__u64	version;
+		int	cpt;
+		int	cpt2;
+
+		cpt = lnet_net_lock_current();
+rescan:
+		version = the_lnet.ln_routers_version;
+
+		list_for_each(entry, &the_lnet.ln_routers) {
+			rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
+
+			cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
+			if (cpt != cpt2) {
+				lnet_net_unlock(cpt);
+				cpt = cpt2;
+				lnet_net_lock(cpt);
+				/* the routers list has changed */
+				if (version != the_lnet.ln_routers_version)
+					goto rescan;
+			}
+
+			lnet_ping_router_locked(rtr);
+
+			/* NB dropped lock */
+			if (version != the_lnet.ln_routers_version) {
+				/* the routers list has changed */
+				goto rescan;
+			}
+		}
+
+		if (the_lnet.ln_routing)
+			lnet_update_ni_status_locked();
+
+		lnet_net_unlock(cpt);
+
+		lnet_prune_rc_data(0); /* don't wait for UNLINK */
+
+		/* Calling cfs_pause() here always adds 1 to the load average
+		 * because the kernel counts # active tasks as nr_running
+		 * + nr_uninterruptible. */
+		schedule_timeout_and_set_state(TASK_INTERRUPTIBLE,
+						   cfs_time_seconds(1));
+	}
+
+	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING);
+
+	lnet_prune_rc_data(1); /* wait for UNLINK */
+
+	the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
+	up(&the_lnet.ln_rc_signal);
+	/* The unlink event callback will signal final completion */
+	return 0;
+}
+
+void
+lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages)
+{
+	int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
+
+	while (--npages >= 0)
+		__free_page(rb->rb_kiov[npages].kiov_page);
+
+	LIBCFS_FREE(rb, sz);
+}
+
+lnet_rtrbuf_t *
+lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
+{
+	int	    npages = rbp->rbp_npages;
+	int	    sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
+	struct page   *page;
+	lnet_rtrbuf_t *rb;
+	int	    i;
+
+	LIBCFS_CPT_ALLOC(rb, lnet_cpt_table(), cpt, sz);
+	if (rb == NULL)
+		return NULL;
+
+	rb->rb_pool = rbp;
+
+	for (i = 0; i < npages; i++) {
+		page = cfs_page_cpt_alloc(lnet_cpt_table(), cpt,
+					  __GFP_ZERO | GFP_IOFS);
+		if (page == NULL) {
+			while (--i >= 0)
+				__free_page(rb->rb_kiov[i].kiov_page);
+
+			LIBCFS_FREE(rb, sz);
+			return NULL;
+		}
+
+		rb->rb_kiov[i].kiov_len = PAGE_CACHE_SIZE;
+		rb->rb_kiov[i].kiov_offset = 0;
+		rb->rb_kiov[i].kiov_page = page;
+	}
+
+	return rb;
+}
+
+void
+lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp)
+{
+	int		npages = rbp->rbp_npages;
+	int		nbuffers = 0;
+	lnet_rtrbuf_t	*rb;
+
+	if (rbp->rbp_nbuffers == 0) /* not initialized or already freed */
+		return;
+
+	LASSERT (list_empty(&rbp->rbp_msgs));
+	LASSERT (rbp->rbp_credits == rbp->rbp_nbuffers);
+
+	while (!list_empty(&rbp->rbp_bufs)) {
+		LASSERT (rbp->rbp_credits > 0);
+
+		rb = list_entry(rbp->rbp_bufs.next,
+				    lnet_rtrbuf_t, rb_list);
+		list_del(&rb->rb_list);
+		lnet_destroy_rtrbuf(rb, npages);
+		nbuffers++;
+	}
+
+	LASSERT (rbp->rbp_nbuffers == nbuffers);
+	LASSERT (rbp->rbp_credits == nbuffers);
+
+	rbp->rbp_nbuffers = rbp->rbp_credits = 0;
+}
+
+int
+lnet_rtrpool_alloc_bufs(lnet_rtrbufpool_t *rbp, int nbufs, int cpt)
+{
+	lnet_rtrbuf_t *rb;
+	int	    i;
+
+	if (rbp->rbp_nbuffers != 0) {
+		LASSERT (rbp->rbp_nbuffers == nbufs);
+		return 0;
+	}
+
+	for (i = 0; i < nbufs; i++) {
+		rb = lnet_new_rtrbuf(rbp, cpt);
+
+		if (rb == NULL) {
+			CERROR("Failed to allocate %d router bufs of %d pages\n",
+			       nbufs, rbp->rbp_npages);
+			return -ENOMEM;
+		}
+
+		rbp->rbp_nbuffers++;
+		rbp->rbp_credits++;
+		rbp->rbp_mincredits++;
+		list_add(&rb->rb_list, &rbp->rbp_bufs);
+
+		/* No allocation "under fire"; otherwise we'd need code to
+		 * schedule blocked msgs etc. */
+		LASSERT (!the_lnet.ln_routing);
+	}
+
+	LASSERT (rbp->rbp_credits == nbufs);
+	return 0;
+}
+
+void
+lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages)
+{
+	INIT_LIST_HEAD(&rbp->rbp_msgs);
+	INIT_LIST_HEAD(&rbp->rbp_bufs);
+
+	rbp->rbp_npages = npages;
+	rbp->rbp_credits = 0;
+	rbp->rbp_mincredits = 0;
+}
+
+void
+lnet_rtrpools_free(void)
+{
+	lnet_rtrbufpool_t *rtrp;
+	int		  i;
+
+	if (the_lnet.ln_rtrpools == NULL) /* uninitialized or freed */
+		return;
+
+	cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
+		lnet_rtrpool_free_bufs(&rtrp[0]);
+		lnet_rtrpool_free_bufs(&rtrp[1]);
+		lnet_rtrpool_free_bufs(&rtrp[2]);
+	}
+
+	cfs_percpt_free(the_lnet.ln_rtrpools);
+	the_lnet.ln_rtrpools = NULL;
+}
+
+static int
+lnet_nrb_tiny_calculate(int npages)
+{
+	int	nrbs = LNET_NRB_TINY;
+
+	if (tiny_router_buffers < 0) {
+		LCONSOLE_ERROR_MSG(0x10c,
+				   "tiny_router_buffers=%d invalid when "
+				   "routing enabled\n", tiny_router_buffers);
+		return -1;
+	}
+
+	if (tiny_router_buffers > 0)
+		nrbs = tiny_router_buffers;
+
+	nrbs /= LNET_CPT_NUMBER;
+	return max(nrbs, LNET_NRB_TINY_MIN);
+}
+
+static int
+lnet_nrb_small_calculate(int npages)
+{
+	int	nrbs = LNET_NRB_SMALL;
+
+	if (small_router_buffers < 0) {
+		LCONSOLE_ERROR_MSG(0x10c,
+				   "small_router_buffers=%d invalid when "
+				   "routing enabled\n", small_router_buffers);
+		return -1;
+	}
+
+	if (small_router_buffers > 0)
+		nrbs = small_router_buffers;
+
+	nrbs /= LNET_CPT_NUMBER;
+	return max(nrbs, LNET_NRB_SMALL_MIN);
+}
+
+static int
+lnet_nrb_large_calculate(int npages)
+{
+	int	nrbs = LNET_NRB_LARGE;
+
+	if (large_router_buffers < 0) {
+		LCONSOLE_ERROR_MSG(0x10c,
+				   "large_router_buffers=%d invalid when "
+				   "routing enabled\n", large_router_buffers);
+		return -1;
+	}
+
+	if (large_router_buffers > 0)
+		nrbs = large_router_buffers;
+
+	nrbs /= LNET_CPT_NUMBER;
+	return max(nrbs, LNET_NRB_LARGE_MIN);
+}
+
+int
+lnet_rtrpools_alloc(int im_a_router)
+{
+	lnet_rtrbufpool_t *rtrp;
+	int	large_pages = (LNET_MTU + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	int	small_pages = 1;
+	int	nrb_tiny;
+	int	nrb_small;
+	int	nrb_large;
+	int	rc;
+	int	i;
+
+	if (!strcmp(forwarding, "")) {
+		/* not set either way */
+		if (!im_a_router)
+			return 0;
+	} else if (!strcmp(forwarding, "disabled")) {
+		/* explicitly disabled */
+		return 0;
+	} else if (!strcmp(forwarding, "enabled")) {
+		/* explicitly enabled */
+	} else {
+		LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either "
+				   "'enabled' or 'disabled'\n");
+		return -EINVAL;
+	}
+
+	nrb_tiny = lnet_nrb_tiny_calculate(0);
+	if (nrb_tiny < 0)
+		return -EINVAL;
+
+	nrb_small = lnet_nrb_small_calculate(small_pages);
+	if (nrb_small < 0)
+		return -EINVAL;
+
+	nrb_large = lnet_nrb_large_calculate(large_pages);
+	if (nrb_large < 0)
+		return -EINVAL;
+
+	the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(),
+						LNET_NRBPOOLS *
+						sizeof(lnet_rtrbufpool_t));
+	if (the_lnet.ln_rtrpools == NULL) {
+		LCONSOLE_ERROR_MSG(0x10c,
+				   "Failed to initialize router buffer pool\n");
+		return -ENOMEM;
+	}
+
+	cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
+		lnet_rtrpool_init(&rtrp[0], 0);
+		rc = lnet_rtrpool_alloc_bufs(&rtrp[0], nrb_tiny, i);
+		if (rc != 0)
+			goto failed;
+
+		lnet_rtrpool_init(&rtrp[1], small_pages);
+		rc = lnet_rtrpool_alloc_bufs(&rtrp[1], nrb_small, i);
+		if (rc != 0)
+			goto failed;
+
+		lnet_rtrpool_init(&rtrp[2], large_pages);
+		rc = lnet_rtrpool_alloc_bufs(&rtrp[2], nrb_large, i);
+		if (rc != 0)
+			goto failed;
+	}
+
+	lnet_net_lock(LNET_LOCK_EX);
+	the_lnet.ln_routing = 1;
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	return 0;
+
+ failed:
+	lnet_rtrpools_free();
+	return rc;
+}
+
+int
+lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when)
+{
+	struct lnet_peer	*lp = NULL;
+	cfs_time_t		now = cfs_time_current();
+	int			cpt = lnet_cpt_of_nid(nid);
+
+	LASSERT (!in_interrupt ());
+
+	CDEBUG (D_NET, "%s notifying %s: %s\n",
+		(ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
+		libcfs_nid2str(nid),
+		alive ? "up" : "down");
+
+	if (ni != NULL &&
+	    LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
+		CWARN ("Ignoring notification of %s %s by %s (different net)\n",
+			libcfs_nid2str(nid), alive ? "birth" : "death",
+			libcfs_nid2str(ni->ni_nid));
+		return -EINVAL;
+	}
+
+	/* can't do predictions... */
+	if (cfs_time_after(when, now)) {
+		CWARN ("Ignoring prediction from %s of %s %s "
+		       "%ld seconds in the future\n",
+		       (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
+		       libcfs_nid2str(nid), alive ? "up" : "down",
+		       cfs_duration_sec(cfs_time_sub(when, now)));
+		return -EINVAL;
+	}
+
+	if (ni != NULL && !alive &&	     /* LND telling me she's down */
+	    !auto_down) {		       /* auto-down disabled */
+		CDEBUG(D_NET, "Auto-down disabled\n");
+		return 0;
+	}
+
+	lnet_net_lock(cpt);
+
+	if (the_lnet.ln_shutdown) {
+		lnet_net_unlock(cpt);
+		return -ESHUTDOWN;
+	}
+
+	lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
+	if (lp == NULL) {
+		/* nid not found */
+		lnet_net_unlock(cpt);
+		CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
+		return 0;
+	}
+
+	/* We can't fully trust the LND to report an exact peer last_alive
+	 * when it notifies us about a dead peer. For example ksocklnd can
+	 * call us with when == _time_when_the_node_was_booted_ if
+	 * no connections were successfully established */
+	if (ni != NULL && !alive && when < lp->lp_last_alive)
+		when = lp->lp_last_alive;
+
+	lnet_notify_locked(lp, ni == NULL, alive, when);
+
+	lnet_ni_notify_locked(ni, lp);
+
+	lnet_peer_decref_locked(lp);
+
+	lnet_net_unlock(cpt);
+	return 0;
+}
+EXPORT_SYMBOL(lnet_notify);
+
+void
+lnet_get_tunables (void)
+{
+	return;
+}
+
+#else
+
+int
+lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when)
+{
+	return -EOPNOTSUPP;
+}
+
+void
+lnet_router_checker (void)
+{
+	static time_t last = 0;
+	static int    running = 0;
+
+	time_t	    now = cfs_time_current_sec();
+	int	       interval = now - last;
+	int	       rc;
+	__u64	     version;
+	lnet_peer_t      *rtr;
+
+	/* It's no use calling me again within a second; all intervals and
+	 * timeouts are measured in seconds */
+	if (last != 0 && interval < 2)
+		return;
+
+	if (last != 0 &&
+	    interval > MAX(live_router_check_interval,
+			   dead_router_check_interval))
+		CNETERR("Checker(%d/%d) not called for %d seconds\n",
+			live_router_check_interval, dead_router_check_interval,
+			interval);
+
+	LASSERT(LNET_CPT_NUMBER == 1);
+
+	lnet_net_lock(0);
+	LASSERT(!running); /* recursion check */
+	running = 1;
+	lnet_net_unlock(0);
+
+	last = now;
+
+	if (the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING)
+		lnet_prune_rc_data(0); /* unlink all rcd and nowait */
+
+	/* consume all pending events */
+	while (1) {
+		int	  i;
+		lnet_event_t ev;
+
+		/* NB ln_rc_eqh must be the 1st in 'eventqs' otherwise the
+		 * recursion breaker in LNetEQPoll would fail */
+		rc = LNetEQPoll(&the_lnet.ln_rc_eqh, 1, 0, &ev, &i);
+		if (rc == 0)   /* no event pending */
+			break;
+
+		/* NB a lost SENT prevents me from pinging a router again */
+		if (rc == -EOVERFLOW) {
+			CERROR("Dropped an event!!!\n");
+			abort();
+		}
+
+		LASSERT (rc == 1);
+
+		lnet_router_checker_event(&ev);
+	}
+
+	if (the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING) {
+		lnet_prune_rc_data(1); /* release rcd */
+		the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
+		running = 0;
+		return;
+	}
+
+	LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
+
+	lnet_net_lock(0);
+
+	version = the_lnet.ln_routers_version;
+	list_for_each_entry (rtr, &the_lnet.ln_routers, lp_rtr_list) {
+		lnet_ping_router_locked(rtr);
+		LASSERT (version == the_lnet.ln_routers_version);
+	}
+
+	lnet_net_unlock(0);
+
+	running = 0; /* lock only needed for the recursion check */
+	return;
+}
+
+/* NB lnet_peers_start_down depends on me,
+ * so must be called before any peer creation */
+void
+lnet_get_tunables (void)
+{
+	char *s;
+
+	s = getenv("LNET_ROUTER_PING_TIMEOUT");
+	if (s != NULL) router_ping_timeout = atoi(s);
+
+	s = getenv("LNET_LIVE_ROUTER_CHECK_INTERVAL");
+	if (s != NULL) live_router_check_interval = atoi(s);
+
+	s = getenv("LNET_DEAD_ROUTER_CHECK_INTERVAL");
+	if (s != NULL) dead_router_check_interval = atoi(s);
+
+	/* This replaces old lnd_notify mechanism */
+	check_routers_before_use = 1;
+	if (dead_router_check_interval <= 0)
+		dead_router_check_interval = 30;
+}
+
+void
+lnet_rtrpools_free(void)
+{
+}
+
+int
+lnet_rtrpools_alloc(int im_a_router)
+{
+	return 0;
+}
+
+#endif

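Reviewer note: lnet_add_route_to_rnet() does not simply append; it counts the existing routes and splices the new one in at a random offset, so identical route tables configured on many nodes don't all prefer the same gateway. The core of that insertion as a self-contained sketch (the list layout and names are illustrative, not LNet's):

	#include <stdio.h>
	#include <stdlib.h>
	#include <time.h>

	struct route {
		struct route *next;
		int	      id;
	};

	static void insert_random(struct route **head, struct route *r)
	{
		struct route **pos = head;
		struct route *p;
		unsigned int len = 0;
		unsigned int off;

		for (p = *head; p != NULL; p = p->next)
			len++;

		/* len + 1 possible positions; also avoids modulo by zero */
		off = (unsigned int)rand() % (len + 1);
		while (off-- > 0)
			pos = &(*pos)->next;

		r->next = *pos;
		*pos = r;
	}

	int main(void)
	{
		struct route *head = NULL;
		struct route *r;
		int i;

		srand((unsigned int)time(NULL));
		for (i = 0; i < 5; i++) {
			r = malloc(sizeof(*r));
			if (r == NULL)
				return 1;
			r->id = i;
			insert_random(&head, r);
		}
		for (r = head; r != NULL; r = r->next)
			printf("%d ", r->id);
		putchar('\n');
		return 0;
	}

lnet_shuffle_seed() mixes the local NID into the RNG seed for exactly this reason: different nodes must make different random choices.
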
+ 950 - 0
drivers/staging/lustre/lnet/lnet/router_proc.c

@@ -0,0 +1,950 @@
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ *   This file is part of Portals
+ *   http://sourceforge.net/projects/sandiaportals/
+ *
+ *   Portals is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Portals is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Portals; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lib-lnet.h>
+
+#if  defined(LNET_ROUTER)
+
+/* This is really lnet_proc.c. You might need to update sanity test 215
+ * if any file format is changed. */
+
+static ctl_table_header_t *lnet_table_header = NULL;
+
+#define CTL_LNET	 (0x100)
+enum {
+	PSDEV_LNET_STATS = 100,
+	PSDEV_LNET_ROUTES,
+	PSDEV_LNET_ROUTERS,
+	PSDEV_LNET_PEERS,
+	PSDEV_LNET_BUFFERS,
+	PSDEV_LNET_NIS,
+	PSDEV_LNET_PTL_ROTOR,
+};
+
+#define LNET_LOFFT_BITS		(sizeof(loff_t) * 8)
+/*
+ * NB: max allowed LNET_CPT_BITS is 8 on 64-bit system and 2 on 32-bit system
+ */
+#define LNET_PROC_CPT_BITS	(LNET_CPT_BITS + 1)
+/* change version, 16 bits or 8 bits */
+#define LNET_PROC_VER_BITS	MAX(((MIN(LNET_LOFFT_BITS, 64)) / 4), 8)
+
+#define LNET_PROC_HASH_BITS	LNET_PEER_HASH_BITS
+/*
+ * bits for peer hash offset
+ * NB: we don't use the highest bit of *ppos because it's signed
+ */
+#define LNET_PROC_HOFF_BITS	(LNET_LOFFT_BITS -       \
+				 LNET_PROC_CPT_BITS -    \
+				 LNET_PROC_VER_BITS -    \
+				 LNET_PROC_HASH_BITS - 1)
+/* bits for hash index + position */
+#define LNET_PROC_HPOS_BITS	(LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS)
+/* bits for peer hash table + hash version */
+#define LNET_PROC_VPOS_BITS	(LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS)
+
+#define LNET_PROC_CPT_MASK	((1ULL << LNET_PROC_CPT_BITS) - 1)
+#define LNET_PROC_VER_MASK	((1ULL << LNET_PROC_VER_BITS) - 1)
+#define LNET_PROC_HASH_MASK	((1ULL << LNET_PROC_HASH_BITS) - 1)
+#define LNET_PROC_HOFF_MASK	((1ULL << LNET_PROC_HOFF_BITS) - 1)
+
+#define LNET_PROC_CPT_GET(pos)				\
+	(int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK)
+
+#define LNET_PROC_VER_GET(pos)				\
+	(int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK)
+
+#define LNET_PROC_HASH_GET(pos)				\
+	(int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK)
+
+#define LNET_PROC_HOFF_GET(pos)				\
+	(int)((pos) & LNET_PROC_HOFF_MASK)
+
+#define LNET_PROC_POS_MAKE(cpt, ver, hash, off)		\
+	(((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) |   \
+	((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) |   \
+	((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) | \
+	((off) & LNET_PROC_HOFF_MASK))
+
+#define LNET_PROC_VERSION(v)	((unsigned int)((v) & LNET_PROC_VER_MASK))
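+
+/*
+ * Illustration (field widths are configuration-dependent, via
+ * LNET_CPT_BITS and LNET_PEER_HASH_BITS): the loff_t position
+ * packs, from high bits to low, as
+ *
+ *   | unused sign bit | cpt | version | hash bucket | offset |
+ *
+ * Bumping the version whenever the table changes lets a reader
+ * detect that a saved *ppos is stale and fail with -ESTALE instead
+ * of dereferencing an offset into a reshuffled hash table.
+ */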
+
+static int __proc_lnet_stats(void *data, int write,
+			     loff_t pos, void *buffer, int nob)
+{
+	int	      rc;
+	lnet_counters_t *ctrs;
+	int	      len;
+	char	    *tmpstr;
+	const int	tmpsiz = 256; /* 7 %u and 4 LPU64 */
+
+	if (write) {
+		lnet_counters_reset();
+		return 0;
+	}
+
+	/* read */
+
+	LIBCFS_ALLOC(ctrs, sizeof(*ctrs));
+	if (ctrs == NULL)
+		return -ENOMEM;
+
+	LIBCFS_ALLOC(tmpstr, tmpsiz);
+	if (tmpstr == NULL) {
+		LIBCFS_FREE(ctrs, sizeof(*ctrs));
+		return -ENOMEM;
+	}
+
+	lnet_counters_get(ctrs);
+
+	len = snprintf(tmpstr, tmpsiz,
+		       "%u %u %u %u %u %u %u "LPU64" "LPU64" "
+		       LPU64" "LPU64,
+		       ctrs->msgs_alloc, ctrs->msgs_max,
+		       ctrs->errors,
+		       ctrs->send_count, ctrs->recv_count,
+		       ctrs->route_count, ctrs->drop_count,
+		       ctrs->send_length, ctrs->recv_length,
+		       ctrs->route_length, ctrs->drop_length);
+
+	if (pos >= min_t(int, len, strlen(tmpstr)))
+		rc = 0;
+	else
+		rc = cfs_trace_copyout_string(buffer, nob,
+					      tmpstr + pos, "\n");
+
+	LIBCFS_FREE(tmpstr, tmpsiz);
+	LIBCFS_FREE(ctrs, sizeof(*ctrs));
+	return rc;
+}
+
+DECLARE_PROC_HANDLER(proc_lnet_stats);
+
+int LL_PROC_PROTO(proc_lnet_routes)
+{
+	const int	tmpsiz = 256;
+	char		*tmpstr;
+	char		*s;
+	int		rc = 0;
+	int		len;
+	int		ver;
+	int		off;
+
+	DECLARE_LL_PROC_PPOS_DECL;
+
+	CLASSERT(sizeof(loff_t) >= 4);
+
+	off = LNET_PROC_HOFF_GET(*ppos);
+	ver = LNET_PROC_VER_GET(*ppos);
+
+	LASSERT(!write);
+
+	if (*lenp == 0)
+		return 0;
+
+	LIBCFS_ALLOC(tmpstr, tmpsiz);
+	if (tmpstr == NULL)
+		return -ENOMEM;
+
+	s = tmpstr; /* points to current position in tmpstr[] */
+
+	if (*ppos == 0) {
+		s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n",
+			      the_lnet.ln_routing ? "enabled" : "disabled");
+		LASSERT(tmpstr + tmpsiz - s > 0);
+
+		s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %7s %s\n",
+			      "net", "hops", "state", "router");
+		LASSERT(tmpstr + tmpsiz - s > 0);
+
+		lnet_net_lock(0);
+		ver = (unsigned int)the_lnet.ln_remote_nets_version;
+		lnet_net_unlock(0);
+		*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
+	} else {
+		struct list_head		*n;
+		struct list_head		*r;
+		lnet_route_t		*route = NULL;
+		lnet_remotenet_t	*rnet  = NULL;
+		int			skip  = off - 1;
+		struct list_head		*rn_list;
+		int			i;
+
+		lnet_net_lock(0);
+
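+		/*
+		 * The route list may have changed since *ppos was encoded;
+		 * if the saved version no longer matches, the offset is
+		 * meaningless, so ask the reader to restart from scratch.
+		 */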
+		if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) {
+			lnet_net_unlock(0);
+			LIBCFS_FREE(tmpstr, tmpsiz);
+			return -ESTALE;
+		}
+
+		for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && route == NULL;
+		     i++) {
+			rn_list = &the_lnet.ln_remote_nets_hash[i];
+
+			n = rn_list->next;
+
+			while (n != rn_list && route == NULL) {
+				rnet = list_entry(n, lnet_remotenet_t,
+						      lrn_list);
+
+				r = rnet->lrn_routes.next;
+
+				while (r != &rnet->lrn_routes) {
+					lnet_route_t *re =
+						list_entry(r, lnet_route_t,
+							       lr_list);
+					if (skip == 0) {
+						route = re;
+						break;
+					}
+
+					skip--;
+					r = r->next;
+				}
+
+				n = n->next;
+			}
+		}
+
+		if (route != NULL) {
+			__u32	net   = rnet->lrn_net;
+			unsigned int hops  = route->lr_hops;
+			lnet_nid_t   nid   = route->lr_gateway->lp_nid;
+			int	  alive = route->lr_gateway->lp_alive;
+
+			s += snprintf(s, tmpstr + tmpsiz - s,
+				      "%-8s %4u %7s %s\n",
+				      libcfs_net2str(net), hops,
+				      alive ? "up" : "down",
+				      libcfs_nid2str(nid));
+			LASSERT(tmpstr + tmpsiz - s > 0);
+		}
+
+		lnet_net_unlock(0);
+	}
+
+	len = s - tmpstr;     /* how many bytes were written */
+
+	if (len > *lenp) {    /* linux-supplied buffer is too small */
+		rc = -EINVAL;
+	} else if (len > 0) { /* wrote something */
+		if (copy_to_user(buffer, tmpstr, len))
+			rc = -EFAULT;
+		else {
+			off += 1;
+			*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
+		}
+	}
+
+	LIBCFS_FREE(tmpstr, tmpsiz);
+
+	if (rc == 0)
+		*lenp = len;
+
+	return rc;
+}
+
+int LL_PROC_PROTO(proc_lnet_routers)
+{
+	int	rc = 0;
+	char      *tmpstr;
+	char      *s;
+	const int  tmpsiz = 256;
+	int	len;
+	int	ver;
+	int	off;
+
+	DECLARE_LL_PROC_PPOS_DECL;
+
+	off = LNET_PROC_HOFF_GET(*ppos);
+	ver = LNET_PROC_VER_GET(*ppos);
+
+	LASSERT(!write);
+
+	if (*lenp == 0)
+		return 0;
+
+	LIBCFS_ALLOC(tmpstr, tmpsiz);
+	if (tmpstr == NULL)
+		return -ENOMEM;
+
+	s = tmpstr; /* points to current position in tmpstr[] */
+
+	if (*ppos == 0) {
+		s += snprintf(s, tmpstr + tmpsiz - s,
+			      "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n",
+			      "ref", "rtr_ref", "alive_cnt", "state",
+			      "last_ping", "ping_sent", "deadline",
+			      "down_ni", "router");
+		LASSERT(tmpstr + tmpsiz - s > 0);
+
+		lnet_net_lock(0);
+		ver = (unsigned int)the_lnet.ln_routers_version;
+		lnet_net_unlock(0);
+		*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
+	} else {
+		struct list_head		*r;
+		struct lnet_peer	*peer = NULL;
+		int			skip = off - 1;
+
+		lnet_net_lock(0);
+
+		if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) {
+			lnet_net_unlock(0);
+
+			LIBCFS_FREE(tmpstr, tmpsiz);
+			return -ESTALE;
+		}
+
+		r = the_lnet.ln_routers.next;
+
+		while (r != &the_lnet.ln_routers) {
+			lnet_peer_t *lp = list_entry(r, lnet_peer_t,
+							 lp_rtr_list);
+
+			if (skip == 0) {
+				peer = lp;
+				break;
+			}
+
+			skip--;
+			r = r->next;
+		}
+
+		if (peer != NULL) {
+			lnet_nid_t nid = peer->lp_nid;
+			cfs_time_t now = cfs_time_current();
+			cfs_time_t deadline = peer->lp_ping_deadline;
+			int nrefs     = peer->lp_refcount;
+			int nrtrrefs  = peer->lp_rtr_refcount;
+			int alive_cnt = peer->lp_alive_count;
+			int alive     = peer->lp_alive;
+			int pingsent  = !peer->lp_ping_notsent;
+			int last_ping = cfs_duration_sec(cfs_time_sub(now,
+						     peer->lp_ping_timestamp));
+			int down_ni   = 0;
+			lnet_route_t *rtr;
+
+			if ((peer->lp_ping_feats &
+			     LNET_PING_FEAT_NI_STATUS) != 0) {
+				list_for_each_entry(rtr, &peer->lp_routes,
+							lr_gwlist) {
+					/* downis on any route should be the
+					 * number of downis on the gateway */
+					if (rtr->lr_downis != 0) {
+						down_ni = rtr->lr_downis;
+						break;
+					}
+				}
+			}
+
+			if (deadline == 0)
+				s += snprintf(s, tmpstr + tmpsiz - s,
+					      "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n",
+					      nrefs, nrtrrefs, alive_cnt,
+					      alive ? "up" : "down", last_ping,
+					      pingsent, "NA", down_ni,
+					      libcfs_nid2str(nid));
+			else
+				s += snprintf(s, tmpstr + tmpsiz - s,
+					      "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n",
+					      nrefs, nrtrrefs, alive_cnt,
+					      alive ? "up" : "down", last_ping,
+					      pingsent,
+					      cfs_duration_sec(cfs_time_sub(deadline, now)),
+					      down_ni, libcfs_nid2str(nid));
+			LASSERT(tmpstr + tmpsiz - s > 0);
+		}
+
+		lnet_net_unlock(0);
+	}
+
+	len = s - tmpstr;     /* how many bytes were written */
+
+	if (len > *lenp) {    /* linux-supplied buffer is too small */
+		rc = -EINVAL;
+	} else if (len > 0) { /* wrote something */
+		if (copy_to_user(buffer, tmpstr, len))
+			rc = -EFAULT;
+		else {
+			off += 1;
+			*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
+		}
+	}
+
+	LIBCFS_FREE(tmpstr, tmpsiz);
+
+	if (rc == 0)
+		*lenp = len;
+
+	return rc;
+}
+
+int LL_PROC_PROTO(proc_lnet_peers)
+{
+	const int		tmpsiz  = 256;
+	struct lnet_peer_table	*ptable;
+	char			*tmpstr;
+	char			*s;
+	int			cpt  = LNET_PROC_CPT_GET(*ppos);
+	int			ver  = LNET_PROC_VER_GET(*ppos);
+	int			hash = LNET_PROC_HASH_GET(*ppos);
+	int			hoff = LNET_PROC_HOFF_GET(*ppos);
+	int			rc = 0;
+	int			len;
+
+	CLASSERT(LNET_PROC_HASH_BITS >= LNET_PEER_HASH_BITS);
+	LASSERT(!write);
+
+	if (*lenp == 0)
+		return 0;
+
+	if (cpt >= LNET_CPT_NUMBER) {
+		*lenp = 0;
+		return 0;
+	}
+
+	LIBCFS_ALLOC(tmpstr, tmpsiz);
+	if (tmpstr == NULL)
+		return -ENOMEM;
+
+	s = tmpstr; /* points to current position in tmpstr[] */
+
+	if (*ppos == 0) {
+		s += snprintf(s, tmpstr + tmpsiz - s,
+			      "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n",
+			      "nid", "refs", "state", "last", "max",
+			      "rtr", "min", "tx", "min", "queue");
+		LASSERT(tmpstr + tmpsiz - s > 0);
+
+		hoff++;
+	} else {
+		struct lnet_peer	*peer;
+		struct list_head		*p;
+		int			skip;
+ again:
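+		/*
+		 * Scan the current CPT's peer hash table, skipping entries
+		 * already returned by earlier reads; when a bucket drains we
+		 * advance to the next one, and when the whole table drains
+		 * we advance to the next CPT via "goto again" below.
+		 */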
+		p = NULL;
+		peer = NULL;
+		skip = hoff - 1;
+
+		lnet_net_lock(cpt);
+		ptable = the_lnet.ln_peer_tables[cpt];
+		if (hoff == 1)
+			ver = LNET_PROC_VERSION(ptable->pt_version);
+
+		if (ver != LNET_PROC_VERSION(ptable->pt_version)) {
+			lnet_net_unlock(cpt);
+			LIBCFS_FREE(tmpstr, tmpsiz);
+			return -ESTALE;
+		}
+
+		while (hash < LNET_PEER_HASH_SIZE) {
+			if (p == NULL)
+				p = ptable->pt_hash[hash].next;
+
+			while (p != &ptable->pt_hash[hash]) {
+				lnet_peer_t *lp = list_entry(p, lnet_peer_t,
+								 lp_hashlist);
+				if (skip == 0) {
+					peer = lp;
+
+					/* minor optimization: start from idx+1
+					 * on next iteration if we've just
+					 * drained lp_hashlist */
+					if (lp->lp_hashlist.next ==
+					    &ptable->pt_hash[hash]) {
+						hoff = 1;
+						hash++;
+					} else {
+						hoff++;
+					}
+
+					break;
+				}
+
+				skip--;
+				p = lp->lp_hashlist.next;
+			}
+
+			if (peer != NULL)
+				break;
+
+			p = NULL;
+			hoff = 1;
+			hash++;
+		}
+
+		if (peer != NULL) {
+			lnet_nid_t nid       = peer->lp_nid;
+			int	nrefs     = peer->lp_refcount;
+			int	lastalive = -1;
+			char      *aliveness = "NA";
+			int	maxcr     = peer->lp_ni->ni_peertxcredits;
+			int	txcr      = peer->lp_txcredits;
+			int	mintxcr   = peer->lp_mintxcredits;
+			int	rtrcr     = peer->lp_rtrcredits;
+			int	minrtrcr  = peer->lp_minrtrcredits;
+			int	txqnob    = peer->lp_txqnob;
+
+			if (lnet_isrouter(peer) ||
+			    lnet_peer_aliveness_enabled(peer))
+				aliveness = peer->lp_alive ? "up" : "down";
+
+			if (lnet_peer_aliveness_enabled(peer)) {
+				cfs_time_t     now = cfs_time_current();
+				cfs_duration_t delta;
+
+				delta = cfs_time_sub(now, peer->lp_last_alive);
+				lastalive = cfs_duration_sec(delta);
+
+				/* No need to mess up the peers output with
+				 * arbitrarily large integers - it suffices to
+				 * know that lastalive is more than 10000s old
+				 */
+				if (lastalive >= 10000)
+					lastalive = 9999;
+			}
+
+			lnet_net_unlock(cpt);
+
+			s += snprintf(s, tmpstr + tmpsiz - s,
+				      "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n",
+				      libcfs_nid2str(nid), nrefs, aliveness,
+				      lastalive, maxcr, rtrcr, minrtrcr, txcr,
+				      mintxcr, txqnob);
+			LASSERT(tmpstr + tmpsiz - s > 0);
+
+		} else { /* peer is NULL */
+			lnet_net_unlock(cpt);
+		}
+
+		if (hash == LNET_PEER_HASH_SIZE) {
+			cpt++;
+			hash = 0;
+			hoff = 1;
+			if (peer == NULL && cpt < LNET_CPT_NUMBER)
+				goto again;
+		}
+	}
+
+	len = s - tmpstr;     /* how many bytes were written */
+
+	if (len > *lenp) {    /* linux-supplied buffer is too small */
+		rc = -EINVAL;
+	} else if (len > 0) { /* wrote something */
+		if (copy_to_user(buffer, tmpstr, len))
+			rc = -EFAULT;
+		else
+			*ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff);
+	}
+
+	LIBCFS_FREE(tmpstr, tmpsiz);
+
+	if (rc == 0)
+		*lenp = len;
+
+	return rc;
+}
+
+static int __proc_lnet_buffers(void *data, int write,
+			       loff_t pos, void *buffer, int nob)
+{
+	char	    *s;
+	char	    *tmpstr;
+	int		tmpsiz;
+	int		idx;
+	int		len;
+	int		rc;
+	int		i;
+
+	LASSERT(!write);
+
+	/* 64 bytes per line: 4 %d columns, (LNET_NRBPOOLS + 1) lines per CPT */
+	tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER;
+	LIBCFS_ALLOC(tmpstr, tmpsiz);
+	if (tmpstr == NULL)
+		return -ENOMEM;
+
+	s = tmpstr; /* points to current position in tmpstr[] */
+
+	s += snprintf(s, tmpstr + tmpsiz - s,
+		      "%5s %5s %7s %7s\n",
+		      "pages", "count", "credits", "min");
+	LASSERT(tmpstr + tmpsiz - s > 0);
+
+	if (the_lnet.ln_rtrpools == NULL)
+		goto out; /* I'm not a router */
+
+	for (idx = 0; idx < LNET_NRBPOOLS; idx++) {
+		lnet_rtrbufpool_t *rbp;
+
+		lnet_net_lock(LNET_LOCK_EX);
+		cfs_percpt_for_each(rbp, i, the_lnet.ln_rtrpools) {
+			s += snprintf(s, tmpstr + tmpsiz - s,
+				      "%5d %5d %7d %7d\n",
+				      rbp[idx].rbp_npages,
+				      rbp[idx].rbp_nbuffers,
+				      rbp[idx].rbp_credits,
+				      rbp[idx].rbp_mincredits);
+			LASSERT(tmpstr + tmpsiz - s > 0);
+		}
+		lnet_net_unlock(LNET_LOCK_EX);
+	}
+
+ out:
+	len = s - tmpstr;
+
+	if (pos >= min_t(int, len, strlen(tmpstr)))
+		rc = 0;
+	else
+		rc = cfs_trace_copyout_string(buffer, nob,
+					      tmpstr + pos, NULL);
+
+	LIBCFS_FREE(tmpstr, tmpsiz);
+	return rc;
+}
+
+DECLARE_PROC_HANDLER(proc_lnet_buffers);
+
+int LL_PROC_PROTO(proc_lnet_nis)
+{
+	int	tmpsiz = 128 * LNET_CPT_NUMBER;
+	int	rc = 0;
+	char      *tmpstr;
+	char      *s;
+	int	len;
+
+	DECLARE_LL_PROC_PPOS_DECL;
+
+	LASSERT(!write);
+
+	if (*lenp == 0)
+		return 0;
+
+	LIBCFS_ALLOC(tmpstr, tmpsiz);
+	if (tmpstr == NULL)
+		return -ENOMEM;
+
+	s = tmpstr; /* points to current position in tmpstr[] */
+
+	if (*ppos == 0) {
+		s += snprintf(s, tmpstr + tmpsiz - s,
+			      "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
+			      "nid", "status", "alive", "refs", "peer",
+			      "rtr", "max", "tx", "min");
+		LASSERT(tmpstr + tmpsiz - s > 0);
+	} else {
+		struct list_head	*n;
+		lnet_ni_t	 *ni   = NULL;
+		int		skip = *ppos - 1;
+
+		lnet_net_lock(0);
+
+		n = the_lnet.ln_nis.next;
+
+		while (n != &the_lnet.ln_nis) {
+			lnet_ni_t *a_ni = list_entry(n, lnet_ni_t, ni_list);
+
+			if (skip == 0) {
+				ni = a_ni;
+				break;
+			}
+
+			skip--;
+			n = n->next;
+		}
+
+		if (ni != NULL) {
+			struct lnet_tx_queue	*tq;
+			char	*stat;
+			long	now = cfs_time_current_sec();
+			int	last_alive = -1;
+			int	i;
+			int	j;
+
+			if (the_lnet.ln_routing)
+				last_alive = now - ni->ni_last_alive;
+
+			/* @lo forever alive */
+			if (ni->ni_lnd->lnd_type == LOLND)
+				last_alive = 0;
+
+			lnet_ni_lock(ni);
+			LASSERT(ni->ni_status != NULL);
+			stat = (ni->ni_status->ns_status ==
+				LNET_NI_STATUS_UP) ? "up" : "down";
+			lnet_ni_unlock(ni);
+
+			/* we actually output credits information for
+			 * TX queue of each partition */
+			cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
+				for (j = 0; ni->ni_cpts != NULL &&
+				     j < ni->ni_ncpts; j++) {
+					if (i == ni->ni_cpts[j])
+						break;
+				}
+
+				if (j == ni->ni_ncpts)
+					continue;
+
+				if (i != 0)
+					lnet_net_lock(i);
+
+				s += snprintf(s, tmpstr + tmpsiz - s,
+				      "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
+				      libcfs_nid2str(ni->ni_nid), stat,
+				      last_alive, *ni->ni_refs[i],
+				      ni->ni_peertxcredits,
+				      ni->ni_peerrtrcredits,
+				      tq->tq_credits_max,
+				      tq->tq_credits, tq->tq_credits_min);
+				if (i != 0)
+					lnet_net_unlock(i);
+			}
+			LASSERT(tmpstr + tmpsiz - s > 0);
+		}
+
+		lnet_net_unlock(0);
+	}
+
+	len = s - tmpstr;     /* how many bytes were written */
+
+	if (len > *lenp) {    /* linux-supplied buffer is too small */
+		rc = -EINVAL;
+	} else if (len > 0) { /* wrote something */
+		if (copy_to_user(buffer, tmpstr, len))
+			rc = -EFAULT;
+		else
+			*ppos += 1;
+	}
+
+	LIBCFS_FREE(tmpstr, tmpsiz);
+
+	if (rc == 0)
+		*lenp = len;
+
+	return rc;
+}
+
+struct lnet_portal_rotors {
+	int	     pr_value;
+	const char      *pr_name;
+	const char	*pr_desc;
+};
+
+static struct lnet_portal_rotors	portal_rotors[] = {
+	{
+		.pr_value = LNET_PTL_ROTOR_OFF,
+		.pr_name  = "OFF",
+		.pr_desc  = "Turn off message rotor for wildcard portals"
+	},
+	{
+		.pr_value = LNET_PTL_ROTOR_ON,
+		.pr_name  = "ON",
+		.pr_desc  = "round-robin dispatch all PUT messages for "
+			    "wildcard portals"
+	},
+	{
+		.pr_value = LNET_PTL_ROTOR_RR_RT,
+		.pr_name  = "RR_RT",
+		.pr_desc  = "round-robin dispatch routed PUT message for "
+			    "wildcard portals"
+	},
+	{
+		.pr_value = LNET_PTL_ROTOR_HASH_RT,
+		.pr_name  = "HASH_RT",
+		.pr_desc  = "dispatch routed PUT message by hashing source "
+			    "NID for wildcard portals"
+	},
+	{
+		.pr_value = -1,
+		.pr_name  = NULL,
+		.pr_desc  = NULL
+	},
+};
+
+extern int portal_rotor;
+
+static int __proc_lnet_portal_rotor(void *data, int write,
+				    loff_t pos, void *buffer, int nob)
+{
+	const int	buf_len	= 128;
+	char		*buf;
+	char		*tmp;
+	int		rc;
+	int		i;
+
+	LIBCFS_ALLOC(buf, buf_len);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	if (!write) {
+		lnet_res_lock(0);
+
+		for (i = 0; portal_rotors[i].pr_value >= 0; i++) {
+			if (portal_rotors[i].pr_value == portal_rotor)
+				break;
+		}
+
+		LASSERT(portal_rotors[i].pr_value == portal_rotor);
+		lnet_res_unlock(0);
+
+		rc = snprintf(buf, buf_len,
+			      "{\n\tportals: all\n"
+			      "\trotor: %s\n\tdescription: %s\n}",
+			      portal_rotors[i].pr_name,
+			      portal_rotors[i].pr_desc);
+
+		if (pos >= min_t(int, rc, buf_len)) {
+			rc = 0;
+		} else {
+			rc = cfs_trace_copyout_string(buffer, nob,
+					buf + pos, "\n");
+		}
+		goto out;
+	}
+
+	rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob);
+	if (rc < 0)
+		goto out;
+
+	tmp = cfs_trimwhite(buf);
+
+	rc = -EINVAL;
+	lnet_res_lock(0);
+	for (i = 0; portal_rotors[i].pr_name != NULL; i++) {
+		if (cfs_strncasecmp(portal_rotors[i].pr_name, tmp,
+				    strlen(portal_rotors[i].pr_name)) == 0) {
+			portal_rotor = portal_rotors[i].pr_value;
+			rc = 0;
+			break;
+		}
+	}
+	lnet_res_unlock(0);
+out:
+	LIBCFS_FREE(buf, buf_len);
+	return rc;
+}
+DECLARE_PROC_HANDLER(proc_lnet_portal_rotor);
+
+static ctl_table_t lnet_table[] = {
+	/*
+	 * NB No .strategy entries have been provided since sysctl(8) prefers
+	 * to go via /proc for portability.
+	 */
+	{
+		INIT_CTL_NAME(PSDEV_LNET_STATS)
+		.procname = "stats",
+		.mode     = 0644,
+		.proc_handler = &proc_lnet_stats,
+	},
+	{
+		INIT_CTL_NAME(PSDEV_LNET_ROUTES)
+		.procname = "routes",
+		.mode     = 0444,
+		.proc_handler = &proc_lnet_routes,
+	},
+	{
+		INIT_CTL_NAME(PSDEV_LNET_ROUTERS)
+		.procname = "routers",
+		.mode     = 0444,
+		.proc_handler = &proc_lnet_routers,
+	},
+	{
+		INIT_CTL_NAME(PSDEV_LNET_PEERS)
+		.procname = "peers",
+		.mode     = 0444,
+		.proc_handler = &proc_lnet_peers,
+	},
+	{
+		INIT_CTL_NAME(PSDEV_LNET_BUFFERS)
+		.procname = "buffers",
+		.mode     = 0444,
+		.proc_handler = &proc_lnet_buffers,
+	},
+	{
+		INIT_CTL_NAME(PSDEV_LNET_NIS)
+		.procname = "nis",
+		.mode     = 0444,
+		.proc_handler = &proc_lnet_nis,
+	},
+	{
+		INIT_CTL_NAME(PSDEV_LNET_PTL_ROTOR)
+		.procname = "portal_rotor",
+		.mode     = 0644,
+		.proc_handler = &proc_lnet_portal_rotor,
+	},
+	{
+		INIT_CTL_NAME(0)
+	}
+};
+
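+/* parent table: attaches lnet_table above as the /proc/sys/lnet directory */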
+static ctl_table_t top_table[] = {
+	{
+		INIT_CTL_NAME(CTL_LNET)
+		.procname = "lnet",
+		.mode     = 0555,
+		.data     = NULL,
+		.maxlen   = 0,
+		.child    = lnet_table,
+	},
+	{
+		INIT_CTL_NAME(0)
+	}
+};
+
+void
+lnet_proc_init(void)
+{
+#ifdef CONFIG_SYSCTL
+	if (lnet_table_header == NULL)
+		lnet_table_header = cfs_register_sysctl_table(top_table, 0);
+#endif
+}
+
+void
+lnet_proc_fini(void)
+{
+#ifdef CONFIG_SYSCTL
+	if (lnet_table_header != NULL)
+		unregister_sysctl_table(lnet_table_header);
+
+	lnet_table_header = NULL;
+#endif
+}
+
+#else
+
+void
+lnet_proc_init(void)
+{
+}
+
+void
+lnet_proc_fini(void)
+{
+}
+
+#endif

+ 6 - 0
drivers/staging/lustre/lnet/selftest/Makefile

@@ -0,0 +1,6 @@
+obj-$(CONFIG_LNET_SELFTEST) := lnet_selftest.o
+
+lnet_selftest-y := console.o conrpc.o conctl.o framework.o timer.o rpc.o \
+		   module.o ping_test.o brw_test.o
+
+ccflags-y := -I$(src)/../include

+ 499 - 0
drivers/staging/lustre/lnet/selftest/brw_test.c

@@ -0,0 +1,499 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/brw_test.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ */
+
+#include "selftest.h"
+
+static int brw_srv_workitems = SFW_TEST_WI_MAX;
+CFS_MODULE_PARM(brw_srv_workitems, "i", int, 0644, "# BRW server workitems");
+
+static int brw_inject_errors;
+CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644,
+		"# data errors to inject randomly, zero by default");
+
+static void
+brw_client_fini(sfw_test_instance_t *tsi)
+{
+	srpc_bulk_t     *bulk;
+	sfw_test_unit_t *tsu;
+
+	LASSERT(tsi->tsi_is_client);
+
+	list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
+		bulk = tsu->tsu_private;
+		if (bulk == NULL)
+			continue;
+
+		srpc_free_bulk(bulk);
+		tsu->tsu_private = NULL;
+	}
+}
+
+int
+brw_client_init(sfw_test_instance_t *tsi)
+{
+	sfw_session_t	 *sn = tsi->tsi_batch->bat_session;
+	int		  flags;
+	int		  npg;
+	int		  len;
+	int		  opc;
+	srpc_bulk_t	 *bulk;
+	sfw_test_unit_t	 *tsu;
+
+	LASSERT(sn != NULL);
+	LASSERT(tsi->tsi_is_client);
+
+	if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) {
+		test_bulk_req_t  *breq = &tsi->tsi_u.bulk_v0;
+
+		opc   = breq->blk_opc;
+		flags = breq->blk_flags;
+		npg   = breq->blk_npg;
+		/* NB: this is not going to work for variable page size,
+		 * but we have to keep it for compatibility */
+		len   = npg * PAGE_CACHE_SIZE;
+
+	} else {
+		test_bulk_req_v1_t  *breq = &tsi->tsi_u.bulk_v1;
+
+		/* I should never get this step if it's unknown feature
+		 * because make_session will reject unknown feature */
+		LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0);
+
+		opc   = breq->blk_opc;
+		flags = breq->blk_flags;
+		len   = breq->blk_len;
+		npg   = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	}
+
+	if (npg > LNET_MAX_IOV || npg <= 0)
+		return -EINVAL;
+
+	if (opc != LST_BRW_READ && opc != LST_BRW_WRITE)
+		return -EINVAL;
+
+	if (flags != LST_BRW_CHECK_NONE &&
+	    flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE)
+		return -EINVAL;
+
+	list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
+		bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
+				       npg, len, opc == LST_BRW_READ);
+		if (bulk == NULL) {
+			brw_client_fini(tsi);
+			return -ENOMEM;
+		}
+
+		tsu->tsu_private = bulk;
+	}
+
+	return 0;
+}
+
+#define BRW_POISON      0xbeefbeefbeefbeefULL
+#define BRW_MAGIC       0xeeb0eeb1eeb2eeb3ULL
+#define BRW_MSIZE       sizeof(__u64)
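+
+/*
+ * Whichever side sources the bulk fills it with BRW_MAGIC and the sink
+ * verifies the pattern on arrival; sink-side buffers are pre-filled
+ * with BRW_POISON so data that never arrived cannot pass the check.
+ */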
+
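+/*
+ * Decide whether to corrupt the next page: roughly half of the time
+ * (odd tv_usec) while the injection budget lasts.  The non-zero return
+ * value is added to the magic, so the check on the far end fails.
+ */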
+int
+brw_inject_one_error(void)
+{
+	struct timeval tv;
+
+	if (brw_inject_errors <= 0)
+		return 0;
+
+	do_gettimeofday(&tv);
+
+	if ((tv.tv_usec & 1) == 0)
+		return 0;
+
+	return brw_inject_errors--;
+}
+
+void
+brw_fill_page(struct page *pg, int pattern, __u64 magic)
+{
+	char *addr = page_address(pg);
+	int   i;
+
+	LASSERT(addr != NULL);
+
+	if (pattern == LST_BRW_CHECK_NONE)
+		return;
+
+	if (magic == BRW_MAGIC)
+		magic += brw_inject_one_error();
+
+	if (pattern == LST_BRW_CHECK_SIMPLE) {
+		memcpy(addr, &magic, BRW_MSIZE);
+		addr += PAGE_CACHE_SIZE - BRW_MSIZE;
+		memcpy(addr, &magic, BRW_MSIZE);
+		return;
+	}
+
+	if (pattern == LST_BRW_CHECK_FULL) {
+		for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++)
+			memcpy(addr + i * BRW_MSIZE, &magic, BRW_MSIZE);
+		return;
+	}
+
+	LBUG();
+}
+
+int
+brw_check_page(struct page *pg, int pattern, __u64 magic)
+{
+	char  *addr = page_address(pg);
+	__u64  data = 0; /* make compiler happy */
+	int    i;
+
+	LASSERT(addr != NULL);
+
+	if (pattern == LST_BRW_CHECK_NONE)
+		return 0;
+
+	if (pattern == LST_BRW_CHECK_SIMPLE) {
+		data = *((__u64 *)addr);
+		if (data != magic)
+			goto bad_data;
+
+		addr += PAGE_CACHE_SIZE - BRW_MSIZE;
+		data = *((__u64 *)addr);
+		if (data != magic)
+			goto bad_data;
+
+		return 0;
+	}
+
+	if (pattern == LST_BRW_CHECK_FULL) {
+		for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++) {
+			data = *(((__u64 *)addr) + i);
+			if (data != magic)
+				goto bad_data;
+		}
+
+		return 0;
+	}
+
+	LBUG();
+
+bad_data:
+	CERROR("Bad data in page %p: "LPX64", "LPX64" expected\n",
+	       pg, data, magic);
+	return 1;
+}
+
+void
+brw_fill_bulk (srpc_bulk_t *bk, int pattern, __u64 magic)
+{
+	int	 i;
+	struct page *pg;
+
+	for (i = 0; i < bk->bk_niov; i++) {
+		pg = bk->bk_iovs[i].kiov_page;
+		brw_fill_page(pg, pattern, magic);
+	}
+}
+
+int
+brw_check_bulk (srpc_bulk_t *bk, int pattern, __u64 magic)
+{
+	int	 i;
+	struct page *pg;
+
+	for (i = 0; i < bk->bk_niov; i++) {
+		pg = bk->bk_iovs[i].kiov_page;
+		if (brw_check_page(pg, pattern, magic) != 0) {
+			CERROR ("Bulk page %p (%d/%d) is corrupted!\n",
+				pg, i, bk->bk_niov);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+brw_client_prep_rpc (sfw_test_unit_t *tsu,
+		     lnet_process_id_t dest, srpc_client_rpc_t **rpcpp)
+{
+	srpc_bulk_t	 *bulk = tsu->tsu_private;
+	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	sfw_session_t	    *sn = tsi->tsi_batch->bat_session;
+	srpc_client_rpc_t   *rpc;
+	srpc_brw_reqst_t    *req;
+	int		     flags;
+	int		     npg;
+	int		     len;
+	int		     opc;
+	int		     rc;
+
+	LASSERT(sn != NULL);
+	LASSERT(bulk != NULL);
+
+	if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) {
+		test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0;
+
+		opc   = breq->blk_opc;
+		flags = breq->blk_flags;
+		npg   = breq->blk_npg;
+		len   = npg * PAGE_CACHE_SIZE;
+
+	} else {
+		test_bulk_req_v1_t  *breq = &tsi->tsi_u.bulk_v1;
+
+		/* I should never get this step if it's unknown feature
+		 * because make_session will reject unknown feature */
+		LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0);
+
+		opc   = breq->blk_opc;
+		flags = breq->blk_flags;
+		len   = breq->blk_len;
+		npg   = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	}
+
+	rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
+	if (rc != 0)
+		return rc;
+
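+	/*
+	 * Reuse the pages allocated in brw_client_init(): copy the bulk
+	 * descriptor header plus the first npg iovs into this RPC's bulk.
+	 */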
+	memcpy(&rpc->crpc_bulk, bulk, offsetof(srpc_bulk_t, bk_iovs[npg]));
+	if (opc == LST_BRW_WRITE)
+		brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC);
+	else
+		brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON);
+
+	req = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
+	req->brw_flags = flags;
+	req->brw_rw    = opc;
+	req->brw_len   = len;
+
+	*rpcpp = rpc;
+	return 0;
+}
+
+static void
+brw_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
+{
+	__u64		magic = BRW_MAGIC;
+	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	sfw_session_t       *sn = tsi->tsi_batch->bat_session;
+	srpc_msg_t	  *msg = &rpc->crpc_replymsg;
+	srpc_brw_reply_t    *reply = &msg->msg_body.brw_reply;
+	srpc_brw_reqst_t    *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
+
+	LASSERT(sn != NULL);
+
+	if (rpc->crpc_status != 0) {
+		CERROR("BRW RPC to %s failed with %d\n",
+		       libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
+		if (!tsi->tsi_stopping) /* rpc could have been aborted */
+			atomic_inc(&sn->sn_brw_errors);
+		goto out;
+	}
+
+	if (msg->msg_magic != SRPC_MSG_MAGIC) {
+		__swab64s(&magic);
+		__swab32s(&reply->brw_status);
+	}
+
+	CDEBUG(reply->brw_status ? D_WARNING : D_NET,
+	       "BRW RPC to %s finished with brw_status: %d\n",
+	       libcfs_id2str(rpc->crpc_dest), reply->brw_status);
+
+	if (reply->brw_status != 0) {
+		atomic_inc(&sn->sn_brw_errors);
+		rpc->crpc_status = -(int)reply->brw_status;
+		goto out;
+	}
+
+	if (reqst->brw_rw == LST_BRW_WRITE)
+		goto out;
+
+	if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic) != 0) {
+		CERROR("Bulk data from %s is corrupted!\n",
+		       libcfs_id2str(rpc->crpc_dest));
+		atomic_inc(&sn->sn_brw_errors);
+		rpc->crpc_status = -EBADMSG;
+	}
+
+out:
+	return;
+}
+
+void
+brw_server_rpc_done(srpc_server_rpc_t *rpc)
+{
+	srpc_bulk_t *blk = rpc->srpc_bulk;
+
+	if (blk == NULL)
+		return;
+
+	if (rpc->srpc_status != 0)
+		CERROR("Bulk transfer %s %s has failed: %d\n",
+		       blk->bk_sink ? "from" : "to",
+		       libcfs_id2str(rpc->srpc_peer), rpc->srpc_status);
+	else
+		CDEBUG(D_NET, "Transferred %d pages of bulk data %s %s\n",
+		       blk->bk_niov, blk->bk_sink ? "from" : "to",
+		       libcfs_id2str(rpc->srpc_peer));
+
+	sfw_free_pages(rpc);
+}
+
+int
+brw_bulk_ready (srpc_server_rpc_t *rpc, int status)
+{
+	__u64	     magic = BRW_MAGIC;
+	srpc_brw_reply_t *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
+	srpc_brw_reqst_t *reqst;
+	srpc_msg_t       *reqstmsg;
+
+	LASSERT(rpc->srpc_bulk != NULL);
+	LASSERT(rpc->srpc_reqstbuf != NULL);
+
+	reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
+	reqst = &reqstmsg->msg_body.brw_reqst;
+
+	if (status != 0) {
+		CERROR("BRW bulk %s failed for RPC from %s: %d\n",
+		       reqst->brw_rw == LST_BRW_READ ? "READ" : "WRITE",
+		       libcfs_id2str(rpc->srpc_peer), status);
+		return -EIO;
+	}
+
+	if (reqst->brw_rw == LST_BRW_READ)
+		return 0;
+
+	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC)
+		__swab64s(&magic);
+
+	if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic) != 0) {
+		CERROR("Bulk data from %s is corrupted!\n",
+		       libcfs_id2str(rpc->srpc_peer));
+		reply->brw_status = EBADMSG;
+	}
+
+	return 0;
+}
+
+int
+brw_server_handle(struct srpc_server_rpc *rpc)
+{
+	struct srpc_service	*sv = rpc->srpc_scd->scd_svc;
+	srpc_msg_t       *replymsg = &rpc->srpc_replymsg;
+	srpc_msg_t       *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
+	srpc_brw_reply_t *reply = &replymsg->msg_body.brw_reply;
+	srpc_brw_reqst_t *reqst = &reqstmsg->msg_body.brw_reqst;
+	int		  npg;
+	int	       rc;
+
+	LASSERT(sv->sv_id == SRPC_SERVICE_BRW);
+
+	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
+		LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
+
+		__swab32s(&reqst->brw_rw);
+		__swab32s(&reqst->brw_len);
+		__swab32s(&reqst->brw_flags);
+		__swab64s(&reqst->brw_rpyid);
+		__swab64s(&reqst->brw_bulkid);
+	}
+	LASSERT(reqstmsg->msg_type == (__u32)srpc_service2request(sv->sv_id));
+
+	reply->brw_status = 0;
+	rpc->srpc_done = brw_server_rpc_done;
+
+	if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) ||
+	    (reqst->brw_flags != LST_BRW_CHECK_NONE &&
+	     reqst->brw_flags != LST_BRW_CHECK_FULL &&
+	     reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) {
+		reply->brw_status = EINVAL;
+		return 0;
+	}
+
+	if ((reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+		replymsg->msg_ses_feats = LST_FEATS_MASK;
+		reply->brw_status = EPROTO;
+		return 0;
+	}
+
+	if ((reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN) == 0) {
+		/* compat with old version */
+		if ((reqst->brw_len & ~CFS_PAGE_MASK) != 0) {
+			reply->brw_status = EINVAL;
+			return 0;
+		}
+		npg = reqst->brw_len >> PAGE_CACHE_SHIFT;
+
+	} else {
+		npg = (reqst->brw_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	}
+
+	replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
+
+	if (reqst->brw_len == 0 || npg > LNET_MAX_IOV) {
+		reply->brw_status = EINVAL;
+		return 0;
+	}
+
+	rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg,
+			     reqst->brw_len,
+			     reqst->brw_rw == LST_BRW_WRITE);
+	if (rc != 0)
+		return rc;
+
+	if (reqst->brw_rw == LST_BRW_READ)
+		brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
+	else
+		brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
+
+	return 0;
+}
+
+sfw_test_client_ops_t brw_test_client;
+void brw_init_test_client(void)
+{
+	brw_test_client.tso_init       = brw_client_init;
+	brw_test_client.tso_fini       = brw_client_fini;
+	brw_test_client.tso_prep_rpc   = brw_client_prep_rpc;
+	brw_test_client.tso_done_rpc   = brw_client_done_rpc;
+}
+
+srpc_service_t brw_test_service;
+void brw_init_test_service(void)
+{
+	brw_test_service.sv_id         = SRPC_SERVICE_BRW;
+	brw_test_service.sv_name       = "brw_test";
+	brw_test_service.sv_handler    = brw_server_handle;
+	brw_test_service.sv_bulk_ready = brw_bulk_ready;
+	brw_test_service.sv_wi_total   = brw_srv_workitems;
+}

+ 931 - 0
drivers/staging/lustre/lnet/selftest/conctl.c

@@ -0,0 +1,931 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/conctl.c
+ *
+ * IOC handle in kernel
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lib-lnet.h>
+#include <linux/lnet/lnetst.h>
+#include "console.h"
+
+int
+lst_session_new_ioctl(lstio_session_new_args_t *args)
+{
+	char      *name;
+	int	rc;
+
+	if (args->lstio_ses_idp   == NULL || /* address for output sid */
+	    args->lstio_ses_key   == 0 || /* no key is specified */
+	    args->lstio_ses_namep == NULL || /* session name */
+	    args->lstio_ses_nmlen <= 0 ||
+	    args->lstio_ses_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_ses_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			       args->lstio_ses_namep,
+			       args->lstio_ses_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_ses_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_ses_nmlen] = 0;
+
+	rc = lstcon_session_new(name,
+				args->lstio_ses_key,
+				args->lstio_ses_feats,
+				args->lstio_ses_force,
+				args->lstio_ses_timeout,
+				args->lstio_ses_idp);
+
+	LIBCFS_FREE(name, args->lstio_ses_nmlen + 1);
+	return rc;
+}
+
+int
+lst_session_end_ioctl(lstio_session_end_args_t *args)
+{
+	if (args->lstio_ses_key != console_session.ses_key)
+		return -EACCES;
+
+	return lstcon_session_end();
+}
+
+int
+lst_session_info_ioctl(lstio_session_info_args_t *args)
+{
+	/* no checking of key */
+
+	if (args->lstio_ses_idp   == NULL || /* address for output sid */
+	    args->lstio_ses_keyp  == NULL || /* address for output key */
+	    args->lstio_ses_featp  == NULL || /* address for output features */
+	    args->lstio_ses_ndinfo == NULL || /* address for output ndinfo */
+	    args->lstio_ses_namep == NULL || /* address for output name */
+	    args->lstio_ses_nmlen <= 0 ||
+	    args->lstio_ses_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	return lstcon_session_info(args->lstio_ses_idp,
+				   args->lstio_ses_keyp,
+				   args->lstio_ses_featp,
+				   args->lstio_ses_ndinfo,
+				   args->lstio_ses_namep,
+				   args->lstio_ses_nmlen);
+}
+
+int
+lst_debug_ioctl(lstio_debug_args_t *args)
+{
+	char   *name   = NULL;
+	int     client = 1;
+	int     rc;
+
+	if (args->lstio_dbg_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_dbg_resultp == NULL)
+		return -EINVAL;
+
+	if (args->lstio_dbg_namep != NULL && /* name of batch/group */
+	    (args->lstio_dbg_nmlen <= 0 ||
+	     args->lstio_dbg_nmlen > LST_NAME_SIZE))
+		return -EINVAL;
+
+	if (args->lstio_dbg_namep != NULL) {
+		LIBCFS_ALLOC(name, args->lstio_dbg_nmlen + 1);
+		if (name == NULL)
+			return -ENOMEM;
+
+		if (copy_from_user(name, args->lstio_dbg_namep,
+				       args->lstio_dbg_nmlen)) {
+			LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1);
+
+			return -EFAULT;
+		}
+
+		name[args->lstio_dbg_nmlen] = 0;
+	}
+
+	rc = -EINVAL;
+
+	switch (args->lstio_dbg_type) {
+	case LST_OPC_SESSION:
+		rc = lstcon_session_debug(args->lstio_dbg_timeout,
+					  args->lstio_dbg_resultp);
+		break;
+
+	case LST_OPC_BATCHSRV:
+		client = 0;
+		/* fall through */
+	case LST_OPC_BATCHCLI:
+		if (name == NULL)
+			goto out;
+
+		rc = lstcon_batch_debug(args->lstio_dbg_timeout,
+					name, client, args->lstio_dbg_resultp);
+		break;
+
+	case LST_OPC_GROUP:
+		if (name == NULL)
+			goto out;
+
+		rc = lstcon_group_debug(args->lstio_dbg_timeout,
+					name, args->lstio_dbg_resultp);
+		break;
+
+	case LST_OPC_NODES:
+		if (args->lstio_dbg_count <= 0 ||
+		    args->lstio_dbg_idsp == NULL)
+			goto out;
+
+		rc = lstcon_nodes_debug(args->lstio_dbg_timeout,
+					args->lstio_dbg_count,
+					args->lstio_dbg_idsp,
+					args->lstio_dbg_resultp);
+		break;
+
+	default:
+		break;
+	}
+
+out:
+	if (name != NULL)
+		LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1);
+
+	return rc;
+}
+
+int
+lst_group_add_ioctl(lstio_group_add_args_t *args)
+{
+	char	   *name;
+	int	     rc;
+
+	if (args->lstio_grp_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_grp_namep == NULL ||
+	    args->lstio_grp_nmlen <= 0 ||
+	    args->lstio_grp_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			       args->lstio_grp_namep,
+			       args->lstio_grp_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_grp_nmlen] = 0;
+
+	rc = lstcon_group_add(name);
+
+	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+	return rc;
+}
+
+int
+lst_group_del_ioctl(lstio_group_del_args_t *args)
+{
+	int     rc;
+	char   *name;
+
+	if (args->lstio_grp_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_grp_namep == NULL ||
+	    args->lstio_grp_nmlen <= 0 ||
+	    args->lstio_grp_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			       args->lstio_grp_namep,
+			       args->lstio_grp_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_grp_nmlen] = 0;
+
+	rc = lstcon_group_del(name);
+
+	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+	return rc;
+}
+
+int
+lst_group_update_ioctl(lstio_group_update_args_t *args)
+{
+	int     rc;
+	char   *name;
+
+	if (args->lstio_grp_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_grp_resultp == NULL ||
+	    args->lstio_grp_namep == NULL ||
+	    args->lstio_grp_nmlen <= 0 ||
+	    args->lstio_grp_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			   args->lstio_grp_namep,
+			   args->lstio_grp_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_grp_nmlen] = 0;
+
+	switch (args->lstio_grp_opc) {
+	case LST_GROUP_CLEAN:
+		rc = lstcon_group_clean(name, args->lstio_grp_args);
+		break;
+
+	case LST_GROUP_REFRESH:
+		rc = lstcon_group_refresh(name, args->lstio_grp_resultp);
+		break;
+
+	case LST_GROUP_RMND:
+		if (args->lstio_grp_count  <= 0 ||
+		    args->lstio_grp_idsp == NULL) {
+			rc = -EINVAL;
+			break;
+		}
+		rc = lstcon_nodes_remove(name, args->lstio_grp_count,
+					 args->lstio_grp_idsp,
+					 args->lstio_grp_resultp);
+		break;
+
+	default:
+		rc = -EINVAL;
+		break;
+	}
+
+	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+	return rc;
+}
+
+int
+lst_nodes_add_ioctl(lstio_group_nodes_args_t *args)
+{
+	unsigned feats;
+	int     rc;
+	char   *name;
+
+	if (args->lstio_grp_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_grp_idsp == NULL || /* array of ids */
+	    args->lstio_grp_count <= 0 ||
+	    args->lstio_grp_resultp == NULL ||
+	    args->lstio_grp_featp == NULL ||
+	    args->lstio_grp_namep == NULL ||
+	    args->lstio_grp_nmlen <= 0 ||
+	    args->lstio_grp_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name, args->lstio_grp_namep,
+			       args->lstio_grp_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+		return -EFAULT;
+	}
+
+	name[args->lstio_grp_nmlen] = 0;
+
+	rc = lstcon_nodes_add(name, args->lstio_grp_count,
+			      args->lstio_grp_idsp, &feats,
+			      args->lstio_grp_resultp);
+
+	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+	if (rc == 0 &&
+	    copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats)))
+		return -EFAULT;
+
+	return rc;
+}
+
+int
+lst_group_list_ioctl(lstio_group_list_args_t *args)
+{
+	if (args->lstio_grp_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_grp_idx   < 0 ||
+	    args->lstio_grp_namep == NULL ||
+	    args->lstio_grp_nmlen <= 0 ||
+	    args->lstio_grp_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	return lstcon_group_list(args->lstio_grp_idx,
+			      args->lstio_grp_nmlen,
+			      args->lstio_grp_namep);
+}
+
+int
+lst_group_info_ioctl(lstio_group_info_args_t *args)
+{
+	char	   *name;
+	int	     ndent;
+	int	     index;
+	int	     rc;
+
+	if (args->lstio_grp_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_grp_namep == NULL ||
+	    args->lstio_grp_nmlen <= 0 ||
+	    args->lstio_grp_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	if (args->lstio_grp_entp  == NULL && /* output: group entry */
+	    args->lstio_grp_dentsp == NULL)  /* output: node entry */
+		return -EINVAL;
+
+	if (args->lstio_grp_dentsp != NULL) { /* have node entry */
+		if (args->lstio_grp_idxp == NULL || /* node index */
+		    args->lstio_grp_ndentp == NULL) /* # of node entry */
+			return -EINVAL;
+
+		if (copy_from_user(&ndent, args->lstio_grp_ndentp,
+				       sizeof(ndent)) ||
+		    copy_from_user(&index, args->lstio_grp_idxp,
+				       sizeof(index)))
+			return -EFAULT;
+
+		if (ndent <= 0 || index < 0)
+			return -EINVAL;
+	}
+
+	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			       args->lstio_grp_namep,
+			       args->lstio_grp_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_grp_nmlen] = 0;
+
+	rc = lstcon_group_info(name, args->lstio_grp_entp,
+			       &index, &ndent, args->lstio_grp_dentsp);
+
+	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+	if (rc != 0)
+		return rc;
+
+	if (args->lstio_grp_dentsp != NULL &&
+	    (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) ||
+	     copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent))))
+		rc = -EFAULT;
+
+	return rc;
+}
+
+int
+lst_batch_add_ioctl(lstio_batch_add_args_t *args)
+{
+	int	     rc;
+	char	   *name;
+
+	if (args->lstio_bat_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_bat_namep == NULL ||
+	    args->lstio_bat_nmlen <= 0 ||
+	    args->lstio_bat_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			       args->lstio_bat_namep,
+			       args->lstio_bat_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_bat_nmlen] = 0;
+
+	rc = lstcon_batch_add(name);
+
+	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+	return rc;
+}
+
+int
+lst_batch_run_ioctl(lstio_batch_run_args_t *args)
+{
+	int	     rc;
+	char	   *name;
+
+	if (args->lstio_bat_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_bat_namep == NULL ||
+	    args->lstio_bat_nmlen <= 0 ||
+	    args->lstio_bat_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			       args->lstio_bat_namep,
+			       args->lstio_bat_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_bat_nmlen] = 0;
+
+	rc = lstcon_batch_run(name, args->lstio_bat_timeout,
+			      args->lstio_bat_resultp);
+
+	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+	return rc;
+}
+
+int
+lst_batch_stop_ioctl(lstio_batch_stop_args_t *args)
+{
+	int	     rc;
+	char	   *name;
+
+	if (args->lstio_bat_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_bat_resultp == NULL ||
+	    args->lstio_bat_namep == NULL ||
+	    args->lstio_bat_nmlen <= 0 ||
+	    args->lstio_bat_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			       args->lstio_bat_namep,
+			       args->lstio_bat_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_bat_nmlen] = 0;
+
+	rc = lstcon_batch_stop(name, args->lstio_bat_force,
+			       args->lstio_bat_resultp);
+
+	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+	return rc;
+}
+
+int
+lst_batch_query_ioctl(lstio_batch_query_args_t *args)
+{
+	char   *name;
+	int     rc;
+
+	if (args->lstio_bat_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_bat_resultp == NULL ||
+	    args->lstio_bat_namep == NULL ||
+	    args->lstio_bat_nmlen <= 0 ||
+	    args->lstio_bat_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	if (args->lstio_bat_testidx < 0)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			       args->lstio_bat_namep,
+			       args->lstio_bat_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_bat_nmlen] = 0;
+
+	rc = lstcon_test_batch_query(name,
+				     args->lstio_bat_testidx,
+				     args->lstio_bat_client,
+				     args->lstio_bat_timeout,
+				     args->lstio_bat_resultp);
+
+	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+	return rc;
+}
+
+int
+lst_batch_list_ioctl(lstio_batch_list_args_t *args)
+{
+	if (args->lstio_bat_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_bat_idx   < 0 ||
+	    args->lstio_bat_namep == NULL ||
+	    args->lstio_bat_nmlen <= 0 ||
+	    args->lstio_bat_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	return lstcon_batch_list(args->lstio_bat_idx,
+			      args->lstio_bat_nmlen,
+			      args->lstio_bat_namep);
+}
+
+int
+lst_batch_info_ioctl(lstio_batch_info_args_t *args)
+{
+	char	   *name;
+	int	     rc;
+	int	     index;
+	int	     ndent;
+
+	if (args->lstio_bat_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_bat_namep == NULL || /* batch name */
+	    args->lstio_bat_nmlen <= 0 ||
+	    args->lstio_bat_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	if (args->lstio_bat_entp == NULL && /* output: batch entry */
+	    args->lstio_bat_dentsp == NULL) /* output: node entry */
+		return -EINVAL;
+
+	if (args->lstio_bat_dentsp != NULL) { /* have node entry */
+		if (args->lstio_bat_idxp == NULL || /* node index */
+		    args->lstio_bat_ndentp == NULL) /* # of node entry */
+			return -EINVAL;
+
+		if (copy_from_user(&index, args->lstio_bat_idxp,
+				       sizeof(index)) ||
+		    copy_from_user(&ndent, args->lstio_bat_ndentp,
+				       sizeof(ndent)))
+			return -EFAULT;
+
+		if (ndent <= 0 || index < 0)
+			return -EINVAL;
+	}
+
+	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name,
+			       args->lstio_bat_namep, args->lstio_bat_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+		return -EFAULT;
+	}
+
+	name[args->lstio_bat_nmlen] = 0;
+
+	rc = lstcon_batch_info(name,
+			    args->lstio_bat_entp, args->lstio_bat_server,
+			    args->lstio_bat_testidx, &index, &ndent,
+			    args->lstio_bat_dentsp);
+
+	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+	if (rc != 0)
+		return rc;
+
+	if (args->lstio_bat_dentsp != NULL &&
+	    (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) ||
+	     copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent))))
+		rc = -EFAULT;
+
+	return rc;
+}
+
+int
+lst_stat_query_ioctl(lstio_stat_args_t *args)
+{
+	int	     rc;
+	char	   *name;
+
+	/* TODO: not finished */
+	if (args->lstio_sta_key != console_session.ses_key)
+		return -EACCES;
+
+	if (args->lstio_sta_resultp == NULL ||
+	    (args->lstio_sta_namep  == NULL &&
+	     args->lstio_sta_idsp   == NULL) ||
+	    args->lstio_sta_nmlen <= 0 ||
+	    args->lstio_sta_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	if (args->lstio_sta_idsp != NULL &&
+	    args->lstio_sta_count <= 0)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_sta_nmlen + 1);
+	if (name == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(name, args->lstio_sta_namep,
+			       args->lstio_sta_nmlen)) {
+		LIBCFS_FREE(name, args->lstio_sta_nmlen + 1);
+		return -EFAULT;
+	}
+
+	if (args->lstio_sta_idsp == NULL) {
+		rc = lstcon_group_stat(name, args->lstio_sta_timeout,
+				       args->lstio_sta_resultp);
+	} else {
+		rc = lstcon_nodes_stat(args->lstio_sta_count,
+				       args->lstio_sta_idsp,
+				       args->lstio_sta_timeout,
+				       args->lstio_sta_resultp);
+	}
+
+	LIBCFS_FREE(name, args->lstio_sta_nmlen + 1);
+
+	return rc;
+}
+
+int lst_test_add_ioctl(lstio_test_args_t *args)
+{
+	char	   *name;
+	char	   *srcgrp = NULL;
+	char	   *dstgrp = NULL;
+	void	   *param = NULL;
+	int	     ret = 0;
+	int	     rc = -ENOMEM;
+
+	if (args->lstio_tes_resultp == NULL ||
+	    args->lstio_tes_retp == NULL ||
+	    args->lstio_tes_bat_name == NULL || /* no specified batch */
+	    args->lstio_tes_bat_nmlen <= 0 ||
+	    args->lstio_tes_bat_nmlen > LST_NAME_SIZE ||
+	    args->lstio_tes_sgrp_name == NULL || /* no source group */
+	    args->lstio_tes_sgrp_nmlen <= 0 ||
+	    args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE ||
+	    args->lstio_tes_dgrp_name == NULL || /* no target group */
+	    args->lstio_tes_dgrp_nmlen <= 0 ||
+	    args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE)
+		return -EINVAL;
+
+	if (args->lstio_tes_loop == 0 || /* negative is infinite */
+	    args->lstio_tes_concur <= 0 ||
+	    args->lstio_tes_dist <= 0 ||
+	    args->lstio_tes_span <= 0)
+		return -EINVAL;
+
+	/* have parameter, check if parameter length is valid */
+	if (args->lstio_tes_param != NULL &&
+	    (args->lstio_tes_param_len <= 0 ||
+	     args->lstio_tes_param_len > PAGE_CACHE_SIZE - sizeof(lstcon_test_t)))
+		return -EINVAL;
+
+	LIBCFS_ALLOC(name, args->lstio_tes_bat_nmlen + 1);
+	if (name == NULL)
+		return rc;
+
+	LIBCFS_ALLOC(srcgrp, args->lstio_tes_sgrp_nmlen + 1);
+	if (srcgrp == NULL)
+		goto out;
+
+	LIBCFS_ALLOC(dstgrp, args->lstio_tes_dgrp_nmlen + 1);
+	if (dstgrp == NULL)
+		goto out;
+
+	if (args->lstio_tes_param != NULL) {
+		LIBCFS_ALLOC(param, args->lstio_tes_param_len);
+		if (param == NULL)
+			goto out;
+	}
+
+	rc = -EFAULT;
+	if (copy_from_user(name,
+			      args->lstio_tes_bat_name,
+			      args->lstio_tes_bat_nmlen) ||
+	    copy_from_user(srcgrp,
+			      args->lstio_tes_sgrp_name,
+			      args->lstio_tes_sgrp_nmlen) ||
+	    copy_from_user(dstgrp,
+			      args->lstio_tes_dgrp_name,
+			      args->lstio_tes_dgrp_nmlen) ||
+	    (param != NULL && /* param is optional */
+	     copy_from_user(param, args->lstio_tes_param,
+			      args->lstio_tes_param_len)))
+		goto out;
+
+	rc = lstcon_test_add(name,
+			    args->lstio_tes_type,
+			    args->lstio_tes_loop,
+			    args->lstio_tes_concur,
+			    args->lstio_tes_dist, args->lstio_tes_span,
+			    srcgrp, dstgrp, param, args->lstio_tes_param_len,
+			    &ret, args->lstio_tes_resultp);
+
+	if (ret != 0)
+		rc = (copy_to_user(args->lstio_tes_retp, &ret,
+				       sizeof(ret))) ? -EFAULT : 0;
+out:
+	if (name != NULL)
+		LIBCFS_FREE(name, args->lstio_tes_bat_nmlen + 1);
+
+	if (srcgrp != NULL)
+		LIBCFS_FREE(srcgrp, args->lstio_tes_sgrp_nmlen + 1);
+
+	if (dstgrp != NULL)
+		LIBCFS_FREE(dstgrp, args->lstio_tes_dgrp_nmlen + 1);
+
+	if (param != NULL)
+		LIBCFS_FREE(param, args->lstio_tes_param_len);
+
+	return rc;
+}
+
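+/*
+ * Entry point for all LNet selftest ioctls: the argument block is copied
+ * into a kernel buffer, the session state is validated under ses_mutex,
+ * the per-opcode handler above is dispatched, and the transaction
+ * statistics are copied back to user space on the way out.
+ */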
+int
+lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data)
+{
+	char   *buf;
+	int     opc = data->ioc_u32[0];
+	int     rc;
+
+	if (cmd != IOC_LIBCFS_LNETST)
+		return -EINVAL;
+
+	if (data->ioc_plen1 > PAGE_CACHE_SIZE)
+		return -EINVAL;
+
+	LIBCFS_ALLOC(buf, data->ioc_plen1);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	/* copy in parameter */
+	if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
+		LIBCFS_FREE(buf, data->ioc_plen1);
+		return -EFAULT;
+	}
+
+	mutex_lock(&console_session.ses_mutex);
+
+	console_session.ses_laststamp = cfs_time_current_sec();
+
+	if (console_session.ses_shutdown) {
+		rc = -ESHUTDOWN;
+		goto out;
+	}
+
+	if (console_session.ses_expired)
+		lstcon_session_end();
+
+	if (opc != LSTIO_SESSION_NEW &&
+	    console_session.ses_state == LST_SESSION_NONE) {
+		CDEBUG(D_NET, "LST no active session\n");
+		rc = -ESRCH;
+		goto out;
+	}
+
+	memset(&console_session.ses_trans_stat, 0, sizeof(lstcon_trans_stat_t));
+
+	switch (opc) {
+	case LSTIO_SESSION_NEW:
+		rc = lst_session_new_ioctl((lstio_session_new_args_t *)buf);
+		break;
+	case LSTIO_SESSION_END:
+		rc = lst_session_end_ioctl((lstio_session_end_args_t *)buf);
+		break;
+	case LSTIO_SESSION_INFO:
+		rc = lst_session_info_ioctl((lstio_session_info_args_t *)buf);
+		break;
+	case LSTIO_DEBUG:
+		rc = lst_debug_ioctl((lstio_debug_args_t *)buf);
+		break;
+	case LSTIO_GROUP_ADD:
+		rc = lst_group_add_ioctl((lstio_group_add_args_t *)buf);
+		break;
+	case LSTIO_GROUP_DEL:
+		rc = lst_group_del_ioctl((lstio_group_del_args_t *)buf);
+		break;
+	case LSTIO_GROUP_UPDATE:
+		rc = lst_group_update_ioctl((lstio_group_update_args_t *)buf);
+		break;
+	case LSTIO_NODES_ADD:
+		rc = lst_nodes_add_ioctl((lstio_group_nodes_args_t *)buf);
+		break;
+	case LSTIO_GROUP_LIST:
+		rc = lst_group_list_ioctl((lstio_group_list_args_t *)buf);
+		break;
+	case LSTIO_GROUP_INFO:
+		rc = lst_group_info_ioctl((lstio_group_info_args_t *)buf);
+		break;
+	case LSTIO_BATCH_ADD:
+		rc = lst_batch_add_ioctl((lstio_batch_add_args_t *)buf);
+		break;
+	case LSTIO_BATCH_START:
+		rc = lst_batch_run_ioctl((lstio_batch_run_args_t *)buf);
+		break;
+	case LSTIO_BATCH_STOP:
+		rc = lst_batch_stop_ioctl((lstio_batch_stop_args_t *)buf);
+		break;
+	case LSTIO_BATCH_QUERY:
+		rc = lst_batch_query_ioctl((lstio_batch_query_args_t *)buf);
+		break;
+	case LSTIO_BATCH_LIST:
+		rc = lst_batch_list_ioctl((lstio_batch_list_args_t *)buf);
+		break;
+	case LSTIO_BATCH_INFO:
+		rc = lst_batch_info_ioctl((lstio_batch_info_args_t *)buf);
+		break;
+	case LSTIO_TEST_ADD:
+		rc = lst_test_add_ioctl((lstio_test_args_t *)buf);
+		break;
+	case LSTIO_STAT_QUERY:
+		rc = lst_stat_query_ioctl((lstio_stat_args_t *)buf);
+		break;
+	default:
+		rc = -EINVAL;
+	}
+
+	if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
+			     sizeof(lstcon_trans_stat_t)))
+		rc = -EFAULT;
+out:
+	mutex_unlock(&console_session.ses_mutex);
+
+	LIBCFS_FREE(buf, data->ioc_plen1);
+
+	return rc;
+}
+EXPORT_SYMBOL(lstcon_ioctl_entry);

+ 1397 - 0
drivers/staging/lustre/lnet/selftest/conrpc.c

@@ -0,0 +1,1397 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/conrpc.c
+ *
+ * Console framework rpcs
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lib-lnet.h>
+#include "timer.h"
+#include "conrpc.h"
+#include "console.h"
+
+void lstcon_rpc_stat_reply(lstcon_rpc_trans_t *, srpc_msg_t *,
+			   lstcon_node_t *, lstcon_trans_stat_t *);
+
+static void
+lstcon_rpc_done(srpc_client_rpc_t *rpc)
+{
+	lstcon_rpc_t *crpc = (lstcon_rpc_t *)rpc->crpc_priv;
+
+	LASSERT(crpc != NULL && rpc == crpc->crp_rpc);
+	LASSERT(crpc->crp_posted && !crpc->crp_finished);
+
+	spin_lock(&rpc->crpc_lock);
+
+	if (crpc->crp_trans == NULL) {
+		/* Orphan RPC is not in any transaction,
+		 * I'm just a poor body and nobody loves me */
+		spin_unlock(&rpc->crpc_lock);
+
+		/* release it */
+		lstcon_rpc_put(crpc);
+		return;
+	}
+
+	/* not an orphan RPC */
+	crpc->crp_finished = 1;
+
+	if (crpc->crp_stamp == 0) {
+		/* not aborted */
+		LASSERT(crpc->crp_status == 0);
+
+		crpc->crp_stamp  = cfs_time_current();
+		crpc->crp_status = rpc->crpc_status;
+	}
+
+	/* wake up the transaction thread if I'm the last RPC in it */
+	if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining))
+		wake_up(&crpc->crp_trans->tas_waitq);
+
+	spin_unlock(&rpc->crpc_lock);
+}
+
+int
+lstcon_rpc_init(lstcon_node_t *nd, int service, unsigned feats,
+		int bulk_npg, int bulk_len, int embedded, lstcon_rpc_t *crpc)
+{
+	crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service,
+				       feats, bulk_npg, bulk_len,
+				       lstcon_rpc_done, (void *)crpc);
+	if (crpc->crp_rpc == NULL)
+		return -ENOMEM;
+
+	crpc->crp_trans    = NULL;
+	crpc->crp_node     = nd;
+	crpc->crp_posted   = 0;
+	crpc->crp_finished = 0;
+	crpc->crp_unpacked = 0;
+	crpc->crp_status   = 0;
+	crpc->crp_stamp    = 0;
+	crpc->crp_embedded = embedded;
+	INIT_LIST_HEAD(&crpc->crp_link);
+
+	atomic_inc(&console_session.ses_rpc_counter);
+
+	return 0;
+}
+
+int
+lstcon_rpc_prep(lstcon_node_t *nd, int service, unsigned feats,
+		int bulk_npg, int bulk_len, lstcon_rpc_t **crpcpp)
+{
+	lstcon_rpc_t  *crpc = NULL;
+	int	    rc;
+
+	spin_lock(&console_session.ses_rpc_lock);
+
+	if (!list_empty(&console_session.ses_rpc_freelist)) {
+		crpc = list_entry(console_session.ses_rpc_freelist.next,
+				      lstcon_rpc_t, crp_link);
+		list_del_init(&crpc->crp_link);
+	}
+
+	spin_unlock(&console_session.ses_rpc_lock);
+
+	if (crpc == NULL) {
+		LIBCFS_ALLOC(crpc, sizeof(*crpc));
+		if (crpc == NULL)
+			return -ENOMEM;
+	}
+
+	rc = lstcon_rpc_init(nd, service, feats, bulk_npg, bulk_len, 0, crpc);
+	if (rc == 0) {
+		*crpcpp = crpc;
+		return 0;
+	}
+
+	LIBCFS_FREE(crpc, sizeof(*crpc));
+
+	return rc;
+}
+
+void
+lstcon_rpc_put(lstcon_rpc_t *crpc)
+{
+	srpc_bulk_t *bulk = &crpc->crp_rpc->crpc_bulk;
+	int	  i;
+
+	LASSERT(list_empty(&crpc->crp_link));
+
+	for (i = 0; i < bulk->bk_niov; i++) {
+		if (bulk->bk_iovs[i].kiov_page == NULL)
+			continue;
+
+		__free_page(bulk->bk_iovs[i].kiov_page);
+	}
+
+	srpc_client_rpc_decref(crpc->crp_rpc);
+
+	if (crpc->crp_embedded) {
+		/* embedded RPC, don't recycle it */
+		memset(crpc, 0, sizeof(*crpc));
+		crpc->crp_embedded = 1;
+
+	} else {
+		spin_lock(&console_session.ses_rpc_lock);
+
+		list_add(&crpc->crp_link,
+			     &console_session.ses_rpc_freelist);
+
+		spin_unlock(&console_session.ses_rpc_lock);
+	}
+
+	/* RPC is not alive now */
+	atomic_dec(&console_session.ses_rpc_counter);
+}
+
+void
+lstcon_rpc_post(lstcon_rpc_t *crpc)
+{
+	lstcon_rpc_trans_t *trans = crpc->crp_trans;
+
+	LASSERT(trans != NULL);
+
+	atomic_inc(&trans->tas_remaining);
+	crpc->crp_posted = 1;
+
+	sfw_post_rpc(crpc->crp_rpc);
+}
+
+static char *
+lstcon_rpc_trans_name(int transop)
+{
+	if (transop == LST_TRANS_SESNEW)
+		return "SESNEW";
+
+	if (transop == LST_TRANS_SESEND)
+		return "SESEND";
+
+	if (transop == LST_TRANS_SESQRY)
+		return "SESQRY";
+
+	if (transop == LST_TRANS_SESPING)
+		return "SESPING";
+
+	if (transop == LST_TRANS_TSBCLIADD)
+		return "TSBCLIADD";
+
+	if (transop == LST_TRANS_TSBSRVADD)
+		return "TSBSRVADD";
+
+	if (transop == LST_TRANS_TSBRUN)
+		return "TSBRUN";
+
+	if (transop == LST_TRANS_TSBSTOP)
+		return "TSBSTOP";
+
+	if (transop == LST_TRANS_TSBCLIQRY)
+		return "TSBCLIQRY";
+
+	if (transop == LST_TRANS_TSBSRVQRY)
+		return "TSBSRVQRY";
+
+	if (transop == LST_TRANS_STATQRY)
+		return "STATQRY";
+
+	return "Unknown";
+}
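/*
 * Editor's note: the if-chain above is a straight opcode-to-name map.  A
 * minimal table-driven sketch of the same mapping (not part of the patch;
 * the LST_TRANS_* values come from conrpc.h later in this series):
 */
static const struct {
	int	    opc;
	const char *name;
} lst_trans_names[] = {
	{ LST_TRANS_SESNEW,    "SESNEW"    },
	{ LST_TRANS_SESEND,    "SESEND"    },
	{ LST_TRANS_SESQRY,    "SESQRY"    },
	{ LST_TRANS_SESPING,   "SESPING"   },
	{ LST_TRANS_TSBCLIADD, "TSBCLIADD" },
	{ LST_TRANS_TSBSRVADD, "TSBSRVADD" },
	{ LST_TRANS_TSBRUN,    "TSBRUN"    },
	{ LST_TRANS_TSBSTOP,   "TSBSTOP"   },
	{ LST_TRANS_TSBCLIQRY, "TSBCLIQRY" },
	{ LST_TRANS_TSBSRVQRY, "TSBSRVQRY" },
	{ LST_TRANS_STATQRY,   "STATQRY"   },
};

static const char *
lst_trans_name_lookup(int transop)
{
	int i;

	for (i = 0; i < (int)ARRAY_SIZE(lst_trans_names); i++)
		if (lst_trans_names[i].opc == transop)
			return lst_trans_names[i].name;
	return "Unknown";
}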
+
+int
+lstcon_rpc_trans_prep(struct list_head *translist,
+		      int transop, lstcon_rpc_trans_t **transpp)
+{
+	lstcon_rpc_trans_t *trans;
+
+	if (translist != NULL) {
+		list_for_each_entry(trans, translist, tas_link) {
+			/* Can't enqueue two private transactions on
+			 * the same object */
+			if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE)
+				return -EPERM;
+		}
+	}
+
+	/* create a trans group */
+	LIBCFS_ALLOC(trans, sizeof(*trans));
+	if (trans == NULL)
+		return -ENOMEM;
+
+	trans->tas_opc = transop;
+
+	if (translist == NULL)
+		INIT_LIST_HEAD(&trans->tas_olink);
+	else
+		list_add_tail(&trans->tas_olink, translist);
+
+	list_add_tail(&trans->tas_link, &console_session.ses_trans_list);
+
+	INIT_LIST_HEAD(&trans->tas_rpcs_list);
+	atomic_set(&trans->tas_remaining, 0);
+	init_waitqueue_head(&trans->tas_waitq);
+
+	spin_lock(&console_session.ses_rpc_lock);
+	trans->tas_features = console_session.ses_features;
+	spin_unlock(&console_session.ses_rpc_lock);
+
+	*transpp = trans;
+	return 0;
+}
+
+void
+lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *crpc)
+{
+	list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list);
+	crpc->crp_trans = trans;
+}
+
+void
+lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error)
+{
+	srpc_client_rpc_t *rpc;
+	lstcon_rpc_t      *crpc;
+	lstcon_node_t     *nd;
+
+	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
+		rpc = crpc->crp_rpc;
+
+		spin_lock(&rpc->crpc_lock);
+
+		if (!crpc->crp_posted || /* not posted */
+		    crpc->crp_stamp != 0) { /* rpc done or aborted already */
+			if (crpc->crp_stamp == 0) {
+				crpc->crp_stamp = cfs_time_current();
+				crpc->crp_status = -EINTR;
+			}
+			spin_unlock(&rpc->crpc_lock);
+			continue;
+		}
+
+		crpc->crp_stamp  = cfs_time_current();
+		crpc->crp_status = error;
+
+		spin_unlock(&rpc->crpc_lock);
+
+		sfw_abort_rpc(rpc);
+
+		if (error != -ETIMEDOUT)
+			continue;
+
+		nd = crpc->crp_node;
+		if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp))
+			continue;
+
+		nd->nd_stamp = crpc->crp_stamp;
+		nd->nd_state = LST_NODE_DOWN;
+	}
+}
+
+static int
+lstcon_rpc_trans_check(lstcon_rpc_trans_t *trans)
+{
+	if (console_session.ses_shutdown &&
+	    !list_empty(&trans->tas_olink)) /* Not an end session RPC */
+		return 1;
+
+	return (atomic_read(&trans->tas_remaining) == 0) ? 1 : 0;
+}
+
+int
+lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout)
+{
+	lstcon_rpc_t  *crpc;
+	int	    rc;
+
+	if (list_empty(&trans->tas_rpcs_list))
+		return 0;
+
+	if (timeout < LST_TRANS_MIN_TIMEOUT)
+		timeout = LST_TRANS_MIN_TIMEOUT;
+
+	CDEBUG(D_NET, "Transaction %s started\n",
+	       lstcon_rpc_trans_name(trans->tas_opc));
+
+	/* post all requests */
+	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
+		LASSERT(!crpc->crp_posted);
+
+		lstcon_rpc_post(crpc);
+	}
+
+	mutex_unlock(&console_session.ses_mutex);
+
+	rc = wait_event_interruptible_timeout(trans->tas_waitq,
+					      lstcon_rpc_trans_check(trans),
+					      cfs_time_seconds(timeout));
+	rc = (rc > 0) ? 0 : ((rc < 0) ? -EINTR : -ETIMEDOUT);
+
+	mutex_lock(&console_session.ses_mutex);
+
+	if (console_session.ses_shutdown)
+		rc = -ESHUTDOWN;
+
+	if (rc != 0 || atomic_read(&trans->tas_remaining) != 0) {
+		/* treat short timeout as canceled */
+		if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2)
+			rc = -EINTR;
+
+		lstcon_rpc_trans_abort(trans, rc);
+	}
+
+	CDEBUG(D_NET, "Transaction %s stopped: %d\n",
+	       lstcon_rpc_trans_name(trans->tas_opc), rc);
+
+	lstcon_rpc_trans_stat(trans, lstcon_trans_stat());
+
+	return rc;
+}
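/*
 * Editor's note: wait_event_interruptible_timeout() returns the remaining
 * jiffies (> 0) when the condition became true, 0 on timeout, and a
 * negative value when interrupted by a signal.  The ternary above folds
 * that into the console error space; a hypothetical helper spelling the
 * same mapping out:
 */
static int lst_wait_rc(long rc)
{
	if (rc > 0)		/* woken with jiffies to spare */
		return 0;
	if (rc < 0)		/* signal arrived first */
		return -EINTR;
	return -ETIMEDOUT;	/* timer ran out */
}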
+
+int
+lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp)
+{
+	lstcon_node_t	*nd  = crpc->crp_node;
+	srpc_client_rpc_t    *rpc = crpc->crp_rpc;
+	srpc_generic_reply_t *rep;
+
+	LASSERT(nd != NULL && rpc != NULL);
+	LASSERT(crpc->crp_stamp != 0);
+
+	if (crpc->crp_status != 0) {
+		*msgpp = NULL;
+		return crpc->crp_status;
+	}
+
+	*msgpp = &rpc->crpc_replymsg;
+	if (!crpc->crp_unpacked) {
+		sfw_unpack_message(*msgpp);
+		crpc->crp_unpacked = 1;
+	}
+
+	if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp))
+		return 0;
+
+	nd->nd_stamp = crpc->crp_stamp;
+	rep = &(*msgpp)->msg_body.reply;
+
+	if (rep->sid.ses_nid == LNET_NID_ANY)
+		nd->nd_state = LST_NODE_UNKNOWN;
+	else if (lstcon_session_match(rep->sid))
+		nd->nd_state = LST_NODE_ACTIVE;
+	else
+		nd->nd_state = LST_NODE_BUSY;
+
+	return 0;
+}
+
+void
+lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat)
+{
+	lstcon_rpc_t      *crpc;
+	srpc_msg_t	*rep;
+	int		error;
+
+	LASSERT(stat != NULL);
+
+	memset(stat, 0, sizeof(*stat));
+
+	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
+		lstcon_rpc_stat_total(stat, 1);
+
+		LASSERT(crpc->crp_stamp != 0);
+
+		error = lstcon_rpc_get_reply(crpc, &rep);
+		if (error != 0) {
+			lstcon_rpc_stat_failure(stat, 1);
+			if (stat->trs_rpc_errno == 0)
+				stat->trs_rpc_errno = -error;
+
+			continue;
+		}
+
+		lstcon_rpc_stat_success(stat, 1);
+
+		lstcon_rpc_stat_reply(trans, rep, crpc->crp_node, stat);
+	}
+
+	if (trans->tas_opc == LST_TRANS_SESNEW && stat->trs_fwk_errno == 0) {
+		stat->trs_fwk_errno =
+		      lstcon_session_feats_check(trans->tas_features);
+	}
+
+	CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, "
+		      "RPC error(%d), Framework error(%d)\n",
+	       lstcon_rpc_trans_name(trans->tas_opc),
+	       lstcon_rpc_stat_success(stat, 0),
+	       lstcon_rpc_stat_failure(stat, 0),
+	       lstcon_rpc_stat_total(stat, 0),
+	       stat->trs_rpc_errno, stat->trs_fwk_errno);
+}
+
+int
+lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
+			     struct list_head *head_up,
+			     lstcon_rpc_readent_func_t readent)
+{
+	struct list_head	    tmp;
+	struct list_head	   *next;
+	lstcon_rpc_ent_t     *ent;
+	srpc_generic_reply_t *rep;
+	lstcon_rpc_t	 *crpc;
+	srpc_msg_t	   *msg;
+	lstcon_node_t	*nd;
+	cfs_duration_t	dur;
+	struct timeval	tv;
+	int		   error;
+
+	LASSERT(head_up != NULL);
+
+	next = head_up;
+
+	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
+		if (copy_from_user(&tmp, next,
+				       sizeof(struct list_head)))
+			return -EFAULT;
+
+		if (tmp.next == head_up)
+			return 0;
+
+		next = tmp.next;
+
+		ent = list_entry(next, lstcon_rpc_ent_t, rpe_link);
+
+		LASSERT(crpc->crp_stamp != 0);
+
+		error = lstcon_rpc_get_reply(crpc, &msg);
+
+		nd = crpc->crp_node;
+
+		dur = (cfs_duration_t)cfs_time_sub(crpc->crp_stamp,
+		      (cfs_time_t)console_session.ses_id.ses_stamp);
+		cfs_duration_usec(dur, &tv);
+
+		if (copy_to_user(&ent->rpe_peer,
+				     &nd->nd_id, sizeof(lnet_process_id_t)) ||
+		    copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) ||
+		    copy_to_user(&ent->rpe_state,
+				     &nd->nd_state, sizeof(nd->nd_state)) ||
+		    copy_to_user(&ent->rpe_rpc_errno, &error,
+				     sizeof(error)))
+			return -EFAULT;
+
+		if (error != 0)
+			continue;
+
+		/* RPC is done */
+		rep = (srpc_generic_reply_t *)&msg->msg_body.reply;
+
+		if (copy_to_user(&ent->rpe_sid,
+				     &rep->sid, sizeof(lst_sid_t)) ||
+		    copy_to_user(&ent->rpe_fwk_errno,
+				     &rep->status, sizeof(rep->status)))
+			return -EFAULT;
+
+		if (readent == NULL)
+			continue;
+
+		error = readent(trans->tas_opc, msg, ent);
+		if (error != 0)
+			return error;
+	}
+
+	return 0;
+}
+
+void
+lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans)
+{
+	srpc_client_rpc_t *rpc;
+	lstcon_rpc_t      *crpc;
+	lstcon_rpc_t      *tmp;
+	int		count = 0;
+
+	list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list,
+				 crp_link) {
+		rpc = crpc->crp_rpc;
+
+		spin_lock(&rpc->crpc_lock);
+
+		/* free it if not posted or finished already */
+		if (!crpc->crp_posted || crpc->crp_finished) {
+			spin_unlock(&rpc->crpc_lock);
+
+			list_del_init(&crpc->crp_link);
+			lstcon_rpc_put(crpc);
+
+			continue;
+		}
+
+		/* RPCs may still be waiting for their callbacks (even after
+		 * LNetMDUnlink() is called) because of the huge timeout for an
+		 * inaccessible network; don't make the user wait for them,
+		 * just abandon them, they will be recycled in the callback */
+
+		LASSERT(crpc->crp_status != 0);
+
+		crpc->crp_node  = NULL;
+		crpc->crp_trans = NULL;
+		list_del_init(&crpc->crp_link);
+		count++;
+
+		spin_unlock(&rpc->crpc_lock);
+
+		atomic_dec(&trans->tas_remaining);
+	}
+
+	LASSERT(atomic_read(&trans->tas_remaining) == 0);
+
+	list_del(&trans->tas_link);
+	if (!list_empty(&trans->tas_olink))
+		list_del(&trans->tas_olink);
+
+	CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n",
+	       lstcon_rpc_trans_name(trans->tas_opc), count);
+
+	LIBCFS_FREE(trans, sizeof(*trans));
+}
+
+int
+lstcon_sesrpc_prep(lstcon_node_t *nd, int transop,
+		   unsigned feats, lstcon_rpc_t **crpc)
+{
+	srpc_mksn_reqst_t *msrq;
+	srpc_rmsn_reqst_t *rsrq;
+	int		rc;
+
+	switch (transop) {
+	case LST_TRANS_SESNEW:
+		rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION,
+				     feats, 0, 0, crpc);
+		if (rc != 0)
+			return rc;
+
+		msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst;
+		msrq->mksn_sid     = console_session.ses_id;
+		msrq->mksn_force   = console_session.ses_force;
+		strncpy(msrq->mksn_name, console_session.ses_name,
+			strlen(console_session.ses_name));
+		break;
+
+	case LST_TRANS_SESEND:
+		rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION,
+				     feats, 0, 0, crpc);
+		if (rc != 0)
+			return rc;
+
+		rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst;
+		rsrq->rmsn_sid = console_session.ses_id;
+		break;
+
+	default:
+		LBUG();
+	}
+
+	return 0;
+}
+
+int
+lstcon_dbgrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc)
+{
+	srpc_debug_reqst_t *drq;
+	int		    rc;
+
+	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc);
+	if (rc != 0)
+		return rc;
+
+	drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
+
+	drq->dbg_sid   = console_session.ses_id;
+	drq->dbg_flags = 0;
+
+	return rc;
+}
+
+int
+lstcon_batrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
+		   lstcon_tsb_hdr_t *tsb, lstcon_rpc_t **crpc)
+{
+	lstcon_batch_t	   *batch;
+	srpc_batch_reqst_t *brq;
+	int		    rc;
+
+	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc);
+	if (rc != 0)
+		return rc;
+
+	brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst;
+
+	brq->bar_sid     = console_session.ses_id;
+	brq->bar_bid     = tsb->tsb_id;
+	brq->bar_testidx = tsb->tsb_index;
+	brq->bar_opc     = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN :
+			   (transop == LST_TRANS_TSBSTOP ? SRPC_BATCH_OPC_STOP :
+			    SRPC_BATCH_OPC_QUERY);
+
+	if (transop != LST_TRANS_TSBRUN &&
+	    transop != LST_TRANS_TSBSTOP)
+		return 0;
+
+	LASSERT(tsb->tsb_index == 0);
+
+	batch = (lstcon_batch_t *)tsb;
+	brq->bar_arg = batch->bat_arg;
+
+	return 0;
+}
+
+int
+lstcon_statrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc)
+{
+	srpc_stat_reqst_t *srq;
+	int		   rc;
+
+	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc);
+	if (rc != 0)
+		return rc;
+
+	srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst;
+
+	srq->str_sid  = console_session.ses_id;
+	srq->str_type = 0; /* XXX remove it */
+
+	return 0;
+}
+
+lnet_process_id_packed_t *
+lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov)
+{
+	lnet_process_id_packed_t *pid;
+	int		       i;
+
+	i = idx / SFW_ID_PER_PAGE;
+
+	LASSERT(i < nkiov);
+
+	pid = (lnet_process_id_packed_t *)page_address(kiov[i].kiov_page);
+
+	return &pid[idx % SFW_ID_PER_PAGE];
+}
+
+int
+lstcon_dstnodes_prep(lstcon_group_t *grp, int idx,
+		     int dist, int span, int nkiov, lnet_kiov_t *kiov)
+{
+	lnet_process_id_packed_t *pid;
+	lstcon_ndlink_t	  *ndl;
+	lstcon_node_t	    *nd;
+	int		       start;
+	int		       end;
+	int		       i = 0;
+
+	LASSERT(dist >= 1);
+	LASSERT(span >= 1);
+	LASSERT(grp->grp_nnode >= 1);
+
+	if (span > grp->grp_nnode)
+		return -EINVAL;
+
+	start = ((idx / dist) * span) % grp->grp_nnode;
+	end   = ((idx / dist) * span + span - 1) % grp->grp_nnode;
+
+	list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
+		nd = ndl->ndl_node;
+		if (i < start) {
+			i++;
+			continue;
+		}
+
+		if (i > (end >= start ? end : grp->grp_nnode))
+			break;
+
+		pid = lstcon_next_id((i - start), nkiov, kiov);
+		pid->nid = nd->nd_id.nid;
+		pid->pid = nd->nd_id.pid;
+		i++;
+	}
+
+	if (start <= end) /* done */
+		return 0;
+
+	list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
+		if (i > grp->grp_nnode + end)
+			break;
+
+		nd = ndl->ndl_node;
+		pid = lstcon_next_id((i - start), nkiov, kiov);
+		pid->nid = nd->nd_id.nid;
+		pid->pid = nd->nd_id.pid;
+		i++;
+	}
+
+	return 0;
+}
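/*
 * Editor's sketch (standalone userspace demo, not part of the patch): for
 * client index idx the destinations are the window of `span` nodes that
 * starts at ((idx / dist) * span) % nnode, wrapping around the group,
 * which is exactly what the two-pass walk above enumerates.
 */
#include <stdio.h>

int main(void)
{
	int nnode = 5, dist = 1, span = 3, idx = 3;
	int start = ((idx / dist) * span) % nnode;
	int end   = ((idx / dist) * span + span - 1) % nnode;
	int i;

	/* idx 3 -> start 4, end 1: the window wraps to nodes 4, 0, 1 */
	printf("window [%d..%d]%s:", start, end,
	       start <= end ? "" : " (wraps)");
	for (i = 0; i < span; i++)
		printf(" %d", (start + i) % nnode);
	printf("\n");
	return 0;
}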
+
+int
+lstcon_pingrpc_prep(lst_test_ping_param_t *param, srpc_test_reqst_t *req)
+{
+	test_ping_req_t *prq = &req->tsr_u.ping;
+
+	prq->png_size   = param->png_size;
+	prq->png_flags  = param->png_flags;
+	/* TODO dest */
+	return 0;
+}
+
+int
+lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req)
+{
+	test_bulk_req_t *brq = &req->tsr_u.bulk_v0;
+
+	brq->blk_opc    = param->blk_opc;
+	brq->blk_npg    = (param->blk_size + PAGE_CACHE_SIZE - 1) /
+			  PAGE_CACHE_SIZE;
+	brq->blk_flags  = param->blk_flags;
+
+	return 0;
+}
+
+int
+lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req)
+{
+	test_bulk_req_v1_t *brq = &req->tsr_u.bulk_v1;
+
+	brq->blk_opc	= param->blk_opc;
+	brq->blk_flags	= param->blk_flags;
+	brq->blk_len	= param->blk_size;
+	brq->blk_offset	= 0; /* reserved */
+
+	return 0;
+}
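/*
 * Editor's note: v0 can only describe whole pages, so the byte count above
 * is rounded up with the usual ceil-division idiom, while v1
 * (LST_FEAT_BULK_LEN) carries the exact byte length instead.  With
 * 4096-byte pages, blk_size = 6000 gives (6000 + 4095) / 4096 = 2 pages.
 * The idiom in isolation:
 */
static inline int lst_pages_for(int bytes, int page_size)
{
	return (bytes + page_size - 1) / page_size;	/* ceil(bytes/page) */
}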
+
+int
+lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
+		    lstcon_test_t *test, lstcon_rpc_t **crpc)
+{
+	lstcon_group_t    *sgrp = test->tes_src_grp;
+	lstcon_group_t    *dgrp = test->tes_dst_grp;
+	srpc_test_reqst_t *trq;
+	srpc_bulk_t       *bulk;
+	int		i;
+	int		   npg = 0;
+	int		   nob = 0;
+	int		   rc  = 0;
+
+	if (transop == LST_TRANS_TSBCLIADD) {
+		npg = sfw_id_pages(test->tes_span);
+		nob = (feats & LST_FEAT_BULK_LEN) == 0 ?
+		      npg * PAGE_CACHE_SIZE :
+		      sizeof(lnet_process_id_packed_t) * test->tes_span;
+	}
+
+	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, feats, npg, nob, crpc);
+	if (rc != 0)
+		return rc;
+
+	trq  = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst;
+
+	if (transop == LST_TRANS_TSBSRVADD) {
+		int ndist = (sgrp->grp_nnode + test->tes_dist - 1) / test->tes_dist;
+		int nspan = (dgrp->grp_nnode + test->tes_span - 1) / test->tes_span;
+		int nmax = (ndist + nspan - 1) / nspan;
+
+		trq->tsr_ndest = 0;
+		trq->tsr_loop  = nmax * test->tes_dist * test->tes_concur;
+
+	} else {
+		bulk = &(*crpc)->crp_rpc->crpc_bulk;
+
+		for (i = 0; i < npg; i++) {
+			int	len;
+
+			LASSERT(nob > 0);
+
+			len = (feats & LST_FEAT_BULK_LEN) == 0 ?
+			      PAGE_CACHE_SIZE : min_t(int, nob, PAGE_CACHE_SIZE);
+			nob -= len;
+
+			bulk->bk_iovs[i].kiov_offset = 0;
+			bulk->bk_iovs[i].kiov_len    = len;
+			bulk->bk_iovs[i].kiov_page   =
+				alloc_page(GFP_IOFS);
+
+			if (bulk->bk_iovs[i].kiov_page == NULL) {
+				lstcon_rpc_put(*crpc);
+				return -ENOMEM;
+			}
+		}
+
+		bulk->bk_sink = 0;
+
+		LASSERT(transop == LST_TRANS_TSBCLIADD);
+
+		rc = lstcon_dstnodes_prep(test->tes_dst_grp,
+					  test->tes_cliidx++,
+					  test->tes_dist,
+					  test->tes_span,
+					  npg, &bulk->bk_iovs[0]);
+		if (rc != 0) {
+			lstcon_rpc_put(*crpc);
+			return rc;
+		}
+
+		trq->tsr_ndest = test->tes_span;
+		trq->tsr_loop  = test->tes_loop;
+	}
+
+	trq->tsr_sid	= console_session.ses_id;
+	trq->tsr_bid	= test->tes_hdr.tsb_id;
+	trq->tsr_concur     = test->tes_concur;
+	trq->tsr_is_client  = (transop == LST_TRANS_TSBCLIADD) ? 1 : 0;
+	trq->tsr_stop_onerr = !!test->tes_stop_onerr;
+
+	switch (test->tes_type) {
+	case LST_TEST_PING:
+		trq->tsr_service = SRPC_SERVICE_PING;
+		rc = lstcon_pingrpc_prep((lst_test_ping_param_t *)
+					 &test->tes_param[0], trq);
+		break;
+
+	case LST_TEST_BULK:
+		trq->tsr_service = SRPC_SERVICE_BRW;
+		if ((feats & LST_FEAT_BULK_LEN) == 0) {
+			rc = lstcon_bulkrpc_v0_prep((lst_test_bulk_param_t *)
+						    &test->tes_param[0], trq);
+		} else {
+			rc = lstcon_bulkrpc_v1_prep((lst_test_bulk_param_t *)
+						    &test->tes_param[0], trq);
+		}
+
+		break;
+	default:
+		LBUG();
+		break;
+	}
+
+	return rc;
+}
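/*
 * Editor's sketch of the server-side loop budget computed above, with
 * hypothetical numbers: 8 clients, 4 servers, dist = 2, span = 2,
 * concur = 1 gives ndist = 4, nspan = 2, nmax = 2, so each server is told
 * to loop 2 * 2 * 1 = 4 times.  The arithmetic in isolation:
 */
static int lst_srv_loop_budget(int nsrc, int ndst, int dist, int span,
			       int concur)
{
	int ndist = (nsrc + dist - 1) / dist;	/* client windows */
	int nspan = (ndst + span - 1) / span;	/* server windows */
	int nmax  = (ndist + nspan - 1) / nspan;

	return nmax * dist * concur;
}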
+
+int
+lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans,
+			 lstcon_node_t *nd, srpc_msg_t *reply)
+{
+	srpc_mksn_reply_t *mksn_rep = &reply->msg_body.mksn_reply;
+	int		   status   = mksn_rep->mksn_status;
+
+	if (status == 0 &&
+	    (reply->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+		mksn_rep->mksn_status = EPROTO;
+		status = EPROTO;
+	}
+
+	if (status == EPROTO) {
+		CNETERR("session protocol error from %s: %u\n",
+			libcfs_nid2str(nd->nd_id.nid),
+			reply->msg_ses_feats);
+	}
+
+	if (status != 0)
+		return status;
+
+	if (!trans->tas_feats_updated) {
+		trans->tas_feats_updated = 1;
+		trans->tas_features = reply->msg_ses_feats;
+	}
+
+	if (reply->msg_ses_feats != trans->tas_features) {
+		CNETERR("Framework features %x from %s is different with "
+			"features on this transaction: %x\n",
+			 reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid),
+			 trans->tas_features);
+		status = mksn_rep->mksn_status = EPROTO;
+	}
+
+	if (status == 0) {
+		/* session timeout on remote node */
+		nd->nd_timeout = mksn_rep->mksn_timeout;
+	}
+
+	return status;
+}
+
+void
+lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg,
+		      lstcon_node_t *nd, lstcon_trans_stat_t *stat)
+{
+	srpc_rmsn_reply_t  *rmsn_rep;
+	srpc_debug_reply_t *dbg_rep;
+	srpc_batch_reply_t *bat_rep;
+	srpc_test_reply_t  *test_rep;
+	srpc_stat_reply_t  *stat_rep;
+	int		 rc = 0;
+
+	switch (trans->tas_opc) {
+	case LST_TRANS_SESNEW:
+		rc = lstcon_sesnew_stat_reply(trans, nd, msg);
+		if (rc == 0) {
+			lstcon_sesop_stat_success(stat, 1);
+			return;
+		}
+
+		lstcon_sesop_stat_failure(stat, 1);
+		break;
+
+	case LST_TRANS_SESEND:
+		rmsn_rep = &msg->msg_body.rmsn_reply;
+		/* ESRCH is not an error for end session */
+		if (rmsn_rep->rmsn_status == 0 ||
+		    rmsn_rep->rmsn_status == ESRCH) {
+			lstcon_sesop_stat_success(stat, 1);
+			return;
+		}
+
+		lstcon_sesop_stat_failure(stat, 1);
+		rc = rmsn_rep->rmsn_status;
+		break;
+
+	case LST_TRANS_SESQRY:
+	case LST_TRANS_SESPING:
+		dbg_rep = &msg->msg_body.dbg_reply;
+
+		if (dbg_rep->dbg_status == ESRCH) {
+			lstcon_sesqry_stat_unknown(stat, 1);
+			return;
+		}
+
+		if (lstcon_session_match(dbg_rep->dbg_sid))
+			lstcon_sesqry_stat_active(stat, 1);
+		else
+			lstcon_sesqry_stat_busy(stat, 1);
+		return;
+
+	case LST_TRANS_TSBRUN:
+	case LST_TRANS_TSBSTOP:
+		bat_rep = &msg->msg_body.bat_reply;
+
+		if (bat_rep->bar_status == 0) {
+			lstcon_tsbop_stat_success(stat, 1);
+			return;
+		}
+
+		if (bat_rep->bar_status == EPERM &&
+		    trans->tas_opc == LST_TRANS_TSBSTOP) {
+			lstcon_tsbop_stat_success(stat, 1);
+			return;
+		}
+
+		lstcon_tsbop_stat_failure(stat, 1);
+		rc = bat_rep->bar_status;
+		break;
+
+	case LST_TRANS_TSBCLIQRY:
+	case LST_TRANS_TSBSRVQRY:
+		bat_rep = &msg->msg_body.bat_reply;
+
+		if (bat_rep->bar_active != 0)
+			lstcon_tsbqry_stat_run(stat, 1);
+		else
+			lstcon_tsbqry_stat_idle(stat, 1);
+
+		if (bat_rep->bar_status == 0)
+			return;
+
+		lstcon_tsbqry_stat_failure(stat, 1);
+		rc = bat_rep->bar_status;
+		break;
+
+	case LST_TRANS_TSBCLIADD:
+	case LST_TRANS_TSBSRVADD:
+		test_rep = &msg->msg_body.tes_reply;
+
+		if (test_rep->tsr_status == 0) {
+			lstcon_tsbop_stat_success(stat, 1);
+			return;
+		}
+
+		lstcon_tsbop_stat_failure(stat, 1);
+		rc = test_rep->tsr_status;
+		break;
+
+	case LST_TRANS_STATQRY:
+		stat_rep = &msg->msg_body.stat_reply;
+
+		if (stat_rep->str_status == 0) {
+			lstcon_statqry_stat_success(stat, 1);
+			return;
+		}
+
+		lstcon_statqry_stat_failure(stat, 1);
+		rc = stat_rep->str_status;
+		break;
+
+	default:
+		LBUG();
+	}
+
+	if (stat->trs_fwk_errno == 0)
+		stat->trs_fwk_errno = rc;
+}
+
+int
+lstcon_rpc_trans_ndlist(struct list_head *ndlist,
+			struct list_head *translist, int transop,
+			void *arg, lstcon_rpc_cond_func_t condition,
+			lstcon_rpc_trans_t **transpp)
+{
+	lstcon_rpc_trans_t *trans;
+	lstcon_ndlink_t    *ndl;
+	lstcon_node_t      *nd;
+	lstcon_rpc_t       *rpc;
+	unsigned	    feats;
+	int		 rc;
+
+	/* Create session RPCs for the list of nodes */
+
+	rc = lstcon_rpc_trans_prep(translist, transop, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction %d: %d\n", transop, rc);
+		return rc;
+	}
+
+	feats = trans->tas_features;
+	list_for_each_entry(ndl, ndlist, ndl_link) {
+		rc = condition == NULL ? 1 :
+		     condition(transop, ndl->ndl_node, arg);
+
+		if (rc == 0)
+			continue;
+
+		if (rc < 0) {
+			CDEBUG(D_NET, "Condition error while creating RPC "
+				      " for transaction %d: %d\n", transop, rc);
+			break;
+		}
+
+		nd = ndl->ndl_node;
+
+		switch (transop) {
+		case LST_TRANS_SESNEW:
+		case LST_TRANS_SESEND:
+			rc = lstcon_sesrpc_prep(nd, transop, feats, &rpc);
+			break;
+		case LST_TRANS_SESQRY:
+		case LST_TRANS_SESPING:
+			rc = lstcon_dbgrpc_prep(nd, feats, &rpc);
+			break;
+		case LST_TRANS_TSBCLIADD:
+		case LST_TRANS_TSBSRVADD:
+			rc = lstcon_testrpc_prep(nd, transop, feats,
+						 (lstcon_test_t *)arg, &rpc);
+			break;
+		case LST_TRANS_TSBRUN:
+		case LST_TRANS_TSBSTOP:
+		case LST_TRANS_TSBCLIQRY:
+		case LST_TRANS_TSBSRVQRY:
+			rc = lstcon_batrpc_prep(nd, transop, feats,
+						(lstcon_tsb_hdr_t *)arg, &rpc);
+			break;
+		case LST_TRANS_STATQRY:
+			rc = lstcon_statrpc_prep(nd, feats, &rpc);
+			break;
+		default:
+			rc = -EINVAL;
+			break;
+		}
+
+		if (rc != 0) {
+			CERROR("Failed to create RPC for transaction %s: %d\n",
+			       lstcon_rpc_trans_name(transop), rc);
+			break;
+		}
+
+		lstcon_rpc_trans_addreq(trans, rpc);
+	}
+
+	if (rc == 0) {
+		*transpp = trans;
+		return 0;
+	}
+
+	lstcon_rpc_trans_destroy(trans);
+
+	return rc;
+}
+
+void
+lstcon_rpc_pinger(void *arg)
+{
+	stt_timer_t	*ptimer = (stt_timer_t *)arg;
+	lstcon_rpc_trans_t *trans;
+	lstcon_rpc_t       *crpc;
+	srpc_msg_t	 *rep;
+	srpc_debug_reqst_t *drq;
+	lstcon_ndlink_t    *ndl;
+	lstcon_node_t      *nd;
+	time_t	      intv;
+	int		 count = 0;
+	int		 rc;
+
+	/* RPC pinger is a special case of transaction,
+	 * it's called by the timer at an 8-second interval.
+	 */
+	mutex_lock(&console_session.ses_mutex);
+
+	if (console_session.ses_shutdown || console_session.ses_expired) {
+		mutex_unlock(&console_session.ses_mutex);
+		return;
+	}
+
+	if (!console_session.ses_expired &&
+	    cfs_time_current_sec() - console_session.ses_laststamp >
+	    (time_t)console_session.ses_timeout)
+		console_session.ses_expired = 1;
+
+	trans = console_session.ses_ping;
+
+	LASSERT(trans != NULL);
+
+	list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) {
+		nd = ndl->ndl_node;
+
+		if (console_session.ses_expired) {
+			/* idle console, end session on all nodes */
+			if (nd->nd_state != LST_NODE_ACTIVE)
+				continue;
+
+			rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND,
+						trans->tas_features, &crpc);
+			if (rc != 0) {
+				CERROR("Out of memory\n");
+				break;
+			}
+
+			lstcon_rpc_trans_addreq(trans, crpc);
+			lstcon_rpc_post(crpc);
+
+			continue;
+		}
+
+		crpc = &nd->nd_ping;
+
+		if (crpc->crp_rpc != NULL) {
+			LASSERT(crpc->crp_trans == trans);
+			LASSERT(!list_empty(&crpc->crp_link));
+
+			spin_lock(&crpc->crp_rpc->crpc_lock);
+
+			LASSERT(crpc->crp_posted);
+
+			if (!crpc->crp_finished) {
+				/* in flight */
+				spin_unlock(&crpc->crp_rpc->crpc_lock);
+				continue;
+			}
+
+			spin_unlock(&crpc->crp_rpc->crpc_lock);
+
+			lstcon_rpc_get_reply(crpc, &rep);
+
+			list_del_init(&crpc->crp_link);
+
+			lstcon_rpc_put(crpc);
+		}
+
+		if (nd->nd_state != LST_NODE_ACTIVE)
+			continue;
+
+		intv = cfs_duration_sec(cfs_time_sub(cfs_time_current(),
+						     nd->nd_stamp));
+		if (intv < (time_t)nd->nd_timeout / 2)
+			continue;
+
+		rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG,
+				     trans->tas_features, 0, 0, 1, crpc);
+		if (rc != 0) {
+			CERROR("Out of memory\n");
+			break;
+		}
+
+		drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
+
+		drq->dbg_sid   = console_session.ses_id;
+		drq->dbg_flags = 0;
+
+		lstcon_rpc_trans_addreq(trans, crpc);
+		lstcon_rpc_post(crpc);
+
+		count++;
+	}
+
+	if (console_session.ses_expired) {
+		mutex_unlock(&console_session.ses_mutex);
+		return;
+	}
+
+	CDEBUG(D_NET, "Ping %d nodes in session\n", count);
+
+	ptimer->stt_expires = (cfs_time_t)(cfs_time_current_sec() + LST_PING_INTERVAL);
+	stt_add_timer(ptimer);
+
+	mutex_unlock(&console_session.ses_mutex);
+}
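/*
 * Editor's note: the selftest timer is one-shot, as the re-adding above
 * suggests, so the pinger re-arms itself at the end of every run.  A
 * minimal sketch of the pattern, using names from this file:
 */
static void lst_pinger_rearm(stt_timer_t *ptimer)
{
	ptimer->stt_expires = (cfs_time_t)(cfs_time_current_sec() +
					   LST_PING_INTERVAL);
	stt_add_timer(ptimer);	/* must be re-added after each expiry */
}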
+
+int
+lstcon_rpc_pinger_start(void)
+{
+	stt_timer_t    *ptimer;
+	int	     rc;
+
+	LASSERT(list_empty(&console_session.ses_rpc_freelist));
+	LASSERT(atomic_read(&console_session.ses_rpc_counter) == 0);
+
+	rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING,
+				   &console_session.ses_ping);
+	if (rc != 0) {
+		CERROR("Failed to create console pinger\n");
+		return rc;
+	}
+
+	ptimer = &console_session.ses_ping_timer;
+	ptimer->stt_expires = (cfs_time_t)(cfs_time_current_sec() + LST_PING_INTERVAL);
+
+	stt_add_timer(ptimer);
+
+	return 0;
+}
+
+void
+lstcon_rpc_pinger_stop(void)
+{
+	LASSERT(console_session.ses_shutdown);
+
+	stt_del_timer(&console_session.ses_ping_timer);
+
+	lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN);
+	lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat());
+	lstcon_rpc_trans_destroy(console_session.ses_ping);
+
+	memset(lstcon_trans_stat(), 0, sizeof(lstcon_trans_stat_t));
+
+	console_session.ses_ping = NULL;
+}
+
+void
+lstcon_rpc_cleanup_wait(void)
+{
+	lstcon_rpc_trans_t *trans;
+	lstcon_rpc_t       *crpc;
+	struct list_head	 *pacer;
+	struct list_head	  zlist;
+
+	/* Called with hold of global mutex */
+
+	LASSERT(console_session.ses_shutdown);
+
+	while (!list_empty(&console_session.ses_trans_list)) {
+		list_for_each(pacer, &console_session.ses_trans_list) {
+			trans = list_entry(pacer, lstcon_rpc_trans_t,
+					       tas_link);
+
+			CDEBUG(D_NET, "Session closed, wakeup transaction %s\n",
+			       lstcon_rpc_trans_name(trans->tas_opc));
+
+			wake_up(&trans->tas_waitq);
+		}
+
+		mutex_unlock(&console_session.ses_mutex);
+
+		CWARN("Session is shutting down, "
+		      "waiting for termination of transactions\n");
+		cfs_pause(cfs_time_seconds(1));
+
+		mutex_lock(&console_session.ses_mutex);
+	}
+
+	spin_lock(&console_session.ses_rpc_lock);
+
+	lst_wait_until((atomic_read(&console_session.ses_rpc_counter) == 0),
+		       console_session.ses_rpc_lock,
+		       "Network is not accessable or target is down, "
+		       "waiting for %d console RPCs to being recycled\n",
+		       atomic_read(&console_session.ses_rpc_counter));
+
+	list_add(&zlist, &console_session.ses_rpc_freelist);
+	list_del_init(&console_session.ses_rpc_freelist);
+
+	spin_unlock(&console_session.ses_rpc_lock);
+
+	while (!list_empty(&zlist)) {
+		crpc = list_entry(zlist.next, lstcon_rpc_t, crp_link);
+
+		list_del(&crpc->crp_link);
+		LIBCFS_FREE(crpc, sizeof(lstcon_rpc_t));
+	}
+}
+
+int
+lstcon_rpc_module_init(void)
+{
+	INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list);
+	console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger;
+	console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer;
+
+	console_session.ses_ping = NULL;
+
+	spin_lock_init(&console_session.ses_rpc_lock);
+	atomic_set(&console_session.ses_rpc_counter, 0);
+	INIT_LIST_HEAD(&console_session.ses_rpc_freelist);
+
+	return 0;
+}
+
+void
+lstcon_rpc_module_fini(void)
+{
+	LASSERT(list_empty(&console_session.ses_rpc_freelist));
+	LASSERT(atomic_read(&console_session.ses_rpc_counter) == 0);
+}

+ 146 - 0
drivers/staging/lustre/lnet/selftest/conrpc.h

@@ -0,0 +1,146 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/conrpc.h
+ *
+ * Console rpc
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ */
+
+#ifndef __LST_CONRPC_H__
+#define __LST_CONRPC_H__
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-types.h>
+#include <linux/lnet/lnetst.h>
+#include "rpc.h"
+#include "selftest.h"
+
+/* Console rpc and rpc transaction */
+#define LST_TRANS_TIMEOUT       30
+#define LST_TRANS_MIN_TIMEOUT   3
+
+#define LST_VALIDATE_TIMEOUT(t) MIN(MAX(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT)
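/*
 * Editor's note: this clamps a user-supplied timeout into the range
 * [LST_TRANS_MIN_TIMEOUT, LST_TRANS_TIMEOUT] seconds, e.g.:
 *
 *	LST_VALIDATE_TIMEOUT(1)   == 3
 *	LST_VALIDATE_TIMEOUT(10)  == 10
 *	LST_VALIDATE_TIMEOUT(100) == 30
 */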
+
+#define LST_PING_INTERVAL       8
+
+struct lstcon_rpc_trans;
+struct lstcon_tsb_hdr;
+struct lstcon_test;
+struct lstcon_node;
+
+typedef struct lstcon_rpc {
+	struct list_head	       crp_link;       /* chain on rpc transaction */
+	srpc_client_rpc_t       *crp_rpc;	/* client rpc */
+	struct lstcon_node      *crp_node;       /* destination node */
+	struct lstcon_rpc_trans *crp_trans;     /* conrpc transaction */
+
+	unsigned int		 crp_posted:1;   /* rpc is posted */
+	unsigned int		 crp_finished:1; /* rpc is finished */
+	unsigned int		 crp_unpacked:1; /* reply is unpacked */
+	/** RPC is embedded in another structure and must not be freed */
+	unsigned int		 crp_embedded:1;
+	int		      crp_status;     /* console rpc errors */
+	cfs_time_t	       crp_stamp;      /* replied time stamp */
+} lstcon_rpc_t;
+
+typedef struct lstcon_rpc_trans {
+	struct list_head	    tas_olink;     /* link chain on owner list */
+	struct list_head	    tas_link;      /* link chain on global list */
+	int		   tas_opc;       /* operation code of transaction */
+	/* features mask is uptodate */
+	unsigned	      tas_feats_updated;
+	/* test features mask */
+	unsigned	      tas_features;
+	wait_queue_head_t	   tas_waitq;     /* wait queue head */
+	atomic_t	  tas_remaining; /* # of un-scheduled rpcs */
+	struct list_head	    tas_rpcs_list; /* queued requests */
+} lstcon_rpc_trans_t;
+
+#define LST_TRANS_PRIVATE       0x1000
+
+#define LST_TRANS_SESNEW	(LST_TRANS_PRIVATE | 0x01)
+#define LST_TRANS_SESEND	(LST_TRANS_PRIVATE | 0x02)
+#define LST_TRANS_SESQRY	0x03
+#define LST_TRANS_SESPING       0x04
+
+#define LST_TRANS_TSBCLIADD     (LST_TRANS_PRIVATE | 0x11)
+#define LST_TRANS_TSBSRVADD     (LST_TRANS_PRIVATE | 0x12)
+#define LST_TRANS_TSBRUN	(LST_TRANS_PRIVATE | 0x13)
+#define LST_TRANS_TSBSTOP       (LST_TRANS_PRIVATE | 0x14)
+#define LST_TRANS_TSBCLIQRY     0x15
+#define LST_TRANS_TSBSRVQRY     0x16
+
+#define LST_TRANS_STATQRY       0x21
+
+typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *);
+typedef int (*lstcon_rpc_readent_func_t)(int, srpc_msg_t *,
+					 lstcon_rpc_ent_t *);
+
+int  lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
+			unsigned version, lstcon_rpc_t **crpc);
+int  lstcon_dbgrpc_prep(struct lstcon_node *nd,
+			unsigned version, lstcon_rpc_t **crpc);
+int  lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
+			struct lstcon_tsb_hdr *tsb, lstcon_rpc_t **crpc);
+int  lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
+			 struct lstcon_test *test, lstcon_rpc_t **crpc);
+int  lstcon_statrpc_prep(struct lstcon_node *nd, unsigned version,
+			 lstcon_rpc_t **crpc);
+void lstcon_rpc_put(lstcon_rpc_t *crpc);
+int  lstcon_rpc_trans_prep(struct list_head *translist,
+			   int transop, lstcon_rpc_trans_t **transpp);
+int  lstcon_rpc_trans_ndlist(struct list_head *ndlist,
+			     struct list_head *translist, int transop,
+			     void *arg, lstcon_rpc_cond_func_t condition,
+			     lstcon_rpc_trans_t **transpp);
+void lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans,
+			   lstcon_trans_stat_t *stat);
+int  lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
+				  struct list_head *head_up,
+				  lstcon_rpc_readent_func_t readent);
+void lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error);
+void lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans);
+void lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *req);
+int  lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout);
+int  lstcon_rpc_pinger_start(void);
+void lstcon_rpc_pinger_stop(void);
+void lstcon_rpc_cleanup_wait(void);
+int  lstcon_rpc_module_init(void);
+void lstcon_rpc_module_fini(void);
+
+#endif

+ 2071 - 0
drivers/staging/lustre/lnet/selftest/console.c

@@ -0,0 +1,2071 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/console.c
+ *
+ * Infrastructure of LST console
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lib-lnet.h>
+#include "console.h"
+#include "conrpc.h"
+
+#define LST_NODE_STATE_COUNTER(nd, p)			\
+do {							\
+	if ((nd)->nd_state == LST_NODE_ACTIVE)		\
+		(p)->nle_nactive++;			\
+	else if ((nd)->nd_state == LST_NODE_BUSY)	\
+		(p)->nle_nbusy++;			\
+	else if ((nd)->nd_state == LST_NODE_DOWN)	\
+		(p)->nle_ndown++;			\
+	else						\
+		(p)->nle_nunknown++;			\
+	(p)->nle_nnode++;				\
+} while (0)
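/*
 * Editor's note: the do { ... } while (0) wrapper makes this multi-statement
 * macro act as a single statement, so it composes safely with if/else, e.g.
 * (hypothetical caller):
 *
 *	if (counting)
 *		LST_NODE_STATE_COUNTER(nd, ent);
 *	else
 *		skip(nd);
 *
 * Without the wrapper, the macro body would end at its first `;` and the
 * else would no longer pair with the if.
 */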
+
+lstcon_session_t	console_session;
+
+void
+lstcon_node_get(lstcon_node_t *nd)
+{
+	LASSERT(nd->nd_ref >= 1);
+
+	nd->nd_ref++;
+}
+
+static int
+lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create)
+{
+	lstcon_ndlink_t *ndl;
+	unsigned int     idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE;
+
+	LASSERT(id.nid != LNET_NID_ANY);
+
+	list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx], ndl_hlink) {
+		if (ndl->ndl_node->nd_id.nid != id.nid ||
+		    ndl->ndl_node->nd_id.pid != id.pid)
+			continue;
+
+		lstcon_node_get(ndl->ndl_node);
+		*ndpp = ndl->ndl_node;
+		return 0;
+	}
+
+	if (!create)
+		return -ENOENT;
+
+	LIBCFS_ALLOC(*ndpp, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t));
+	if (*ndpp == NULL)
+		return -ENOMEM;
+
+	ndl = (lstcon_ndlink_t *)(*ndpp + 1);
+
+	ndl->ndl_node = *ndpp;
+
+	ndl->ndl_node->nd_ref   = 1;
+	ndl->ndl_node->nd_id    = id;
+	ndl->ndl_node->nd_stamp = cfs_time_current();
+	ndl->ndl_node->nd_state = LST_NODE_UNKNOWN;
+	ndl->ndl_node->nd_timeout = 0;
+	memset(&ndl->ndl_node->nd_ping, 0, sizeof(lstcon_rpc_t));
+
+	/* queued in global hash & list; no refcount is taken by the
+	 * global hash & list, so once the caller releases its refcount,
+	 * the node will be released */
+	list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]);
+	list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list);
+
+	return 0;
+}
+
+void
+lstcon_node_put(lstcon_node_t *nd)
+{
+	lstcon_ndlink_t  *ndl;
+
+	LASSERT(nd->nd_ref > 0);
+
+	if (--nd->nd_ref > 0)
+		return;
+
+	ndl = (lstcon_ndlink_t *)(nd + 1);
+
+	LASSERT(!list_empty(&ndl->ndl_link));
+	LASSERT(!list_empty(&ndl->ndl_hlink));
+
+	/* remove from session */
+	list_del(&ndl->ndl_link);
+	list_del(&ndl->ndl_hlink);
+
+	LIBCFS_FREE(nd, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t));
+}
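/*
 * Editor's sketch of the co-allocation that lstcon_node_find() sets up and
 * the function above undoes: node and link come from a single
 * LIBCFS_ALLOC() of sizeof(node) + sizeof(ndlink), and the link is
 * addressed as (nd + 1), i.e. the first byte past the node:
 */
static int lst_node_pair_alloc(lstcon_node_t **ndpp, lstcon_ndlink_t **ndlpp)
{
	LIBCFS_ALLOC(*ndpp, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t));
	if (*ndpp == NULL)
		return -ENOMEM;

	*ndlpp = (lstcon_ndlink_t *)(*ndpp + 1);	/* link trails node */
	return 0;
}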
+
+static int
+lstcon_ndlink_find(struct list_head *hash,
+		   lnet_process_id_t id, lstcon_ndlink_t **ndlpp, int create)
+{
+	unsigned int     idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
+	lstcon_ndlink_t *ndl;
+	lstcon_node_t   *nd;
+	int	      rc;
+
+	if (id.nid == LNET_NID_ANY)
+		return -EINVAL;
+
+	/* search in hash */
+	list_for_each_entry(ndl, &hash[idx], ndl_hlink) {
+		if (ndl->ndl_node->nd_id.nid != id.nid ||
+		    ndl->ndl_node->nd_id.pid != id.pid)
+			continue;
+
+		*ndlpp = ndl;
+		return 0;
+	}
+
+	if (create == 0)
+		return -ENOENT;
+
+	/* find or create in session hash */
+	rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0);
+	if (rc != 0)
+		return rc;
+
+	LIBCFS_ALLOC(ndl, sizeof(lstcon_ndlink_t));
+	if (ndl == NULL) {
+		lstcon_node_put(nd);
+		return -ENOMEM;
+	}
+
+	*ndlpp = ndl;
+
+	ndl->ndl_node = nd;
+	INIT_LIST_HEAD(&ndl->ndl_link);
+	list_add_tail(&ndl->ndl_hlink, &hash[idx]);
+
+	return 0;
+}
+
+static void
+lstcon_ndlink_release(lstcon_ndlink_t *ndl)
+{
+	LASSERT(list_empty(&ndl->ndl_link));
+	LASSERT(!list_empty(&ndl->ndl_hlink));
+
+	list_del(&ndl->ndl_hlink); /* delete from hash */
+	lstcon_node_put(ndl->ndl_node);
+
+	LIBCFS_FREE(ndl, sizeof(*ndl));
+}
+
+static int
+lstcon_group_alloc(char *name, lstcon_group_t **grpp)
+{
+	lstcon_group_t *grp;
+	int	     i;
+
+	LIBCFS_ALLOC(grp, offsetof(lstcon_group_t,
+				   grp_ndl_hash[LST_NODE_HASHSIZE]));
+	if (grp == NULL)
+		return -ENOMEM;
+
+	memset(grp, 0, offsetof(lstcon_group_t,
+				grp_ndl_hash[LST_NODE_HASHSIZE]));
+
+	grp->grp_ref = 1;
+	if (name != NULL)
+		strcpy(grp->grp_name, name);
+
+	INIT_LIST_HEAD(&grp->grp_link);
+	INIT_LIST_HEAD(&grp->grp_ndl_list);
+	INIT_LIST_HEAD(&grp->grp_trans_list);
+
+	for (i = 0; i < LST_NODE_HASHSIZE; i++)
+		INIT_LIST_HEAD(&grp->grp_ndl_hash[i]);
+
+	*grpp = grp;
+
+	return 0;
+}
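/*
 * Editor's note: the group struct ends in a fixed hash table, so its size
 * is taken as offsetof(lstcon_group_t, grp_ndl_hash[LST_NODE_HASHSIZE]),
 * i.e. everything up to and including the last hash slot.  The same
 * expression must be used for both LIBCFS_ALLOC() and LIBCFS_FREE(), as
 * done here and in lstcon_group_decref() below.
 */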
+
+static void
+lstcon_group_addref(lstcon_group_t *grp)
+{
+	grp->grp_ref++;
+}
+
+static void lstcon_group_ndlink_release(lstcon_group_t *, lstcon_ndlink_t *);
+
+static void
+lstcon_group_drain(lstcon_group_t *grp, int keep)
+{
+	lstcon_ndlink_t *ndl;
+	lstcon_ndlink_t *tmp;
+
+	list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) {
+		if ((ndl->ndl_node->nd_state & keep) == 0)
+			lstcon_group_ndlink_release(grp, ndl);
+	}
+}
+
+static void
+lstcon_group_decref(lstcon_group_t *grp)
+{
+	int     i;
+
+	if (--grp->grp_ref > 0)
+		return;
+
+	if (!list_empty(&grp->grp_link))
+		list_del(&grp->grp_link);
+
+	lstcon_group_drain(grp, 0);
+
+	for (i = 0; i < LST_NODE_HASHSIZE; i++)
+		LASSERT(list_empty(&grp->grp_ndl_hash[i]));
+
+	LIBCFS_FREE(grp, offsetof(lstcon_group_t,
+				  grp_ndl_hash[LST_NODE_HASHSIZE]));
+}
+
+static int
+lstcon_group_find(char *name, lstcon_group_t **grpp)
+{
+	lstcon_group_t   *grp;
+
+	list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
+		if (strncmp(grp->grp_name, name, LST_NAME_SIZE) != 0)
+			continue;
+
+		lstcon_group_addref(grp);  /* +1 ref for caller */
+		*grpp = grp;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static void
+lstcon_group_put(lstcon_group_t *grp)
+{
+	lstcon_group_decref(grp);
+}
+
+static int
+lstcon_group_ndlink_find(lstcon_group_t *grp, lnet_process_id_t id,
+			 lstcon_ndlink_t **ndlpp, int create)
+{
+	int     rc;
+
+	rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create);
+	if (rc != 0)
+		return rc;
+
+	if (!list_empty(&(*ndlpp)->ndl_link))
+		return 0;
+
+	list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list);
+	grp->grp_nnode++;
+
+	return 0;
+}
+
+static void
+lstcon_group_ndlink_release(lstcon_group_t *grp, lstcon_ndlink_t *ndl)
+{
+	list_del_init(&ndl->ndl_link);
+	lstcon_ndlink_release(ndl);
+	grp->grp_nnode--;
+}
+
+static void
+lstcon_group_ndlink_move(lstcon_group_t *old,
+			 lstcon_group_t *new, lstcon_ndlink_t *ndl)
+{
+	unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) %
+			   LST_NODE_HASHSIZE;
+
+	list_del(&ndl->ndl_hlink);
+	list_del(&ndl->ndl_link);
+	old->grp_nnode--;
+
+	list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]);
+	list_add_tail(&ndl->ndl_link, &new->grp_ndl_list);
+	new->grp_nnode++;
+}
+
+static void
+lstcon_group_move(lstcon_group_t *old, lstcon_group_t *new)
+{
+	lstcon_ndlink_t *ndl;
+
+	while (!list_empty(&old->grp_ndl_list)) {
+		ndl = list_entry(old->grp_ndl_list.next,
+				     lstcon_ndlink_t, ndl_link);
+		lstcon_group_ndlink_move(old, new, ndl);
+	}
+}
+
+int
+lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg)
+{
+	lstcon_group_t *grp = (lstcon_group_t *)arg;
+
+	switch (transop) {
+	case LST_TRANS_SESNEW:
+		if (nd->nd_state == LST_NODE_ACTIVE)
+			return 0;
+		break;
+
+	case LST_TRANS_SESEND:
+		if (nd->nd_state != LST_NODE_ACTIVE)
+			return 0;
+
+		if (grp != NULL && nd->nd_ref > 1)
+			return 0;
+		break;
+
+	case LST_TRANS_SESQRY:
+		break;
+
+	default:
+		LBUG();
+	}
+
+	return 1;
+}
+
+int
+lstcon_sesrpc_readent(int transop, srpc_msg_t *msg,
+		      lstcon_rpc_ent_t *ent_up)
+{
+	srpc_debug_reply_t *rep;
+
+	switch (transop) {
+	case LST_TRANS_SESNEW:
+	case LST_TRANS_SESEND:
+		return 0;
+
+	case LST_TRANS_SESQRY:
+		rep = &msg->msg_body.dbg_reply;
+
+		if (copy_to_user(&ent_up->rpe_priv[0],
+				     &rep->dbg_timeout, sizeof(int)) ||
+		    copy_to_user(&ent_up->rpe_payload[0],
+				     &rep->dbg_name, LST_NAME_SIZE))
+			return -EFAULT;
+
+		return 0;
+
+	default:
+		LBUG();
+	}
+
+	return 0;
+}
+
+static int
+lstcon_group_nodes_add(lstcon_group_t *grp,
+		       int count, lnet_process_id_t *ids_up,
+		       unsigned *featp, struct list_head *result_up)
+{
+	lstcon_rpc_trans_t      *trans;
+	lstcon_ndlink_t	 *ndl;
+	lstcon_group_t	  *tmp;
+	lnet_process_id_t	id;
+	int		      i;
+	int		      rc;
+
+	rc = lstcon_group_alloc(NULL, &tmp);
+	if (rc != 0) {
+		CERROR("Out of memory\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < count; i++) {
+		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
+			rc = -EFAULT;
+			break;
+		}
+
+		/* skip if it's in this group already */
+		rc = lstcon_group_ndlink_find(grp, id, &ndl, 0);
+		if (rc == 0)
+			continue;
+
+		/* add to tmp group */
+		rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1);
+		if (rc != 0) {
+			CERROR("Can't create ndlink, out of memory\n");
+			break;
+		}
+	}
+
+	if (rc != 0) {
+		lstcon_group_put(tmp);
+		return rc;
+	}
+
+	rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
+				     &tmp->grp_trans_list, LST_TRANS_SESNEW,
+				     tmp, lstcon_sesrpc_condition, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction: %d\n", rc);
+		lstcon_group_put(tmp);
+		return rc;
+	}
+
+	/* post all RPCs */
+	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+	rc = lstcon_rpc_trans_interpreter(trans, result_up,
+					  lstcon_sesrpc_readent);
+	*featp = trans->tas_features;
+
+	/* destroy all RPCs */
+	lstcon_rpc_trans_destroy(trans);
+
+	lstcon_group_move(tmp, grp);
+	lstcon_group_put(tmp);
+
+	return rc;
+}
+
+static int
+lstcon_group_nodes_remove(lstcon_group_t *grp,
+			  int count, lnet_process_id_t *ids_up,
+			  struct list_head *result_up)
+{
+	lstcon_rpc_trans_t     *trans;
+	lstcon_ndlink_t	*ndl;
+	lstcon_group_t	 *tmp;
+	lnet_process_id_t       id;
+	int		     rc;
+	int		     i;
+
+	/* End session and remove node from the group */
+
+	rc = lstcon_group_alloc(NULL, &tmp);
+	if (rc != 0) {
+		CERROR("Out of memory\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < count; i++) {
+		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
+			rc = -EFAULT;
+			goto error;
+		}
+
+		/* move node to tmp group */
+		if (lstcon_group_ndlink_find(grp, id, &ndl, 0) == 0)
+			lstcon_group_ndlink_move(grp, tmp, ndl);
+	}
+
+	rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
+				     &tmp->grp_trans_list, LST_TRANS_SESEND,
+				     tmp, lstcon_sesrpc_condition, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction: %d\n", rc);
+		goto error;
+	}
+
+	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
+
+	lstcon_rpc_trans_destroy(trans);
+	/* release nodes anyway, because we can't rollback status */
+	lstcon_group_put(tmp);
+
+	return rc;
+error:
+	lstcon_group_move(tmp, grp);
+	lstcon_group_put(tmp);
+
+	return rc;
+}
+
+int
+lstcon_group_add(char *name)
+{
+	lstcon_group_t *grp;
+	int	     rc;
+
+	rc = (lstcon_group_find(name, &grp) == 0) ? -EEXIST : 0;
+	if (rc != 0) {
+		/* found a group with the same name */
+		lstcon_group_put(grp);
+		return rc;
+	}
+
+	rc = lstcon_group_alloc(name, &grp);
+	if (rc != 0) {
+		CERROR("Can't allocate descriptor for group %s\n", name);
+		return -ENOMEM;
+	}
+
+	list_add_tail(&grp->grp_link, &console_session.ses_grp_list);
+
+	return rc;
+}
+
+int
+lstcon_nodes_add(char *name, int count, lnet_process_id_t *ids_up,
+		 unsigned *featp, struct list_head *result_up)
+{
+	lstcon_group_t	 *grp;
+	int		     rc;
+
+	LASSERT(count > 0);
+	LASSERT(ids_up != NULL);
+
+	rc = lstcon_group_find(name, &grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find group %s\n", name);
+		return rc;
+	}
+
+	if (grp->grp_ref > 2) {
+		/* referred by other threads or test */
+		CDEBUG(D_NET, "Group %s is busy\n", name);
+		lstcon_group_put(grp);
+
+		return -EBUSY;
+	}
+
+	rc = lstcon_group_nodes_add(grp, count, ids_up, featp, result_up);
+
+	lstcon_group_put(grp);
+
+	return rc;
+}
+
+int
+lstcon_group_del(char *name)
+{
+	lstcon_rpc_trans_t *trans;
+	lstcon_group_t     *grp;
+	int		 rc;
+
+	rc = lstcon_group_find(name, &grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find group: %s\n", name);
+		return rc;
+	}
+
+	if (grp->grp_ref > 2) {
+		/* referred by other threads or tests */
+		CDEBUG(D_NET, "Group %s is busy\n", name);
+		lstcon_group_put(grp);
+		return -EBUSY;
+	}
+
+	rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
+				     &grp->grp_trans_list, LST_TRANS_SESEND,
+				     grp, lstcon_sesrpc_condition, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction: %d\n", rc);
+		lstcon_group_put(grp);
+		return rc;
+	}
+
+	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+	lstcon_rpc_trans_destroy(trans);
+
+	lstcon_group_put(grp);
+	/* -ref for session, it's destroyed,
+	 * status can't be rolled back, destroy group anyway */
+	lstcon_group_put(grp);
+
+	return rc;
+}
+
+int
+lstcon_group_clean(char *name, int args)
+{
+	lstcon_group_t *grp = NULL;
+	int	     rc;
+
+	rc = lstcon_group_find(name, &grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find group %s\n", name);
+		return rc;
+	}
+
+	if (grp->grp_ref > 2) {
+		/* referred by test */
+		CDEBUG(D_NET, "Group %s is busy\n", name);
+		lstcon_group_put(grp);
+		return -EBUSY;
+	}
+
+	args = (LST_NODE_ACTIVE | LST_NODE_BUSY |
+		LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args;
+
+	lstcon_group_drain(grp, args);
+
+	lstcon_group_put(grp);
+	/* release empty group */
+	if (list_empty(&grp->grp_ndl_list))
+		lstcon_group_put(grp);
+
+	return 0;
+}
+
+int
+lstcon_nodes_remove(char *name, int count,
+		    lnet_process_id_t *ids_up, struct list_head *result_up)
+{
+	lstcon_group_t *grp = NULL;
+	int	     rc;
+
+	rc = lstcon_group_find(name, &grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find group: %s\n", name);
+		return rc;
+	}
+
+	if (grp->grp_ref > 2) {
+		/* referred by test */
+		CDEBUG(D_NET, "Group %s is busy\n", name);
+		lstcon_group_put(grp);
+		return -EBUSY;
+	}
+
+	rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up);
+
+	lstcon_group_put(grp);
+	/* release empty group */
+	if (list_empty(&grp->grp_ndl_list))
+		lstcon_group_put(grp);
+
+	return rc;
+}
+
+int
+lstcon_group_refresh(char *name, struct list_head *result_up)
+{
+	lstcon_rpc_trans_t      *trans;
+	lstcon_group_t	  *grp;
+	int		      rc;
+
+	rc = lstcon_group_find(name, &grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find group: %s\n", name);
+		return rc;
+	}
+
+	if (grp->grp_ref > 2) {
+		/* referred by test */
+		CDEBUG(D_NET, "Group %s is busy\n", name);
+		lstcon_group_put(grp);
+		return -EBUSY;
+	}
+
+	/* re-invite all inactive nodes in the group */
+	rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
+				     &grp->grp_trans_list, LST_TRANS_SESNEW,
+				     grp, lstcon_sesrpc_condition, &trans);
+	if (rc != 0) {
+		/* local error, return */
+		CDEBUG(D_NET, "Can't create transaction: %d\n", rc);
+		lstcon_group_put(grp);
+		return rc;
+	}
+
+	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
+
+	lstcon_rpc_trans_destroy(trans);
+	/* -ref for me */
+	lstcon_group_put(grp);
+
+	return rc;
+}
+
+int
+lstcon_group_list(int index, int len, char *name_up)
+{
+	lstcon_group_t *grp;
+
+	LASSERT(index >= 0);
+	LASSERT(name_up != NULL);
+
+	list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
+		if (index-- == 0) {
+			return copy_to_user(name_up, grp->grp_name, len) ?
+			       -EFAULT : 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static int
+lstcon_nodes_getent(struct list_head *head, int *index_p,
+		    int *count_p, lstcon_node_ent_t *dents_up)
+{
+	lstcon_ndlink_t  *ndl;
+	lstcon_node_t    *nd;
+	int	       count = 0;
+	int	       index = 0;
+
+	LASSERT(index_p != NULL && count_p != NULL);
+	LASSERT(dents_up != NULL);
+	LASSERT(*index_p >= 0);
+	LASSERT(*count_p > 0);
+
+	list_for_each_entry(ndl, head, ndl_link) {
+		if (index++ < *index_p)
+			continue;
+
+		if (count >= *count_p)
+			break;
+
+		nd = ndl->ndl_node;
+		if (copy_to_user(&dents_up[count].nde_id,
+				     &nd->nd_id, sizeof(nd->nd_id)) ||
+		    copy_to_user(&dents_up[count].nde_state,
+				     &nd->nd_state, sizeof(nd->nd_state)))
+			return -EFAULT;
+
+		count++;
+	}
+
+	if (index <= *index_p)
+		return -ENOENT;
+
+	*count_p = count;
+	*index_p = index;
+
+	return 0;
+}
+
+int
+lstcon_group_info(char *name, lstcon_ndlist_ent_t *gents_p,
+		  int *index_p, int *count_p, lstcon_node_ent_t *dents_up)
+{
+	lstcon_ndlist_ent_t *gentp;
+	lstcon_group_t      *grp;
+	lstcon_ndlink_t     *ndl;
+	int		  rc;
+
+	rc = lstcon_group_find(name, &grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find group %s\n", name);
+		return rc;
+	}
+
+	if (dents_up != NULL) {
+		/* verbose query */
+		rc = lstcon_nodes_getent(&grp->grp_ndl_list,
+					 index_p, count_p, dents_up);
+		lstcon_group_put(grp);
+
+		return rc;
+	}
+
+	/* non-verbose query */
+	LIBCFS_ALLOC(gentp, sizeof(lstcon_ndlist_ent_t));
+	if (gentp == NULL) {
+		CERROR("Can't allocate ndlist_ent\n");
+		lstcon_group_put(grp);
+
+		return -ENOMEM;
+	}
+
+	memset(gentp, 0, sizeof(lstcon_ndlist_ent_t));
+
+	list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link)
+		LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp);
+
+	rc = copy_to_user(gents_p, gentp,
+			      sizeof(lstcon_ndlist_ent_t)) ? -EFAULT : 0;
+
+	LIBCFS_FREE(gentp, sizeof(lstcon_ndlist_ent_t));
+
+	lstcon_group_put(grp);
+
+	return rc;
+}
+
+int
+lstcon_batch_find(char *name, lstcon_batch_t **batpp)
+{
+	lstcon_batch_t   *bat;
+
+	list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
+		if (strncmp(bat->bat_name, name, LST_NAME_SIZE) == 0) {
+			*batpp = bat;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+int
+lstcon_batch_add(char *name)
+{
+	lstcon_batch_t   *bat;
+	int	       i;
+	int	       rc;
+
+	rc = (lstcon_batch_find(name, &bat) == 0) ? -EEXIST : 0;
+	if (rc != 0) {
+		CDEBUG(D_NET, "Batch %s already exists\n", name);
+		return rc;
+	}
+
+	LIBCFS_ALLOC(bat, sizeof(lstcon_batch_t));
+	if (bat == NULL) {
+		CERROR("Can't allocate descriptor for batch %s\n", name);
+		return -ENOMEM;
+	}
+
+	LIBCFS_ALLOC(bat->bat_cli_hash,
+		     sizeof(struct list_head) * LST_NODE_HASHSIZE);
+	if (bat->bat_cli_hash == NULL) {
+		CERROR("Can't allocate hash for batch %s\n", name);
+		LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+
+		return -ENOMEM;
+	}
+
+	LIBCFS_ALLOC(bat->bat_srv_hash,
+		     sizeof(struct list_head) * LST_NODE_HASHSIZE);
+	if (bat->bat_srv_hash == NULL) {
+		CERROR("Can't allocate hash for batch %s\n", name);
+		LIBCFS_FREE(bat->bat_cli_hash,
+			    sizeof(struct list_head) * LST_NODE_HASHSIZE);
+		LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+
+		return -ENOMEM;
+	}
+
+	strcpy(bat->bat_name, name);
+	bat->bat_hdr.tsb_index = 0;
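+	/* batch IDs come from the session-wide ses_id_cookie counter */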
+	bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie;
+
+	bat->bat_ntest = 0;
+	bat->bat_state = LST_BATCH_IDLE;
+
+	INIT_LIST_HEAD(&bat->bat_cli_list);
+	INIT_LIST_HEAD(&bat->bat_srv_list);
+	INIT_LIST_HEAD(&bat->bat_test_list);
+	INIT_LIST_HEAD(&bat->bat_trans_list);
+
+	for (i = 0; i < LST_NODE_HASHSIZE; i++) {
+		INIT_LIST_HEAD(&bat->bat_cli_hash[i]);
+		INIT_LIST_HEAD(&bat->bat_srv_hash[i]);
+	}
+
+	list_add_tail(&bat->bat_link, &console_session.ses_bat_list);
+
+	return rc;
+}
+
+int
+lstcon_batch_list(int index, int len, char *name_up)
+{
+	lstcon_batch_t    *bat;
+
+	LASSERT (name_up != NULL);
+	LASSERT (index >= 0);
+
+	list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
+		if (index-- == 0) {
+			return copy_to_user(name_up, bat->bat_name, len) ?
+			       -EFAULT : 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+int
+lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, int server,
+		  int testidx, int *index_p, int *ndent_p,
+		  lstcon_node_ent_t *dents_up)
+{
+	lstcon_test_batch_ent_t *entp;
+	struct list_head	      *clilst;
+	struct list_head	      *srvlst;
+	lstcon_test_t	   *test = NULL;
+	lstcon_batch_t	  *bat;
+	lstcon_ndlink_t	 *ndl;
+	int		      rc;
+
+	rc = lstcon_batch_find(name, &bat);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find batch %s\n", name);
+		return -ENOENT;
+	}
+
+	if (testidx > 0) {
+		/* query a test; test indexes start from 1 */
+		list_for_each_entry(test, &bat->bat_test_list, tes_link) {
+			if (testidx-- == 1)
+				break;
+		}
+
+		if (testidx > 0) {
+			CDEBUG(D_NET, "Can't find specified test in batch\n");
+			return -ENOENT;
+		}
+	}
+
+	clilst = (test == NULL) ? &bat->bat_cli_list :
+				  &test->tes_src_grp->grp_ndl_list;
+	srvlst = (test == NULL) ? &bat->bat_srv_list :
+				  &test->tes_dst_grp->grp_ndl_list;
+
+	if (dents_up != NULL) {
+		rc = lstcon_nodes_getent((server ? srvlst : clilst),
+					 index_p, ndent_p, dents_up);
+		return rc;
+	}
+
+	/* non-verbose query */
+	LIBCFS_ALLOC(entp, sizeof(lstcon_test_batch_ent_t));
+	if (entp == NULL)
+		return -ENOMEM;
+
+	memset(entp, 0, sizeof(lstcon_test_batch_ent_t));
+
+	if (test == NULL) {
+		entp->u.tbe_batch.bae_ntest = bat->bat_ntest;
+		entp->u.tbe_batch.bae_state = bat->bat_state;
+	} else {
+		entp->u.tbe_test.tse_type   = test->tes_type;
+		entp->u.tbe_test.tse_loop   = test->tes_loop;
+		entp->u.tbe_test.tse_concur = test->tes_concur;
+	}
+
+	list_for_each_entry(ndl, clilst, ndl_link)
+		LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle);
+
+	list_for_each_entry(ndl, srvlst, ndl_link)
+		LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle);
+
+	rc = copy_to_user(ent_up, entp,
+			      sizeof(lstcon_test_batch_ent_t)) ? -EFAULT : 0;
+
+	LIBCFS_FREE(entp, sizeof(lstcon_test_batch_ent_t));
+
+	return rc;
+}
+
+int
+lstcon_batrpc_condition(int transop, lstcon_node_t *nd, void *arg)
+{
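+	/* decide whether this node joins the transaction: return 1 to
+	 * include it, 0 to skip it, or a negative errno on error */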
+	switch (transop) {
+	case LST_TRANS_TSBRUN:
+		if (nd->nd_state != LST_NODE_ACTIVE)
+			return -ENETDOWN;
+		break;
+
+	case LST_TRANS_TSBSTOP:
+		if (nd->nd_state != LST_NODE_ACTIVE)
+			return 0;
+		break;
+
+	case LST_TRANS_TSBCLIQRY:
+	case LST_TRANS_TSBSRVQRY:
+		break;
+	}
+
+	return 1;
+}
+
+static int
+lstcon_batch_op(lstcon_batch_t *bat, int transop,
+		struct list_head *result_up)
+{
+	lstcon_rpc_trans_t *trans;
+	int		 rc;
+
+	rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list,
+				     &bat->bat_trans_list, transop,
+				     bat, lstcon_batrpc_condition, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction: %d\n", rc);
+		return rc;
+	}
+
+	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
+
+	lstcon_rpc_trans_destroy(trans);
+
+	return rc;
+}
+
+int
+lstcon_batch_run(char *name, int timeout, struct list_head *result_up)
+{
+	lstcon_batch_t *bat;
+	int	     rc;
+
+	if (lstcon_batch_find(name, &bat) != 0) {
+		CDEBUG(D_NET, "Can't find batch %s\n", name);
+		return -ENOENT;
+	}
+
+	bat->bat_arg = timeout;
+
+	rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up);
+
+	/* mark the batch as running if it started on any node */
+	if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0) != 0)
+		bat->bat_state = LST_BATCH_RUNNING;
+
+	return rc;
+}
+
+int
+lstcon_batch_stop(char *name, int force, struct list_head *result_up)
+{
+	lstcon_batch_t *bat;
+	int	     rc;
+
+	if (lstcon_batch_find(name, &bat) != 0) {
+		CDEBUG(D_NET, "Can't find batch %s\n", name);
+		return -ENOENT;
+	}
+
+	bat->bat_arg = force;
+
+	rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up);
+
+	/* mark batch as stopped if all RPCs finished */
+	if (lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0) == 0)
+		bat->bat_state = LST_BATCH_IDLE;
+
+	return rc;
+}
+
+static void
+lstcon_batch_destroy(lstcon_batch_t *bat)
+{
+	lstcon_ndlink_t    *ndl;
+	lstcon_test_t      *test;
+	int		 i;
+
+	list_del(&bat->bat_link);
+
+	while (!list_empty(&bat->bat_test_list)) {
+		test = list_entry(bat->bat_test_list.next,
+				      lstcon_test_t, tes_link);
+		LASSERT (list_empty(&test->tes_trans_list));
+
+		list_del(&test->tes_link);
+
+		lstcon_group_put(test->tes_src_grp);
+		lstcon_group_put(test->tes_dst_grp);
+
+		LIBCFS_FREE(test, offsetof(lstcon_test_t,
+					   tes_param[test->tes_paramlen]));
+	}
+
+	LASSERT (list_empty(&bat->bat_trans_list));
+
+	while (!list_empty(&bat->bat_cli_list)) {
+		ndl = list_entry(bat->bat_cli_list.next,
+				     lstcon_ndlink_t, ndl_link);
+		list_del_init(&ndl->ndl_link);
+
+		lstcon_ndlink_release(ndl);
+	}
+
+	while (!list_empty(&bat->bat_srv_list)) {
+		ndl = list_entry(bat->bat_srv_list.next,
+				     lstcon_ndlink_t, ndl_link);
+		list_del_init(&ndl->ndl_link);
+
+		lstcon_ndlink_release(ndl);
+	}
+
+	for (i = 0; i < LST_NODE_HASHSIZE; i++) {
+		LASSERT (list_empty(&bat->bat_cli_hash[i]));
+		LASSERT (list_empty(&bat->bat_srv_hash[i]));
+	}
+
+	LIBCFS_FREE(bat->bat_cli_hash,
+		    sizeof(struct list_head) * LST_NODE_HASHSIZE);
+	LIBCFS_FREE(bat->bat_srv_hash,
+		    sizeof(struct list_head) * LST_NODE_HASHSIZE);
+	LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+}
+
+int
+lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg)
+{
+	lstcon_test_t    *test;
+	lstcon_batch_t   *batch;
+	lstcon_ndlink_t  *ndl;
+	struct list_head       *hash;
+	struct list_head       *head;
+
+	test = (lstcon_test_t *)arg;
+	LASSERT (test != NULL);
+
+	batch = test->tes_batch;
+	LASSERT (batch != NULL);
+
+	if (test->tes_oneside &&
+	    transop == LST_TRANS_TSBSRVADD)
+		return 0;
+
+	if (nd->nd_state != LST_NODE_ACTIVE)
+		return -ENETDOWN;
+
+	if (transop == LST_TRANS_TSBCLIADD) {
+		hash = batch->bat_cli_hash;
+		head = &batch->bat_cli_list;
+
+	} else {
+		LASSERT (transop == LST_TRANS_TSBSRVADD);
+
+		hash = batch->bat_srv_hash;
+		head = &batch->bat_srv_list;
+	}
+
+	LASSERT (nd->nd_id.nid != LNET_NID_ANY);
+
+	if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1) != 0)
+		return -ENOMEM;
+
+	if (list_empty(&ndl->ndl_link))
+		list_add_tail(&ndl->ndl_link, head);
+
+	return 1;
+}
+
+static int
+lstcon_test_nodes_add(lstcon_test_t *test, struct list_head *result_up)
+{
+	lstcon_rpc_trans_t     *trans;
+	lstcon_group_t	 *grp;
+	int		     transop;
+	int		     rc;
+
+	LASSERT (test->tes_src_grp != NULL);
+	LASSERT (test->tes_dst_grp != NULL);
+
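+	/* add nodes on the server side first; once that transaction
+	 * succeeds, loop back (via "again") for the client side */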
+	transop = LST_TRANS_TSBSRVADD;
+	grp  = test->tes_dst_grp;
+again:
+	rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
+				     &test->tes_trans_list, transop,
+				     test, lstcon_testrpc_condition, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction: %d\n", rc);
+		return rc;
+	}
+
+	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+	if (lstcon_trans_stat()->trs_rpc_errno != 0 ||
+	    lstcon_trans_stat()->trs_fwk_errno != 0) {
+		lstcon_rpc_trans_interpreter(trans, result_up, NULL);
+
+		lstcon_rpc_trans_destroy(trans);
+		/* return if any error */
+		CDEBUG(D_NET, "Failed to add test %s, "
+			      "RPC error %d, framework error %d\n",
+		       transop == LST_TRANS_TSBCLIADD ? "client" : "server",
+		       lstcon_trans_stat()->trs_rpc_errno,
+		       lstcon_trans_stat()->trs_fwk_errno);
+
+		return rc;
+	}
+
+	lstcon_rpc_trans_destroy(trans);
+
+	if (transop == LST_TRANS_TSBCLIADD)
+		return rc;
+
+	transop = LST_TRANS_TSBCLIADD;
+	grp = test->tes_src_grp;
+	test->tes_cliidx = 0;
+
+	/* requests to test clients */
+	goto again;
+}
+
+int
+lstcon_test_add(char *name, int type, int loop, int concur,
+		int dist, int span, char *src_name, char *dst_name,
+		void *param, int paramlen, int *retp,
+		struct list_head *result_up)
+{
+	lstcon_group_t  *src_grp = NULL;
+	lstcon_group_t  *dst_grp = NULL;
+	lstcon_test_t   *test    = NULL;
+	lstcon_batch_t  *batch;
+	int	      rc;
+
+	rc = lstcon_batch_find(name, &batch);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find batch %s\n", name);
+		return rc;
+	}
+
+	if (batch->bat_state != LST_BATCH_IDLE) {
+		CDEBUG(D_NET, "Can't change running batch %s\n", name);
+		return -EBUSY;
+	}
+
+	rc = lstcon_group_find(src_name, &src_grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find group %s\n", src_name);
+		goto out;
+	}
+
+	rc = lstcon_group_find(dst_name, &dst_grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find group %s\n", dst_name);
+		goto out;
+	}
+
+	if (dst_grp->grp_userland)
+		*retp = 1;
+
+	LIBCFS_ALLOC(test, offsetof(lstcon_test_t, tes_param[paramlen]));
+	if (!test) {
+		CERROR("Can't allocate test descriptor\n");
+		rc = -ENOMEM;
+
+		goto out;
+	}
+
+	memset(test, 0, offsetof(lstcon_test_t, tes_param[paramlen]));
+	test->tes_hdr.tsb_id    = batch->bat_hdr.tsb_id;
+	test->tes_batch	 = batch;
+	test->tes_type	  = type;
+	test->tes_oneside       = 0; /* TODO */
+	test->tes_loop	  = loop;
+	test->tes_concur	= concur;
+	test->tes_stop_onerr    = 1; /* TODO */
+	test->tes_span	  = span;
+	test->tes_dist	  = dist;
+	test->tes_cliidx	= 0; /* just used for creating RPC */
+	test->tes_src_grp       = src_grp;
+	test->tes_dst_grp       = dst_grp;
+	INIT_LIST_HEAD(&test->tes_trans_list);
+
+	if (param != NULL) {
+		test->tes_paramlen = paramlen;
+		memcpy(&test->tes_param[0], param, paramlen);
+	}
+
+	rc = lstcon_test_nodes_add(test, result_up);
+
+	if (rc != 0)
+		goto out;
+
+	if (lstcon_trans_stat()->trs_rpc_errno != 0 ||
+	    lstcon_trans_stat()->trs_fwk_errno != 0)
+		CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, name);
+
+	/* add to test list anyway, so user can check what's going on */
+	list_add_tail(&test->tes_link, &batch->bat_test_list);
+
+	batch->bat_ntest++;
+	test->tes_hdr.tsb_index = batch->bat_ntest;
+
+	/* keep the group references held so nobody can change the groups */
+	return rc;
+out:
+	if (test != NULL)
+		LIBCFS_FREE(test, offsetof(lstcon_test_t, tes_param[paramlen]));
+
+	if (dst_grp != NULL)
+		lstcon_group_put(dst_grp);
+
+	if (src_grp != NULL)
+		lstcon_group_put(src_grp);
+
+	return rc;
+}
+
+int
+lstcon_test_find(lstcon_batch_t *batch, int idx, lstcon_test_t **testpp)
+{
+	lstcon_test_t *test;
+
+	list_for_each_entry(test, &batch->bat_test_list, tes_link) {
+		if (idx == test->tes_hdr.tsb_index) {
+			*testpp = test;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+int
+lstcon_tsbrpc_readent(int transop, srpc_msg_t *msg,
+		      lstcon_rpc_ent_t *ent_up)
+{
+	srpc_batch_reply_t *rep = &msg->msg_body.bat_reply;
+
+	LASSERT (transop == LST_TRANS_TSBCLIQRY ||
+		 transop == LST_TRANS_TSBSRVQRY);
+
+	/* positive errno, framework error code */
+	if (copy_to_user(&ent_up->rpe_priv[0],
+			     &rep->bar_active, sizeof(rep->bar_active)))
+		return -EFAULT;
+
+	return 0;
+}
+
+int
+lstcon_test_batch_query(char *name, int testidx, int client,
+			int timeout, struct list_head *result_up)
+{
+	lstcon_rpc_trans_t *trans;
+	struct list_head	 *translist;
+	struct list_head	 *ndlist;
+	lstcon_tsb_hdr_t   *hdr;
+	lstcon_batch_t     *batch;
+	lstcon_test_t      *test = NULL;
+	int		 transop;
+	int		 rc;
+
+	rc = lstcon_batch_find(name, &batch);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find batch: %s\n", name);
+		return rc;
+	}
+
+	if (testidx == 0) {
+		translist = &batch->bat_trans_list;
+		ndlist    = &batch->bat_cli_list;
+		hdr       = &batch->bat_hdr;
+
+	} else {
+		/* query specified test only */
+		rc = lstcon_test_find(batch, testidx, &test);
+		if (rc != 0) {
+			CDEBUG(D_NET, "Can't find test: %d\n", testidx);
+			return rc;
+		}
+
+		translist = &test->tes_trans_list;
+		ndlist    = &test->tes_src_grp->grp_ndl_list;
+		hdr       = &test->tes_hdr;
+	}
+
+	transop = client ? LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY;
+
+	rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr,
+				     lstcon_batrpc_condition, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction: %d\n", rc);
+		return rc;
+	}
+
+	lstcon_rpc_trans_postwait(trans, timeout);
+
+	if (testidx == 0 && /* query a batch, not a test */
+	    lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) == 0 &&
+	    lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0) == 0) {
+		/* all RPCs finished, and no active test */
+		batch->bat_state = LST_BATCH_IDLE;
+	}
+
+	rc = lstcon_rpc_trans_interpreter(trans, result_up,
+					  lstcon_tsbrpc_readent);
+	lstcon_rpc_trans_destroy(trans);
+
+	return rc;
+}
+
+int
+lstcon_statrpc_readent(int transop, srpc_msg_t *msg,
+		       lstcon_rpc_ent_t *ent_up)
+{
+	srpc_stat_reply_t *rep = &msg->msg_body.stat_reply;
+	sfw_counters_t    *sfwk_stat;
+	srpc_counters_t   *srpc_stat;
+	lnet_counters_t   *lnet_stat;
+
+	if (rep->str_status != 0)
+		return 0;
+
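+	/* the reply payload carries the counter blocks back to back:
+	 * framework counters, then RPC counters, then LNet counters */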
+	sfwk_stat = (sfw_counters_t *)&ent_up->rpe_payload[0];
+	srpc_stat = (srpc_counters_t *)((char *)sfwk_stat + sizeof(*sfwk_stat));
+	lnet_stat = (lnet_counters_t *)((char *)srpc_stat + sizeof(*srpc_stat));
+
+	if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) ||
+	    copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) ||
+	    copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat)))
+		return -EFAULT;
+
+	return 0;
+}
+
+int
+lstcon_ndlist_stat(struct list_head *ndlist,
+		   int timeout, struct list_head *result_up)
+{
+	struct list_head	  head;
+	lstcon_rpc_trans_t *trans;
+	int		 rc;
+
+	INIT_LIST_HEAD(&head);
+
+	rc = lstcon_rpc_trans_ndlist(ndlist, &head,
+				     LST_TRANS_STATQRY, NULL, NULL, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction: %d\n", rc);
+		return rc;
+	}
+
+	lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
+
+	rc = lstcon_rpc_trans_interpreter(trans, result_up,
+					  lstcon_statrpc_readent);
+	lstcon_rpc_trans_destroy(trans);
+
+	return rc;
+}
+
+int
+lstcon_group_stat(char *grp_name, int timeout, struct list_head *result_up)
+{
+	lstcon_group_t     *grp;
+	int		 rc;
+
+	rc = lstcon_group_find(grp_name, &grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Can't find group %s\n", grp_name);
+		return rc;
+	}
+
+	rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up);
+
+	lstcon_group_put(grp);
+
+	return rc;
+}
+
+int
+lstcon_nodes_stat(int count, lnet_process_id_t *ids_up,
+		  int timeout, struct list_head *result_up)
+{
+	lstcon_ndlink_t	 *ndl;
+	lstcon_group_t	  *tmp;
+	lnet_process_id_t	id;
+	int		      i;
+	int		      rc;
+
+	rc = lstcon_group_alloc(NULL, &tmp);
+	if (rc != 0) {
+		CERROR("Out of memory\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0 ; i < count; i++) {
+		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
+			rc = -EFAULT;
+			break;
+		}
+
+		/* add to tmp group */
+		rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2);
+		if (rc != 0) {
+			CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET,
+			       "Failed to find or create %s: %d\n",
+			       libcfs_id2str(id), rc);
+			break;
+		}
+	}
+
+	if (rc != 0) {
+		lstcon_group_put(tmp);
+		return rc;
+	}
+
+	rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up);
+
+	lstcon_group_put(tmp);
+
+	return rc;
+}
+
+int
+lstcon_debug_ndlist(struct list_head *ndlist,
+		    struct list_head *translist,
+		    int timeout, struct list_head *result_up)
+{
+	lstcon_rpc_trans_t *trans;
+	int		 rc;
+
+	rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY,
+				     NULL, lstcon_sesrpc_condition, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction: %d\n", rc);
+		return rc;
+	}
+
+	lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
+
+	rc = lstcon_rpc_trans_interpreter(trans, result_up,
+					  lstcon_sesrpc_readent);
+	lstcon_rpc_trans_destroy(trans);
+
+	return rc;
+}
+
+int
+lstcon_session_debug(int timeout, struct list_head *result_up)
+{
+	return lstcon_debug_ndlist(&console_session.ses_ndl_list,
+				   NULL, timeout, result_up);
+}
+
+int
+lstcon_batch_debug(int timeout, char *name,
+		   int client, struct list_head *result_up)
+{
+	lstcon_batch_t *bat;
+	int	     rc;
+
+	rc = lstcon_batch_find(name, &bat);
+	if (rc != 0)
+		return -ENOENT;
+
+	rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list :
+					  &bat->bat_srv_list,
+				 NULL, timeout, result_up);
+
+	return rc;
+}
+
+int
+lstcon_group_debug(int timeout, char *name,
+		   struct list_head *result_up)
+{
+	lstcon_group_t *grp;
+	int	     rc;
+
+	rc = lstcon_group_find(name, &grp);
+	if (rc != 0)
+		return -ENOENT;
+
+	rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
+				 timeout, result_up);
+	lstcon_group_put(grp);
+
+	return rc;
+}
+
+int
+lstcon_nodes_debug(int timeout,
+		   int count, lnet_process_id_t *ids_up,
+		   struct list_head *result_up)
+{
+	lnet_process_id_t  id;
+	lstcon_ndlink_t   *ndl;
+	lstcon_group_t    *grp;
+	int		i;
+	int		rc;
+
+	rc = lstcon_group_alloc(NULL, &grp);
+	if (rc != 0) {
+		CDEBUG(D_NET, "Out of memory\n");
+		return rc;
+	}
+
+	for (i = 0; i < count; i++) {
+		if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
+			rc = -EFAULT;
+			break;
+		}
+
+		/* node is added to tmp group */
+		rc = lstcon_group_ndlink_find(grp, id, &ndl, 1);
+		if (rc != 0) {
+			CERROR("Can't create node link\n");
+			break;
+		}
+	}
+
+	if (rc != 0) {
+		lstcon_group_put(grp);
+		return rc;
+	}
+
+	rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
+				 timeout, result_up);
+
+	lstcon_group_put(grp);
+
+	return rc;
+}
+
+int
+lstcon_session_match(lst_sid_t sid)
+{
+	return (console_session.ses_id.ses_nid   == sid.ses_nid &&
+		console_session.ses_id.ses_stamp == sid.ses_stamp) ? 1 : 0;
+}
+
+static void
+lstcon_new_session_id(lst_sid_t *sid)
+{
+	lnet_process_id_t      id;
+
+	LASSERT (console_session.ses_state == LST_SESSION_NONE);
+
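+	/* a session ID is the console's NID plus a creation timestamp,
+	 * making it unique across consoles and across restarts */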
+	LNetGetId(1, &id);
+	sid->ses_nid   = id.nid;
+	sid->ses_stamp = cfs_time_current();
+}
+
+extern srpc_service_t lstcon_acceptor_service;
+
+int
+lstcon_session_new(char *name, int key, unsigned feats,
+		   int timeout, int force, lst_sid_t *sid_up)
+{
+	int     rc = 0;
+	int     i;
+
+	if (console_session.ses_state != LST_SESSION_NONE) {
+		/* session exists */
+		if (!force) {
+			CNETERR("Session %s already exists\n",
+				console_session.ses_name);
+			return -EEXIST;
+		}
+
+		rc = lstcon_session_end();
+
+		/* lstcon_session_end() only returns local errors */
+		if (rc != 0)
+			return rc;
+	}
+
+	if ((feats & ~LST_FEATS_MASK) != 0) {
+		CNETERR("Unknown session features %x\n",
+			(feats & ~LST_FEATS_MASK));
+		return -EINVAL;
+	}
+
+	for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
+		LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
+
+	lstcon_new_session_id(&console_session.ses_id);
+
+	console_session.ses_key	    = key;
+	console_session.ses_state   = LST_SESSION_ACTIVE;
+	console_session.ses_force   = !!force;
+	console_session.ses_features = feats;
+	console_session.ses_feats_updated = 0;
+	console_session.ses_timeout = (timeout <= 0) ?
+				      LST_CONSOLE_TIMEOUT : timeout;
+	strcpy(console_session.ses_name, name);
+
+	rc = lstcon_batch_add(LST_DEFAULT_BATCH);
+	if (rc != 0)
+		return rc;
+
+	rc = lstcon_rpc_pinger_start();
+	if (rc != 0) {
+		lstcon_batch_t *bat = NULL;
+
+		lstcon_batch_find(LST_DEFAULT_BATCH, &bat);
+		lstcon_batch_destroy(bat);
+
+		return rc;
+	}
+
+	if (copy_to_user(sid_up, &console_session.ses_id,
+			     sizeof(lst_sid_t)) == 0)
+		return rc;
+
+	lstcon_session_end();
+
+	return -EFAULT;
+}
+
+int
+lstcon_session_info(lst_sid_t *sid_up, int *key_up, unsigned *featp,
+		    lstcon_ndlist_ent_t *ndinfo_up, char *name_up, int len)
+{
+	lstcon_ndlist_ent_t *entp;
+	lstcon_ndlink_t     *ndl;
+	int		  rc = 0;
+
+	if (console_session.ses_state != LST_SESSION_ACTIVE)
+		return -ESRCH;
+
+	LIBCFS_ALLOC(entp, sizeof(*entp));
+	if (entp == NULL)
+		return -ENOMEM;
+
+	memset(entp, 0, sizeof(*entp));
+
+	list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link)
+		LST_NODE_STATE_COUNTER(ndl->ndl_node, entp);
+
+	if (copy_to_user(sid_up, &console_session.ses_id,
+			     sizeof(lst_sid_t)) ||
+	    copy_to_user(key_up, &console_session.ses_key,
+			     sizeof(*key_up)) ||
+	    copy_to_user(featp, &console_session.ses_features,
+			     sizeof(*featp)) ||
+	    copy_to_user(ndinfo_up, entp, sizeof(*entp)) ||
+	    copy_to_user(name_up, console_session.ses_name, len))
+		rc = -EFAULT;
+
+	LIBCFS_FREE(entp, sizeof(*entp));
+
+	return rc;
+}
+
+int
+lstcon_session_end(void)
+{
+	lstcon_rpc_trans_t *trans;
+	lstcon_group_t     *grp;
+	lstcon_batch_t     *bat;
+	int		 rc = 0;
+
+	LASSERT (console_session.ses_state == LST_SESSION_ACTIVE);
+
+	rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list,
+				     NULL, LST_TRANS_SESEND, NULL,
+				     lstcon_sesrpc_condition, &trans);
+	if (rc != 0) {
+		CERROR("Can't create transaction: %d\n", rc);
+		return rc;
+	}
+
+	console_session.ses_shutdown = 1;
+
+	lstcon_rpc_pinger_stop();
+
+	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+	lstcon_rpc_trans_destroy(trans);
+	/* the user can do nothing about failed RPCs at this point, so carry on */
+
+	/* wait for orphan RPCs to die */
+	lstcon_rpc_cleanup_wait();
+
+	console_session.ses_id    = LST_INVALID_SID;
+	console_session.ses_state = LST_SESSION_NONE;
+	console_session.ses_key   = 0;
+	console_session.ses_force = 0;
+	console_session.ses_feats_updated = 0;
+
+	/* destroy all batches */
+	while (!list_empty(&console_session.ses_bat_list)) {
+		bat = list_entry(console_session.ses_bat_list.next,
+				     lstcon_batch_t, bat_link);
+
+		lstcon_batch_destroy(bat);
+	}
+
+	/* destroy all groups */
+	while (!list_empty(&console_session.ses_grp_list)) {
+		grp = list_entry(console_session.ses_grp_list.next,
+				     lstcon_group_t, grp_link);
+		LASSERT (grp->grp_ref == 1);
+
+		lstcon_group_put(grp);
+	}
+
+	/* all nodes should be released */
+	LASSERT (list_empty(&console_session.ses_ndl_list));
+
+	console_session.ses_shutdown = 0;
+	console_session.ses_expired  = 0;
+
+	return rc;
+}
+
+int
+lstcon_session_feats_check(unsigned feats)
+{
+	int rc = 0;
+
+	if ((feats & ~LST_FEATS_MASK) != 0) {
+		CERROR("Can't support these features: %x\n",
+		       (feats & ~LST_FEATS_MASK));
+		return -EPROTO;
+	}
+
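+	/* the first feature report from a test node pins the session
+	 * features; every later node must match them exactly */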
+	spin_lock(&console_session.ses_rpc_lock);
+
+	if (!console_session.ses_feats_updated) {
+		console_session.ses_feats_updated = 1;
+		console_session.ses_features = feats;
+	}
+
+	if (console_session.ses_features != feats)
+		rc = -EPROTO;
+
+	spin_unlock(&console_session.ses_rpc_lock);
+
+	if (rc != 0) {
+		CERROR("remote features %x do not match with "
+		       "session features %x of console\n",
+		       feats, console_session.ses_features);
+	}
+
+	return rc;
+}
+
+static int
+lstcon_acceptor_handle (srpc_server_rpc_t *rpc)
+{
+	srpc_msg_t	*rep  = &rpc->srpc_replymsg;
+	srpc_msg_t	*req  = &rpc->srpc_reqstbuf->buf_msg;
+	srpc_join_reqst_t *jreq = &req->msg_body.join_reqst;
+	srpc_join_reply_t *jrep = &rep->msg_body.join_reply;
+	lstcon_group_t    *grp  = NULL;
+	lstcon_ndlink_t   *ndl;
+	int		rc   = 0;
+
+	sfw_unpack_message(req);
+
+	mutex_lock(&console_session.ses_mutex);
+
+	jrep->join_sid = console_session.ses_id;
+
+	if (console_session.ses_id.ses_nid == LNET_NID_ANY) {
+		jrep->join_status = ESRCH;
+		goto out;
+	}
+
+	if (lstcon_session_feats_check(req->msg_ses_feats) != 0) {
+		jrep->join_status = EPROTO;
+		goto out;
+	}
+
+	if (jreq->join_sid.ses_nid != LNET_NID_ANY &&
+	     !lstcon_session_match(jreq->join_sid)) {
+		jrep->join_status = EBUSY;
+		goto out;
+	}
+
+	if (lstcon_group_find(jreq->join_group, &grp) != 0) {
+		rc = lstcon_group_alloc(jreq->join_group, &grp);
+		if (rc != 0) {
+			CERROR("Out of memory\n");
+			goto out;
+		}
+
+		list_add_tail(&grp->grp_link,
+				  &console_session.ses_grp_list);
+		lstcon_group_addref(grp);
+	}
+
+	if (grp->grp_ref > 2) {
+		/* group is in use by someone else */
+		jrep->join_status = EBUSY;
+		goto out;
+	}
+
+	rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0);
+	if (rc == 0) {
+		jrep->join_status = EEXIST;
+		goto out;
+	}
+
+	rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1);
+	if (rc != 0) {
+		CERROR("Out of memory\n");
+		goto out;
+	}
+
+	ndl->ndl_node->nd_state   = LST_NODE_ACTIVE;
+	ndl->ndl_node->nd_timeout = console_session.ses_timeout;
+
+	if (grp->grp_userland == 0)
+		grp->grp_userland = 1;
+
+	strcpy(jrep->join_session, console_session.ses_name);
+	jrep->join_timeout = console_session.ses_timeout;
+	jrep->join_status  = 0;
+
+out:
+	rep->msg_ses_feats = console_session.ses_features;
+	if (grp != NULL)
+		lstcon_group_put(grp);
+
+	mutex_unlock(&console_session.ses_mutex);
+
+	return rc;
+}
+
+srpc_service_t lstcon_acceptor_service;
+void lstcon_init_acceptor_service(void)
+{
+	/* initialize selftest console acceptor service table */
+	lstcon_acceptor_service.sv_name    = "join session";
+	lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle;
+	lstcon_acceptor_service.sv_id      = SRPC_SERVICE_JOIN;
+	lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX;
+}
+
+extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data);
+
+DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry);
+
+/* initialize console */
+int
+lstcon_console_init(void)
+{
+	int     i;
+	int     rc;
+
+	memset(&console_session, 0, sizeof(lstcon_session_t));
+
+	console_session.ses_id		    = LST_INVALID_SID;
+	console_session.ses_state	    = LST_SESSION_NONE;
+	console_session.ses_timeout	    = 0;
+	console_session.ses_force	    = 0;
+	console_session.ses_expired	    = 0;
+	console_session.ses_feats_updated   = 0;
+	console_session.ses_features	    = LST_FEATS_MASK;
+	console_session.ses_laststamp	    = cfs_time_current_sec();
+
+	mutex_init(&console_session.ses_mutex);
+
+	INIT_LIST_HEAD(&console_session.ses_ndl_list);
+	INIT_LIST_HEAD(&console_session.ses_grp_list);
+	INIT_LIST_HEAD(&console_session.ses_bat_list);
+	INIT_LIST_HEAD(&console_session.ses_trans_list);
+
+	LIBCFS_ALLOC(console_session.ses_ndl_hash,
+		     sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
+	if (console_session.ses_ndl_hash == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
+		INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]);
+
+	/* initialize acceptor service table */
+	lstcon_init_acceptor_service();
+
+	rc = srpc_add_service(&lstcon_acceptor_service);
+	LASSERT (rc != -EBUSY);
+	if (rc != 0) {
+		LIBCFS_FREE(console_session.ses_ndl_hash,
+			    sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
+		return rc;
+	}
+
+	rc = srpc_service_add_buffers(&lstcon_acceptor_service,
+				      lstcon_acceptor_service.sv_wi_total);
+	if (rc != 0) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	rc = libcfs_register_ioctl(&lstcon_ioctl_handler);
+
+	if (rc == 0) {
+		lstcon_rpc_module_init();
+		return 0;
+	}
+
+out:
+	srpc_shutdown_service(&lstcon_acceptor_service);
+	srpc_remove_service(&lstcon_acceptor_service);
+
+	LIBCFS_FREE(console_session.ses_ndl_hash,
+		    sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
+
+	srpc_wait_service_shutdown(&lstcon_acceptor_service);
+
+	return rc;
+}
+
+int
+lstcon_console_fini(void)
+{
+	int     i;
+
+	libcfs_deregister_ioctl(&lstcon_ioctl_handler);
+
+	mutex_lock(&console_session.ses_mutex);
+
+	srpc_shutdown_service(&lstcon_acceptor_service);
+	srpc_remove_service(&lstcon_acceptor_service);
+
+	if (console_session.ses_state != LST_SESSION_NONE)
+		lstcon_session_end();
+
+	lstcon_rpc_module_fini();
+
+	mutex_unlock(&console_session.ses_mutex);
+
+	LASSERT (list_empty(&console_session.ses_ndl_list));
+	LASSERT (list_empty(&console_session.ses_grp_list));
+	LASSERT (list_empty(&console_session.ses_bat_list));
+	LASSERT (list_empty(&console_session.ses_trans_list));
+
+	for (i = 0; i < LST_NODE_HASHSIZE; i++) {
+		LASSERT (list_empty(&console_session.ses_ndl_hash[i]));
+	}
+
+	LIBCFS_FREE(console_session.ses_ndl_hash,
+		    sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
+
+	srpc_wait_service_shutdown(&lstcon_acceptor_service);
+
+	return 0;
+}

+ 232 - 0
drivers/staging/lustre/lnet/selftest/console.h

@@ -0,0 +1,232 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/console.h
+ *
+ * kernel structure for LST console
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#ifndef __LST_CONSOLE_H__
+#define __LST_CONSOLE_H__
+
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-types.h>
+#include <linux/lnet/lnetst.h>
+#include "selftest.h"
+#include "conrpc.h"
+
+typedef struct lstcon_node {
+	lnet_process_id_t    nd_id;	  /* id of the node */
+	int		  nd_ref;	 /* reference count */
+	int		  nd_state;       /* state of the node */
+	int		  nd_timeout;     /* session timeout */
+	cfs_time_t	   nd_stamp;       /* timestamp of last replied RPC */
+	struct lstcon_rpc    nd_ping;	/* ping rpc */
+} lstcon_node_t;				/*** node descriptor */
+
+typedef struct {
+	struct list_head	   ndl_link;       /* chain on list */
+	struct list_head	   ndl_hlink;      /* chain on hash */
+	lstcon_node_t       *ndl_node;       /* pointer to node */
+} lstcon_ndlink_t;			      /*** node link descriptor */
+
+typedef struct {
+	struct list_head	   grp_link;       /* chain on global group list */
+	int		  grp_ref;	/* reference count */
+	int		  grp_userland;   /* has userland nodes */
+	int		  grp_nnode;      /* # of nodes */
+	char		 grp_name[LST_NAME_SIZE]; /* group name */
+
+	struct list_head	   grp_trans_list; /* transaction list */
+	struct list_head	   grp_ndl_list;   /* nodes list */
+	struct list_head	   grp_ndl_hash[0];/* hash table for nodes */
+} lstcon_group_t;		    /*** (alias of nodes) group descriptor */
+
+#define LST_BATCH_IDLE	  0xB0	    /* idle batch */
+#define LST_BATCH_RUNNING       0xB1	    /* running batch */
+
+typedef struct lstcon_tsb_hdr {
+	lst_bid_t	       tsb_id;	 /* batch ID */
+	int		     tsb_index;      /* test index */
+} lstcon_tsb_hdr_t;
+
+typedef struct {
+	lstcon_tsb_hdr_t	bat_hdr;	/* test_batch header */
+	struct list_head	      bat_link;       /* chain on session's batches list */
+	int		     bat_ntest;      /* # of test */
+	int		     bat_state;      /* state of the batch */
+	int		     bat_arg;	/* parameter for run|stop, timeout for run, force for stop */
+	char		    bat_name[LST_NAME_SIZE]; /* name of batch */
+
+	struct list_head	      bat_test_list;  /* list head of tests (lstcon_test_t) */
+	struct list_head	      bat_trans_list; /* list head of transaction */
+	struct list_head	      bat_cli_list;   /* list head of client nodes (lstcon_node_t) */
+	struct list_head	     *bat_cli_hash;   /* hash table of client nodes */
+	struct list_head	      bat_srv_list;   /* list head of server nodes */
+	struct list_head	     *bat_srv_hash;   /* hash table of server nodes */
+} lstcon_batch_t;			     /*** (tests) batch descriptor */
+
+typedef struct lstcon_test {
+	lstcon_tsb_hdr_t      tes_hdr;	/* test batch header */
+	struct list_head	    tes_link;       /* chain on batch's tests list */
+	lstcon_batch_t       *tes_batch;      /* pointer to batch */
+
+	int		   tes_type;       /* type of the test, i.e: bulk, ping */
+	int		   tes_stop_onerr; /* stop on error */
+	int		   tes_oneside;    /* one-sided test */
+	int		   tes_concur;     /* concurrency */
+	int		   tes_loop;       /* loop count */
+	int		   tes_dist;       /* nodes distribution of target group */
+	int		   tes_span;       /* nodes span of target group */
+	int		   tes_cliidx;     /* client index, used for RPC creating */
+
+	struct list_head  tes_trans_list; /* transaction list */
+	lstcon_group_t       *tes_src_grp;    /* group run the test */
+	lstcon_group_t       *tes_dst_grp;    /* target group */
+
+	int		   tes_paramlen;   /* test parameter length */
+	char		  tes_param[0];   /* test parameter */
+} lstcon_test_t;				/*** a single test descriptor */
+
+#define LST_GLOBAL_HASHSIZE     503	     /* global nodes hash table size */
+#define LST_NODE_HASHSIZE       239	     /* node hash table (for batch or group) */
+
+#define LST_SESSION_NONE	0x0	     /* no session */
+#define LST_SESSION_ACTIVE      0x1	     /* working session */
+
+#define LST_CONSOLE_TIMEOUT     300	     /* default console timeout */
+
+typedef struct {
+	struct mutex		ses_mutex;      /* only 1 thread in session */
+	lst_sid_t	       ses_id;	 /* global session id */
+	int		     ses_key;	/* local session key */
+	int		     ses_state;      /* state of session */
+	int		     ses_timeout;    /* timeout in seconds */
+	time_t		  ses_laststamp;  /* last operation stamp (seconds) */
+	/** tests features of the session */
+	unsigned		ses_features;
+	/** features are synced with remote test nodes */
+	unsigned		ses_feats_updated:1;
+	/** force creating */
+	unsigned		ses_force:1;
+	/** session is shutting down */
+	unsigned		ses_shutdown:1;
+	/** console has timed out */
+	unsigned		ses_expired:1;
+	__u64		   ses_id_cookie;  /* batch id cookie */
+	char		    ses_name[LST_NAME_SIZE];  /* session name */
+	lstcon_rpc_trans_t     *ses_ping;       /* session pinger */
+	stt_timer_t	     ses_ping_timer; /* timer for pinger */
+	lstcon_trans_stat_t     ses_trans_stat; /* transaction stats */
+
+	struct list_head	      ses_trans_list; /* global list of transaction */
+	struct list_head	      ses_grp_list;   /* global list of groups */
+	struct list_head	      ses_bat_list;   /* global list of batches */
+	struct list_head	      ses_ndl_list;   /* global list of nodes */
+	struct list_head	     *ses_ndl_hash;   /* hash table of nodes */
+
+	spinlock_t	  ses_rpc_lock;   /* serialize */
+	atomic_t	    ses_rpc_counter;/* # of initialized RPCs */
+	struct list_head	      ses_rpc_freelist; /* idle console rpc */
+} lstcon_session_t;			     /*** session descriptor */
+
+extern lstcon_session_t	 console_session;
+
+static inline lstcon_trans_stat_t *
+lstcon_trans_stat(void)
+{
+	return &console_session.ses_trans_stat;
+}
+
+static inline struct list_head *
+lstcon_id2hash (lnet_process_id_t id, struct list_head *hash)
+{
+	unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
+
+	return &hash[idx];
+}
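+
+/*
+ * A minimal usage sketch (illustrative only; "id" is assumed to be the
+ * lnet_process_id_t of a peer already known to the console):
+ *
+ *	lstcon_ndlink_t  *ndl;
+ *	struct list_head *head;
+ *
+ *	head = lstcon_id2hash(id, console_session.ses_ndl_hash);
+ *	list_for_each_entry(ndl, head, ndl_hlink)
+ *		if (ndl->ndl_node->nd_id.nid == id.nid &&
+ *		    ndl->ndl_node->nd_id.pid == id.pid)
+ *			break;
+ */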
+
+extern int lstcon_session_match(lst_sid_t sid);
+extern int lstcon_session_new(char *name, int key, unsigned feats,
+			      int timeout, int force, lst_sid_t *sid_up);
+extern int lstcon_session_info(lst_sid_t *sid_up, int *key, unsigned *featp,
+			       lstcon_ndlist_ent_t *entp, char *name_up, int len);
+extern int lstcon_session_end(void);
+extern int lstcon_session_debug(int timeout, struct list_head *result_up);
+extern int lstcon_session_feats_check(unsigned feats);
+extern int lstcon_batch_debug(int timeout, char *name,
+			      int client, struct list_head *result_up);
+extern int lstcon_group_debug(int timeout, char *name,
+			      struct list_head *result_up);
+extern int lstcon_nodes_debug(int timeout, int nnd, lnet_process_id_t *nds_up,
+			      struct list_head *result_up);
+extern int lstcon_group_add(char *name);
+extern int lstcon_group_del(char *name);
+extern int lstcon_group_clean(char *name, int args);
+extern int lstcon_group_refresh(char *name, struct list_head *result_up);
+extern int lstcon_nodes_add(char *name, int nnd, lnet_process_id_t *nds_up,
+			    unsigned *featp, struct list_head *result_up);
+extern int lstcon_nodes_remove(char *name, int nnd, lnet_process_id_t *nds_up,
+			       struct list_head *result_up);
+extern int lstcon_group_info(char *name, lstcon_ndlist_ent_t *gent_up,
+			     int *index_p, int *ndent_p, lstcon_node_ent_t *ndents_up);
+extern int lstcon_group_list(int idx, int len, char *name_up);
+extern int lstcon_batch_add(char *name);
+extern int lstcon_batch_run(char *name, int timeout,
+			    struct list_head *result_up);
+extern int lstcon_batch_stop(char *name, int force,
+			     struct list_head *result_up);
+extern int lstcon_test_batch_query(char *name, int testidx,
+				   int client, int timeout,
+				   struct list_head *result_up);
+extern int lstcon_batch_del(char *name);
+extern int lstcon_batch_list(int idx, int namelen, char *name_up);
+extern int lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up,
+			     int server, int testidx, int *index_p,
+			     int *ndent_p, lstcon_node_ent_t *dents_up);
+extern int lstcon_group_stat(char *grp_name, int timeout,
+			     struct list_head *result_up);
+extern int lstcon_nodes_stat(int count, lnet_process_id_t *ids_up,
+			     int timeout, struct list_head *result_up);
+extern int lstcon_test_add(char *name, int type, int loop, int concur,
+			   int dist, int span, char *src_name, char *dst_name,
+			   void *param, int paramlen, int *retp,
+			   struct list_head *result_up);
+
+#endif

+ 1814 - 0
drivers/staging/lustre/lnet/selftest/framework.c

@@ -0,0 +1,1814 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/framework.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ * Author: Liang Zhen  <liangzhen@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "selftest.h"
+
+lst_sid_t LST_INVALID_SID = {LNET_NID_ANY, -1};
+
+static int session_timeout = 100;
+CFS_MODULE_PARM(session_timeout, "i", int, 0444,
+		"test session timeout in seconds (100 by default, 0 == never)");
+
+static int rpc_timeout = 64;
+CFS_MODULE_PARM(rpc_timeout, "i", int, 0644,
+		"rpc timeout in seconds (64 by default, 0 == never)");
+
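+/* byte-swap helpers for messages received from peers of the opposite
+ * endianness (detected via the message magic) */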
+#define sfw_unpack_id(id)	       \
+do {				    \
+	__swab64s(&(id).nid);	   \
+	__swab32s(&(id).pid);	   \
+} while (0)
+
+#define sfw_unpack_sid(sid)	     \
+do {				    \
+	__swab64s(&(sid).ses_nid);      \
+	__swab64s(&(sid).ses_stamp);    \
+} while (0)
+
+#define sfw_unpack_fw_counters(fc)	\
+do {				      \
+	__swab32s(&(fc).running_ms);      \
+	__swab32s(&(fc).active_batches);  \
+	__swab32s(&(fc).zombie_sessions); \
+	__swab32s(&(fc).brw_errors);      \
+	__swab32s(&(fc).ping_errors);     \
+} while (0)
+
+#define sfw_unpack_rpc_counters(rc)     \
+do {				    \
+	__swab32s(&(rc).errors);	\
+	__swab32s(&(rc).rpcs_sent);     \
+	__swab32s(&(rc).rpcs_rcvd);     \
+	__swab32s(&(rc).rpcs_dropped);  \
+	__swab32s(&(rc).rpcs_expired);  \
+	__swab64s(&(rc).bulk_get);      \
+	__swab64s(&(rc).bulk_put);      \
+} while (0)
+
+#define sfw_unpack_lnet_counters(lc)    \
+do {				    \
+	__swab32s(&(lc).errors);	\
+	__swab32s(&(lc).msgs_max);      \
+	__swab32s(&(lc).msgs_alloc);    \
+	__swab32s(&(lc).send_count);    \
+	__swab32s(&(lc).recv_count);    \
+	__swab32s(&(lc).drop_count);    \
+	__swab32s(&(lc).route_count);   \
+	__swab64s(&(lc).send_length);   \
+	__swab64s(&(lc).recv_length);   \
+	__swab64s(&(lc).drop_length);   \
+	__swab64s(&(lc).route_length);  \
+} while (0)
+
+#define sfw_test_active(t)      (atomic_read(&(t)->tsi_nactive) != 0)
+#define sfw_batch_active(b)     (atomic_read(&(b)->bat_nactive) != 0)
+
+struct smoketest_framework {
+	struct list_head	 fw_zombie_rpcs;     /* RPCs to be recycled */
+	struct list_head	 fw_zombie_sessions; /* stopping sessions */
+	struct list_head	 fw_tests;	   /* registered test cases */
+	atomic_t       fw_nzombies;	/* # zombie sessions */
+	spinlock_t	   fw_lock;		/* serialise */
+	sfw_session_t	  *fw_session;		/* _the_ session */
+	int		   fw_shuttingdown;	/* shutdown in progress */
+	srpc_server_rpc_t *fw_active_srpc;	/* running RPC */
+} sfw_data;
+
+/* forward references */
+int sfw_stop_batch (sfw_batch_t *tsb, int force);
+void sfw_destroy_session (sfw_session_t *sn);
+
+static inline sfw_test_case_t *
+sfw_find_test_case(int id)
+{
+	sfw_test_case_t *tsc;
+
+	LASSERT (id <= SRPC_SERVICE_MAX_ID);
+	LASSERT (id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
+
+	list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+		if (tsc->tsc_srv_service->sv_id == id)
+			return tsc;
+	}
+
+	return NULL;
+}
+
+static int
+sfw_register_test (srpc_service_t *service, sfw_test_client_ops_t *cliops)
+{
+	sfw_test_case_t *tsc;
+
+	if (sfw_find_test_case(service->sv_id) != NULL) {
+		CERROR ("Failed to register test %s (%d)\n",
+			service->sv_name, service->sv_id);
+		return -EEXIST;
+	}
+
+	LIBCFS_ALLOC(tsc, sizeof(sfw_test_case_t));
+	if (tsc == NULL)
+		return -ENOMEM;
+
+	memset(tsc, 0, sizeof(sfw_test_case_t));
+	tsc->tsc_cli_ops     = cliops;
+	tsc->tsc_srv_service = service;
+
+	list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests);
+	return 0;
+}
+
+void
+sfw_add_session_timer (void)
+{
+	sfw_session_t *sn = sfw_data.fw_session;
+	stt_timer_t   *timer;
+
+	LASSERT (!sfw_data.fw_shuttingdown);
+
+	if (sn == NULL || sn->sn_timeout == 0)
+		return;
+
+	LASSERT (!sn->sn_timer_active);
+
+	timer = &sn->sn_timer;
+	sn->sn_timer_active = 1;
+	timer->stt_expires = cfs_time_add(sn->sn_timeout,
+					  cfs_time_current_sec());
+	stt_add_timer(timer);
+	return;
+}
+
+int
+sfw_del_session_timer (void)
+{
+	sfw_session_t *sn = sfw_data.fw_session;
+
+	if (sn == NULL || !sn->sn_timer_active)
+		return 0;
+
+	LASSERT (sn->sn_timeout != 0);
+
+	if (stt_del_timer(&sn->sn_timer)) { /* timer defused */
+		sn->sn_timer_active = 0;
+		return 0;
+	}
+
+	return EBUSY; /* racing with sfw_session_expired() */
+}
+
+/* called with sfw_data.fw_lock held */
+static void
+sfw_deactivate_session (void)
+{
+	sfw_session_t *sn = sfw_data.fw_session;
+	int	    nactive = 0;
+	sfw_batch_t   *tsb;
+	sfw_test_case_t *tsc;
+
+	if (sn == NULL)
+		return;
+
+	LASSERT (!sn->sn_timer_active);
+
+	sfw_data.fw_session = NULL;
+	atomic_inc(&sfw_data.fw_nzombies);
+	list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
+
+	spin_unlock(&sfw_data.fw_lock);
+
+	list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
+		srpc_abort_service(tsc->tsc_srv_service);
+	}
+
+	spin_lock(&sfw_data.fw_lock);
+
+	list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
+		if (sfw_batch_active(tsb)) {
+			nactive++;
+			sfw_stop_batch(tsb, 1);
+		}
+	}
+
+	if (nactive != 0)
+		return;   /* wait for active batches to stop */
+
+	list_del_init(&sn->sn_list);
+	spin_unlock(&sfw_data.fw_lock);
+
+	sfw_destroy_session(sn);
+
+	spin_lock(&sfw_data.fw_lock);
+}
+
+
+void
+sfw_session_expired (void *data)
+{
+	sfw_session_t *sn = data;
+
+	spin_lock(&sfw_data.fw_lock);
+
+	LASSERT (sn->sn_timer_active);
+	LASSERT (sn == sfw_data.fw_session);
+
+	CWARN ("Session expired! sid: %s-"LPU64", name: %s\n",
+	       libcfs_nid2str(sn->sn_id.ses_nid),
+	       sn->sn_id.ses_stamp, &sn->sn_name[0]);
+
+	sn->sn_timer_active = 0;
+	sfw_deactivate_session();
+
+	spin_unlock(&sfw_data.fw_lock);
+}
+
+static inline void
+sfw_init_session(sfw_session_t *sn, lst_sid_t sid,
+		 unsigned features, const char *name)
+{
+	stt_timer_t *timer = &sn->sn_timer;
+
+	memset(sn, 0, sizeof(sfw_session_t));
+	INIT_LIST_HEAD(&sn->sn_list);
+	INIT_LIST_HEAD(&sn->sn_batches);
+	atomic_set(&sn->sn_refcount, 1);	/* +1 for caller */
+	atomic_set(&sn->sn_brw_errors, 0);
+	atomic_set(&sn->sn_ping_errors, 0);
+	strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name));
+
+	sn->sn_timer_active = 0;
+	sn->sn_id	   = sid;
+	sn->sn_features	    = features;
+	sn->sn_timeout      = session_timeout;
+	sn->sn_started      = cfs_time_current();
+
+	timer->stt_data = sn;
+	timer->stt_func = sfw_session_expired;
+	INIT_LIST_HEAD(&timer->stt_list);
+}
+
+/* completion handler for incoming framework RPCs */
+void
+sfw_server_rpc_done(struct srpc_server_rpc *rpc)
+{
+	struct srpc_service	*sv	= rpc->srpc_scd->scd_svc;
+	int			status	= rpc->srpc_status;
+
+	CDEBUG (D_NET,
+		"Incoming framework RPC done: "
+		"service %s, peer %s, status %s:%d\n",
+		sv->sv_name, libcfs_id2str(rpc->srpc_peer),
+		swi_state2str(rpc->srpc_wi.swi_state),
+		status);
+
+	if (rpc->srpc_bulk != NULL)
+		sfw_free_pages(rpc);
+	return;
+}
+
+void
+sfw_client_rpc_fini (srpc_client_rpc_t *rpc)
+{
+	LASSERT (rpc->crpc_bulk.bk_niov == 0);
+	LASSERT (list_empty(&rpc->crpc_list));
+	LASSERT (atomic_read(&rpc->crpc_refcount) == 0);
+
+	CDEBUG (D_NET,
+		"Outgoing framework RPC done: "
+		"service %d, peer %s, status %s:%d:%d\n",
+		rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
+		swi_state2str(rpc->crpc_wi.swi_state),
+		rpc->crpc_aborted, rpc->crpc_status);
+
+	spin_lock(&sfw_data.fw_lock);
+
+	/* my callers must finish all RPCs before shutting me down */
+	LASSERT(!sfw_data.fw_shuttingdown);
+	list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs);
+
+	spin_unlock(&sfw_data.fw_lock);
+}
+
+sfw_batch_t *
+sfw_find_batch (lst_bid_t bid)
+{
+	sfw_session_t *sn = sfw_data.fw_session;
+	sfw_batch_t   *bat;
+
+	LASSERT (sn != NULL);
+
+	list_for_each_entry (bat, &sn->sn_batches, bat_list) {
+		if (bat->bat_id.bat_id == bid.bat_id)
+			return bat;
+	}
+
+	return NULL;
+}
+
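+/* look up the batch with the given ID, creating it if it doesn't exist */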
+sfw_batch_t *
+sfw_bid2batch (lst_bid_t bid)
+{
+	sfw_session_t *sn = sfw_data.fw_session;
+	sfw_batch_t   *bat;
+
+	LASSERT (sn != NULL);
+
+	bat = sfw_find_batch(bid);
+	if (bat != NULL)
+		return bat;
+
+	LIBCFS_ALLOC(bat, sizeof(sfw_batch_t));
+	if (bat == NULL)
+		return NULL;
+
+	bat->bat_error    = 0;
+	bat->bat_session  = sn;
+	bat->bat_id       = bid;
+	atomic_set(&bat->bat_nactive, 0);
+	INIT_LIST_HEAD(&bat->bat_tests);
+
+	list_add_tail(&bat->bat_list, &sn->sn_batches);
+	return bat;
+}
+
+int
+sfw_get_stats (srpc_stat_reqst_t *request, srpc_stat_reply_t *reply)
+{
+	sfw_session_t  *sn = sfw_data.fw_session;
+	sfw_counters_t *cnt = &reply->str_fw;
+	sfw_batch_t    *bat;
+	struct timeval  tv;
+
+	reply->str_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+
+	if (request->str_sid.ses_nid == LNET_NID_ANY) {
+		reply->str_status = EINVAL;
+		return 0;
+	}
+
+	if (sn == NULL || !sfw_sid_equal(request->str_sid, sn->sn_id)) {
+		reply->str_status = ESRCH;
+		return 0;
+	}
+
+	lnet_counters_get(&reply->str_lnet);
+	srpc_get_counters(&reply->str_rpc);
+
+	/* send over the msecs since the session was started;
+	 * with 32 bits to send, this is ~49 days */
+	cfs_duration_usec(cfs_time_sub(cfs_time_current(),
+				       sn->sn_started), &tv);
+
+	cnt->running_ms      = (__u32)(tv.tv_sec * 1000 + tv.tv_usec / 1000);
+	cnt->brw_errors      = atomic_read(&sn->sn_brw_errors);
+	cnt->ping_errors     = atomic_read(&sn->sn_ping_errors);
+	cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies);
+
+	cnt->active_batches = 0;
+	list_for_each_entry (bat, &sn->sn_batches, bat_list) {
+		if (atomic_read(&bat->bat_nactive) > 0)
+			cnt->active_batches++;
+	}
+
+	reply->str_status = 0;
+	return 0;
+}
+
+int
+sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
+{
+	sfw_session_t *sn = sfw_data.fw_session;
+	srpc_msg_t    *msg = container_of(request, srpc_msg_t,
+					  msg_body.mksn_reqst);
+	int	       cplen = 0;
+
+	if (request->mksn_sid.ses_nid == LNET_NID_ANY) {
+		reply->mksn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+		reply->mksn_status = EINVAL;
+		return 0;
+	}
+
+	if (sn != NULL) {
+		reply->mksn_status  = 0;
+		reply->mksn_sid     = sn->sn_id;
+		reply->mksn_timeout = sn->sn_timeout;
+
+		if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) {
+			atomic_inc(&sn->sn_refcount);
+			return 0;
+		}
+
+		if (!request->mksn_force) {
+			reply->mksn_status = EBUSY;
+			cplen = strlcpy(&reply->mksn_name[0], &sn->sn_name[0],
+					sizeof(reply->mksn_name));
+			if (cplen >= sizeof(reply->mksn_name))
+				return -E2BIG;
+			return 0;
+		}
+	}
+
+	/* reject the request if it requires unknown features
+	 * NB: old version will always accept all features because it's not
+	 * aware of srpc_msg_t::msg_ses_feats, it's a defect but it's also
+	 * harmless because it will return zero feature to console, and it's
+	 * console's responsibility to make sure all nodes in a session have
+	 * same feature mask. */
+	if ((msg->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+		reply->mksn_status = EPROTO;
+		return 0;
+	}
+
+	/* brand new or create by force */
+	LIBCFS_ALLOC(sn, sizeof(sfw_session_t));
+	if (sn == NULL) {
+		CERROR ("Dropping RPC (mksn) under memory pressure.\n");
+		return -ENOMEM;
+	}
+
+	sfw_init_session(sn, request->mksn_sid,
+			 msg->msg_ses_feats, &request->mksn_name[0]);
+
+	spin_lock(&sfw_data.fw_lock);
+
+	sfw_deactivate_session();
+	LASSERT(sfw_data.fw_session == NULL);
+	sfw_data.fw_session = sn;
+
+	spin_unlock(&sfw_data.fw_lock);
+
+	reply->mksn_status  = 0;
+	reply->mksn_sid     = sn->sn_id;
+	reply->mksn_timeout = sn->sn_timeout;
+	return 0;
+}
+
+int
+sfw_remove_session (srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply)
+{
+	sfw_session_t *sn = sfw_data.fw_session;
+
+	reply->rmsn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+
+	if (request->rmsn_sid.ses_nid == LNET_NID_ANY) {
+		reply->rmsn_status = EINVAL;
+		return 0;
+	}
+
+	if (sn == NULL || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) {
+		reply->rmsn_status = (sn == NULL) ? ESRCH : EBUSY;
+		return 0;
+	}
+
+	if (!atomic_dec_and_test(&sn->sn_refcount)) {
+		reply->rmsn_status = 0;
+		return 0;
+	}
+
+	spin_lock(&sfw_data.fw_lock);
+	sfw_deactivate_session();
+	spin_unlock(&sfw_data.fw_lock);
+
+	reply->rmsn_status = 0;
+	reply->rmsn_sid    = LST_INVALID_SID;
+	LASSERT(sfw_data.fw_session == NULL);
+	return 0;
+}
+
+int
+sfw_debug_session (srpc_debug_reqst_t *request, srpc_debug_reply_t *reply)
+{
+	sfw_session_t *sn = sfw_data.fw_session;
+
+	if (sn == NULL) {
+		reply->dbg_status = ESRCH;
+		reply->dbg_sid    = LST_INVALID_SID;
+		return 0;
+	}
+
+	reply->dbg_status  = 0;
+	reply->dbg_sid     = sn->sn_id;
+	reply->dbg_timeout = sn->sn_timeout;
+	if (strlcpy(reply->dbg_name, &sn->sn_name[0], sizeof(reply->dbg_name))
+	    >= sizeof(reply->dbg_name))
+		return -E2BIG;
+
+	return 0;
+}
+
+void
+sfw_test_rpc_fini (srpc_client_rpc_t *rpc)
+{
+	sfw_test_unit_t     *tsu = rpc->crpc_priv;
+	sfw_test_instance_t *tsi = tsu->tsu_instance;
+
+	/* Called with hold of tsi->tsi_lock */
+	LASSERT (list_empty(&rpc->crpc_list));
+	list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
+}
+
+static inline int
+sfw_test_buffers(sfw_test_instance_t *tsi)
+{
+	struct sfw_test_case	*tsc = sfw_find_test_case(tsi->tsi_service);
+	struct srpc_service	*svc = tsc->tsc_srv_service;
+	int			nbuf;
+
+	nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts;
+	return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA);
+}
+
+int
+sfw_load_test(struct sfw_test_instance *tsi)
+{
+	struct sfw_test_case	*tsc;
+	struct srpc_service	*svc;
+	int			nbuf;
+	int			rc;
+
+	LASSERT(tsi != NULL);
+	tsc = sfw_find_test_case(tsi->tsi_service);
+	nbuf = sfw_test_buffers(tsi);
+	LASSERT(tsc != NULL);
+	svc = tsc->tsc_srv_service;
+
+	if (tsi->tsi_is_client) {
+		tsi->tsi_ops = tsc->tsc_cli_ops;
+		return 0;
+	}
+
+	rc = srpc_service_add_buffers(svc, nbuf);
+	if (rc != 0) {
+		CWARN("Failed to reserve enough buffers: "
+		      "service %s, %d needed: %d\n", svc->sv_name, nbuf, rc);
+		/* NB: this error handler is not strictly correct, because
+		 * it may release more buffers than already allocated,
+		 * but it doesn't matter because request portal should
+		 * be lazy portal and will grow buffers if necessary. */
+		srpc_service_remove_buffers(svc, nbuf);
+		return -ENOMEM;
+	}
+
+	CDEBUG(D_NET, "Reserved %d buffers for test %s\n",
+	       nbuf * (srpc_serv_is_framework(svc) ?
+		       1 : cfs_cpt_number(cfs_cpt_table)), svc->sv_name);
+	return 0;
+}
+
+void
+sfw_unload_test(struct sfw_test_instance *tsi)
+{
+	struct sfw_test_case *tsc = sfw_find_test_case(tsi->tsi_service);
+
+	LASSERT(tsc != NULL);
+
+	if (tsi->tsi_is_client)
+		return;
+
+	/* shrink buffers, because request portal is lazy portal
+	 * which can grow buffers at runtime so we may leave
+	 * some buffers behind, but never mind... */
+	srpc_service_remove_buffers(tsc->tsc_srv_service,
+				    sfw_test_buffers(tsi));
+	return;
+}
+
+void
+sfw_destroy_test_instance (sfw_test_instance_t *tsi)
+{
+	srpc_client_rpc_t *rpc;
+	sfw_test_unit_t   *tsu;
+
+	if (!tsi->tsi_is_client)
+		goto clean;
+
+	tsi->tsi_ops->tso_fini(tsi);
+
+	LASSERT (!tsi->tsi_stopping);
+	LASSERT (list_empty(&tsi->tsi_active_rpcs));
+	LASSERT (!sfw_test_active(tsi));
+
+	while (!list_empty(&tsi->tsi_units)) {
+		tsu = list_entry(tsi->tsi_units.next,
+				     sfw_test_unit_t, tsu_list);
+		list_del(&tsu->tsu_list);
+		LIBCFS_FREE(tsu, sizeof(*tsu));
+	}
+
+	while (!list_empty(&tsi->tsi_free_rpcs)) {
+		rpc = list_entry(tsi->tsi_free_rpcs.next,
+				     srpc_client_rpc_t, crpc_list);
+		list_del(&rpc->crpc_list);
+		LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
+	}
+
+clean:
+	sfw_unload_test(tsi);
+	LIBCFS_FREE(tsi, sizeof(*tsi));
+	return;
+}
+
+void
+sfw_destroy_batch (sfw_batch_t *tsb)
+{
+	sfw_test_instance_t *tsi;
+
+	LASSERT (!sfw_batch_active(tsb));
+	LASSERT (list_empty(&tsb->bat_list));
+
+	while (!list_empty(&tsb->bat_tests)) {
+		tsi = list_entry(tsb->bat_tests.next,
+				     sfw_test_instance_t, tsi_list);
+		list_del_init(&tsi->tsi_list);
+		sfw_destroy_test_instance(tsi);
+	}
+
+	LIBCFS_FREE(tsb, sizeof(sfw_batch_t));
+	return;
+}
+
+void
+sfw_destroy_session (sfw_session_t *sn)
+{
+	sfw_batch_t *batch;
+
+	LASSERT (list_empty(&sn->sn_list));
+	LASSERT (sn != sfw_data.fw_session);
+
+	while (!list_empty(&sn->sn_batches)) {
+		batch = list_entry(sn->sn_batches.next,
+				       sfw_batch_t, bat_list);
+		list_del_init(&batch->bat_list);
+		sfw_destroy_batch(batch);
+	}
+
+	LIBCFS_FREE(sn, sizeof(*sn));
+	atomic_dec(&sfw_data.fw_nzombies);
+	return;
+}
+
+void
+sfw_unpack_addtest_req(srpc_msg_t *msg)
+{
+	srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
+
+	LASSERT (msg->msg_type == SRPC_MSG_TEST_REQST);
+	LASSERT (req->tsr_is_client);
+
+	if (msg->msg_magic == SRPC_MSG_MAGIC)
+		return; /* no flipping needed */
+
+	LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
+
+	if (req->tsr_service == SRPC_SERVICE_BRW) {
+		if ((msg->msg_ses_feats & LST_FEAT_BULK_LEN) == 0) {
+			test_bulk_req_t *bulk = &req->tsr_u.bulk_v0;
+
+			__swab32s(&bulk->blk_opc);
+			__swab32s(&bulk->blk_npg);
+			__swab32s(&bulk->blk_flags);
+
+		} else {
+			test_bulk_req_v1_t *bulk = &req->tsr_u.bulk_v1;
+
+			__swab16s(&bulk->blk_opc);
+			__swab16s(&bulk->blk_flags);
+			__swab32s(&bulk->blk_offset);
+			__swab32s(&bulk->blk_len);
+		}
+
+		return;
+	}
+
+	if (req->tsr_service == SRPC_SERVICE_PING) {
+		test_ping_req_t *ping = &req->tsr_u.ping;
+
+		__swab32s(&ping->png_size);
+		__swab32s(&ping->png_flags);
+		return;
+	}
+
+	LBUG ();
+	return;
+}
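+
+/*
+ * NB: the BRW test request has two wire layouts: bulk_v0 when the session
+ * was negotiated without LST_FEAT_BULK_LEN, and bulk_v1 otherwise.  Which
+ * layout to swab is decided by msg_ses_feats, not by the message body.
+ */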
+
+int
+sfw_add_test_instance (sfw_batch_t *tsb, srpc_server_rpc_t *rpc)
+{
+	srpc_msg_t	    *msg = &rpc->srpc_reqstbuf->buf_msg;
+	srpc_test_reqst_t   *req = &msg->msg_body.tes_reqst;
+	srpc_bulk_t	    *bk = rpc->srpc_bulk;
+	int		     ndest = req->tsr_ndest;
+	sfw_test_unit_t	    *tsu;
+	sfw_test_instance_t *tsi;
+	int		     i;
+	int		     rc;
+
+	LIBCFS_ALLOC(tsi, sizeof(*tsi));
+	if (tsi == NULL) {
+		CERROR ("Can't allocate test instance for batch: "LPU64"\n",
+			tsb->bat_id.bat_id);
+		return -ENOMEM;
+	}
+
+	memset(tsi, 0, sizeof(*tsi));
+	spin_lock_init(&tsi->tsi_lock);
+	atomic_set(&tsi->tsi_nactive, 0);
+	INIT_LIST_HEAD(&tsi->tsi_units);
+	INIT_LIST_HEAD(&tsi->tsi_free_rpcs);
+	INIT_LIST_HEAD(&tsi->tsi_active_rpcs);
+
+	tsi->tsi_stopping      = 0;
+	tsi->tsi_batch	       = tsb;
+	tsi->tsi_loop	       = req->tsr_loop;
+	tsi->tsi_concur	       = req->tsr_concur;
+	tsi->tsi_service       = req->tsr_service;
+	tsi->tsi_is_client     = !!(req->tsr_is_client);
+	tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr);
+
+	rc = sfw_load_test(tsi);
+	if (rc != 0) {
+		LIBCFS_FREE(tsi, sizeof(*tsi));
+		return rc;
+	}
+
+	LASSERT (!sfw_batch_active(tsb));
+
+	if (!tsi->tsi_is_client) {
+		/* it's test server, just add it to tsb */
+		list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
+		return 0;
+	}
+
+	LASSERT (bk != NULL);
+	LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest);
+	LASSERT((unsigned int)bk->bk_len >=
+		sizeof(lnet_process_id_packed_t) * ndest);
+
+	sfw_unpack_addtest_req(msg);
+	memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
+
+	for (i = 0; i < ndest; i++) {
+		lnet_process_id_packed_t *dests;
+		lnet_process_id_packed_t  id;
+		int			  j;
+
+		dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page);
+		LASSERT (dests != NULL);  /* my pages are within KVM always */
+		id = dests[i % SFW_ID_PER_PAGE];
+		if (msg->msg_magic != SRPC_MSG_MAGIC)
+			sfw_unpack_id(id);
+
+		for (j = 0; j < tsi->tsi_concur; j++) {
+			LIBCFS_ALLOC(tsu, sizeof(sfw_test_unit_t));
+			if (tsu == NULL) {
+				rc = -ENOMEM;
+				CERROR ("Can't allocate tsu for %d\n",
+					tsi->tsi_service);
+				goto error;
+			}
+
+			tsu->tsu_dest.nid = id.nid;
+			tsu->tsu_dest.pid = id.pid;
+			tsu->tsu_instance = tsi;
+			tsu->tsu_private  = NULL;
+			list_add_tail(&tsu->tsu_list, &tsi->tsi_units);
+		}
+	}
+
+	rc = tsi->tsi_ops->tso_init(tsi);
+	if (rc == 0) {
+		list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
+		return 0;
+	}
+
+error:
+	LASSERT (rc != 0);
+	sfw_destroy_test_instance(tsi);
+	return rc;
+}
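+
+/*
+ * NB: a client test instance ends up with tsr_ndest * tsr_concur test
+ * units: one unit per destination per concurrency slot, each driven by
+ * its own workitem (see sfw_run_batch()).
+ */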
+
+static void
+sfw_test_unit_done (sfw_test_unit_t *tsu)
+{
+	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	sfw_batch_t	 *tsb = tsi->tsi_batch;
+	sfw_session_t       *sn = tsb->bat_session;
+
+	LASSERT (sfw_test_active(tsi));
+
+	if (!atomic_dec_and_test(&tsi->tsi_nactive))
+		return;
+
+	/* the test instance is done */
+	spin_lock(&tsi->tsi_lock);
+
+	tsi->tsi_stopping = 0;
+
+	spin_unlock(&tsi->tsi_lock);
+
+	spin_lock(&sfw_data.fw_lock);
+
+	if (!atomic_dec_and_test(&tsb->bat_nactive) ||/* tsb still active */
+	    sn == sfw_data.fw_session) {		  /* sn also active */
+		spin_unlock(&sfw_data.fw_lock);
+		return;
+	}
+
+	LASSERT (!list_empty(&sn->sn_list)); /* I'm a zombie! */
+
+	list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
+		if (sfw_batch_active(tsb)) {
+			spin_unlock(&sfw_data.fw_lock);
+			return;
+		}
+	}
+
+	list_del_init(&sn->sn_list);
+	spin_unlock(&sfw_data.fw_lock);
+
+	sfw_destroy_session(sn);
+	return;
+}
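+
+/*
+ * NB: teardown cascades bottom-up: the last test unit to finish retires
+ * its test instance, the last active instance retires the batch, and a
+ * zombie session (one that is no longer fw_session) with no remaining
+ * active batches is destroyed right here.
+ */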
+
+void
+sfw_test_rpc_done (srpc_client_rpc_t *rpc)
+{
+	sfw_test_unit_t     *tsu = rpc->crpc_priv;
+	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	int		  done = 0;
+
+	tsi->tsi_ops->tso_done_rpc(tsu, rpc);
+
+	spin_lock(&tsi->tsi_lock);
+
+	LASSERT (sfw_test_active(tsi));
+	LASSERT (!list_empty(&rpc->crpc_list));
+
+	list_del_init(&rpc->crpc_list);
+
+	/* batch is stopping, loop is done, or an error occurred */
+	if (tsi->tsi_stopping ||
+	    tsu->tsu_loop == 0 ||
+	    (rpc->crpc_status != 0 && tsi->tsi_stoptsu_onerr))
+		done = 1;
+
+	/* dec ref for poster */
+	srpc_client_rpc_decref(rpc);
+
+	spin_unlock(&tsi->tsi_lock);
+
+	if (!done) {
+		swi_schedule_workitem(&tsu->tsu_worker);
+		return;
+	}
+
+	sfw_test_unit_done(tsu);
+	return;
+}
+
+int
+sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer,
+		    unsigned features, int nblk, int blklen,
+		    srpc_client_rpc_t **rpcpp)
+{
+	srpc_client_rpc_t   *rpc = NULL;
+	sfw_test_instance_t *tsi = tsu->tsu_instance;
+
+	spin_lock(&tsi->tsi_lock);
+
+	LASSERT (sfw_test_active(tsi));
+
+	if (!list_empty(&tsi->tsi_free_rpcs)) {
+		/* pick request from buffer */
+		rpc = list_entry(tsi->tsi_free_rpcs.next,
+				     srpc_client_rpc_t, crpc_list);
+		LASSERT (nblk == rpc->crpc_bulk.bk_niov);
+		list_del_init(&rpc->crpc_list);
+	}
+
+	spin_unlock(&tsi->tsi_lock);
+
+	if (rpc == NULL) {
+		rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
+					     blklen, sfw_test_rpc_done,
+					     sfw_test_rpc_fini, tsu);
+	} else {
+		srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
+				     blklen, sfw_test_rpc_done,
+				     sfw_test_rpc_fini, tsu);
+	}
+
+	if (rpc == NULL) {
+		CERROR("Can't create rpc for test %d\n", tsi->tsi_service);
+		return -ENOMEM;
+	}
+
+	rpc->crpc_reqstmsg.msg_ses_feats = features;
+	*rpcpp = rpc;
+
+	return 0;
+}
+
+int
+sfw_run_test (swi_workitem_t *wi)
+{
+	sfw_test_unit_t     *tsu = wi->swi_workitem.wi_data;
+	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	srpc_client_rpc_t   *rpc = NULL;
+
+	LASSERT (wi == &tsu->tsu_worker);
+
+	if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc) != 0) {
+		LASSERT (rpc == NULL);
+		goto test_done;
+	}
+
+	LASSERT (rpc != NULL);
+
+	spin_lock(&tsi->tsi_lock);
+
+	if (tsi->tsi_stopping) {
+		list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
+		spin_unlock(&tsi->tsi_lock);
+		goto test_done;
+	}
+
+	if (tsu->tsu_loop > 0)
+		tsu->tsu_loop--;
+
+	list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
+	spin_unlock(&tsi->tsi_lock);
+
+	rpc->crpc_timeout = rpc_timeout;
+
+	spin_lock(&rpc->crpc_lock);
+	srpc_post_rpc(rpc);
+	spin_unlock(&rpc->crpc_lock);
+	return 0;
+
+test_done:
+	/*
+	 * No one can schedule me now because:
+	 * - the previous RPC, if any, has completed, and
+	 * - no new RPC has been initiated, and
+	 * - my batch is still active, so no one can run it again yet.
+	 * Cancel pending schedules and prevent future schedule attempts:
+	 */
+	swi_exit_workitem(wi);
+	sfw_test_unit_done(tsu);
+	return 1;
+}
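+
+/*
+ * NB: sfw_run_test() returns 1 only on the exit path above, after
+ * swi_exit_workitem() has detached the workitem; returning 0 keeps the
+ * workitem alive so that sfw_test_rpc_done() can schedule the next
+ * loop iteration.
+ */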
+
+int
+sfw_run_batch (sfw_batch_t *tsb)
+{
+	swi_workitem_t      *wi;
+	sfw_test_unit_t     *tsu;
+	sfw_test_instance_t *tsi;
+
+	if (sfw_batch_active(tsb)) {
+		CDEBUG(D_NET, "Batch already active: "LPU64" (%d)\n",
+		       tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
+		return 0;
+	}
+
+	list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+		if (!tsi->tsi_is_client) /* skip server instances */
+			continue;
+
+		LASSERT (!tsi->tsi_stopping);
+		LASSERT (!sfw_test_active(tsi));
+
+		atomic_inc(&tsb->bat_nactive);
+
+		list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) {
+			atomic_inc(&tsi->tsi_nactive);
+			tsu->tsu_loop = tsi->tsi_loop;
+			wi = &tsu->tsu_worker;
+			swi_init_workitem(wi, tsu, sfw_run_test,
+					  lst_sched_test[
+					  lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
+			swi_schedule_workitem(wi);
+		}
+	}
+
+	return 0;
+}
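+
+/*
+ * NB: each test unit's workitem is bound to the scheduler of the CPT
+ * that owns the destination NID (lnet_cpt_of_nid() above), keeping test
+ * traffic for a destination local to one CPU partition.
+ */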
+
+int
+sfw_stop_batch (sfw_batch_t *tsb, int force)
+{
+	sfw_test_instance_t *tsi;
+	srpc_client_rpc_t   *rpc;
+
+	if (!sfw_batch_active(tsb)) {
+		CDEBUG(D_NET, "Batch "LPU64" inactive\n", tsb->bat_id.bat_id);
+		return 0;
+	}
+
+	list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+		spin_lock(&tsi->tsi_lock);
+
+		if (!tsi->tsi_is_client ||
+		    !sfw_test_active(tsi) || tsi->tsi_stopping) {
+			spin_unlock(&tsi->tsi_lock);
+			continue;
+		}
+
+		tsi->tsi_stopping = 1;
+
+		if (!force) {
+			spin_unlock(&tsi->tsi_lock);
+			continue;
+		}
+
+		/* abort launched rpcs in the test */
+		list_for_each_entry(rpc, &tsi->tsi_active_rpcs, crpc_list) {
+			spin_lock(&rpc->crpc_lock);
+
+			srpc_abort_rpc(rpc, -EINTR);
+
+			spin_unlock(&rpc->crpc_lock);
+		}
+
+		spin_unlock(&tsi->tsi_lock);
+	}
+
+	return 0;
+}
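+
+/*
+ * NB: a non-forced stop only raises tsi_stopping and lets in-flight RPCs
+ * drain; force additionally aborts every active RPC with -EINTR so the
+ * batch winds down immediately.
+ */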
+
+int
+sfw_query_batch (sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply)
+{
+	sfw_test_instance_t *tsi;
+
+	if (testidx < 0)
+		return -EINVAL;
+
+	if (testidx == 0) {
+		reply->bar_active = atomic_read(&tsb->bat_nactive);
+		return 0;
+	}
+
+	list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+		if (testidx-- > 1)
+			continue;
+
+		reply->bar_active = atomic_read(&tsi->tsi_nactive);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+void
+sfw_free_pages (srpc_server_rpc_t *rpc)
+{
+	srpc_free_bulk(rpc->srpc_bulk);
+	rpc->srpc_bulk = NULL;
+}
+
+int
+sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
+		int sink)
+{
+	LASSERT(rpc->srpc_bulk == NULL);
+	LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
+
+	rpc->srpc_bulk = srpc_alloc_bulk(cpt, npages, len, sink);
+	if (rpc->srpc_bulk == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int
+sfw_add_test (srpc_server_rpc_t *rpc)
+{
+	sfw_session_t     *sn = sfw_data.fw_session;
+	srpc_test_reply_t *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
+	srpc_test_reqst_t *request;
+	int		rc;
+	sfw_batch_t       *bat;
+
+	request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst;
+	reply->tsr_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+
+	if (request->tsr_loop == 0 ||
+	    request->tsr_concur == 0 ||
+	    request->tsr_sid.ses_nid == LNET_NID_ANY ||
+	    request->tsr_ndest > SFW_MAX_NDESTS ||
+	    (request->tsr_is_client && request->tsr_ndest == 0) ||
+	    request->tsr_concur > SFW_MAX_CONCUR ||
+	    request->tsr_service > SRPC_SERVICE_MAX_ID ||
+	    request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) {
+		reply->tsr_status = EINVAL;
+		return 0;
+	}
+
+	if (sn == NULL || !sfw_sid_equal(request->tsr_sid, sn->sn_id) ||
+	    sfw_find_test_case(request->tsr_service) == NULL) {
+		reply->tsr_status = ENOENT;
+		return 0;
+	}
+
+	bat = sfw_bid2batch(request->tsr_bid);
+	if (bat == NULL) {
+		CERROR ("Dropping RPC (%s) from %s under memory pressure.\n",
+			rpc->srpc_scd->scd_svc->sv_name,
+			libcfs_id2str(rpc->srpc_peer));
+		return -ENOMEM;
+	}
+
+	if (sfw_batch_active(bat)) {
+		reply->tsr_status = EBUSY;
+		return 0;
+	}
+
+	if (request->tsr_is_client && rpc->srpc_bulk == NULL) {
+		/* rpc will be resumed later in sfw_bulk_ready */
+		int	npg = sfw_id_pages(request->tsr_ndest);
+		int	len;
+
+		if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) {
+			len = npg * PAGE_CACHE_SIZE;
+
+		} else {
+			len = sizeof(lnet_process_id_packed_t) *
+			      request->tsr_ndest;
+		}
+
+		return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1);
+	}
+
+	rc = sfw_add_test_instance(bat, rpc);
+	CDEBUG (rc == 0 ? D_NET : D_WARNING,
+		"%s test: sv %d %s, loop %d, concur %d, ndest %d\n",
+		rc == 0 ? "Added" : "Failed to add", request->tsr_service,
+		request->tsr_is_client ? "client" : "server",
+		request->tsr_loop, request->tsr_concur, request->tsr_ndest);
+
+	reply->tsr_status = (rc < 0) ? -rc : rc;
+	return 0;
+}
+
+int
+sfw_control_batch (srpc_batch_reqst_t *request, srpc_batch_reply_t *reply)
+{
+	sfw_session_t *sn = sfw_data.fw_session;
+	int	    rc = 0;
+	sfw_batch_t   *bat;
+
+	reply->bar_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+
+	if (sn == NULL || !sfw_sid_equal(request->bar_sid, sn->sn_id)) {
+		reply->bar_status = ESRCH;
+		return 0;
+	}
+
+	bat = sfw_find_batch(request->bar_bid);
+	if (bat == NULL) {
+		reply->bar_status = ENOENT;
+		return 0;
+	}
+
+	switch (request->bar_opc) {
+	case SRPC_BATCH_OPC_RUN:
+		rc = sfw_run_batch(bat);
+		break;
+
+	case SRPC_BATCH_OPC_STOP:
+		rc = sfw_stop_batch(bat, request->bar_arg);
+		break;
+
+	case SRPC_BATCH_OPC_QUERY:
+		rc = sfw_query_batch(bat, request->bar_testidx, reply);
+		break;
+
+	default:
+		return -EINVAL; /* drop it */
+	}
+
+	reply->bar_status = (rc < 0) ? -rc : rc;
+	return 0;
+}
+
+int
+sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
+{
+	struct srpc_service *sv	      = rpc->srpc_scd->scd_svc;
+	srpc_msg_t	    *reply    = &rpc->srpc_replymsg;
+	srpc_msg_t	    *request  = &rpc->srpc_reqstbuf->buf_msg;
+	unsigned	     features = LST_FEATS_MASK;
+	int		     rc	      = 0;
+
+	LASSERT(sfw_data.fw_active_srpc == NULL);
+	LASSERT(sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
+
+	spin_lock(&sfw_data.fw_lock);
+
+	if (sfw_data.fw_shuttingdown) {
+		spin_unlock(&sfw_data.fw_lock);
+		return -ESHUTDOWN;
+	}
+
+	/* Remove timer to avoid racing with it or expiring active session */
+	if (sfw_del_session_timer() != 0) {
+		CERROR("Dropping RPC (%s) from %s: racing with expiry timer\n",
+		       sv->sv_name, libcfs_id2str(rpc->srpc_peer));
+		spin_unlock(&sfw_data.fw_lock);
+		return -EAGAIN;
+	}
+
+	sfw_data.fw_active_srpc = rpc;
+	spin_unlock(&sfw_data.fw_lock);
+
+	sfw_unpack_message(request);
+	LASSERT(request->msg_type == srpc_service2request(sv->sv_id));
+
+	/* rpc module should have checked this */
+	LASSERT(request->msg_version == SRPC_MSG_VERSION);
+
+	if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION &&
+	    sv->sv_id != SRPC_SERVICE_DEBUG) {
+		sfw_session_t *sn = sfw_data.fw_session;
+
+		if (sn != NULL &&
+		    sn->sn_features != request->msg_ses_feats) {
+			CNETERR("Features of framework RPC don't match "
+				"features of current session: %x/%x\n",
+				request->msg_ses_feats, sn->sn_features);
+			reply->msg_body.reply.status = EPROTO;
+			reply->msg_body.reply.sid    = sn->sn_id;
+			goto out;
+		}
+
+	} else if ((request->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+		/* NB: at this point an old version will ignore the features
+		 * and create a new session anyway, so the console should be
+		 * able to handle this */
+		reply->msg_body.reply.status = EPROTO;
+		goto out;
+	}
+
+	switch (sv->sv_id) {
+	default:
+		LBUG ();
+	case SRPC_SERVICE_TEST:
+		rc = sfw_add_test(rpc);
+		break;
+
+	case SRPC_SERVICE_BATCH:
+		rc = sfw_control_batch(&request->msg_body.bat_reqst,
+				       &reply->msg_body.bat_reply);
+		break;
+
+	case SRPC_SERVICE_QUERY_STAT:
+		rc = sfw_get_stats(&request->msg_body.stat_reqst,
+				   &reply->msg_body.stat_reply);
+		break;
+
+	case SRPC_SERVICE_DEBUG:
+		rc = sfw_debug_session(&request->msg_body.dbg_reqst,
+				       &reply->msg_body.dbg_reply);
+		break;
+
+	case SRPC_SERVICE_MAKE_SESSION:
+		rc = sfw_make_session(&request->msg_body.mksn_reqst,
+				      &reply->msg_body.mksn_reply);
+		break;
+
+	case SRPC_SERVICE_REMOVE_SESSION:
+		rc = sfw_remove_session(&request->msg_body.rmsn_reqst,
+					&reply->msg_body.rmsn_reply);
+		break;
+	}
+
+	if (sfw_data.fw_session != NULL)
+		features = sfw_data.fw_session->sn_features;
+ out:
+	reply->msg_ses_feats = features;
+	rpc->srpc_done = sfw_server_rpc_done;
+	spin_lock(&sfw_data.fw_lock);
+
+	if (!sfw_data.fw_shuttingdown)
+		sfw_add_session_timer();
+
+	sfw_data.fw_active_srpc = NULL;
+	spin_unlock(&sfw_data.fw_lock);
+	return rc;
+}
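+
+/*
+ * NB: both sfw_handle_server_rpc() and sfw_bulk_ready() bracket their
+ * work with sfw_del_session_timer()/sfw_add_session_timer() while
+ * holding fw_lock, so serving an RPC never races with session expiry;
+ * if the timer can't be removed the RPC is dropped with -EAGAIN.
+ */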
+
+int
+sfw_bulk_ready(struct srpc_server_rpc *rpc, int status)
+{
+	struct srpc_service	*sv = rpc->srpc_scd->scd_svc;
+	int			rc;
+
+	LASSERT(rpc->srpc_bulk != NULL);
+	LASSERT(sv->sv_id == SRPC_SERVICE_TEST);
+	LASSERT(sfw_data.fw_active_srpc == NULL);
+	LASSERT(rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client);
+
+	spin_lock(&sfw_data.fw_lock);
+
+	if (status != 0) {
+		CERROR("Bulk transfer failed for RPC: "
+		       "service %s, peer %s, status %d\n",
+		       sv->sv_name, libcfs_id2str(rpc->srpc_peer), status);
+		spin_unlock(&sfw_data.fw_lock);
+		return -EIO;
+	}
+
+	if (sfw_data.fw_shuttingdown) {
+		spin_unlock(&sfw_data.fw_lock);
+		return -ESHUTDOWN;
+	}
+
+	if (sfw_del_session_timer() != 0) {
+		CERROR("Dropping RPC (%s) from %s: racing with expiry timer\n",
+		       sv->sv_name, libcfs_id2str(rpc->srpc_peer));
+		spin_unlock(&sfw_data.fw_lock);
+		return -EAGAIN;
+	}
+
+	sfw_data.fw_active_srpc = rpc;
+	spin_unlock(&sfw_data.fw_lock);
+
+	rc = sfw_add_test(rpc);
+
+	spin_lock(&sfw_data.fw_lock);
+
+	if (!sfw_data.fw_shuttingdown)
+		sfw_add_session_timer();
+
+	sfw_data.fw_active_srpc = NULL;
+	spin_unlock(&sfw_data.fw_lock);
+	return rc;
+}
+
+srpc_client_rpc_t *
+sfw_create_rpc(lnet_process_id_t peer, int service,
+	       unsigned features, int nbulkiov, int bulklen,
+	       void (*done)(srpc_client_rpc_t *), void *priv)
+{
+	srpc_client_rpc_t *rpc = NULL;
+
+	spin_lock(&sfw_data.fw_lock);
+
+	LASSERT (!sfw_data.fw_shuttingdown);
+	LASSERT (service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
+
+	if (nbulkiov == 0 && !list_empty(&sfw_data.fw_zombie_rpcs)) {
+		rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
+				     srpc_client_rpc_t, crpc_list);
+		list_del(&rpc->crpc_list);
+
+		srpc_init_client_rpc(rpc, peer, service, 0, 0,
+				     done, sfw_client_rpc_fini, priv);
+	}
+
+	spin_unlock(&sfw_data.fw_lock);
+
+	if (rpc == NULL) {
+		rpc = srpc_create_client_rpc(peer, service,
+					     nbulkiov, bulklen, done,
+					     nbulkiov != 0 ? NULL :
+					     sfw_client_rpc_fini,
+					     priv);
+	}
+
+	if (rpc != NULL) /* "session" is a framework-level concept */
+		rpc->crpc_reqstmsg.msg_ses_feats = features;
+
+	return rpc;
+}
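+
+/*
+ * NB: only bulk-less framework RPCs are recycled through fw_zombie_rpcs
+ * (their size doesn't depend on bk_niov); an RPC with bulk pages is
+ * always allocated fresh and gets a NULL fini callback, so it is never
+ * put back on the zombie list.
+ */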
+
+void
+sfw_unpack_message (srpc_msg_t *msg)
+{
+	if (msg->msg_magic == SRPC_MSG_MAGIC)
+		return; /* no flipping needed */
+
+	/* srpc module should guarantee I wouldn't get crap */
+	LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
+
+	if (msg->msg_type == SRPC_MSG_STAT_REQST) {
+		srpc_stat_reqst_t *req = &msg->msg_body.stat_reqst;
+
+		__swab32s(&req->str_type);
+		__swab64s(&req->str_rpyid);
+		sfw_unpack_sid(req->str_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_STAT_REPLY) {
+		srpc_stat_reply_t *rep = &msg->msg_body.stat_reply;
+
+		__swab32s(&rep->str_status);
+		sfw_unpack_sid(rep->str_sid);
+		sfw_unpack_fw_counters(rep->str_fw);
+		sfw_unpack_rpc_counters(rep->str_rpc);
+		sfw_unpack_lnet_counters(rep->str_lnet);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_MKSN_REQST) {
+		srpc_mksn_reqst_t *req = &msg->msg_body.mksn_reqst;
+
+		__swab64s(&req->mksn_rpyid);
+		__swab32s(&req->mksn_force);
+		sfw_unpack_sid(req->mksn_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_MKSN_REPLY) {
+		srpc_mksn_reply_t *rep = &msg->msg_body.mksn_reply;
+
+		__swab32s(&rep->mksn_status);
+		__swab32s(&rep->mksn_timeout);
+		sfw_unpack_sid(rep->mksn_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_RMSN_REQST) {
+		srpc_rmsn_reqst_t *req = &msg->msg_body.rmsn_reqst;
+
+		__swab64s(&req->rmsn_rpyid);
+		sfw_unpack_sid(req->rmsn_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_RMSN_REPLY) {
+		srpc_rmsn_reply_t *rep = &msg->msg_body.rmsn_reply;
+
+		__swab32s(&rep->rmsn_status);
+		sfw_unpack_sid(rep->rmsn_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_DEBUG_REQST) {
+		srpc_debug_reqst_t *req = &msg->msg_body.dbg_reqst;
+
+		__swab64s(&req->dbg_rpyid);
+		__swab32s(&req->dbg_flags);
+		sfw_unpack_sid(req->dbg_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) {
+		srpc_debug_reply_t *rep = &msg->msg_body.dbg_reply;
+
+		__swab32s(&rep->dbg_nbatch);
+		__swab32s(&rep->dbg_timeout);
+		sfw_unpack_sid(rep->dbg_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_BATCH_REQST) {
+		srpc_batch_reqst_t *req = &msg->msg_body.bat_reqst;
+
+		__swab32s(&req->bar_opc);
+		__swab64s(&req->bar_rpyid);
+		__swab32s(&req->bar_testidx);
+		__swab32s(&req->bar_arg);
+		sfw_unpack_sid(req->bar_sid);
+		__swab64s(&req->bar_bid.bat_id);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_BATCH_REPLY) {
+		srpc_batch_reply_t *rep = &msg->msg_body.bat_reply;
+
+		__swab32s(&rep->bar_status);
+		sfw_unpack_sid(rep->bar_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_TEST_REQST) {
+		srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
+
+		__swab64s(&req->tsr_rpyid);
+		__swab64s(&req->tsr_bulkid);
+		__swab32s(&req->tsr_loop);
+		__swab32s(&req->tsr_ndest);
+		__swab32s(&req->tsr_concur);
+		__swab32s(&req->tsr_service);
+		sfw_unpack_sid(req->tsr_sid);
+		__swab64s(&req->tsr_bid.bat_id);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_TEST_REPLY) {
+		srpc_test_reply_t *rep = &msg->msg_body.tes_reply;
+
+		__swab32s(&rep->tsr_status);
+		sfw_unpack_sid(rep->tsr_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_JOIN_REQST) {
+		srpc_join_reqst_t *req = &msg->msg_body.join_reqst;
+
+		__swab64s(&req->join_rpyid);
+		sfw_unpack_sid(req->join_sid);
+		return;
+	}
+
+	if (msg->msg_type == SRPC_MSG_JOIN_REPLY) {
+		srpc_join_reply_t *rep = &msg->msg_body.join_reply;
+
+		__swab32s(&rep->join_status);
+		__swab32s(&rep->join_timeout);
+		sfw_unpack_sid(rep->join_sid);
+		return;
+	}
+
+	LBUG ();
+	return;
+}
+
+void
+sfw_abort_rpc (srpc_client_rpc_t *rpc)
+{
+	LASSERT(atomic_read(&rpc->crpc_refcount) > 0);
+	LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
+
+	spin_lock(&rpc->crpc_lock);
+	srpc_abort_rpc(rpc, -EINTR);
+	spin_unlock(&rpc->crpc_lock);
+	return;
+}
+
+void
+sfw_post_rpc (srpc_client_rpc_t *rpc)
+{
+	spin_lock(&rpc->crpc_lock);
+
+	LASSERT (!rpc->crpc_closed);
+	LASSERT (!rpc->crpc_aborted);
+	LASSERT (list_empty(&rpc->crpc_list));
+	LASSERT (!sfw_data.fw_shuttingdown);
+
+	rpc->crpc_timeout = rpc_timeout;
+	srpc_post_rpc(rpc);
+
+	spin_unlock(&rpc->crpc_lock);
+	return;
+}
+
+static srpc_service_t sfw_services[] = {
+	{
+		/* sv_id */    SRPC_SERVICE_DEBUG,
+		/* sv_name */  "debug",
+		0
+	},
+	{
+		/* sv_id */    SRPC_SERVICE_QUERY_STAT,
+		/* sv_name */  "query stats",
+		0
+	},
+	{
+		/* sv_id */    SRPC_SERVICE_MAKE_SESSION,
+		/* sv_name */  "make session",
+		0
+	},
+	{
+		/* sv_id */    SRPC_SERVICE_REMOVE_SESSION,
+		/* sv_name */  "remove session",
+		0
+	},
+	{
+		/* sv_id */    SRPC_SERVICE_BATCH,
+		/* sv_name */  "batch service",
+		0
+	},
+	{
+		/* sv_id */    SRPC_SERVICE_TEST,
+		/* sv_name */  "test service",
+		0
+	},
+	{
+		/* sv_id */    0,
+		/* sv_name */  NULL,
+		0
+	}
+};
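+
+/*
+ * NB: the table above is terminated by a sentinel entry with a NULL
+ * sv_name; sfw_startup() and sfw_shutdown() iterate until they hit it.
+ */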
+
+extern sfw_test_client_ops_t ping_test_client;
+extern srpc_service_t	ping_test_service;
+extern void ping_init_test_client(void);
+extern void ping_init_test_service(void);
+
+extern sfw_test_client_ops_t brw_test_client;
+extern srpc_service_t	brw_test_service;
+extern void brw_init_test_client(void);
+extern void brw_init_test_service(void);
+
+int
+sfw_startup (void)
+{
+	int	      i;
+	int	      rc;
+	int	      error;
+	srpc_service_t  *sv;
+	sfw_test_case_t *tsc;
+
+	if (session_timeout < 0) {
+		CERROR ("Session timeout must be non-negative: %d\n",
+			session_timeout);
+		return -EINVAL;
+	}
+
+	if (rpc_timeout < 0) {
+		CERROR ("RPC timeout must be non-negative: %d\n",
+			rpc_timeout);
+		return -EINVAL;
+	}
+
+	if (session_timeout == 0)
+		CWARN ("Zero session_timeout specified "
+		       "- test sessions never expire.\n");
+
+	if (rpc_timeout == 0)
+		CWARN ("Zero rpc_timeout specified "
+		       "- test RPCs never expire.\n");
+
+	memset(&sfw_data, 0, sizeof(struct smoketest_framework));
+
+	sfw_data.fw_session     = NULL;
+	sfw_data.fw_active_srpc = NULL;
+	spin_lock_init(&sfw_data.fw_lock);
+	atomic_set(&sfw_data.fw_nzombies, 0);
+	INIT_LIST_HEAD(&sfw_data.fw_tests);
+	INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
+	INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
+
+	brw_init_test_client();
+	brw_init_test_service();
+	rc = sfw_register_test(&brw_test_service, &brw_test_client);
+	LASSERT (rc == 0);
+
+	ping_init_test_client();
+	ping_init_test_service();
+	rc = sfw_register_test(&ping_test_service, &ping_test_client);
+	LASSERT (rc == 0);
+
+	error = 0;
+	list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+		sv = tsc->tsc_srv_service;
+
+		rc = srpc_add_service(sv);
+		LASSERT (rc != -EBUSY);
+		if (rc != 0) {
+			CWARN ("Failed to add %s service: %d\n",
+			       sv->sv_name, rc);
+			error = rc;
+		}
+	}
+
+	for (i = 0; ; i++) {
+		sv = &sfw_services[i];
+		if (sv->sv_name == NULL)
+			break;
+
+		sv->sv_bulk_ready = NULL;
+		sv->sv_handler    = sfw_handle_server_rpc;
+		sv->sv_wi_total   = SFW_FRWK_WI_MAX;
+		if (sv->sv_id == SRPC_SERVICE_TEST)
+			sv->sv_bulk_ready = sfw_bulk_ready;
+
+		rc = srpc_add_service(sv);
+		LASSERT (rc != -EBUSY);
+		if (rc != 0) {
+			CWARN ("Failed to add %s service: %d\n",
+			       sv->sv_name, rc);
+			error = rc;
+		}
+
+		/* about to call sfw_shutdown(), no need to add buffers */
+		if (error != 0)
+			continue;
+
+		rc = srpc_service_add_buffers(sv, sv->sv_wi_total);
+		if (rc != 0) {
+			CWARN("Failed to reserve enough buffers: "
+			      "service %s, %d needed, error %d\n",
+			      sv->sv_name, sv->sv_wi_total, rc);
+			error = -ENOMEM;
+		}
+	}
+
+	if (error != 0)
+		sfw_shutdown();
+	return error;
+}
+
+void
+sfw_shutdown (void)
+{
+	srpc_service_t	*sv;
+	sfw_test_case_t	*tsc;
+	int		 i;
+
+	spin_lock(&sfw_data.fw_lock);
+
+	sfw_data.fw_shuttingdown = 1;
+	lst_wait_until(sfw_data.fw_active_srpc == NULL, sfw_data.fw_lock,
+		       "waiting for active RPC to finish.\n");
+
+	if (sfw_del_session_timer() != 0)
+		lst_wait_until(sfw_data.fw_session == NULL, sfw_data.fw_lock,
+			       "waiting for session timer to explode.\n");
+
+	sfw_deactivate_session();
+	lst_wait_until(atomic_read(&sfw_data.fw_nzombies) == 0,
+		       sfw_data.fw_lock,
+		       "waiting for %d zombie sessions to die.\n",
+		       atomic_read(&sfw_data.fw_nzombies));
+
+	spin_unlock(&sfw_data.fw_lock);
+
+	for (i = 0; ; i++) {
+		sv = &sfw_services[i];
+		if (sv->sv_name == NULL)
+			break;
+
+		srpc_shutdown_service(sv);
+		srpc_remove_service(sv);
+	}
+
+	list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+		sv = tsc->tsc_srv_service;
+		srpc_shutdown_service(sv);
+		srpc_remove_service(sv);
+	}
+
+	while (!list_empty(&sfw_data.fw_zombie_rpcs)) {
+		srpc_client_rpc_t *rpc;
+
+		rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
+				     srpc_client_rpc_t, crpc_list);
+		list_del(&rpc->crpc_list);
+
+		LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
+	}
+
+	for (i = 0; ; i++) {
+		sv = &sfw_services[i];
+		if (sv->sv_name == NULL)
+			break;
+
+		srpc_wait_service_shutdown(sv);
+	}
+
+	while (!list_empty(&sfw_data.fw_tests)) {
+		tsc = list_entry(sfw_data.fw_tests.next,
+				     sfw_test_case_t, tsc_list);
+
+		srpc_wait_service_shutdown(tsc->tsc_srv_service);
+
+		list_del(&tsc->tsc_list);
+		LIBCFS_FREE(tsc, sizeof(*tsc));
+	}
+
+	return;
+}

+ 169 - 0
drivers/staging/lustre/lnet/selftest/module.c

@@ -0,0 +1,169 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "selftest.h"
+
+enum {
+	LST_INIT_NONE		= 0,
+	LST_INIT_WI_SERIAL,
+	LST_INIT_WI_TEST,
+	LST_INIT_RPC,
+	LST_INIT_FW,
+	LST_INIT_CONSOLE
+};
+
+extern int lstcon_console_init(void);
+extern int lstcon_console_fini(void);
+
+static int lst_init_step = LST_INIT_NONE;
+
+struct cfs_wi_sched *lst_sched_serial;
+struct cfs_wi_sched **lst_sched_test;
+
+void
+lnet_selftest_fini(void)
+{
+	int	i;
+
+	switch (lst_init_step) {
+	case LST_INIT_CONSOLE:
+		lstcon_console_fini();
+		/* fall through */
+	case LST_INIT_FW:
+		sfw_shutdown();
+		/* fall through */
+	case LST_INIT_RPC:
+		srpc_shutdown();
+		/* fall through */
+	case LST_INIT_WI_TEST:
+		for (i = 0;
+		     i < cfs_cpt_number(lnet_cpt_table()); i++) {
+			if (lst_sched_test[i] == NULL)
+				continue;
+			cfs_wi_sched_destroy(lst_sched_test[i]);
+		}
+		LIBCFS_FREE(lst_sched_test,
+			    sizeof(lst_sched_test[0]) *
+			    cfs_cpt_number(lnet_cpt_table()));
+		lst_sched_test = NULL;
+		/* fall through */
+	case LST_INIT_WI_SERIAL:
+		cfs_wi_sched_destroy(lst_sched_serial);
+		lst_sched_serial = NULL;
+		/* fall through */
+	case LST_INIT_NONE:
+		break;
+	default:
+		LBUG();
+	}
+	return;
+}
+
+void
+lnet_selftest_structure_assertion(void)
+{
+	CLASSERT(sizeof(srpc_msg_t) == 160);
+	CLASSERT(sizeof(srpc_test_reqst_t) == 70);
+	CLASSERT(offsetof(srpc_msg_t, msg_body.tes_reqst.tsr_concur) == 72);
+	CLASSERT(offsetof(srpc_msg_t, msg_body.tes_reqst.tsr_ndest) == 78);
+	CLASSERT(sizeof(srpc_stat_reply_t) == 136);
+	CLASSERT(sizeof(srpc_stat_reqst_t) == 28);
+}
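+
+/*
+ * NB: the CLASSERTs above are compile-time checks pinning the wire sizes
+ * and offsets of the selftest messages; a change to selftest.h that
+ * breaks protocol compatibility with older peers fails the build.
+ */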
+
+int
+lnet_selftest_init(void)
+{
+	int	nscheds;
+	int	rc;
+	int	i;
+
+	rc = cfs_wi_sched_create("lst_s", lnet_cpt_table(), CFS_CPT_ANY,
+				 1, &lst_sched_serial);
+	if (rc != 0) {
+		CERROR("Failed to create serial WI scheduler for LST\n");
+		return rc;
+	}
+	lst_init_step = LST_INIT_WI_SERIAL;
+
+	nscheds = cfs_cpt_number(lnet_cpt_table());
+	LIBCFS_ALLOC(lst_sched_test, sizeof(lst_sched_test[0]) * nscheds);
+	if (lst_sched_test == NULL) {
+		rc = -ENOMEM;
+		goto error;
+	}
+
+	lst_init_step = LST_INIT_WI_TEST;
+	for (i = 0; i < nscheds; i++) {
+		int nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
+
+		/* reserve at least one CPU for LND */
+		nthrs = max(nthrs - 1, 1);
+		rc = cfs_wi_sched_create("lst_t", lnet_cpt_table(), i,
+					 nthrs, &lst_sched_test[i]);
+		if (rc != 0) {
+			CERROR("Failed to create CPT affinity WI scheduler "
+			       "%d for LST\n", i);
+			goto error;
+		}
+	}
+
+	rc = srpc_startup();
+	if (rc != 0) {
+		CERROR("LST can't start up RPC\n");
+		goto error;
+	}
+	lst_init_step = LST_INIT_RPC;
+
+	rc = sfw_startup();
+	if (rc != 0) {
+		CERROR("LST can't start up framework\n");
+		goto error;
+	}
+	lst_init_step = LST_INIT_FW;
+
+	rc = lstcon_console_init();
+	if (rc != 0) {
+		CERROR("LST can't start up console\n");
+		goto error;
+	}
+	lst_init_step = LST_INIT_CONSOLE;
+	return 0;
+error:
+	lnet_selftest_fini();
+	return rc;
+}
+
+MODULE_DESCRIPTION("LNet Selftest");
+MODULE_LICENSE("GPL");
+
+cfs_module(lnet, "0.9.0", lnet_selftest_init, lnet_selftest_fini);

+ 229 - 0
drivers/staging/lustre/lnet/selftest/ping_test.c

@@ -0,0 +1,229 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/conctl.c
+ *
+ * Test client & Server
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#include "selftest.h"
+
+#define LST_PING_TEST_MAGIC     0xbabeface
+
+int ping_srv_workitems = SFW_TEST_WI_MAX;
+CFS_MODULE_PARM(ping_srv_workitems, "i", int, 0644, "# PING server workitems");
+
+typedef struct {
+	spinlock_t	pnd_lock;	/* serialize */
+	int		pnd_counter;	/* sequence counter */
+} lst_ping_data_t;
+
+static lst_ping_data_t  lst_ping_data;
+
+static int
+ping_client_init(sfw_test_instance_t *tsi)
+{
+	sfw_session_t *sn = tsi->tsi_batch->bat_session;
+
+	LASSERT(tsi->tsi_is_client);
+	LASSERT(sn != NULL && (sn->sn_features & ~LST_FEATS_MASK) == 0);
+
+	spin_lock_init(&lst_ping_data.pnd_lock);
+	lst_ping_data.pnd_counter = 0;
+
+	return 0;
+}
+
+static void
+ping_client_fini (sfw_test_instance_t *tsi)
+{
+	sfw_session_t *sn = tsi->tsi_batch->bat_session;
+	int	    errors;
+
+	LASSERT (sn != NULL);
+	LASSERT (tsi->tsi_is_client);
+
+	errors = atomic_read(&sn->sn_ping_errors);
+	if (errors)
+		CWARN ("%d pings have failed.\n", errors);
+	else
+		CDEBUG (D_NET, "Ping test finished OK.\n");
+}
+
+static int
+ping_client_prep_rpc(sfw_test_unit_t *tsu,
+		     lnet_process_id_t dest, srpc_client_rpc_t **rpc)
+{
+	srpc_ping_reqst_t   *req;
+	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	sfw_session_t       *sn  = tsi->tsi_batch->bat_session;
+	struct timeval       tv;
+	int		     rc;
+
+	LASSERT(sn != NULL);
+	LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0);
+
+	rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc);
+	if (rc != 0)
+		return rc;
+
+	req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst;
+
+	req->pnr_magic = LST_PING_TEST_MAGIC;
+
+	spin_lock(&lst_ping_data.pnd_lock);
+	req->pnr_seq = lst_ping_data.pnd_counter++;
+	spin_unlock(&lst_ping_data.pnd_lock);
+
+	cfs_fs_timeval(&tv);
+	req->pnr_time_sec  = tv.tv_sec;
+	req->pnr_time_usec = tv.tv_usec;
+
+	return rc;
+}
+
+static void
+ping_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
+{
+	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	sfw_session_t       *sn = tsi->tsi_batch->bat_session;
+	srpc_ping_reqst_t   *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
+	srpc_ping_reply_t   *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
+	struct timeval       tv;
+
+	LASSERT (sn != NULL);
+
+	if (rpc->crpc_status != 0) {
+		if (!tsi->tsi_stopping) /* rpc could have been aborted */
+			atomic_inc(&sn->sn_ping_errors);
+		CERROR ("Unable to ping %s (%d): %d\n",
+			libcfs_id2str(rpc->crpc_dest),
+			reqst->pnr_seq, rpc->crpc_status);
+		return;
+	}
+
+	if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
+		__swab32s(&reply->pnr_seq);
+		__swab32s(&reply->pnr_magic);
+		__swab32s(&reply->pnr_status);
+	}
+
+	if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
+		rpc->crpc_status = -EBADMSG;
+		atomic_inc(&sn->sn_ping_errors);
+		CERROR ("Bad magic %u from %s, %u expected.\n",
+			reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
+			LST_PING_TEST_MAGIC);
+		return;
+	}
+
+	if (reply->pnr_seq != reqst->pnr_seq) {
+		rpc->crpc_status = -EBADMSG;
+		atomic_inc(&sn->sn_ping_errors);
+		CERROR ("Bad seq %u from %s, %u expected.\n",
+			reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
+			reqst->pnr_seq);
+		return;
+	}
+
+	cfs_fs_timeval(&tv);
+	CDEBUG (D_NET, "%d reply in %u usec\n", reply->pnr_seq,
+		(unsigned)((tv.tv_sec - (unsigned)reqst->pnr_time_sec) * 1000000
+			   + (tv.tv_usec - reqst->pnr_time_usec)));
+	return;
+}
+
+static int
+ping_server_handle(struct srpc_server_rpc *rpc)
+{
+	struct srpc_service *sv	      = rpc->srpc_scd->scd_svc;
+	srpc_msg_t	    *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
+	srpc_msg_t	    *replymsg = &rpc->srpc_replymsg;
+	srpc_ping_reqst_t   *req      = &reqstmsg->msg_body.ping_reqst;
+	srpc_ping_reply_t   *rep      = &replymsg->msg_body.ping_reply;
+
+	LASSERT (sv->sv_id == SRPC_SERVICE_PING);
+
+	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
+		LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
+
+		__swab32s(&req->pnr_seq);
+		__swab32s(&req->pnr_magic);
+		__swab64s(&req->pnr_time_sec);
+		__swab64s(&req->pnr_time_usec);
+	}
+	LASSERT (reqstmsg->msg_type == srpc_service2request(sv->sv_id));
+
+	if (req->pnr_magic != LST_PING_TEST_MAGIC) {
+		CERROR ("Unexpected magic %08x from %s\n",
+			req->pnr_magic, libcfs_id2str(rpc->srpc_peer));
+		return -EINVAL;
+	}
+
+	rep->pnr_seq   = req->pnr_seq;
+	rep->pnr_magic = LST_PING_TEST_MAGIC;
+
+	if ((reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+		replymsg->msg_ses_feats = LST_FEATS_MASK;
+		rep->pnr_status = EPROTO;
+		return 0;
+	}
+
+	replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
+
+	CDEBUG(D_NET, "Got ping %d from %s\n",
+	       req->pnr_seq, libcfs_id2str(rpc->srpc_peer));
+	return 0;
+}
+
+sfw_test_client_ops_t ping_test_client;
+void ping_init_test_client(void)
+{
+	ping_test_client.tso_init     = ping_client_init;
+	ping_test_client.tso_fini     = ping_client_fini;
+	ping_test_client.tso_prep_rpc = ping_client_prep_rpc;
+	ping_test_client.tso_done_rpc = ping_client_done_rpc;
+}
+
+srpc_service_t ping_test_service;
+void ping_init_test_service(void)
+{
+	ping_test_service.sv_id       = SRPC_SERVICE_PING;
+	ping_test_service.sv_name     = "ping_test";
+	ping_test_service.sv_handler  = ping_server_handle;
+	ping_test_service.sv_wi_total = ping_srv_workitems;
+}

+ 1665 - 0
drivers/staging/lustre/lnet/selftest/rpc.c

@@ -0,0 +1,1665 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/rpc.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ *
+ * 2012-05-13: Liang Zhen <liang@whamcloud.com>
+ * - percpt data for service to improve smp performance
+ * - code cleanup
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "selftest.h"
+
+typedef enum {
+	SRPC_STATE_NONE,
+	SRPC_STATE_NI_INIT,
+	SRPC_STATE_EQ_INIT,
+	SRPC_STATE_RUNNING,
+	SRPC_STATE_STOPPING,
+} srpc_state_t;
+
+struct smoketest_rpc {
+	spinlock_t	 rpc_glock;	/* global lock */
+	srpc_service_t	*rpc_services[SRPC_SERVICE_MAX_ID + 1];
+	lnet_handle_eq_t rpc_lnet_eq;	/* _the_ LNet event queue */
+	srpc_state_t	 rpc_state;
+	srpc_counters_t	 rpc_counters;
+	__u64		 rpc_matchbits;	/* matchbits counter */
+} srpc_data;
+
+static inline int
+srpc_serv_portal(int svc_id)
+{
+	return svc_id < SRPC_FRAMEWORK_SERVICE_MAX_ID ?
+	       SRPC_FRAMEWORK_REQUEST_PORTAL : SRPC_REQUEST_PORTAL;
+}
+
+/* forward refs */
+int srpc_handle_rpc (swi_workitem_t *wi);
+
+void srpc_get_counters (srpc_counters_t *cnt)
+{
+	spin_lock(&srpc_data.rpc_glock);
+	*cnt = srpc_data.rpc_counters;
+	spin_unlock(&srpc_data.rpc_glock);
+}
+
+void srpc_set_counters (const srpc_counters_t *cnt)
+{
+	spin_lock(&srpc_data.rpc_glock);
+	srpc_data.rpc_counters = *cnt;
+	spin_unlock(&srpc_data.rpc_glock);
+}
+
+int
+srpc_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i, int nob)
+{
+	nob = min(nob, (int)PAGE_CACHE_SIZE);
+
+	LASSERT(nob > 0);
+	LASSERT(i >= 0 && i < bk->bk_niov);
+
+	bk->bk_iovs[i].kiov_offset = 0;
+	bk->bk_iovs[i].kiov_page   = pg;
+	bk->bk_iovs[i].kiov_len    = nob;
+	return nob;
+}
+
+void
+srpc_free_bulk (srpc_bulk_t *bk)
+{
+	int	 i;
+	struct page *pg;
+
+	LASSERT (bk != NULL);
+
+	for (i = 0; i < bk->bk_niov; i++) {
+		pg = bk->bk_iovs[i].kiov_page;
+		if (pg == NULL)
+			break;
+
+		__free_page(pg);
+	}
+
+	LIBCFS_FREE(bk, offsetof(srpc_bulk_t, bk_iovs[bk->bk_niov]));
+	return;
+}
+
+srpc_bulk_t *
+srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink)
+{
+	srpc_bulk_t *bk;
+	int	     i;
+
+	LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
+
+	LIBCFS_CPT_ALLOC(bk, lnet_cpt_table(), cpt,
+			 offsetof(srpc_bulk_t, bk_iovs[bulk_npg]));
+	if (bk == NULL) {
+		CERROR("Can't allocate descriptor for %d pages\n", bulk_npg);
+		return NULL;
+	}
+
+	memset(bk, 0, offsetof(srpc_bulk_t, bk_iovs[bulk_npg]));
+	bk->bk_sink   = sink;
+	bk->bk_len    = bulk_len;
+	bk->bk_niov   = bulk_npg;
+
+	for (i = 0; i < bulk_npg; i++) {
+		struct page *pg;
+		int	    nob;
+
+		pg = cfs_page_cpt_alloc(lnet_cpt_table(), cpt, GFP_IOFS);
+		if (pg == NULL) {
+			CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
+			srpc_free_bulk(bk);
+			return NULL;
+		}
+
+		nob = srpc_add_bulk_page(bk, pg, i, bulk_len);
+		bulk_len -= nob;
+	}
+
+	return bk;
+}
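+
+/*
+ * NB: srpc_add_bulk_page() caps each kiov at PAGE_CACHE_SIZE and the
+ * loop above subtracts what each page consumed, so the final page of a
+ * bulk whose length isn't page-aligned carries only the remaining bytes.
+ */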
+
+static inline __u64
+srpc_next_id (void)
+{
+	__u64 id;
+
+	spin_lock(&srpc_data.rpc_glock);
+	id = srpc_data.rpc_matchbits++;
+	spin_unlock(&srpc_data.rpc_glock);
+	return id;
+}
+
+void
+srpc_init_server_rpc(struct srpc_server_rpc *rpc,
+		     struct srpc_service_cd *scd,
+		     struct srpc_buffer *buffer)
+{
+	memset(rpc, 0, sizeof(*rpc));
+	swi_init_workitem(&rpc->srpc_wi, rpc, srpc_handle_rpc,
+			  srpc_serv_is_framework(scd->scd_svc) ?
+			  lst_sched_serial : lst_sched_test[scd->scd_cpt]);
+
+	rpc->srpc_ev.ev_fired = 1; /* no event expected now */
+
+	rpc->srpc_scd      = scd;
+	rpc->srpc_reqstbuf = buffer;
+	rpc->srpc_peer     = buffer->buf_peer;
+	rpc->srpc_self     = buffer->buf_self;
+	LNetInvalidateHandle(&rpc->srpc_replymdh);
+}
+
+static void
+srpc_service_fini(struct srpc_service *svc)
+{
+	struct srpc_service_cd	*scd;
+	struct srpc_server_rpc	*rpc;
+	struct srpc_buffer	*buf;
+	struct list_head		*q;
+	int			i;
+
+	if (svc->sv_cpt_data == NULL)
+		return;
+
+	cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
+		while (1) {
+			if (!list_empty(&scd->scd_buf_posted))
+				q = &scd->scd_buf_posted;
+			else if (!list_empty(&scd->scd_buf_blocked))
+				q = &scd->scd_buf_blocked;
+			else
+				break;
+
+			while (!list_empty(q)) {
+				buf = list_entry(q->next,
+						     struct srpc_buffer,
+						     buf_list);
+				list_del(&buf->buf_list);
+				LIBCFS_FREE(buf, sizeof(*buf));
+			}
+		}
+
+		LASSERT(list_empty(&scd->scd_rpc_active));
+
+		while (!list_empty(&scd->scd_rpc_free)) {
+			rpc = list_entry(scd->scd_rpc_free.next,
+					     struct srpc_server_rpc,
+					     srpc_list);
+			list_del(&rpc->srpc_list);
+			LIBCFS_FREE(rpc, sizeof(*rpc));
+		}
+	}
+
+	cfs_percpt_free(svc->sv_cpt_data);
+	svc->sv_cpt_data = NULL;
+}
+
+static int
+srpc_service_nrpcs(struct srpc_service *svc)
+{
+	int nrpcs = svc->sv_wi_total / svc->sv_ncpts;
+
+	return srpc_serv_is_framework(svc) ?
+	       max(nrpcs, SFW_FRWK_WI_MIN) : max(nrpcs, SFW_TEST_WI_MIN);
+}
+
+int srpc_add_buffer(struct swi_workitem *wi);
+
+static int
+srpc_service_init(struct srpc_service *svc)
+{
+	struct srpc_service_cd	*scd;
+	struct srpc_server_rpc	*rpc;
+	int			nrpcs;
+	int			i;
+	int			j;
+
+	svc->sv_shuttingdown = 0;
+
+	svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(),
+					    sizeof(struct srpc_service_cd));
+	if (svc->sv_cpt_data == NULL)
+		return -ENOMEM;
+
+	svc->sv_ncpts = srpc_serv_is_framework(svc) ?
+			1 : cfs_cpt_number(lnet_cpt_table());
+	nrpcs = srpc_service_nrpcs(svc);
+
+	cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
+		scd->scd_cpt = i;
+		scd->scd_svc = svc;
+		spin_lock_init(&scd->scd_lock);
+		INIT_LIST_HEAD(&scd->scd_rpc_free);
+		INIT_LIST_HEAD(&scd->scd_rpc_active);
+		INIT_LIST_HEAD(&scd->scd_buf_posted);
+		INIT_LIST_HEAD(&scd->scd_buf_blocked);
+
+		scd->scd_ev.ev_data = scd;
+		scd->scd_ev.ev_type = SRPC_REQUEST_RCVD;
+
+		/* NB: don't use lst_sched_serial for adding buffers,
+		 * see details in srpc_service_add_buffers() */
+		swi_init_workitem(&scd->scd_buf_wi, scd,
+				  srpc_add_buffer, lst_sched_test[i]);
+
+		if (i != 0 && srpc_serv_is_framework(svc)) {
+			/* NB: a framework service only needs srpc_service_cd
+			 * for one partition, but we allocate for all to keep
+			 * the implementation simple; it wastes a little
+			 * memory but nobody should care */
+			continue;
+		}
+
+		for (j = 0; j < nrpcs; j++) {
+			LIBCFS_CPT_ALLOC(rpc, lnet_cpt_table(),
+					 i, sizeof(*rpc));
+			if (rpc == NULL) {
+				srpc_service_fini(svc);
+				return -ENOMEM;
+			}
+			list_add(&rpc->srpc_list, &scd->scd_rpc_free);
+		}
+	}
+
+	return 0;
+}
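+
+/*
+ * NB: sv_cpt_data is a per-CPT array; a framework service really uses
+ * only partition 0 (sv_ncpts == 1), but the array is still allocated
+ * for every CPT, trading a little memory for simpler indexing.
+ */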
+
+int
+srpc_add_service(struct srpc_service *sv)
+{
+	int id = sv->sv_id;
+
+	LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID);
+
+	if (srpc_service_init(sv) != 0)
+		return -ENOMEM;
+
+	spin_lock(&srpc_data.rpc_glock);
+
+	LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
+
+	if (srpc_data.rpc_services[id] != NULL) {
+		spin_unlock(&srpc_data.rpc_glock);
+		goto failed;
+	}
+
+	srpc_data.rpc_services[id] = sv;
+	spin_unlock(&srpc_data.rpc_glock);
+
+	CDEBUG(D_NET, "Adding service: id %d, name %s\n", id, sv->sv_name);
+	return 0;
+
+ failed:
+	srpc_service_fini(sv);
+	return -EBUSY;
+}
+
+int
+srpc_remove_service (srpc_service_t *sv)
+{
+	int id = sv->sv_id;
+
+	spin_lock(&srpc_data.rpc_glock);
+
+	if (srpc_data.rpc_services[id] != sv) {
+		spin_unlock(&srpc_data.rpc_glock);
+		return -ENOENT;
+	}
+
+	srpc_data.rpc_services[id] = NULL;
+	spin_unlock(&srpc_data.rpc_glock);
+	return 0;
+}
+
+int
+srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf,
+		       int len, int options, lnet_process_id_t peer,
+		       lnet_handle_md_t *mdh, srpc_event_t *ev)
+{
+	int		 rc;
+	lnet_md_t	 md;
+	lnet_handle_me_t meh;
+
+	rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK,
+			  local ? LNET_INS_LOCAL : LNET_INS_AFTER, &meh);
+	if (rc != 0) {
+		CERROR ("LNetMEAttach failed: %d\n", rc);
+		LASSERT (rc == -ENOMEM);
+		return -ENOMEM;
+	}
+
+	md.threshold = 1;
+	md.user_ptr  = ev;
+	md.start     = buf;
+	md.length    = len;
+	md.options   = options;
+	md.eq_handle = srpc_data.rpc_lnet_eq;
+
+	rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh);
+	if (rc != 0) {
+		CERROR ("LNetMDAttach failed: %d\n", rc);
+		LASSERT (rc == -ENOMEM);
+
+		rc = LNetMEUnlink(meh);
+		LASSERT (rc == 0);
+		return -ENOMEM;
+	}
+
+	CDEBUG (D_NET,
+		"Posted passive RDMA: peer %s, portal %d, matchbits "LPX64"\n",
+		libcfs_id2str(peer), portal, matchbits);
+	return 0;
+}
+
+int
+srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
+		      int options, lnet_process_id_t peer, lnet_nid_t self,
+		      lnet_handle_md_t *mdh, srpc_event_t *ev)
+{
+	int       rc;
+	lnet_md_t md;
+
+	md.user_ptr  = ev;
+	md.start     = buf;
+	md.length    = len;
+	md.eq_handle = srpc_data.rpc_lnet_eq;
+	md.threshold = ((options & LNET_MD_OP_GET) != 0) ? 2 : 1;
+	md.options   = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET);
+
+	rc = LNetMDBind(md, LNET_UNLINK, mdh);
+	if (rc != 0) {
+		CERROR ("LNetMDBind failed: %d\n", rc);
+		LASSERT (rc == -ENOMEM);
+		return -ENOMEM;
+	}
+
+	/* This is kind of an abuse of the LNET_MD_OP_{PUT,GET} options:
+	 * they're only meaningful for MDs attached to an ME (i.e. passive
+	 * buffers). */
+	if ((options & LNET_MD_OP_PUT) != 0) {
+		rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer,
+			     portal, matchbits, 0, 0);
+	} else {
+		LASSERT ((options & LNET_MD_OP_GET) != 0);
+
+		rc = LNetGet(self, *mdh, peer, portal, matchbits, 0);
+	}
+
+	if (rc != 0) {
+		CERROR ("LNet%s(%s, %d, "LPD64") failed: %d\n",
+			((options & LNET_MD_OP_PUT) != 0) ? "Put" : "Get",
+			libcfs_id2str(peer), portal, matchbits, rc);
+
+		/* The forthcoming unlink event will complete this operation
+		 * with failure, so fall through and return success here.
+		 */
+		rc = LNetMDUnlink(*mdh);
+		LASSERT (rc == 0);
+	} else {
+		CDEBUG (D_NET,
+			"Posted active RDMA: peer %s, portal %u, matchbits "LPX64"\n",
+			libcfs_id2str(peer), portal, matchbits);
+	}
+	return 0;
+}
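+
+/*
+ * NB: md.threshold is 2 for a GET because the active side sees two
+ * events (SEND and REPLY), but 1 for a PUT, which with LNET_NOACK_REQ
+ * generates a single SEND event.
+ */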
+
+int
+srpc_post_active_rqtbuf(lnet_process_id_t peer, int service, void *buf,
+			int len, lnet_handle_md_t *mdh, srpc_event_t *ev)
+{
+	return srpc_post_active_rdma(srpc_serv_portal(service), service,
+				     buf, len, LNET_MD_OP_PUT, peer,
+				     LNET_NID_ANY, mdh, ev);
+}
+
+int
+srpc_post_passive_rqtbuf(int service, int local, void *buf, int len,
+			 lnet_handle_md_t *mdh, srpc_event_t *ev)
+{
+	lnet_process_id_t any = {0};
+
+	any.nid = LNET_NID_ANY;
+	any.pid = LNET_PID_ANY;
+
+	return srpc_post_passive_rdma(srpc_serv_portal(service),
+				      local, service, buf, len,
+				      LNET_MD_OP_PUT, any, mdh, ev);
+}
+
+int
+srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf)
+{
+	struct srpc_service	*sv = scd->scd_svc;
+	struct srpc_msg		*msg = &buf->buf_msg;
+	int			rc;
+
+	LNetInvalidateHandle(&buf->buf_mdh);
+	list_add(&buf->buf_list, &scd->scd_buf_posted);
+	scd->scd_buf_nposted++;
+	spin_unlock(&scd->scd_lock);
+
+	rc = srpc_post_passive_rqtbuf(sv->sv_id,
+				      !srpc_serv_is_framework(sv),
+				      msg, sizeof(*msg), &buf->buf_mdh,
+				      &scd->scd_ev);
+
+	/* At this point, an RPC (new or delayed) may have arrived in
+	 * msg and its event handler may have been called; that is why
+	 * buf was added to scd_buf_posted _before_ dropping scd_lock
+	 * above. */
+
+	spin_lock(&scd->scd_lock);
+
+	if (rc == 0) {
+		if (!sv->sv_shuttingdown)
+			return 0;
+
+		spin_unlock(&scd->scd_lock);
+		/* srpc_shutdown_service might have tried to unlink me
+		 * when my buf_mdh was still invalid */
+		LNetMDUnlink(buf->buf_mdh);
+		spin_lock(&scd->scd_lock);
+		return 0;
+	}
+
+	scd->scd_buf_nposted--;
+	if (sv->sv_shuttingdown)
+		return rc; /* don't allow scd_buf_posted to change */
+
+	list_del(&buf->buf_list);
+	spin_unlock(&scd->scd_lock);
+
+	LIBCFS_FREE(buf, sizeof(*buf));
+
+	spin_lock(&scd->scd_lock);
+	return rc;
+}
+
+int
+srpc_add_buffer(struct swi_workitem *wi)
+{
+	struct srpc_service_cd	*scd = wi->swi_workitem.wi_data;
+	struct srpc_buffer	*buf;
+	int			rc = 0;
+
+	/* This is called by workitem scheduler threads; these threads
+	 * have been given CPT affinity, so buffers will be posted on the
+	 * CPT-local list of the portal. */
+	spin_lock(&scd->scd_lock);
+
+	while (scd->scd_buf_adjust > 0 &&
+	       !scd->scd_svc->sv_shuttingdown) {
+		scd->scd_buf_adjust--; /* consume it */
+		scd->scd_buf_posting++;
+
+		spin_unlock(&scd->scd_lock);
+
+		LIBCFS_ALLOC(buf, sizeof(*buf));
+		if (buf == NULL) {
+			CERROR("Failed to add new buf to service: %s\n",
+			       scd->scd_svc->sv_name);
+			spin_lock(&scd->scd_lock);
+			rc = -ENOMEM;
+			break;
+		}
+
+		spin_lock(&scd->scd_lock);
+		if (scd->scd_svc->sv_shuttingdown) {
+			spin_unlock(&scd->scd_lock);
+			LIBCFS_FREE(buf, sizeof(*buf));
+
+			spin_lock(&scd->scd_lock);
+			rc = -ESHUTDOWN;
+			break;
+		}
+
+		rc = srpc_service_post_buffer(scd, buf);
+		if (rc != 0)
+			break; /* buf has been freed inside */
+
+		LASSERT(scd->scd_buf_posting > 0);
+		scd->scd_buf_posting--;
+		scd->scd_buf_total++;
+		scd->scd_buf_low = MAX(2, scd->scd_buf_total / 4);
+	}
+
+	if (rc != 0) {
+		scd->scd_buf_err_stamp = cfs_time_current_sec();
+		scd->scd_buf_err = rc;
+
+		LASSERT(scd->scd_buf_posting > 0);
+		scd->scd_buf_posting--;
+	}
+
+	spin_unlock(&scd->scd_lock);
+	return 0;
+}
+
+int
+srpc_service_add_buffers(struct srpc_service *sv, int nbuffer)
+{
+	struct srpc_service_cd	*scd;
+	int			rc = 0;
+	int			i;
+
+	LASSERTF(nbuffer > 0, "nbuffer must be positive: %d\n", nbuffer);
+
+	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+		spin_lock(&scd->scd_lock);
+
+		scd->scd_buf_err = 0;
+		scd->scd_buf_err_stamp = 0;
+		scd->scd_buf_posting = 0;
+		scd->scd_buf_adjust = nbuffer;
+		/* start to post buffers */
+		swi_schedule_workitem(&scd->scd_buf_wi);
+		spin_unlock(&scd->scd_lock);
+
+		/* a framework service only posts buffers for one partition */
+		if (srpc_serv_is_framework(sv))
+			break;
+	}
+
+	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+		spin_lock(&scd->scd_lock);
+		/*
+		 * NB: srpc_service_add_buffers() can be called from the
+		 * thread context of lst_sched_serial, and we don't normally
+		 * allow sleeping inside the thread context of a WI scheduler,
+		 * because it blocks the current scheduler thread from doing
+		 * anything else; even worse, it could deadlock if it's
+		 * waiting on a result from another WI of the same scheduler.
+		 * However, it's safe here because scd_buf_wi is scheduled
+		 * by a thread in a different WI scheduler (lst_sched_test),
+		 * so there is no risk of deadlock, though this could
+		 * block all WIs pending on lst_sched_serial for a moment,
+		 * which is not good but not fatal.
+		 */
+		lst_wait_until(scd->scd_buf_err != 0 ||
+			       (scd->scd_buf_adjust == 0 &&
+				scd->scd_buf_posting == 0),
+			       scd->scd_lock, "waiting for adding buffer\n");
+
+		if (scd->scd_buf_err != 0 && rc == 0)
+			rc = scd->scd_buf_err;
+
+		spin_unlock(&scd->scd_lock);
+	}
+
+	return rc;
+}
+
+void
+srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer)
+{
+	struct srpc_service_cd	*scd;
+	int			num;
+	int			i;
+
+	LASSERT(!sv->sv_shuttingdown);
+
+	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+		spin_lock(&scd->scd_lock);
+
+		num = scd->scd_buf_total + scd->scd_buf_posting;
+		scd->scd_buf_adjust -= min(nbuffer, num);
+
+		spin_unlock(&scd->scd_lock);
+	}
+}
+
+/* returns 1 if sv has finished, otherwise 0 */
+int
+srpc_finish_service(struct srpc_service *sv)
+{
+	struct srpc_service_cd	*scd;
+	struct srpc_server_rpc	*rpc;
+	int			i;
+
+	LASSERT(sv->sv_shuttingdown); /* srpc_shutdown_service called */
+
+	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+		spin_lock(&scd->scd_lock);
+		if (!swi_deschedule_workitem(&scd->scd_buf_wi)) {
+			spin_unlock(&scd->scd_lock);
+			return 0;
+		}
+
+		if (scd->scd_buf_nposted > 0) {
+			CDEBUG(D_NET, "waiting for %d posted buffers to unlink\n",
+			       scd->scd_buf_nposted);
+			spin_unlock(&scd->scd_lock);
+			return 0;
+		}
+
+		if (list_empty(&scd->scd_rpc_active)) {
+			spin_unlock(&scd->scd_lock);
+			continue;
+		}
+
+		rpc = list_entry(scd->scd_rpc_active.next,
+				     struct srpc_server_rpc, srpc_list);
+		CNETERR("Active RPC %p on shutdown: sv %s, peer %s, "
+			"wi %s scheduled %d running %d, "
+			"ev fired %d type %d status %d lnet %d\n",
+			rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
+			swi_state2str(rpc->srpc_wi.swi_state),
+			rpc->srpc_wi.swi_workitem.wi_scheduled,
+			rpc->srpc_wi.swi_workitem.wi_running,
+			rpc->srpc_ev.ev_fired, rpc->srpc_ev.ev_type,
+			rpc->srpc_ev.ev_status, rpc->srpc_ev.ev_lnet);
+		spin_unlock(&scd->scd_lock);
+		return 0;
+	}
+
+	/* no lock needed from now on */
+	srpc_service_fini(sv);
+	return 1;
+}
+
+/* called with scd->scd_lock held */
+void
+srpc_service_recycle_buffer(struct srpc_service_cd *scd, srpc_buffer_t *buf)
+{
+	if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) {
+		if (srpc_service_post_buffer(scd, buf) != 0) {
+			CWARN("Failed to post %s buffer\n",
+			      scd->scd_svc->sv_name);
+		}
+		return;
+	}
+
+	/* service is shutting down, or we want to recycle some buffers */
+	scd->scd_buf_total--;
+
+	if (scd->scd_buf_adjust < 0) {
+		scd->scd_buf_adjust++;
+		if (scd->scd_buf_adjust < 0 &&
+		    scd->scd_buf_total == 0 && scd->scd_buf_posting == 0) {
+			CDEBUG(D_INFO,
+			       "Try to recycle %d buffers but nothing left\n",
+			       scd->scd_buf_adjust);
+			scd->scd_buf_adjust = 0;
+		}
+	}
+
+	spin_unlock(&scd->scd_lock);
+	LIBCFS_FREE(buf, sizeof(*buf));
+	spin_lock(&scd->scd_lock);
+}
+
+void
+srpc_abort_service(struct srpc_service *sv)
+{
+	struct srpc_service_cd	*scd;
+	struct srpc_server_rpc	*rpc;
+	int			i;
+
+	CDEBUG(D_NET, "Aborting service: id %d, name %s\n",
+	       sv->sv_id, sv->sv_name);
+
+	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+		spin_lock(&scd->scd_lock);
+
+		/* schedule in-flight RPCs to notice the abort; NB this
+		 * races with incoming RPCs, a complete fix should make
+		 * test RPCs carry the session ID in their headers */
+		list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) {
+			rpc->srpc_aborted = 1;
+			swi_schedule_workitem(&rpc->srpc_wi);
+		}
+
+		spin_unlock(&scd->scd_lock);
+	}
+}
+
+void
+srpc_shutdown_service(srpc_service_t *sv)
+{
+	struct srpc_service_cd	*scd;
+	struct srpc_server_rpc	*rpc;
+	srpc_buffer_t		*buf;
+	int			i;
+
+	CDEBUG(D_NET, "Shutting down service: id %d, name %s\n",
+	       sv->sv_id, sv->sv_name);
+
+	cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
+		spin_lock(&scd->scd_lock);
+
+	sv->sv_shuttingdown = 1; /* i.e. no new active RPC */
+
+	cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
+		spin_unlock(&scd->scd_lock);
+
+	cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+		spin_lock(&scd->scd_lock);
+
+		/* schedule in-flight RPCs to notice the shutdown */
+		list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list)
+			swi_schedule_workitem(&rpc->srpc_wi);
+
+		spin_unlock(&scd->scd_lock);
+
+		/* OK to traverse scd_buf_posted without lock, since no one
+		 * touches scd_buf_posted now */
+		list_for_each_entry(buf, &scd->scd_buf_posted, buf_list)
+			LNetMDUnlink(buf->buf_mdh);
+	}
+}
+
+int
+srpc_send_request (srpc_client_rpc_t *rpc)
+{
+	srpc_event_t *ev = &rpc->crpc_reqstev;
+	int	   rc;
+
+	ev->ev_fired = 0;
+	ev->ev_data  = rpc;
+	ev->ev_type  = SRPC_REQUEST_SENT;
+
+	rc = srpc_post_active_rqtbuf(rpc->crpc_dest, rpc->crpc_service,
+				     &rpc->crpc_reqstmsg, sizeof(srpc_msg_t),
+				     &rpc->crpc_reqstmdh, ev);
+	if (rc != 0) {
+		LASSERT (rc == -ENOMEM);
+		ev->ev_fired = 1;  /* no more event expected */
+	}
+	return rc;
+}
+
+int
+srpc_prepare_reply (srpc_client_rpc_t *rpc)
+{
+	srpc_event_t *ev = &rpc->crpc_replyev;
+	__u64	*id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid;
+	int	   rc;
+
+	ev->ev_fired = 0;
+	ev->ev_data  = rpc;
+	ev->ev_type  = SRPC_REPLY_RCVD;
+
+	*id = srpc_next_id();
+
+	rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
+				    &rpc->crpc_replymsg, sizeof(srpc_msg_t),
+				    LNET_MD_OP_PUT, rpc->crpc_dest,
+				    &rpc->crpc_replymdh, ev);
+	if (rc != 0) {
+		LASSERT (rc == -ENOMEM);
+		ev->ev_fired = 1;  /* no more event expected */
+	}
+	return rc;
+}
+
+int
+srpc_prepare_bulk (srpc_client_rpc_t *rpc)
+{
+	srpc_bulk_t  *bk = &rpc->crpc_bulk;
+	srpc_event_t *ev = &rpc->crpc_bulkev;
+	__u64	*id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid;
+	int	   rc;
+	int	   opt;
+
+	LASSERT (bk->bk_niov <= LNET_MAX_IOV);
+
+	if (bk->bk_niov == 0)
+		return 0; /* nothing to do */
+
+	opt = bk->bk_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET;
+	opt |= LNET_MD_KIOV;
+
+	ev->ev_fired = 0;
+	ev->ev_data  = rpc;
+	ev->ev_type  = SRPC_BULK_REQ_RCVD;
+
+	*id = srpc_next_id();
+
+	rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
+				    &bk->bk_iovs[0], bk->bk_niov, opt,
+				    rpc->crpc_dest, &bk->bk_mdh, ev);
+	if (rc != 0) {
+		LASSERT (rc == -ENOMEM);
+		ev->ev_fired = 1;  /* no more event expected */
+	}
+	return rc;
+}
+
+int
+srpc_do_bulk (srpc_server_rpc_t *rpc)
+{
+	srpc_event_t  *ev = &rpc->srpc_ev;
+	srpc_bulk_t   *bk = rpc->srpc_bulk;
+	__u64	  id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid;
+	int	    rc;
+	int	    opt;
+
+	LASSERT (bk != NULL);
+
+	opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT;
+	opt |= LNET_MD_KIOV;
+
+	ev->ev_fired = 0;
+	ev->ev_data  = rpc;
+	ev->ev_type  = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT;
+
+	rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id,
+				   &bk->bk_iovs[0], bk->bk_niov, opt,
+				   rpc->srpc_peer, rpc->srpc_self,
+				   &bk->bk_mdh, ev);
+	if (rc != 0)
+		ev->ev_fired = 1;  /* no more event expected */
+	return rc;
+}
+
+/* only called from srpc_handle_rpc */
+void
+srpc_server_rpc_done(srpc_server_rpc_t *rpc, int status)
+{
+	struct srpc_service_cd	*scd = rpc->srpc_scd;
+	struct srpc_service	*sv  = scd->scd_svc;
+	srpc_buffer_t		*buffer;
+
+	LASSERT (status != 0 || rpc->srpc_wi.swi_state == SWI_STATE_DONE);
+
+	rpc->srpc_status = status;
+
+	CDEBUG_LIMIT (status == 0 ? D_NET : D_NETERROR,
+		"Server RPC %p done: service %s, peer %s, status %s:%d\n",
+		rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
+		swi_state2str(rpc->srpc_wi.swi_state), status);
+
+	if (status != 0) {
+		spin_lock(&srpc_data.rpc_glock);
+		srpc_data.rpc_counters.rpcs_dropped++;
+		spin_unlock(&srpc_data.rpc_glock);
+	}
+
+	if (rpc->srpc_done != NULL)
+		(*rpc->srpc_done) (rpc);
+	LASSERT(rpc->srpc_bulk == NULL);
+
+	spin_lock(&scd->scd_lock);
+
+	if (rpc->srpc_reqstbuf != NULL) {
+		/* NB we might drop scd_lock in srpc_service_recycle_buffer,
+		 * but sv can't go away because scd_rpc_active is not empty */
+		srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf);
+		rpc->srpc_reqstbuf = NULL;
+	}
+
+	list_del(&rpc->srpc_list); /* from scd->scd_rpc_active */
+
+	/*
+	 * No one can schedule me now since:
+	 * - I'm not on scd_rpc_active.
+	 * - all LNet events have been fired.
+	 * Cancel pending schedules and prevent future schedule attempts:
+	 */
+	LASSERT(rpc->srpc_ev.ev_fired);
+	swi_exit_workitem(&rpc->srpc_wi);
+
+	if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) {
+		buffer = list_entry(scd->scd_buf_blocked.next,
+					srpc_buffer_t, buf_list);
+		list_del(&buffer->buf_list);
+
+		srpc_init_server_rpc(rpc, scd, buffer);
+		list_add_tail(&rpc->srpc_list, &scd->scd_rpc_active);
+		swi_schedule_workitem(&rpc->srpc_wi);
+	} else {
+		list_add(&rpc->srpc_list, &scd->scd_rpc_free);
+	}
+
+	spin_unlock(&scd->scd_lock);
+	return;
+}
+
+/* handles an incoming RPC */
+int
+srpc_handle_rpc(swi_workitem_t *wi)
+{
+	struct srpc_server_rpc	*rpc = wi->swi_workitem.wi_data;
+	struct srpc_service_cd	*scd = rpc->srpc_scd;
+	struct srpc_service	*sv = scd->scd_svc;
+	srpc_event_t		*ev = &rpc->srpc_ev;
+	int			rc = 0;
+
+	LASSERT(wi == &rpc->srpc_wi);
+
+	spin_lock(&scd->scd_lock);
+
+	if (sv->sv_shuttingdown || rpc->srpc_aborted) {
+		spin_unlock(&scd->scd_lock);
+
+		if (rpc->srpc_bulk != NULL)
+			LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
+		LNetMDUnlink(rpc->srpc_replymdh);
+
+		if (ev->ev_fired) { /* no more event, OK to finish */
+			srpc_server_rpc_done(rpc, -ESHUTDOWN);
+			return 1;
+		}
+		return 0;
+	}
+
+	spin_unlock(&scd->scd_lock);
+
+	switch (wi->swi_state) {
+	default:
+		LBUG ();
+	case SWI_STATE_NEWBORN: {
+		srpc_msg_t	   *msg;
+		srpc_generic_reply_t *reply;
+
+		msg = &rpc->srpc_reqstbuf->buf_msg;
+		reply = &rpc->srpc_replymsg.msg_body.reply;
+
+		if (msg->msg_magic == 0) {
+			/* moaned already in srpc_lnet_ev_handler */
+			srpc_server_rpc_done(rpc, -EBADMSG);
+			return 1;
+		}
+
+		srpc_unpack_msg_hdr(msg);
+		if (msg->msg_version != SRPC_MSG_VERSION) {
+			CWARN("Version mismatch: %u, %u expected, from %s\n",
+			      msg->msg_version, SRPC_MSG_VERSION,
+			      libcfs_id2str(rpc->srpc_peer));
+			reply->status = EPROTO;
+			/* drop through and send reply */
+		} else {
+			reply->status = 0;
+			rc = (*sv->sv_handler)(rpc);
+			LASSERT(reply->status == 0 || !rpc->srpc_bulk);
+			if (rc != 0) {
+				srpc_server_rpc_done(rpc, rc);
+				return 1;
+			}
+		}
+
+		wi->swi_state = SWI_STATE_BULK_STARTED;
+
+		if (rpc->srpc_bulk != NULL) {
+			rc = srpc_do_bulk(rpc);
+			if (rc == 0)
+				return 0; /* wait for bulk */
+
+			LASSERT (ev->ev_fired);
+			ev->ev_status = rc;
+		}
+	}
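+	/* fall through */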
+	case SWI_STATE_BULK_STARTED:
+		LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired);
+
+		if (rpc->srpc_bulk != NULL) {
+			rc = ev->ev_status;
+
+			if (sv->sv_bulk_ready != NULL)
+				rc = (*sv->sv_bulk_ready) (rpc, rc);
+
+			if (rc != 0) {
+				srpc_server_rpc_done(rpc, rc);
+				return 1;
+			}
+		}
+
+		wi->swi_state = SWI_STATE_REPLY_SUBMITTED;
+		rc = srpc_send_reply(rpc);
+		if (rc == 0)
+			return 0; /* wait for reply */
+		srpc_server_rpc_done(rpc, rc);
+		return 1;
+
+	case SWI_STATE_REPLY_SUBMITTED:
+		if (!ev->ev_fired) {
+			CERROR("RPC %p: bulk %p, service %d\n",
+			       rpc, rpc->srpc_bulk, sv->sv_id);
+			CERROR("Event: status %d, type %d, lnet %d\n",
+			       ev->ev_status, ev->ev_type, ev->ev_lnet);
+			LASSERT (ev->ev_fired);
+		}
+
+		wi->swi_state = SWI_STATE_DONE;
+		srpc_server_rpc_done(rpc, ev->ev_status);
+		return 1;
+	}
+
+	return 0;
+}
+
+void
+srpc_client_rpc_expired (void *data)
+{
+	srpc_client_rpc_t *rpc = data;
+
+	CWARN ("Client RPC expired: service %d, peer %s, timeout %d.\n",
+	       rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
+	       rpc->crpc_timeout);
+
+	spin_lock(&rpc->crpc_lock);
+
+	rpc->crpc_timeout = 0;
+	srpc_abort_rpc(rpc, -ETIMEDOUT);
+
+	spin_unlock(&rpc->crpc_lock);
+
+	spin_lock(&srpc_data.rpc_glock);
+	srpc_data.rpc_counters.rpcs_expired++;
+	spin_unlock(&srpc_data.rpc_glock);
+}
+
+inline void
+srpc_add_client_rpc_timer (srpc_client_rpc_t *rpc)
+{
+	stt_timer_t *timer = &rpc->crpc_timer;
+
+	if (rpc->crpc_timeout == 0)
+		return;
+
+	INIT_LIST_HEAD(&timer->stt_list);
+	timer->stt_data    = rpc;
+	timer->stt_func    = srpc_client_rpc_expired;
+	timer->stt_expires = cfs_time_add(rpc->crpc_timeout,
+					  cfs_time_current_sec());
+	stt_add_timer(timer);
+	return;
+}
+
+/*
+ * Called with rpc->crpc_lock held.
+ *
+ * Upon exit the RPC expiry timer is not queued and the handler is not
+ * running on any CPU. */
+void
+srpc_del_client_rpc_timer (srpc_client_rpc_t *rpc)
+{
+	/* timer not planted or already exploded */
+	if (rpc->crpc_timeout == 0)
+		return;
+
+	/* timer successfully defused */
+	if (stt_del_timer(&rpc->crpc_timer))
+		return;
+
+	/* timer fired; wait for the handler to finish and zero crpc_timeout */
+	while (rpc->crpc_timeout != 0) {
+		spin_unlock(&rpc->crpc_lock);
+
+		schedule();
+
+		spin_lock(&rpc->crpc_lock);
+	}
+}
+
+void
+srpc_client_rpc_done (srpc_client_rpc_t *rpc, int status)
+{
+	swi_workitem_t *wi = &rpc->crpc_wi;
+
+	LASSERT(status != 0 || wi->swi_state == SWI_STATE_DONE);
+
+	spin_lock(&rpc->crpc_lock);
+
+	rpc->crpc_closed = 1;
+	if (rpc->crpc_status == 0)
+		rpc->crpc_status = status;
+
+	srpc_del_client_rpc_timer(rpc);
+
+	CDEBUG_LIMIT ((status == 0) ? D_NET : D_NETERROR,
+		"Client RPC done: service %d, peer %s, status %s:%d:%d\n",
+		rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
+		swi_state2str(wi->swi_state), rpc->crpc_aborted, status);
+
+	/*
+	 * No one can schedule me now since:
+	 * - RPC timer has been defused.
+	 * - all LNet events have been fired.
+	 * - crpc_closed has been set, preventing srpc_abort_rpc from
+	 *   scheduling me.
+	 * Cancel pending schedules and prevent future schedule attempts:
+	 */
+	LASSERT (!srpc_event_pending(rpc));
+	swi_exit_workitem(wi);
+
+	spin_unlock(&rpc->crpc_lock);
+
+	(*rpc->crpc_done)(rpc);
+	return;
+}
+
+/* sends an outgoing RPC */
+int
+srpc_send_rpc (swi_workitem_t *wi)
+{
+	int		rc = 0;
+	srpc_client_rpc_t *rpc;
+	srpc_msg_t	*reply;
+	int		do_bulk;
+
+	LASSERT(wi != NULL);
+
+	rpc = wi->swi_workitem.wi_data;
+
+	LASSERT (rpc != NULL);
+	LASSERT (wi == &rpc->crpc_wi);
+
+	reply = &rpc->crpc_replymsg;
+	do_bulk = rpc->crpc_bulk.bk_niov > 0;
+
+	spin_lock(&rpc->crpc_lock);
+
+	if (rpc->crpc_aborted) {
+		spin_unlock(&rpc->crpc_lock);
+		goto abort;
+	}
+
+	spin_unlock(&rpc->crpc_lock);
+
+	switch (wi->swi_state) {
+	default:
+		LBUG ();
+	case SWI_STATE_NEWBORN:
+		LASSERT (!srpc_event_pending(rpc));
+
+		rc = srpc_prepare_reply(rpc);
+		if (rc != 0) {
+			srpc_client_rpc_done(rpc, rc);
+			return 1;
+		}
+
+		rc = srpc_prepare_bulk(rpc);
+		if (rc != 0)
+			break;
+
+		wi->swi_state = SWI_STATE_REQUEST_SUBMITTED;
+		rc = srpc_send_request(rpc);
+		break;
+
+	case SWI_STATE_REQUEST_SUBMITTED:
+		/* CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
+		 * order; however, they're processed in a strict order:
+		 * rqt, rpy, and bulk. */
+		if (!rpc->crpc_reqstev.ev_fired)
+			break;
+
+		rc = rpc->crpc_reqstev.ev_status;
+		if (rc != 0)
+			break;
+
+		wi->swi_state = SWI_STATE_REQUEST_SENT;
+		/* perhaps more events, fall thru */
+	case SWI_STATE_REQUEST_SENT: {
+		srpc_msg_type_t type = srpc_service2reply(rpc->crpc_service);
+
+		if (!rpc->crpc_replyev.ev_fired)
+			break;
+
+		rc = rpc->crpc_replyev.ev_status;
+		if (rc != 0)
+			break;
+
+		srpc_unpack_msg_hdr(reply);
+		if (reply->msg_type != type ||
+		    (reply->msg_magic != SRPC_MSG_MAGIC &&
+		     reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
+			CWARN ("Bad message from %s: type %u (%d expected),"
+			       " magic %u (%d expected).\n",
+			       libcfs_id2str(rpc->crpc_dest),
+			       reply->msg_type, type,
+			       reply->msg_magic, SRPC_MSG_MAGIC);
+			rc = -EBADMSG;
+			break;
+		}
+
+		if (do_bulk && reply->msg_body.reply.status != 0) {
+			CWARN ("Remote error %d at %s, unlink bulk buffer in "
+			       "case peer didn't initiate bulk transfer\n",
+			       reply->msg_body.reply.status,
+			       libcfs_id2str(rpc->crpc_dest));
+			LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
+		}
+
+		wi->swi_state = SWI_STATE_REPLY_RECEIVED;
+	}
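+	/* fall through */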
+	case SWI_STATE_REPLY_RECEIVED:
+		if (do_bulk && !rpc->crpc_bulkev.ev_fired)
+			break;
+
+		rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0;
+
+		/* Bulk buffer was unlinked due to remote error. Clear error
+		 * since reply buffer still contains valid data.
+		 * NB rpc->crpc_done shouldn't look into bulk data in case of
+		 * remote error. */
+		if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK &&
+		    rpc->crpc_status == 0 && reply->msg_body.reply.status != 0)
+			rc = 0;
+
+		wi->swi_state = SWI_STATE_DONE;
+		srpc_client_rpc_done(rpc, rc);
+		return 1;
+	}
+
+	if (rc != 0) {
+		spin_lock(&rpc->crpc_lock);
+		srpc_abort_rpc(rpc, rc);
+		spin_unlock(&rpc->crpc_lock);
+	}
+
+abort:
+	if (rpc->crpc_aborted) {
+		LNetMDUnlink(rpc->crpc_reqstmdh);
+		LNetMDUnlink(rpc->crpc_replymdh);
+		LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
+
+		if (!srpc_event_pending(rpc)) {
+			srpc_client_rpc_done(rpc, -EINTR);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+srpc_client_rpc_t *
+srpc_create_client_rpc (lnet_process_id_t peer, int service,
+			int nbulkiov, int bulklen,
+			void (*rpc_done)(srpc_client_rpc_t *),
+			void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
+{
+	srpc_client_rpc_t *rpc;
+
+	LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t,
+				   crpc_bulk.bk_iovs[nbulkiov]));
+	if (rpc == NULL)
+		return NULL;
+
+	srpc_init_client_rpc(rpc, peer, service, nbulkiov,
+			     bulklen, rpc_done, rpc_fini, priv);
+	return rpc;
+}
+
+/* called with rpc->crpc_lock held */
+void
+srpc_abort_rpc (srpc_client_rpc_t *rpc, int why)
+{
+	LASSERT (why != 0);
+
+	if (rpc->crpc_aborted || /* already aborted */
+	    rpc->crpc_closed)    /* callback imminent */
+		return;
+
+	CDEBUG (D_NET,
+		"Aborting RPC: service %d, peer %s, state %s, why %d\n",
+		rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
+		swi_state2str(rpc->crpc_wi.swi_state), why);
+
+	rpc->crpc_aborted = 1;
+	rpc->crpc_status  = why;
+	swi_schedule_workitem(&rpc->crpc_wi);
+	return;
+}
+
+/* called with rpc->crpc_lock held */
+void
+srpc_post_rpc (srpc_client_rpc_t *rpc)
+{
+	LASSERT (!rpc->crpc_aborted);
+	LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING);
+
+	CDEBUG (D_NET, "Posting RPC: peer %s, service %d, timeout %d\n",
+		libcfs_id2str(rpc->crpc_dest), rpc->crpc_service,
+		rpc->crpc_timeout);
+
+	srpc_add_client_rpc_timer(rpc);
+	swi_schedule_workitem(&rpc->crpc_wi);
+	return;
+}
+
+
+int
+srpc_send_reply(struct srpc_server_rpc *rpc)
+{
+	srpc_event_t		*ev = &rpc->srpc_ev;
+	struct srpc_msg		*msg = &rpc->srpc_replymsg;
+	struct srpc_buffer	*buffer = rpc->srpc_reqstbuf;
+	struct srpc_service_cd	*scd = rpc->srpc_scd;
+	struct srpc_service	*sv = scd->scd_svc;
+	__u64			rpyid;
+	int			rc;
+
+	LASSERT(buffer != NULL);
+	rpyid = buffer->buf_msg.msg_body.reqst.rpyid;
+
+	spin_lock(&scd->scd_lock);
+
+	if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) {
+		/* Repost buffer before replying since test client
+		 * might send me another RPC once it gets the reply */
+		if (srpc_service_post_buffer(scd, buffer) != 0)
+			CWARN("Failed to repost %s buffer\n", sv->sv_name);
+		rpc->srpc_reqstbuf = NULL;
+	}
+
+	spin_unlock(&scd->scd_lock);
+
+	ev->ev_fired = 0;
+	ev->ev_data  = rpc;
+	ev->ev_type  = SRPC_REPLY_SENT;
+
+	msg->msg_magic   = SRPC_MSG_MAGIC;
+	msg->msg_version = SRPC_MSG_VERSION;
+	msg->msg_type    = srpc_service2reply(sv->sv_id);
+
+	rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg,
+				   sizeof(*msg), LNET_MD_OP_PUT,
+				   rpc->srpc_peer, rpc->srpc_self,
+				   &rpc->srpc_replymdh, ev);
+	if (rc != 0)
+		ev->ev_fired = 1;  /* no more event expected */
+	return rc;
+}
+
+/* in the kernel, always called with LNET_LOCK() held and in thread context */
+void
+srpc_lnet_ev_handler(lnet_event_t *ev)
+{
+	struct srpc_service_cd	*scd;
+	srpc_event_t      *rpcev = ev->md.user_ptr;
+	srpc_client_rpc_t *crpc;
+	srpc_server_rpc_t *srpc;
+	srpc_buffer_t     *buffer;
+	srpc_service_t    *sv;
+	srpc_msg_t	*msg;
+	srpc_msg_type_t    type;
+
+	LASSERT (!in_interrupt());
+
+	if (ev->status != 0) {
+		spin_lock(&srpc_data.rpc_glock);
+		srpc_data.rpc_counters.errors++;
+		spin_unlock(&srpc_data.rpc_glock);
+	}
+
+	rpcev->ev_lnet = ev->type;
+
+	switch (rpcev->ev_type) {
+	default:
+		CERROR("Unknown event: status %d, type %d, lnet %d\n",
+		       rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
+		LBUG ();
+	case SRPC_REQUEST_SENT:
+		if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
+			spin_lock(&srpc_data.rpc_glock);
+			srpc_data.rpc_counters.rpcs_sent++;
+			spin_unlock(&srpc_data.rpc_glock);
+		}
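+		/* fall through */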
+	case SRPC_REPLY_RCVD:
+	case SRPC_BULK_REQ_RCVD:
+		crpc = rpcev->ev_data;
+
+		if (rpcev != &crpc->crpc_reqstev &&
+		    rpcev != &crpc->crpc_replyev &&
+		    rpcev != &crpc->crpc_bulkev) {
+			CERROR("rpcev %p, crpc %p, reqstev %p, replyev %p, bulkev %p\n",
+			       rpcev, crpc, &crpc->crpc_reqstev,
+			       &crpc->crpc_replyev, &crpc->crpc_bulkev);
+			CERROR("Bad event: status %d, type %d, lnet %d\n",
+			       rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
+			LBUG ();
+		}
+
+		spin_lock(&crpc->crpc_lock);
+
+		LASSERT(rpcev->ev_fired == 0);
+		rpcev->ev_fired  = 1;
+		rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
+						-EINTR : ev->status;
+		swi_schedule_workitem(&crpc->crpc_wi);
+
+		spin_unlock(&crpc->crpc_lock);
+		break;
+
+	case SRPC_REQUEST_RCVD:
+		scd = rpcev->ev_data;
+		sv = scd->scd_svc;
+
+		LASSERT(rpcev == &scd->scd_ev);
+
+		spin_lock(&scd->scd_lock);
+
+		LASSERT (ev->unlinked);
+		LASSERT (ev->type == LNET_EVENT_PUT ||
+			 ev->type == LNET_EVENT_UNLINK);
+		LASSERT (ev->type != LNET_EVENT_UNLINK ||
+			 sv->sv_shuttingdown);
+
+		buffer = container_of(ev->md.start, srpc_buffer_t, buf_msg);
+		buffer->buf_peer = ev->initiator;
+		buffer->buf_self = ev->target.nid;
+
+		LASSERT(scd->scd_buf_nposted > 0);
+		scd->scd_buf_nposted--;
+
+		if (sv->sv_shuttingdown) {
+			/* Leave buffer on scd->scd_buf_posted since
+			 * srpc_finish_service needs to traverse it. */
+			spin_unlock(&scd->scd_lock);
+			break;
+		}
+
+		if (scd->scd_buf_err_stamp != 0 &&
+		    scd->scd_buf_err_stamp < cfs_time_current_sec()) {
+			/* re-enable adding buffer */
+			scd->scd_buf_err_stamp = 0;
+			scd->scd_buf_err = 0;
+		}
+
+		if (scd->scd_buf_err == 0 && /* adding buffer is enabled */
+		    scd->scd_buf_adjust == 0 &&
+		    scd->scd_buf_nposted < scd->scd_buf_low) {
+			scd->scd_buf_adjust = MAX(scd->scd_buf_total / 2,
+						  SFW_TEST_WI_MIN);
+			swi_schedule_workitem(&scd->scd_buf_wi);
+		}
+
+		list_del(&buffer->buf_list); /* from scd->scd_buf_posted */
+		msg = &buffer->buf_msg;
+		type = srpc_service2request(sv->sv_id);
+
+		if (ev->status != 0 || ev->mlength != sizeof(*msg) ||
+		    (msg->msg_type != type &&
+		     msg->msg_type != __swab32(type)) ||
+		    (msg->msg_magic != SRPC_MSG_MAGIC &&
+		     msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
+			CERROR ("Dropping RPC (%s) from %s: "
+				"status %d mlength %d type %u magic %u.\n",
+				sv->sv_name, libcfs_id2str(ev->initiator),
+				ev->status, ev->mlength,
+				msg->msg_type, msg->msg_magic);
+
+			/* NB can't call srpc_service_recycle_buffer here since
+			 * it may call LNetM[DE]Attach. The invalid magic tells
+			 * srpc_handle_rpc to drop this RPC */
+			msg->msg_magic = 0;
+		}
+
+		if (!list_empty(&scd->scd_rpc_free)) {
+			srpc = list_entry(scd->scd_rpc_free.next,
+					      struct srpc_server_rpc,
+					      srpc_list);
+			list_del(&srpc->srpc_list);
+
+			srpc_init_server_rpc(srpc, scd, buffer);
+			list_add_tail(&srpc->srpc_list,
+					  &scd->scd_rpc_active);
+			swi_schedule_workitem(&srpc->srpc_wi);
+		} else {
+			list_add_tail(&buffer->buf_list,
+					  &scd->scd_buf_blocked);
+		}
+
+		spin_unlock(&scd->scd_lock);
+
+		spin_lock(&srpc_data.rpc_glock);
+		srpc_data.rpc_counters.rpcs_rcvd++;
+		spin_unlock(&srpc_data.rpc_glock);
+		break;
+
+	case SRPC_BULK_GET_RPLD:
+		LASSERT (ev->type == LNET_EVENT_SEND ||
+			 ev->type == LNET_EVENT_REPLY ||
+			 ev->type == LNET_EVENT_UNLINK);
+
+		if (!ev->unlinked)
+			break; /* wait for final event */
+
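+		/* fall through */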
+	case SRPC_BULK_PUT_SENT:
+		if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
+			spin_lock(&srpc_data.rpc_glock);
+
+			if (rpcev->ev_type == SRPC_BULK_GET_RPLD)
+				srpc_data.rpc_counters.bulk_get += ev->mlength;
+			else
+				srpc_data.rpc_counters.bulk_put += ev->mlength;
+
+			spin_unlock(&srpc_data.rpc_glock);
+		}
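+		/* fall through */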
+	case SRPC_REPLY_SENT:
+		srpc = rpcev->ev_data;
+		scd  = srpc->srpc_scd;
+
+		LASSERT(rpcev == &srpc->srpc_ev);
+
+		spin_lock(&scd->scd_lock);
+
+		rpcev->ev_fired  = 1;
+		rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
+				   -EINTR : ev->status;
+		swi_schedule_workitem(&srpc->srpc_wi);
+
+		spin_unlock(&scd->scd_lock);
+		break;
+	}
+}
+
+
+int
+srpc_startup (void)
+{
+	int rc;
+
+	memset(&srpc_data, 0, sizeof(struct smoketest_rpc));
+	spin_lock_init(&srpc_data.rpc_glock);
+
+	/* 1 second pause to avoid timestamp reuse */
+	cfs_pause(cfs_time_seconds(1));
+	srpc_data.rpc_matchbits = ((__u64) cfs_time_current_sec()) << 48;
+
+	srpc_data.rpc_state = SRPC_STATE_NONE;
+
+	rc = LNetNIInit(LUSTRE_SRV_LNET_PID);
+	if (rc < 0) {
+		CERROR ("LNetNIInit() has failed: %d\n", rc);
+		return rc;
+	}
+
+	srpc_data.rpc_state = SRPC_STATE_NI_INIT;
+
+	LNetInvalidateHandle(&srpc_data.rpc_lnet_eq);
+	rc = LNetEQAlloc(0, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq);
+	if (rc != 0) {
+		CERROR("LNetEQAlloc() has failed: %d\n", rc);
+		goto bail;
+	}
+
+	rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
+	LASSERT(rc == 0);
+	rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL);
+	LASSERT(rc == 0);
+
+	srpc_data.rpc_state = SRPC_STATE_EQ_INIT;
+
+	rc = stt_startup();
+
+bail:
+	if (rc != 0)
+		srpc_shutdown();
+	else
+		srpc_data.rpc_state = SRPC_STATE_RUNNING;
+
+	return rc;
+}
+
+void
+srpc_shutdown (void)
+{
+	int i;
+	int rc;
+	int state;
+
+	state = srpc_data.rpc_state;
+	srpc_data.rpc_state = SRPC_STATE_STOPPING;
+
+	switch (state) {
+	default:
+		LBUG ();
+	case SRPC_STATE_RUNNING:
+		spin_lock(&srpc_data.rpc_glock);
+
+		for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) {
+			srpc_service_t *sv = srpc_data.rpc_services[i];
+
+			LASSERTF (sv == NULL,
+				  "service not empty: id %d, name %s\n",
+				  i, sv->sv_name);
+		}
+
+		spin_unlock(&srpc_data.rpc_glock);
+
+		stt_shutdown();
+
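+		/* fall through */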
+	case SRPC_STATE_EQ_INIT:
+		rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
+		LASSERT(rc == 0);
+		rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL);
+		LASSERT(rc == 0);
+		rc = LNetEQFree(srpc_data.rpc_lnet_eq);
+		LASSERT (rc == 0); /* the EQ should have no user by now */
+
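+		/* fall through */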
+	case SRPC_STATE_NI_INIT:
+		LNetNIFini();
+	}
+
+	return;
+}

+ 302 - 0
drivers/staging/lustre/lnet/selftest/rpc.h

@@ -0,0 +1,302 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __SELFTEST_RPC_H__
+#define __SELFTEST_RPC_H__
+
+#include <linux/lnet/lnetst.h>
+
+/*
+ * LST wired structures
+ *
+ * XXX: *REPLY == *REQST + 1
+ */
+typedef enum {
+	SRPC_MSG_MKSN_REQST     = 0,
+	SRPC_MSG_MKSN_REPLY     = 1,
+	SRPC_MSG_RMSN_REQST     = 2,
+	SRPC_MSG_RMSN_REPLY     = 3,
+	SRPC_MSG_BATCH_REQST    = 4,
+	SRPC_MSG_BATCH_REPLY    = 5,
+	SRPC_MSG_STAT_REQST     = 6,
+	SRPC_MSG_STAT_REPLY     = 7,
+	SRPC_MSG_TEST_REQST     = 8,
+	SRPC_MSG_TEST_REPLY     = 9,
+	SRPC_MSG_DEBUG_REQST    = 10,
+	SRPC_MSG_DEBUG_REPLY    = 11,
+	SRPC_MSG_BRW_REQST      = 12,
+	SRPC_MSG_BRW_REPLY      = 13,
+	SRPC_MSG_PING_REQST     = 14,
+	SRPC_MSG_PING_REPLY     = 15,
+	SRPC_MSG_JOIN_REQST     = 16,
+	SRPC_MSG_JOIN_REPLY     = 17,
+} srpc_msg_type_t;
+
+
+/* CAVEAT EMPTOR:
+ * In every srpc_*_reqst_t, the 1st field must be the matchbits of the
+ * reply buffer, and the 2nd field the matchbits of the bulk buffer,
+ * if any.
+ *
+ * In every srpc_*_reply_t, the 1st field must be a __u32 status, and
+ * the 2nd field the session id, if needed.
+ */
+typedef struct {
+	__u64			rpyid;		/* reply buffer matchbits */
+	__u64			bulkid;		/* bulk buffer matchbits */
+} WIRE_ATTR srpc_generic_reqst_t;
+
+typedef struct {
+	__u32		   status;
+	lst_sid_t	       sid;
+} WIRE_ATTR srpc_generic_reply_t;
+
+/* FRAMEWORK RPCs */
+typedef struct {
+	__u64			mksn_rpyid;      /* reply buffer matchbits */
+	lst_sid_t	       mksn_sid;	/* session id */
+	__u32			mksn_force;      /* use brute force */
+	char			mksn_name[LST_NAME_SIZE];
+} WIRE_ATTR srpc_mksn_reqst_t;			/* make session request */
+
+typedef struct {
+	__u32		   mksn_status;      /* session status */
+	lst_sid_t	       mksn_sid;	 /* session id */
+	__u32		   mksn_timeout;     /* session timeout */
+	char			mksn_name[LST_NAME_SIZE];
+} WIRE_ATTR srpc_mksn_reply_t; /* make session reply */
+
+typedef struct {
+	__u64			rmsn_rpyid;      /* reply buffer matchbits */
+	lst_sid_t		rmsn_sid;	/* session id */
+} WIRE_ATTR srpc_rmsn_reqst_t; /* remove session request */
+
+typedef struct {
+	__u32			rmsn_status;
+	lst_sid_t		rmsn_sid;	/* session id */
+} WIRE_ATTR srpc_rmsn_reply_t; /* remove session reply */
+
+typedef struct {
+	__u64			join_rpyid;     /* reply buffer matchbits */
+	lst_sid_t	       join_sid;       /* session id to join */
+	char		    join_group[LST_NAME_SIZE]; /* group name */
+} WIRE_ATTR srpc_join_reqst_t;
+
+typedef struct {
+	__u32		   join_status;    /* returned status */
+	lst_sid_t	       join_sid;       /* session id */
+	__u32			join_timeout;   /* # seconds' inactivity to expire */
+	char		    join_session[LST_NAME_SIZE]; /* session name */
+} WIRE_ATTR srpc_join_reply_t;
+
+typedef struct {
+	__u64		   dbg_rpyid;      /* reply buffer matchbits */
+	lst_sid_t	       dbg_sid;	/* session id */
+	__u32		   dbg_flags;      /* bitmap of debug */
+} WIRE_ATTR srpc_debug_reqst_t;
+
+typedef struct {
+	__u32		   dbg_status;     /* returned code */
+	lst_sid_t	       dbg_sid;	/* session id */
+	__u32		   dbg_timeout;    /* session timeout */
+	__u32		   dbg_nbatch;     /* # of batches in the node */
+	char		    dbg_name[LST_NAME_SIZE]; /* session name */
+} WIRE_ATTR srpc_debug_reply_t;
+
+#define SRPC_BATCH_OPC_RUN      1
+#define SRPC_BATCH_OPC_STOP     2
+#define SRPC_BATCH_OPC_QUERY    3
+
+typedef struct {
+	__u64		   bar_rpyid;      /* reply buffer matchbits */
+	lst_sid_t	       bar_sid;	/* session id */
+	lst_bid_t	       bar_bid;	/* batch id */
+	__u32		   bar_opc;	/* create/start/stop batch */
+	__u32		   bar_testidx;    /* index of test */
+	__u32		   bar_arg;	/* parameters */
+} WIRE_ATTR srpc_batch_reqst_t;
+
+typedef struct {
+	__u32		   bar_status;     /* status of request */
+	lst_sid_t	       bar_sid;	/* session id */
+	__u32		   bar_active;     /* # of active tests in batch/test */
+	__u32		   bar_time;       /* remaining time */
+} WIRE_ATTR srpc_batch_reply_t;
+
+typedef struct {
+	__u64		   str_rpyid;      /* reply buffer matchbits */
+	lst_sid_t	       str_sid;	/* session id */
+	__u32		   str_type;       /* type of stat */
+} WIRE_ATTR srpc_stat_reqst_t;
+
+typedef struct {
+	__u32		   str_status;
+	lst_sid_t	       str_sid;
+	sfw_counters_t	  str_fw;
+	srpc_counters_t	 str_rpc;
+	lnet_counters_t	 str_lnet;
+} WIRE_ATTR srpc_stat_reply_t;
+
+typedef struct {
+	__u32		   blk_opc;	/* bulk operation code */
+	__u32		   blk_npg;	/* # of pages */
+	__u32		   blk_flags;      /* reserved flags */
+} WIRE_ATTR test_bulk_req_t;
+
+typedef struct {
+	/** bulk operation code */
+	__u16			blk_opc;
+	/** data check flags */
+	__u16			blk_flags;
+	/** data length */
+	__u32			blk_len;
+	/** reserved: offset */
+	__u32		   blk_offset;
+} WIRE_ATTR test_bulk_req_v1_t;
+
+typedef struct {
+	__u32			png_size;       /* size of ping message */
+	__u32			png_flags;      /* reserved flags */
+} WIRE_ATTR test_ping_req_t;
+
+typedef struct {
+	__u64			tsr_rpyid;      /* reply buffer matchbits */
+	__u64			tsr_bulkid;     /* bulk buffer matchbits */
+	lst_sid_t		tsr_sid;	/* session id */
+	lst_bid_t		tsr_bid;	/* batch id */
+	__u32			tsr_service;    /* test type: bulk|ping|... */
+	/* test client loop count or # server buffers needed */
+	__u32			tsr_loop;
+	__u32			tsr_concur;     /* concurrency of test */
+	__u8			tsr_is_client;  /* is test client or not */
+	__u8			tsr_stop_onerr; /* stop on error */
+	__u32			tsr_ndest;      /* # of dest nodes */
+
+	union {
+		test_ping_req_t		ping;
+		test_bulk_req_t		bulk_v0;
+		test_bulk_req_v1_t	bulk_v1;
+	}		tsr_u;
+} WIRE_ATTR srpc_test_reqst_t;
+
+typedef struct {
+	__u32			tsr_status;     /* returned code */
+	lst_sid_t		tsr_sid;
+} WIRE_ATTR srpc_test_reply_t;
+
+/* TEST RPCs */
+typedef struct {
+	__u64		   pnr_rpyid;
+	__u32		   pnr_magic;
+	__u32		   pnr_seq;
+	__u64		   pnr_time_sec;
+	__u64		   pnr_time_usec;
+} WIRE_ATTR srpc_ping_reqst_t;
+
+typedef struct {
+	__u32		   pnr_status;
+	__u32		   pnr_magic;
+	__u32		   pnr_seq;
+} WIRE_ATTR srpc_ping_reply_t;
+
+typedef struct {
+	__u64		   brw_rpyid;      /* reply buffer matchbits */
+	__u64		   brw_bulkid;     /* bulk buffer matchbits */
+	__u32		   brw_rw;	 /* read or write */
+	__u32		   brw_len;	/* bulk data len */
+	__u32		   brw_flags;      /* bulk data patterns */
+} WIRE_ATTR srpc_brw_reqst_t; /* bulk r/w request */
+
+typedef struct {
+	__u32		   brw_status;
+} WIRE_ATTR srpc_brw_reply_t; /* bulk r/w reply */
+
+#define SRPC_MSG_MAGIC		  0xeeb0f00d
+#define SRPC_MSG_VERSION		1
+
+typedef struct srpc_msg {
+	/** magic number */
+	__u32	msg_magic;
+	/** message version number */
+	__u32	msg_version;
+	/** type of message body: srpc_msg_type_t */
+	__u32	msg_type;
+	__u32	msg_reserved0;
+	__u32	msg_reserved1;
+	/** test session features */
+	__u32	msg_ses_feats;
+	union {
+		srpc_generic_reqst_t reqst;
+		srpc_generic_reply_t reply;
+
+		srpc_mksn_reqst_t    mksn_reqst;
+		srpc_mksn_reply_t    mksn_reply;
+		srpc_rmsn_reqst_t    rmsn_reqst;
+		srpc_rmsn_reply_t    rmsn_reply;
+		srpc_debug_reqst_t   dbg_reqst;
+		srpc_debug_reply_t   dbg_reply;
+		srpc_batch_reqst_t   bat_reqst;
+		srpc_batch_reply_t   bat_reply;
+		srpc_stat_reqst_t    stat_reqst;
+		srpc_stat_reply_t    stat_reply;
+		srpc_test_reqst_t    tes_reqst;
+		srpc_test_reply_t    tes_reply;
+		srpc_join_reqst_t    join_reqst;
+		srpc_join_reply_t    join_reply;
+
+		srpc_ping_reqst_t    ping_reqst;
+		srpc_ping_reply_t    ping_reply;
+		srpc_brw_reqst_t     brw_reqst;
+		srpc_brw_reply_t     brw_reply;
+	}     msg_body;
+} WIRE_ATTR srpc_msg_t;
+
+static inline void
+srpc_unpack_msg_hdr(srpc_msg_t *msg)
+{
+	if (msg->msg_magic == SRPC_MSG_MAGIC)
+		return; /* no flipping needed */
+
+	/* We do not swap the magic number here as it is needed to
+	   determine whether the body needs to be swapped. */
+	/* __swab32s(&msg->msg_magic); */
+	__swab32s(&msg->msg_type);
+	__swab32s(&msg->msg_version);
+	__swab32s(&msg->msg_ses_feats);
+	__swab32s(&msg->msg_reserved0);
+	__swab32s(&msg->msg_reserved1);
+}
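
The unswapped magic doubles as a byte-order probe: srpc_lnet_ev_handler accepts a request only if msg_magic matches SRPC_MSG_MAGIC in either byte order, and srpc_unpack_msg_hdr() above then swabs the rest of the header. A minimal sketch of that classification (hypothetical helper, not part of this patch):

	/* Hypothetical helper, for illustration only: classify a received
	 * message by its magic.  0 = native byte order, 1 = opposite
	 * order (header needs srpc_unpack_msg_hdr()), -EBADMSG = not an
	 * srpc message at all. */
	static inline int srpc_msg_byteorder(const srpc_msg_t *msg)
	{
		if (msg->msg_magic == SRPC_MSG_MAGIC)
			return 0;
		if (msg->msg_magic == __swab32(SRPC_MSG_MAGIC))
			return 1;
		return -EBADMSG;
	}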
+
+#endif /* __SELFTEST_RPC_H__ */

+ 611 - 0
drivers/staging/lustre/lnet/selftest/selftest.h

@@ -0,0 +1,611 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/selftest.h
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ */
+#ifndef __SELFTEST_SELFTEST_H__
+#define __SELFTEST_SELFTEST_H__
+
+#define LNET_ONLY
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-lnet.h>
+#include <linux/lnet/lib-types.h>
+#include <linux/lnet/lnetst.h>
+
+#include "rpc.h"
+#include "timer.h"
+
+#ifndef MADE_WITHOUT_COMPROMISE
+#define MADE_WITHOUT_COMPROMISE
+#endif
+
+
+#define SWI_STATE_NEWBORN		  0
+#define SWI_STATE_REPLY_SUBMITTED	  1
+#define SWI_STATE_REPLY_SENT	       2
+#define SWI_STATE_REQUEST_SUBMITTED	3
+#define SWI_STATE_REQUEST_SENT	     4
+#define SWI_STATE_REPLY_RECEIVED	   5
+#define SWI_STATE_BULK_STARTED	     6
+#define SWI_STATE_DONE		     10
+
+/* forward refs */
+struct srpc_service;
+struct srpc_service_cd;
+struct sfw_test_unit;
+struct sfw_test_instance;
+
+/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework
+ * services, e.g. create/modify session.
+ */
+#define SRPC_SERVICE_DEBUG	      0
+#define SRPC_SERVICE_MAKE_SESSION       1
+#define SRPC_SERVICE_REMOVE_SESSION     2
+#define SRPC_SERVICE_BATCH	      3
+#define SRPC_SERVICE_TEST	       4
+#define SRPC_SERVICE_QUERY_STAT	 5
+#define SRPC_SERVICE_JOIN	       6
+#define SRPC_FRAMEWORK_SERVICE_MAX_ID   10
+/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */
+#define SRPC_SERVICE_BRW		11
+#define SRPC_SERVICE_PING	       12
+#define SRPC_SERVICE_MAX_ID	     12
+
+#define SRPC_REQUEST_PORTAL	     50
+/* a lazy portal for framework RPC requests */
+#define SRPC_FRAMEWORK_REQUEST_PORTAL   51
+/* all reply/bulk RDMAs go to this portal */
+#define SRPC_RDMA_PORTAL		52
+
+static inline srpc_msg_type_t
+srpc_service2request (int service)
+{
+	switch (service) {
+	default:
+		LBUG ();
+	case SRPC_SERVICE_DEBUG:
+		return SRPC_MSG_DEBUG_REQST;
+
+	case SRPC_SERVICE_MAKE_SESSION:
+		return SRPC_MSG_MKSN_REQST;
+
+	case SRPC_SERVICE_REMOVE_SESSION:
+		return SRPC_MSG_RMSN_REQST;
+
+	case SRPC_SERVICE_BATCH:
+		return SRPC_MSG_BATCH_REQST;
+
+	case SRPC_SERVICE_TEST:
+		return SRPC_MSG_TEST_REQST;
+
+	case SRPC_SERVICE_QUERY_STAT:
+		return SRPC_MSG_STAT_REQST;
+
+	case SRPC_SERVICE_BRW:
+		return SRPC_MSG_BRW_REQST;
+
+	case SRPC_SERVICE_PING:
+		return SRPC_MSG_PING_REQST;
+
+	case SRPC_SERVICE_JOIN:
+		return SRPC_MSG_JOIN_REQST;
+	}
+}
+
+static inline srpc_msg_type_t
+srpc_service2reply (int service)
+{
+	return srpc_service2request(service) + 1;
+}
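
Since each *_REPLY value in srpc_msg_type_t is its *_REQST value plus one (the "XXX: *REPLY == *REQST + 1" invariant noted in rpc.h), the reply type is derived arithmetically; for example (illustration only):

	/* SRPC_MSG_PING_REQST == 14, so srpc_service2reply() yields
	 * SRPC_MSG_PING_REPLY == 15 */
	srpc_msg_type_t t = srpc_service2reply(SRPC_SERVICE_PING);
	LASSERT(t == SRPC_MSG_PING_REPLY);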
+
+typedef enum {
+	SRPC_BULK_REQ_RCVD   = 1, /* passive bulk request (PUT sink/GET source) received */
+	SRPC_BULK_PUT_SENT   = 2, /* active bulk PUT sent (source) */
+	SRPC_BULK_GET_RPLD   = 3, /* active bulk GET replied (sink) */
+	SRPC_REPLY_RCVD      = 4, /* incoming reply received */
+	SRPC_REPLY_SENT      = 5, /* outgoing reply sent */
+	SRPC_REQUEST_RCVD    = 6, /* incoming request received */
+	SRPC_REQUEST_SENT    = 7, /* outgoing request sent */
+} srpc_event_type_t;
+
+/* RPC event */
+typedef struct {
+	srpc_event_type_t ev_type;   /* what's up */
+	lnet_event_kind_t ev_lnet;   /* LNet event type */
+	int	       ev_fired;  /* LNet event fired? */
+	int	       ev_status; /* LNet event status */
+	void	     *ev_data;   /* owning server/client RPC */
+} srpc_event_t;
+
+typedef struct {
+	int	      bk_len;  /* len of bulk data */
+	lnet_handle_md_t bk_mdh;
+	int	      bk_sink; /* sink/source */
+	int	      bk_niov; /* # iov in bk_iovs */
+	lnet_kiov_t      bk_iovs[0];
+} srpc_bulk_t; /* bulk descriptor */
+
+/* message buffer descriptor */
+typedef struct srpc_buffer {
+	struct list_head	   buf_list; /* chain on srpc_service::*_msgq */
+	srpc_msg_t	   buf_msg;
+	lnet_handle_md_t     buf_mdh;
+	lnet_nid_t	   buf_self;
+	lnet_process_id_t    buf_peer;
+} srpc_buffer_t;
+
+struct swi_workitem;
+typedef int (*swi_action_t) (struct swi_workitem *);
+
+typedef struct swi_workitem {
+	struct cfs_wi_sched	*swi_sched;
+	cfs_workitem_t       swi_workitem;
+	swi_action_t	 swi_action;
+	int		  swi_state;
+} swi_workitem_t;
+
+/* server-side state of a RPC */
+typedef struct srpc_server_rpc {
+	/* chain on srpc_service::*_rpcq */
+	struct list_head		srpc_list;
+	struct srpc_service_cd *srpc_scd;
+	swi_workitem_t       srpc_wi;
+	srpc_event_t	 srpc_ev;      /* bulk/reply event */
+	lnet_nid_t	   srpc_self;
+	lnet_process_id_t    srpc_peer;
+	srpc_msg_t	   srpc_replymsg;
+	lnet_handle_md_t     srpc_replymdh;
+	srpc_buffer_t       *srpc_reqstbuf;
+	srpc_bulk_t	 *srpc_bulk;
+
+	unsigned int	 srpc_aborted; /* being given up */
+	int		  srpc_status;
+	void	       (*srpc_done)(struct srpc_server_rpc *);
+} srpc_server_rpc_t;
+
+/* client-side state of a RPC */
+typedef struct srpc_client_rpc {
+	struct list_head		crpc_list;	/* chain on user's lists */
+	spinlock_t		crpc_lock;	/* serialize */
+	int		  crpc_service;
+	atomic_t	 crpc_refcount;
+	int		  crpc_timeout; /* # seconds to wait for reply */
+	stt_timer_t	  crpc_timer;
+	swi_workitem_t       crpc_wi;
+	lnet_process_id_t    crpc_dest;
+
+	void	       (*crpc_done)(struct srpc_client_rpc *);
+	void	       (*crpc_fini)(struct srpc_client_rpc *);
+	int		  crpc_status;    /* completion status */
+	void		*crpc_priv;      /* caller data */
+
+	/* state flags */
+	unsigned int	 crpc_aborted:1; /* being given up */
+	unsigned int	 crpc_closed:1;  /* completed */
+
+	/* RPC events */
+	srpc_event_t	 crpc_bulkev;    /* bulk event */
+	srpc_event_t	 crpc_reqstev;   /* request event */
+	srpc_event_t	 crpc_replyev;   /* reply event */
+
+	/* bulk, request(reqst), and reply exchanged on wire */
+	srpc_msg_t	   crpc_reqstmsg;
+	srpc_msg_t	   crpc_replymsg;
+	lnet_handle_md_t     crpc_reqstmdh;
+	lnet_handle_md_t     crpc_replymdh;
+	srpc_bulk_t	  crpc_bulk;
+} srpc_client_rpc_t;
+
+#define srpc_client_rpc_size(rpc)				       \
+offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
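
The macro sizes an RPC by where its flexible bk_iovs[] tail ends, which is the same expression LIBCFS_ALLOC() uses in srpc_create_client_rpc(); for example (illustration only):

	/* an RPC carrying 2 bulk iovecs occupies the fixed header plus
	 * two lnet_kiov_t entries */
	size_t sz = offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[2]);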
+
+#define srpc_client_rpc_addref(rpc)				     \
+do {								    \
+	CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n",			 \
+	       (rpc), libcfs_id2str((rpc)->crpc_dest),		  \
+	       atomic_read(&(rpc)->crpc_refcount));		 \
+	LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0);	    \
+	atomic_inc(&(rpc)->crpc_refcount);			  \
+} while (0)
+
+#define srpc_client_rpc_decref(rpc)				     \
+do {								    \
+	CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n",			 \
+	       (rpc), libcfs_id2str((rpc)->crpc_dest),		  \
+	       atomic_read(&(rpc)->crpc_refcount));		 \
+	LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0);	    \
+	if (atomic_dec_and_test(&(rpc)->crpc_refcount))	     \
+		srpc_destroy_client_rpc(rpc);			   \
+} while (0)
+
+#define srpc_event_pending(rpc)   ((rpc)->crpc_bulkev.ev_fired == 0 ||  \
+				   (rpc)->crpc_reqstev.ev_fired == 0 || \
+				   (rpc)->crpc_replyev.ev_fired == 0)
+
+/* CPU partition data of srpc service */
+struct srpc_service_cd {
+	/** serialize */
+	spinlock_t		scd_lock;
+	/** backref to service */
+	struct srpc_service	*scd_svc;
+	/** event buffer */
+	srpc_event_t		scd_ev;
+	/** free RPC descriptors */
+	struct list_head		scd_rpc_free;
+	/** in-flight RPCs */
+	struct list_head		scd_rpc_active;
+	/** workitem for posting buffer */
+	swi_workitem_t		scd_buf_wi;
+	/** CPT id */
+	int			scd_cpt;
+	/** error code for scd_buf_wi */
+	int			scd_buf_err;
+	/** timestamp for scd_buf_err */
+	unsigned long	   scd_buf_err_stamp;
+	/** total # request buffers */
+	int			scd_buf_total;
+	/** # posted request buffers */
+	int			scd_buf_nposted;
+	/** in progress of buffer posting */
+	int			scd_buf_posting;
+	/** allocate more buffers if scd_buf_nposted < scd_buf_low */
+	int			scd_buf_low;
+	/** increase/decrease some buffers */
+	int			scd_buf_adjust;
+	/** posted message buffers */
+	struct list_head		scd_buf_posted;
+	/** blocked for RPC descriptor */
+	struct list_head		scd_buf_blocked;
+};
+
+/* number of server workitems (mini-threads) for the test service */
+#define SFW_TEST_WI_MIN		256
+#define SFW_TEST_WI_MAX		2048
+/* extra buffers for tolerating buggy peers, or unbalanced number
+ * of peers between partitions  */
+#define SFW_TEST_WI_EXTRA	64
+
+/* number of server workitems (mini-threads) for the framework service */
+#define SFW_FRWK_WI_MIN		16
+#define SFW_FRWK_WI_MAX		256
+
+typedef struct srpc_service {
+	int			sv_id;		/* service id */
+	const char		*sv_name;	/* human readable name */
+	int			sv_wi_total;	/* total server workitems */
+	int			sv_shuttingdown;
+	int			sv_ncpts;
+	/* percpt data for srpc_service */
+	struct srpc_service_cd	**sv_cpt_data;
+	/* Service callbacks:
+	 * - sv_handler: process incoming RPC request
+	 * - sv_bulk_ready: notify bulk data
+	 */
+	int	      (*sv_handler) (srpc_server_rpc_t *);
+	int	      (*sv_bulk_ready) (srpc_server_rpc_t *, int);
+} srpc_service_t;
+
+typedef struct {
+	struct list_head	sn_list;    /* chain on fw_zombie_sessions */
+	lst_sid_t	 sn_id;      /* unique identifier */
+	unsigned int      sn_timeout; /* # seconds' inactivity to expire */
+	int	       sn_timer_active;
+	unsigned int	  sn_features;
+	stt_timer_t       sn_timer;
+	struct list_head	sn_batches; /* list of batches */
+	char	      sn_name[LST_NAME_SIZE];
+	atomic_t      sn_refcount;
+	atomic_t      sn_brw_errors;
+	atomic_t      sn_ping_errors;
+	cfs_time_t	sn_started;
+} sfw_session_t;
+
+#define sfw_sid_equal(sid0, sid1)     ((sid0).ses_nid == (sid1).ses_nid && \
+				       (sid0).ses_stamp == (sid1).ses_stamp)
+
+typedef struct {
+	struct list_head	bat_list;      /* chain on sn_batches */
+	lst_bid_t	 bat_id;	/* batch id */
+	int	       bat_error;     /* error code of batch */
+	sfw_session_t    *bat_session;   /* batch's session */
+	atomic_t      bat_nactive;   /* # of active tests */
+	struct list_head	bat_tests;     /* test instances */
+} sfw_batch_t;
+
+typedef struct {
+	int  (*tso_init)(struct sfw_test_instance *tsi); /* initialize test client */
+	void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test client */
+	int  (*tso_prep_rpc)(struct sfw_test_unit *tsu,
+			     lnet_process_id_t dest,
+			     srpc_client_rpc_t **rpc);   /* prep a test rpc */
+	void (*tso_done_rpc)(struct sfw_test_unit *tsu,
+			     srpc_client_rpc_t *rpc);    /* finish a test rpc */
+} sfw_test_client_ops_t;
+
+typedef struct sfw_test_instance {
+	struct list_head	      tsi_list;	 /* chain on batch */
+	int		     tsi_service;      /* test type */
+	sfw_batch_t	    *tsi_batch;	/* batch */
+	sfw_test_client_ops_t  *tsi_ops;	  /* test client operations */
+
+	/* public parameter for all test units */
+	unsigned int		tsi_is_client:1;     /* is test client */
+	unsigned int		tsi_stoptsu_onerr:1; /* stop tsu on error */
+	int		     tsi_concur;	  /* concurrency */
+	int		     tsi_loop;	    /* loop count */
+
+	/* status of test instance */
+	spinlock_t		tsi_lock;	  /* serialize */
+	unsigned int		tsi_stopping:1;   /* test is stopping */
+	atomic_t	    tsi_nactive;      /* # of active test unit */
+	struct list_head	      tsi_units;	/* test units */
+	struct list_head	      tsi_free_rpcs;    /* free rpcs */
+	struct list_head	      tsi_active_rpcs;  /* active rpcs */
+
+	union {
+		test_ping_req_t		ping;	  /* ping parameter */
+		test_bulk_req_t		bulk_v0;  /* bulk parameter */
+		test_bulk_req_v1_t	bulk_v1;  /* bulk v1 parameter */
+	} tsi_u;
+} sfw_test_instance_t;
+
+/* XXX: trailing (PAGE_CACHE_SIZE % sizeof(lnet_process_id_t)) bytes at
+ * the end of pages are not used */
+#define SFW_MAX_CONCUR     LST_MAX_CONCUR
+#define SFW_ID_PER_PAGE    (PAGE_CACHE_SIZE / sizeof(lnet_process_id_packed_t))
+#define SFW_MAX_NDESTS     (LNET_MAX_IOV * SFW_ID_PER_PAGE)
+#define sfw_id_pages(n)    (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
+
+typedef struct sfw_test_unit {
+	struct list_head	    tsu_list;	 /* chain on lst_test_instance */
+	lnet_process_id_t     tsu_dest;	 /* id of dest node */
+	int		   tsu_loop;	 /* loop count of the test */
+	sfw_test_instance_t  *tsu_instance;     /* pointer to test instance */
+	void		 *tsu_private;      /* private data */
+	swi_workitem_t	tsu_worker;       /* workitem of the test unit */
+} sfw_test_unit_t;
+
+typedef struct sfw_test_case {
+	struct list_head	      tsc_list;	 /* chain on fw_tests */
+	srpc_service_t	 *tsc_srv_service;  /* test service */
+	sfw_test_client_ops_t  *tsc_cli_ops;      /* ops of test client */
+} sfw_test_case_t;
+
+srpc_client_rpc_t *
+sfw_create_rpc(lnet_process_id_t peer, int service,
+	       unsigned features, int nbulkiov, int bulklen,
+	       void (*done) (srpc_client_rpc_t *), void *priv);
+int sfw_create_test_rpc(sfw_test_unit_t *tsu,
+			lnet_process_id_t peer, unsigned features,
+			int nblk, int blklen, srpc_client_rpc_t **rpc);
+void sfw_abort_rpc(srpc_client_rpc_t *rpc);
+void sfw_post_rpc(srpc_client_rpc_t *rpc);
+void sfw_client_rpc_done(srpc_client_rpc_t *rpc);
+void sfw_unpack_message(srpc_msg_t *msg);
+void sfw_free_pages(srpc_server_rpc_t *rpc);
+void sfw_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i);
+int sfw_alloc_pages(srpc_server_rpc_t *rpc, int cpt, int npages, int len,
+		    int sink);
+int sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply);
+
+srpc_client_rpc_t *
+srpc_create_client_rpc(lnet_process_id_t peer, int service,
+		       int nbulkiov, int bulklen,
+		       void (*rpc_done)(srpc_client_rpc_t *),
+		       void (*rpc_fini)(srpc_client_rpc_t *), void *priv);
+void srpc_post_rpc(srpc_client_rpc_t *rpc);
+void srpc_abort_rpc(srpc_client_rpc_t *rpc, int why);
+void srpc_free_bulk(srpc_bulk_t *bk);
+srpc_bulk_t *srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len,
+			     int sink);
+int srpc_send_rpc(swi_workitem_t *wi);
+int srpc_send_reply(srpc_server_rpc_t *rpc);
+int srpc_add_service(srpc_service_t *sv);
+int srpc_remove_service(srpc_service_t *sv);
+void srpc_shutdown_service(srpc_service_t *sv);
+void srpc_abort_service(srpc_service_t *sv);
+int srpc_finish_service(srpc_service_t *sv);
+int srpc_service_add_buffers(srpc_service_t *sv, int nbuffer);
+void srpc_service_remove_buffers(srpc_service_t *sv, int nbuffer);
+void srpc_get_counters(srpc_counters_t *cnt);
+void srpc_set_counters(const srpc_counters_t *cnt);
+
+extern struct cfs_wi_sched *lst_sched_serial;
+extern struct cfs_wi_sched **lst_sched_test;
+
+static inline int
+srpc_serv_is_framework(struct srpc_service *svc)
+{
+	return svc->sv_id < SRPC_FRAMEWORK_SERVICE_MAX_ID;
+}
+
+static inline int
+swi_wi_action(cfs_workitem_t *wi)
+{
+	swi_workitem_t *swi = container_of(wi, swi_workitem_t, swi_workitem);
+
+	return swi->swi_action(swi);
+}
+
+static inline void
+swi_init_workitem(swi_workitem_t *swi, void *data,
+		  swi_action_t action, struct cfs_wi_sched *sched)
+{
+	swi->swi_sched  = sched;
+	swi->swi_action = action;
+	swi->swi_state  = SWI_STATE_NEWBORN;
+	cfs_wi_init(&swi->swi_workitem, data, swi_wi_action);
+}
+
+static inline void
+swi_schedule_workitem(swi_workitem_t *wi)
+{
+	cfs_wi_schedule(wi->swi_sched, &wi->swi_workitem);
+}
+
+static inline void
+swi_exit_workitem(swi_workitem_t *swi)
+{
+	cfs_wi_exit(swi->swi_sched, &swi->swi_workitem);
+}
+
+static inline int
+swi_deschedule_workitem(swi_workitem_t *swi)
+{
+	return cfs_wi_deschedule(swi->swi_sched, &swi->swi_workitem);
+}
+
+
+int sfw_startup(void);
+int srpc_startup(void);
+void sfw_shutdown(void);
+void srpc_shutdown(void);
+
+static inline void
+srpc_destroy_client_rpc (srpc_client_rpc_t *rpc)
+{
+	LASSERT (rpc != NULL);
+	LASSERT (!srpc_event_pending(rpc));
+	LASSERT (atomic_read(&rpc->crpc_refcount) == 0);
+
+	if (rpc->crpc_fini == NULL) {
+		LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
+	} else {
+		(*rpc->crpc_fini) (rpc);
+	}
+
+	return;
+}
+
+static inline void
+srpc_init_client_rpc (srpc_client_rpc_t *rpc, lnet_process_id_t peer,
+		      int service, int nbulkiov, int bulklen,
+		      void (*rpc_done)(srpc_client_rpc_t *),
+		      void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
+{
+	LASSERT (nbulkiov <= LNET_MAX_IOV);
+
+	memset(rpc, 0, offsetof(srpc_client_rpc_t,
+				crpc_bulk.bk_iovs[nbulkiov]));
+
+	INIT_LIST_HEAD(&rpc->crpc_list);
+	swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc,
+			  lst_sched_test[lnet_cpt_of_nid(peer.nid)]);
+	spin_lock_init(&rpc->crpc_lock);
+	atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
+
+	rpc->crpc_dest	 = peer;
+	rpc->crpc_priv	 = priv;
+	rpc->crpc_service      = service;
+	rpc->crpc_bulk.bk_len  = bulklen;
+	rpc->crpc_bulk.bk_niov = nbulkiov;
+	rpc->crpc_done	 = rpc_done;
+	rpc->crpc_fini	 = rpc_fini;
+	LNetInvalidateHandle(&rpc->crpc_reqstmdh);
+	LNetInvalidateHandle(&rpc->crpc_replymdh);
+	LNetInvalidateHandle(&rpc->crpc_bulk.bk_mdh);
+
+	/* no event is expected at this point */
+	rpc->crpc_bulkev.ev_fired  =
+	rpc->crpc_reqstev.ev_fired =
+	rpc->crpc_replyev.ev_fired = 1;
+
+	rpc->crpc_reqstmsg.msg_magic   = SRPC_MSG_MAGIC;
+	rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION;
+	rpc->crpc_reqstmsg.msg_type    = srpc_service2request(service);
+	return;
+}
+
+static inline const char *
+swi_state2str (int state)
+{
+#define STATE2STR(x) case x: return #x
+	switch (state) {
+	default:
+		LBUG();
+	STATE2STR(SWI_STATE_NEWBORN);
+	STATE2STR(SWI_STATE_REPLY_SUBMITTED);
+	STATE2STR(SWI_STATE_REPLY_SENT);
+	STATE2STR(SWI_STATE_REQUEST_SUBMITTED);
+	STATE2STR(SWI_STATE_REQUEST_SENT);
+	STATE2STR(SWI_STATE_REPLY_RECEIVED);
+	STATE2STR(SWI_STATE_BULK_STARTED);
+	STATE2STR(SWI_STATE_DONE);
+	}
+#undef STATE2STR
+}
+
+#define UNUSED(x)       ( (void)(x) )
+
+
+#define selftest_wait_events()	cfs_pause(cfs_time_seconds(1) / 10)
+
+
+#define lst_wait_until(cond, lock, fmt, ...)				\
+do {									\
+	int __I = 2;							\
+	while (!(cond)) {						\
+		CDEBUG(IS_PO2(++__I) ? D_WARNING : D_NET,		\
+		       fmt, ## __VA_ARGS__);				\
+		spin_unlock(&(lock));					\
+									\
+		selftest_wait_events();					\
+									\
+		spin_lock(&(lock));					\
+	}								\
+} while (0)
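
The caller must already hold `lock`: the macro re-checks `cond` with the lock held and drops it only while sleeping, as stt_shutdown() in timer.c does when draining its threads. A typical call looks like this (sketch only, not part of this patch):

	/* sketch: wait under scd_lock until no request buffers remain
	 * posted, logging progressively less often */
	spin_lock(&scd->scd_lock);
	lst_wait_until(scd->scd_buf_nposted == 0, scd->scd_lock,
		       "waiting for %d posted buffers\n",
		       scd->scd_buf_nposted);
	spin_unlock(&scd->scd_lock);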
+
+static inline void
+srpc_wait_service_shutdown(srpc_service_t *sv)
+{
+	int i = 2;
+
+	LASSERT(sv->sv_shuttingdown);
+
+	while (srpc_finish_service(sv) == 0) {
+		i++;
+		CDEBUG (((i & -i) == i) ? D_WARNING : D_NET,
+			"Waiting for %s service to shutdown...\n",
+			sv->sv_name);
+		selftest_wait_events();
+	}
+}
+
+#endif /* __SELFTEST_SELFTEST_H__ */

+ 253 - 0
drivers/staging/lustre/lnet/selftest/timer.c

@@ -0,0 +1,253 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/timer.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "selftest.h"
+
+
+/*
+ * Timers are implemented as a sorted queue of expiry times. The queue
+ * is slotted, with each slot holding timers which expire in a
+ * 2**STTIMER_MINPOLL (8) second period. The timers in each slot are
+ * sorted by increasing expiry time. The number of slots is 2**7 (128),
+ * to cover a time period of 1024 seconds into the future before wrapping.
+ */
+#define STTIMER_MINPOLL	3   /* log2 min poll interval (8 s) */
+#define STTIMER_SLOTTIME       (1 << STTIMER_MINPOLL)
+#define STTIMER_SLOTTIMEMASK   (~(STTIMER_SLOTTIME - 1))
+#define STTIMER_NSLOTS	       (1 << 7)
+#define STTIMER_SLOT(t)	       (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \
+						    (STTIMER_NSLOTS - 1))])
+
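A worked example of the slot hash (illustration only): expiry times 1000 s and 2024 s are exactly 1024 s apart, so both map to the same slot, and only the full-expiry comparison in stt_expire_list() keeps the later one from firing early:

	/* (1000 >> 3) & 127 == 125, and (2024 >> 3) & 127 == 125:
	 * both land in stt_data.stt_hash[125], one 1024-second wrap
	 * apart; stt_add_timer() keeps each slot sorted by stt_expires */
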
+struct st_timer_data {
+	spinlock_t	 stt_lock;
+	/* start time of the slot processed previously */
+	cfs_time_t       stt_prev_slot;
+	struct list_head       stt_hash[STTIMER_NSLOTS];
+	int	      stt_shuttingdown;
+	wait_queue_head_t      stt_waitq;
+	int	      stt_nthreads;
+} stt_data;
+
+void
+stt_add_timer(stt_timer_t *timer)
+{
+	struct list_head *pos;
+
+	spin_lock(&stt_data.stt_lock);
+
+	LASSERT (stt_data.stt_nthreads > 0);
+	LASSERT (!stt_data.stt_shuttingdown);
+	LASSERT (timer->stt_func != NULL);
+	LASSERT (list_empty(&timer->stt_list));
+	LASSERT (cfs_time_after(timer->stt_expires, cfs_time_current_sec()));
+
+	/* a simple insertion sort */
+	list_for_each_prev (pos, STTIMER_SLOT(timer->stt_expires)) {
+		stt_timer_t *old = list_entry(pos, stt_timer_t, stt_list);
+
+		if (cfs_time_aftereq(timer->stt_expires, old->stt_expires))
+			break;
+	}
+	list_add(&timer->stt_list, pos);
+
+	spin_unlock(&stt_data.stt_lock);
+}
+
+/*
+ * The function returns whether it has deactivated a pending timer or not.
+ * (i.e. del_timer() of an inactive timer returns 0, del_timer() of an
+ * active timer returns 1.)
+ *
+ * CAVEAT EMPTOR:
+ * When 0 is returned, it is possible that timer->stt_func _is_ running on
+ * another CPU.
+ */
+int
+stt_del_timer (stt_timer_t *timer)
+{
+	int ret = 0;
+
+	spin_lock(&stt_data.stt_lock);
+
+	LASSERT (stt_data.stt_nthreads > 0);
+	LASSERT (!stt_data.stt_shuttingdown);
+
+	if (!list_empty(&timer->stt_list)) {
+		ret = 1;
+		list_del_init(&timer->stt_list);
+	}
+
+	spin_unlock(&stt_data.stt_lock);
+	return ret;
+}
+
+/* called with stt_data.stt_lock held */
+int
+stt_expire_list (struct list_head *slot, cfs_time_t now)
+{
+	int	  expired = 0;
+	stt_timer_t *timer;
+
+	while (!list_empty(slot)) {
+		timer = list_entry(slot->next, stt_timer_t, stt_list);
+
+		if (cfs_time_after(timer->stt_expires, now))
+			break;
+
+		list_del_init(&timer->stt_list);
+		spin_unlock(&stt_data.stt_lock);
+
+		expired++;
+		(*timer->stt_func) (timer->stt_data);
+
+		spin_lock(&stt_data.stt_lock);
+	}
+
+	return expired;
+}
+
+int
+stt_check_timers (cfs_time_t *last)
+{
+	int	expired = 0;
+	cfs_time_t now;
+	cfs_time_t this_slot;
+
+	now = cfs_time_current_sec();
+	this_slot = now & STTIMER_SLOTTIMEMASK;
+
+	spin_lock(&stt_data.stt_lock);
+
+	/* walk backwards from the current slot to the previously
+	 * processed one, so no slot is missed even if the timer
+	 * thread was delayed by more than one slot time */
+	while (cfs_time_aftereq(this_slot, *last)) {
+		expired += stt_expire_list(STTIMER_SLOT(this_slot), now);
+		this_slot = cfs_time_sub(this_slot, STTIMER_SLOTTIME);
+	}
+
+	*last = now & STTIMER_SLOTTIMEMASK;
+	spin_unlock(&stt_data.stt_lock);
+	return expired;
+}
+
+
+int
+stt_timer_main(void *arg)
+{
+	int rc = 0;
+	UNUSED(arg);
+
+	SET_BUT_UNUSED(rc);
+
+	cfs_block_allsigs();
+
+	while (!stt_data.stt_shuttingdown) {
+		stt_check_timers(&stt_data.stt_prev_slot);
+
+		/* sleep for one slot time or until shutdown is flagged;
+		 * the result is not needed, rc and SET_BUT_UNUSED() only
+		 * silence compiler warnings */
+		rc = wait_event_timeout(stt_data.stt_waitq,
+					stt_data.stt_shuttingdown,
+					cfs_time_seconds(STTIMER_SLOTTIME));
+	}
+
+	spin_lock(&stt_data.stt_lock);
+	stt_data.stt_nthreads--;
+	spin_unlock(&stt_data.stt_lock);
+	return 0;
+}
+
+int
+stt_start_timer_thread(void)
+{
+	task_t *task;
+
+	LASSERT(!stt_data.stt_shuttingdown);
+
+	task = kthread_run(stt_timer_main, NULL, "st_timer");
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+
+	spin_lock(&stt_data.stt_lock);
+	stt_data.stt_nthreads++;
+	spin_unlock(&stt_data.stt_lock);
+	return 0;
+}
+
+
+int
+stt_startup(void)
+{
+	int rc = 0;
+	int i;
+
+	stt_data.stt_shuttingdown = 0;
+	stt_data.stt_prev_slot = cfs_time_current_sec() & STTIMER_SLOTTIMEMASK;
+
+	spin_lock_init(&stt_data.stt_lock);
+	for (i = 0; i < STTIMER_NSLOTS; i++)
+		INIT_LIST_HEAD(&stt_data.stt_hash[i]);
+
+	stt_data.stt_nthreads = 0;
+	init_waitqueue_head(&stt_data.stt_waitq);
+	rc = stt_start_timer_thread();
+	if (rc != 0)
+		CERROR("Can't spawn timer thread: %d\n", rc);
+
+	return rc;
+}
+
+void
+stt_shutdown(void)
+{
+	int i;
+
+	spin_lock(&stt_data.stt_lock);
+
+	for (i = 0; i < STTIMER_NSLOTS; i++)
+		LASSERT(list_empty(&stt_data.stt_hash[i]));
+
+	stt_data.stt_shuttingdown = 1;
+
+	wake_up(&stt_data.stt_waitq);
+	lst_wait_until(stt_data.stt_nthreads == 0, stt_data.stt_lock,
+		       "waiting for %d threads to terminate\n",
+		       stt_data.stt_nthreads);
+
+	spin_unlock(&stt_data.stt_lock);
+}

+ 53 - 0
drivers/staging/lustre/lnet/selftest/timer.h

@@ -0,0 +1,53 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/timer.h
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ */
+#ifndef __SELFTEST_TIMER_H__
+#define __SELFTEST_TIMER_H__
+
+typedef struct {
+	struct list_head stt_list;	/* chained on a timer slot */
+	cfs_time_t	 stt_expires;	/* absolute expiry time, in seconds */
+	void		 (*stt_func)(void *);	/* expiry callback */
+	void		 *stt_data;	/* opaque argument for stt_func */
+} stt_timer_t;
+
+void stt_add_timer(stt_timer_t *timer);
+int stt_del_timer(stt_timer_t *timer);
+int stt_startup(void);
+void stt_shutdown(void);
+
+#endif /* __SELFTEST_TIMER_H__ */

+ 33 - 0
drivers/staging/lustre/lustre/Kconfig

@@ -0,0 +1,33 @@
+config LUSTRE_FS
+	tristate "Lustre file system client support"
+	depends on STAGING && INET && BROKEN
+	select LNET
+	help
+	  This option enables Lustre file system client support. Choose Y
+	  here if you want to access a Lustre file system cluster. To compile
+	  this file system support as a module, choose M here: the module will
+	  be called lustre.
+
+	  To mount Lustre file systems, you also need to install the
+	  user-space mount.lustre utility and other user-space commands,
+	  which can be found in the lustre-client package, available from
+	  http://downloads.whamcloud.com/public/lustre/
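+
+	  As an illustration (the node and file system names below are
+	  placeholders, not defaults), a client mounts a file system with
+	  a command of the form:
+
+	    mount -t lustre <mgsnode>@<net>:/<fsname> /mnt/lustre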
+
+	  The Lustre file system is the most widely deployed cluster file
+	  system in high-performance computing. Source code for both the
+	  kernel-space and user-space Lustre components can be found at
+	  http://git.whamcloud.com/?p=fs/lustre-release.git;a=summary
+
+	  See also http://wiki.lustre.org/
+
+	  If unsure, say N.
+
+config LUSTRE_OBD_MAX_IOCTL_BUFFER
+	int "Lustre obd max ioctl buffer bytes (default 8KB)"
+	depends on LUSTRE_FS
+	default 8192
+	help
+	  This option defines the maximum buffer size, in bytes, that
+	  user-space applications can pass to the Lustre kernel modules
+	  through the ioctl interface.
+
+	  If unsure, use the default.

Some files were not shown because too many files changed in this diff