|
@@ -0,0 +1,401 @@
|
|
|
+/*
|
|
|
+ * r2net.c
|
|
|
+ *
|
|
|
+ * Copyright (c) 2011, Dan Magenheimer, Oracle Corp.
|
|
|
+ *
|
|
|
+ * Ramster_r2net provides an interface between zcache and r2net.
|
|
|
+ *
|
|
|
+ * FIXME: support more than two nodes
|
|
|
+ */
|
|
|
+
|
|
|
+#include <linux/list.h>
|
|
|
+#include "cluster/tcp.h"
|
|
|
+#include "cluster/nodemanager.h"
|
|
|
+#include "tmem.h"
|
|
|
+#include "zcache.h"
|
|
|
+#include "ramster.h"
|
|
|
+
|
|
|
+#define RAMSTER_TESTING
|
|
|
+
|
|
|
+#define RMSTR_KEY 0x77347734
|
|
|
+
|
|
|
+enum {
|
|
|
+ RMSTR_TMEM_PUT_EPH = 100,
|
|
|
+ RMSTR_TMEM_PUT_PERS,
|
|
|
+ RMSTR_TMEM_ASYNC_GET_REQUEST,
|
|
|
+ RMSTR_TMEM_ASYNC_GET_AND_FREE_REQUEST,
|
|
|
+ RMSTR_TMEM_ASYNC_GET_REPLY,
|
|
|
+ RMSTR_TMEM_FLUSH,
|
|
|
+ RMSTR_TMEM_FLOBJ,
|
|
|
+ RMSTR_TMEM_DESTROY_POOL,
|
|
|
+};
|
|
|
+
|
|
|
+#define RMSTR_R2NET_MAX_LEN \
|
|
|
+ (R2NET_MAX_PAYLOAD_BYTES - sizeof(struct tmem_xhandle))
|
|
|
+
|
|
|
+#include "cluster/tcp_internal.h"
|
|
|
+
|
|
|
+static struct r2nm_node *r2net_target_node;
|
|
|
+static int r2net_target_nodenum;
|
|
|
+
|
|
|
+int r2net_remote_target_node_set(int node_num)
|
|
|
+{
|
|
|
+ int ret = -1;
|
|
|
+
|
|
|
+ r2net_target_node = r2nm_get_node_by_num(node_num);
|
|
|
+ if (r2net_target_node != NULL) {
|
|
|
+ r2net_target_nodenum = node_num;
|
|
|
+ r2nm_node_put(r2net_target_node);
|
|
|
+ ret = 0;
|
|
|
+ }
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+/* FIXME following buffer should be per-cpu, protected by preempt_disable */
|
|
|
+static char ramster_async_get_buf[R2NET_MAX_PAYLOAD_BYTES];
|
|
|
+
|
|
|
+static int ramster_remote_async_get_request_handler(struct r2net_msg *msg,
|
|
|
+ u32 len, void *data, void **ret_data)
|
|
|
+{
|
|
|
+ char *pdata;
|
|
|
+ struct tmem_xhandle xh;
|
|
|
+ int found;
|
|
|
+ size_t size = RMSTR_R2NET_MAX_LEN;
|
|
|
+ u16 msgtype = be16_to_cpu(msg->msg_type);
|
|
|
+ bool get_and_free = (msgtype == RMSTR_TMEM_ASYNC_GET_AND_FREE_REQUEST);
|
|
|
+ unsigned long flags;
|
|
|
+
|
|
|
+ xh = *(struct tmem_xhandle *)msg->buf;
|
|
|
+ if (xh.xh_data_size > RMSTR_R2NET_MAX_LEN)
|
|
|
+ BUG();
|
|
|
+ pdata = ramster_async_get_buf;
|
|
|
+ *(struct tmem_xhandle *)pdata = xh;
|
|
|
+ pdata += sizeof(struct tmem_xhandle);
|
|
|
+ local_irq_save(flags);
|
|
|
+ found = zcache_get(xh.client_id, xh.pool_id, &xh.oid, xh.index,
|
|
|
+ pdata, &size, 1, get_and_free ? 1 : -1);
|
|
|
+ local_irq_restore(flags);
|
|
|
+ if (found < 0) {
|
|
|
+ /* a zero size indicates the get failed */
|
|
|
+ size = 0;
|
|
|
+ }
|
|
|
+ if (size > RMSTR_R2NET_MAX_LEN)
|
|
|
+ BUG();
|
|
|
+ *ret_data = pdata - sizeof(struct tmem_xhandle);
|
|
|
+ /* now make caller (r2net_process_message) handle specially */
|
|
|
+ r2net_force_data_magic(msg, RMSTR_TMEM_ASYNC_GET_REPLY, RMSTR_KEY);
|
|
|
+ return size + sizeof(struct tmem_xhandle);
|
|
|
+}
|
|
|
+
|
|
|
+static int ramster_remote_async_get_reply_handler(struct r2net_msg *msg,
|
|
|
+ u32 len, void *data, void **ret_data)
|
|
|
+{
|
|
|
+ char *in = (char *)msg->buf;
|
|
|
+ int datalen = len - sizeof(struct r2net_msg);
|
|
|
+ int ret = -1;
|
|
|
+ struct tmem_xhandle *xh = (struct tmem_xhandle *)in;
|
|
|
+
|
|
|
+ in += sizeof(struct tmem_xhandle);
|
|
|
+ datalen -= sizeof(struct tmem_xhandle);
|
|
|
+ BUG_ON(datalen < 0 || datalen > PAGE_SIZE);
|
|
|
+ ret = zcache_localify(xh->pool_id, &xh->oid, xh->index,
|
|
|
+ in, datalen, xh->extra);
|
|
|
+#ifdef RAMSTER_TESTING
|
|
|
+ if (ret == -EEXIST)
|
|
|
+ pr_err("TESTING ArrgREP, aborted overwrite on racy put\n");
|
|
|
+#endif
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+int ramster_remote_put_handler(struct r2net_msg *msg,
|
|
|
+ u32 len, void *data, void **ret_data)
|
|
|
+{
|
|
|
+ struct tmem_xhandle *xh;
|
|
|
+ char *p = (char *)msg->buf;
|
|
|
+ int datalen = len - sizeof(struct r2net_msg) -
|
|
|
+ sizeof(struct tmem_xhandle);
|
|
|
+ u16 msgtype = be16_to_cpu(msg->msg_type);
|
|
|
+ bool ephemeral = (msgtype == RMSTR_TMEM_PUT_EPH);
|
|
|
+ unsigned long flags;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ xh = (struct tmem_xhandle *)p;
|
|
|
+ p += sizeof(struct tmem_xhandle);
|
|
|
+ zcache_autocreate_pool(xh->client_id, xh->pool_id, ephemeral);
|
|
|
+ local_irq_save(flags);
|
|
|
+ ret = zcache_put(xh->client_id, xh->pool_id, &xh->oid, xh->index,
|
|
|
+ p, datalen, 1, ephemeral ? 1 : -1);
|
|
|
+ local_irq_restore(flags);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+int ramster_remote_flush_handler(struct r2net_msg *msg,
|
|
|
+ u32 len, void *data, void **ret_data)
|
|
|
+{
|
|
|
+ struct tmem_xhandle *xh;
|
|
|
+ char *p = (char *)msg->buf;
|
|
|
+
|
|
|
+ xh = (struct tmem_xhandle *)p;
|
|
|
+ p += sizeof(struct tmem_xhandle);
|
|
|
+ (void)zcache_flush(xh->client_id, xh->pool_id, &xh->oid, xh->index);
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+int ramster_remote_flobj_handler(struct r2net_msg *msg,
|
|
|
+ u32 len, void *data, void **ret_data)
|
|
|
+{
|
|
|
+ struct tmem_xhandle *xh;
|
|
|
+ char *p = (char *)msg->buf;
|
|
|
+
|
|
|
+ xh = (struct tmem_xhandle *)p;
|
|
|
+ p += sizeof(struct tmem_xhandle);
|
|
|
+ (void)zcache_flush_object(xh->client_id, xh->pool_id, &xh->oid);
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+int ramster_remote_async_get(struct tmem_xhandle *xh, bool free, int remotenode,
|
|
|
+ size_t expect_size, uint8_t expect_cksum,
|
|
|
+ void *extra)
|
|
|
+{
|
|
|
+ int ret = -1, status;
|
|
|
+ struct r2nm_node *node = NULL;
|
|
|
+ struct kvec vec[1];
|
|
|
+ size_t veclen = 1;
|
|
|
+ u32 msg_type;
|
|
|
+
|
|
|
+ node = r2nm_get_node_by_num(remotenode);
|
|
|
+ if (node == NULL)
|
|
|
+ goto out;
|
|
|
+ xh->client_id = r2nm_this_node(); /* which node is getting */
|
|
|
+ xh->xh_data_cksum = expect_cksum;
|
|
|
+ xh->xh_data_size = expect_size;
|
|
|
+ xh->extra = extra;
|
|
|
+ vec[0].iov_len = sizeof(*xh);
|
|
|
+ vec[0].iov_base = xh;
|
|
|
+ if (free)
|
|
|
+ msg_type = RMSTR_TMEM_ASYNC_GET_AND_FREE_REQUEST;
|
|
|
+ else
|
|
|
+ msg_type = RMSTR_TMEM_ASYNC_GET_REQUEST;
|
|
|
+ ret = r2net_send_message_vec(msg_type, RMSTR_KEY,
|
|
|
+ vec, veclen, remotenode, &status);
|
|
|
+ r2nm_node_put(node);
|
|
|
+ if (ret < 0) {
|
|
|
+ /* FIXME handle bad message possibilities here? */
|
|
|
+ pr_err("UNTESTED ret<0 in ramster_remote_async_get\n");
|
|
|
+ }
|
|
|
+ ret = status;
|
|
|
+out:
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+#ifdef RAMSTER_TESTING
|
|
|
+/* leave me here to see if it catches a weird crash */
|
|
|
+static void ramster_check_irq_counts(void)
|
|
|
+{
|
|
|
+ static int last_hardirq_cnt, last_softirq_cnt, last_preempt_cnt;
|
|
|
+ int cur_hardirq_cnt, cur_softirq_cnt, cur_preempt_cnt;
|
|
|
+
|
|
|
+ cur_hardirq_cnt = hardirq_count() >> HARDIRQ_SHIFT;
|
|
|
+ if (cur_hardirq_cnt > last_hardirq_cnt) {
|
|
|
+ last_hardirq_cnt = cur_hardirq_cnt;
|
|
|
+ if (!(last_hardirq_cnt&(last_hardirq_cnt-1)))
|
|
|
+ pr_err("RAMSTER TESTING RRP hardirq_count=%d\n",
|
|
|
+ last_hardirq_cnt);
|
|
|
+ }
|
|
|
+ cur_softirq_cnt = softirq_count() >> SOFTIRQ_SHIFT;
|
|
|
+ if (cur_softirq_cnt > last_softirq_cnt) {
|
|
|
+ last_softirq_cnt = cur_softirq_cnt;
|
|
|
+ if (!(last_softirq_cnt&(last_softirq_cnt-1)))
|
|
|
+ pr_err("RAMSTER TESTING RRP softirq_count=%d\n",
|
|
|
+ last_softirq_cnt);
|
|
|
+ }
|
|
|
+ cur_preempt_cnt = preempt_count() & PREEMPT_MASK;
|
|
|
+ if (cur_preempt_cnt > last_preempt_cnt) {
|
|
|
+ last_preempt_cnt = cur_preempt_cnt;
|
|
|
+ if (!(last_preempt_cnt&(last_preempt_cnt-1)))
|
|
|
+ pr_err("RAMSTER TESTING RRP preempt_count=%d\n",
|
|
|
+ last_preempt_cnt);
|
|
|
+ }
|
|
|
+}
|
|
|
+#endif
|
|
|
+
|
|
|
+int ramster_remote_put(struct tmem_xhandle *xh, char *data, size_t size,
|
|
|
+ bool ephemeral, int *remotenode)
|
|
|
+{
|
|
|
+ int nodenum, ret = -1, status;
|
|
|
+ struct r2nm_node *node = NULL;
|
|
|
+ struct kvec vec[2];
|
|
|
+ size_t veclen = 2;
|
|
|
+ u32 msg_type;
|
|
|
+#ifdef RAMSTER_TESTING
|
|
|
+ struct r2net_node *nn;
|
|
|
+#endif
|
|
|
+
|
|
|
+ BUG_ON(size > RMSTR_R2NET_MAX_LEN);
|
|
|
+ xh->client_id = r2nm_this_node(); /* which node is putting */
|
|
|
+ vec[0].iov_len = sizeof(*xh);
|
|
|
+ vec[0].iov_base = xh;
|
|
|
+ vec[1].iov_len = size;
|
|
|
+ vec[1].iov_base = data;
|
|
|
+ node = r2net_target_node;
|
|
|
+ if (!node)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ nodenum = r2net_target_nodenum;
|
|
|
+
|
|
|
+ r2nm_node_get(node);
|
|
|
+
|
|
|
+#ifdef RAMSTER_TESTING
|
|
|
+ nn = r2net_nn_from_num(nodenum);
|
|
|
+ WARN_ON_ONCE(nn->nn_persistent_error || !nn->nn_sc_valid);
|
|
|
+#endif
|
|
|
+
|
|
|
+ if (ephemeral)
|
|
|
+ msg_type = RMSTR_TMEM_PUT_EPH;
|
|
|
+ else
|
|
|
+ msg_type = RMSTR_TMEM_PUT_PERS;
|
|
|
+#ifdef RAMSTER_TESTING
|
|
|
+ /* leave me here to see if it catches a weird crash */
|
|
|
+ ramster_check_irq_counts();
|
|
|
+#endif
|
|
|
+
|
|
|
+ ret = r2net_send_message_vec(msg_type, RMSTR_KEY, vec, veclen,
|
|
|
+ nodenum, &status);
|
|
|
+#ifdef RAMSTER_TESTING
|
|
|
+ if (ret != 0) {
|
|
|
+ static unsigned long cnt;
|
|
|
+ cnt++;
|
|
|
+ if (!(cnt&(cnt-1)))
|
|
|
+ pr_err("ramster_remote_put: message failed, "
|
|
|
+ "ret=%d, cnt=%lu\n", ret, cnt);
|
|
|
+ ret = -1;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ if (ret < 0)
|
|
|
+ ret = -1;
|
|
|
+ else {
|
|
|
+ ret = status;
|
|
|
+ *remotenode = nodenum;
|
|
|
+ }
|
|
|
+
|
|
|
+ r2nm_node_put(node);
|
|
|
+out:
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+int ramster_remote_flush(struct tmem_xhandle *xh, int remotenode)
|
|
|
+{
|
|
|
+ int ret = -1, status;
|
|
|
+ struct r2nm_node *node = NULL;
|
|
|
+ struct kvec vec[1];
|
|
|
+ size_t veclen = 1;
|
|
|
+
|
|
|
+ node = r2nm_get_node_by_num(remotenode);
|
|
|
+ BUG_ON(node == NULL);
|
|
|
+ xh->client_id = r2nm_this_node(); /* which node is flushing */
|
|
|
+ vec[0].iov_len = sizeof(*xh);
|
|
|
+ vec[0].iov_base = xh;
|
|
|
+ BUG_ON(irqs_disabled());
|
|
|
+ BUG_ON(in_softirq());
|
|
|
+ ret = r2net_send_message_vec(RMSTR_TMEM_FLUSH, RMSTR_KEY,
|
|
|
+ vec, veclen, remotenode, &status);
|
|
|
+ r2nm_node_put(node);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+int ramster_remote_flush_object(struct tmem_xhandle *xh, int remotenode)
|
|
|
+{
|
|
|
+ int ret = -1, status;
|
|
|
+ struct r2nm_node *node = NULL;
|
|
|
+ struct kvec vec[1];
|
|
|
+ size_t veclen = 1;
|
|
|
+
|
|
|
+ node = r2nm_get_node_by_num(remotenode);
|
|
|
+ BUG_ON(node == NULL);
|
|
|
+ xh->client_id = r2nm_this_node(); /* which node is flobjing */
|
|
|
+ vec[0].iov_len = sizeof(*xh);
|
|
|
+ vec[0].iov_base = xh;
|
|
|
+ ret = r2net_send_message_vec(RMSTR_TMEM_FLOBJ, RMSTR_KEY,
|
|
|
+ vec, veclen, remotenode, &status);
|
|
|
+ r2nm_node_put(node);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Handler registration
|
|
|
+ */
|
|
|
+
|
|
|
+static LIST_HEAD(r2net_unreg_list);
|
|
|
+
|
|
|
+static void r2net_unregister_handlers(void)
|
|
|
+{
|
|
|
+ r2net_unregister_handler_list(&r2net_unreg_list);
|
|
|
+}
|
|
|
+
|
|
|
+int r2net_register_handlers(void)
|
|
|
+{
|
|
|
+ int status;
|
|
|
+
|
|
|
+ status = r2net_register_handler(RMSTR_TMEM_PUT_EPH, RMSTR_KEY,
|
|
|
+ RMSTR_R2NET_MAX_LEN,
|
|
|
+ ramster_remote_put_handler,
|
|
|
+ NULL, NULL, &r2net_unreg_list);
|
|
|
+ if (status)
|
|
|
+ goto bail;
|
|
|
+
|
|
|
+ status = r2net_register_handler(RMSTR_TMEM_PUT_PERS, RMSTR_KEY,
|
|
|
+ RMSTR_R2NET_MAX_LEN,
|
|
|
+ ramster_remote_put_handler,
|
|
|
+ NULL, NULL, &r2net_unreg_list);
|
|
|
+ if (status)
|
|
|
+ goto bail;
|
|
|
+
|
|
|
+ status = r2net_register_handler(RMSTR_TMEM_ASYNC_GET_REQUEST, RMSTR_KEY,
|
|
|
+ RMSTR_R2NET_MAX_LEN,
|
|
|
+ ramster_remote_async_get_request_handler,
|
|
|
+ NULL, NULL,
|
|
|
+ &r2net_unreg_list);
|
|
|
+ if (status)
|
|
|
+ goto bail;
|
|
|
+
|
|
|
+ status = r2net_register_handler(RMSTR_TMEM_ASYNC_GET_AND_FREE_REQUEST,
|
|
|
+ RMSTR_KEY, RMSTR_R2NET_MAX_LEN,
|
|
|
+ ramster_remote_async_get_request_handler,
|
|
|
+ NULL, NULL,
|
|
|
+ &r2net_unreg_list);
|
|
|
+ if (status)
|
|
|
+ goto bail;
|
|
|
+
|
|
|
+ status = r2net_register_handler(RMSTR_TMEM_ASYNC_GET_REPLY, RMSTR_KEY,
|
|
|
+ RMSTR_R2NET_MAX_LEN,
|
|
|
+ ramster_remote_async_get_reply_handler,
|
|
|
+ NULL, NULL,
|
|
|
+ &r2net_unreg_list);
|
|
|
+ if (status)
|
|
|
+ goto bail;
|
|
|
+
|
|
|
+ status = r2net_register_handler(RMSTR_TMEM_FLUSH, RMSTR_KEY,
|
|
|
+ RMSTR_R2NET_MAX_LEN,
|
|
|
+ ramster_remote_flush_handler,
|
|
|
+ NULL, NULL,
|
|
|
+ &r2net_unreg_list);
|
|
|
+ if (status)
|
|
|
+ goto bail;
|
|
|
+
|
|
|
+ status = r2net_register_handler(RMSTR_TMEM_FLOBJ, RMSTR_KEY,
|
|
|
+ RMSTR_R2NET_MAX_LEN,
|
|
|
+ ramster_remote_flobj_handler,
|
|
|
+ NULL, NULL,
|
|
|
+ &r2net_unreg_list);
|
|
|
+ if (status)
|
|
|
+ goto bail;
|
|
|
+
|
|
|
+ pr_info("ramster: r2net handlers registered\n");
|
|
|
+
|
|
|
+bail:
|
|
|
+ if (status) {
|
|
|
+ r2net_unregister_handlers();
|
|
|
+ pr_err("ramster: couldn't register r2net handlers\n");
|
|
|
+ }
|
|
|
+ return status;
|
|
|
+}
|