stack_user.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * stack_user.c
  5. *
  6. * Code which interfaces ocfs2 with fs/dlm and a userspace stack.
  7. *
  8. * Copyright (C) 2007 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation, version 2.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * General Public License for more details.
  18. */
  19. #include <linux/module.h>
  20. #include <linux/fs.h>
  21. #include <linux/miscdevice.h>
  22. #include <linux/mutex.h>
  23. #include <linux/smp_lock.h>
  24. #include <linux/reboot.h>
  25. #include <asm/uaccess.h>
  26. #include "ocfs2.h" /* For struct ocfs2_lock_res */
  27. #include "stackglue.h"
  28. #include <linux/dlm_plock.h>
  29. /*
  30. * The control protocol starts with a handshake. Until the handshake
  31. * is complete, the control device will fail all write(2)s.
  32. *
  33. * The handshake is simple. First, the client reads until EOF. Each line
  34. * of output is a supported protocol tag. All protocol tags are a single
  35. * character followed by a two hex digit version number. Currently the
  36. * only thing supported is T01, for "Text-based version 0x01". Next, the
  37. * client writes the version they would like to use, including the newline.
  38. * Thus, the protocol tag is 'T01\n'. If the version tag written is
  39. * unknown, -EINVAL is returned. Once the negotiation is complete, the
  40. * client can start sending messages.
  41. *
  42. * The T01 protocol has three messages. First is the "SETN" message.
  43. * It has the following syntax:
  44. *
  45. * SETN<space><8-char-hex-nodenum><newline>
  46. *
  47. * This is 14 characters.
  48. *
  49. * The "SETN" message must be the first message following the protocol.
  50. * It tells ocfs2_control the local node number.
  51. *
  52. * Next comes the "SETV" message. It has the following syntax:
  53. *
  54. * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
  55. *
  56. * This is 11 characters.
  57. *
  58. * The "SETV" message sets the filesystem locking protocol version as
  59. * negotiated by the client. The client negotiates based on the maximum
  60. * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major
  61. * number from the "SETV" message must match
  62. * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number
  63. * must be less than or equal to ...->lp_max_version.pv_minor.
  64. *
  65. * Once this information has been set, mounts will be allowed. From this
  66. * point on, the "DOWN" message can be sent for node down notification.
  67. * It has the following syntax:
  68. *
  69. * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
  70. *
  71. * eg:
  72. *
  73. * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
  74. *
  75. * This is 47 characters.
  76. */
  77. /*
  78. * Whether or not the client has done the handshake.
  79. * For now, we have just one protocol version.
  80. */
  81. #define OCFS2_CONTROL_PROTO "T01\n" /* the one protocol tag offered on read and accepted on write */
  82. #define OCFS2_CONTROL_PROTO_LEN 4 /* bytes in the tag, newline included, no NUL */
  83. /* Handshake states */
  84. #define OCFS2_CONTROL_HANDSHAKE_INVALID (0) /* state of a freshly opened (zeroed) private; writes fail */
  85. #define OCFS2_CONTROL_HANDSHAKE_READ (1) /* client has read the whole protocol list */
  86. #define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2) /* client has written a valid protocol tag */
  87. #define OCFS2_CONTROL_HANDSHAKE_VALID (3) /* node number and version installed; DOWN accepted */
  88. /* Messages */
  89. #define OCFS2_CONTROL_MESSAGE_OP_LEN 4 /* length of the 4-character op tag */
  90. #define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN"
  91. #define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14 /* 4 + 1 + 8 + 1 */
  92. #define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV"
  93. #define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11 /* 4 + 1 + 2 + 1 + 2 + 1 */
  94. #define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN"
  95. #define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47 /* 4 + 1 + 32 + 1 + 8 + 1 */
  96. #define OCFS2_TEXT_UUID_LEN 32 /* hex characters in a textual uuid */
  97. #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2 /* hex digits per version component */
  98. #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8 /* hex digits in a node number */
  99. /*
  100. * ocfs2_live_connection is refcounted because the filesystem and
  101. * miscdevice sides can detach in different order. Let's just be safe.
  102. */
  103. struct ocfs2_live_connection { /* NOTE(review): the comment above says "refcounted", but no refcount field exists here */
  104. struct list_head oc_list; /* link in ocfs2_live_connection_list */
  105. struct ocfs2_cluster_connection *oc_conn; /* cluster connection this entry stands for; set to NULL on drop */
  106. };
  107. struct ocfs2_control_private { /* per-open state of the control device, stored in file->private_data */
  108. struct list_head op_list; /* link in ocfs2_control_private_list */
  109. int op_state; /* one of the OCFS2_CONTROL_HANDSHAKE_* values */
  110. int op_this_node; /* node number from SETN; -1 until one is received */
  111. struct ocfs2_protocol_version op_proto; /* version from SETV; pv_major == 0 means not yet received */
  112. };
  113. /* SETN<space><8-char-hex-nodenum><newline> */
  114. struct ocfs2_control_message_setn { /* byte-for-byte overlay of the 14-character SETN message */
  115. char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; /* "SETN", not NUL-terminated */
  116. char space; /* ' ' on the wire; overwritten with NUL while parsing */
  117. char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; /* node number as 8 hex digits */
  118. char newline; /* '\n' on the wire; overwritten with NUL so nodestr is terminated */
  119. };
  120. /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
  121. struct ocfs2_control_message_setv { /* byte-for-byte overlay of the 11-character SETV message */
  122. char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; /* "SETV", not NUL-terminated */
  123. char space1; /* ' ' on the wire; overwritten with NUL while parsing */
  124. char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; /* major version as 2 hex digits */
  125. char space2; /* ' ' on the wire; overwritten with NUL so major is terminated */
  126. char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; /* minor version as 2 hex digits */
  127. char newline; /* '\n' on the wire; overwritten with NUL so minor is terminated */
  128. };
  129. /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
  130. struct ocfs2_control_message_down { /* byte-for-byte overlay of the 47-character DOWN message */
  131. char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; /* "DOWN", not NUL-terminated */
  132. char space1; /* ' ' on the wire; overwritten with NUL while parsing */
  133. char uuid[OCFS2_TEXT_UUID_LEN]; /* 32 hex characters naming the connection */
  134. char space2; /* ' ' on the wire; overwritten with NUL so uuid is terminated */
  135. char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; /* number of the node that went down, 8 hex digits */
  136. char newline; /* '\n' on the wire; overwritten with NUL so nodestr is terminated */
  137. };
  138. union ocfs2_control_message { /* receive buffer for one write(2); its size bounds the accepted message length */
  139. char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; /* op tag shared by every message, used for dispatch */
  140. struct ocfs2_control_message_setn u_setn; /* view as a SETN message */
  141. struct ocfs2_control_message_setv u_setv; /* view as a SETV message */
  142. struct ocfs2_control_message_down u_down; /* view as a DOWN message */
  143. };
  144. static struct ocfs2_stack_plugin ocfs2_user_plugin; /* tentative definition; initialised near the end of the file */
  145. static atomic_t ocfs2_control_opened; /* number of opens that reached HANDSHAKE_VALID */
  146. static int ocfs2_control_this_node = -1; /* local node number; -1 while unset */
  147. static struct ocfs2_protocol_version running_proto; /* locking protocol version installed by the control daemon */
  148. static LIST_HEAD(ocfs2_live_connection_list); /* all ocfs2_live_connection entries */
  149. static LIST_HEAD(ocfs2_control_private_list); /* all open ocfs2_control_private entries */
  150. static DEFINE_MUTEX(ocfs2_control_lock); /* held around updates to the two lists, the node number and running_proto */
  151. static inline void ocfs2_control_set_handshake_state(struct file *file,
  152. int state)
  153. {
  154. struct ocfs2_control_private *p = file->private_data;
  155. p->op_state = state;
  156. }
  157. static inline int ocfs2_control_get_handshake_state(struct file *file)
  158. {
  159. struct ocfs2_control_private *p = file->private_data;
  160. return p->op_state;
  161. }
  162. static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
  163. {
  164. size_t len = strlen(name);
  165. struct ocfs2_live_connection *c;
  166. BUG_ON(!mutex_is_locked(&ocfs2_control_lock));
  167. list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) {
  168. if ((c->oc_conn->cc_namelen == len) &&
  169. !strncmp(c->oc_conn->cc_name, name, len))
  170. return c;
  171. }
  172. return c;
  173. }
  174. /*
  175. * ocfs2_live_connection structures are created underneath the ocfs2
  176. * mount path. Since the VFS prevents multiple calls to
  177. * fill_super(), we can't get dupes here.
  178. */
  179. static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
  180. struct ocfs2_live_connection **c_ret)
  181. {
  182. int rc = 0;
  183. struct ocfs2_live_connection *c;
  184. c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
  185. if (!c)
  186. return -ENOMEM;
  187. mutex_lock(&ocfs2_control_lock);
  188. c->oc_conn = conn;
  189. if (atomic_read(&ocfs2_control_opened))
  190. list_add(&c->oc_list, &ocfs2_live_connection_list);
  191. else {
  192. printk(KERN_ERR
  193. "ocfs2: Userspace control daemon is not present\n");
  194. rc = -ESRCH;
  195. }
  196. mutex_unlock(&ocfs2_control_lock);
  197. if (!rc)
  198. *c_ret = c;
  199. else
  200. kfree(c);
  201. return rc;
  202. }
  203. /*
  204. * This function disconnects the cluster connection from ocfs2_control.
  205. * Afterwards, userspace can't affect the cluster connection.
  206. */
  207. static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c)
  208. {
  209. mutex_lock(&ocfs2_control_lock);
  210. list_del_init(&c->oc_list);
  211. c->oc_conn = NULL;
  212. mutex_unlock(&ocfs2_control_lock);
  213. kfree(c);
  214. }
  215. static int ocfs2_control_cfu(void *target, size_t target_len,
  216. const char __user *buf, size_t count)
  217. {
  218. /* The T01 expects write(2) calls to have exactly one command */
  219. if ((count != target_len) ||
  220. (count > sizeof(union ocfs2_control_message)))
  221. return -EINVAL;
  222. if (copy_from_user(target, buf, target_len))
  223. return -EFAULT;
  224. return 0;
  225. }
  226. static ssize_t ocfs2_control_validate_protocol(struct file *file,
  227. const char __user *buf,
  228. size_t count)
  229. {
  230. ssize_t ret;
  231. char kbuf[OCFS2_CONTROL_PROTO_LEN];
  232. ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN,
  233. buf, count);
  234. if (ret)
  235. return ret;
  236. if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN))
  237. return -EINVAL;
  238. ocfs2_control_set_handshake_state(file,
  239. OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
  240. return count;
  241. }
  242. static void ocfs2_control_send_down(const char *uuid,
  243. int nodenum)
  244. {
  245. struct ocfs2_live_connection *c;
  246. mutex_lock(&ocfs2_control_lock);
  247. c = ocfs2_connection_find(uuid);
  248. if (c) {
  249. BUG_ON(c->oc_conn == NULL);
  250. c->oc_conn->cc_recovery_handler(nodenum,
  251. c->oc_conn->cc_recovery_data);
  252. }
  253. mutex_unlock(&ocfs2_control_lock);
  254. }
  255. /*
  256. * Called whenever configuration elements are sent to /dev/ocfs2_control.
  257. * If all configuration elements are present, try to set the global
  258. * values. If there is a problem, return an error. Skip any missing
  259. * elements, and only bump ocfs2_control_opened when we have all elements
  260. * and are successful.
  261. */
  262. static int ocfs2_control_install_private(struct file *file)
  263. {
  264. int rc = 0;
  265. int set_p = 1;
  266. struct ocfs2_control_private *p = file->private_data;
  267. BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
  268. mutex_lock(&ocfs2_control_lock);
  269. if (p->op_this_node < 0) {
  270. set_p = 0;
  271. } else if ((ocfs2_control_this_node >= 0) &&
  272. (ocfs2_control_this_node != p->op_this_node)) {
  273. rc = -EINVAL;
  274. goto out_unlock;
  275. }
  276. if (!p->op_proto.pv_major) {
  277. set_p = 0;
  278. } else if (!list_empty(&ocfs2_live_connection_list) &&
  279. ((running_proto.pv_major != p->op_proto.pv_major) ||
  280. (running_proto.pv_minor != p->op_proto.pv_minor))) {
  281. rc = -EINVAL;
  282. goto out_unlock;
  283. }
  284. if (set_p) {
  285. ocfs2_control_this_node = p->op_this_node;
  286. running_proto.pv_major = p->op_proto.pv_major;
  287. running_proto.pv_minor = p->op_proto.pv_minor;
  288. }
  289. out_unlock:
  290. mutex_unlock(&ocfs2_control_lock);
  291. if (!rc && set_p) {
  292. /* We set the global values successfully */
  293. atomic_inc(&ocfs2_control_opened);
  294. ocfs2_control_set_handshake_state(file,
  295. OCFS2_CONTROL_HANDSHAKE_VALID);
  296. }
  297. return rc;
  298. }
  299. static int ocfs2_control_get_this_node(void)
  300. {
  301. int rc;
  302. mutex_lock(&ocfs2_control_lock);
  303. if (ocfs2_control_this_node < 0)
  304. rc = -EINVAL;
  305. else
  306. rc = ocfs2_control_this_node;
  307. mutex_unlock(&ocfs2_control_lock);
  308. return rc;
  309. }
  310. static int ocfs2_control_do_setnode_msg(struct file *file,
  311. struct ocfs2_control_message_setn *msg)
  312. {
  313. long nodenum;
  314. char *ptr = NULL;
  315. struct ocfs2_control_private *p = file->private_data;
  316. if (ocfs2_control_get_handshake_state(file) !=
  317. OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
  318. return -EINVAL;
  319. if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
  320. OCFS2_CONTROL_MESSAGE_OP_LEN))
  321. return -EINVAL;
  322. if ((msg->space != ' ') || (msg->newline != '\n'))
  323. return -EINVAL;
  324. msg->space = msg->newline = '\0';
  325. nodenum = simple_strtol(msg->nodestr, &ptr, 16);
  326. if (!ptr || *ptr)
  327. return -EINVAL;
  328. if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
  329. (nodenum > INT_MAX) || (nodenum < 0))
  330. return -ERANGE;
  331. p->op_this_node = nodenum;
  332. return ocfs2_control_install_private(file);
  333. }
  334. static int ocfs2_control_do_setversion_msg(struct file *file,
  335. struct ocfs2_control_message_setv *msg)
  336. {
  337. long major, minor;
  338. char *ptr = NULL;
  339. struct ocfs2_control_private *p = file->private_data;
  340. struct ocfs2_protocol_version *max =
  341. &ocfs2_user_plugin.sp_proto->lp_max_version;
  342. if (ocfs2_control_get_handshake_state(file) !=
  343. OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
  344. return -EINVAL;
  345. if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
  346. OCFS2_CONTROL_MESSAGE_OP_LEN))
  347. return -EINVAL;
  348. if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
  349. (msg->newline != '\n'))
  350. return -EINVAL;
  351. msg->space1 = msg->space2 = msg->newline = '\0';
  352. major = simple_strtol(msg->major, &ptr, 16);
  353. if (!ptr || *ptr)
  354. return -EINVAL;
  355. minor = simple_strtol(msg->minor, &ptr, 16);
  356. if (!ptr || *ptr)
  357. return -EINVAL;
  358. /*
  359. * The major must be between 1 and 255, inclusive. The minor
  360. * must be between 0 and 255, inclusive. The version passed in
  361. * must be within the maximum version supported by the filesystem.
  362. */
  363. if ((major == LONG_MIN) || (major == LONG_MAX) ||
  364. (major > (u8)-1) || (major < 1))
  365. return -ERANGE;
  366. if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
  367. (minor > (u8)-1) || (minor < 0))
  368. return -ERANGE;
  369. if ((major != max->pv_major) ||
  370. (minor > max->pv_minor))
  371. return -EINVAL;
  372. p->op_proto.pv_major = major;
  373. p->op_proto.pv_minor = minor;
  374. return ocfs2_control_install_private(file);
  375. }
  376. static int ocfs2_control_do_down_msg(struct file *file,
  377. struct ocfs2_control_message_down *msg)
  378. {
  379. long nodenum;
  380. char *p = NULL;
  381. if (ocfs2_control_get_handshake_state(file) !=
  382. OCFS2_CONTROL_HANDSHAKE_VALID)
  383. return -EINVAL;
  384. if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
  385. OCFS2_CONTROL_MESSAGE_OP_LEN))
  386. return -EINVAL;
  387. if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
  388. (msg->newline != '\n'))
  389. return -EINVAL;
  390. msg->space1 = msg->space2 = msg->newline = '\0';
  391. nodenum = simple_strtol(msg->nodestr, &p, 16);
  392. if (!p || *p)
  393. return -EINVAL;
  394. if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
  395. (nodenum > INT_MAX) || (nodenum < 0))
  396. return -ERANGE;
  397. ocfs2_control_send_down(msg->uuid, nodenum);
  398. return 0;
  399. }
  400. static ssize_t ocfs2_control_message(struct file *file,
  401. const char __user *buf,
  402. size_t count)
  403. {
  404. ssize_t ret;
  405. union ocfs2_control_message msg;
  406. /* Try to catch padding issues */
  407. WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
  408. (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));
  409. memset(&msg, 0, sizeof(union ocfs2_control_message));
  410. ret = ocfs2_control_cfu(&msg, count, buf, count);
  411. if (ret)
  412. goto out;
  413. if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
  414. !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
  415. OCFS2_CONTROL_MESSAGE_OP_LEN))
  416. ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
  417. else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
  418. !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
  419. OCFS2_CONTROL_MESSAGE_OP_LEN))
  420. ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
  421. else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
  422. !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
  423. OCFS2_CONTROL_MESSAGE_OP_LEN))
  424. ret = ocfs2_control_do_down_msg(file, &msg.u_down);
  425. else
  426. ret = -EINVAL;
  427. out:
  428. return ret ? ret : count;
  429. }
  430. static ssize_t ocfs2_control_write(struct file *file,
  431. const char __user *buf,
  432. size_t count,
  433. loff_t *ppos)
  434. {
  435. ssize_t ret;
  436. switch (ocfs2_control_get_handshake_state(file)) {
  437. case OCFS2_CONTROL_HANDSHAKE_INVALID:
  438. ret = -EINVAL;
  439. break;
  440. case OCFS2_CONTROL_HANDSHAKE_READ:
  441. ret = ocfs2_control_validate_protocol(file, buf,
  442. count);
  443. break;
  444. case OCFS2_CONTROL_HANDSHAKE_PROTOCOL:
  445. case OCFS2_CONTROL_HANDSHAKE_VALID:
  446. ret = ocfs2_control_message(file, buf, count);
  447. break;
  448. default:
  449. BUG();
  450. ret = -EIO;
  451. break;
  452. }
  453. return ret;
  454. }
  455. /*
  456. * This is a naive version. If we ever have a new protocol, we'll expand
  457. * it. Probably using seq_file.
  458. */
  459. static ssize_t ocfs2_control_read(struct file *file,
  460. char __user *buf,
  461. size_t count,
  462. loff_t *ppos)
  463. {
  464. ssize_t ret;
  465. ret = simple_read_from_buffer(buf, count, ppos,
  466. OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN);
  467. /* Have we read the whole protocol list? */
  468. if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN)
  469. ocfs2_control_set_handshake_state(file,
  470. OCFS2_CONTROL_HANDSHAKE_READ);
  471. return ret;
  472. }
  473. static int ocfs2_control_release(struct inode *inode, struct file *file)
  474. {
  475. struct ocfs2_control_private *p = file->private_data;
  476. mutex_lock(&ocfs2_control_lock);
  477. if (ocfs2_control_get_handshake_state(file) !=
  478. OCFS2_CONTROL_HANDSHAKE_VALID)
  479. goto out;
  480. if (atomic_dec_and_test(&ocfs2_control_opened)) {
  481. if (!list_empty(&ocfs2_live_connection_list)) {
  482. /* XXX: Do bad things! */
  483. printk(KERN_ERR
  484. "ocfs2: Unexpected release of ocfs2_control!\n"
  485. " Loss of cluster connection requires "
  486. "an emergency restart!\n");
  487. emergency_restart();
  488. }
  489. /*
  490. * Last valid close clears the node number and resets
  491. * the locking protocol version
  492. */
  493. ocfs2_control_this_node = -1;
  494. running_proto.pv_major = 0;
  495. running_proto.pv_major = 0;
  496. }
  497. out:
  498. list_del_init(&p->op_list);
  499. file->private_data = NULL;
  500. mutex_unlock(&ocfs2_control_lock);
  501. kfree(p);
  502. return 0;
  503. }
  504. static int ocfs2_control_open(struct inode *inode, struct file *file)
  505. {
  506. struct ocfs2_control_private *p;
  507. p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL);
  508. if (!p)
  509. return -ENOMEM;
  510. p->op_this_node = -1;
  511. lock_kernel();
  512. mutex_lock(&ocfs2_control_lock);
  513. file->private_data = p;
  514. list_add(&p->op_list, &ocfs2_control_private_list);
  515. mutex_unlock(&ocfs2_control_lock);
  516. unlock_kernel();
  517. return 0;
  518. }
  519. static const struct file_operations ocfs2_control_fops = { /* file operations used by ocfs2_control_device */
  520. .open = ocfs2_control_open, /* allocates the per-open private state */
  521. .release = ocfs2_control_release, /* frees it; last valid close resets the globals */
  522. .read = ocfs2_control_read, /* returns the supported protocol list */
  523. .write = ocfs2_control_write, /* protocol selection, then T01 messages */
  524. .owner = THIS_MODULE,
  525. };
  526. static struct miscdevice ocfs2_control_device = { /* the misc device registered by ocfs2_control_init() */
  527. .minor = MISC_DYNAMIC_MINOR,
  528. .name = "ocfs2_control", /* comments in this file refer to it as /dev/ocfs2_control */
  529. .fops = &ocfs2_control_fops,
  530. };
  531. static int ocfs2_control_init(void)
  532. {
  533. int rc;
  534. atomic_set(&ocfs2_control_opened, 0);
  535. rc = misc_register(&ocfs2_control_device);
  536. if (rc)
  537. printk(KERN_ERR
  538. "ocfs2: Unable to register ocfs2_control device "
  539. "(errno %d)\n",
  540. -rc);
  541. return rc;
  542. }
  543. static void ocfs2_control_exit(void)
  544. {
  545. int rc;
  546. rc = misc_deregister(&ocfs2_control_device);
  547. if (rc)
  548. printk(KERN_ERR
  549. "ocfs2: Unable to deregister ocfs2_control device "
  550. "(errno %d)\n",
  551. -rc);
  552. }
  553. static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg)
  554. {
  555. struct ocfs2_lock_res *res = astarg;
  556. return &res->l_lksb.lksb_fsdlm;
  557. }
  558. static void fsdlm_lock_ast_wrapper(void *astarg)
  559. {
  560. struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg);
  561. int status = lksb->sb_status;
  562. BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
  563. /*
  564. * For now we're punting on the issue of other non-standard errors
  565. * where we can't tell if the unlock_ast or lock_ast should be called.
  566. * The main "other error" that's possible is EINVAL which means the
  567. * function was called with invalid args, which shouldn't be possible
  568. * since the caller here is under our control. Other non-standard
  569. * errors probably fall into the same category, or otherwise are fatal
  570. * which means we can't carry on anyway.
  571. */
  572. if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
  573. ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0);
  574. else
  575. ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg);
  576. }
  577. static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
  578. {
  579. BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
  580. ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level);
  581. }
  582. static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
  583. int mode,
  584. union ocfs2_dlm_lksb *lksb,
  585. u32 flags,
  586. void *name,
  587. unsigned int namelen,
  588. void *astarg)
  589. {
  590. int ret;
  591. if (!lksb->lksb_fsdlm.sb_lvbptr)
  592. lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
  593. sizeof(struct dlm_lksb);
  594. ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
  595. flags|DLM_LKF_NODLCKWT, name, namelen, 0,
  596. fsdlm_lock_ast_wrapper, astarg,
  597. fsdlm_blocking_ast_wrapper);
  598. return ret;
  599. }
  600. static int user_dlm_unlock(struct ocfs2_cluster_connection *conn,
  601. union ocfs2_dlm_lksb *lksb,
  602. u32 flags,
  603. void *astarg)
  604. {
  605. int ret;
  606. ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
  607. flags, &lksb->lksb_fsdlm, astarg);
  608. return ret;
  609. }
  610. static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
  611. {
  612. return lksb->lksb_fsdlm.sb_status;
  613. }
  614. static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
  615. {
  616. int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
  617. return !invalid;
  618. }
  619. static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
  620. {
  621. if (!lksb->lksb_fsdlm.sb_lvbptr)
  622. lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
  623. sizeof(struct dlm_lksb);
  624. return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
  625. }
  /* Stack operation: dump an lksb.  This plugin prints nothing. */
  static void
  user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
  {
  	/* no-op */
  }
  629. static int user_plock(struct ocfs2_cluster_connection *conn,
  630. u64 ino,
  631. struct file *file,
  632. int cmd,
  633. struct file_lock *fl)
  634. {
  635. /*
  636. * This more or less just demuxes the plock request into any
  637. * one of three dlm calls.
  638. *
  639. * Internally, fs/dlm will pass these to a misc device, which
  640. * a userspace daemon will read and write to.
  641. *
  642. * For now, cancel requests (which happen internally only),
  643. * are turned into unlocks. Most of this function taken from
  644. * gfs2_lock.
  645. */
  646. if (cmd == F_CANCELLK) {
  647. cmd = F_SETLK;
  648. fl->fl_type = F_UNLCK;
  649. }
  650. if (IS_GETLK(cmd))
  651. return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
  652. else if (fl->fl_type == F_UNLCK)
  653. return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
  654. else
  655. return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
  656. }
  657. /*
  658. * Compare a requested locking protocol version against the current one.
  659. *
  660. * If the major numbers are different, they are incompatible.
  661. * If the current minor is greater than the request, they are incompatible.
  662. * If the current minor is less than or equal to the request, they are
  663. * compatible, and the requester should run at the current minor version.
  664. */
  665. static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
  666. struct ocfs2_protocol_version *request)
  667. {
  668. if (existing->pv_major != request->pv_major)
  669. return 1;
  670. if (existing->pv_minor > request->pv_minor)
  671. return 1;
  672. if (existing->pv_minor < request->pv_minor)
  673. request->pv_minor = existing->pv_minor;
  674. return 0;
  675. }
  676. static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
  677. {
  678. dlm_lockspace_t *fsdlm;
  679. struct ocfs2_live_connection *control;
  680. int rc = 0;
  681. BUG_ON(conn == NULL);
  682. rc = ocfs2_live_connection_new(conn, &control);
  683. if (rc)
  684. goto out;
  685. /*
  686. * running_proto must have been set before we allowed any mounts
  687. * to proceed.
  688. */
  689. if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
  690. printk(KERN_ERR
  691. "Unable to mount with fs locking protocol version "
  692. "%u.%u because the userspace control daemon has "
  693. "negotiated %u.%u\n",
  694. conn->cc_version.pv_major, conn->cc_version.pv_minor,
  695. running_proto.pv_major, running_proto.pv_minor);
  696. rc = -EPROTO;
  697. ocfs2_live_connection_drop(control);
  698. goto out;
  699. }
  700. rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
  701. &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
  702. if (rc) {
  703. ocfs2_live_connection_drop(control);
  704. goto out;
  705. }
  706. conn->cc_private = control;
  707. conn->cc_lockspace = fsdlm;
  708. out:
  709. return rc;
  710. }
  711. static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
  712. {
  713. dlm_release_lockspace(conn->cc_lockspace, 2);
  714. conn->cc_lockspace = NULL;
  715. ocfs2_live_connection_drop(conn->cc_private);
  716. conn->cc_private = NULL;
  717. return 0;
  718. }
  /*
   * Stack operation: report the local node number.
   *
   * Returns 0 and stores the node number in *this_node, or a negative
   * errno (leaving *this_node untouched) if no node number is set yet.
   */
  static int user_cluster_this_node(unsigned int *this_node)
  {
  	int node = ocfs2_control_get_this_node();

  	if (node >= 0) {
  		*this_node = node;
  		return 0;
  	}

  	return node;
  }
  728. static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { /* operations table referenced by ocfs2_user_plugin.sp_ops */
  729. .connect = user_cluster_connect, /* registers the live connection and creates the lockspace */
  730. .disconnect = user_cluster_disconnect, /* releases the lockspace and drops the live connection */
  731. .this_node = user_cluster_this_node, /* reports the node number set via SETN */
  732. .dlm_lock = user_dlm_lock, /* wraps dlm_lock() */
  733. .dlm_unlock = user_dlm_unlock, /* wraps dlm_unlock() */
  734. .lock_status = user_dlm_lock_status, /* returns sb_status of the lksb */
  735. .lvb_valid = user_dlm_lvb_valid, /* tests DLM_SBF_VALNOTVALID */
  736. .lock_lvb = user_dlm_lvb, /* returns the LVB pointer */
  737. .plock = user_plock, /* demuxes to dlm_posix_get/unlock/lock */
  738. .dump_lksb = user_dlm_dump_lksb, /* empty function */
  739. };
  740. static struct ocfs2_stack_plugin ocfs2_user_plugin = { /* plugin descriptor passed to ocfs2_stack_glue_register() */
  741. .sp_name = "user",
  742. .sp_ops = &ocfs2_user_plugin_ops,
  743. .sp_owner = THIS_MODULE,
  744. };
  745. static int __init ocfs2_user_plugin_init(void)
  746. {
  747. int rc;
  748. rc = ocfs2_control_init();
  749. if (!rc) {
  750. rc = ocfs2_stack_glue_register(&ocfs2_user_plugin);
  751. if (rc)
  752. ocfs2_control_exit();
  753. }
  754. return rc;
  755. }
  756. static void __exit ocfs2_user_plugin_exit(void)
  757. {
  758. ocfs2_stack_glue_unregister(&ocfs2_user_plugin);
  759. ocfs2_control_exit();
  760. }
  761. MODULE_AUTHOR("Oracle");
  762. MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
  763. MODULE_LICENSE("GPL");
  764. module_init(ocfs2_user_plugin_init); /* entry point: control device, then stack glue registration */
  765. module_exit(ocfs2_user_plugin_exit); /* exit point: tears both down in reverse order */