/* fs/dlm/plock.c — forward posix (fcntl) lock requests to userspace (dlm_controld) */
  1. /*
  2. * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
  3. *
  4. * This copyrighted material is made available to anyone wishing to use,
  5. * modify, copy, or redistribute it subject to the terms and conditions
  6. * of the GNU General Public License version 2.
  7. */
  8. #include <linux/fs.h>
  9. #include <linux/miscdevice.h>
  10. #include <linux/poll.h>
  11. #include <linux/dlm.h>
  12. #include <linux/dlm_plock.h>
  13. #include <linux/slab.h>
  14. #include "dlm_internal.h"
  15. #include "lockspace.h"
/* Protects send_list and recv_list. */
static spinlock_t ops_lock;
/* Ops queued for userspace to pick up via dev_read(). */
static struct list_head send_list;
/* Ops handed to userspace, awaiting a result via dev_write(). */
static struct list_head recv_list;
/* Woken by send_op() when a new op is queued (dev_read/dev_poll wait here). */
static wait_queue_head_t send_wq;
/* Woken by dev_write() when a synchronous op's result has arrived. */
static wait_queue_head_t recv_wq;

/* One posix-lock request in flight between the kernel and userspace. */
struct plock_op {
	struct list_head list;		/* on send_list or recv_list, under ops_lock */
	int done;			/* set by dev_write() when the result is in */
	struct dlm_plock_info info;	/* request/result payload exchanged with userspace */
};

/* Extended op, allocated only for DLM_PLOCK_OP_LOCK requests; the extra
   fields support the asynchronous (fl_lmops->fl_grant) completion path. */
struct plock_xop {
	struct plock_op xop;	/* must stay first: code casts plock_op* to plock_xop* */
	void *callback;		/* fl_lmops->fl_grant, invoked on completion */
	void *fl;		/* original file_lock to pass back to the callback */
	void *file;		/* file the vfs-side lock is applied to on grant */
	struct file_lock flc;	/* private copy of the lock for posix_lock_file() */
};
/* Stamp the kernel's plock ABI version into an outgoing request. */
static inline void set_version(struct dlm_plock_info *info)
{
	info->version[0] = DLM_PLOCK_VERSION_MAJOR;
	info->version[1] = DLM_PLOCK_VERSION_MINOR;
	info->version[2] = DLM_PLOCK_VERSION_PATCH;
}
  39. static int check_version(struct dlm_plock_info *info)
  40. {
  41. if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
  42. (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
  43. log_print("plock device version mismatch: "
  44. "kernel (%u.%u.%u), user (%u.%u.%u)",
  45. DLM_PLOCK_VERSION_MAJOR,
  46. DLM_PLOCK_VERSION_MINOR,
  47. DLM_PLOCK_VERSION_PATCH,
  48. info->version[0],
  49. info->version[1],
  50. info->version[2]);
  51. return -EINVAL;
  52. }
  53. return 0;
  54. }
/*
 * Queue an op for userspace: stamp the ABI version, append the op to
 * send_list under ops_lock, then wake any reader blocked in dev_read()
 * or polling in dev_poll().
 */
static void send_op(struct plock_op *op)
{
	set_version(&op->info);
	INIT_LIST_HEAD(&op->list);
	spin_lock(&ops_lock);
	list_add_tail(&op->list, &send_list);
	spin_unlock(&ops_lock);
	wake_up(&send_wq);
}
/*
 * Ask userspace to acquire a cluster-wide posix lock.
 *
 * Two completion modes:
 *  - async (fl->fl_lmops->fl_grant is set, i.e. the caller is lockd):
 *    return FILE_LOCK_DEFERRED immediately; the result is delivered
 *    later via dlm_plock_callback(), which also frees the op.
 *  - sync: sleep on recv_wq until dev_write() marks the op done, then
 *    mirror a granted lock into the local vfs lock table and free the
 *    op here.
 *
 * Returns 0 on success, FILE_LOCK_DEFERRED for the async case, or a
 * negative errno.
 */
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		int cmd, struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	struct plock_xop *xop;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	/* lock ops always get the larger xop; the extra fields are only
	   filled in for the async path below */
	xop = kzalloc(sizeof(*xop), GFP_NOFS);
	if (!xop) {
		rv = -ENOMEM;
		goto out;
	}

	op = &xop->xop;
	op->info.optype = DLM_PLOCK_OP_LOCK;
	op->info.pid = fl->fl_pid;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.wait = IS_SETLKW(cmd);
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
		/* fl_owner is lockd which doesn't distinguish
		   processes on the nfs client */
		op->info.owner = (__u64) fl->fl_pid;
		xop->callback = fl->fl_lmops->fl_grant;
		/* keep a private copy of the lock for the grant callback;
		   the caller's fl may change before the result arrives */
		locks_init_lock(&xop->flc);
		locks_copy_lock(&xop->flc, fl);
		xop->fl = fl;
		xop->file = file;
	} else {
		op->info.owner = (__u64)(long) fl->fl_owner;
		xop->callback = NULL;
	}

	send_op(op);

	if (xop->callback == NULL)
		wait_event(recv_wq, (op->done != 0));
	else {
		/* async: ownership of xop passes to dlm_plock_callback(),
		   which frees it after notifying lockd */
		rv = FILE_LOCK_DEFERRED;
		goto out;
	}

	/* a completed op was removed from recv_list by dev_write(); still
	   being linked here indicates a bug, so log and unlink it */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		log_error(ls, "dlm_posix_lock: op on list %llx",
			  (unsigned long long)number);
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	if (!rv) {
		/* mirror the granted lock into local vfs bookkeeping; the
		   cluster lock is authoritative, so only log on failure */
		if (posix_lock_file_wait(file, fl) < 0)
			log_error(ls, "dlm_posix_lock: vfs lock error %llx",
				  (unsigned long long)number);
	}

	kfree(xop);
out:
	dlm_put_lockspace(ls);
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_lock);
/* Returns failure iff a successful lock operation should be canceled */
/*
 * Completion path for asynchronous (lockd) lock requests: called from
 * dev_write() when userspace posts the result for an op that carries a
 * grant callback.  Applies the granted lock locally, notifies lockd
 * via the saved fl_grant callback, and frees the xop.
 */
static int dlm_plock_callback(struct plock_op *op)
{
	struct file *file;
	struct file_lock *fl;
	struct file_lock *flc;
	int (*notify)(void *, void *, int) = NULL;
	/* safe: async ops are always allocated as plock_xop in dlm_posix_lock() */
	struct plock_xop *xop = (struct plock_xop *)op;
	int rv = 0;

	/* dev_write() unlinks the op before calling us; still being on a
	   list here indicates a bug, so log and unlink */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		log_print("dlm_plock_callback: op on list %llx",
			  (unsigned long long)op->info.number);
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	/* check if the following 2 are still valid or make a copy */
	file = xop->file;
	flc = &xop->flc;
	fl = xop->fl;
	notify = xop->callback;

	if (op->info.rv) {
		/* userspace denied the lock; report the error to lockd */
		notify(fl, NULL, op->info.rv);
		goto out;
	}

	/* got fs lock; bookkeep locally as well: */
	flc->fl_flags &= ~FL_SLEEP;
	if (posix_lock_file(file, flc, NULL)) {
		/*
		 * This can only happen in the case of kmalloc() failure.
		 * The filesystem's own lock is the authoritative lock,
		 * so a failure to get the lock locally is not a disaster.
		 * As long as the fs cannot reliably cancel locks (especially
		 * in a low-memory situation), we're better off ignoring
		 * this failure than trying to recover.
		 */
		log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
			  (unsigned long long)op->info.number, file, fl);
	}

	rv = notify(fl, NULL, 0);
	if (rv) {
		/* XXX: We need to cancel the fs lock here: */
		log_print("dlm_plock_callback: lock granted after lock request "
			  "failed; dangling lock!\n");
		goto out;
	}

out:
	kfree(xop);
	return rv;
}
  177. int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
  178. struct file_lock *fl)
  179. {
  180. struct dlm_ls *ls;
  181. struct plock_op *op;
  182. int rv;
  183. ls = dlm_find_lockspace_local(lockspace);
  184. if (!ls)
  185. return -EINVAL;
  186. op = kzalloc(sizeof(*op), GFP_NOFS);
  187. if (!op) {
  188. rv = -ENOMEM;
  189. goto out;
  190. }
  191. if (posix_lock_file_wait(file, fl) < 0)
  192. log_error(ls, "dlm_posix_unlock: vfs unlock error %llx",
  193. (unsigned long long)number);
  194. op->info.optype = DLM_PLOCK_OP_UNLOCK;
  195. op->info.pid = fl->fl_pid;
  196. op->info.fsid = ls->ls_global_id;
  197. op->info.number = number;
  198. op->info.start = fl->fl_start;
  199. op->info.end = fl->fl_end;
  200. if (fl->fl_lmops && fl->fl_lmops->fl_grant)
  201. op->info.owner = (__u64) fl->fl_pid;
  202. else
  203. op->info.owner = (__u64)(long) fl->fl_owner;
  204. send_op(op);
  205. wait_event(recv_wq, (op->done != 0));
  206. spin_lock(&ops_lock);
  207. if (!list_empty(&op->list)) {
  208. log_error(ls, "dlm_posix_unlock: op on list %llx",
  209. (unsigned long long)number);
  210. list_del(&op->list);
  211. }
  212. spin_unlock(&ops_lock);
  213. rv = op->info.rv;
  214. if (rv == -ENOENT)
  215. rv = 0;
  216. kfree(op);
  217. out:
  218. dlm_put_lockspace(ls);
  219. return rv;
  220. }
  221. EXPORT_SYMBOL_GPL(dlm_posix_unlock);
  222. int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
  223. struct file_lock *fl)
  224. {
  225. struct dlm_ls *ls;
  226. struct plock_op *op;
  227. int rv;
  228. ls = dlm_find_lockspace_local(lockspace);
  229. if (!ls)
  230. return -EINVAL;
  231. op = kzalloc(sizeof(*op), GFP_NOFS);
  232. if (!op) {
  233. rv = -ENOMEM;
  234. goto out;
  235. }
  236. op->info.optype = DLM_PLOCK_OP_GET;
  237. op->info.pid = fl->fl_pid;
  238. op->info.ex = (fl->fl_type == F_WRLCK);
  239. op->info.fsid = ls->ls_global_id;
  240. op->info.number = number;
  241. op->info.start = fl->fl_start;
  242. op->info.end = fl->fl_end;
  243. if (fl->fl_lmops && fl->fl_lmops->fl_grant)
  244. op->info.owner = (__u64) fl->fl_pid;
  245. else
  246. op->info.owner = (__u64)(long) fl->fl_owner;
  247. send_op(op);
  248. wait_event(recv_wq, (op->done != 0));
  249. spin_lock(&ops_lock);
  250. if (!list_empty(&op->list)) {
  251. log_error(ls, "dlm_posix_get: op on list %llx",
  252. (unsigned long long)number);
  253. list_del(&op->list);
  254. }
  255. spin_unlock(&ops_lock);
  256. /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
  257. -ENOENT if there are no locks on the file */
  258. rv = op->info.rv;
  259. fl->fl_type = F_UNLCK;
  260. if (rv == -ENOENT)
  261. rv = 0;
  262. else if (rv > 0) {
  263. locks_init_lock(fl);
  264. fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
  265. fl->fl_flags = FL_POSIX;
  266. fl->fl_pid = op->info.pid;
  267. fl->fl_start = op->info.start;
  268. fl->fl_end = op->info.end;
  269. rv = 0;
  270. }
  271. kfree(op);
  272. out:
  273. dlm_put_lockspace(ls);
  274. return rv;
  275. }
  276. EXPORT_SYMBOL_GPL(dlm_posix_get);
  277. /* a read copies out one plock request from the send list */
  278. static ssize_t dev_read(struct file *file, char __user *u, size_t count,
  279. loff_t *ppos)
  280. {
  281. struct dlm_plock_info info;
  282. struct plock_op *op = NULL;
  283. if (count < sizeof(info))
  284. return -EINVAL;
  285. spin_lock(&ops_lock);
  286. if (!list_empty(&send_list)) {
  287. op = list_entry(send_list.next, struct plock_op, list);
  288. list_move(&op->list, &recv_list);
  289. memcpy(&info, &op->info, sizeof(info));
  290. }
  291. spin_unlock(&ops_lock);
  292. if (!op)
  293. return -EAGAIN;
  294. if (copy_to_user(u, &info, sizeof(info)))
  295. return -EFAULT;
  296. return sizeof(info);
  297. }
  298. /* a write copies in one plock result that should match a plock_op
  299. on the recv list */
  300. static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
  301. loff_t *ppos)
  302. {
  303. struct dlm_plock_info info;
  304. struct plock_op *op;
  305. int found = 0, do_callback = 0;
  306. if (count != sizeof(info))
  307. return -EINVAL;
  308. if (copy_from_user(&info, u, sizeof(info)))
  309. return -EFAULT;
  310. if (check_version(&info))
  311. return -EINVAL;
  312. spin_lock(&ops_lock);
  313. list_for_each_entry(op, &recv_list, list) {
  314. if (op->info.fsid == info.fsid &&
  315. op->info.number == info.number &&
  316. op->info.owner == info.owner) {
  317. struct plock_xop *xop = (struct plock_xop *)op;
  318. list_del_init(&op->list);
  319. memcpy(&op->info, &info, sizeof(info));
  320. if (xop->callback)
  321. do_callback = 1;
  322. else
  323. op->done = 1;
  324. found = 1;
  325. break;
  326. }
  327. }
  328. spin_unlock(&ops_lock);
  329. if (found) {
  330. if (do_callback)
  331. dlm_plock_callback(op);
  332. else
  333. wake_up(&recv_wq);
  334. } else
  335. log_print("dev_write no op %x %llx", info.fsid,
  336. (unsigned long long)info.number);
  337. return count;
  338. }
  339. static unsigned int dev_poll(struct file *file, poll_table *wait)
  340. {
  341. unsigned int mask = 0;
  342. poll_wait(file, &send_wq, wait);
  343. spin_lock(&ops_lock);
  344. if (!list_empty(&send_list))
  345. mask = POLLIN | POLLRDNORM;
  346. spin_unlock(&ops_lock);
  347. return mask;
  348. }
/* Char-device ops backing the plock misc device userspace opens. */
static const struct file_operations dev_fops = {
	.read = dev_read,
	.write = dev_write,
	.poll = dev_poll,
	.owner = THIS_MODULE
};

/* Misc device registered in dlm_plock_init(); dynamic minor. */
static struct miscdevice plock_dev_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = DLM_PLOCK_MISC_NAME,
	.fops = &dev_fops
};
/*
 * Initialize the shared lists, lock, and waitqueues, then register the
 * misc device userspace uses to service plock ops.
 * Returns 0 or the misc_register() error.
 */
int dlm_plock_init(void)
{
	int rv;
	spin_lock_init(&ops_lock);
	INIT_LIST_HEAD(&send_list);
	INIT_LIST_HEAD(&recv_list);
	init_waitqueue_head(&send_wq);
	init_waitqueue_head(&recv_wq);
	rv = misc_register(&plock_dev_misc);
	if (rv)
		log_print("dlm_plock_init: misc_register failed %d", rv);
	return rv;
}
/* Unregister the plock misc device; failure is only logged. */
void dlm_plock_exit(void)
{
	if (misc_deregister(&plock_dev_misc) < 0)
		log_print("dlm_plock_exit: misc_deregister failed");
}