member.c 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. /******************************************************************************
  2. *******************************************************************************
  3. **
  4. ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
  5. **
  6. ** This copyrighted material is made available to anyone wishing to use,
  7. ** modify, copy, or redistribute it subject to the terms and conditions
  8. ** of the GNU General Public License v.2.
  9. **
  10. *******************************************************************************
  11. ******************************************************************************/
  12. #include "dlm_internal.h"
  13. #include "lockspace.h"
  14. #include "member.h"
  15. #include "recoverd.h"
  16. #include "recover.h"
  17. #include "rcom.h"
  18. #include "config.h"
  19. /*
  20. * Following called by dlm_recoverd thread
  21. */
  22. static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
  23. {
  24. struct dlm_member *memb = NULL;
  25. struct list_head *tmp;
  26. struct list_head *newlist = &new->list;
  27. struct list_head *head = &ls->ls_nodes;
  28. list_for_each(tmp, head) {
  29. memb = list_entry(tmp, struct dlm_member, list);
  30. if (new->nodeid < memb->nodeid)
  31. break;
  32. }
  33. if (!memb)
  34. list_add_tail(newlist, head);
  35. else {
  36. /* FIXME: can use list macro here */
  37. newlist->prev = tmp->prev;
  38. newlist->next = tmp;
  39. tmp->prev->next = newlist;
  40. tmp->prev = newlist;
  41. }
  42. }
  43. static int dlm_add_member(struct dlm_ls *ls, int nodeid)
  44. {
  45. struct dlm_member *memb;
  46. int w;
  47. memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
  48. if (!memb)
  49. return -ENOMEM;
  50. w = dlm_node_weight(ls->ls_name, nodeid);
  51. if (w < 0) {
  52. kfree(memb);
  53. return w;
  54. }
  55. memb->nodeid = nodeid;
  56. memb->weight = w;
  57. add_ordered_member(ls, memb);
  58. ls->ls_num_nodes++;
  59. return 0;
  60. }
  61. static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb)
  62. {
  63. list_move(&memb->list, &ls->ls_nodes_gone);
  64. ls->ls_num_nodes--;
  65. }
  66. static int dlm_is_member(struct dlm_ls *ls, int nodeid)
  67. {
  68. struct dlm_member *memb;
  69. list_for_each_entry(memb, &ls->ls_nodes, list) {
  70. if (memb->nodeid == nodeid)
  71. return 1;
  72. }
  73. return 0;
  74. }
  75. int dlm_is_removed(struct dlm_ls *ls, int nodeid)
  76. {
  77. struct dlm_member *memb;
  78. list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
  79. if (memb->nodeid == nodeid)
  80. return 1;
  81. }
  82. return 0;
  83. }
  84. static void clear_memb_list(struct list_head *head)
  85. {
  86. struct dlm_member *memb;
  87. while (!list_empty(head)) {
  88. memb = list_entry(head->next, struct dlm_member, list);
  89. list_del(&memb->list);
  90. kfree(memb);
  91. }
  92. }
  93. void dlm_clear_members(struct dlm_ls *ls)
  94. {
  95. clear_memb_list(&ls->ls_nodes);
  96. ls->ls_num_nodes = 0;
  97. }
  98. void dlm_clear_members_gone(struct dlm_ls *ls)
  99. {
  100. clear_memb_list(&ls->ls_nodes_gone);
  101. }
  102. static void make_member_array(struct dlm_ls *ls)
  103. {
  104. struct dlm_member *memb;
  105. int i, w, x = 0, total = 0, all_zero = 0, *array;
  106. kfree(ls->ls_node_array);
  107. ls->ls_node_array = NULL;
  108. list_for_each_entry(memb, &ls->ls_nodes, list) {
  109. if (memb->weight)
  110. total += memb->weight;
  111. }
  112. /* all nodes revert to weight of 1 if all have weight 0 */
  113. if (!total) {
  114. total = ls->ls_num_nodes;
  115. all_zero = 1;
  116. }
  117. ls->ls_total_weight = total;
  118. array = kmalloc(sizeof(int) * total, GFP_KERNEL);
  119. if (!array)
  120. return;
  121. list_for_each_entry(memb, &ls->ls_nodes, list) {
  122. if (!all_zero && !memb->weight)
  123. continue;
  124. if (all_zero)
  125. w = 1;
  126. else
  127. w = memb->weight;
  128. DLM_ASSERT(x < total, printk("total %d x %d\n", total, x););
  129. for (i = 0; i < w; i++)
  130. array[x++] = memb->nodeid;
  131. }
  132. ls->ls_node_array = array;
  133. }
  134. /* send a status request to all members just to establish comms connections */
  135. static int ping_members(struct dlm_ls *ls)
  136. {
  137. struct dlm_member *memb;
  138. int error = 0;
  139. list_for_each_entry(memb, &ls->ls_nodes, list) {
  140. error = dlm_recovery_stopped(ls);
  141. if (error)
  142. break;
  143. error = dlm_rcom_status(ls, memb->nodeid);
  144. if (error)
  145. break;
  146. }
  147. if (error)
  148. log_debug(ls, "ping_members aborted %d last nodeid %d",
  149. error, ls->ls_recover_nodeid);
  150. return error;
  151. }
  152. int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
  153. {
  154. struct dlm_member *memb, *safe;
  155. int i, error, found, pos = 0, neg = 0, low = -1;
  156. /* previously removed members that we've not finished removing need to
  157. count as a negative change so the "neg" recovery steps will happen */
  158. list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
  159. log_debug(ls, "prev removed member %d", memb->nodeid);
  160. neg++;
  161. }
  162. /* move departed members from ls_nodes to ls_nodes_gone */
  163. list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
  164. found = 0;
  165. for (i = 0; i < rv->node_count; i++) {
  166. if (memb->nodeid == rv->nodeids[i]) {
  167. found = 1;
  168. break;
  169. }
  170. }
  171. if (!found) {
  172. neg++;
  173. dlm_remove_member(ls, memb);
  174. log_debug(ls, "remove member %d", memb->nodeid);
  175. }
  176. }
  177. /* add new members to ls_nodes */
  178. for (i = 0; i < rv->node_count; i++) {
  179. if (dlm_is_member(ls, rv->nodeids[i]))
  180. continue;
  181. dlm_add_member(ls, rv->nodeids[i]);
  182. pos++;
  183. log_debug(ls, "add member %d", rv->nodeids[i]);
  184. }
  185. list_for_each_entry(memb, &ls->ls_nodes, list) {
  186. if (low == -1 || memb->nodeid < low)
  187. low = memb->nodeid;
  188. }
  189. ls->ls_low_nodeid = low;
  190. make_member_array(ls);
  191. dlm_set_recover_status(ls, DLM_RS_NODES);
  192. *neg_out = neg;
  193. error = ping_members(ls);
  194. if (!error || error == -EPROTO) {
  195. /* new_lockspace() may be waiting to know if the config
  196. is good or bad */
  197. ls->ls_members_result = error;
  198. complete(&ls->ls_members_done);
  199. }
  200. if (error)
  201. goto out;
  202. error = dlm_recover_members_wait(ls);
  203. out:
  204. log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error);
  205. return error;
  206. }
  207. /*
  208. * Following called from lockspace.c
  209. */
  210. int dlm_ls_stop(struct dlm_ls *ls)
  211. {
  212. int new;
  213. /*
  214. * A stop cancels any recovery that's in progress (see RECOVERY_STOP,
  215. * dlm_recovery_stopped()) and prevents any new locks from being
  216. * processed (see RUNNING, dlm_locking_stopped()).
  217. */
  218. spin_lock(&ls->ls_recover_lock);
  219. set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
  220. new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
  221. ls->ls_recover_seq++;
  222. spin_unlock(&ls->ls_recover_lock);
  223. /*
  224. * This in_recovery lock does two things:
  225. *
  226. * 1) Keeps this function from returning until all threads are out
  227. * of locking routines and locking is truely stopped.
  228. * 2) Keeps any new requests from being processed until it's unlocked
  229. * when recovery is complete.
  230. */
  231. if (new)
  232. down_write(&ls->ls_in_recovery);
  233. /*
  234. * The recoverd suspend/resume makes sure that dlm_recoverd (if
  235. * running) has noticed the clearing of RUNNING above and quit
  236. * processing the previous recovery. This will be true for all nodes
  237. * before any nodes start the new recovery.
  238. */
  239. dlm_recoverd_suspend(ls);
  240. ls->ls_recover_status = 0;
  241. dlm_recoverd_resume(ls);
  242. if (!ls->ls_recover_begin)
  243. ls->ls_recover_begin = jiffies;
  244. return 0;
  245. }
  246. int dlm_ls_start(struct dlm_ls *ls)
  247. {
  248. struct dlm_recover *rv = NULL, *rv_old;
  249. int *ids = NULL;
  250. int error, count;
  251. rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
  252. if (!rv)
  253. return -ENOMEM;
  254. error = count = dlm_nodeid_list(ls->ls_name, &ids);
  255. if (error <= 0)
  256. goto fail;
  257. spin_lock(&ls->ls_recover_lock);
  258. /* the lockspace needs to be stopped before it can be started */
  259. if (!dlm_locking_stopped(ls)) {
  260. spin_unlock(&ls->ls_recover_lock);
  261. log_error(ls, "start ignored: lockspace running");
  262. error = -EINVAL;
  263. goto fail;
  264. }
  265. rv->nodeids = ids;
  266. rv->node_count = count;
  267. rv->seq = ++ls->ls_recover_seq;
  268. rv_old = ls->ls_recover_args;
  269. ls->ls_recover_args = rv;
  270. spin_unlock(&ls->ls_recover_lock);
  271. if (rv_old) {
  272. kfree(rv_old->nodeids);
  273. kfree(rv_old);
  274. }
  275. dlm_recoverd_kick(ls);
  276. return 0;
  277. fail:
  278. kfree(rv);
  279. kfree(ids);
  280. return error;
  281. }