extents.c 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. /*
  2. * linux/fs/nfs/blocklayout/extents.c
  3. *
  4. * Module for the NFSv4.1 pNFS block layout driver.
  5. *
  6. * Copyright (c) 2006 The Regents of the University of Michigan.
  7. * All rights reserved.
  8. *
  9. * Andy Adamson <andros@citi.umich.edu>
  10. * Fred Isaman <iisaman@umich.edu>
  11. *
  12. * permission is granted to use, copy, create derivative works and
  13. * redistribute this software and such derivative works for any purpose,
  14. * so long as the name of the university of michigan is not used in
  15. * any advertising or publicity pertaining to the use or distribution
  16. * of this software without specific, written prior authorization. if
  17. * the above copyright notice or any other identification of the
  18. * university of michigan is included in any copy of any portion of
  19. * this software, then the disclaimer below must also be included.
  20. *
  21. * this software is provided as is, without representation from the
  22. * university of michigan as to its fitness for any purpose, and without
  23. * warranty by the university of michigan of any kind, either express
  24. * or implied, including without limitation the implied warranties of
  25. * merchantability and fitness for a particular purpose. the regents
  26. * of the university of michigan shall not be liable for any damages,
  27. * including special, indirect, incidental, or consequential damages,
  28. * with respect to any claim arising out or in connection with the use
  29. * of the software, even if it has been or is hereafter advised of the
  30. * possibility of such damages.
  31. */
  32. #include "blocklayout.h"
  33. #define NFSDBG_FACILITY NFSDBG_PNFS_LD
  34. static void print_bl_extent(struct pnfs_block_extent *be)
  35. {
  36. dprintk("PRINT EXTENT extent %p\n", be);
  37. if (be) {
  38. dprintk(" be_f_offset %llu\n", (u64)be->be_f_offset);
  39. dprintk(" be_length %llu\n", (u64)be->be_length);
  40. dprintk(" be_v_offset %llu\n", (u64)be->be_v_offset);
  41. dprintk(" be_state %d\n", be->be_state);
  42. }
  43. }
  44. static void
  45. destroy_extent(struct kref *kref)
  46. {
  47. struct pnfs_block_extent *be;
  48. be = container_of(kref, struct pnfs_block_extent, be_refcnt);
  49. dprintk("%s be=%p\n", __func__, be);
  50. kfree(be);
  51. }
  52. void
  53. bl_put_extent(struct pnfs_block_extent *be)
  54. {
  55. if (be) {
  56. dprintk("%s enter %p (%i)\n", __func__, be,
  57. atomic_read(&be->be_refcnt.refcount));
  58. kref_put(&be->be_refcnt, destroy_extent);
  59. }
  60. }
  61. struct pnfs_block_extent *bl_alloc_extent(void)
  62. {
  63. struct pnfs_block_extent *be;
  64. be = kmalloc(sizeof(struct pnfs_block_extent), GFP_NOFS);
  65. if (!be)
  66. return NULL;
  67. INIT_LIST_HEAD(&be->be_node);
  68. kref_init(&be->be_refcnt);
  69. be->be_inval = NULL;
  70. return be;
  71. }
  72. static void print_elist(struct list_head *list)
  73. {
  74. struct pnfs_block_extent *be;
  75. dprintk("****************\n");
  76. dprintk("Extent list looks like:\n");
  77. list_for_each_entry(be, list, be_node) {
  78. print_bl_extent(be);
  79. }
  80. dprintk("****************\n");
  81. }
  82. static inline int
  83. extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new)
  84. {
  85. /* Note this assumes new->be_f_offset >= old->be_f_offset */
  86. return (new->be_state == old->be_state) &&
  87. ((new->be_state == PNFS_BLOCK_NONE_DATA) ||
  88. ((new->be_v_offset - old->be_v_offset ==
  89. new->be_f_offset - old->be_f_offset) &&
  90. new->be_mdev == old->be_mdev));
  91. }
  92. /* Adds new to appropriate list in bl, modifying new and removing existing
  93. * extents as appropriate to deal with overlaps.
  94. *
  95. * See bl_find_get_extent for list constraints.
  96. *
  97. * Refcount on new is already set. If end up not using it, or error out,
  98. * need to put the reference.
  99. *
  100. * bl->bl_ext_lock is held by caller.
  101. */
  102. int
  103. bl_add_merge_extent(struct pnfs_block_layout *bl,
  104. struct pnfs_block_extent *new)
  105. {
  106. struct pnfs_block_extent *be, *tmp;
  107. sector_t end = new->be_f_offset + new->be_length;
  108. struct list_head *list;
  109. dprintk("%s enter with be=%p\n", __func__, new);
  110. print_bl_extent(new);
  111. list = &bl->bl_extents[bl_choose_list(new->be_state)];
  112. print_elist(list);
  113. /* Scan for proper place to insert, extending new to the left
  114. * as much as possible.
  115. */
  116. list_for_each_entry_safe(be, tmp, list, be_node) {
  117. if (new->be_f_offset < be->be_f_offset)
  118. break;
  119. if (end <= be->be_f_offset + be->be_length) {
  120. /* new is a subset of existing be*/
  121. if (extents_consistent(be, new)) {
  122. dprintk("%s: new is subset, ignoring\n",
  123. __func__);
  124. bl_put_extent(new);
  125. return 0;
  126. } else
  127. goto out_err;
  128. } else if (new->be_f_offset <=
  129. be->be_f_offset + be->be_length) {
  130. /* new overlaps or abuts existing be */
  131. if (extents_consistent(be, new)) {
  132. /* extend new to fully replace be */
  133. new->be_length += new->be_f_offset -
  134. be->be_f_offset;
  135. new->be_f_offset = be->be_f_offset;
  136. new->be_v_offset = be->be_v_offset;
  137. dprintk("%s: removing %p\n", __func__, be);
  138. list_del(&be->be_node);
  139. bl_put_extent(be);
  140. } else if (new->be_f_offset !=
  141. be->be_f_offset + be->be_length)
  142. goto out_err;
  143. }
  144. }
  145. /* Note that if we never hit the above break, be will not point to a
  146. * valid extent. However, in that case &be->be_node==list.
  147. */
  148. list_add_tail(&new->be_node, &be->be_node);
  149. dprintk("%s: inserting new\n", __func__);
  150. print_elist(list);
  151. /* Scan forward for overlaps. If we find any, extend new and
  152. * remove the overlapped extent.
  153. */
  154. be = list_prepare_entry(new, list, be_node);
  155. list_for_each_entry_safe_continue(be, tmp, list, be_node) {
  156. if (end < be->be_f_offset)
  157. break;
  158. /* new overlaps or abuts existing be */
  159. if (extents_consistent(be, new)) {
  160. if (end < be->be_f_offset + be->be_length) {
  161. /* extend new to fully cover be */
  162. end = be->be_f_offset + be->be_length;
  163. new->be_length = end - new->be_f_offset;
  164. }
  165. dprintk("%s: removing %p\n", __func__, be);
  166. list_del(&be->be_node);
  167. bl_put_extent(be);
  168. } else if (end != be->be_f_offset) {
  169. list_del(&new->be_node);
  170. goto out_err;
  171. }
  172. }
  173. dprintk("%s: after merging\n", __func__);
  174. print_elist(list);
  175. /* FIXME - The per-list consistency checks have all been done,
  176. * should now check cross-list consistency.
  177. */
  178. return 0;
  179. out_err:
  180. bl_put_extent(new);
  181. return -EIO;
  182. }
  183. /* Returns extent, or NULL. If a second READ extent exists, it is returned
  184. * in cow_read, if given.
  185. *
  186. * The extents are kept in two seperate ordered lists, one for READ and NONE,
  187. * one for READWRITE and INVALID. Within each list, we assume:
  188. * 1. Extents are ordered by file offset.
  189. * 2. For any given isect, there is at most one extents that matches.
  190. */
  191. struct pnfs_block_extent *
  192. bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
  193. struct pnfs_block_extent **cow_read)
  194. {
  195. struct pnfs_block_extent *be, *cow, *ret;
  196. int i;
  197. dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
  198. cow = ret = NULL;
  199. spin_lock(&bl->bl_ext_lock);
  200. for (i = 0; i < EXTENT_LISTS; i++) {
  201. list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
  202. if (isect >= be->be_f_offset + be->be_length)
  203. break;
  204. if (isect >= be->be_f_offset) {
  205. /* We have found an extent */
  206. dprintk("%s Get %p (%i)\n", __func__, be,
  207. atomic_read(&be->be_refcnt.refcount));
  208. kref_get(&be->be_refcnt);
  209. if (!ret)
  210. ret = be;
  211. else if (be->be_state != PNFS_BLOCK_READ_DATA)
  212. bl_put_extent(be);
  213. else
  214. cow = be;
  215. break;
  216. }
  217. }
  218. if (ret &&
  219. (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
  220. break;
  221. }
  222. spin_unlock(&bl->bl_ext_lock);
  223. if (cow_read)
  224. *cow_read = cow;
  225. print_bl_extent(ret);
  226. return ret;
  227. }