xfs_btree.c 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578
  1. /*
  2. * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  3. * All Rights Reserved.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write the Free Software Foundation,
  16. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "xfs.h"
  19. #include "xfs_fs.h"
  20. #include "xfs_types.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_log.h"
  23. #include "xfs_inum.h"
  24. #include "xfs_trans.h"
  25. #include "xfs_sb.h"
  26. #include "xfs_ag.h"
  27. #include "xfs_dir2.h"
  28. #include "xfs_dmapi.h"
  29. #include "xfs_mount.h"
  30. #include "xfs_bmap_btree.h"
  31. #include "xfs_alloc_btree.h"
  32. #include "xfs_ialloc_btree.h"
  33. #include "xfs_dir2_sf.h"
  34. #include "xfs_attr_sf.h"
  35. #include "xfs_dinode.h"
  36. #include "xfs_inode.h"
  37. #include "xfs_inode_item.h"
  38. #include "xfs_btree.h"
  39. #include "xfs_btree_trace.h"
  40. #include "xfs_ialloc.h"
  41. #include "xfs_error.h"
  42. /*
  43. * Cursor allocation zone.
  44. */
  45. kmem_zone_t *xfs_btree_cur_zone;
  46. /*
  47. * Btree magic numbers.
  48. */
  49. const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
  50. XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
  51. };
  52. /*
  53. * External routines.
  54. */
  55. #ifdef DEBUG
  56. /*
  57. * Debug routine: check that keys are in the right order.
  58. */
  59. void
  60. xfs_btree_check_key(
  61. xfs_btnum_t btnum, /* btree identifier */
  62. void *ak1, /* pointer to left (lower) key */
  63. void *ak2) /* pointer to right (higher) key */
  64. {
  65. switch (btnum) {
  66. case XFS_BTNUM_BNO: {
  67. xfs_alloc_key_t *k1;
  68. xfs_alloc_key_t *k2;
  69. k1 = ak1;
  70. k2 = ak2;
  71. ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
  72. break;
  73. }
  74. case XFS_BTNUM_CNT: {
  75. xfs_alloc_key_t *k1;
  76. xfs_alloc_key_t *k2;
  77. k1 = ak1;
  78. k2 = ak2;
  79. ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
  80. (k1->ar_blockcount == k2->ar_blockcount &&
  81. be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
  82. break;
  83. }
  84. case XFS_BTNUM_BMAP: {
  85. xfs_bmbt_key_t *k1;
  86. xfs_bmbt_key_t *k2;
  87. k1 = ak1;
  88. k2 = ak2;
  89. ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
  90. break;
  91. }
  92. case XFS_BTNUM_INO: {
  93. xfs_inobt_key_t *k1;
  94. xfs_inobt_key_t *k2;
  95. k1 = ak1;
  96. k2 = ak2;
  97. ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
  98. break;
  99. }
  100. default:
  101. ASSERT(0);
  102. }
  103. }
  104. /*
  105. * Debug routine: check that records are in the right order.
  106. */
  107. void
  108. xfs_btree_check_rec(
  109. xfs_btnum_t btnum, /* btree identifier */
  110. void *ar1, /* pointer to left (lower) record */
  111. void *ar2) /* pointer to right (higher) record */
  112. {
  113. switch (btnum) {
  114. case XFS_BTNUM_BNO: {
  115. xfs_alloc_rec_t *r1;
  116. xfs_alloc_rec_t *r2;
  117. r1 = ar1;
  118. r2 = ar2;
  119. ASSERT(be32_to_cpu(r1->ar_startblock) +
  120. be32_to_cpu(r1->ar_blockcount) <=
  121. be32_to_cpu(r2->ar_startblock));
  122. break;
  123. }
  124. case XFS_BTNUM_CNT: {
  125. xfs_alloc_rec_t *r1;
  126. xfs_alloc_rec_t *r2;
  127. r1 = ar1;
  128. r2 = ar2;
  129. ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
  130. (r1->ar_blockcount == r2->ar_blockcount &&
  131. be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
  132. break;
  133. }
  134. case XFS_BTNUM_BMAP: {
  135. xfs_bmbt_rec_t *r1;
  136. xfs_bmbt_rec_t *r2;
  137. r1 = ar1;
  138. r2 = ar2;
  139. ASSERT(xfs_bmbt_disk_get_startoff(r1) +
  140. xfs_bmbt_disk_get_blockcount(r1) <=
  141. xfs_bmbt_disk_get_startoff(r2));
  142. break;
  143. }
  144. case XFS_BTNUM_INO: {
  145. xfs_inobt_rec_t *r1;
  146. xfs_inobt_rec_t *r2;
  147. r1 = ar1;
  148. r2 = ar2;
  149. ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
  150. be32_to_cpu(r2->ir_startino));
  151. break;
  152. }
  153. default:
  154. ASSERT(0);
  155. }
  156. }
  157. #endif /* DEBUG */
  158. int /* error (0 or EFSCORRUPTED) */
  159. xfs_btree_check_lblock(
  160. struct xfs_btree_cur *cur, /* btree cursor */
  161. struct xfs_btree_lblock *block, /* btree long form block pointer */
  162. int level, /* level of the btree block */
  163. struct xfs_buf *bp) /* buffer for block, if any */
  164. {
  165. int lblock_ok; /* block passes checks */
  166. struct xfs_mount *mp; /* file system mount point */
  167. mp = cur->bc_mp;
  168. lblock_ok =
  169. be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
  170. be16_to_cpu(block->bb_level) == level &&
  171. be16_to_cpu(block->bb_numrecs) <=
  172. cur->bc_ops->get_maxrecs(cur, level) &&
  173. block->bb_leftsib &&
  174. (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
  175. XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
  176. block->bb_rightsib &&
  177. (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
  178. XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
  179. if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
  180. XFS_ERRTAG_BTREE_CHECK_LBLOCK,
  181. XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
  182. if (bp)
  183. xfs_buftrace("LBTREE ERROR", bp);
  184. XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
  185. mp);
  186. return XFS_ERROR(EFSCORRUPTED);
  187. }
  188. return 0;
  189. }
  190. int /* error (0 or EFSCORRUPTED) */
  191. xfs_btree_check_sblock(
  192. struct xfs_btree_cur *cur, /* btree cursor */
  193. struct xfs_btree_sblock *block, /* btree short form block pointer */
  194. int level, /* level of the btree block */
  195. struct xfs_buf *bp) /* buffer containing block */
  196. {
  197. struct xfs_buf *agbp; /* buffer for ag. freespace struct */
  198. struct xfs_agf *agf; /* ag. freespace structure */
  199. xfs_agblock_t agflen; /* native ag. freespace length */
  200. int sblock_ok; /* block passes checks */
  201. agbp = cur->bc_private.a.agbp;
  202. agf = XFS_BUF_TO_AGF(agbp);
  203. agflen = be32_to_cpu(agf->agf_length);
  204. sblock_ok =
  205. be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
  206. be16_to_cpu(block->bb_level) == level &&
  207. be16_to_cpu(block->bb_numrecs) <=
  208. cur->bc_ops->get_maxrecs(cur, level) &&
  209. (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
  210. be32_to_cpu(block->bb_leftsib) < agflen) &&
  211. block->bb_leftsib &&
  212. (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
  213. be32_to_cpu(block->bb_rightsib) < agflen) &&
  214. block->bb_rightsib;
  215. if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
  216. XFS_ERRTAG_BTREE_CHECK_SBLOCK,
  217. XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
  218. if (bp)
  219. xfs_buftrace("SBTREE ERROR", bp);
  220. XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
  221. cur->bc_mp);
  222. return XFS_ERROR(EFSCORRUPTED);
  223. }
  224. return 0;
  225. }
  226. /*
  227. * Debug routine: check that block header is ok.
  228. */
  229. int
  230. xfs_btree_check_block(
  231. struct xfs_btree_cur *cur, /* btree cursor */
  232. struct xfs_btree_block *block, /* generic btree block pointer */
  233. int level, /* level of the btree block */
  234. struct xfs_buf *bp) /* buffer containing block, if any */
  235. {
  236. if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
  237. return xfs_btree_check_lblock(cur,
  238. (struct xfs_btree_lblock *)block, level, bp);
  239. } else {
  240. return xfs_btree_check_sblock(cur,
  241. (struct xfs_btree_sblock *)block, level, bp);
  242. }
  243. }
  244. /*
  245. * Check that (long) pointer is ok.
  246. */
  247. int /* error (0 or EFSCORRUPTED) */
  248. xfs_btree_check_lptr(
  249. struct xfs_btree_cur *cur, /* btree cursor */
  250. xfs_dfsbno_t bno, /* btree block disk address */
  251. int level) /* btree block level */
  252. {
  253. XFS_WANT_CORRUPTED_RETURN(
  254. level > 0 &&
  255. bno != NULLDFSBNO &&
  256. XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
  257. return 0;
  258. }
  259. /*
  260. * Check that (short) pointer is ok.
  261. */
  262. int /* error (0 or EFSCORRUPTED) */
  263. xfs_btree_check_sptr(
  264. struct xfs_btree_cur *cur, /* btree cursor */
  265. xfs_agblock_t bno, /* btree block disk address */
  266. int level) /* btree block level */
  267. {
  268. xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
  269. XFS_WANT_CORRUPTED_RETURN(
  270. level > 0 &&
  271. bno != NULLAGBLOCK &&
  272. bno != 0 &&
  273. bno < agblocks);
  274. return 0;
  275. }
  276. /*
  277. * Check that block ptr is ok.
  278. */
  279. int /* error (0 or EFSCORRUPTED) */
  280. xfs_btree_check_ptr(
  281. struct xfs_btree_cur *cur, /* btree cursor */
  282. union xfs_btree_ptr *ptr, /* btree block disk address */
  283. int index, /* offset from ptr to check */
  284. int level) /* btree block level */
  285. {
  286. if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
  287. return xfs_btree_check_lptr(cur,
  288. be64_to_cpu((&ptr->l)[index]), level);
  289. } else {
  290. return xfs_btree_check_sptr(cur,
  291. be32_to_cpu((&ptr->s)[index]), level);
  292. }
  293. }
  294. /*
  295. * Delete the btree cursor.
  296. */
  297. void
  298. xfs_btree_del_cursor(
  299. xfs_btree_cur_t *cur, /* btree cursor */
  300. int error) /* del because of error */
  301. {
  302. int i; /* btree level */
  303. /*
  304. * Clear the buffer pointers, and release the buffers.
  305. * If we're doing this in the face of an error, we
  306. * need to make sure to inspect all of the entries
  307. * in the bc_bufs array for buffers to be unlocked.
  308. * This is because some of the btree code works from
  309. * level n down to 0, and if we get an error along
  310. * the way we won't have initialized all the entries
  311. * down to 0.
  312. */
  313. for (i = 0; i < cur->bc_nlevels; i++) {
  314. if (cur->bc_bufs[i])
  315. xfs_btree_setbuf(cur, i, NULL);
  316. else if (!error)
  317. break;
  318. }
  319. /*
  320. * Can't free a bmap cursor without having dealt with the
  321. * allocated indirect blocks' accounting.
  322. */
  323. ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
  324. cur->bc_private.b.allocated == 0);
  325. /*
  326. * Free the cursor.
  327. */
  328. kmem_zone_free(xfs_btree_cur_zone, cur);
  329. }
  330. /*
  331. * Duplicate the btree cursor.
  332. * Allocate a new one, copy the record, re-get the buffers.
  333. */
  334. int /* error */
  335. xfs_btree_dup_cursor(
  336. xfs_btree_cur_t *cur, /* input cursor */
  337. xfs_btree_cur_t **ncur) /* output cursor */
  338. {
  339. xfs_buf_t *bp; /* btree block's buffer pointer */
  340. int error; /* error return value */
  341. int i; /* level number of btree block */
  342. xfs_mount_t *mp; /* mount structure for filesystem */
  343. xfs_btree_cur_t *new; /* new cursor value */
  344. xfs_trans_t *tp; /* transaction pointer, can be NULL */
  345. tp = cur->bc_tp;
  346. mp = cur->bc_mp;
  347. /*
  348. * Allocate a new cursor like the old one.
  349. */
  350. new = cur->bc_ops->dup_cursor(cur);
  351. /*
  352. * Copy the record currently in the cursor.
  353. */
  354. new->bc_rec = cur->bc_rec;
  355. /*
  356. * For each level current, re-get the buffer and copy the ptr value.
  357. */
  358. for (i = 0; i < new->bc_nlevels; i++) {
  359. new->bc_ptrs[i] = cur->bc_ptrs[i];
  360. new->bc_ra[i] = cur->bc_ra[i];
  361. if ((bp = cur->bc_bufs[i])) {
  362. if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
  363. XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
  364. xfs_btree_del_cursor(new, error);
  365. *ncur = NULL;
  366. return error;
  367. }
  368. new->bc_bufs[i] = bp;
  369. ASSERT(bp);
  370. ASSERT(!XFS_BUF_GETERROR(bp));
  371. } else
  372. new->bc_bufs[i] = NULL;
  373. }
  374. *ncur = new;
  375. return 0;
  376. }
  377. /*
  378. * XFS btree block layout and addressing:
  379. *
  380. * There are two types of blocks in the btree: leaf and non-leaf blocks.
  381. *
  382. * The leaf record start with a header then followed by records containing
  383. * the values. A non-leaf block also starts with the same header, and
  384. * then first contains lookup keys followed by an equal number of pointers
  385. * to the btree blocks at the previous level.
  386. *
  387. * +--------+-------+-------+-------+-------+-------+-------+
  388. * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
  389. * +--------+-------+-------+-------+-------+-------+-------+
  390. *
  391. * +--------+-------+-------+-------+-------+-------+-------+
  392. * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
  393. * +--------+-------+-------+-------+-------+-------+-------+
  394. *
  395. * The header is called struct xfs_btree_block for reasons better left unknown
  396. * and comes in different versions for short (32bit) and long (64bit) block
  397. * pointers. The record and key structures are defined by the btree instances
  398. * and opaque to the btree core. The block pointers are simple disk endian
  399. * integers, available in a short (32bit) and long (64bit) variant.
  400. *
  401. * The helpers below calculate the offset of a given record, key or pointer
  402. * into a btree block (xfs_btree_*_offset) or return a pointer to the given
  403. * record, key or pointer (xfs_btree_*_addr). Note that all addressing
  404. * inside the btree block is done using indices starting at one, not zero!
  405. */
  406. /*
  407. * Return size of the btree block header for this btree instance.
  408. */
  409. static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
  410. {
  411. return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
  412. sizeof(struct xfs_btree_lblock) :
  413. sizeof(struct xfs_btree_sblock);
  414. }
  415. /*
  416. * Return size of btree block pointers for this btree instance.
  417. */
  418. static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
  419. {
  420. return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
  421. sizeof(__be64) : sizeof(__be32);
  422. }
  423. /*
  424. * Calculate offset of the n-th record in a btree block.
  425. */
  426. STATIC size_t
  427. xfs_btree_rec_offset(
  428. struct xfs_btree_cur *cur,
  429. int n)
  430. {
  431. return xfs_btree_block_len(cur) +
  432. (n - 1) * cur->bc_ops->rec_len;
  433. }
  434. /*
  435. * Calculate offset of the n-th key in a btree block.
  436. */
  437. STATIC size_t
  438. xfs_btree_key_offset(
  439. struct xfs_btree_cur *cur,
  440. int n)
  441. {
  442. return xfs_btree_block_len(cur) +
  443. (n - 1) * cur->bc_ops->key_len;
  444. }
  445. /*
  446. * Calculate offset of the n-th block pointer in a btree block.
  447. */
  448. STATIC size_t
  449. xfs_btree_ptr_offset(
  450. struct xfs_btree_cur *cur,
  451. int n,
  452. int level)
  453. {
  454. return xfs_btree_block_len(cur) +
  455. cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
  456. (n - 1) * xfs_btree_ptr_len(cur);
  457. }
  458. /*
  459. * Return a pointer to the n-th record in the btree block.
  460. */
  461. STATIC union xfs_btree_rec *
  462. xfs_btree_rec_addr(
  463. struct xfs_btree_cur *cur,
  464. int n,
  465. struct xfs_btree_block *block)
  466. {
  467. return (union xfs_btree_rec *)
  468. ((char *)block + xfs_btree_rec_offset(cur, n));
  469. }
  470. /*
  471. * Return a pointer to the n-th key in the btree block.
  472. */
  473. STATIC union xfs_btree_key *
  474. xfs_btree_key_addr(
  475. struct xfs_btree_cur *cur,
  476. int n,
  477. struct xfs_btree_block *block)
  478. {
  479. return (union xfs_btree_key *)
  480. ((char *)block + xfs_btree_key_offset(cur, n));
  481. }
  482. /*
  483. * Return a pointer to the n-th block pointer in the btree block.
  484. */
  485. STATIC union xfs_btree_ptr *
  486. xfs_btree_ptr_addr(
  487. struct xfs_btree_cur *cur,
  488. int n,
  489. struct xfs_btree_block *block)
  490. {
  491. int level = xfs_btree_get_level(block);
  492. ASSERT(block->bb_level != 0);
  493. return (union xfs_btree_ptr *)
  494. ((char *)block + xfs_btree_ptr_offset(cur, n, level));
  495. }
  496. /*
  497. * Get a the root block which is stored in the inode.
  498. *
  499. * For now this btree implementation assumes the btree root is always
  500. * stored in the if_broot field of an inode fork.
  501. */
  502. STATIC struct xfs_btree_block *
  503. xfs_btree_get_iroot(
  504. struct xfs_btree_cur *cur)
  505. {
  506. struct xfs_ifork *ifp;
  507. ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
  508. return (struct xfs_btree_block *)ifp->if_broot;
  509. }
  510. /*
  511. * Retrieve the block pointer from the cursor at the given level.
  512. * This may be an inode btree root or from a buffer.
  513. */
  514. STATIC struct xfs_btree_block * /* generic btree block pointer */
  515. xfs_btree_get_block(
  516. struct xfs_btree_cur *cur, /* btree cursor */
  517. int level, /* level in btree */
  518. struct xfs_buf **bpp) /* buffer containing the block */
  519. {
  520. if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
  521. (level == cur->bc_nlevels - 1)) {
  522. *bpp = NULL;
  523. return xfs_btree_get_iroot(cur);
  524. }
  525. *bpp = cur->bc_bufs[level];
  526. return XFS_BUF_TO_BLOCK(*bpp);
  527. }
  528. /*
  529. * Get a buffer for the block, return it with no data read.
  530. * Long-form addressing.
  531. */
  532. xfs_buf_t * /* buffer for fsbno */
  533. xfs_btree_get_bufl(
  534. xfs_mount_t *mp, /* file system mount point */
  535. xfs_trans_t *tp, /* transaction pointer */
  536. xfs_fsblock_t fsbno, /* file system block number */
  537. uint lock) /* lock flags for get_buf */
  538. {
  539. xfs_buf_t *bp; /* buffer pointer (return value) */
  540. xfs_daddr_t d; /* real disk block address */
  541. ASSERT(fsbno != NULLFSBLOCK);
  542. d = XFS_FSB_TO_DADDR(mp, fsbno);
  543. bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
  544. ASSERT(bp);
  545. ASSERT(!XFS_BUF_GETERROR(bp));
  546. return bp;
  547. }
  548. /*
  549. * Get a buffer for the block, return it with no data read.
  550. * Short-form addressing.
  551. */
  552. xfs_buf_t * /* buffer for agno/agbno */
  553. xfs_btree_get_bufs(
  554. xfs_mount_t *mp, /* file system mount point */
  555. xfs_trans_t *tp, /* transaction pointer */
  556. xfs_agnumber_t agno, /* allocation group number */
  557. xfs_agblock_t agbno, /* allocation group block number */
  558. uint lock) /* lock flags for get_buf */
  559. {
  560. xfs_buf_t *bp; /* buffer pointer (return value) */
  561. xfs_daddr_t d; /* real disk block address */
  562. ASSERT(agno != NULLAGNUMBER);
  563. ASSERT(agbno != NULLAGBLOCK);
  564. d = XFS_AGB_TO_DADDR(mp, agno, agbno);
  565. bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
  566. ASSERT(bp);
  567. ASSERT(!XFS_BUF_GETERROR(bp));
  568. return bp;
  569. }
  570. /*
  571. * Check for the cursor referring to the last block at the given level.
  572. */
  573. int /* 1=is last block, 0=not last block */
  574. xfs_btree_islastblock(
  575. xfs_btree_cur_t *cur, /* btree cursor */
  576. int level) /* level to check */
  577. {
  578. xfs_btree_block_t *block; /* generic btree block pointer */
  579. xfs_buf_t *bp; /* buffer containing block */
  580. block = xfs_btree_get_block(cur, level, &bp);
  581. xfs_btree_check_block(cur, block, level, bp);
  582. if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
  583. return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
  584. else
  585. return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
  586. }
  587. /*
  588. * Change the cursor to point to the first record at the given level.
  589. * Other levels are unaffected.
  590. */
  591. int /* success=1, failure=0 */
  592. xfs_btree_firstrec(
  593. xfs_btree_cur_t *cur, /* btree cursor */
  594. int level) /* level to change */
  595. {
  596. xfs_btree_block_t *block; /* generic btree block pointer */
  597. xfs_buf_t *bp; /* buffer containing block */
  598. /*
  599. * Get the block pointer for this level.
  600. */
  601. block = xfs_btree_get_block(cur, level, &bp);
  602. xfs_btree_check_block(cur, block, level, bp);
  603. /*
  604. * It's empty, there is no such record.
  605. */
  606. if (!block->bb_numrecs)
  607. return 0;
  608. /*
  609. * Set the ptr value to 1, that's the first record/key.
  610. */
  611. cur->bc_ptrs[level] = 1;
  612. return 1;
  613. }
  614. /*
  615. * Change the cursor to point to the last record in the current block
  616. * at the given level. Other levels are unaffected.
  617. */
  618. int /* success=1, failure=0 */
  619. xfs_btree_lastrec(
  620. xfs_btree_cur_t *cur, /* btree cursor */
  621. int level) /* level to change */
  622. {
  623. xfs_btree_block_t *block; /* generic btree block pointer */
  624. xfs_buf_t *bp; /* buffer containing block */
  625. /*
  626. * Get the block pointer for this level.
  627. */
  628. block = xfs_btree_get_block(cur, level, &bp);
  629. xfs_btree_check_block(cur, block, level, bp);
  630. /*
  631. * It's empty, there is no such record.
  632. */
  633. if (!block->bb_numrecs)
  634. return 0;
  635. /*
  636. * Set the ptr value to numrecs, that's the last record/key.
  637. */
  638. cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
  639. return 1;
  640. }
  641. /*
  642. * Compute first and last byte offsets for the fields given.
  643. * Interprets the offsets table, which contains struct field offsets.
  644. */
  645. void
  646. xfs_btree_offsets(
  647. __int64_t fields, /* bitmask of fields */
  648. const short *offsets, /* table of field offsets */
  649. int nbits, /* number of bits to inspect */
  650. int *first, /* output: first byte offset */
  651. int *last) /* output: last byte offset */
  652. {
  653. int i; /* current bit number */
  654. __int64_t imask; /* mask for current bit number */
  655. ASSERT(fields != 0);
  656. /*
  657. * Find the lowest bit, so the first byte offset.
  658. */
  659. for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
  660. if (imask & fields) {
  661. *first = offsets[i];
  662. break;
  663. }
  664. }
  665. /*
  666. * Find the highest bit, so the last byte offset.
  667. */
  668. for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
  669. if (imask & fields) {
  670. *last = offsets[i + 1] - 1;
  671. break;
  672. }
  673. }
  674. }
  675. /*
  676. * Get a buffer for the block, return it read in.
  677. * Long-form addressing.
  678. */
  679. int /* error */
  680. xfs_btree_read_bufl(
  681. xfs_mount_t *mp, /* file system mount point */
  682. xfs_trans_t *tp, /* transaction pointer */
  683. xfs_fsblock_t fsbno, /* file system block number */
  684. uint lock, /* lock flags for read_buf */
  685. xfs_buf_t **bpp, /* buffer for fsbno */
  686. int refval) /* ref count value for buffer */
  687. {
  688. xfs_buf_t *bp; /* return value */
  689. xfs_daddr_t d; /* real disk block address */
  690. int error;
  691. ASSERT(fsbno != NULLFSBLOCK);
  692. d = XFS_FSB_TO_DADDR(mp, fsbno);
  693. if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
  694. mp->m_bsize, lock, &bp))) {
  695. return error;
  696. }
  697. ASSERT(!bp || !XFS_BUF_GETERROR(bp));
  698. if (bp != NULL) {
  699. XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
  700. }
  701. *bpp = bp;
  702. return 0;
  703. }
  704. /*
  705. * Get a buffer for the block, return it read in.
  706. * Short-form addressing.
  707. */
  708. int /* error */
  709. xfs_btree_read_bufs(
  710. xfs_mount_t *mp, /* file system mount point */
  711. xfs_trans_t *tp, /* transaction pointer */
  712. xfs_agnumber_t agno, /* allocation group number */
  713. xfs_agblock_t agbno, /* allocation group block number */
  714. uint lock, /* lock flags for read_buf */
  715. xfs_buf_t **bpp, /* buffer for agno/agbno */
  716. int refval) /* ref count value for buffer */
  717. {
  718. xfs_buf_t *bp; /* return value */
  719. xfs_daddr_t d; /* real disk block address */
  720. int error;
  721. ASSERT(agno != NULLAGNUMBER);
  722. ASSERT(agbno != NULLAGBLOCK);
  723. d = XFS_AGB_TO_DADDR(mp, agno, agbno);
  724. if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
  725. mp->m_bsize, lock, &bp))) {
  726. return error;
  727. }
  728. ASSERT(!bp || !XFS_BUF_GETERROR(bp));
  729. if (bp != NULL) {
  730. switch (refval) {
  731. case XFS_ALLOC_BTREE_REF:
  732. XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
  733. break;
  734. case XFS_INO_BTREE_REF:
  735. XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
  736. break;
  737. }
  738. }
  739. *bpp = bp;
  740. return 0;
  741. }
  742. /*
  743. * Read-ahead the block, don't wait for it, don't return a buffer.
  744. * Long-form addressing.
  745. */
  746. /* ARGSUSED */
  747. void
  748. xfs_btree_reada_bufl(
  749. xfs_mount_t *mp, /* file system mount point */
  750. xfs_fsblock_t fsbno, /* file system block number */
  751. xfs_extlen_t count) /* count of filesystem blocks */
  752. {
  753. xfs_daddr_t d;
  754. ASSERT(fsbno != NULLFSBLOCK);
  755. d = XFS_FSB_TO_DADDR(mp, fsbno);
  756. xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
  757. }
  758. /*
  759. * Read-ahead the block, don't wait for it, don't return a buffer.
  760. * Short-form addressing.
  761. */
  762. /* ARGSUSED */
  763. void
  764. xfs_btree_reada_bufs(
  765. xfs_mount_t *mp, /* file system mount point */
  766. xfs_agnumber_t agno, /* allocation group number */
  767. xfs_agblock_t agbno, /* allocation group block number */
  768. xfs_extlen_t count) /* count of filesystem blocks */
  769. {
  770. xfs_daddr_t d;
  771. ASSERT(agno != NULLAGNUMBER);
  772. ASSERT(agbno != NULLAGBLOCK);
  773. d = XFS_AGB_TO_DADDR(mp, agno, agbno);
  774. xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
  775. }
  776. STATIC int
  777. xfs_btree_readahead_lblock(
  778. struct xfs_btree_cur *cur,
  779. int lr,
  780. struct xfs_btree_block *block)
  781. {
  782. int rval = 0;
  783. xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
  784. xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
  785. if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
  786. xfs_btree_reada_bufl(cur->bc_mp, left, 1);
  787. rval++;
  788. }
  789. if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
  790. xfs_btree_reada_bufl(cur->bc_mp, right, 1);
  791. rval++;
  792. }
  793. return rval;
  794. }
  795. STATIC int
  796. xfs_btree_readahead_sblock(
  797. struct xfs_btree_cur *cur,
  798. int lr,
  799. struct xfs_btree_block *block)
  800. {
  801. int rval = 0;
  802. xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
  803. xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
  804. if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
  805. xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
  806. left, 1);
  807. rval++;
  808. }
  809. if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
  810. xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
  811. right, 1);
  812. rval++;
  813. }
  814. return rval;
  815. }
  816. /*
  817. * Read-ahead btree blocks, at the given level.
  818. * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
  819. */
  820. int
  821. xfs_btree_readahead(
  822. struct xfs_btree_cur *cur, /* btree cursor */
  823. int lev, /* level in btree */
  824. int lr) /* left/right bits */
  825. {
  826. struct xfs_btree_block *block;
  827. /*
  828. * No readahead needed if we are at the root level and the
  829. * btree root is stored in the inode.
  830. */
  831. if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
  832. (lev == cur->bc_nlevels - 1))
  833. return 0;
  834. if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
  835. return 0;
  836. cur->bc_ra[lev] |= lr;
  837. block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
  838. if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
  839. return xfs_btree_readahead_lblock(cur, lr, block);
  840. return xfs_btree_readahead_sblock(cur, lr, block);
  841. }
  842. /*
  843. * Set the buffer for level "lev" in the cursor to bp, releasing
  844. * any previous buffer.
  845. */
  846. void
  847. xfs_btree_setbuf(
  848. xfs_btree_cur_t *cur, /* btree cursor */
  849. int lev, /* level in btree */
  850. xfs_buf_t *bp) /* new buffer to set */
  851. {
  852. xfs_btree_block_t *b; /* btree block */
  853. xfs_buf_t *obp; /* old buffer pointer */
  854. obp = cur->bc_bufs[lev];
  855. if (obp)
  856. xfs_trans_brelse(cur->bc_tp, obp);
  857. cur->bc_bufs[lev] = bp;
  858. cur->bc_ra[lev] = 0;
  859. if (!bp)
  860. return;
  861. b = XFS_BUF_TO_BLOCK(bp);
  862. if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
  863. if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
  864. cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
  865. if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
  866. cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
  867. } else {
  868. if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
  869. cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
  870. if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
  871. cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
  872. }
  873. }
  874. STATIC int
  875. xfs_btree_ptr_is_null(
  876. struct xfs_btree_cur *cur,
  877. union xfs_btree_ptr *ptr)
  878. {
  879. if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
  880. return be64_to_cpu(ptr->l) == NULLFSBLOCK;
  881. else
  882. return be32_to_cpu(ptr->s) == NULLAGBLOCK;
  883. }
  884. /*
  885. * Get/set/init sibling pointers
  886. */
  887. STATIC void
  888. xfs_btree_get_sibling(
  889. struct xfs_btree_cur *cur,
  890. struct xfs_btree_block *block,
  891. union xfs_btree_ptr *ptr,
  892. int lr)
  893. {
  894. ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
  895. if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
  896. if (lr == XFS_BB_RIGHTSIB)
  897. ptr->l = block->bb_u.l.bb_rightsib;
  898. else
  899. ptr->l = block->bb_u.l.bb_leftsib;
  900. } else {
  901. if (lr == XFS_BB_RIGHTSIB)
  902. ptr->s = block->bb_u.s.bb_rightsib;
  903. else
  904. ptr->s = block->bb_u.s.bb_leftsib;
  905. }
  906. }
  907. STATIC xfs_daddr_t
  908. xfs_btree_ptr_to_daddr(
  909. struct xfs_btree_cur *cur,
  910. union xfs_btree_ptr *ptr)
  911. {
  912. if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
  913. ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
  914. return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
  915. } else {
  916. ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
  917. ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
  918. return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
  919. be32_to_cpu(ptr->s));
  920. }
  921. }
  922. STATIC void
  923. xfs_btree_set_refs(
  924. struct xfs_btree_cur *cur,
  925. struct xfs_buf *bp)
  926. {
  927. switch (cur->bc_btnum) {
  928. case XFS_BTNUM_BNO:
  929. case XFS_BTNUM_CNT:
  930. XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
  931. break;
  932. case XFS_BTNUM_INO:
  933. XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
  934. break;
  935. case XFS_BTNUM_BMAP:
  936. XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
  937. break;
  938. default:
  939. ASSERT(0);
  940. }
  941. }
  942. /*
  943. * Read in the buffer at the given ptr and return the buffer and
  944. * the block pointer within the buffer.
  945. */
  946. STATIC int
  947. xfs_btree_read_buf_block(
  948. struct xfs_btree_cur *cur,
  949. union xfs_btree_ptr *ptr,
  950. int level,
  951. int flags,
  952. struct xfs_btree_block **block,
  953. struct xfs_buf **bpp)
  954. {
  955. struct xfs_mount *mp = cur->bc_mp;
  956. xfs_daddr_t d;
  957. int error;
  958. /* need to sort out how callers deal with failures first */
  959. ASSERT(!(flags & XFS_BUF_TRYLOCK));
  960. d = xfs_btree_ptr_to_daddr(cur, ptr);
  961. error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
  962. mp->m_bsize, flags, bpp);
  963. if (error)
  964. return error;
  965. ASSERT(*bpp != NULL);
  966. ASSERT(!XFS_BUF_GETERROR(*bpp));
  967. xfs_btree_set_refs(cur, *bpp);
  968. *block = XFS_BUF_TO_BLOCK(*bpp);
  969. error = xfs_btree_check_block(cur, *block, level, *bpp);
  970. if (error)
  971. xfs_trans_brelse(cur->bc_tp, *bpp);
  972. return error;
  973. }
  974. /*
  975. * Copy keys from one btree block to another.
  976. */
  977. STATIC void
  978. xfs_btree_copy_keys(
  979. struct xfs_btree_cur *cur,
  980. union xfs_btree_key *dst_key,
  981. union xfs_btree_key *src_key,
  982. int numkeys)
  983. {
  984. ASSERT(numkeys >= 0);
  985. memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
  986. }
  987. /*
  988. * Log key values from the btree block.
  989. */
  990. STATIC void
  991. xfs_btree_log_keys(
  992. struct xfs_btree_cur *cur,
  993. struct xfs_buf *bp,
  994. int first,
  995. int last)
  996. {
  997. XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
  998. XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
  999. if (bp) {
  1000. xfs_trans_log_buf(cur->bc_tp, bp,
  1001. xfs_btree_key_offset(cur, first),
  1002. xfs_btree_key_offset(cur, last + 1) - 1);
  1003. } else {
  1004. xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
  1005. xfs_ilog_fbroot(cur->bc_private.b.whichfork));
  1006. }
  1007. XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
  1008. }
  1009. /*
  1010. * Increment cursor by one record at the level.
  1011. * For nonzero levels the leaf-ward information is untouched.
  1012. */
  1013. int /* error */
  1014. xfs_btree_increment(
  1015. struct xfs_btree_cur *cur,
  1016. int level,
  1017. int *stat) /* success/failure */
  1018. {
  1019. struct xfs_btree_block *block;
  1020. union xfs_btree_ptr ptr;
  1021. struct xfs_buf *bp;
  1022. int error; /* error return value */
  1023. int lev;
  1024. XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
  1025. XFS_BTREE_TRACE_ARGI(cur, level);
  1026. ASSERT(level < cur->bc_nlevels);
  1027. /* Read-ahead to the right at this level. */
  1028. xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
  1029. /* Get a pointer to the btree block. */
  1030. block = xfs_btree_get_block(cur, level, &bp);
  1031. #ifdef DEBUG
  1032. error = xfs_btree_check_block(cur, block, level, bp);
  1033. if (error)
  1034. goto error0;
  1035. #endif
  1036. /* We're done if we remain in the block after the increment. */
  1037. if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
  1038. goto out1;
  1039. /* Fail if we just went off the right edge of the tree. */
  1040. xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
  1041. if (xfs_btree_ptr_is_null(cur, &ptr))
  1042. goto out0;
  1043. XFS_BTREE_STATS_INC(cur, increment);
  1044. /*
  1045. * March up the tree incrementing pointers.
  1046. * Stop when we don't go off the right edge of a block.
  1047. */
  1048. for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
  1049. block = xfs_btree_get_block(cur, lev, &bp);
  1050. #ifdef DEBUG
  1051. error = xfs_btree_check_block(cur, block, lev, bp);
  1052. if (error)
  1053. goto error0;
  1054. #endif
  1055. if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
  1056. break;
  1057. /* Read-ahead the right block for the next loop. */
  1058. xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
  1059. }
  1060. /*
  1061. * If we went off the root then we are either seriously
  1062. * confused or have the tree root in an inode.
  1063. */
  1064. if (lev == cur->bc_nlevels) {
  1065. if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
  1066. goto out0;
  1067. ASSERT(0);
  1068. error = EFSCORRUPTED;
  1069. goto error0;
  1070. }
  1071. ASSERT(lev < cur->bc_nlevels);
  1072. /*
  1073. * Now walk back down the tree, fixing up the cursor's buffer
  1074. * pointers and key numbers.
  1075. */
  1076. for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
  1077. union xfs_btree_ptr *ptrp;
  1078. ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
  1079. error = xfs_btree_read_buf_block(cur, ptrp, --lev,
  1080. 0, &block, &bp);
  1081. if (error)
  1082. goto error0;
  1083. xfs_btree_setbuf(cur, lev, bp);
  1084. cur->bc_ptrs[lev] = 1;
  1085. }
  1086. out1:
  1087. XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
  1088. *stat = 1;
  1089. return 0;
  1090. out0:
  1091. XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
  1092. *stat = 0;
  1093. return 0;
  1094. error0:
  1095. XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
  1096. return error;
  1097. }
  1098. /*
  1099. * Decrement cursor by one record at the level.
  1100. * For nonzero levels the leaf-ward information is untouched.
  1101. */
  1102. int /* error */
  1103. xfs_btree_decrement(
  1104. struct xfs_btree_cur *cur,
  1105. int level,
  1106. int *stat) /* success/failure */
  1107. {
  1108. struct xfs_btree_block *block;
  1109. xfs_buf_t *bp;
  1110. int error; /* error return value */
  1111. int lev;
  1112. union xfs_btree_ptr ptr;
  1113. XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
  1114. XFS_BTREE_TRACE_ARGI(cur, level);
  1115. ASSERT(level < cur->bc_nlevels);
  1116. /* Read-ahead to the left at this level. */
  1117. xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
  1118. /* We're done if we remain in the block after the decrement. */
  1119. if (--cur->bc_ptrs[level] > 0)
  1120. goto out1;
  1121. /* Get a pointer to the btree block. */
  1122. block = xfs_btree_get_block(cur, level, &bp);
  1123. #ifdef DEBUG
  1124. error = xfs_btree_check_block(cur, block, level, bp);
  1125. if (error)
  1126. goto error0;
  1127. #endif
  1128. /* Fail if we just went off the left edge of the tree. */
  1129. xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
  1130. if (xfs_btree_ptr_is_null(cur, &ptr))
  1131. goto out0;
  1132. XFS_BTREE_STATS_INC(cur, decrement);
  1133. /*
  1134. * March up the tree decrementing pointers.
  1135. * Stop when we don't go off the left edge of a block.
  1136. */
  1137. for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
  1138. if (--cur->bc_ptrs[lev] > 0)
  1139. break;
  1140. /* Read-ahead the left block for the next loop. */
  1141. xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
  1142. }
  1143. /*
  1144. * If we went off the root then we are seriously confused.
  1145. * or the root of the tree is in an inode.
  1146. */
  1147. if (lev == cur->bc_nlevels) {
  1148. if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
  1149. goto out0;
  1150. ASSERT(0);
  1151. error = EFSCORRUPTED;
  1152. goto error0;
  1153. }
  1154. ASSERT(lev < cur->bc_nlevels);
  1155. /*
  1156. * Now walk back down the tree, fixing up the cursor's buffer
  1157. * pointers and key numbers.
  1158. */
  1159. for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
  1160. union xfs_btree_ptr *ptrp;
  1161. ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
  1162. error = xfs_btree_read_buf_block(cur, ptrp, --lev,
  1163. 0, &block, &bp);
  1164. if (error)
  1165. goto error0;
  1166. xfs_btree_setbuf(cur, lev, bp);
  1167. cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
  1168. }
  1169. out1:
  1170. XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
  1171. *stat = 1;
  1172. return 0;
  1173. out0:
  1174. XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
  1175. *stat = 0;
  1176. return 0;
  1177. error0:
  1178. XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
  1179. return error;
  1180. }
  1181. STATIC int
  1182. xfs_btree_lookup_get_block(
  1183. struct xfs_btree_cur *cur, /* btree cursor */
  1184. int level, /* level in the btree */
  1185. union xfs_btree_ptr *pp, /* ptr to btree block */
  1186. struct xfs_btree_block **blkp) /* return btree block */
  1187. {
  1188. struct xfs_buf *bp; /* buffer pointer for btree block */
  1189. int error = 0;
  1190. /* special case the root block if in an inode */
  1191. if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
  1192. (level == cur->bc_nlevels - 1)) {
  1193. *blkp = xfs_btree_get_iroot(cur);
  1194. return 0;
  1195. }
  1196. /*
  1197. * If the old buffer at this level for the disk address we are
  1198. * looking for re-use it.
  1199. *
  1200. * Otherwise throw it away and get a new one.
  1201. */
  1202. bp = cur->bc_bufs[level];
  1203. if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
  1204. *blkp = XFS_BUF_TO_BLOCK(bp);
  1205. return 0;
  1206. }
  1207. error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
  1208. if (error)
  1209. return error;
  1210. xfs_btree_setbuf(cur, level, bp);
  1211. return 0;
  1212. }
  1213. /*
  1214. * Get current search key. For level 0 we don't actually have a key
  1215. * structure so we make one up from the record. For all other levels
  1216. * we just return the right key.
  1217. */
  1218. STATIC union xfs_btree_key *
  1219. xfs_lookup_get_search_key(
  1220. struct xfs_btree_cur *cur,
  1221. int level,
  1222. int keyno,
  1223. struct xfs_btree_block *block,
  1224. union xfs_btree_key *kp)
  1225. {
  1226. if (level == 0) {
  1227. cur->bc_ops->init_key_from_rec(kp,
  1228. xfs_btree_rec_addr(cur, keyno, block));
  1229. return kp;
  1230. }
  1231. return xfs_btree_key_addr(cur, keyno, block);
  1232. }
  1233. /*
  1234. * Lookup the record. The cursor is made to point to it, based on dir.
  1235. * Return 0 if can't find any such record, 1 for success.
  1236. */
  1237. int /* error */
  1238. xfs_btree_lookup(
  1239. struct xfs_btree_cur *cur, /* btree cursor */
  1240. xfs_lookup_t dir, /* <=, ==, or >= */
  1241. int *stat) /* success/failure */
  1242. {
  1243. struct xfs_btree_block *block; /* current btree block */
  1244. __int64_t diff; /* difference for the current key */
  1245. int error; /* error return value */
  1246. int keyno; /* current key number */
  1247. int level; /* level in the btree */
  1248. union xfs_btree_ptr *pp; /* ptr to btree block */
  1249. union xfs_btree_ptr ptr; /* ptr to btree block */
  1250. XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
  1251. XFS_BTREE_TRACE_ARGI(cur, dir);
  1252. XFS_BTREE_STATS_INC(cur, lookup);
  1253. block = NULL;
  1254. keyno = 0;
  1255. /* initialise start pointer from cursor */
  1256. cur->bc_ops->init_ptr_from_cur(cur, &ptr);
  1257. pp = &ptr;
  1258. /*
  1259. * Iterate over each level in the btree, starting at the root.
  1260. * For each level above the leaves, find the key we need, based
  1261. * on the lookup record, then follow the corresponding block
  1262. * pointer down to the next level.
  1263. */
  1264. for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
  1265. /* Get the block we need to do the lookup on. */
  1266. error = xfs_btree_lookup_get_block(cur, level, pp, &block);
  1267. if (error)
  1268. goto error0;
  1269. if (diff == 0) {
  1270. /*
  1271. * If we already had a key match at a higher level, we
  1272. * know we need to use the first entry in this block.
  1273. */
  1274. keyno = 1;
  1275. } else {
  1276. /* Otherwise search this block. Do a binary search. */
  1277. int high; /* high entry number */
  1278. int low; /* low entry number */
  1279. /* Set low and high entry numbers, 1-based. */
  1280. low = 1;
  1281. high = xfs_btree_get_numrecs(block);
  1282. if (!high) {
  1283. /* Block is empty, must be an empty leaf. */
  1284. ASSERT(level == 0 && cur->bc_nlevels == 1);
  1285. cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
  1286. XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
  1287. *stat = 0;
  1288. return 0;
  1289. }
  1290. /* Binary search the block. */
  1291. while (low <= high) {
  1292. union xfs_btree_key key;
  1293. union xfs_btree_key *kp;
  1294. XFS_BTREE_STATS_INC(cur, compare);
  1295. /* keyno is average of low and high. */
  1296. keyno = (low + high) >> 1;
  1297. /* Get current search key */
  1298. kp = xfs_lookup_get_search_key(cur, level,
  1299. keyno, block, &key);
  1300. /*
  1301. * Compute difference to get next direction:
  1302. * - less than, move right
  1303. * - greater than, move left
  1304. * - equal, we're done
  1305. */
  1306. diff = cur->bc_ops->key_diff(cur, kp);
  1307. if (diff < 0)
  1308. low = keyno + 1;
  1309. else if (diff > 0)
  1310. high = keyno - 1;
  1311. else
  1312. break;
  1313. }
  1314. }
  1315. /*
  1316. * If there are more levels, set up for the next level
  1317. * by getting the block number and filling in the cursor.
  1318. */
  1319. if (level > 0) {
  1320. /*
  1321. * If we moved left, need the previous key number,
  1322. * unless there isn't one.
  1323. */
  1324. if (diff > 0 && --keyno < 1)
  1325. keyno = 1;
  1326. pp = xfs_btree_ptr_addr(cur, keyno, block);
  1327. #ifdef DEBUG
  1328. error = xfs_btree_check_ptr(cur, pp, 0, level);
  1329. if (error)
  1330. goto error0;
  1331. #endif
  1332. cur->bc_ptrs[level] = keyno;
  1333. }
  1334. }
  1335. /* Done with the search. See if we need to adjust the results. */
  1336. if (dir != XFS_LOOKUP_LE && diff < 0) {
  1337. keyno++;
  1338. /*
  1339. * If ge search and we went off the end of the block, but it's
  1340. * not the last block, we're in the wrong block.
  1341. */
  1342. xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
  1343. if (dir == XFS_LOOKUP_GE &&
  1344. keyno > xfs_btree_get_numrecs(block) &&
  1345. !xfs_btree_ptr_is_null(cur, &ptr)) {
  1346. int i;
  1347. cur->bc_ptrs[0] = keyno;
  1348. error = xfs_btree_increment(cur, 0, &i);
  1349. if (error)
  1350. goto error0;
  1351. XFS_WANT_CORRUPTED_RETURN(i == 1);
  1352. XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
  1353. *stat = 1;
  1354. return 0;
  1355. }
  1356. } else if (dir == XFS_LOOKUP_LE && diff > 0)
  1357. keyno--;
  1358. cur->bc_ptrs[0] = keyno;
  1359. /* Return if we succeeded or not. */
  1360. if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
  1361. *stat = 0;
  1362. else if (dir != XFS_LOOKUP_EQ || diff == 0)
  1363. *stat = 1;
  1364. else
  1365. *stat = 0;
  1366. XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
  1367. return 0;
  1368. error0:
  1369. XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
  1370. return error;
  1371. }
  1372. /*
  1373. * Update keys at all levels from here to the root along the cursor's path.
  1374. */
  1375. int
  1376. xfs_btree_updkey(
  1377. struct xfs_btree_cur *cur,
  1378. union xfs_btree_key *keyp,
  1379. int level)
  1380. {
  1381. struct xfs_btree_block *block;
  1382. struct xfs_buf *bp;
  1383. union xfs_btree_key *kp;
  1384. int ptr;
  1385. XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
  1386. XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
  1387. ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
  1388. /*
  1389. * Go up the tree from this level toward the root.
  1390. * At each level, update the key value to the value input.
  1391. * Stop when we reach a level where the cursor isn't pointing
  1392. * at the first entry in the block.
  1393. */
  1394. for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
  1395. #ifdef DEBUG
  1396. int error;
  1397. #endif
  1398. block = xfs_btree_get_block(cur, level, &bp);
  1399. #ifdef DEBUG
  1400. error = xfs_btree_check_block(cur, block, level, bp);
  1401. if (error) {
  1402. XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
  1403. return error;
  1404. }
  1405. #endif
  1406. ptr = cur->bc_ptrs[level];
  1407. kp = xfs_btree_key_addr(cur, ptr, block);
  1408. xfs_btree_copy_keys(cur, kp, keyp, 1);
  1409. xfs_btree_log_keys(cur, bp, ptr, ptr);
  1410. }
  1411. XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
  1412. return 0;
  1413. }