replay.c 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095
  1. /*
  2. * This file is part of UBIFS.
  3. *
  4. * Copyright (C) 2006-2008 Nokia Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify it
  7. * under the terms of the GNU General Public License version 2 as published by
  8. * the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but WITHOUT
  11. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  13. * more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along with
  16. * this program; if not, write to the Free Software Foundation, Inc., 51
  17. * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18. *
  19. * Authors: Adrian Hunter
  20. * Artem Bityutskiy (Битюцкий Артём)
  21. */
  22. /*
  23. * This file contains journal replay code. It runs when the file-system is being
  24. * mounted and requires no locking.
  25. *
  26. * The larger is the journal, the longer it takes to scan it, so the longer it
  27. * takes to mount UBIFS. This is why the journal has limited size which may be
  28. * changed depending on the system requirements. But a larger journal gives
  29. * faster I/O speed because it writes the index less frequently. So this is a
  30. * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
  31. * larger is the journal, the more memory its index may consume.
  32. */
  33. #include "ubifs.h"
  34. /*
  35. * Replay flags.
  36. *
  37. * REPLAY_DELETION: node was deleted
  38. * REPLAY_REF: node is a reference node
  39. */
  40. enum {
  41. REPLAY_DELETION = 1,
  42. REPLAY_REF = 2,
  43. };
  44. /**
  45. * struct replay_entry - replay tree entry.
  46. * @lnum: logical eraseblock number of the node
  47. * @offs: node offset
  48. * @len: node length
  49. * @sqnum: node sequence number
  50. * @flags: replay flags
  51. * @rb: links the replay tree
  52. * @key: node key
  53. * @nm: directory entry name
  54. * @old_size: truncation old size
  55. * @new_size: truncation new size
  56. * @free: amount of free space in a bud
  57. * @dirty: amount of dirty space in a bud from padding and deletion nodes
  58. * @jhead: journal head number of the bud
  59. *
  60. * UBIFS journal replay must compare node sequence numbers, which means it must
  61. * build a tree of node information to insert into the TNC.
  62. */
  63. struct replay_entry {
  64. int lnum;
  65. int offs;
  66. int len;
  67. unsigned long long sqnum;
  68. int flags;
  69. struct rb_node rb;
  70. union ubifs_key key;
  71. union {
  72. struct qstr nm;
  73. struct {
  74. loff_t old_size;
  75. loff_t new_size;
  76. };
  77. struct {
  78. int free;
  79. int dirty;
  80. int jhead;
  81. };
  82. };
  83. };
  84. /**
  85. * struct bud_entry - entry in the list of buds to replay.
  86. * @list: next bud in the list
  87. * @bud: bud description object
  88. * @sqnum: reference node sequence number
  89. * @free: free bytes in the bud
  90. * @dirty: dirty bytes in the bud
  91. */
  92. struct bud_entry {
  93. struct list_head list;
  94. struct ubifs_bud *bud;
  95. unsigned long long sqnum;
  96. int free;
  97. int dirty;
  98. };
  99. /**
  100. * set_bud_lprops - set free and dirty space used by a bud.
  101. * @c: UBIFS file-system description object
  102. * @r: replay entry of bud
  103. */
  104. static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
  105. {
  106. const struct ubifs_lprops *lp;
  107. int err = 0, dirty;
  108. ubifs_get_lprops(c);
  109. lp = ubifs_lpt_lookup_dirty(c, r->lnum);
  110. if (IS_ERR(lp)) {
  111. err = PTR_ERR(lp);
  112. goto out;
  113. }
  114. dirty = lp->dirty;
  115. if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
  116. /*
  117. * The LEB was added to the journal with a starting offset of
  118. * zero which means the LEB must have been empty. The LEB
  119. * property values should be lp->free == c->leb_size and
  120. * lp->dirty == 0, but that is not the case. The reason is that
  121. * the LEB had been garbage collected before it became the bud,
  122. * and there was not commit inbetween. The garbage collector
  123. * resets the free and dirty space without recording it
  124. * anywhere except lprops, so if there was no commit then
  125. * lprops does not have that information.
  126. *
  127. * We do not need to adjust free space because the scan has told
  128. * us the exact value which is recorded in the replay entry as
  129. * r->free.
  130. *
  131. * However we do need to subtract from the dirty space the
  132. * amount of space that the garbage collector reclaimed, which
  133. * is the whole LEB minus the amount of space that was free.
  134. */
  135. dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
  136. lp->free, lp->dirty);
  137. dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
  138. lp->free, lp->dirty);
  139. dirty -= c->leb_size - lp->free;
  140. /*
  141. * If the replay order was perfect the dirty space would now be
  142. * zero. The order is not perfect because the journal heads
  143. * race with each other. This is not a problem but is does mean
  144. * that the dirty space may temporarily exceed c->leb_size
  145. * during the replay.
  146. */
  147. if (dirty != 0)
  148. dbg_msg("LEB %d lp: %d free %d dirty "
  149. "replay: %d free %d dirty", r->lnum, lp->free,
  150. lp->dirty, r->free, r->dirty);
  151. }
  152. lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
  153. lp->flags | LPROPS_TAKEN, 0);
  154. if (IS_ERR(lp)) {
  155. err = PTR_ERR(lp);
  156. goto out;
  157. }
  158. /* Make sure the journal head points to the latest bud */
  159. err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum,
  160. c->leb_size - r->free, UBI_SHORTTERM);
  161. out:
  162. ubifs_release_lprops(c);
  163. return err;
  164. }
  165. /**
  166. * trun_remove_range - apply a replay entry for a truncation to the TNC.
  167. * @c: UBIFS file-system description object
  168. * @r: replay entry of truncation
  169. */
  170. static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
  171. {
  172. unsigned min_blk, max_blk;
  173. union ubifs_key min_key, max_key;
  174. ino_t ino;
  175. min_blk = r->new_size / UBIFS_BLOCK_SIZE;
  176. if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
  177. min_blk += 1;
  178. max_blk = r->old_size / UBIFS_BLOCK_SIZE;
  179. if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
  180. max_blk -= 1;
  181. ino = key_inum(c, &r->key);
  182. data_key_init(c, &min_key, ino, min_blk);
  183. data_key_init(c, &max_key, ino, max_blk);
  184. return ubifs_tnc_remove_range(c, &min_key, &max_key);
  185. }
  186. /**
  187. * apply_replay_entry - apply a replay entry to the TNC.
  188. * @c: UBIFS file-system description object
  189. * @r: replay entry to apply
  190. *
  191. * Apply a replay entry to the TNC.
  192. */
  193. static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
  194. {
  195. int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
  196. dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
  197. r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
  198. /* Set c->replay_sqnum to help deal with dangling branches. */
  199. c->replay_sqnum = r->sqnum;
  200. if (r->flags & REPLAY_REF)
  201. err = set_bud_lprops(c, r);
  202. else if (is_hash_key(c, &r->key)) {
  203. if (deletion)
  204. err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
  205. else
  206. err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
  207. r->len, &r->nm);
  208. } else {
  209. if (deletion)
  210. switch (key_type(c, &r->key)) {
  211. case UBIFS_INO_KEY:
  212. {
  213. ino_t inum = key_inum(c, &r->key);
  214. err = ubifs_tnc_remove_ino(c, inum);
  215. break;
  216. }
  217. case UBIFS_TRUN_KEY:
  218. err = trun_remove_range(c, r);
  219. break;
  220. default:
  221. err = ubifs_tnc_remove(c, &r->key);
  222. break;
  223. }
  224. else
  225. err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
  226. r->len);
  227. if (err)
  228. return err;
  229. if (c->need_recovery)
  230. err = ubifs_recover_size_accum(c, &r->key, deletion,
  231. r->new_size);
  232. }
  233. return err;
  234. }
  235. /**
  236. * destroy_replay_tree - destroy the replay.
  237. * @c: UBIFS file-system description object
  238. *
  239. * Destroy the replay tree.
  240. */
  241. static void destroy_replay_tree(struct ubifs_info *c)
  242. {
  243. struct rb_node *this = c->replay_tree.rb_node;
  244. struct replay_entry *r;
  245. while (this) {
  246. if (this->rb_left) {
  247. this = this->rb_left;
  248. continue;
  249. } else if (this->rb_right) {
  250. this = this->rb_right;
  251. continue;
  252. }
  253. r = rb_entry(this, struct replay_entry, rb);
  254. this = rb_parent(this);
  255. if (this) {
  256. if (this->rb_left == &r->rb)
  257. this->rb_left = NULL;
  258. else
  259. this->rb_right = NULL;
  260. }
  261. if (is_hash_key(c, &r->key))
  262. kfree(r->nm.name);
  263. kfree(r);
  264. }
  265. c->replay_tree = RB_ROOT;
  266. }
  267. /**
  268. * apply_replay_tree - apply the replay tree to the TNC.
  269. * @c: UBIFS file-system description object
  270. *
  271. * Apply the replay tree.
  272. * Returns zero in case of success and a negative error code in case of
  273. * failure.
  274. */
  275. static int apply_replay_tree(struct ubifs_info *c)
  276. {
  277. struct rb_node *this = rb_first(&c->replay_tree);
  278. while (this) {
  279. struct replay_entry *r;
  280. int err;
  281. cond_resched();
  282. r = rb_entry(this, struct replay_entry, rb);
  283. err = apply_replay_entry(c, r);
  284. if (err)
  285. return err;
  286. this = rb_next(this);
  287. }
  288. return 0;
  289. }
  290. /**
  291. * insert_node - insert a node to the replay tree.
  292. * @c: UBIFS file-system description object
  293. * @lnum: node logical eraseblock number
  294. * @offs: node offset
  295. * @len: node length
  296. * @key: node key
  297. * @sqnum: sequence number
  298. * @deletion: non-zero if this is a deletion
  299. * @used: number of bytes in use in a LEB
  300. * @old_size: truncation old size
  301. * @new_size: truncation new size
  302. *
  303. * This function inserts a scanned non-direntry node to the replay tree. The
  304. * replay tree is an RB-tree containing @struct replay_entry elements which are
  305. * indexed by the sequence number. The replay tree is applied at the very end
  306. * of the replay process. Since the tree is sorted in sequence number order,
  307. * the older modifications are applied first. This function returns zero in
  308. * case of success and a negative error code in case of failure.
  309. */
  310. static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
  311. union ubifs_key *key, unsigned long long sqnum,
  312. int deletion, int *used, loff_t old_size,
  313. loff_t new_size)
  314. {
  315. struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
  316. struct replay_entry *r;
  317. if (key_inum(c, key) >= c->highest_inum)
  318. c->highest_inum = key_inum(c, key);
  319. dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
  320. while (*p) {
  321. parent = *p;
  322. r = rb_entry(parent, struct replay_entry, rb);
  323. if (sqnum < r->sqnum) {
  324. p = &(*p)->rb_left;
  325. continue;
  326. } else if (sqnum > r->sqnum) {
  327. p = &(*p)->rb_right;
  328. continue;
  329. }
  330. ubifs_err("duplicate sqnum in replay");
  331. return -EINVAL;
  332. }
  333. r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
  334. if (!r)
  335. return -ENOMEM;
  336. if (!deletion)
  337. *used += ALIGN(len, 8);
  338. r->lnum = lnum;
  339. r->offs = offs;
  340. r->len = len;
  341. r->sqnum = sqnum;
  342. r->flags = (deletion ? REPLAY_DELETION : 0);
  343. r->old_size = old_size;
  344. r->new_size = new_size;
  345. key_copy(c, key, &r->key);
  346. rb_link_node(&r->rb, parent, p);
  347. rb_insert_color(&r->rb, &c->replay_tree);
  348. return 0;
  349. }
  350. /**
  351. * insert_dent - insert a directory entry node into the replay tree.
  352. * @c: UBIFS file-system description object
  353. * @lnum: node logical eraseblock number
  354. * @offs: node offset
  355. * @len: node length
  356. * @key: node key
  357. * @name: directory entry name
  358. * @nlen: directory entry name length
  359. * @sqnum: sequence number
  360. * @deletion: non-zero if this is a deletion
  361. * @used: number of bytes in use in a LEB
  362. *
  363. * This function inserts a scanned directory entry node to the replay tree.
  364. * Returns zero in case of success and a negative error code in case of
  365. * failure.
  366. *
  367. * This function is also used for extended attribute entries because they are
  368. * implemented as directory entry nodes.
  369. */
  370. static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
  371. union ubifs_key *key, const char *name, int nlen,
  372. unsigned long long sqnum, int deletion, int *used)
  373. {
  374. struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
  375. struct replay_entry *r;
  376. char *nbuf;
  377. if (key_inum(c, key) >= c->highest_inum)
  378. c->highest_inum = key_inum(c, key);
  379. dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
  380. while (*p) {
  381. parent = *p;
  382. r = rb_entry(parent, struct replay_entry, rb);
  383. if (sqnum < r->sqnum) {
  384. p = &(*p)->rb_left;
  385. continue;
  386. }
  387. if (sqnum > r->sqnum) {
  388. p = &(*p)->rb_right;
  389. continue;
  390. }
  391. ubifs_err("duplicate sqnum in replay");
  392. return -EINVAL;
  393. }
  394. r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
  395. if (!r)
  396. return -ENOMEM;
  397. nbuf = kmalloc(nlen + 1, GFP_KERNEL);
  398. if (!nbuf) {
  399. kfree(r);
  400. return -ENOMEM;
  401. }
  402. if (!deletion)
  403. *used += ALIGN(len, 8);
  404. r->lnum = lnum;
  405. r->offs = offs;
  406. r->len = len;
  407. r->sqnum = sqnum;
  408. r->nm.len = nlen;
  409. memcpy(nbuf, name, nlen);
  410. nbuf[nlen] = '\0';
  411. r->nm.name = nbuf;
  412. r->flags = (deletion ? REPLAY_DELETION : 0);
  413. key_copy(c, key, &r->key);
  414. ubifs_assert(!*p);
  415. rb_link_node(&r->rb, parent, p);
  416. rb_insert_color(&r->rb, &c->replay_tree);
  417. return 0;
  418. }
  419. /**
  420. * ubifs_validate_entry - validate directory or extended attribute entry node.
  421. * @c: UBIFS file-system description object
  422. * @dent: the node to validate
  423. *
  424. * This function validates directory or extended attribute entry node @dent.
  425. * Returns zero if the node is all right and a %-EINVAL if not.
  426. */
  427. int ubifs_validate_entry(struct ubifs_info *c,
  428. const struct ubifs_dent_node *dent)
  429. {
  430. int key_type = key_type_flash(c, dent->key);
  431. int nlen = le16_to_cpu(dent->nlen);
  432. if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
  433. dent->type >= UBIFS_ITYPES_CNT ||
  434. nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
  435. strnlen(dent->name, nlen) != nlen ||
  436. le64_to_cpu(dent->inum) > MAX_INUM) {
  437. ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
  438. "directory entry" : "extended attribute entry");
  439. return -EINVAL;
  440. }
  441. if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
  442. ubifs_err("bad key type %d", key_type);
  443. return -EINVAL;
  444. }
  445. return 0;
  446. }
  447. /**
  448. * replay_bud - replay a bud logical eraseblock.
  449. * @c: UBIFS file-system description object
  450. * @lnum: bud logical eraseblock number to replay
  451. * @offs: bud start offset
  452. * @jhead: journal head to which this bud belongs
  453. * @free: amount of free space in the bud is returned here
  454. * @dirty: amount of dirty space from padding and deletion nodes is returned
  455. * here
  456. *
  457. * This function returns zero in case of success and a negative error code in
  458. * case of failure.
  459. */
  460. static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
  461. int *free, int *dirty)
  462. {
  463. int err = 0, used = 0;
  464. struct ubifs_scan_leb *sleb;
  465. struct ubifs_scan_node *snod;
  466. struct ubifs_bud *bud;
  467. dbg_mnt("replay bud LEB %d, head %d, offs %d", lnum, jhead, offs);
  468. if (c->need_recovery)
  469. sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
  470. else
  471. sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
  472. if (IS_ERR(sleb))
  473. return PTR_ERR(sleb);
  474. /*
  475. * The bud does not have to start from offset zero - the beginning of
  476. * the 'lnum' LEB may contain previously committed data. One of the
  477. * things we have to do in replay is to correctly update lprops with
  478. * newer information about this LEB.
  479. *
  480. * At this point lprops thinks that this LEB has 'c->leb_size - offs'
  481. * bytes of free space because it only contain information about
  482. * committed data.
  483. *
  484. * But we know that real amount of free space is 'c->leb_size -
  485. * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
  486. * 'sleb->endpt' is used by bud data. We have to correctly calculate
  487. * how much of these data are dirty and update lprops with this
  488. * information.
  489. *
  490. * The dirt in that LEB region is comprised of padding nodes, deletion
  491. * nodes, truncation nodes and nodes which are obsoleted by subsequent
  492. * nodes in this LEB. So instead of calculating clean space, we
  493. * calculate used space ('used' variable).
  494. */
  495. list_for_each_entry(snod, &sleb->nodes, list) {
  496. int deletion = 0;
  497. cond_resched();
  498. if (snod->sqnum >= SQNUM_WATERMARK) {
  499. ubifs_err("file system's life ended");
  500. goto out_dump;
  501. }
  502. if (snod->sqnum > c->max_sqnum)
  503. c->max_sqnum = snod->sqnum;
  504. switch (snod->type) {
  505. case UBIFS_INO_NODE:
  506. {
  507. struct ubifs_ino_node *ino = snod->node;
  508. loff_t new_size = le64_to_cpu(ino->size);
  509. if (le32_to_cpu(ino->nlink) == 0)
  510. deletion = 1;
  511. err = insert_node(c, lnum, snod->offs, snod->len,
  512. &snod->key, snod->sqnum, deletion,
  513. &used, 0, new_size);
  514. break;
  515. }
  516. case UBIFS_DATA_NODE:
  517. {
  518. struct ubifs_data_node *dn = snod->node;
  519. loff_t new_size = le32_to_cpu(dn->size) +
  520. key_block(c, &snod->key) *
  521. UBIFS_BLOCK_SIZE;
  522. err = insert_node(c, lnum, snod->offs, snod->len,
  523. &snod->key, snod->sqnum, deletion,
  524. &used, 0, new_size);
  525. break;
  526. }
  527. case UBIFS_DENT_NODE:
  528. case UBIFS_XENT_NODE:
  529. {
  530. struct ubifs_dent_node *dent = snod->node;
  531. err = ubifs_validate_entry(c, dent);
  532. if (err)
  533. goto out_dump;
  534. err = insert_dent(c, lnum, snod->offs, snod->len,
  535. &snod->key, dent->name,
  536. le16_to_cpu(dent->nlen), snod->sqnum,
  537. !le64_to_cpu(dent->inum), &used);
  538. break;
  539. }
  540. case UBIFS_TRUN_NODE:
  541. {
  542. struct ubifs_trun_node *trun = snod->node;
  543. loff_t old_size = le64_to_cpu(trun->old_size);
  544. loff_t new_size = le64_to_cpu(trun->new_size);
  545. union ubifs_key key;
  546. /* Validate truncation node */
  547. if (old_size < 0 || old_size > c->max_inode_sz ||
  548. new_size < 0 || new_size > c->max_inode_sz ||
  549. old_size <= new_size) {
  550. ubifs_err("bad truncation node");
  551. goto out_dump;
  552. }
  553. /*
  554. * Create a fake truncation key just to use the same
  555. * functions which expect nodes to have keys.
  556. */
  557. trun_key_init(c, &key, le32_to_cpu(trun->inum));
  558. err = insert_node(c, lnum, snod->offs, snod->len,
  559. &key, snod->sqnum, 1, &used,
  560. old_size, new_size);
  561. break;
  562. }
  563. default:
  564. ubifs_err("unexpected node type %d in bud LEB %d:%d",
  565. snod->type, lnum, snod->offs);
  566. err = -EINVAL;
  567. goto out_dump;
  568. }
  569. if (err)
  570. goto out;
  571. }
  572. bud = ubifs_search_bud(c, lnum);
  573. if (!bud)
  574. BUG();
  575. ubifs_assert(sleb->endpt - offs >= used);
  576. ubifs_assert(sleb->endpt % c->min_io_size == 0);
  577. *dirty = sleb->endpt - offs - used;
  578. *free = c->leb_size - sleb->endpt;
  579. dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, *dirty, *free);
  580. out:
  581. ubifs_scan_destroy(sleb);
  582. return err;
  583. out_dump:
  584. ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
  585. dbg_dump_node(c, snod->node);
  586. ubifs_scan_destroy(sleb);
  587. return -EINVAL;
  588. }
  589. /**
  590. * insert_ref_node - insert a reference node to the replay tree.
  591. * @c: UBIFS file-system description object
  592. * @lnum: node logical eraseblock number
  593. * @offs: node offset
  594. * @sqnum: sequence number
  595. * @free: amount of free space in bud
  596. * @dirty: amount of dirty space from padding and deletion nodes
  597. * @jhead: journal head number for the bud
  598. *
  599. * This function inserts a reference node to the replay tree and returns zero
  600. * in case of success or a negative error code in case of failure.
  601. */
  602. static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
  603. unsigned long long sqnum, int free, int dirty,
  604. int jhead)
  605. {
  606. struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
  607. struct replay_entry *r;
  608. dbg_mnt("add ref LEB %d:%d", lnum, offs);
  609. while (*p) {
  610. parent = *p;
  611. r = rb_entry(parent, struct replay_entry, rb);
  612. if (sqnum < r->sqnum) {
  613. p = &(*p)->rb_left;
  614. continue;
  615. } else if (sqnum > r->sqnum) {
  616. p = &(*p)->rb_right;
  617. continue;
  618. }
  619. ubifs_err("duplicate sqnum in replay tree");
  620. return -EINVAL;
  621. }
  622. r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
  623. if (!r)
  624. return -ENOMEM;
  625. r->lnum = lnum;
  626. r->offs = offs;
  627. r->sqnum = sqnum;
  628. r->flags = REPLAY_REF;
  629. r->free = free;
  630. r->dirty = dirty;
  631. r->jhead = jhead;
  632. rb_link_node(&r->rb, parent, p);
  633. rb_insert_color(&r->rb, &c->replay_tree);
  634. return 0;
  635. }
  636. /**
  637. * replay_buds - replay all buds.
  638. * @c: UBIFS file-system description object
  639. *
  640. * This function returns zero in case of success and a negative error code in
  641. * case of failure.
  642. */
  643. static int replay_buds(struct ubifs_info *c)
  644. {
  645. struct bud_entry *b;
  646. int err, uninitialized_var(free), uninitialized_var(dirty);
  647. unsigned long long prev_sqnum = 0;
  648. list_for_each_entry(b, &c->replay_buds, list) {
  649. err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
  650. &free, &dirty);
  651. if (err)
  652. return err;
  653. b->free = free;
  654. b->dirty = dirty;
  655. err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
  656. free, dirty, b->bud->jhead);
  657. if (err)
  658. return err;
  659. ubifs_assert(b->sqnum > prev_sqnum);
  660. prev_sqnum = b->sqnum;
  661. }
  662. return 0;
  663. }
  664. /**
  665. * destroy_bud_list - destroy the list of buds to replay.
  666. * @c: UBIFS file-system description object
  667. */
  668. static void destroy_bud_list(struct ubifs_info *c)
  669. {
  670. struct bud_entry *b;
  671. while (!list_empty(&c->replay_buds)) {
  672. b = list_entry(c->replay_buds.next, struct bud_entry, list);
  673. list_del(&b->list);
  674. kfree(b);
  675. }
  676. }
  677. /**
  678. * add_replay_bud - add a bud to the list of buds to replay.
  679. * @c: UBIFS file-system description object
  680. * @lnum: bud logical eraseblock number to replay
  681. * @offs: bud start offset
  682. * @jhead: journal head to which this bud belongs
  683. * @sqnum: reference node sequence number
  684. *
  685. * This function returns zero in case of success and a negative error code in
  686. * case of failure.
  687. */
  688. static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
  689. unsigned long long sqnum)
  690. {
  691. struct ubifs_bud *bud;
  692. struct bud_entry *b;
  693. dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
  694. bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
  695. if (!bud)
  696. return -ENOMEM;
  697. b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
  698. if (!b) {
  699. kfree(bud);
  700. return -ENOMEM;
  701. }
  702. bud->lnum = lnum;
  703. bud->start = offs;
  704. bud->jhead = jhead;
  705. ubifs_add_bud(c, bud);
  706. b->bud = bud;
  707. b->sqnum = sqnum;
  708. list_add_tail(&b->list, &c->replay_buds);
  709. return 0;
  710. }
  711. /**
  712. * validate_ref - validate a reference node.
  713. * @c: UBIFS file-system description object
  714. * @ref: the reference node to validate
  715. * @ref_lnum: LEB number of the reference node
  716. * @ref_offs: reference node offset
  717. *
  718. * This function returns %1 if a bud reference already exists for the LEB. %0 is
  719. * returned if the reference node is new, otherwise %-EINVAL is returned if
  720. * validation failed.
  721. */
  722. static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
  723. {
  724. struct ubifs_bud *bud;
  725. int lnum = le32_to_cpu(ref->lnum);
  726. unsigned int offs = le32_to_cpu(ref->offs);
  727. unsigned int jhead = le32_to_cpu(ref->jhead);
  728. /*
  729. * ref->offs may point to the end of LEB when the journal head points
  730. * to the end of LEB and we write reference node for it during commit.
  731. * So this is why we require 'offs > c->leb_size'.
  732. */
  733. if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
  734. lnum < c->main_first || offs > c->leb_size ||
  735. offs & (c->min_io_size - 1))
  736. return -EINVAL;
  737. /* Make sure we have not already looked at this bud */
  738. bud = ubifs_search_bud(c, lnum);
  739. if (bud) {
  740. if (bud->jhead == jhead && bud->start <= offs)
  741. return 1;
  742. ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
  743. return -EINVAL;
  744. }
  745. return 0;
  746. }
  747. /**
  748. * replay_log_leb - replay a log logical eraseblock.
  749. * @c: UBIFS file-system description object
  750. * @lnum: log logical eraseblock to replay
  751. * @offs: offset to start replaying from
  752. * @sbuf: scan buffer
  753. *
  754. * This function replays a log LEB and returns zero in case of success, %1 if
  755. * this is the last LEB in the log, and a negative error code in case of
  756. * failure.
  757. */
  758. static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
  759. {
  760. int err;
  761. struct ubifs_scan_leb *sleb;
  762. struct ubifs_scan_node *snod;
  763. const struct ubifs_cs_node *node;
  764. dbg_mnt("replay log LEB %d:%d", lnum, offs);
  765. sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
  766. if (IS_ERR(sleb)) {
  767. if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
  768. return PTR_ERR(sleb);
  769. /*
  770. * Note, the below function will recover this log LEB only if
  771. * it is the last, because unclean reboots can possibly corrupt
  772. * only the tail of the log.
  773. */
  774. sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
  775. if (IS_ERR(sleb))
  776. return PTR_ERR(sleb);
  777. }
  778. if (sleb->nodes_cnt == 0) {
  779. err = 1;
  780. goto out;
  781. }
  782. node = sleb->buf;
  783. snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
  784. if (c->cs_sqnum == 0) {
  785. /*
  786. * This is the first log LEB we are looking at, make sure that
  787. * the first node is a commit start node. Also record its
  788. * sequence number so that UBIFS can determine where the log
  789. * ends, because all nodes which were have higher sequence
  790. * numbers.
  791. */
  792. if (snod->type != UBIFS_CS_NODE) {
  793. dbg_err("first log node at LEB %d:%d is not CS node",
  794. lnum, offs);
  795. goto out_dump;
  796. }
  797. if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
  798. dbg_err("first CS node at LEB %d:%d has wrong "
  799. "commit number %llu expected %llu",
  800. lnum, offs,
  801. (unsigned long long)le64_to_cpu(node->cmt_no),
  802. c->cmt_no);
  803. goto out_dump;
  804. }
  805. c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
  806. dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
  807. }
  808. if (snod->sqnum < c->cs_sqnum) {
  809. /*
  810. * This means that we reached end of log and now
  811. * look to the older log data, which was already
  812. * committed but the eraseblock was not erased (UBIFS
  813. * only un-maps it). So this basically means we have to
  814. * exit with "end of log" code.
  815. */
  816. err = 1;
  817. goto out;
  818. }
  819. /* Make sure the first node sits at offset zero of the LEB */
  820. if (snod->offs != 0) {
  821. dbg_err("first node is not at zero offset");
  822. goto out_dump;
  823. }
  824. list_for_each_entry(snod, &sleb->nodes, list) {
  825. cond_resched();
  826. if (snod->sqnum >= SQNUM_WATERMARK) {
  827. ubifs_err("file system's life ended");
  828. goto out_dump;
  829. }
  830. if (snod->sqnum < c->cs_sqnum) {
  831. dbg_err("bad sqnum %llu, commit sqnum %llu",
  832. snod->sqnum, c->cs_sqnum);
  833. goto out_dump;
  834. }
  835. if (snod->sqnum > c->max_sqnum)
  836. c->max_sqnum = snod->sqnum;
  837. switch (snod->type) {
  838. case UBIFS_REF_NODE: {
  839. const struct ubifs_ref_node *ref = snod->node;
  840. err = validate_ref(c, ref);
  841. if (err == 1)
  842. break; /* Already have this bud */
  843. if (err)
  844. goto out_dump;
  845. err = add_replay_bud(c, le32_to_cpu(ref->lnum),
  846. le32_to_cpu(ref->offs),
  847. le32_to_cpu(ref->jhead),
  848. snod->sqnum);
  849. if (err)
  850. goto out;
  851. break;
  852. }
  853. case UBIFS_CS_NODE:
  854. /* Make sure it sits at the beginning of LEB */
  855. if (snod->offs != 0) {
  856. ubifs_err("unexpected node in log");
  857. goto out_dump;
  858. }
  859. break;
  860. default:
  861. ubifs_err("unexpected node in log");
  862. goto out_dump;
  863. }
  864. }
  865. if (sleb->endpt || c->lhead_offs >= c->leb_size) {
  866. c->lhead_lnum = lnum;
  867. c->lhead_offs = sleb->endpt;
  868. }
  869. err = !sleb->endpt;
  870. out:
  871. ubifs_scan_destroy(sleb);
  872. return err;
  873. out_dump:
  874. ubifs_err("log error detected while replaying the log at LEB %d:%d",
  875. lnum, offs + snod->offs);
  876. dbg_dump_node(c, snod->node);
  877. ubifs_scan_destroy(sleb);
  878. return -EINVAL;
  879. }
  880. /**
  881. * take_ihead - update the status of the index head in lprops to 'taken'.
  882. * @c: UBIFS file-system description object
  883. *
  884. * This function returns the amount of free space in the index head LEB or a
  885. * negative error code.
  886. */
  887. static int take_ihead(struct ubifs_info *c)
  888. {
  889. const struct ubifs_lprops *lp;
  890. int err, free;
  891. ubifs_get_lprops(c);
  892. lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
  893. if (IS_ERR(lp)) {
  894. err = PTR_ERR(lp);
  895. goto out;
  896. }
  897. free = lp->free;
  898. lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
  899. lp->flags | LPROPS_TAKEN, 0);
  900. if (IS_ERR(lp)) {
  901. err = PTR_ERR(lp);
  902. goto out;
  903. }
  904. err = free;
  905. out:
  906. ubifs_release_lprops(c);
  907. return err;
  908. }
  909. /**
  910. * ubifs_replay_journal - replay journal.
  911. * @c: UBIFS file-system description object
  912. *
  913. * This function scans the journal, replays and cleans it up. It makes sure all
  914. * memory data structures related to uncommitted journal are built (dirty TNC
  915. * tree, tree of buds, modified lprops, etc).
  916. */
  917. int ubifs_replay_journal(struct ubifs_info *c)
  918. {
  919. int err, i, lnum, offs, free;
  920. BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
  921. /* Update the status of the index head in lprops to 'taken' */
  922. free = take_ihead(c);
  923. if (free < 0)
  924. return free; /* Error code */
  925. if (c->ihead_offs != c->leb_size - free) {
  926. ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
  927. c->ihead_offs);
  928. return -EINVAL;
  929. }
  930. dbg_mnt("start replaying the journal");
  931. c->replaying = 1;
  932. lnum = c->ltail_lnum = c->lhead_lnum;
  933. offs = c->lhead_offs;
  934. for (i = 0; i < c->log_lebs; i++, lnum++) {
  935. if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
  936. /*
  937. * The log is logically circular, we reached the last
  938. * LEB, switch to the first one.
  939. */
  940. lnum = UBIFS_LOG_LNUM;
  941. offs = 0;
  942. }
  943. err = replay_log_leb(c, lnum, offs, c->sbuf);
  944. if (err == 1)
  945. /* We hit the end of the log */
  946. break;
  947. if (err)
  948. goto out;
  949. offs = 0;
  950. }
  951. err = replay_buds(c);
  952. if (err)
  953. goto out;
  954. err = apply_replay_tree(c);
  955. if (err)
  956. goto out;
  957. /*
  958. * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
  959. * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
  960. * depend on it. This means we have to initialize it to make sure
  961. * budgeting works properly.
  962. */
  963. c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
  964. c->bi.uncommitted_idx *= c->max_idx_node_sz;
  965. ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
  966. dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
  967. "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
  968. (unsigned long)c->highest_inum);
  969. out:
  970. destroy_replay_tree(c);
  971. destroy_bud_list(c);
  972. c->replaying = 0;
  973. return err;
  974. }