gc.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033
  1. /*
  2. * This file is part of UBIFS.
  3. *
  4. * Copyright (C) 2006-2008 Nokia Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify it
  7. * under the terms of the GNU General Public License version 2 as published by
  8. * the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but WITHOUT
  11. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  13. * more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along with
  16. * this program; if not, write to the Free Software Foundation, Inc., 51
  17. * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18. *
  19. * Authors: Adrian Hunter
  20. * Artem Bityutskiy (Битюцкий Артём)
  21. */
  22. /*
  23. * This file implements garbage collection. The procedure for garbage collection
   * is different depending on whether a LEB is an index LEB (contains index
  25. * nodes) or not. For non-index LEBs, garbage collection finds a LEB which
  26. * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete
  27. * nodes to the journal, at which point the garbage-collected LEB is free to be
  28. * reused. For index LEBs, garbage collection marks the non-obsolete index nodes
  29. * dirty in the TNC, and after the next commit, the garbage-collected LEB is
  30. * to be reused. Garbage collection will cause the number of dirty index nodes
  31. * to grow, however sufficient space is reserved for the index to ensure the
  32. * commit will never run out of space.
  33. *
   * Notes about dead watermark. In the current UBIFS implementation we assume
   * that LEBs which have less than @c->dead_wm bytes of free + dirty space are
   * full and not worth garbage-collecting. The dead watermark is one min. I/O
   * unit size, or min. UBIFS node size, whichever is greater. Indeed, the
   * UBIFS Garbage Collector has to synchronize the GC head's write buffer
   * before returning, so this is about wasting one min. I/O unit. However,
   * UBIFS GC can actually reclaim even very small pieces of dirty space by
   * garbage collecting enough dirty LEBs, but we do not bother doing this in
   * this implementation.
  42. *
   * Notes about dark watermark. The results of GC work depend on how big the
   * UBIFS nodes GC deals with are. Large nodes make GC waste more space.
   * Indeed, if GC moves data from LEB A to LEB B and nodes in LEB A are large,
   * GC would have to waste large pieces of free space at the end of LEB B,
   * because nodes from LEB A would not fit. And the worst situation is when
   * all nodes are of maximum size. So dark watermark is the amount of free +
   * dirty space in LEB which is guaranteed to be reclaimable. If LEB has less
   * space, the GC might be unable to reclaim it. So, LEBs with free + dirty
   * greater than dark watermark are "good" LEBs from GC's point of view. The
   * other LEBs are not so good, and GC takes extra care when moving them.
  53. */
  54. #include <linux/pagemap.h>
  55. #include "ubifs.h"
  /*
   * A single GC run may have to process several LEBs before it makes
   * progress. These constants bound the number of LEBs handled per run: once
   * the "soft" limit is exceeded GC stops early if index LEBs have already
   * been garbage-collected, and once the "hard" limit is exceeded it gives up.
   */
  #define SOFT_LEBS_LIMIT 4
  #define HARD_LEBS_LIMIT 32
  63. /**
  64. * switch_gc_head - switch the garbage collection journal head.
  65. * @c: UBIFS file-system description object
  66. * @buf: buffer to write
  67. * @len: length of the buffer to write
  68. * @lnum: LEB number written is returned here
  69. * @offs: offset written is returned here
  70. *
  71. * This function switch the GC head to the next LEB which is reserved in
  72. * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required,
  73. * and other negative error code in case of failures.
  74. */
  75. static int switch_gc_head(struct ubifs_info *c)
  76. {
  77. int err, gc_lnum = c->gc_lnum;
  78. struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
  79. ubifs_assert(gc_lnum != -1);
  80. dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)",
  81. wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum,
  82. c->leb_size - wbuf->offs - wbuf->used);
  83. err = ubifs_wbuf_sync_nolock(wbuf);
  84. if (err)
  85. return err;
  86. /*
  87. * The GC write-buffer was synchronized, we may safely unmap
  88. * 'c->gc_lnum'.
  89. */
  90. err = ubifs_leb_unmap(c, gc_lnum);
  91. if (err)
  92. return err;
  93. err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
  94. if (err)
  95. return err;
  96. c->gc_lnum = -1;
  97. err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM);
  98. return err;
  99. }
  100. /**
  101. * list_sort - sort a list.
  102. * @priv: private data, passed to @cmp
  103. * @head: the list to sort
  104. * @cmp: the elements comparison function
  105. *
  106. * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
  107. * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
  108. * in ascending order.
  109. *
  110. * The comparison function @cmp is supposed to return a negative value if @a is
  111. * than @b, and a positive value if @a is greater than @b. If @a and @b are
  112. * equivalent, then it does not matter what this function returns.
  113. */
  114. static void list_sort(void *priv, struct list_head *head,
  115. int (*cmp)(void *priv, struct list_head *a,
  116. struct list_head *b))
  117. {
  118. struct list_head *p, *q, *e, *list, *tail, *oldhead;
  119. int insize, nmerges, psize, qsize, i;
  120. if (list_empty(head))
  121. return;
  122. list = head->next;
  123. list_del(head);
  124. insize = 1;
  125. for (;;) {
  126. p = oldhead = list;
  127. list = tail = NULL;
  128. nmerges = 0;
  129. while (p) {
  130. nmerges++;
  131. q = p;
  132. psize = 0;
  133. for (i = 0; i < insize; i++) {
  134. psize++;
  135. q = q->next == oldhead ? NULL : q->next;
  136. if (!q)
  137. break;
  138. }
  139. qsize = insize;
  140. while (psize > 0 || (qsize > 0 && q)) {
  141. if (!psize) {
  142. e = q;
  143. q = q->next;
  144. qsize--;
  145. if (q == oldhead)
  146. q = NULL;
  147. } else if (!qsize || !q) {
  148. e = p;
  149. p = p->next;
  150. psize--;
  151. if (p == oldhead)
  152. p = NULL;
  153. } else if (cmp(priv, p, q) <= 0) {
  154. e = p;
  155. p = p->next;
  156. psize--;
  157. if (p == oldhead)
  158. p = NULL;
  159. } else {
  160. e = q;
  161. q = q->next;
  162. qsize--;
  163. if (q == oldhead)
  164. q = NULL;
  165. }
  166. if (tail)
  167. tail->next = e;
  168. else
  169. list = e;
  170. e->prev = tail;
  171. tail = e;
  172. }
  173. p = q;
  174. }
  175. tail->next = list;
  176. list->prev = tail;
  177. if (nmerges <= 1)
  178. break;
  179. insize *= 2;
  180. }
  181. head->next = list;
  182. head->prev = list->prev;
  183. list->prev->next = head;
  184. list->prev = head;
  185. }
  186. /**
  187. * data_nodes_cmp - compare 2 data nodes.
  188. * @priv: UBIFS file-system description object
  189. * @a: first data node
  190. * @a: second data node
  191. *
  192. * This function compares data nodes @a and @b. Returns %1 if @a has greater
  193. * inode or block number, and %-1 otherwise.
  194. */
  195. int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
  196. {
  197. ino_t inuma, inumb;
  198. struct ubifs_info *c = priv;
  199. struct ubifs_scan_node *sa, *sb;
  200. cond_resched();
  201. sa = list_entry(a, struct ubifs_scan_node, list);
  202. sb = list_entry(b, struct ubifs_scan_node, list);
  203. ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY);
  204. ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY);
  205. inuma = key_inum(c, &sa->key);
  206. inumb = key_inum(c, &sb->key);
  207. if (inuma == inumb) {
  208. unsigned int blka = key_block(c, &sa->key);
  209. unsigned int blkb = key_block(c, &sb->key);
  210. if (blka <= blkb)
  211. return -1;
  212. } else if (inuma <= inumb)
  213. return -1;
  214. return 1;
  215. }
  216. /*
  217. * nondata_nodes_cmp - compare 2 non-data nodes.
  218. * @priv: UBIFS file-system description object
  219. * @a: first node
  220. * @a: second node
  221. *
  222. * This function compares nodes @a and @b. It makes sure that inode nodes go
  223. * first and sorted by length in descending order. Directory entry nodes go
  224. * after inode nodes and are sorted in ascending hash valuer order.
  225. */
  226. int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
  227. {
  228. int typea, typeb;
  229. ino_t inuma, inumb;
  230. struct ubifs_info *c = priv;
  231. struct ubifs_scan_node *sa, *sb;
  232. cond_resched();
  233. sa = list_entry(a, struct ubifs_scan_node, list);
  234. sb = list_entry(b, struct ubifs_scan_node, list);
  235. typea = key_type(c, &sa->key);
  236. typeb = key_type(c, &sb->key);
  237. ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY);
  238. /* Inodes go before directory entries */
  239. if (typea == UBIFS_INO_KEY) {
  240. if (typeb == UBIFS_INO_KEY)
  241. return sb->len - sa->len;
  242. return -1;
  243. }
  244. if (typeb == UBIFS_INO_KEY)
  245. return 1;
  246. ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY);
  247. inuma = key_inum(c, &sa->key);
  248. inumb = key_inum(c, &sb->key);
  249. if (inuma == inumb) {
  250. uint32_t hasha = key_hash(c, &sa->key);
  251. uint32_t hashb = key_hash(c, &sb->key);
  252. if (hasha <= hashb)
  253. return -1;
  254. } else if (inuma <= inumb)
  255. return -1;
  256. return 1;
  257. }
  258. /**
  259. * sort_nodes - sort nodes for GC.
  260. * @c: UBIFS file-system description object
  261. * @sleb: describes nodes to sort and contains the result on exit
  262. * @nondata: contains non-data nodes on exit
  263. * @min: minimum node size is returned here
  264. *
  265. * This function sorts the list of inodes to garbage collect. First of all, it
  266. * kills obsolete nodes and separates data and non-data nodes to the
  267. * @sleb->nodes and @nondata lists correspondingly.
  268. *
  269. * Data nodes are then sorted in block number order - this is important for
  270. * bulk-read; data nodes with lower inode number go before data nodes with
  271. * higher inode number, and data nodes with lower block number go before data
  272. * nodes with higher block number;
  273. *
  274. * Non-data nodes are sorted as follows.
  275. * o First go inode nodes - they are sorted in descending length order.
  276. * o Then go directory entry nodes - they are sorted in hash order, which
  277. * should supposedly optimize 'readdir()'. Direntry nodes with lower parent
  278. * inode number go before direntry nodes with higher parent inode number,
  279. * and direntry nodes with lower name hash values go before direntry nodes
  280. * with higher name hash values.
  281. *
  282. * This function returns zero in case of success and a negative error code in
  283. * case of failure.
  284. */
  285. static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
  286. struct list_head *nondata, int *min)
  287. {
  288. struct ubifs_scan_node *snod, *tmp;
  289. *min = INT_MAX;
  290. /* Separate data nodes and non-data nodes */
  291. list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
  292. int err;
  293. ubifs_assert(snod->type != UBIFS_IDX_NODE);
  294. ubifs_assert(snod->type != UBIFS_REF_NODE);
  295. ubifs_assert(snod->type != UBIFS_CS_NODE);
  296. err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
  297. snod->offs, 0);
  298. if (err < 0)
  299. return err;
  300. if (!err) {
  301. /* The node is obsolete, remove it from the list */
  302. list_del(&snod->list);
  303. kfree(snod);
  304. continue;
  305. }
  306. if (snod->len < *min)
  307. *min = snod->len;
  308. if (key_type(c, &snod->key) != UBIFS_DATA_KEY)
  309. list_move_tail(&snod->list, nondata);
  310. }
  311. /* Sort data and non-data nodes */
  312. list_sort(c, &sleb->nodes, &data_nodes_cmp);
  313. list_sort(c, nondata, &nondata_nodes_cmp);
  314. return 0;
  315. }
  316. /**
  317. * move_node - move a node.
  318. * @c: UBIFS file-system description object
  319. * @sleb: describes the LEB to move nodes from
  320. * @snod: the mode to move
  321. * @wbuf: write-buffer to move node to
  322. *
  323. * This function moves node @snod to @wbuf, changes TNC correspondingly, and
  324. * destroys @snod. Returns zero in case of success and a negative error code in
  325. * case of failure.
  326. */
  327. static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
  328. struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf)
  329. {
  330. int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used;
  331. cond_resched();
  332. err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len);
  333. if (err)
  334. return err;
  335. err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
  336. snod->offs, new_lnum, new_offs,
  337. snod->len);
  338. list_del(&snod->list);
  339. kfree(snod);
  340. return err;
  341. }
  342. /**
  343. * move_nodes - move nodes.
  344. * @c: UBIFS file-system description object
  345. * @sleb: describes the LEB to move nodes from
  346. *
  347. * This function moves valid nodes from data LEB described by @sleb to the GC
  348. * journal head. This function returns zero in case of success, %-EAGAIN if
  349. * commit is required, and other negative error codes in case of other
  350. * failures.
  351. */
  352. static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
  353. {
  354. int err, min;
  355. LIST_HEAD(nondata);
  356. struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
  357. if (wbuf->lnum == -1) {
  358. /*
  359. * The GC journal head is not set, because it is the first GC
  360. * invocation since mount.
  361. */
  362. err = switch_gc_head(c);
  363. if (err)
  364. return err;
  365. }
  366. err = sort_nodes(c, sleb, &nondata, &min);
  367. if (err)
  368. goto out;
  369. /* Write nodes to their new location. Use the first-fit strategy */
  370. while (1) {
  371. int avail;
  372. struct ubifs_scan_node *snod, *tmp;
  373. /* Move data nodes */
  374. list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
  375. avail = c->leb_size - wbuf->offs - wbuf->used;
  376. if (snod->len > avail)
  377. /*
  378. * Do not skip data nodes in order to optimize
  379. * bulk-read.
  380. */
  381. break;
  382. err = move_node(c, sleb, snod, wbuf);
  383. if (err)
  384. goto out;
  385. }
  386. /* Move non-data nodes */
  387. list_for_each_entry_safe(snod, tmp, &nondata, list) {
  388. avail = c->leb_size - wbuf->offs - wbuf->used;
  389. if (avail < min)
  390. break;
  391. if (snod->len > avail) {
  392. /*
  393. * Keep going only if this is an inode with
  394. * some data. Otherwise stop and switch the GC
  395. * head. IOW, we assume that data-less inode
  396. * nodes and direntry nodes are roughly of the
  397. * same size.
  398. */
  399. if (key_type(c, &snod->key) == UBIFS_DENT_KEY ||
  400. snod->len == UBIFS_INO_NODE_SZ)
  401. break;
  402. continue;
  403. }
  404. err = move_node(c, sleb, snod, wbuf);
  405. if (err)
  406. goto out;
  407. }
  408. if (list_empty(&sleb->nodes) && list_empty(&nondata))
  409. break;
  410. /*
  411. * Waste the rest of the space in the LEB and switch to the
  412. * next LEB.
  413. */
  414. err = switch_gc_head(c);
  415. if (err)
  416. goto out;
  417. }
  418. return 0;
  419. out:
  420. list_splice_tail(&nondata, &sleb->nodes);
  421. return err;
  422. }
  423. /**
  424. * gc_sync_wbufs - sync write-buffers for GC.
  425. * @c: UBIFS file-system description object
  426. *
  427. * We must guarantee that obsoleting nodes are on flash. Unfortunately they may
  428. * be in a write-buffer instead. That is, a node could be written to a
  429. * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is
  430. * erased before the write-buffer is sync'd and then there is an unclean
  431. * unmount, then an existing node is lost. To avoid this, we sync all
  432. * write-buffers.
  433. *
  434. * This function returns %0 on success or a negative error code on failure.
  435. */
  436. static int gc_sync_wbufs(struct ubifs_info *c)
  437. {
  438. int err, i;
  439. for (i = 0; i < c->jhead_cnt; i++) {
  440. if (i == GCHD)
  441. continue;
  442. err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
  443. if (err)
  444. return err;
  445. }
  446. return 0;
  447. }
  448. /**
  449. * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock.
  450. * @c: UBIFS file-system description object
  451. * @lp: describes the LEB to garbage collect
  452. *
  453. * This function garbage-collects an LEB and returns one of the @LEB_FREED,
  454. * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is
  455. * required, and other negative error codes in case of failures.
  456. */
  457. int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
  458. {
  459. struct ubifs_scan_leb *sleb;
  460. struct ubifs_scan_node *snod;
  461. struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
  462. int err = 0, lnum = lp->lnum;
  463. ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 ||
  464. c->need_recovery);
  465. ubifs_assert(c->gc_lnum != lnum);
  466. ubifs_assert(wbuf->lnum != lnum);
  467. /*
  468. * We scan the entire LEB even though we only really need to scan up to
  469. * (c->leb_size - lp->free).
  470. */
  471. sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
  472. if (IS_ERR(sleb))
  473. return PTR_ERR(sleb);
  474. ubifs_assert(!list_empty(&sleb->nodes));
  475. snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
  476. if (snod->type == UBIFS_IDX_NODE) {
  477. struct ubifs_gced_idx_leb *idx_gc;
  478. dbg_gc("indexing LEB %d (free %d, dirty %d)",
  479. lnum, lp->free, lp->dirty);
  480. list_for_each_entry(snod, &sleb->nodes, list) {
  481. struct ubifs_idx_node *idx = snod->node;
  482. int level = le16_to_cpu(idx->level);
  483. ubifs_assert(snod->type == UBIFS_IDX_NODE);
  484. key_read(c, ubifs_idx_key(c, idx), &snod->key);
  485. err = ubifs_dirty_idx_node(c, &snod->key, level, lnum,
  486. snod->offs);
  487. if (err)
  488. goto out;
  489. }
  490. idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
  491. if (!idx_gc) {
  492. err = -ENOMEM;
  493. goto out;
  494. }
  495. idx_gc->lnum = lnum;
  496. idx_gc->unmap = 0;
  497. list_add(&idx_gc->list, &c->idx_gc);
  498. /*
  499. * Don't release the LEB until after the next commit, because
  500. * it may contain data which is needed for recovery. So
  501. * although we freed this LEB, it will become usable only after
  502. * the commit.
  503. */
  504. err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0,
  505. LPROPS_INDEX, 1);
  506. if (err)
  507. goto out;
  508. err = LEB_FREED_IDX;
  509. } else {
  510. dbg_gc("data LEB %d (free %d, dirty %d)",
  511. lnum, lp->free, lp->dirty);
  512. err = move_nodes(c, sleb);
  513. if (err)
  514. goto out_inc_seq;
  515. err = gc_sync_wbufs(c);
  516. if (err)
  517. goto out_inc_seq;
  518. err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
  519. if (err)
  520. goto out_inc_seq;
  521. /* Allow for races with TNC */
  522. c->gced_lnum = lnum;
  523. smp_wmb();
  524. c->gc_seq += 1;
  525. smp_wmb();
  526. if (c->gc_lnum == -1) {
  527. c->gc_lnum = lnum;
  528. err = LEB_RETAINED;
  529. } else {
  530. err = ubifs_wbuf_sync_nolock(wbuf);
  531. if (err)
  532. goto out;
  533. err = ubifs_leb_unmap(c, lnum);
  534. if (err)
  535. goto out;
  536. err = LEB_FREED;
  537. }
  538. }
  539. out:
  540. ubifs_scan_destroy(sleb);
  541. return err;
  542. out_inc_seq:
  543. /* We may have moved at least some nodes so allow for races with TNC */
  544. c->gced_lnum = lnum;
  545. smp_wmb();
  546. c->gc_seq += 1;
  547. smp_wmb();
  548. goto out;
  549. }
  550. /**
  551. * ubifs_garbage_collect - UBIFS garbage collector.
  552. * @c: UBIFS file-system description object
  553. * @anyway: do GC even if there are free LEBs
  554. *
  555. * This function does out-of-place garbage collection. The return codes are:
  556. * o positive LEB number if the LEB has been freed and may be used;
  557. * o %-EAGAIN if the caller has to run commit;
  558. * o %-ENOSPC if GC failed to make any progress;
  559. * o other negative error codes in case of other errors.
  560. *
  561. * Garbage collector writes data to the journal when GC'ing data LEBs, and just
  562. * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point
  563. * commit may be required. But commit cannot be run from inside GC, because the
  564. * caller might be holding the commit lock, so %-EAGAIN is returned instead;
  565. * And this error code means that the caller has to run commit, and re-run GC
  566. * if there is still no free space.
  567. *
  568. * There are many reasons why this function may return %-EAGAIN:
  569. * o the log is full and there is no space to write an LEB reference for
  570. * @c->gc_lnum;
  571. * o the journal is too large and exceeds size limitations;
  572. * o GC moved indexing LEBs, but they can be used only after the commit;
  573. * o the shrinker fails to find clean znodes to free and requests the commit;
  574. * o etc.
  575. *
  576. * Note, if the file-system is close to be full, this function may return
  577. * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of
  578. * the function. E.g., this happens if the limits on the journal size are too
  579. * tough and GC writes too much to the journal before an LEB is freed. This
  580. * might also mean that the journal is too large, and the TNC becomes to big,
  581. * so that the shrinker is constantly called, finds not clean znodes to free,
  582. * and requests commit. Well, this may also happen if the journal is all right,
  583. * but another kernel process consumes too much memory. Anyway, infinite
  584. * %-EAGAIN may happen, but in some extreme/misconfiguration cases.
  585. */
  586. int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
  587. {
  588. int i, err, ret, min_space = c->dead_wm;
  589. struct ubifs_lprops lp;
  590. struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
  591. ubifs_assert_cmt_locked(c);
  592. if (ubifs_gc_should_commit(c))
  593. return -EAGAIN;
  594. mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
  595. if (c->ro_media) {
  596. ret = -EROFS;
  597. goto out_unlock;
  598. }
  599. /* We expect the write-buffer to be empty on entry */
  600. ubifs_assert(!wbuf->used);
  601. for (i = 0; ; i++) {
  602. int space_before = c->leb_size - wbuf->offs - wbuf->used;
  603. int space_after;
  604. cond_resched();
  605. /* Give the commit an opportunity to run */
  606. if (ubifs_gc_should_commit(c)) {
  607. ret = -EAGAIN;
  608. break;
  609. }
  610. if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) {
  611. /*
  612. * We've done enough iterations. Indexing LEBs were
  613. * moved and will be available after the commit.
  614. */
  615. dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN");
  616. ubifs_commit_required(c);
  617. ret = -EAGAIN;
  618. break;
  619. }
  620. if (i > HARD_LEBS_LIMIT) {
  621. /*
  622. * We've moved too many LEBs and have not made
  623. * progress, give up.
  624. */
  625. dbg_gc("hard limit, -ENOSPC");
  626. ret = -ENOSPC;
  627. break;
  628. }
  629. /*
  630. * Empty and freeable LEBs can turn up while we waited for
  631. * the wbuf lock, or while we have been running GC. In that
  632. * case, we should just return one of those instead of
  633. * continuing to GC dirty LEBs. Hence we request
  634. * 'ubifs_find_dirty_leb()' to return an empty LEB if it can.
  635. */
  636. ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1);
  637. if (ret) {
  638. if (ret == -ENOSPC)
  639. dbg_gc("no more dirty LEBs");
  640. break;
  641. }
  642. dbg_gc("found LEB %d: free %d, dirty %d, sum %d "
  643. "(min. space %d)", lp.lnum, lp.free, lp.dirty,
  644. lp.free + lp.dirty, min_space);
  645. if (lp.free + lp.dirty == c->leb_size) {
  646. /* An empty LEB was returned */
  647. dbg_gc("LEB %d is free, return it", lp.lnum);
  648. /*
  649. * ubifs_find_dirty_leb() doesn't return freeable index
  650. * LEBs.
  651. */
  652. ubifs_assert(!(lp.flags & LPROPS_INDEX));
  653. if (lp.free != c->leb_size) {
  654. /*
  655. * Write buffers must be sync'd before
  656. * unmapping freeable LEBs, because one of them
  657. * may contain data which obsoletes something
  658. * in 'lp.pnum'.
  659. */
  660. ret = gc_sync_wbufs(c);
  661. if (ret)
  662. goto out;
  663. ret = ubifs_change_one_lp(c, lp.lnum,
  664. c->leb_size, 0, 0, 0,
  665. 0);
  666. if (ret)
  667. goto out;
  668. }
  669. ret = ubifs_leb_unmap(c, lp.lnum);
  670. if (ret)
  671. goto out;
  672. ret = lp.lnum;
  673. break;
  674. }
  675. space_before = c->leb_size - wbuf->offs - wbuf->used;
  676. if (wbuf->lnum == -1)
  677. space_before = 0;
  678. ret = ubifs_garbage_collect_leb(c, &lp);
  679. if (ret < 0) {
  680. if (ret == -EAGAIN || ret == -ENOSPC) {
  681. /*
  682. * These codes are not errors, so we have to
  683. * return the LEB to lprops. But if the
  684. * 'ubifs_return_leb()' function fails, its
  685. * failure code is propagated to the caller
  686. * instead of the original '-EAGAIN' or
  687. * '-ENOSPC'.
  688. */
  689. err = ubifs_return_leb(c, lp.lnum);
  690. if (err)
  691. ret = err;
  692. break;
  693. }
  694. goto out;
  695. }
  696. if (ret == LEB_FREED) {
  697. /* An LEB has been freed and is ready for use */
  698. dbg_gc("LEB %d freed, return", lp.lnum);
  699. ret = lp.lnum;
  700. break;
  701. }
  702. if (ret == LEB_FREED_IDX) {
  703. /*
  704. * This was an indexing LEB and it cannot be
  705. * immediately used. And instead of requesting the
  706. * commit straight away, we try to garbage collect some
  707. * more.
  708. */
  709. dbg_gc("indexing LEB %d freed, continue", lp.lnum);
  710. continue;
  711. }
  712. ubifs_assert(ret == LEB_RETAINED);
  713. space_after = c->leb_size - wbuf->offs - wbuf->used;
  714. dbg_gc("LEB %d retained, freed %d bytes", lp.lnum,
  715. space_after - space_before);
  716. if (space_after > space_before) {
  717. /* GC makes progress, keep working */
  718. min_space >>= 1;
  719. if (min_space < c->dead_wm)
  720. min_space = c->dead_wm;
  721. continue;
  722. }
  723. dbg_gc("did not make progress");
  724. /*
  725. * GC moved an LEB bud have not done any progress. This means
  726. * that the previous GC head LEB contained too few free space
  727. * and the LEB which was GC'ed contained only large nodes which
  728. * did not fit that space.
  729. *
  730. * We can do 2 things:
  731. * 1. pick another LEB in a hope it'll contain a small node
  732. * which will fit the space we have at the end of current GC
  733. * head LEB, but there is no guarantee, so we try this out
  734. * unless we have already been working for too long;
  735. * 2. request an LEB with more dirty space, which will force
  736. * 'ubifs_find_dirty_leb()' to start scanning the lprops
  737. * table, instead of just picking one from the heap
  738. * (previously it already picked the dirtiest LEB).
  739. */
  740. if (i < SOFT_LEBS_LIMIT) {
  741. dbg_gc("try again");
  742. continue;
  743. }
  744. min_space <<= 1;
  745. if (min_space > c->dark_wm)
  746. min_space = c->dark_wm;
  747. dbg_gc("set min. space to %d", min_space);
  748. }
  749. if (ret == -ENOSPC && !list_empty(&c->idx_gc)) {
  750. dbg_gc("no space, some index LEBs GC'ed, -EAGAIN");
  751. ubifs_commit_required(c);
  752. ret = -EAGAIN;
  753. }
  754. err = ubifs_wbuf_sync_nolock(wbuf);
  755. if (!err)
  756. err = ubifs_leb_unmap(c, c->gc_lnum);
  757. if (err) {
  758. ret = err;
  759. goto out;
  760. }
  761. out_unlock:
  762. mutex_unlock(&wbuf->io_mutex);
  763. return ret;
  764. out:
  765. ubifs_assert(ret < 0);
  766. ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
  767. ubifs_ro_mode(c, ret);
  768. ubifs_wbuf_sync_nolock(wbuf);
  769. mutex_unlock(&wbuf->io_mutex);
  770. ubifs_return_leb(c, lp.lnum);
  771. return ret;
  772. }
  773. /**
  774. * ubifs_gc_start_commit - garbage collection at start of commit.
  775. * @c: UBIFS file-system description object
  776. *
  777. * If a LEB has only dirty and free space, then we may safely unmap it and make
  778. * it free. Note, we cannot do this with indexing LEBs because dirty space may
  779. * correspond index nodes that are required for recovery. In that case, the
  780. * LEB cannot be unmapped until after the next commit.
  781. *
  782. * This function returns %0 upon success and a negative error code upon failure.
  783. */
  784. int ubifs_gc_start_commit(struct ubifs_info *c)
  785. {
  786. struct ubifs_gced_idx_leb *idx_gc;
  787. const struct ubifs_lprops *lp;
  788. int err = 0, flags;
  789. ubifs_get_lprops(c);
  790. /*
  791. * Unmap (non-index) freeable LEBs. Note that recovery requires that all
  792. * wbufs are sync'd before this, which is done in 'do_commit()'.
  793. */
  794. while (1) {
  795. lp = ubifs_fast_find_freeable(c);
  796. if (IS_ERR(lp)) {
  797. err = PTR_ERR(lp);
  798. goto out;
  799. }
  800. if (!lp)
  801. break;
  802. ubifs_assert(!(lp->flags & LPROPS_TAKEN));
  803. ubifs_assert(!(lp->flags & LPROPS_INDEX));
  804. err = ubifs_leb_unmap(c, lp->lnum);
  805. if (err)
  806. goto out;
  807. lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
  808. if (IS_ERR(lp)) {
  809. err = PTR_ERR(lp);
  810. goto out;
  811. }
  812. ubifs_assert(!(lp->flags & LPROPS_TAKEN));
  813. ubifs_assert(!(lp->flags & LPROPS_INDEX));
  814. }
  815. /* Mark GC'd index LEBs OK to unmap after this commit finishes */
  816. list_for_each_entry(idx_gc, &c->idx_gc, list)
  817. idx_gc->unmap = 1;
  818. /* Record index freeable LEBs for unmapping after commit */
  819. while (1) {
  820. lp = ubifs_fast_find_frdi_idx(c);
  821. if (IS_ERR(lp)) {
  822. err = PTR_ERR(lp);
  823. goto out;
  824. }
  825. if (!lp)
  826. break;
  827. idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
  828. if (!idx_gc) {
  829. err = -ENOMEM;
  830. goto out;
  831. }
  832. ubifs_assert(!(lp->flags & LPROPS_TAKEN));
  833. ubifs_assert(lp->flags & LPROPS_INDEX);
  834. /* Don't release the LEB until after the next commit */
  835. flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
  836. lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
  837. if (IS_ERR(lp)) {
  838. err = PTR_ERR(lp);
  839. kfree(idx_gc);
  840. goto out;
  841. }
  842. ubifs_assert(lp->flags & LPROPS_TAKEN);
  843. ubifs_assert(!(lp->flags & LPROPS_INDEX));
  844. idx_gc->lnum = lp->lnum;
  845. idx_gc->unmap = 1;
  846. list_add(&idx_gc->list, &c->idx_gc);
  847. }
  848. out:
  849. ubifs_release_lprops(c);
  850. return err;
  851. }
  852. /**
  853. * ubifs_gc_end_commit - garbage collection at end of commit.
  854. * @c: UBIFS file-system description object
  855. *
  856. * This function completes out-of-place garbage collection of index LEBs.
  857. */
  858. int ubifs_gc_end_commit(struct ubifs_info *c)
  859. {
  860. struct ubifs_gced_idx_leb *idx_gc, *tmp;
  861. struct ubifs_wbuf *wbuf;
  862. int err = 0;
  863. wbuf = &c->jheads[GCHD].wbuf;
  864. mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
  865. list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list)
  866. if (idx_gc->unmap) {
  867. dbg_gc("LEB %d", idx_gc->lnum);
  868. err = ubifs_leb_unmap(c, idx_gc->lnum);
  869. if (err)
  870. goto out;
  871. err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC,
  872. LPROPS_NC, 0, LPROPS_TAKEN, -1);
  873. if (err)
  874. goto out;
  875. list_del(&idx_gc->list);
  876. kfree(idx_gc);
  877. }
  878. out:
  879. mutex_unlock(&wbuf->io_mutex);
  880. return err;
  881. }
  882. /**
  883. * ubifs_destroy_idx_gc - destroy idx_gc list.
  884. * @c: UBIFS file-system description object
  885. *
  886. * This function destroys the @c->idx_gc list. It is called when unmounting
  887. * so locks are not needed. Returns zero in case of success and a negative
  888. * error code in case of failure.
  889. */
  890. void ubifs_destroy_idx_gc(struct ubifs_info *c)
  891. {
  892. while (!list_empty(&c->idx_gc)) {
  893. struct ubifs_gced_idx_leb *idx_gc;
  894. idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb,
  895. list);
  896. c->idx_gc_cnt -= 1;
  897. list_del(&idx_gc->list);
  898. kfree(idx_gc);
  899. }
  900. }
  901. /**
  902. * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list.
  903. * @c: UBIFS file-system description object
  904. *
  905. * Called during start commit so locks are not needed.
  906. */
  907. int ubifs_get_idx_gc_leb(struct ubifs_info *c)
  908. {
  909. struct ubifs_gced_idx_leb *idx_gc;
  910. int lnum;
  911. if (list_empty(&c->idx_gc))
  912. return -ENOSPC;
  913. idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list);
  914. lnum = idx_gc->lnum;
  915. /* c->idx_gc_cnt is updated by the caller when lprops are updated */
  916. list_del(&idx_gc->list);
  917. kfree(idx_gc);
  918. return lnum;
  919. }