/*
 * alloc.c - NILFS dat/inode allocator
 *
 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Original code was written by Koji Sato <koji@osrg.net>.
 * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>,
 * Amagai Yoshiji <amagai@osrg.net>.
 */

#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "mdt.h"
#include "alloc.h"
  31. static inline unsigned long
  32. nilfs_palloc_groups_per_desc_block(const struct inode *inode)
  33. {
  34. return (1UL << inode->i_blkbits) /
  35. sizeof(struct nilfs_palloc_group_desc);
  36. }
  37. static inline unsigned long
  38. nilfs_palloc_groups_count(const struct inode *inode)
  39. {
  40. return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */));
  41. }
  42. int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
  43. {
  44. struct nilfs_mdt_info *mi = NILFS_MDT(inode);
  45. mi->mi_bgl = kmalloc(sizeof(*mi->mi_bgl), GFP_NOFS);
  46. if (!mi->mi_bgl)
  47. return -ENOMEM;
  48. bgl_lock_init(mi->mi_bgl);
  49. nilfs_mdt_set_entry_size(inode, entry_size, 0);
  50. mi->mi_blocks_per_group =
  51. DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode),
  52. mi->mi_entries_per_block) + 1;
  53. /* Number of blocks in a group including entry blocks and
  54. a bitmap block */
  55. mi->mi_blocks_per_desc_block =
  56. nilfs_palloc_groups_per_desc_block(inode) *
  57. mi->mi_blocks_per_group + 1;
  58. /* Number of blocks per descriptor including the
  59. descriptor block */
  60. return 0;
  61. }
  62. static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
  63. unsigned long *offset)
  64. {
  65. __u64 group = nr;
  66. *offset = do_div(group, nilfs_palloc_entries_per_group(inode));
  67. return group;
  68. }
  69. static unsigned long
  70. nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
  71. {
  72. unsigned long desc_block =
  73. group / nilfs_palloc_groups_per_desc_block(inode);
  74. return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block;
  75. }
  76. static unsigned long
  77. nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
  78. {
  79. unsigned long desc_offset =
  80. group % nilfs_palloc_groups_per_desc_block(inode);
  81. return nilfs_palloc_desc_blkoff(inode, group) + 1 +
  82. desc_offset * NILFS_MDT(inode)->mi_blocks_per_group;
  83. }
  84. static unsigned long
  85. nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group,
  86. const struct nilfs_palloc_group_desc *desc)
  87. {
  88. unsigned long nfree;
  89. spin_lock(nilfs_mdt_bgl_lock(inode, group));
  90. nfree = le32_to_cpu(desc->pg_nfrees);
  91. spin_unlock(nilfs_mdt_bgl_lock(inode, group));
  92. return nfree;
  93. }
  94. static void
  95. nilfs_palloc_group_desc_add_entries(struct inode *inode,
  96. unsigned long group,
  97. struct nilfs_palloc_group_desc *desc,
  98. u32 n)
  99. {
  100. spin_lock(nilfs_mdt_bgl_lock(inode, group));
  101. le32_add_cpu(&desc->pg_nfrees, n);
  102. spin_unlock(nilfs_mdt_bgl_lock(inode, group));
  103. }
  104. static unsigned long
  105. nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
  106. {
  107. unsigned long group, group_offset;
  108. group = nilfs_palloc_group(inode, nr, &group_offset);
  109. return nilfs_palloc_bitmap_blkoff(inode, group) + 1 +
  110. group_offset / NILFS_MDT(inode)->mi_entries_per_block;
  111. }
  112. static void nilfs_palloc_desc_block_init(struct inode *inode,
  113. struct buffer_head *bh, void *kaddr)
  114. {
  115. struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh);
  116. unsigned long n = nilfs_palloc_groups_per_desc_block(inode);
  117. __le32 nfrees;
  118. nfrees = cpu_to_le32(nilfs_palloc_entries_per_group(inode));
  119. while (n-- > 0) {
  120. desc->pg_nfrees = nfrees;
  121. desc++;
  122. }
  123. }
  124. static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
  125. int create,
  126. void (*init_block)(struct inode *,
  127. struct buffer_head *,
  128. void *),
  129. struct buffer_head **bhp,
  130. struct nilfs_bh_assoc *prev,
  131. spinlock_t *lock)
  132. {
  133. int ret;
  134. spin_lock(lock);
  135. if (prev->bh && blkoff == prev->blkoff) {
  136. get_bh(prev->bh);
  137. *bhp = prev->bh;
  138. spin_unlock(lock);
  139. return 0;
  140. }
  141. spin_unlock(lock);
  142. ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp);
  143. if (!ret) {
  144. spin_lock(lock);
  145. /*
  146. * The following code must be safe for change of the
  147. * cache contents during the get block call.
  148. */
  149. brelse(prev->bh);
  150. get_bh(*bhp);
  151. prev->bh = *bhp;
  152. prev->blkoff = blkoff;
  153. spin_unlock(lock);
  154. }
  155. return ret;
  156. }
  157. static int nilfs_palloc_get_desc_block(struct inode *inode,
  158. unsigned long group,
  159. int create, struct buffer_head **bhp)
  160. {
  161. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  162. return nilfs_palloc_get_block(inode,
  163. nilfs_palloc_desc_blkoff(inode, group),
  164. create, nilfs_palloc_desc_block_init,
  165. bhp, &cache->prev_desc, &cache->lock);
  166. }
  167. static int nilfs_palloc_get_bitmap_block(struct inode *inode,
  168. unsigned long group,
  169. int create, struct buffer_head **bhp)
  170. {
  171. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  172. return nilfs_palloc_get_block(inode,
  173. nilfs_palloc_bitmap_blkoff(inode, group),
  174. create, NULL, bhp,
  175. &cache->prev_bitmap, &cache->lock);
  176. }
  177. int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
  178. int create, struct buffer_head **bhp)
  179. {
  180. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  181. return nilfs_palloc_get_block(inode,
  182. nilfs_palloc_entry_blkoff(inode, nr),
  183. create, NULL, bhp,
  184. &cache->prev_entry, &cache->lock);
  185. }
  186. static struct nilfs_palloc_group_desc *
  187. nilfs_palloc_block_get_group_desc(const struct inode *inode,
  188. unsigned long group,
  189. const struct buffer_head *bh, void *kaddr)
  190. {
  191. return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) +
  192. group % nilfs_palloc_groups_per_desc_block(inode);
  193. }
  194. void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
  195. const struct buffer_head *bh, void *kaddr)
  196. {
  197. unsigned long entry_offset, group_offset;
  198. nilfs_palloc_group(inode, nr, &group_offset);
  199. entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block;
  200. return kaddr + bh_offset(bh) +
  201. entry_offset * NILFS_MDT(inode)->mi_entry_size;
  202. }
  203. static int nilfs_palloc_find_available_slot(struct inode *inode,
  204. unsigned long group,
  205. unsigned long target,
  206. unsigned char *bitmap,
  207. int bsize) /* size in bits */
  208. {
  209. int curr, pos, end, i;
  210. if (target > 0) {
  211. end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1);
  212. if (end > bsize)
  213. end = bsize;
  214. pos = nilfs_find_next_zero_bit(bitmap, end, target);
  215. if (pos < end &&
  216. !nilfs_set_bit_atomic(
  217. nilfs_mdt_bgl_lock(inode, group), pos, bitmap))
  218. return pos;
  219. } else
  220. end = 0;
  221. for (i = 0, curr = end;
  222. i < bsize;
  223. i += BITS_PER_LONG, curr += BITS_PER_LONG) {
  224. /* wrap around */
  225. if (curr >= bsize)
  226. curr = 0;
  227. while (*((unsigned long *)bitmap + curr / BITS_PER_LONG)
  228. != ~0UL) {
  229. end = curr + BITS_PER_LONG;
  230. if (end > bsize)
  231. end = bsize;
  232. pos = nilfs_find_next_zero_bit(bitmap, end, curr);
  233. if ((pos < end) &&
  234. !nilfs_set_bit_atomic(
  235. nilfs_mdt_bgl_lock(inode, group), pos,
  236. bitmap))
  237. return pos;
  238. }
  239. }
  240. return -ENOSPC;
  241. }
  242. static unsigned long
  243. nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
  244. unsigned long curr, unsigned long max)
  245. {
  246. return min_t(unsigned long,
  247. nilfs_palloc_groups_per_desc_block(inode) -
  248. curr % nilfs_palloc_groups_per_desc_block(inode),
  249. max - curr + 1);
  250. }
  251. int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
  252. struct nilfs_palloc_req *req)
  253. {
  254. struct buffer_head *desc_bh, *bitmap_bh;
  255. struct nilfs_palloc_group_desc *desc;
  256. unsigned char *bitmap;
  257. void *desc_kaddr, *bitmap_kaddr;
  258. unsigned long group, maxgroup, ngroups;
  259. unsigned long group_offset, maxgroup_offset;
  260. unsigned long n, entries_per_group, groups_per_desc_block;
  261. unsigned long i, j;
  262. int pos, ret;
  263. ngroups = nilfs_palloc_groups_count(inode);
  264. maxgroup = ngroups - 1;
  265. group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
  266. entries_per_group = nilfs_palloc_entries_per_group(inode);
  267. groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode);
  268. for (i = 0; i < ngroups; i += n) {
  269. if (group >= ngroups) {
  270. /* wrap around */
  271. group = 0;
  272. maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr,
  273. &maxgroup_offset) - 1;
  274. }
  275. ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
  276. if (ret < 0)
  277. return ret;
  278. desc_kaddr = kmap(desc_bh->b_page);
  279. desc = nilfs_palloc_block_get_group_desc(
  280. inode, group, desc_bh, desc_kaddr);
  281. n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
  282. maxgroup);
  283. for (j = 0; j < n; j++, desc++, group++) {
  284. if (nilfs_palloc_group_desc_nfrees(inode, group, desc)
  285. > 0) {
  286. ret = nilfs_palloc_get_bitmap_block(
  287. inode, group, 1, &bitmap_bh);
  288. if (ret < 0)
  289. goto out_desc;
  290. bitmap_kaddr = kmap(bitmap_bh->b_page);
  291. bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
  292. pos = nilfs_palloc_find_available_slot(
  293. inode, group, group_offset, bitmap,
  294. entries_per_group);
  295. if (pos >= 0) {
  296. /* found a free entry */
  297. nilfs_palloc_group_desc_add_entries(
  298. inode, group, desc, -1);
  299. req->pr_entry_nr =
  300. entries_per_group * group + pos;
  301. kunmap(desc_bh->b_page);
  302. kunmap(bitmap_bh->b_page);
  303. req->pr_desc_bh = desc_bh;
  304. req->pr_bitmap_bh = bitmap_bh;
  305. return 0;
  306. }
  307. kunmap(bitmap_bh->b_page);
  308. brelse(bitmap_bh);
  309. }
  310. group_offset = 0;
  311. }
  312. kunmap(desc_bh->b_page);
  313. brelse(desc_bh);
  314. }
  315. /* no entries left */
  316. return -ENOSPC;
  317. out_desc:
  318. kunmap(desc_bh->b_page);
  319. brelse(desc_bh);
  320. return ret;
  321. }
  322. void nilfs_palloc_commit_alloc_entry(struct inode *inode,
  323. struct nilfs_palloc_req *req)
  324. {
  325. nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
  326. nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
  327. nilfs_mdt_mark_dirty(inode);
  328. brelse(req->pr_bitmap_bh);
  329. brelse(req->pr_desc_bh);
  330. }
  331. void nilfs_palloc_commit_free_entry(struct inode *inode,
  332. struct nilfs_palloc_req *req)
  333. {
  334. struct nilfs_palloc_group_desc *desc;
  335. unsigned long group, group_offset;
  336. unsigned char *bitmap;
  337. void *desc_kaddr, *bitmap_kaddr;
  338. group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
  339. desc_kaddr = kmap(req->pr_desc_bh->b_page);
  340. desc = nilfs_palloc_block_get_group_desc(inode, group,
  341. req->pr_desc_bh, desc_kaddr);
  342. bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
  343. bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
  344. if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
  345. group_offset, bitmap))
  346. printk(KERN_WARNING "%s: entry number %llu already freed\n",
  347. __func__, (unsigned long long)req->pr_entry_nr);
  348. nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
  349. kunmap(req->pr_bitmap_bh->b_page);
  350. kunmap(req->pr_desc_bh->b_page);
  351. nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
  352. nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
  353. nilfs_mdt_mark_dirty(inode);
  354. brelse(req->pr_bitmap_bh);
  355. brelse(req->pr_desc_bh);
  356. }
  357. void nilfs_palloc_abort_alloc_entry(struct inode *inode,
  358. struct nilfs_palloc_req *req)
  359. {
  360. struct nilfs_palloc_group_desc *desc;
  361. void *desc_kaddr, *bitmap_kaddr;
  362. unsigned char *bitmap;
  363. unsigned long group, group_offset;
  364. group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
  365. desc_kaddr = kmap(req->pr_desc_bh->b_page);
  366. desc = nilfs_palloc_block_get_group_desc(inode, group,
  367. req->pr_desc_bh, desc_kaddr);
  368. bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
  369. bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
  370. if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
  371. group_offset, bitmap))
  372. printk(KERN_WARNING "%s: entry number %llu already freed\n",
  373. __func__, (unsigned long long)req->pr_entry_nr);
  374. nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
  375. kunmap(req->pr_bitmap_bh->b_page);
  376. kunmap(req->pr_desc_bh->b_page);
  377. brelse(req->pr_bitmap_bh);
  378. brelse(req->pr_desc_bh);
  379. req->pr_entry_nr = 0;
  380. req->pr_bitmap_bh = NULL;
  381. req->pr_desc_bh = NULL;
  382. }
  383. int nilfs_palloc_prepare_free_entry(struct inode *inode,
  384. struct nilfs_palloc_req *req)
  385. {
  386. struct buffer_head *desc_bh, *bitmap_bh;
  387. unsigned long group, group_offset;
  388. int ret;
  389. group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
  390. ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
  391. if (ret < 0)
  392. return ret;
  393. ret = nilfs_palloc_get_bitmap_block(inode, group, 1, &bitmap_bh);
  394. if (ret < 0) {
  395. brelse(desc_bh);
  396. return ret;
  397. }
  398. req->pr_desc_bh = desc_bh;
  399. req->pr_bitmap_bh = bitmap_bh;
  400. return 0;
  401. }
  402. void nilfs_palloc_abort_free_entry(struct inode *inode,
  403. struct nilfs_palloc_req *req)
  404. {
  405. brelse(req->pr_bitmap_bh);
  406. brelse(req->pr_desc_bh);
  407. req->pr_entry_nr = 0;
  408. req->pr_bitmap_bh = NULL;
  409. req->pr_desc_bh = NULL;
  410. }
  411. static int
  412. nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
  413. {
  414. __u64 first, last;
  415. first = group * nilfs_palloc_entries_per_group(inode);
  416. last = first + nilfs_palloc_entries_per_group(inode) - 1;
  417. return (nr >= first) && (nr <= last);
  418. }
  419. int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
  420. {
  421. struct buffer_head *desc_bh, *bitmap_bh;
  422. struct nilfs_palloc_group_desc *desc;
  423. unsigned char *bitmap;
  424. void *desc_kaddr, *bitmap_kaddr;
  425. unsigned long group, group_offset;
  426. int i, j, n, ret;
  427. for (i = 0; i < nitems; i += n) {
  428. group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
  429. ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
  430. if (ret < 0)
  431. return ret;
  432. ret = nilfs_palloc_get_bitmap_block(inode, group, 0,
  433. &bitmap_bh);
  434. if (ret < 0) {
  435. brelse(desc_bh);
  436. return ret;
  437. }
  438. desc_kaddr = kmap(desc_bh->b_page);
  439. desc = nilfs_palloc_block_get_group_desc(
  440. inode, group, desc_bh, desc_kaddr);
  441. bitmap_kaddr = kmap(bitmap_bh->b_page);
  442. bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
  443. for (j = i, n = 0;
  444. (j < nitems) && nilfs_palloc_group_is_in(inode, group,
  445. entry_nrs[j]);
  446. j++, n++) {
  447. nilfs_palloc_group(inode, entry_nrs[j], &group_offset);
  448. if (!nilfs_clear_bit_atomic(
  449. nilfs_mdt_bgl_lock(inode, group),
  450. group_offset, bitmap)) {
  451. printk(KERN_WARNING
  452. "%s: entry number %llu already freed\n",
  453. __func__,
  454. (unsigned long long)entry_nrs[j]);
  455. }
  456. }
  457. nilfs_palloc_group_desc_add_entries(inode, group, desc, n);
  458. kunmap(bitmap_bh->b_page);
  459. kunmap(desc_bh->b_page);
  460. nilfs_mdt_mark_buffer_dirty(desc_bh);
  461. nilfs_mdt_mark_buffer_dirty(bitmap_bh);
  462. nilfs_mdt_mark_dirty(inode);
  463. brelse(bitmap_bh);
  464. brelse(desc_bh);
  465. }
  466. return 0;
  467. }
  468. void nilfs_palloc_setup_cache(struct inode *inode,
  469. struct nilfs_palloc_cache *cache)
  470. {
  471. NILFS_MDT(inode)->mi_palloc_cache = cache;
  472. spin_lock_init(&cache->lock);
  473. }
  474. void nilfs_palloc_clear_cache(struct inode *inode)
  475. {
  476. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  477. spin_lock(&cache->lock);
  478. brelse(cache->prev_desc.bh);
  479. brelse(cache->prev_bitmap.bh);
  480. brelse(cache->prev_entry.bh);
  481. cache->prev_desc.bh = NULL;
  482. cache->prev_bitmap.bh = NULL;
  483. cache->prev_entry.bh = NULL;
  484. spin_unlock(&cache->lock);
  485. }
  486. void nilfs_palloc_destroy_cache(struct inode *inode)
  487. {
  488. nilfs_palloc_clear_cache(inode);
  489. NILFS_MDT(inode)->mi_palloc_cache = NULL;
  490. }