jfs_metapage.c

/*
 *   Copyright (C) International Business Machines Corp., 2000-2003
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/delay.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"
static DEFINE_SPINLOCK(meta_lock);

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
} mpStat;
#endif

#define HASH_BITS 10		/* This makes hash_table 1 4K page */
#define HASH_SIZE (1 << HASH_BITS)
static struct metapage **hash_table = NULL;
static unsigned long hash_order;
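
/*
 * Metapage locking is built on the META_locked bit in mp->flag.  Note
 * that trylock_metapage() returns the old bit value, so zero means the
 * lock was acquired and nonzero means someone else already holds it.
 */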
static inline int metapage_locked(struct metapage *mp)
{
	return test_bit(META_locked, &mp->flag);
}

static inline int trylock_metapage(struct metapage *mp)
{
	return test_and_set_bit(META_locked, &mp->flag);
}

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit(META_locked, &mp->flag);
	wake_up(&mp->wait);
}
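
/*
 * Slow path of lock_metapage().  Called with meta_lock held; drops and
 * reacquires it around schedule() while waiting for META_locked to clear.
 */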
static void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);

	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			spin_unlock(&meta_lock);
			schedule();
			spin_lock(&meta_lock);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

/* needs meta_lock */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}
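
/*
 * struct metapage objects come from a slab cache backed by a mempool,
 * which keeps METAPOOL_MIN_PAGES elements preallocated so allocations
 * in the writeback path can always make forward progress.
 */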
#define METAPOOL_MIN_PAGES 32
static kmem_cache_t *metapage_cache;
static mempool_t *metapage_mempool;

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct metapage *mp = (struct metapage *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		mp->lid = 0;
		mp->lsn = 0;
		mp->flag = 0;
		mp->data = NULL;
		mp->clsn = 0;
		mp->log = NULL;
		set_bit(META_free, &mp->flag);
		init_waitqueue_head(&mp->wait);
	}
}
static inline struct metapage *alloc_metapage(int gfp_mask)
{
	return mempool_alloc(metapage_mempool, gfp_mask);
}

static inline void free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	mempool_free(mp, metapage_mempool);
}
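
/*
 * Module init: create the slab cache and mempool for metapages, then
 * allocate enough whole pages to hold the HASH_SIZE array of hash
 * bucket pointers.
 */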
int __init metapage_init(void)
{
	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once, NULL);
	if (metapage_cache == NULL)
		return -ENOMEM;

	metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mempool_alloc_slab,
					  mempool_free_slab, metapage_cache);

	if (metapage_mempool == NULL) {
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}
	/*
	 * Now the hash list
	 */
	for (hash_order = 0;
	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
	     hash_order++);
	hash_table =
	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
	assert(hash_table);
	memset(hash_table, 0, PAGE_SIZE << hash_order);

	return 0;
}

void metapage_exit(void)
{
	mempool_destroy(metapage_mempool);
	kmem_cache_destroy(metapage_cache);
}

/*
 * Basically same hash as in pagemap.h, but using our hash table
 */
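/*
 * "i" divides the mapping pointer by the largest power of two that
 * divides sizeof(struct inode), discarding address bits that never
 * vary between inodes; "s" folds the bits above HASH_BITS back into
 * the low bits so the whole value influences the bucket index.
 */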
static struct metapage **meta_hash(struct address_space *mapping,
				   unsigned long index)
{
#define i (((unsigned long)mapping)/ \
	   (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
#define s(x) ((x) + ((x) >> HASH_BITS))
	return hash_table + (s(i + index) & (HASH_SIZE - 1));
#undef i
#undef s
}

static struct metapage *search_hash(struct metapage ** hash_ptr,
				    struct address_space *mapping,
				    unsigned long index)
{
	struct metapage *ptr;

	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
		if ((ptr->mapping == mapping) && (ptr->index == index))
			return ptr;
	}

	return NULL;
}

static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (*hash_ptr)
		(*hash_ptr)->hash_prev = mp;

	mp->hash_prev = NULL;
	mp->hash_next = *hash_ptr;
	*hash_ptr = mp;
}

static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (mp->hash_prev)
		mp->hash_prev->hash_next = mp->hash_next;
	else {
		assert(*hash_ptr == mp);
		*hash_ptr = mp->hash_next;
	}

	if (mp->hash_next)
		mp->hash_next->hash_prev = mp->hash_prev;
}
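
/*
 * Look up (or create) the metapage covering lblock of the given inode.
 * "absolute" selects the block device's mapping rather than the
 * inode's own; "new" means the caller will initialize the contents, so
 * the page is grabbed rather than read, and then zeroed.  Returns with
 * the metapage locked and its reference count bumped, or NULL on
 * failure.
 */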
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	struct metapage **hash_ptr;
	int l2BlocksPerPage;
	int l2bsize;
	struct address_space *mapping;
	struct metapage *mp;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);

	if (absolute)
		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	hash_ptr = meta_hash(mapping, lblock);
again:
	spin_lock(&meta_lock);
	mp = search_hash(hash_ptr, mapping, lblock);
	if (mp) {
	      page_found:
		if (test_bit(META_stale, &mp->flag)) {
			spin_unlock(&meta_lock);
			msleep(1);
			goto again;
		}
		mp->count++;
		lock_metapage(mp);
		spin_unlock(&meta_lock);
		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				release_metapage(mp);
				return NULL;
			}
			clear_bit(META_discard, &mp->flag);
		}
		jfs_info("__get_metapage: found 0x%p, in hash", mp);
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			release_metapage(mp);
			return NULL;
		}
	} else {
		l2bsize = inode->i_blkbits;
		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
		page_index = lblock >> l2BlocksPerPage;
		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
		    l2bsize;
		if ((page_offset + size) > PAGE_CACHE_SIZE) {
			spin_unlock(&meta_lock);
			jfs_err("MetaData crosses page boundary!!");
			return NULL;
		}

		/*
		 * Locks held on aggregate inode pages are usually
		 * not held long, and they are taken in critical code
		 * paths (committing dirty inodes, txCommit thread)
		 *
		 * Attempt to get metapage without blocking, tapping into
		 * reserves if necessary.
		 */
		mp = NULL;
		if (JFS_IP(inode)->fileset == AGGREGATE_I) {
			mp = alloc_metapage(GFP_ATOMIC);
			if (!mp) {
				/*
				 * mempool is supposed to protect us from
				 * failing here.  We will try a blocking
				 * call, but a deadlock is possible here
				 */
				printk(KERN_WARNING
				       "__get_metapage: atomic call to mempool_alloc failed.\n");
				printk(KERN_WARNING
				       "Will attempt blocking call\n");
			}
		}
		if (!mp) {
			struct metapage *mp2;

			spin_unlock(&meta_lock);
			mp = alloc_metapage(GFP_NOFS);
			spin_lock(&meta_lock);

			/* we dropped the meta_lock, we need to search the
			 * hash again.
			 */
			mp2 = search_hash(hash_ptr, mapping, lblock);
			if (mp2) {
				free_metapage(mp);
				mp = mp2;
				goto page_found;
			}
		}
		mp->flag = 0;
		lock_metapage(mp);
		if (absolute)
			set_bit(META_absolute, &mp->flag);
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		atomic_set(&mp->nohomeok, 0);
		mp->mapping = mapping;
		mp->index = lblock;
		mp->page = NULL;
		mp->logical_size = size;
		add_to_hash(mp, hash_ptr);
		spin_unlock(&meta_lock);

		if (new) {
			jfs_info("__get_metapage: Calling grab_cache_page");
			mp->page = grab_cache_page(mapping, page_index);
			if (!mp->page) {
				jfs_err("grab_cache_page failed!");
				goto freeit;
			} else {
				INCREMENT(mpStat.pagealloc);
				unlock_page(mp->page);
			}
		} else {
			jfs_info("__get_metapage: Calling read_cache_page");
			mp->page = read_cache_page(mapping, lblock,
				   (filler_t *)mapping->a_ops->readpage, NULL);
			if (IS_ERR(mp->page)) {
				jfs_err("read_cache_page failed!");
				goto freeit;
			} else
				INCREMENT(mpStat.pagealloc);
		}
		mp->data = kmap(mp->page) + page_offset;
	}

	if (new)
		memset(mp->data, 0, PSIZE);

	jfs_info("__get_metapage: returning = 0x%p", mp);
	return mp;

freeit:
	spin_lock(&meta_lock);
	remove_from_hash(mp, hash_ptr);
	free_metapage(mp);
	spin_unlock(&meta_lock);
	return NULL;
}
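
/*
 * Take an extra reference on a metapage.  With "force", do not sleep if
 * the page is already locked: mark it META_forced and proceed, so the
 * matching release_metapage() drops only the reference and leaves the
 * lock to its real owner.
 */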
void hold_metapage(struct metapage * mp, int force)
{
	spin_lock(&meta_lock);

	mp->count++;

	if (force) {
		ASSERT (!(test_bit(META_forced, &mp->flag)));
		if (trylock_metapage(mp))
			set_bit(META_forced, &mp->flag);
	} else
		lock_metapage(mp);

	spin_unlock(&meta_lock);
}
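
/*
 * Write a dirty metapage back through the address_space operations,
 * using the prepare_write/commit_write pair on just the byte range the
 * metapage occupies within its page.
 */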
static void __write_metapage(struct metapage * mp)
{
	int l2bsize = mp->mapping->host->i_blkbits;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	unsigned long page_index;
	unsigned long page_offset;
	int rc;

	jfs_info("__write_metapage: mp = 0x%p", mp);

	page_index = mp->page->index;
	page_offset =
	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

	lock_page(mp->page);
	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
					       page_offset +
					       mp->logical_size);
	if (rc) {
		jfs_err("prepare_write returned %d!", rc);
		ClearPageUptodate(mp->page);
		unlock_page(mp->page);
		clear_bit(META_dirty, &mp->flag);
		return;
	}
	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
					      page_offset +
					      mp->logical_size);
	if (rc) {
		jfs_err("commit_write returned %d", rc);
	}

	unlock_page(mp->page);
	clear_bit(META_dirty, &mp->flag);

	jfs_info("__write_metapage done");
}

static inline void sync_metapage(struct metapage *mp)
{
	struct page *page = mp->page;

	page_cache_get(page);
	lock_page(page);

	/* we're done with this page - no need to check for errors */
	if (page_has_buffers(page))
		write_one_page(page, 1);
	else
		unlock_page(page);
	page_cache_release(page);
}
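
/*
 * Drop a reference on a metapage.  When the last reference goes away,
 * mark the page META_stale, write it back if dirty, sync or invalidate
 * it as flagged, unhook it from the log sync list, and finally remove
 * it from the hash and free it.
 */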
void release_metapage(struct metapage * mp)
{
	struct jfs_log *log;

	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	spin_lock(&meta_lock);
	if (test_bit(META_forced, &mp->flag)) {
		clear_bit(META_forced, &mp->flag);
		mp->count--;
		spin_unlock(&meta_lock);
		return;
	}

	assert(mp->count);
	if (--mp->count || atomic_read(&mp->nohomeok)) {
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}

	if (mp->page) {
		set_bit(META_stale, &mp->flag);
		spin_unlock(&meta_lock);
		kunmap(mp->page);
		mp->data = NULL;
		if (test_bit(META_dirty, &mp->flag))
			__write_metapage(mp);
		if (test_bit(META_sync, &mp->flag)) {
			sync_metapage(mp);
			clear_bit(META_sync, &mp->flag);
		}

		if (test_bit(META_discard, &mp->flag)) {
			lock_page(mp->page);
			block_invalidatepage(mp->page, 0);
			unlock_page(mp->page);
		}

		page_cache_release(mp->page);
		mp->page = NULL;
		INCREMENT(mpStat.pagefree);
		spin_lock(&meta_lock);
	}

	if (mp->lsn) {
		/*
		 * Remove metapage from logsynclist.
		 */
		log = mp->log;
		LOGSYNC_LOCK(log);
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
		LOGSYNC_UNLOCK(log);
	}

	remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
	spin_unlock(&meta_lock);

	free_metapage(mp);
}
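
/*
 * Invalidate len blocks starting at addr on the block device.  Cached
 * metapages are flagged META_discard so they are dropped rather than
 * written back; plain page cache pages in the range are invalidated
 * directly.
 */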
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	struct metapage **hash_ptr;
	unsigned long lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
	struct metapage *mp;
	struct page *page;

	/*
	 * First, mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr; lblock < addr + len;
	     lblock += 1 << l2BlocksPerPage) {
		hash_ptr = meta_hash(mapping, lblock);
again:
		spin_lock(&meta_lock);
		mp = search_hash(hash_ptr, mapping, lblock);
		if (mp) {
			if (test_bit(META_stale, &mp->flag)) {
				spin_unlock(&meta_lock);
				msleep(1);
				goto again;
			}

			clear_bit(META_dirty, &mp->flag);
			set_bit(META_discard, &mp->flag);
			spin_unlock(&meta_lock);
		} else {
			spin_unlock(&meta_lock);
			page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
			if (page) {
				block_invalidatepage(page, 0);
				unlock_page(page);
				page_cache_release(page);
			}
		}
	}
}

#ifdef CONFIG_JFS_STATISTICS
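/*
 * /proc read routine for the statistics above; follows the standard
 * read_proc convention of reporting via *start, *eof and the returned
 * length.
 */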
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait);

	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
#endif