glops.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. /*
  2. * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
  3. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
  4. *
  5. * This copyrighted material is made available to anyone wishing to use,
  6. * modify, copy, or redistribute it subject to the terms and conditions
  7. * of the GNU General Public License version 2.
  8. */
  9. #include <linux/spinlock.h>
  10. #include <linux/completion.h>
  11. #include <linux/buffer_head.h>
  12. #include <linux/gfs2_ondisk.h>
  13. #include <linux/bio.h>
  14. #include <linux/posix_acl.h>
  15. #include "gfs2.h"
  16. #include "incore.h"
  17. #include "bmap.h"
  18. #include "glock.h"
  19. #include "glops.h"
  20. #include "inode.h"
  21. #include "log.h"
  22. #include "meta_io.h"
  23. #include "recovery.h"
  24. #include "rgrp.h"
  25. #include "util.h"
  26. #include "trans.h"
  27. /**
  28. * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
  29. * @gl: the glock
  30. *
  31. * None of the buffers should be dirty, locked, or pinned.
  32. */
  33. static void __gfs2_ail_flush(struct gfs2_glock *gl)
  34. {
  35. struct gfs2_sbd *sdp = gl->gl_sbd;
  36. struct list_head *head = &gl->gl_ail_list;
  37. struct gfs2_bufdata *bd;
  38. struct buffer_head *bh;
  39. spin_lock(&sdp->sd_ail_lock);
  40. while (!list_empty(head)) {
  41. bd = list_entry(head->next, struct gfs2_bufdata,
  42. bd_ail_gl_list);
  43. bh = bd->bd_bh;
  44. gfs2_remove_from_ail(bd);
  45. spin_unlock(&sdp->sd_ail_lock);
  46. bd->bd_bh = NULL;
  47. bh->b_private = NULL;
  48. bd->bd_blkno = bh->b_blocknr;
  49. gfs2_log_lock(sdp);
  50. gfs2_assert_withdraw(sdp, !buffer_busy(bh));
  51. gfs2_trans_add_revoke(sdp, bd);
  52. gfs2_log_unlock(sdp);
  53. spin_lock(&sdp->sd_ail_lock);
  54. }
  55. gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
  56. spin_unlock(&sdp->sd_ail_lock);
  57. }
  58. static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
  59. {
  60. struct gfs2_sbd *sdp = gl->gl_sbd;
  61. struct gfs2_trans tr;
  62. memset(&tr, 0, sizeof(tr));
  63. tr.tr_revokes = atomic_read(&gl->gl_ail_count);
  64. if (!tr.tr_revokes)
  65. return;
  66. /* A shortened, inline version of gfs2_trans_begin() */
  67. tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
  68. tr.tr_ip = (unsigned long)__builtin_return_address(0);
  69. INIT_LIST_HEAD(&tr.tr_list_buf);
  70. gfs2_log_reserve(sdp, tr.tr_reserved);
  71. BUG_ON(current->journal_info);
  72. current->journal_info = &tr;
  73. __gfs2_ail_flush(gl);
  74. gfs2_trans_end(sdp);
  75. gfs2_log_flush(sdp, NULL);
  76. }
  77. void gfs2_ail_flush(struct gfs2_glock *gl)
  78. {
  79. struct gfs2_sbd *sdp = gl->gl_sbd;
  80. unsigned int revokes = atomic_read(&gl->gl_ail_count);
  81. int ret;
  82. if (!revokes)
  83. return;
  84. ret = gfs2_trans_begin(sdp, 0, revokes);
  85. if (ret)
  86. return;
  87. __gfs2_ail_flush(gl);
  88. gfs2_trans_end(sdp);
  89. gfs2_log_flush(sdp, NULL);
  90. }
  91. /**
  92. * rgrp_go_sync - sync out the metadata for this glock
  93. * @gl: the glock
  94. *
  95. * Called when demoting or unlocking an EX glock. We must flush
  96. * to disk all dirty buffers/pages relating to this glock, and must not
  97. * not return to caller to demote/unlock the glock until I/O is complete.
  98. */
  99. static void rgrp_go_sync(struct gfs2_glock *gl)
  100. {
  101. struct address_space *metamapping = gfs2_glock2aspace(gl);
  102. int error;
  103. if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
  104. return;
  105. BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
  106. gfs2_log_flush(gl->gl_sbd, gl);
  107. filemap_fdatawrite(metamapping);
  108. error = filemap_fdatawait(metamapping);
  109. mapping_set_error(metamapping, error);
  110. gfs2_ail_empty_gl(gl);
  111. }
  112. /**
  113. * rgrp_go_inval - invalidate the metadata for this glock
  114. * @gl: the glock
  115. * @flags:
  116. *
  117. * We never used LM_ST_DEFERRED with resource groups, so that we
  118. * should always see the metadata flag set here.
  119. *
  120. */
  121. static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
  122. {
  123. struct address_space *mapping = gfs2_glock2aspace(gl);
  124. BUG_ON(!(flags & DIO_METADATA));
  125. gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
  126. truncate_inode_pages(mapping, 0);
  127. if (gl->gl_object) {
  128. struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
  129. rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
  130. }
  131. }
  132. /**
  133. * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
  134. * @gl: the glock protecting the inode
  135. *
  136. */
  137. static void inode_go_sync(struct gfs2_glock *gl)
  138. {
  139. struct gfs2_inode *ip = gl->gl_object;
  140. struct address_space *metamapping = gfs2_glock2aspace(gl);
  141. int error;
  142. if (ip && !S_ISREG(ip->i_inode.i_mode))
  143. ip = NULL;
  144. if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
  145. unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
  146. if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
  147. return;
  148. BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
  149. gfs2_log_flush(gl->gl_sbd, gl);
  150. filemap_fdatawrite(metamapping);
  151. if (ip) {
  152. struct address_space *mapping = ip->i_inode.i_mapping;
  153. filemap_fdatawrite(mapping);
  154. error = filemap_fdatawait(mapping);
  155. mapping_set_error(mapping, error);
  156. }
  157. error = filemap_fdatawait(metamapping);
  158. mapping_set_error(metamapping, error);
  159. gfs2_ail_empty_gl(gl);
  160. /*
  161. * Writeback of the data mapping may cause the dirty flag to be set
  162. * so we have to clear it again here.
  163. */
  164. smp_mb__before_clear_bit();
  165. clear_bit(GLF_DIRTY, &gl->gl_flags);
  166. }
  167. /**
  168. * inode_go_inval - prepare a inode glock to be released
  169. * @gl: the glock
  170. * @flags:
  171. *
  172. * Normally we invlidate everything, but if we are moving into
  173. * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
  174. * can keep hold of the metadata, since it won't have changed.
  175. *
  176. */
  177. static void inode_go_inval(struct gfs2_glock *gl, int flags)
  178. {
  179. struct gfs2_inode *ip = gl->gl_object;
  180. gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
  181. if (flags & DIO_METADATA) {
  182. struct address_space *mapping = gfs2_glock2aspace(gl);
  183. truncate_inode_pages(mapping, 0);
  184. if (ip) {
  185. set_bit(GIF_INVALID, &ip->i_flags);
  186. forget_all_cached_acls(&ip->i_inode);
  187. }
  188. }
  189. if (ip == GFS2_I(gl->gl_sbd->sd_rindex))
  190. gl->gl_sbd->sd_rindex_uptodate = 0;
  191. if (ip && S_ISREG(ip->i_inode.i_mode))
  192. truncate_inode_pages(ip->i_inode.i_mapping, 0);
  193. }
  194. /**
  195. * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
  196. * @gl: the glock
  197. *
  198. * Returns: 1 if it's ok
  199. */
  200. static int inode_go_demote_ok(const struct gfs2_glock *gl)
  201. {
  202. struct gfs2_sbd *sdp = gl->gl_sbd;
  203. struct gfs2_holder *gh;
  204. if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
  205. return 0;
  206. if (!list_empty(&gl->gl_holders)) {
  207. gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
  208. if (gh->gh_list.next != &gl->gl_holders)
  209. return 0;
  210. }
  211. return 1;
  212. }
  213. /**
  214. * gfs2_set_nlink - Set the inode's link count based on on-disk info
  215. * @inode: The inode in question
  216. * @nlink: The link count
  217. *
  218. * If the link count has hit zero, it must never be raised, whatever the
  219. * on-disk inode might say. When new struct inodes are created the link
  220. * count is set to 1, so that we can safely use this test even when reading
  221. * in on disk information for the first time.
  222. */
  223. static void gfs2_set_nlink(struct inode *inode, u32 nlink)
  224. {
  225. /*
  226. * We will need to review setting the nlink count here in the
  227. * light of the forthcoming ro bind mount work. This is a reminder
  228. * to do that.
  229. */
  230. if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
  231. if (nlink == 0)
  232. clear_nlink(inode);
  233. else
  234. inode->i_nlink = nlink;
  235. }
  236. }
  237. static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
  238. {
  239. const struct gfs2_dinode *str = buf;
  240. struct timespec atime;
  241. u16 height, depth;
  242. if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
  243. goto corrupt;
  244. ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
  245. ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
  246. ip->i_inode.i_rdev = 0;
  247. switch (ip->i_inode.i_mode & S_IFMT) {
  248. case S_IFBLK:
  249. case S_IFCHR:
  250. ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
  251. be32_to_cpu(str->di_minor));
  252. break;
  253. };
  254. ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
  255. ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
  256. gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
  257. i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
  258. gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
  259. atime.tv_sec = be64_to_cpu(str->di_atime);
  260. atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
  261. if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
  262. ip->i_inode.i_atime = atime;
  263. ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
  264. ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
  265. ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
  266. ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
  267. ip->i_goal = be64_to_cpu(str->di_goal_meta);
  268. ip->i_generation = be64_to_cpu(str->di_generation);
  269. ip->i_diskflags = be32_to_cpu(str->di_flags);
  270. gfs2_set_inode_flags(&ip->i_inode);
  271. height = be16_to_cpu(str->di_height);
  272. if (unlikely(height > GFS2_MAX_META_HEIGHT))
  273. goto corrupt;
  274. ip->i_height = (u8)height;
  275. depth = be16_to_cpu(str->di_depth);
  276. if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
  277. goto corrupt;
  278. ip->i_depth = (u8)depth;
  279. ip->i_entries = be32_to_cpu(str->di_entries);
  280. ip->i_eattr = be64_to_cpu(str->di_eattr);
  281. if (S_ISREG(ip->i_inode.i_mode))
  282. gfs2_set_aops(&ip->i_inode);
  283. return 0;
  284. corrupt:
  285. gfs2_consist_inode(ip);
  286. return -EIO;
  287. }
  288. /**
  289. * gfs2_inode_refresh - Refresh the incore copy of the dinode
  290. * @ip: The GFS2 inode
  291. *
  292. * Returns: errno
  293. */
  294. int gfs2_inode_refresh(struct gfs2_inode *ip)
  295. {
  296. struct buffer_head *dibh;
  297. int error;
  298. error = gfs2_meta_inode_buffer(ip, &dibh);
  299. if (error)
  300. return error;
  301. if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
  302. brelse(dibh);
  303. return -EIO;
  304. }
  305. error = gfs2_dinode_in(ip, dibh->b_data);
  306. brelse(dibh);
  307. clear_bit(GIF_INVALID, &ip->i_flags);
  308. return error;
  309. }
  310. /**
  311. * inode_go_lock - operation done after an inode lock is locked by a process
  312. * @gl: the glock
  313. * @flags:
  314. *
  315. * Returns: errno
  316. */
  317. static int inode_go_lock(struct gfs2_holder *gh)
  318. {
  319. struct gfs2_glock *gl = gh->gh_gl;
  320. struct gfs2_sbd *sdp = gl->gl_sbd;
  321. struct gfs2_inode *ip = gl->gl_object;
  322. int error = 0;
  323. if (!ip || (gh->gh_flags & GL_SKIP))
  324. return 0;
  325. if (test_bit(GIF_INVALID, &ip->i_flags)) {
  326. error = gfs2_inode_refresh(ip);
  327. if (error)
  328. return error;
  329. }
  330. if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
  331. (gl->gl_state == LM_ST_EXCLUSIVE) &&
  332. (gh->gh_state == LM_ST_EXCLUSIVE)) {
  333. spin_lock(&sdp->sd_trunc_lock);
  334. if (list_empty(&ip->i_trunc_list))
  335. list_add(&sdp->sd_trunc_list, &ip->i_trunc_list);
  336. spin_unlock(&sdp->sd_trunc_lock);
  337. wake_up(&sdp->sd_quota_wait);
  338. return 1;
  339. }
  340. return error;
  341. }
  342. /**
  343. * inode_go_dump - print information about an inode
  344. * @seq: The iterator
  345. * @ip: the inode
  346. *
  347. * Returns: 0 on success, -ENOBUFS when we run out of space
  348. */
  349. static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
  350. {
  351. const struct gfs2_inode *ip = gl->gl_object;
  352. if (ip == NULL)
  353. return 0;
  354. gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n",
  355. (unsigned long long)ip->i_no_formal_ino,
  356. (unsigned long long)ip->i_no_addr,
  357. IF2DT(ip->i_inode.i_mode), ip->i_flags,
  358. (unsigned int)ip->i_diskflags,
  359. (unsigned long long)i_size_read(&ip->i_inode));
  360. return 0;
  361. }
  362. /**
  363. * rgrp_go_lock - operation done after an rgrp lock is locked by
  364. * a first holder on this node.
  365. * @gl: the glock
  366. * @flags:
  367. *
  368. * Returns: errno
  369. */
  370. static int rgrp_go_lock(struct gfs2_holder *gh)
  371. {
  372. return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
  373. }
  374. /**
  375. * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
  376. * a last holder on this node.
  377. * @gl: the glock
  378. * @flags:
  379. *
  380. */
  381. static void rgrp_go_unlock(struct gfs2_holder *gh)
  382. {
  383. gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
  384. }
  385. /**
  386. * trans_go_sync - promote/demote the transaction glock
  387. * @gl: the glock
  388. * @state: the requested state
  389. * @flags:
  390. *
  391. */
  392. static void trans_go_sync(struct gfs2_glock *gl)
  393. {
  394. struct gfs2_sbd *sdp = gl->gl_sbd;
  395. if (gl->gl_state != LM_ST_UNLOCKED &&
  396. test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
  397. gfs2_meta_syncfs(sdp);
  398. gfs2_log_shutdown(sdp);
  399. }
  400. }
  401. /**
  402. * trans_go_xmote_bh - After promoting/demoting the transaction glock
  403. * @gl: the glock
  404. *
  405. */
  406. static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
  407. {
  408. struct gfs2_sbd *sdp = gl->gl_sbd;
  409. struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
  410. struct gfs2_glock *j_gl = ip->i_gl;
  411. struct gfs2_log_header_host head;
  412. int error;
  413. if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
  414. j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
  415. error = gfs2_find_jhead(sdp->sd_jdesc, &head);
  416. if (error)
  417. gfs2_consist(sdp);
  418. if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
  419. gfs2_consist(sdp);
  420. /* Initialize some head of the log stuff */
  421. if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
  422. sdp->sd_log_sequence = head.lh_sequence + 1;
  423. gfs2_log_pointers_init(sdp, head.lh_blkno);
  424. }
  425. }
  426. return 0;
  427. }
  428. /**
  429. * trans_go_demote_ok
  430. * @gl: the glock
  431. *
  432. * Always returns 0
  433. */
  434. static int trans_go_demote_ok(const struct gfs2_glock *gl)
  435. {
  436. return 0;
  437. }
  438. /**
  439. * iopen_go_callback - schedule the dcache entry for the inode to be deleted
  440. * @gl: the glock
  441. *
  442. * gl_spin lock is held while calling this
  443. */
  444. static void iopen_go_callback(struct gfs2_glock *gl)
  445. {
  446. struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
  447. struct gfs2_sbd *sdp = gl->gl_sbd;
  448. if (sdp->sd_vfs->s_flags & MS_RDONLY)
  449. return;
  450. if (gl->gl_demote_state == LM_ST_UNLOCKED &&
  451. gl->gl_state == LM_ST_SHARED && ip) {
  452. gfs2_glock_hold(gl);
  453. if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
  454. gfs2_glock_put_nolock(gl);
  455. }
  456. }
  457. const struct gfs2_glock_operations gfs2_meta_glops = {
  458. .go_type = LM_TYPE_META,
  459. };
  460. const struct gfs2_glock_operations gfs2_inode_glops = {
  461. .go_xmote_th = inode_go_sync,
  462. .go_inval = inode_go_inval,
  463. .go_demote_ok = inode_go_demote_ok,
  464. .go_lock = inode_go_lock,
  465. .go_dump = inode_go_dump,
  466. .go_type = LM_TYPE_INODE,
  467. .go_min_hold_time = HZ / 5,
  468. .go_flags = GLOF_ASPACE,
  469. };
  470. const struct gfs2_glock_operations gfs2_rgrp_glops = {
  471. .go_xmote_th = rgrp_go_sync,
  472. .go_inval = rgrp_go_inval,
  473. .go_lock = rgrp_go_lock,
  474. .go_unlock = rgrp_go_unlock,
  475. .go_dump = gfs2_rgrp_dump,
  476. .go_type = LM_TYPE_RGRP,
  477. .go_min_hold_time = HZ / 5,
  478. .go_flags = GLOF_ASPACE,
  479. };
  480. const struct gfs2_glock_operations gfs2_trans_glops = {
  481. .go_xmote_th = trans_go_sync,
  482. .go_xmote_bh = trans_go_xmote_bh,
  483. .go_demote_ok = trans_go_demote_ok,
  484. .go_type = LM_TYPE_NONDISK,
  485. };
  486. const struct gfs2_glock_operations gfs2_iopen_glops = {
  487. .go_type = LM_TYPE_IOPEN,
  488. .go_callback = iopen_go_callback,
  489. };
  490. const struct gfs2_glock_operations gfs2_flock_glops = {
  491. .go_type = LM_TYPE_FLOCK,
  492. };
  493. const struct gfs2_glock_operations gfs2_nondisk_glops = {
  494. .go_type = LM_TYPE_NONDISK,
  495. };
  496. const struct gfs2_glock_operations gfs2_quota_glops = {
  497. .go_type = LM_TYPE_QUOTA,
  498. };
  499. const struct gfs2_glock_operations gfs2_journal_glops = {
  500. .go_type = LM_TYPE_JOURNAL,
  501. };
  502. const struct gfs2_glock_operations *gfs2_glops_list[] = {
  503. [LM_TYPE_META] = &gfs2_meta_glops,
  504. [LM_TYPE_INODE] = &gfs2_inode_glops,
  505. [LM_TYPE_RGRP] = &gfs2_rgrp_glops,
  506. [LM_TYPE_IOPEN] = &gfs2_iopen_glops,
  507. [LM_TYPE_FLOCK] = &gfs2_flock_glops,
  508. [LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
  509. [LM_TYPE_QUOTA] = &gfs2_quota_glops,
  510. [LM_TYPE_JOURNAL] = &gfs2_journal_glops,
  511. };