/*
 * fs/ext4/mmp.c -- Multiple Mount Protection (MMP) support.
 */
  1. #include <linux/fs.h>
  2. #include <linux/random.h>
  3. #include <linux/buffer_head.h>
  4. #include <linux/utsname.h>
  5. #include <linux/kthread.h>
  6. #include "ext4.h"
  7. /* Checksumming functions */
  8. static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
  9. {
  10. struct ext4_sb_info *sbi = EXT4_SB(sb);
  11. int offset = offsetof(struct mmp_struct, mmp_checksum);
  12. __u32 csum;
  13. csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
  14. return cpu_to_le32(csum);
  15. }
  16. int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
  17. {
  18. if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
  19. EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
  20. return 1;
  21. return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
  22. }
  23. void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
  24. {
  25. if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
  26. EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
  27. return;
  28. mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
  29. }
  30. /*
  31. * Write the MMP block using WRITE_SYNC to try to get the block on-disk
  32. * faster.
  33. */
  34. static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
  35. {
  36. struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
  37. /*
  38. * We protect against freezing so that we don't create dirty buffers
  39. * on frozen filesystem.
  40. */
  41. sb_start_write(sb);
  42. ext4_mmp_csum_set(sb, mmp);
  43. mark_buffer_dirty(bh);
  44. lock_buffer(bh);
  45. bh->b_end_io = end_buffer_write_sync;
  46. get_bh(bh);
  47. submit_bh(WRITE_SYNC, bh);
  48. wait_on_buffer(bh);
  49. sb_end_write(sb);
  50. if (unlikely(!buffer_uptodate(bh)))
  51. return 1;
  52. return 0;
  53. }
  54. /*
  55. * Read the MMP block. It _must_ be read from disk and hence we clear the
  56. * uptodate flag on the buffer.
  57. */
  58. static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
  59. ext4_fsblk_t mmp_block)
  60. {
  61. struct mmp_struct *mmp;
  62. if (*bh)
  63. clear_buffer_uptodate(*bh);
  64. /* This would be sb_bread(sb, mmp_block), except we need to be sure
  65. * that the MD RAID device cache has been bypassed, and that the read
  66. * is not blocked in the elevator. */
  67. if (!*bh)
  68. *bh = sb_getblk(sb, mmp_block);
  69. if (!*bh)
  70. return -ENOMEM;
  71. if (*bh) {
  72. get_bh(*bh);
  73. lock_buffer(*bh);
  74. (*bh)->b_end_io = end_buffer_read_sync;
  75. submit_bh(READ_SYNC, *bh);
  76. wait_on_buffer(*bh);
  77. if (!buffer_uptodate(*bh)) {
  78. brelse(*bh);
  79. *bh = NULL;
  80. }
  81. }
  82. if (unlikely(!*bh)) {
  83. ext4_warning(sb, "Error while reading MMP block %llu",
  84. mmp_block);
  85. return -EIO;
  86. }
  87. mmp = (struct mmp_struct *)((*bh)->b_data);
  88. if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
  89. !ext4_mmp_csum_verify(sb, mmp))
  90. return -EINVAL;
  91. return 0;
  92. }
  93. /*
  94. * Dump as much information as possible to help the admin.
  95. */
  96. void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
  97. const char *function, unsigned int line, const char *msg)
  98. {
  99. __ext4_warning(sb, function, line, msg);
  100. __ext4_warning(sb, function, line,
  101. "MMP failure info: last update time: %llu, last update "
  102. "node: %s, last update device: %s\n",
  103. (long long unsigned int) le64_to_cpu(mmp->mmp_time),
  104. mmp->mmp_nodename, mmp->mmp_bdevname);
  105. }
/*
 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 */
static int kmmpd(void *data)
{
	/*
	 * Thread payload handed over by ext4_multi_mount_protect(): the
	 * superblock plus a pinned reference on the MMP buffer.  This
	 * thread owns both and releases them on exit (kfree + brelse).
	 */
	struct super_block *sb = ((struct mmpd_data *) data)->sb;
	struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct mmp_struct *mmp;
	ext4_fsblk_t mmp_block;
	u32 seq = 0;
	unsigned long failed_writes = 0;
	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
	unsigned mmp_check_interval;
	unsigned long last_update_time;
	unsigned long diff;
	int retval;

	mmp_block = le64_to_cpu(es->s_mmp_block);
	mmp = (struct mmp_struct *)(bh->b_data);
	mmp->mmp_time = cpu_to_le64(get_seconds());
	/*
	 * Start with the higher mmp_check_interval and reduce it if
	 * the MMP block is being updated on time.
	 */
	mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
				 EXT4_MMP_MIN_CHECK_INTERVAL);
	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	/* Identify this node/device in the block so other mounters can
	 * report who holds the filesystem. */
	bdevname(bh->b_bdev, mmp->mmp_bdevname);

	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
	       sizeof(mmp->mmp_nodename));

	while (!kthread_should_stop()) {
		/* seq cycles within [1, EXT4_MMP_SEQ_MAX]; 0 is never
		 * written while the thread is live. */
		if (++seq > EXT4_MMP_SEQ_MAX)
			seq = 1;

		mmp->mmp_seq = cpu_to_le32(seq);
		mmp->mmp_time = cpu_to_le64(get_seconds());
		last_update_time = jiffies;

		retval = write_mmp_block(sb, bh);
		/*
		 * Don't spew too many error messages. Print one every
		 * (s_mmp_update_interval * 60) seconds.
		 */
		if (retval) {
			if ((failed_writes % 60) == 0)
				ext4_error(sb, "Error writing to MMP block");
			failed_writes++;
		}

		/* Exit cleanly if the MMP feature was cleared under us. */
		if (!(le32_to_cpu(es->s_feature_incompat) &
		    EXT4_FEATURE_INCOMPAT_MMP)) {
			ext4_warning(sb, "kmmpd being stopped since MMP feature"
				     " has been disabled.");
			EXT4_SB(sb)->s_mmp_tsk = NULL;
			goto failed;
		}

		/* ...or if the filesystem was remounted read-only. */
		if (sb->s_flags & MS_RDONLY) {
			ext4_warning(sb, "kmmpd being stopped since filesystem "
				     "has been remounted as readonly.");
			EXT4_SB(sb)->s_mmp_tsk = NULL;
			goto failed;
		}

		/* Sleep out whatever remains of the update interval. */
		diff = jiffies - last_update_time;
		if (diff < mmp_update_interval * HZ)
			schedule_timeout_interruptible(mmp_update_interval *
						       HZ - diff);

		/*
		 * We need to make sure that more than mmp_check_interval
		 * seconds have not passed since writing. If that has happened
		 * we need to check if the MMP block is as we left it.
		 */
		diff = jiffies - last_update_time;
		if (diff > mmp_check_interval * HZ) {
			struct buffer_head *bh_check = NULL;
			struct mmp_struct *mmp_check;

			retval = read_mmp_block(sb, &bh_check, mmp_block);
			if (retval) {
				ext4_error(sb, "error reading MMP data: %d",
					   retval);
				EXT4_SB(sb)->s_mmp_tsk = NULL;
				goto failed;
			}

			/* Another node having rewritten our sequence or
			 * nodename means the device is multiply mounted. */
			mmp_check = (struct mmp_struct *)(bh_check->b_data);
			if (mmp->mmp_seq != mmp_check->mmp_seq ||
			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
				   sizeof(mmp->mmp_nodename))) {
				/* NOTE(review): this abort path neither
				 * releases bh_check (put_bh is skipped) nor
				 * clears s_mmp_tsk like the other exits --
				 * looks like a leak; confirm upstream. */
				dump_mmp_msg(sb, mmp_check,
					     "Error while updating MMP info. "
					     "The filesystem seems to have been"
					     " multiply mounted.");
				ext4_error(sb, "abort");
				goto failed;
			}
			put_bh(bh_check);
		}

		/*
		 * Adjust the mmp_check_interval depending on how much time
		 * it took for the MMP block to be written.
		 */
		mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
					     EXT4_MMP_MAX_CHECK_INTERVAL),
					 EXT4_MMP_MIN_CHECK_INTERVAL);
		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	}

	/*
	 * Unmount seems to be clean.
	 */
	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
	mmp->mmp_time = cpu_to_le64(get_seconds());

	retval = write_mmp_block(sb, bh);

failed:
	/* This thread owns the mmpd_data wrapper and the buffer reference. */
	kfree(data);
	brelse(bh);
	return retval;
}
  218. /*
  219. * Get a random new sequence number but make sure it is not greater than
  220. * EXT4_MMP_SEQ_MAX.
  221. */
  222. static unsigned int mmp_new_seq(void)
  223. {
  224. u32 new_seq;
  225. do {
  226. get_random_bytes(&new_seq, sizeof(u32));
  227. } while (new_seq > EXT4_MMP_SEQ_MAX);
  228. return new_seq;
  229. }
/*
 * Protect the filesystem from being mounted more than once.
 *
 * Reads the MMP block, waits long enough to observe another live mounter
 * bumping the sequence, then writes a fresh random sequence and verifies
 * it survived a second wait.  On success spawns the kmmpd thread (which
 * takes ownership of bh) and returns 0; on any failure returns 1.
 */
int ext4_multi_mount_protect(struct super_block *sb,
				    ext4_fsblk_t mmp_block)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct buffer_head *bh = NULL;
	struct mmp_struct *mmp = NULL;
	struct mmpd_data *mmpd_data;
	u32 seq;
	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
	unsigned int wait_time = 0;
	int retval;

	/* The MMP block must lie inside the filesystem proper. */
	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
	    mmp_block >= ext4_blocks_count(es)) {
		ext4_warning(sb, "Invalid MMP block in superblock");
		goto failed;
	}

	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;

	mmp = (struct mmp_struct *)(bh->b_data);
	if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
		mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;

	/*
	 * If check_interval in MMP block is larger, use that instead of
	 * update_interval from the superblock.
	 */
	if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
		mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);

	seq = le32_to_cpu(mmp->mmp_seq);
	/* A clean sequence means the last mount ended properly; skip the
	 * pre-write wait.  NOTE(review): wait_time is then still 0, so the
	 * post-write verification sleep below is also a no-op -- confirm
	 * this is intentional. */
	if (seq == EXT4_MMP_SEQ_CLEAN)
		goto skip;

	if (seq == EXT4_MMP_SEQ_FSCK) {
		dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
		goto failed;
	}

	/* Wait long enough for a live kmmpd elsewhere to update the block. */
	wait_time = min(mmp_check_interval * 2 + 1,
			mmp_check_interval + 60);

	/* Print MMP interval if more than 20 secs. */
	if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
		ext4_warning(sb, "MMP interval %u higher than expected, please"
			     " wait.\n", wait_time * 2);

	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
		goto failed;
	}

	/* Re-read: a changed sequence means someone else holds the fs. */
	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;
	mmp = (struct mmp_struct *)(bh->b_data);
	if (seq != le32_to_cpu(mmp->mmp_seq)) {
		dump_mmp_msg(sb, mmp,
			     "Device is already active on another node.");
		goto failed;
	}

skip:
	/*
	 * write a new random sequence number.
	 */
	seq = mmp_new_seq();
	mmp->mmp_seq = cpu_to_le32(seq);

	retval = write_mmp_block(sb, bh);
	if (retval)
		goto failed;

	/*
	 * wait for MMP interval and check mmp_seq.
	 */
	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
		goto failed;
	}

	/* Our random sequence must still be in place after the wait. */
	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;
	mmp = (struct mmp_struct *)(bh->b_data);
	if (seq != le32_to_cpu(mmp->mmp_seq)) {
		dump_mmp_msg(sb, mmp,
			     "Device is already active on another node.");
		goto failed;
	}

	mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
	if (!mmpd_data) {
		ext4_warning(sb, "not enough memory for mmpd_data");
		goto failed;
	}
	mmpd_data->sb = sb;
	mmpd_data->bh = bh;	/* ownership of bh passes to kmmpd on success */

	/*
	 * Start a kernel thread to update the MMP block periodically.
	 */
	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
					     bdevname(bh->b_bdev,
						      mmp->mmp_bdevname));
	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
		EXT4_SB(sb)->s_mmp_tsk = NULL;
		kfree(mmpd_data);
		ext4_warning(sb, "Unable to create kmmpd thread for %s.",
			     sb->s_id);
		goto failed;
	}

	return 0;

failed:
	/* NOTE(review): returns 1 rather than a negative errno; the caller
	 * apparently only tests for non-zero -- verify before changing. */
	brelse(bh);
	return 1;
}