/* fs/ext4/migrate.c — convert an ext4 inode from indirect block maps to extents */
  1. /*
  2. * Copyright IBM Corporation, 2007
  3. * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms of version 2.1 of the GNU Lesser General Public License
  7. * as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. *
  13. */
  14. #include <linux/module.h>
  15. #include <linux/ext4_jbd2.h>
  16. #include <linux/ext4_fs_extents.h>
/*
 * The contiguous blocks details which can be
 * represented by a single extent
 */
struct list_blocks_struct {
	ext4_lblk_t first_block, last_block;	/* logical (file) block range */
	ext4_fsblk_t first_pblock, last_pblock;	/* physical (on-disk) block range */
};
  25. static int finish_range(handle_t *handle, struct inode *inode,
  26. struct list_blocks_struct *lb)
  27. {
  28. int retval = 0, needed;
  29. struct ext4_extent newext;
  30. struct ext4_ext_path *path;
  31. if (lb->first_pblock == 0)
  32. return 0;
  33. /* Add the extent to temp inode*/
  34. newext.ee_block = cpu_to_le32(lb->first_block);
  35. newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
  36. ext4_ext_store_pblock(&newext, lb->first_pblock);
  37. path = ext4_ext_find_extent(inode, lb->first_block, NULL);
  38. if (IS_ERR(path)) {
  39. retval = PTR_ERR(path);
  40. goto err_out;
  41. }
  42. /*
  43. * Calculate the credit needed to inserting this extent
  44. * Since we are doing this in loop we may accumalate extra
  45. * credit. But below we try to not accumalate too much
  46. * of them by restarting the journal.
  47. */
  48. needed = ext4_ext_calc_credits_for_insert(inode, path);
  49. /*
  50. * Make sure the credit we accumalated is not really high
  51. */
  52. if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
  53. retval = ext4_journal_restart(handle, needed);
  54. if (retval)
  55. goto err_out;
  56. }
  57. if (needed) {
  58. retval = ext4_journal_extend(handle, needed);
  59. if (retval != 0) {
  60. /*
  61. * IF not able to extend the journal restart the journal
  62. */
  63. retval = ext4_journal_restart(handle, needed);
  64. if (retval)
  65. goto err_out;
  66. }
  67. }
  68. retval = ext4_ext_insert_extent(handle, inode, path, &newext);
  69. err_out:
  70. lb->first_pblock = 0;
  71. return retval;
  72. }
  73. static int update_extent_range(handle_t *handle, struct inode *inode,
  74. ext4_fsblk_t pblock, ext4_lblk_t blk_num,
  75. struct list_blocks_struct *lb)
  76. {
  77. int retval;
  78. /*
  79. * See if we can add on to the existing range (if it exists)
  80. */
  81. if (lb->first_pblock &&
  82. (lb->last_pblock+1 == pblock) &&
  83. (lb->last_block+1 == blk_num)) {
  84. lb->last_pblock = pblock;
  85. lb->last_block = blk_num;
  86. return 0;
  87. }
  88. /*
  89. * Start a new range.
  90. */
  91. retval = finish_range(handle, inode, lb);
  92. lb->first_pblock = lb->last_pblock = pblock;
  93. lb->first_block = lb->last_block = blk_num;
  94. return retval;
  95. }
  96. static int update_ind_extent_range(handle_t *handle, struct inode *inode,
  97. ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
  98. struct list_blocks_struct *lb)
  99. {
  100. struct buffer_head *bh;
  101. __le32 *i_data;
  102. int i, retval = 0;
  103. ext4_lblk_t blk_count = *blk_nump;
  104. unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
  105. if (!pblock) {
  106. /* Only update the file block number */
  107. *blk_nump += max_entries;
  108. return 0;
  109. }
  110. bh = sb_bread(inode->i_sb, pblock);
  111. if (!bh)
  112. return -EIO;
  113. i_data = (__le32 *)bh->b_data;
  114. for (i = 0; i < max_entries; i++, blk_count++) {
  115. if (i_data[i]) {
  116. retval = update_extent_range(handle, inode,
  117. le32_to_cpu(i_data[i]),
  118. blk_count, lb);
  119. if (retval)
  120. break;
  121. }
  122. }
  123. /* Update the file block number */
  124. *blk_nump = blk_count;
  125. put_bh(bh);
  126. return retval;
  127. }
/*
 * Walk one double-indirect block at physical block @pblock and feed
 * every mapped data block beneath it into the range accumulator @lb
 * (via update_ind_extent_range -> update_extent_range).
 *
 * @blk_nump holds the logical file block where the walk starts and
 * is advanced past the max_entries^2 blocks this level maps.  A zero
 * @pblock is a hole: only the logical cursor moves.
 *
 * Returns 0, -EIO if the block cannot be read, or the first error
 * reported by the lower level.
 */
static int update_dind_extent_range(handle_t *handle, struct inode *inode,
				ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				struct list_blocks_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	ext4_lblk_t blk_count = *blk_nump;
	/* number of 32-bit block pointers per filesystem block */
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	if (!pblock) {
		/* Only update the file block number */
		*blk_nump += max_entries * max_entries;
		return 0;
	}
	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;
	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (i_data[i]) {
			retval = update_ind_extent_range(handle, inode,
						le32_to_cpu(i_data[i]),
						&blk_count, lb);
			if (retval)
				break;
		} else {
			/* hole at this entry: skip a whole indirect block's span */
			blk_count += max_entries;
		}
	}
	/* Update the file block number */
	*blk_nump = blk_count;
	put_bh(bh);
	return retval;
}
/*
 * Walk the triple-indirect block at physical block @pblock and feed
 * every mapped data block beneath it into the range accumulator @lb
 * (via update_dind_extent_range and below).
 *
 * @blk_nump holds the logical file block where the walk starts and
 * is advanced past the max_entries^3 blocks this level maps.  A zero
 * @pblock is a hole: only the logical cursor moves.
 *
 * Returns 0, -EIO if the block cannot be read, or the first error
 * reported by the lower levels.
 */
static int update_tind_extent_range(handle_t *handle, struct inode *inode,
				ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				struct list_blocks_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	ext4_lblk_t blk_count = *blk_nump;
	/* number of 32-bit block pointers per filesystem block */
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	if (!pblock) {
		/* Only update the file block number */
		*blk_nump += max_entries * max_entries * max_entries;
		return 0;
	}
	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;
	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (i_data[i]) {
			retval = update_dind_extent_range(handle, inode,
						le32_to_cpu(i_data[i]),
						&blk_count, lb);
			if (retval)
				break;
		} else
			/* hole at this entry: skip a double-indirect span */
			blk_count += max_entries * max_entries;
	}
	/* Update the file block number */
	*blk_nump = blk_count;
	put_bh(bh);
	return retval;
}
/*
 * Free the meta blocks of a double-indirect subtree rooted at block
 * @i_data: every (single-indirect) block it points to, then the
 * double-indirect block itself.  Data blocks below are NOT touched --
 * by the time this runs they are referenced by the new extent tree.
 *
 * Returns 0 on success or -EIO if the root block cannot be read.
 */
static int free_dind_blocks(handle_t *handle,
				struct inode *inode, __le32 i_data)
{
	int i;
	__le32 *tmp_idata;
	struct buffer_head *bh;
	/* number of 32-bit block pointers per filesystem block */
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
	if (!bh)
		return -EIO;
	tmp_idata = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (tmp_idata[i])
			/* free one single-indirect meta block */
			ext4_free_blocks(handle, inode,
					le32_to_cpu(tmp_idata[i]), 1, 1);
	}
	put_bh(bh);
	/* finally free the double-indirect block itself */
	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
	return 0;
}
  217. static int free_tind_blocks(handle_t *handle,
  218. struct inode *inode, __le32 i_data)
  219. {
  220. int i, retval = 0;
  221. __le32 *tmp_idata;
  222. struct buffer_head *bh;
  223. unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
  224. bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
  225. if (!bh)
  226. return -EIO;
  227. tmp_idata = (__le32 *)bh->b_data;
  228. for (i = 0; i < max_entries; i++) {
  229. if (tmp_idata[i]) {
  230. retval = free_dind_blocks(handle,
  231. inode, tmp_idata[i]);
  232. if (retval) {
  233. put_bh(bh);
  234. return retval;
  235. }
  236. }
  237. }
  238. put_bh(bh);
  239. ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
  240. return 0;
  241. }
/*
 * Release all indirect meta blocks of @inode: the single-indirect
 * block, then the double- and triple-indirect subtrees.  The data
 * blocks they referenced are left alone -- the migrated extent tree
 * owns them now.
 *
 * Returns 0 on success or the first error from the subtree walks.
 */
static int free_ind_block(handle_t *handle, struct inode *inode)
{
	int retval;
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (ei->i_data[EXT4_IND_BLOCK])
		/* single level: the indirect block itself is the only meta block */
		ext4_free_blocks(handle, inode,
				le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1);
	if (ei->i_data[EXT4_DIND_BLOCK]) {
		retval = free_dind_blocks(handle, inode,
					ei->i_data[EXT4_DIND_BLOCK]);
		if (retval)
			return retval;
	}
	if (ei->i_data[EXT4_TIND_BLOCK]) {
		retval = free_tind_blocks(handle, inode,
					ei->i_data[EXT4_TIND_BLOCK]);
		if (retval)
			return retval;
	}
	return 0;
}
/*
 * Final switch-over: free the original inode's indirect meta blocks,
 * then copy the i_data built as an extent tree in @tmp_inode over
 * the original inode's i_data and set EXT4_EXTENTS_FL.
 *
 * NOTE(review): the @retval parameter is overwritten immediately and
 * acts only as a local; it could be dropped from the signature once
 * all callers are visible.
 */
static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
				struct inode *tmp_inode, int retval)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);

	retval = free_ind_block(handle, inode);
	if (retval)
		goto err_out;
	/*
	 * One credit accounted for writing the
	 * i_data field of the original inode
	 */
	retval = ext4_journal_extend(handle, 1);
	if (retval != 0) {
		/* could not extend: restart the transaction with one credit */
		retval = ext4_journal_restart(handle, 1);
		if (retval)
			goto err_out;
	}
	/*
	 * We have the extent map build with the tmp inode.
	 * Now copy the i_data across
	 */
	ei->i_flags |= EXT4_EXTENTS_FL;
	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
	/*
	 * Update i_blocks with the new blocks that got
	 * allocated while adding extents for extent index
	 * blocks.
	 *
	 * While converting to extents we need not
	 * update the orignal inode i_blocks for extent blocks
	 * via quota APIs. The quota update happened via tmp_inode already.
	 */
	spin_lock(&inode->i_lock);
	inode->i_blocks += tmp_inode->i_blocks;
	spin_unlock(&inode->i_lock);
	ext4_mark_inode_dirty(handle, inode);
err_out:
	return retval;
}
/*
 * Free the extent-tree meta block referenced by index @ix and,
 * recursively, every index block below it.  A block whose header
 * shows depth zero is a leaf index block and is freed without
 * descending further.
 *
 * Returns 0 on success or -EIO if a block cannot be read; on error
 * the current block is still freed before returning.
 */
static int free_ext_idx(handle_t *handle, struct inode *inode,
					struct ext4_extent_idx *ix)
{
	int i, retval = 0;
	ext4_fsblk_t block;
	struct buffer_head *bh;
	struct ext4_extent_header *eh;

	block = idx_pblock(ix);
	bh = sb_bread(inode->i_sb, block);
	if (!bh)
		return -EIO;
	eh = (struct ext4_extent_header *)bh->b_data;
	if (eh->eh_depth != 0) {
		/* interior node: recurse into each child index first */
		ix = EXT_FIRST_INDEX(eh);
		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
			retval = free_ext_idx(handle, inode, ix);
			if (retval)
				break;
		}
	}
	put_bh(bh);
	ext4_free_blocks(handle, inode, block, 1, 1);
	return retval;
}
  327. /*
  328. * Free the extent meta data blocks only
  329. */
  330. static int free_ext_block(handle_t *handle, struct inode *inode)
  331. {
  332. int i, retval = 0;
  333. struct ext4_inode_info *ei = EXT4_I(inode);
  334. struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
  335. struct ext4_extent_idx *ix;
  336. if (eh->eh_depth == 0)
  337. /*
  338. * No extra blocks allocated for extent meta data
  339. */
  340. return 0;
  341. ix = EXT_FIRST_INDEX(eh);
  342. for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
  343. retval = free_ext_idx(handle, inode, ix);
  344. if (retval)
  345. return retval;
  346. }
  347. return retval;
  348. }
  349. int ext4_ext_migrate(struct inode *inode, struct file *filp,
  350. unsigned int cmd, unsigned long arg)
  351. {
  352. handle_t *handle;
  353. int retval = 0, i;
  354. __le32 *i_data;
  355. ext4_lblk_t blk_count = 0;
  356. struct ext4_inode_info *ei;
  357. struct inode *tmp_inode = NULL;
  358. struct list_blocks_struct lb;
  359. unsigned long max_entries;
  360. if (!test_opt(inode->i_sb, EXTENTS))
  361. /*
  362. * if mounted with noextents we don't allow the migrate
  363. */
  364. return -EINVAL;
  365. if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
  366. return -EINVAL;
  367. down_write(&EXT4_I(inode)->i_data_sem);
  368. handle = ext4_journal_start(inode,
  369. EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
  370. EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
  371. 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
  372. + 1);
  373. if (IS_ERR(handle)) {
  374. retval = PTR_ERR(handle);
  375. goto err_out;
  376. }
  377. tmp_inode = ext4_new_inode(handle,
  378. inode->i_sb->s_root->d_inode,
  379. S_IFREG);
  380. if (IS_ERR(tmp_inode)) {
  381. retval = -ENOMEM;
  382. ext4_journal_stop(handle);
  383. tmp_inode = NULL;
  384. goto err_out;
  385. }
  386. i_size_write(tmp_inode, i_size_read(inode));
  387. /*
  388. * We don't want the inode to be reclaimed
  389. * if we got interrupted in between. We have
  390. * this tmp inode carrying reference to the
  391. * data blocks of the original file. We set
  392. * the i_nlink to zero at the last stage after
  393. * switching the original file to extent format
  394. */
  395. tmp_inode->i_nlink = 1;
  396. ext4_ext_tree_init(handle, tmp_inode);
  397. ext4_orphan_add(handle, tmp_inode);
  398. ext4_journal_stop(handle);
  399. ei = EXT4_I(inode);
  400. i_data = ei->i_data;
  401. memset(&lb, 0, sizeof(lb));
  402. /* 32 bit block address 4 bytes */
  403. max_entries = inode->i_sb->s_blocksize >> 2;
  404. /*
  405. * start with one credit accounted for
  406. * superblock modification.
  407. *
  408. * For the tmp_inode we already have commited the
  409. * trascation that created the inode. Later as and
  410. * when we add extents we extent the journal
  411. */
  412. handle = ext4_journal_start(inode, 1);
  413. for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
  414. if (i_data[i]) {
  415. retval = update_extent_range(handle, tmp_inode,
  416. le32_to_cpu(i_data[i]),
  417. blk_count, &lb);
  418. if (retval)
  419. goto err_out;
  420. }
  421. }
  422. if (i_data[EXT4_IND_BLOCK]) {
  423. retval = update_ind_extent_range(handle, tmp_inode,
  424. le32_to_cpu(i_data[EXT4_IND_BLOCK]),
  425. &blk_count, &lb);
  426. if (retval)
  427. goto err_out;
  428. } else
  429. blk_count += max_entries;
  430. if (i_data[EXT4_DIND_BLOCK]) {
  431. retval = update_dind_extent_range(handle, tmp_inode,
  432. le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
  433. &blk_count, &lb);
  434. if (retval)
  435. goto err_out;
  436. } else
  437. blk_count += max_entries * max_entries;
  438. if (i_data[EXT4_TIND_BLOCK]) {
  439. retval = update_tind_extent_range(handle, tmp_inode,
  440. le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
  441. &blk_count, &lb);
  442. if (retval)
  443. goto err_out;
  444. }
  445. /*
  446. * Build the last extent
  447. */
  448. retval = finish_range(handle, tmp_inode, &lb);
  449. err_out:
  450. /*
  451. * We are either freeing extent information or indirect
  452. * blocks. During this we touch superblock, group descriptor
  453. * and block bitmap. Later we mark the tmp_inode dirty
  454. * via ext4_ext_tree_init. So allocate a credit of 4
  455. * We may update quota (user and group).
  456. *
  457. * FIXME!! we may be touching bitmaps in different block groups.
  458. */
  459. if (ext4_journal_extend(handle,
  460. 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0)
  461. ext4_journal_restart(handle,
  462. 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
  463. if (retval)
  464. /*
  465. * Failure case delete the extent information with the
  466. * tmp_inode
  467. */
  468. free_ext_block(handle, tmp_inode);
  469. else
  470. retval = ext4_ext_swap_inode_data(handle, inode,
  471. tmp_inode, retval);
  472. /*
  473. * Mark the tmp_inode as of size zero
  474. */
  475. i_size_write(tmp_inode, 0);
  476. /*
  477. * set the i_blocks count to zero
  478. * so that the ext4_delete_inode does the
  479. * right job
  480. *
  481. * We don't need to take the i_lock because
  482. * the inode is not visible to user space.
  483. */
  484. tmp_inode->i_blocks = 0;
  485. /* Reset the extent details */
  486. ext4_ext_tree_init(handle, tmp_inode);
  487. /*
  488. * Set the i_nlink to zero so that
  489. * generic_drop_inode really deletes the
  490. * inode
  491. */
  492. tmp_inode->i_nlink = 0;
  493. ext4_journal_stop(handle);
  494. up_write(&EXT4_I(inode)->i_data_sem);
  495. if (tmp_inode)
  496. iput(tmp_inode);
  497. return retval;
  498. }