file.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784
  1. #include <linux/module.h>
  2. #include <linux/buffer_head.h>
  3. #include <linux/fs.h>
  4. #include <linux/pagemap.h>
  5. #include <linux/highmem.h>
  6. #include <linux/time.h>
  7. #include <linux/init.h>
  8. #include <linux/string.h>
  9. #include <linux/smp_lock.h>
  10. #include <linux/backing-dev.h>
  11. #include <linux/mpage.h>
  12. #include <linux/swap.h>
  13. #include <linux/writeback.h>
  14. #include <linux/statfs.h>
  15. #include <linux/compat.h>
  16. #include "ctree.h"
  17. #include "disk-io.h"
  18. #include "transaction.h"
  19. #include "btrfs_inode.h"
  20. #include "ioctl.h"
  21. #include "print-tree.h"
  22. static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
  23. struct page **prepared_pages,
  24. const char __user * buf)
  25. {
  26. long page_fault = 0;
  27. int i;
  28. int offset = pos & (PAGE_CACHE_SIZE - 1);
  29. for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
  30. size_t count = min_t(size_t,
  31. PAGE_CACHE_SIZE - offset, write_bytes);
  32. struct page *page = prepared_pages[i];
  33. fault_in_pages_readable(buf, count);
  34. /* Copy data from userspace to the current page */
  35. kmap(page);
  36. page_fault = __copy_from_user(page_address(page) + offset,
  37. buf, count);
  38. /* Flush processor's dcache for this page */
  39. flush_dcache_page(page);
  40. kunmap(page);
  41. buf += count;
  42. write_bytes -= count;
  43. if (page_fault)
  44. break;
  45. }
  46. return page_fault ? -EFAULT : 0;
  47. }
  48. static void btrfs_drop_pages(struct page **pages, size_t num_pages)
  49. {
  50. size_t i;
  51. for (i = 0; i < num_pages; i++) {
  52. if (!pages[i])
  53. break;
  54. unlock_page(pages[i]);
  55. mark_page_accessed(pages[i]);
  56. page_cache_release(pages[i]);
  57. }
  58. }
  59. static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
  60. struct btrfs_root *root,
  61. struct file *file,
  62. struct page **pages,
  63. size_t num_pages,
  64. loff_t pos,
  65. size_t write_bytes)
  66. {
  67. int i;
  68. int offset;
  69. int err = 0;
  70. int ret;
  71. int this_write;
  72. struct inode *inode = file->f_path.dentry->d_inode;
  73. struct buffer_head *bh;
  74. struct btrfs_file_extent_item *ei;
  75. for (i = 0; i < num_pages; i++) {
  76. offset = pos & (PAGE_CACHE_SIZE -1);
  77. this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
  78. /* FIXME, one block at a time */
  79. mutex_lock(&root->fs_info->fs_mutex);
  80. trans = btrfs_start_transaction(root, 1);
  81. btrfs_set_trans_block_group(trans, inode);
  82. bh = page_buffers(pages[i]);
  83. if (buffer_mapped(bh) && bh->b_blocknr == 0) {
  84. struct btrfs_key key;
  85. struct btrfs_path *path;
  86. char *ptr;
  87. u32 datasize;
  88. /* create an inline extent, and copy the data in */
  89. path = btrfs_alloc_path();
  90. BUG_ON(!path);
  91. key.objectid = inode->i_ino;
  92. key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
  93. key.flags = 0;
  94. btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
  95. BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
  96. datasize = offset +
  97. btrfs_file_extent_calc_inline_size(write_bytes);
  98. ret = btrfs_insert_empty_item(trans, root, path, &key,
  99. datasize);
  100. BUG_ON(ret);
  101. ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
  102. path->slots[0], struct btrfs_file_extent_item);
  103. btrfs_set_file_extent_generation(ei, trans->transid);
  104. btrfs_set_file_extent_type(ei,
  105. BTRFS_FILE_EXTENT_INLINE);
  106. ptr = btrfs_file_extent_inline_start(ei);
  107. btrfs_memcpy(root, path->nodes[0]->b_data,
  108. ptr, bh->b_data, offset + write_bytes);
  109. mark_buffer_dirty(path->nodes[0]);
  110. btrfs_free_path(path);
  111. } else if (buffer_mapped(bh)) {
  112. /* csum the file data */
  113. btrfs_csum_file_block(trans, root, inode->i_ino,
  114. pages[i]->index << PAGE_CACHE_SHIFT,
  115. kmap(pages[i]), PAGE_CACHE_SIZE);
  116. kunmap(pages[i]);
  117. }
  118. SetPageChecked(pages[i]);
  119. ret = btrfs_end_transaction(trans, root);
  120. BUG_ON(ret);
  121. mutex_unlock(&root->fs_info->fs_mutex);
  122. ret = btrfs_commit_write(file, pages[i], offset,
  123. offset + this_write);
  124. pos += this_write;
  125. if (ret) {
  126. err = ret;
  127. goto failed;
  128. }
  129. WARN_ON(this_write > write_bytes);
  130. write_bytes -= this_write;
  131. }
  132. failed:
  133. return err;
  134. }
  135. /*
  136. * this is very complex, but the basic idea is to drop all extents
  137. * in the range start - end. hint_block is filled in with a block number
  138. * that would be a good hint to the block allocator for this file.
  139. *
  140. * If an extent intersects the range but is not entirely inside the range
  141. * it is either truncated or split. Anything entirely inside the range
  142. * is deleted from the tree.
  143. */
  144. int btrfs_drop_extents(struct btrfs_trans_handle *trans,
  145. struct btrfs_root *root, struct inode *inode,
  146. u64 start, u64 end, u64 *hint_block)
  147. {
  148. int ret;
  149. struct btrfs_key key;
  150. struct btrfs_leaf *leaf;
  151. int slot;
  152. struct btrfs_file_extent_item *extent;
  153. u64 extent_end = 0;
  154. int keep;
  155. struct btrfs_file_extent_item old;
  156. struct btrfs_path *path;
  157. u64 search_start = start;
  158. int bookend;
  159. int found_type;
  160. int found_extent;
  161. int found_inline;
  162. path = btrfs_alloc_path();
  163. if (!path)
  164. return -ENOMEM;
  165. while(1) {
  166. btrfs_release_path(root, path);
  167. ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
  168. search_start, -1);
  169. if (ret < 0)
  170. goto out;
  171. if (ret > 0) {
  172. if (path->slots[0] == 0) {
  173. ret = 0;
  174. goto out;
  175. }
  176. path->slots[0]--;
  177. }
  178. keep = 0;
  179. bookend = 0;
  180. found_extent = 0;
  181. found_inline = 0;
  182. extent = NULL;
  183. leaf = btrfs_buffer_leaf(path->nodes[0]);
  184. slot = path->slots[0];
  185. btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
  186. if (key.offset >= end || key.objectid != inode->i_ino) {
  187. ret = 0;
  188. goto out;
  189. }
  190. if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
  191. ret = 0;
  192. goto out;
  193. }
  194. extent = btrfs_item_ptr(leaf, slot,
  195. struct btrfs_file_extent_item);
  196. found_type = btrfs_file_extent_type(extent);
  197. if (found_type == BTRFS_FILE_EXTENT_REG) {
  198. extent_end = key.offset +
  199. (btrfs_file_extent_num_blocks(extent) <<
  200. inode->i_blkbits);
  201. found_extent = 1;
  202. } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
  203. found_inline = 1;
  204. extent_end = key.offset +
  205. btrfs_file_extent_inline_len(leaf->items + slot);
  206. }
  207. /* we found nothing we can drop */
  208. if (!found_extent && !found_inline) {
  209. ret = 0;
  210. goto out;
  211. }
  212. /* we found nothing inside the range */
  213. if (search_start >= extent_end) {
  214. ret = 0;
  215. goto out;
  216. }
  217. /* FIXME, there's only one inline extent allowed right now */
  218. if (found_inline) {
  219. u64 mask = root->blocksize - 1;
  220. search_start = (extent_end + mask) & ~mask;
  221. } else
  222. search_start = extent_end;
  223. if (end < extent_end && end >= key.offset) {
  224. if (found_extent) {
  225. u64 disk_blocknr =
  226. btrfs_file_extent_disk_blocknr(extent);
  227. u64 disk_num_blocks =
  228. btrfs_file_extent_disk_num_blocks(extent);
  229. memcpy(&old, extent, sizeof(old));
  230. if (disk_blocknr != 0) {
  231. ret = btrfs_inc_extent_ref(trans, root,
  232. disk_blocknr, disk_num_blocks);
  233. BUG_ON(ret);
  234. }
  235. }
  236. WARN_ON(found_inline);
  237. bookend = 1;
  238. }
  239. /* truncate existing extent */
  240. if (start > key.offset) {
  241. u64 new_num;
  242. u64 old_num;
  243. keep = 1;
  244. WARN_ON(start & (root->blocksize - 1));
  245. if (found_extent) {
  246. new_num = (start - key.offset) >>
  247. inode->i_blkbits;
  248. old_num = btrfs_file_extent_num_blocks(extent);
  249. *hint_block =
  250. btrfs_file_extent_disk_blocknr(extent);
  251. if (btrfs_file_extent_disk_blocknr(extent)) {
  252. inode->i_blocks -=
  253. (old_num - new_num) << 3;
  254. }
  255. btrfs_set_file_extent_num_blocks(extent,
  256. new_num);
  257. mark_buffer_dirty(path->nodes[0]);
  258. } else {
  259. WARN_ON(1);
  260. }
  261. }
  262. /* delete the entire extent */
  263. if (!keep) {
  264. u64 disk_blocknr = 0;
  265. u64 disk_num_blocks = 0;
  266. u64 extent_num_blocks = 0;
  267. if (found_extent) {
  268. disk_blocknr =
  269. btrfs_file_extent_disk_blocknr(extent);
  270. disk_num_blocks =
  271. btrfs_file_extent_disk_num_blocks(extent);
  272. extent_num_blocks =
  273. btrfs_file_extent_num_blocks(extent);
  274. *hint_block =
  275. btrfs_file_extent_disk_blocknr(extent);
  276. }
  277. ret = btrfs_del_item(trans, root, path);
  278. BUG_ON(ret);
  279. btrfs_release_path(root, path);
  280. extent = NULL;
  281. if (found_extent && disk_blocknr != 0) {
  282. inode->i_blocks -= extent_num_blocks << 3;
  283. ret = btrfs_free_extent(trans, root,
  284. disk_blocknr,
  285. disk_num_blocks, 0);
  286. }
  287. BUG_ON(ret);
  288. if (!bookend && search_start >= end) {
  289. ret = 0;
  290. goto out;
  291. }
  292. if (!bookend)
  293. continue;
  294. }
  295. /* create bookend, splitting the extent in two */
  296. if (bookend && found_extent) {
  297. struct btrfs_key ins;
  298. ins.objectid = inode->i_ino;
  299. ins.offset = end;
  300. ins.flags = 0;
  301. btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
  302. btrfs_release_path(root, path);
  303. ret = btrfs_insert_empty_item(trans, root, path, &ins,
  304. sizeof(*extent));
  305. BUG_ON(ret);
  306. extent = btrfs_item_ptr(
  307. btrfs_buffer_leaf(path->nodes[0]),
  308. path->slots[0],
  309. struct btrfs_file_extent_item);
  310. btrfs_set_file_extent_disk_blocknr(extent,
  311. btrfs_file_extent_disk_blocknr(&old));
  312. btrfs_set_file_extent_disk_num_blocks(extent,
  313. btrfs_file_extent_disk_num_blocks(&old));
  314. btrfs_set_file_extent_offset(extent,
  315. btrfs_file_extent_offset(&old) +
  316. ((end - key.offset) >> inode->i_blkbits));
  317. WARN_ON(btrfs_file_extent_num_blocks(&old) <
  318. (extent_end - end) >> inode->i_blkbits);
  319. btrfs_set_file_extent_num_blocks(extent,
  320. (extent_end - end) >> inode->i_blkbits);
  321. btrfs_set_file_extent_type(extent,
  322. BTRFS_FILE_EXTENT_REG);
  323. btrfs_set_file_extent_generation(extent,
  324. btrfs_file_extent_generation(&old));
  325. btrfs_mark_buffer_dirty(path->nodes[0]);
  326. if (btrfs_file_extent_disk_blocknr(&old) != 0) {
  327. inode->i_blocks +=
  328. btrfs_file_extent_num_blocks(extent) << 3;
  329. }
  330. ret = 0;
  331. goto out;
  332. }
  333. }
  334. out:
  335. btrfs_free_path(path);
  336. return ret;
  337. }
  338. /*
  339. * this gets pages into the page cache and locks them down
  340. */
  341. static int prepare_pages(struct btrfs_root *root,
  342. struct file *file,
  343. struct page **pages,
  344. size_t num_pages,
  345. loff_t pos,
  346. unsigned long first_index,
  347. unsigned long last_index,
  348. size_t write_bytes,
  349. u64 alloc_extent_start)
  350. {
  351. int i;
  352. unsigned long index = pos >> PAGE_CACHE_SHIFT;
  353. struct inode *inode = file->f_path.dentry->d_inode;
  354. int offset;
  355. int err = 0;
  356. int this_write;
  357. struct buffer_head *bh;
  358. struct buffer_head *head;
  359. loff_t isize = i_size_read(inode);
  360. memset(pages, 0, num_pages * sizeof(struct page *));
  361. for (i = 0; i < num_pages; i++) {
  362. pages[i] = grab_cache_page(inode->i_mapping, index + i);
  363. if (!pages[i]) {
  364. err = -ENOMEM;
  365. goto failed_release;
  366. }
  367. cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
  368. wait_on_page_writeback(pages[i]);
  369. offset = pos & (PAGE_CACHE_SIZE -1);
  370. this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
  371. if (!page_has_buffers(pages[i])) {
  372. create_empty_buffers(pages[i],
  373. root->fs_info->sb->s_blocksize,
  374. (1 << BH_Uptodate));
  375. }
  376. head = page_buffers(pages[i]);
  377. bh = head;
  378. do {
  379. err = btrfs_map_bh_to_logical(root, bh,
  380. alloc_extent_start);
  381. BUG_ON(err);
  382. if (err)
  383. goto failed_truncate;
  384. bh = bh->b_this_page;
  385. if (alloc_extent_start)
  386. alloc_extent_start++;
  387. } while (bh != head);
  388. pos += this_write;
  389. WARN_ON(this_write > write_bytes);
  390. write_bytes -= this_write;
  391. }
  392. return 0;
  393. failed_release:
  394. btrfs_drop_pages(pages, num_pages);
  395. return err;
  396. failed_truncate:
  397. btrfs_drop_pages(pages, num_pages);
  398. if (pos > isize)
  399. vmtruncate(inode, isize);
  400. return err;
  401. }
  402. static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
  403. size_t count, loff_t *ppos)
  404. {
  405. loff_t pos;
  406. size_t num_written = 0;
  407. int err = 0;
  408. int ret = 0;
  409. struct inode *inode = file->f_path.dentry->d_inode;
  410. struct btrfs_root *root = BTRFS_I(inode)->root;
  411. struct page *pages[8];
  412. struct page *pinned[2];
  413. unsigned long first_index;
  414. unsigned long last_index;
  415. u64 start_pos;
  416. u64 num_blocks;
  417. u64 alloc_extent_start;
  418. u64 hint_block;
  419. struct btrfs_trans_handle *trans;
  420. struct btrfs_key ins;
  421. pinned[0] = NULL;
  422. pinned[1] = NULL;
  423. if (file->f_flags & O_DIRECT)
  424. return -EINVAL;
  425. pos = *ppos;
  426. vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
  427. current->backing_dev_info = inode->i_mapping->backing_dev_info;
  428. err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
  429. if (err)
  430. goto out;
  431. if (count == 0)
  432. goto out;
  433. err = remove_suid(file->f_path.dentry);
  434. if (err)
  435. goto out;
  436. file_update_time(file);
  437. start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
  438. num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
  439. inode->i_blkbits;
  440. mutex_lock(&inode->i_mutex);
  441. first_index = pos >> PAGE_CACHE_SHIFT;
  442. last_index = (pos + count) >> PAGE_CACHE_SHIFT;
  443. /*
  444. * there are lots of better ways to do this, but this code
  445. * makes sure the first and last page in the file range are
  446. * up to date and ready for cow
  447. */
  448. if ((pos & (PAGE_CACHE_SIZE - 1))) {
  449. pinned[0] = grab_cache_page(inode->i_mapping, first_index);
  450. if (!PageUptodate(pinned[0])) {
  451. ret = mpage_readpage(pinned[0], btrfs_get_block);
  452. BUG_ON(ret);
  453. wait_on_page_locked(pinned[0]);
  454. } else {
  455. unlock_page(pinned[0]);
  456. }
  457. }
  458. if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
  459. pinned[1] = grab_cache_page(inode->i_mapping, last_index);
  460. if (!PageUptodate(pinned[1])) {
  461. ret = mpage_readpage(pinned[1], btrfs_get_block);
  462. BUG_ON(ret);
  463. wait_on_page_locked(pinned[1]);
  464. } else {
  465. unlock_page(pinned[1]);
  466. }
  467. }
  468. mutex_lock(&root->fs_info->fs_mutex);
  469. trans = btrfs_start_transaction(root, 1);
  470. if (!trans) {
  471. err = -ENOMEM;
  472. mutex_unlock(&root->fs_info->fs_mutex);
  473. goto out_unlock;
  474. }
  475. btrfs_set_trans_block_group(trans, inode);
  476. /* FIXME blocksize != 4096 */
  477. inode->i_blocks += num_blocks << 3;
  478. hint_block = 0;
  479. /* FIXME...EIEIO, ENOSPC and more */
  480. /* step one, delete the existing extents in this range */
  481. if (start_pos < inode->i_size) {
  482. /* FIXME blocksize != pagesize */
  483. ret = btrfs_drop_extents(trans, root, inode,
  484. start_pos,
  485. (pos + count + root->blocksize -1) &
  486. ~((u64)root->blocksize - 1),
  487. &hint_block);
  488. BUG_ON(ret);
  489. }
  490. /* insert any holes we need to create */
  491. if (inode->i_size < start_pos) {
  492. u64 last_pos_in_file;
  493. u64 hole_size;
  494. u64 mask = root->blocksize - 1;
  495. last_pos_in_file = (inode->i_size + mask) & ~mask;
  496. hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
  497. hole_size >>= inode->i_blkbits;
  498. if (last_pos_in_file < start_pos) {
  499. ret = btrfs_insert_file_extent(trans, root,
  500. inode->i_ino,
  501. last_pos_in_file,
  502. 0, 0, hole_size);
  503. }
  504. BUG_ON(ret);
  505. }
  506. /*
  507. * either allocate an extent for the new bytes or setup the key
  508. * to show we are doing inline data in the extent
  509. */
  510. if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
  511. pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
  512. ret = btrfs_alloc_extent(trans, root, inode->i_ino,
  513. num_blocks, hint_block, (u64)-1,
  514. &ins, 1);
  515. BUG_ON(ret);
  516. ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
  517. start_pos, ins.objectid, ins.offset,
  518. ins.offset);
  519. BUG_ON(ret);
  520. } else {
  521. ins.offset = 0;
  522. ins.objectid = 0;
  523. }
  524. BUG_ON(ret);
  525. alloc_extent_start = ins.objectid;
  526. ret = btrfs_end_transaction(trans, root);
  527. mutex_unlock(&root->fs_info->fs_mutex);
  528. while(count > 0) {
  529. size_t offset = pos & (PAGE_CACHE_SIZE - 1);
  530. size_t write_bytes = min(count,
  531. (size_t)PAGE_CACHE_SIZE - offset);
  532. size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
  533. PAGE_CACHE_SHIFT;
  534. memset(pages, 0, sizeof(pages));
  535. ret = prepare_pages(root, file, pages, num_pages,
  536. pos, first_index, last_index,
  537. write_bytes, alloc_extent_start);
  538. BUG_ON(ret);
  539. /* FIXME blocks != pagesize */
  540. if (alloc_extent_start)
  541. alloc_extent_start += num_pages;
  542. ret = btrfs_copy_from_user(pos, num_pages,
  543. write_bytes, pages, buf);
  544. BUG_ON(ret);
  545. ret = dirty_and_release_pages(NULL, root, file, pages,
  546. num_pages, pos, write_bytes);
  547. BUG_ON(ret);
  548. btrfs_drop_pages(pages, num_pages);
  549. buf += write_bytes;
  550. count -= write_bytes;
  551. pos += write_bytes;
  552. num_written += write_bytes;
  553. balance_dirty_pages_ratelimited(inode->i_mapping);
  554. btrfs_btree_balance_dirty(root);
  555. cond_resched();
  556. }
  557. out_unlock:
  558. mutex_unlock(&inode->i_mutex);
  559. out:
  560. if (pinned[0])
  561. page_cache_release(pinned[0]);
  562. if (pinned[1])
  563. page_cache_release(pinned[1]);
  564. *ppos = pos;
  565. current->backing_dev_info = NULL;
  566. mark_inode_dirty(inode);
  567. return num_written ? num_written : err;
  568. }
  569. /*
  570. * FIXME, do this by stuffing the csum we want in the info hanging off
  571. * page->private. For now, verify file csums on read
  572. */
  573. static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
  574. unsigned long offset, unsigned long size)
  575. {
  576. char *kaddr;
  577. unsigned long left, count = desc->count;
  578. struct inode *inode = page->mapping->host;
  579. if (size > count)
  580. size = count;
  581. if (!PageChecked(page)) {
  582. /* FIXME, do it per block */
  583. struct btrfs_root *root = BTRFS_I(inode)->root;
  584. int ret;
  585. struct buffer_head *bh;
  586. if (page_has_buffers(page)) {
  587. bh = page_buffers(page);
  588. if (!buffer_mapped(bh)) {
  589. SetPageChecked(page);
  590. goto checked;
  591. }
  592. }
  593. ret = btrfs_csum_verify_file_block(root,
  594. page->mapping->host->i_ino,
  595. page->index << PAGE_CACHE_SHIFT,
  596. kmap(page), PAGE_CACHE_SIZE);
  597. if (ret) {
  598. if (ret != -ENOENT) {
  599. printk("failed to verify ino %lu page %lu ret %d\n",
  600. page->mapping->host->i_ino,
  601. page->index, ret);
  602. memset(page_address(page), 1, PAGE_CACHE_SIZE);
  603. flush_dcache_page(page);
  604. }
  605. }
  606. SetPageChecked(page);
  607. kunmap(page);
  608. }
  609. checked:
  610. /*
  611. * Faults on the destination of a read are common, so do it before
  612. * taking the kmap.
  613. */
  614. if (!fault_in_pages_writeable(desc->arg.buf, size)) {
  615. kaddr = kmap_atomic(page, KM_USER0);
  616. left = __copy_to_user_inatomic(desc->arg.buf,
  617. kaddr + offset, size);
  618. kunmap_atomic(kaddr, KM_USER0);
  619. if (left == 0)
  620. goto success;
  621. }
  622. /* Do it the slow way */
  623. kaddr = kmap(page);
  624. left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
  625. kunmap(page);
  626. if (left) {
  627. size -= left;
  628. desc->error = -EFAULT;
  629. }
  630. success:
  631. desc->count = count - size;
  632. desc->written += size;
  633. desc->arg.buf += size;
  634. return size;
  635. }
  636. /**
  637. * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify
  638. * @iocb: kernel I/O control block
  639. * @iov: io vector request
  640. * @nr_segs: number of segments in the iovec
  641. * @pos: current file position
  642. */
  643. static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
  644. unsigned long nr_segs, loff_t pos)
  645. {
  646. struct file *filp = iocb->ki_filp;
  647. ssize_t retval;
  648. unsigned long seg;
  649. size_t count;
  650. loff_t *ppos = &iocb->ki_pos;
  651. count = 0;
  652. for (seg = 0; seg < nr_segs; seg++) {
  653. const struct iovec *iv = &iov[seg];
  654. /*
  655. * If any segment has a negative length, or the cumulative
  656. * length ever wraps negative then return -EINVAL.
  657. */
  658. count += iv->iov_len;
  659. if (unlikely((ssize_t)(count|iv->iov_len) < 0))
  660. return -EINVAL;
  661. if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
  662. continue;
  663. if (seg == 0)
  664. return -EFAULT;
  665. nr_segs = seg;
  666. count -= iv->iov_len; /* This segment is no good */
  667. break;
  668. }
  669. retval = 0;
  670. if (count) {
  671. for (seg = 0; seg < nr_segs; seg++) {
  672. read_descriptor_t desc;
  673. desc.written = 0;
  674. desc.arg.buf = iov[seg].iov_base;
  675. desc.count = iov[seg].iov_len;
  676. if (desc.count == 0)
  677. continue;
  678. desc.error = 0;
  679. do_generic_file_read(filp, ppos, &desc,
  680. btrfs_read_actor);
  681. retval += desc.written;
  682. if (desc.error) {
  683. retval = retval ?: desc.error;
  684. break;
  685. }
  686. }
  687. }
  688. return retval;
  689. }
  690. static int btrfs_sync_file(struct file *file,
  691. struct dentry *dentry, int datasync)
  692. {
  693. struct inode *inode = dentry->d_inode;
  694. struct btrfs_root *root = BTRFS_I(inode)->root;
  695. int ret;
  696. struct btrfs_trans_handle *trans;
  697. /*
  698. * FIXME, use inode generation number to check if we can skip the
  699. * commit
  700. */
  701. mutex_lock(&root->fs_info->fs_mutex);
  702. trans = btrfs_start_transaction(root, 1);
  703. if (!trans) {
  704. ret = -ENOMEM;
  705. goto out;
  706. }
  707. ret = btrfs_commit_transaction(trans, root);
  708. mutex_unlock(&root->fs_info->fs_mutex);
  709. out:
  710. return ret > 0 ? EIO : ret;
  711. }
  712. struct file_operations btrfs_file_operations = {
  713. .llseek = generic_file_llseek,
  714. .read = do_sync_read,
  715. .aio_read = btrfs_file_aio_read,
  716. .write = btrfs_file_write,
  717. .mmap = generic_file_mmap,
  718. .open = generic_file_open,
  719. .ioctl = btrfs_ioctl,
  720. .fsync = btrfs_sync_file,
  721. #ifdef CONFIG_COMPAT
  722. .compat_ioctl = btrfs_compat_ioctl,
  723. #endif
  724. };