/*
 *  linux/fs/nfs/file.c
 *
 *  Copyright (C) 1992  Rick Sladkey
 *
 *  Changes Copyright (C) 1994 by Florian La Roche
 *   - Do not copy data too often around in the kernel.
 *   - In nfs_file_read the return value of kmalloc wasn't checked.
 *   - Put in a better version of read look-ahead buffering. Original idea
 *     and implementation by Wai S Kok elekokws@ee.nus.sg.
 *
 *  Expire cache on write to a file by Wai S Kok (Oct 1994).
 *
 *  Total rewrite of read side for new NFS buffer cache.. Linus.
 *
 *  nfs regular file handling functions
 */
  18. #include <linux/time.h>
  19. #include <linux/kernel.h>
  20. #include <linux/errno.h>
  21. #include <linux/fcntl.h>
  22. #include <linux/stat.h>
  23. #include <linux/nfs_fs.h>
  24. #include <linux/nfs_mount.h>
  25. #include <linux/mm.h>
  26. #include <linux/pagemap.h>
  27. #include <linux/aio.h>
  28. #include <linux/gfp.h>
  29. #include <linux/swap.h>
  30. #include <asm/uaccess.h>
  31. #include "delegation.h"
  32. #include "internal.h"
  33. #include "iostat.h"
  34. #include "fscache.h"
  35. #include "pnfs.h"
  36. #define NFSDBG_FACILITY NFSDBG_FILE
  37. static const struct vm_operations_struct nfs_file_vm_ops;
/* Inode operations for NFSv2 regular files. */
const struct inode_operations nfs_file_inode_operations = {
	.permission	= nfs_permission,
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
};
#ifdef CONFIG_NFS_V3
/* Inode operations for NFSv3 regular files: the v2 set plus xattr support. */
const struct inode_operations nfs3_file_inode_operations = {
	.permission	= nfs_permission,
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
	.listxattr	= nfs3_listxattr,
	.getxattr	= nfs3_getxattr,
	.setxattr	= nfs3_setxattr,
	.removexattr	= nfs3_removexattr,
};
#endif  /* CONFIG_NFS_V3 */
  54. /* Hack for future NFS swap support */
  55. #ifndef IS_SWAPFILE
  56. # define IS_SWAPFILE(inode) (0)
  57. #endif
  58. static int nfs_check_flags(int flags)
  59. {
  60. if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
  61. return -EINVAL;
  62. return 0;
  63. }
/*
 * Open file
 */
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
	int res;

	dprintk("NFS: open file(%s/%s)\n",
			filp->f_path.dentry->d_parent->d_name.name,
			filp->f_path.dentry->d_name.name);

	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
	/* Reject unsupported flag combinations (O_APPEND|O_DIRECT) first */
	res = nfs_check_flags(filp->f_flags);
	if (res)
		return res;

	res = nfs_open(inode, filp);
	return res;
}
/*
 * Release the file: drop the NFS open context via nfs_release() and
 * account the release in the VFS iostats.
 */
static int
nfs_file_release(struct inode *inode, struct file *filp)
{
	dprintk("NFS: release(%s/%s)\n",
			filp->f_path.dentry->d_parent->d_name.name,
			filp->f_path.dentry->d_name.name);

	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
	return nfs_release(inode, filp);
}
  90. /**
  91. * nfs_revalidate_size - Revalidate the file size
  92. * @inode - pointer to inode struct
  93. * @file - pointer to struct file
  94. *
  95. * Revalidates the file length. This is basically a wrapper around
  96. * nfs_revalidate_inode() that takes into account the fact that we may
  97. * have cached writes (in which case we don't care about the server's
  98. * idea of what the file length is), or O_DIRECT (in which case we
  99. * shouldn't trust the cache).
  100. */
  101. static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
  102. {
  103. struct nfs_server *server = NFS_SERVER(inode);
  104. struct nfs_inode *nfsi = NFS_I(inode);
  105. if (nfs_have_delegated_attributes(inode))
  106. goto out_noreval;
  107. if (filp->f_flags & O_DIRECT)
  108. goto force_reval;
  109. if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
  110. goto force_reval;
  111. if (nfs_attribute_timeout(inode))
  112. goto force_reval;
  113. out_noreval:
  114. return 0;
  115. force_reval:
  116. return __nfs_revalidate_inode(server, inode);
  117. }
/*
 * llseek.  SEEK_END (and SEEK_DATA/SEEK_HOLE) depend on an accurate file
 * length, so revalidate the cached size before deferring to the generic
 * helper.
 */
static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
{
	dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
			filp->f_path.dentry->d_parent->d_name.name,
			filp->f_path.dentry->d_name.name,
			offset, origin);

	/*
	 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
	 * the cached file length
	 */
	if (origin != SEEK_SET && origin != SEEK_CUR) {
		struct inode *inode = filp->f_mapping->host;

		int retval = nfs_revalidate_file_size(inode, filp);
		if (retval < 0)
			return (loff_t)retval;
	}

	return generic_file_llseek(filp, offset, origin);
}
/*
 * Flush all dirty pages, and check for write errors.
 */
static int
nfs_file_flush(struct file *file, fl_owner_t id)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;

	dprintk("NFS: flush(%s/%s)\n",
			dentry->d_parent->d_name.name,
			dentry->d_name.name);

	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
	/* Read-only opens have nothing to flush */
	if ((file->f_mode & FMODE_WRITE) == 0)
		return 0;

	/*
	 * If we're holding a write delegation, then just start the i/o
	 * but don't wait for completion (or send a commit).
	 */
	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
		return filemap_fdatawrite(file->f_mapping);

	/* Flush writes to the server and return any errors */
	return vfs_fsync(file, 0);
}
/*
 * Cached read path: revalidate the pagecache against the server, then use
 * the generic aio read.  O_DIRECT opens bypass the cache entirely.
 */
static ssize_t
nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
	struct inode * inode = dentry->d_inode;
	ssize_t result;

	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_read(iocb, iov, nr_segs, pos);

	dprintk("NFS: read(%s/%s, %lu@%lu)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		(unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);

	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
	if (!result) {
		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
		if (result > 0)
			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
	}
	return result;
}
/*
 * splice read: same pattern as nfs_file_read() — revalidate the mapping
 * first, then let the generic splice path serve from the pagecache.
 */
static ssize_t
nfs_file_splice_read(struct file *filp, loff_t *ppos,
		     struct pipe_inode_info *pipe, size_t count,
		     unsigned int flags)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	ssize_t res;

	dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		(unsigned long) count, (unsigned long long) *ppos);

	res = nfs_revalidate_mapping(inode, filp->f_mapping);
	if (!res) {
		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
		if (res > 0)
			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
	}
	return res;
}
/*
 * mmap a regular NFS file: install the NFS vm_ops (for page_mkwrite)
 * and make sure the cached pages are still valid.
 */
static int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	int status;

	dprintk("NFS: mmap(%s/%s)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name);

	/* Note: generic_file_mmap() returns ENOSYS on nommu systems
	 *       so we call that before revalidating the mapping
	 */
	status = generic_file_mmap(file, vma);
	if (!status) {
		vma->vm_ops = &nfs_file_vm_ops;
		status = nfs_revalidate_mapping(inode, file->f_mapping);
	}
	return status;
}
  216. /*
  217. * Flush any dirty pages for this process, and check for write errors.
  218. * The return status from this call provides a reliable indication of
  219. * whether any write errors occurred for this process.
  220. *
  221. * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
  222. * disk, but it retrieves and clears ctx->error after synching, despite
  223. * the two being set at the same time in nfs_context_set_write_error().
  224. * This is because the former is used to notify the _next_ call to
  225. * nfs_file_write() that a write error occurred, and hence cause it to
  226. * fall back to doing a synchronous write.
  227. */
  228. static int
  229. nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
  230. {
  231. struct dentry *dentry = file->f_path.dentry;
  232. struct nfs_open_context *ctx = nfs_file_open_context(file);
  233. struct inode *inode = dentry->d_inode;
  234. int have_error, status;
  235. int ret = 0;
  236. dprintk("NFS: fsync file(%s/%s) datasync %d\n",
  237. dentry->d_parent->d_name.name, dentry->d_name.name,
  238. datasync);
  239. nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
  240. have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
  241. status = nfs_commit_inode(inode, FLUSH_SYNC);
  242. if (status >= 0 && ret < 0)
  243. status = ret;
  244. have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
  245. if (have_error)
  246. ret = xchg(&ctx->error, 0);
  247. if (!ret && status < 0)
  248. ret = status;
  249. return ret;
  250. }
  251. static int
  252. nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  253. {
  254. int ret;
  255. struct inode *inode = file->f_path.dentry->d_inode;
  256. ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
  257. mutex_lock(&inode->i_mutex);
  258. ret = nfs_file_fsync_commit(file, start, end, datasync);
  259. mutex_unlock(&inode->i_mutex);
  260. return ret;
  261. }
  262. /*
  263. * Decide whether a read/modify/write cycle may be more efficient
  264. * then a modify/write/read cycle when writing to a page in the
  265. * page cache.
  266. *
  267. * The modify/write/read cycle may occur if a page is read before
  268. * being completely filled by the writer. In this situation, the
  269. * page must be completely written to stable storage on the server
  270. * before it can be refilled by reading in the page from the server.
  271. * This can lead to expensive, small, FILE_SYNC mode writes being
  272. * done.
  273. *
  274. * It may be more efficient to read the page first if the file is
  275. * open for reading in addition to writing, the page is not marked
  276. * as Uptodate, it is not dirty or waiting to be committed,
  277. * indicating that it was previously allocated and then modified,
  278. * that there were valid bytes of data in that range of the file,
  279. * and that the new data won't completely replace the old data in
  280. * that range of the file.
  281. */
  282. static int nfs_want_read_modify_write(struct file *file, struct page *page,
  283. loff_t pos, unsigned len)
  284. {
  285. unsigned int pglen = nfs_page_length(page);
  286. unsigned int offset = pos & (PAGE_CACHE_SIZE - 1);
  287. unsigned int end = offset + len;
  288. if ((file->f_mode & FMODE_READ) && /* open for read? */
  289. !PageUptodate(page) && /* Uptodate? */
  290. !PagePrivate(page) && /* i/o request already? */
  291. pglen && /* valid bytes of file? */
  292. (end < pglen || offset)) /* replace all valid bytes? */
  293. return 1;
  294. return 0;
  295. }
/*
 * This does the "real" work of the write. We must allocate and lock the
 * page to be sent back to the generic routine, which then copies the
 * data from user space.
 *
 * If the writer ends up delaying the write, the writer needs to
 * increment the page use counts until he is done with the page.
 */
static int nfs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int ret;
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	struct page *page;
	int once_thru = 0;	/* the read-before-write retry happens at most once */

	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name,
		mapping->host->i_ino, len, (long long) pos);

start:
	/*
	 * Prevent starvation issues if someone is doing a consistency
	 * sync-to-disk
	 */
	ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
			nfs_wait_bit_killable, TASK_KILLABLE);
	if (ret)
		return ret;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;

	ret = nfs_flush_incompatible(file, page);
	if (ret) {
		unlock_page(page);
		page_cache_release(page);
	} else if (!once_thru &&
		   nfs_want_read_modify_write(file, page, pos, len)) {
		once_thru = 1;
		/* Pre-read the page so a partial overwrite won't force an
		 * expensive FILE_SYNC write-then-read cycle later */
		ret = nfs_readpage(file, page);
		page_cache_release(page);
		if (!ret)
			goto start;
	}
	return ret;
}
/*
 * Complete a buffered write: zero any never-written parts of the page,
 * hand the copied data to nfs_updatepage(), and release the page taken
 * in nfs_write_begin().
 */
static int nfs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	int status;

	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name,
		mapping->host->i_ino, len, (long long) pos);

	/*
	 * Zero any uninitialised parts of the page, and then mark the page
	 * as up to date if it turns out that we're extending the file.
	 */
	if (!PageUptodate(page)) {
		unsigned pglen = nfs_page_length(page);
		unsigned end = offset + len;

		if (pglen == 0) {
			/* write beyond current EOF: whole page except the
			 * written span is zeroed */
			zero_user_segments(page, 0, offset,
					end, PAGE_CACHE_SIZE);
			SetPageUptodate(page);
		} else if (end >= pglen) {
			zero_user_segment(page, end, PAGE_CACHE_SIZE);
			if (offset == 0)
				SetPageUptodate(page);
		} else
			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
	}

	status = nfs_updatepage(file, page, offset, copied);

	unlock_page(page);
	page_cache_release(page);

	if (status < 0)
		return status;
	NFS_I(mapping->host)->write_io += copied;
	return copied;
}
/*
 * Partially or wholly invalidate a page
 * - Release the private state associated with a page if undergoing complete
 *   page invalidation
 * - Called if either PG_private or PG_fscache is set on the page
 * - Caller holds page lock
 */
static void nfs_invalidate_page(struct page *page, unsigned long offset)
{
	dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset);

	/* Partial invalidation: the page keeps valid data, so keep any
	 * pending write state as well */
	if (offset != 0)
		return;
	/* Cancel any unstarted writes on this page */
	nfs_wb_page_cancel(page->mapping->host, page);

	nfs_fscache_invalidate_page(page, page->mapping->host);
}
/*
 * Attempt to release the private state associated with a page
 * - Called if either PG_private or PG_fscache is set on the page
 * - Caller holds page lock
 * - Return true (may release page) or false (may not)
 */
static int nfs_release_page(struct page *page, gfp_t gfp)
{
	struct address_space *mapping = page->mapping;

	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);

	/* Only do I/O if gfp is a superset of GFP_KERNEL */
	if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) {
		int how = FLUSH_SYNC;

		/* Don't let kswapd deadlock waiting for OOM RPC calls */
		if (current_is_kswapd())
			how = 0;
		nfs_commit_inode(mapping->host, how);
	}
	/* If PagePrivate() is set, then the page is not freeable */
	if (PagePrivate(page))
		return 0;
	return nfs_fscache_release_page(page, gfp);
}
/*
 * Attempt to clear the private state associated with a page when an error
 * occurs that requires the cached contents of an inode to be written back or
 * destroyed
 * - Called if either PG_private or fscache is set on the page
 * - Caller holds page lock
 * - Return 0 if successful, -error otherwise
 */
static int nfs_launder_page(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_inode *nfsi = NFS_I(inode);

	dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
		inode->i_ino, (long long)page_offset(page));

	/* wait for any in-flight fscache store before writing back */
	nfs_fscache_wait_on_page_write(nfsi, page);
	return nfs_wb_page(inode, page);
}
/* Address-space operations shared by all NFS regular files. */
const struct address_space_operations nfs_file_aops = {
	.readpage		= nfs_readpage,
	.readpages		= nfs_readpages,
	.set_page_dirty		= __set_page_dirty_nobuffers,
	.writepage		= nfs_writepage,
	.writepages		= nfs_writepages,
	.write_begin		= nfs_write_begin,
	.write_end		= nfs_write_end,
	.invalidatepage		= nfs_invalidate_page,
	.releasepage		= nfs_release_page,
	.direct_IO		= nfs_direct_IO,
	.migratepage		= nfs_migrate_page,
	.launder_page		= nfs_launder_page,
	.error_remove_page	= generic_error_remove_page,
};
/*
 * Notification that a PTE pointing to an NFS page is about to be made
 * writable, implying that someone is about to modify the page through a
 * shared-writable mapping
 */
static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct file *filp = vma->vm_file;
	struct dentry *dentry = filp->f_path.dentry;
	unsigned pagelen;
	int ret = VM_FAULT_NOPAGE;
	struct address_space *mapping;

	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		filp->f_mapping->host->i_ino,
		(long long)page_offset(page));

	/* make sure the cache has finished storing the page */
	nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);

	lock_page(page);
	mapping = page->mapping;
	/* page may have been truncated/invalidated while we waited */
	if (mapping != dentry->d_inode->i_mapping)
		goto out_unlock;

	wait_on_page_writeback(page);

	pagelen = nfs_page_length(page);
	if (pagelen == 0)
		goto out_unlock;

	ret = VM_FAULT_LOCKED;
	/* re-dirty the page through the NFS write path so the change is
	 * tracked for later flush/commit */
	if (nfs_flush_incompatible(filp, page) == 0 &&
	    nfs_updatepage(filp, page, 0, pagelen) == 0)
		goto out;

	ret = VM_FAULT_SIGBUS;
out_unlock:
	unlock_page(page);
out:
	return ret;
}
/* VM operations for mmapped NFS files: generic fault, NFS mkwrite hook. */
static const struct vm_operations_struct nfs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = nfs_vm_page_mkwrite,
};
  491. static int nfs_need_sync_write(struct file *filp, struct inode *inode)
  492. {
  493. struct nfs_open_context *ctx;
  494. if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
  495. return 1;
  496. ctx = nfs_file_open_context(filp);
  497. if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
  498. return 1;
  499. return 0;
  500. }
/*
 * Buffered write path.  O_DIRECT writes take the direct path; otherwise
 * the generic aio write fills the pagecache and, when sync semantics are
 * required, the data is flushed via vfs_fsync() before returning.
 */
static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
	struct inode * inode = dentry->d_inode;
	unsigned long written = 0;
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_write(iocb, iov, nr_segs, pos);

	dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		(unsigned long) count, (long long) pos);

	result = -EBUSY;
	if (IS_SWAPFILE(inode))
		goto out_swapfile;
	/*
	 * O_APPEND implies that we must revalidate the file length.
	 */
	if (iocb->ki_filp->f_flags & O_APPEND) {
		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
		if (result)
			goto out;
	}

	result = count;
	if (!count)
		goto out;

	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
	if (result > 0)
		written = result;

	/* Return error values for O_DSYNC and IS_SYNC() */
	if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
		int err = vfs_fsync(iocb->ki_filp, 0);
		if (err < 0)
			result = err;
	}
	if (result > 0)
		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
out:
	return result;

out_swapfile:
	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
	goto out;
}
/*
 * splice write: mirror of nfs_file_write() for the pipe-to-file path,
 * including the post-write sync when required.
 */
static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
				     struct file *filp, loff_t *ppos,
				     size_t count, unsigned int flags)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	unsigned long written = 0;
	ssize_t ret;

	dprintk("NFS splice_write(%s/%s, %lu@%llu)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		(unsigned long) count, (unsigned long long) *ppos);

	/*
	 * The combination of splice and an O_APPEND destination is disallowed.
	 */
	ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
	if (ret > 0)
		written = ret;

	if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
		int err = vfs_fsync(filp, 0);
		if (err < 0)
			ret = err;
	}
	if (ret > 0)
		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
	return ret;
}
/*
 * F_GETLK: test for a conflicting lock.  Local locks are checked first;
 * the server is only consulted when neither a read delegation nor
 * local-locking mode lets us answer on the client side.
 */
static int
do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
	struct inode *inode = filp->f_mapping->host;
	int status = 0;
	unsigned int saved_type = fl->fl_type;

	/* Try local locking first */
	posix_test_lock(filp, fl);
	if (fl->fl_type != F_UNLCK) {
		/* found a conflict */
		goto out;
	}
	fl->fl_type = saved_type;

	/* A read delegation guarantees no other client holds a conflicting lock */
	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
		goto out_noconflict;

	if (is_local)
		goto out_noconflict;

	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
out:
	return status;
out_noconflict:
	fl->fl_type = F_UNLCK;
	goto out;
}
  595. static int do_vfs_lock(struct file *file, struct file_lock *fl)
  596. {
  597. int res = 0;
  598. switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
  599. case FL_POSIX:
  600. res = posix_lock_file_wait(file, fl);
  601. break;
  602. case FL_FLOCK:
  603. res = flock_lock_file_wait(file, fl);
  604. break;
  605. default:
  606. BUG();
  607. }
  608. return res;
  609. }
/*
 * F_UNLCK: release a lock, either on the server or (for local-lock
 * mounts) purely in the client VFS.
 */
static int
do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
	struct inode *inode = filp->f_mapping->host;
	int status;

	/*
	 * Flush all pending writes before doing anything
	 * with locks..
	 */
	nfs_sync_mapping(filp->f_mapping);

	/* NOTE: special case
	 * 	If we're signalled while cleaning up locks on process exit, we
	 * 	still need to complete the unlock.
	 */
	/*
	 * Use local locking if mounted with "-onolock" or with appropriate
	 * "-olocal_lock="
	 */
	if (!is_local)
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	else
		status = do_vfs_lock(filp, fl);
	return status;
}
  634. static int
  635. is_time_granular(struct timespec *ts) {
  636. return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
  637. }
/*
 * F_SETLK/F_SETLKW: acquire a lock, then invalidate cached data so that
 * locking acts as a cache-coherency point between clients.
 */
static int
do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
	struct inode *inode = filp->f_mapping->host;
	int status;

	/*
	 * Flush all pending writes before doing anything
	 * with locks..
	 */
	status = nfs_sync_mapping(filp->f_mapping);
	if (status != 0)
		goto out;

	/*
	 * Use local locking if mounted with "-onolock" or with appropriate
	 * "-olocal_lock="
	 */
	if (!is_local)
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	else
		status = do_vfs_lock(filp, fl);
	if (status < 0)
		goto out;

	/*
	 * Revalidate the cache if the server has time stamps granular
	 * enough to detect subsecond changes.  Otherwise, clear the
	 * cache to prevent missing any changes.
	 *
	 * This makes locking act as a cache coherency point.
	 */
	nfs_sync_mapping(filp->f_mapping);
	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
		if (is_time_granular(&NFS_SERVER(inode)->time_delta))
			__nfs_revalidate_inode(NFS_SERVER(inode), inode);
		else
			nfs_zap_caches(inode);
	}
out:
	return status;
}
/*
 * Lock a (portion of) a file
 */
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int ret = -ENOLCK;
	int is_local = 0;

	dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
			filp->f_path.dentry->d_parent->d_name.name,
			filp->f_path.dentry->d_name.name,
			fl->fl_type, fl->fl_flags,
			(long long)fl->fl_start, (long long)fl->fl_end);

	nfs_inc_stats(inode, NFSIOS_VFSLOCK);

	/* No mandatory locks over NFS */
	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
		goto out_err;

	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
		is_local = 1;

	/* give the protocol a chance to reject out-of-range locks */
	if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
		ret = NFS_PROTO(inode)->lock_check_bounds(fl);
		if (ret < 0)
			goto out_err;
	}

	if (IS_GETLK(cmd))
		ret = do_getlk(filp, cmd, fl, is_local);
	else if (fl->fl_type == F_UNLCK)
		ret = do_unlk(filp, cmd, fl, is_local);
	else
		ret = do_setlk(filp, cmd, fl, is_local);
out_err:
	return ret;
}
/*
 * Lock the whole file (flock-style BSD lock), emulated on the server
 * using a whole-file POSIX lock.
 */
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int is_local = 0;

	dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
			filp->f_path.dentry->d_parent->d_name.name,
			filp->f_path.dentry->d_name.name,
			fl->fl_type, fl->fl_flags);

	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;

	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
		is_local = 1;

	/* We're simulating flock() locks using posix locks on the server */
	fl->fl_owner = (fl_owner_t)filp;
	fl->fl_start = 0;
	fl->fl_end = OFFSET_MAX;

	if (fl->fl_type == F_UNLCK)
		return do_unlk(filp, cmd, fl, is_local);
	return do_setlk(filp, cmd, fl, is_local);
}
/*
 * There is no protocol support for leases, so we have no way to implement
 * them correctly in the face of opens by other clients.
 */
static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
{
	dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
			file->f_path.dentry->d_parent->d_name.name,
			file->f_path.dentry->d_name.name, arg);

	return -EINVAL;
}
/* File operations for NFSv2/v3 regular files. */
const struct file_operations nfs_file_operations = {
	.llseek		= nfs_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= nfs_file_read,
	.aio_write	= nfs_file_write,
	.mmap		= nfs_file_mmap,
	.open		= nfs_file_open,
	.flush		= nfs_file_flush,
	.release	= nfs_file_release,
	.fsync		= nfs_file_fsync,
	.lock		= nfs_lock,
	.flock		= nfs_flock,
	.splice_read	= nfs_file_splice_read,
	.splice_write	= nfs_file_splice_write,
	.check_flags	= nfs_check_flags,
	.setlease	= nfs_setlease,
};
#ifdef CONFIG_NFS_V4
/*
 * NFSv4 open of a cached positive dentry: perform the OPEN on the wire
 * via the protocol's open_context, or drop the dentry and return
 * -EOPENSTALE so the VFS retries the full lookup/create/open path.
 */
static int
nfs4_file_open(struct inode *inode, struct file *filp)
{
	struct nfs_open_context *ctx;
	struct dentry *dentry = filp->f_path.dentry;
	struct dentry *parent = NULL;
	struct inode *dir;
	unsigned openflags = filp->f_flags;
	struct iattr attr;
	int err;

	BUG_ON(inode != dentry->d_inode);
	/*
	 * If no cached dentry exists or if it's negative, NFSv4 handled the
	 * opens in ->lookup() or ->create().
	 *
	 * We only get this far for a cached positive dentry. We skipped
	 * revalidation, so handle it here by dropping the dentry and returning
	 * -EOPENSTALE. The VFS will retry the lookup/create/open.
	 */

	dprintk("NFS: open file(%s/%s)\n",
		dentry->d_parent->d_name.name,
		dentry->d_name.name);

	/* O_ACCMODE value 3 is the Linux "access check" mode; normalize it */
	if ((openflags & O_ACCMODE) == 3)
		openflags--;

	/* We can't create new files here */
	openflags &= ~(O_CREAT|O_EXCL);

	parent = dget_parent(dentry);
	dir = parent->d_inode;

	ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
	err = PTR_ERR(ctx);
	if (IS_ERR(ctx))
		goto out;

	attr.ia_valid = ATTR_OPEN;
	if (openflags & O_TRUNC) {
		attr.ia_valid |= ATTR_SIZE;
		attr.ia_size = 0;
		/* flush dirty data before truncating on the server */
		nfs_wb_all(inode);
	}

	inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		switch (err) {
		/* Errors that don't invalidate the cached dentry are
		 * returned directly; anything else drops it */
		case -EPERM:
		case -EACCES:
		case -EDQUOT:
		case -ENOSPC:
		case -EROFS:
			goto out_put_ctx;
		default:
			goto out_drop;
		}
	}
	iput(inode);
	/* server returned a different file: the cached dentry is stale */
	if (inode != dentry->d_inode)
		goto out_drop;

	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
	nfs_file_set_open_context(filp, ctx);
	err = 0;

out_put_ctx:
	put_nfs_open_context(ctx);
out:
	dput(parent);
	return err;

out_drop:
	d_drop(dentry);
	err = -EOPENSTALE;
	goto out_put_ctx;
}
  831. static int
  832. nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  833. {
  834. int ret;
  835. struct inode *inode = file->f_path.dentry->d_inode;
  836. ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
  837. mutex_lock(&inode->i_mutex);
  838. ret = nfs_file_fsync_commit(file, start, end, datasync);
  839. if (!ret && !datasync)
  840. /* application has asked for meta-data sync */
  841. ret = pnfs_layoutcommit_inode(inode, true);
  842. mutex_unlock(&inode->i_mutex);
  843. return ret;
  844. }
/* File operations for NFSv4 regular files (v4-specific open and fsync). */
const struct file_operations nfs4_file_operations = {
	.llseek		= nfs_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= nfs_file_read,
	.aio_write	= nfs_file_write,
	.mmap		= nfs_file_mmap,
	.open		= nfs4_file_open,
	.flush		= nfs_file_flush,
	.release	= nfs_file_release,
	.fsync		= nfs4_file_fsync,
	.lock		= nfs_lock,
	.flock		= nfs_flock,
	.splice_read	= nfs_file_splice_read,
	.splice_write	= nfs_file_splice_write,
	.check_flags	= nfs_check_flags,
	.setlease	= nfs_setlease,
};
#endif	/* CONFIG_NFS_V4 */