read_write.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990
  1. /*
  2. * linux/fs/read_write.c
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. */
  6. #include <linux/slab.h>
  7. #include <linux/stat.h>
  8. #include <linux/fcntl.h>
  9. #include <linux/file.h>
  10. #include <linux/uio.h>
  11. #include <linux/fsnotify.h>
  12. #include <linux/security.h>
  13. #include <linux/module.h>
  14. #include <linux/syscalls.h>
  15. #include <linux/pagemap.h>
  16. #include <linux/splice.h>
  17. #include "read_write.h"
  18. #include <asm/uaccess.h>
  19. #include <asm/unistd.h>
  20. const struct file_operations generic_ro_fops = {
  21. .llseek = generic_file_llseek,
  22. .read = do_sync_read,
  23. .aio_read = generic_file_aio_read,
  24. .mmap = generic_file_readonly_mmap,
  25. .splice_read = generic_file_splice_read,
  26. };
  27. EXPORT_SYMBOL(generic_ro_fops);
  28. static inline int unsigned_offsets(struct file *file)
  29. {
  30. return file->f_mode & FMODE_UNSIGNED_OFFSET;
  31. }
  32. /**
  33. * generic_file_llseek_unlocked - lockless generic llseek implementation
  34. * @file: file structure to seek on
  35. * @offset: file offset to seek to
  36. * @origin: type of seek
  37. *
  38. * Updates the file offset to the value specified by @offset and @origin.
  39. * Locking must be provided by the caller.
  40. */
  41. loff_t
  42. generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
  43. {
  44. struct inode *inode = file->f_mapping->host;
  45. switch (origin) {
  46. case SEEK_END:
  47. offset += inode->i_size;
  48. break;
  49. case SEEK_CUR:
  50. /*
  51. * Here we special-case the lseek(fd, 0, SEEK_CUR)
  52. * position-querying operation. Avoid rewriting the "same"
  53. * f_pos value back to the file because a concurrent read(),
  54. * write() or lseek() might have altered it
  55. */
  56. if (offset == 0)
  57. return file->f_pos;
  58. offset += file->f_pos;
  59. break;
  60. case SEEK_DATA:
  61. /*
  62. * In the generic case the entire file is data, so as long as
  63. * offset isn't at the end of the file then the offset is data.
  64. */
  65. if (offset >= inode->i_size)
  66. return -ENXIO;
  67. break;
  68. case SEEK_HOLE:
  69. /*
  70. * There is a virtual hole at the end of the file, so as long as
  71. * offset isn't i_size or larger, return i_size.
  72. */
  73. if (offset >= inode->i_size)
  74. return -ENXIO;
  75. offset = inode->i_size;
  76. break;
  77. }
  78. if (offset < 0 && !unsigned_offsets(file))
  79. return -EINVAL;
  80. if (offset > inode->i_sb->s_maxbytes)
  81. return -EINVAL;
  82. /* Special lock needed here? */
  83. if (offset != file->f_pos) {
  84. file->f_pos = offset;
  85. file->f_version = 0;
  86. }
  87. return offset;
  88. }
  89. EXPORT_SYMBOL(generic_file_llseek_unlocked);
  90. /**
  91. * generic_file_llseek - generic llseek implementation for regular files
  92. * @file: file structure to seek on
  93. * @offset: file offset to seek to
  94. * @origin: type of seek
  95. *
  96. * This is a generic implemenation of ->llseek useable for all normal local
  97. * filesystems. It just updates the file offset to the value specified by
  98. * @offset and @origin under i_mutex.
  99. */
  100. loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
  101. {
  102. loff_t rval;
  103. mutex_lock(&file->f_dentry->d_inode->i_mutex);
  104. rval = generic_file_llseek_unlocked(file, offset, origin);
  105. mutex_unlock(&file->f_dentry->d_inode->i_mutex);
  106. return rval;
  107. }
  108. EXPORT_SYMBOL(generic_file_llseek);
  109. /**
  110. * noop_llseek - No Operation Performed llseek implementation
  111. * @file: file structure to seek on
  112. * @offset: file offset to seek to
  113. * @origin: type of seek
  114. *
  115. * This is an implementation of ->llseek useable for the rare special case when
  116. * userspace expects the seek to succeed but the (device) file is actually not
  117. * able to perform the seek. In this case you use noop_llseek() instead of
  118. * falling back to the default implementation of ->llseek.
  119. */
  120. loff_t noop_llseek(struct file *file, loff_t offset, int origin)
  121. {
  122. return file->f_pos;
  123. }
  124. EXPORT_SYMBOL(noop_llseek);
  125. loff_t no_llseek(struct file *file, loff_t offset, int origin)
  126. {
  127. return -ESPIPE;
  128. }
  129. EXPORT_SYMBOL(no_llseek);
  130. loff_t default_llseek(struct file *file, loff_t offset, int origin)
  131. {
  132. struct inode *inode = file->f_path.dentry->d_inode;
  133. loff_t retval;
  134. mutex_lock(&inode->i_mutex);
  135. switch (origin) {
  136. case SEEK_END:
  137. offset += i_size_read(inode);
  138. break;
  139. case SEEK_CUR:
  140. if (offset == 0) {
  141. retval = file->f_pos;
  142. goto out;
  143. }
  144. offset += file->f_pos;
  145. break;
  146. case SEEK_DATA:
  147. /*
  148. * In the generic case the entire file is data, so as
  149. * long as offset isn't at the end of the file then the
  150. * offset is data.
  151. */
  152. if (offset >= inode->i_size) {
  153. retval = -ENXIO;
  154. goto out;
  155. }
  156. break;
  157. case SEEK_HOLE:
  158. /*
  159. * There is a virtual hole at the end of the file, so
  160. * as long as offset isn't i_size or larger, return
  161. * i_size.
  162. */
  163. if (offset >= inode->i_size) {
  164. retval = -ENXIO;
  165. goto out;
  166. }
  167. offset = inode->i_size;
  168. break;
  169. }
  170. retval = -EINVAL;
  171. if (offset >= 0 || unsigned_offsets(file)) {
  172. if (offset != file->f_pos) {
  173. file->f_pos = offset;
  174. file->f_version = 0;
  175. }
  176. retval = offset;
  177. }
  178. out:
  179. mutex_unlock(&inode->i_mutex);
  180. return retval;
  181. }
  182. EXPORT_SYMBOL(default_llseek);
  183. loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
  184. {
  185. loff_t (*fn)(struct file *, loff_t, int);
  186. fn = no_llseek;
  187. if (file->f_mode & FMODE_LSEEK) {
  188. if (file->f_op && file->f_op->llseek)
  189. fn = file->f_op->llseek;
  190. }
  191. return fn(file, offset, origin);
  192. }
  193. EXPORT_SYMBOL(vfs_llseek);
  194. SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
  195. {
  196. off_t retval;
  197. struct file * file;
  198. int fput_needed;
  199. retval = -EBADF;
  200. file = fget_light(fd, &fput_needed);
  201. if (!file)
  202. goto bad;
  203. retval = -EINVAL;
  204. if (origin <= SEEK_MAX) {
  205. loff_t res = vfs_llseek(file, offset, origin);
  206. retval = res;
  207. if (res != (loff_t)retval)
  208. retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
  209. }
  210. fput_light(file, fput_needed);
  211. bad:
  212. return retval;
  213. }
  214. #ifdef __ARCH_WANT_SYS_LLSEEK
  215. SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
  216. unsigned long, offset_low, loff_t __user *, result,
  217. unsigned int, origin)
  218. {
  219. int retval;
  220. struct file * file;
  221. loff_t offset;
  222. int fput_needed;
  223. retval = -EBADF;
  224. file = fget_light(fd, &fput_needed);
  225. if (!file)
  226. goto bad;
  227. retval = -EINVAL;
  228. if (origin > SEEK_MAX)
  229. goto out_putf;
  230. offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
  231. origin);
  232. retval = (int)offset;
  233. if (offset >= 0) {
  234. retval = -EFAULT;
  235. if (!copy_to_user(result, &offset, sizeof(offset)))
  236. retval = 0;
  237. }
  238. out_putf:
  239. fput_light(file, fput_needed);
  240. bad:
  241. return retval;
  242. }
  243. #endif
  244. /*
  245. * rw_verify_area doesn't like huge counts. We limit
  246. * them to something that fits in "int" so that others
  247. * won't have to do range checks all the time.
  248. */
  249. int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
  250. {
  251. struct inode *inode;
  252. loff_t pos;
  253. int retval = -EINVAL;
  254. inode = file->f_path.dentry->d_inode;
  255. if (unlikely((ssize_t) count < 0))
  256. return retval;
  257. pos = *ppos;
  258. if (unlikely(pos < 0)) {
  259. if (!unsigned_offsets(file))
  260. return retval;
  261. if (count >= -pos) /* both values are in 0..LLONG_MAX */
  262. return -EOVERFLOW;
  263. } else if (unlikely((loff_t) (pos + count) < 0)) {
  264. if (!unsigned_offsets(file))
  265. return retval;
  266. }
  267. if (unlikely(inode->i_flock && mandatory_lock(inode))) {
  268. retval = locks_mandatory_area(
  269. read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
  270. inode, file, pos, count);
  271. if (retval < 0)
  272. return retval;
  273. }
  274. retval = security_file_permission(file,
  275. read_write == READ ? MAY_READ : MAY_WRITE);
  276. if (retval)
  277. return retval;
  278. return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
  279. }
  280. static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
  281. {
  282. set_current_state(TASK_UNINTERRUPTIBLE);
  283. if (!kiocbIsKicked(iocb))
  284. schedule();
  285. else
  286. kiocbClearKicked(iocb);
  287. __set_current_state(TASK_RUNNING);
  288. }
  289. ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
  290. {
  291. struct iovec iov = { .iov_base = buf, .iov_len = len };
  292. struct kiocb kiocb;
  293. ssize_t ret;
  294. init_sync_kiocb(&kiocb, filp);
  295. kiocb.ki_pos = *ppos;
  296. kiocb.ki_left = len;
  297. kiocb.ki_nbytes = len;
  298. for (;;) {
  299. ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
  300. if (ret != -EIOCBRETRY)
  301. break;
  302. wait_on_retry_sync_kiocb(&kiocb);
  303. }
  304. if (-EIOCBQUEUED == ret)
  305. ret = wait_on_sync_kiocb(&kiocb);
  306. *ppos = kiocb.ki_pos;
  307. return ret;
  308. }
  309. EXPORT_SYMBOL(do_sync_read);
  310. ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
  311. {
  312. ssize_t ret;
  313. if (!(file->f_mode & FMODE_READ))
  314. return -EBADF;
  315. if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
  316. return -EINVAL;
  317. if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
  318. return -EFAULT;
  319. ret = rw_verify_area(READ, file, pos, count);
  320. if (ret >= 0) {
  321. count = ret;
  322. if (file->f_op->read)
  323. ret = file->f_op->read(file, buf, count, pos);
  324. else
  325. ret = do_sync_read(file, buf, count, pos);
  326. if (ret > 0) {
  327. fsnotify_access(file);
  328. add_rchar(current, ret);
  329. }
  330. inc_syscr(current);
  331. }
  332. return ret;
  333. }
  334. EXPORT_SYMBOL(vfs_read);
  335. ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
  336. {
  337. struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
  338. struct kiocb kiocb;
  339. ssize_t ret;
  340. init_sync_kiocb(&kiocb, filp);
  341. kiocb.ki_pos = *ppos;
  342. kiocb.ki_left = len;
  343. kiocb.ki_nbytes = len;
  344. for (;;) {
  345. ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
  346. if (ret != -EIOCBRETRY)
  347. break;
  348. wait_on_retry_sync_kiocb(&kiocb);
  349. }
  350. if (-EIOCBQUEUED == ret)
  351. ret = wait_on_sync_kiocb(&kiocb);
  352. *ppos = kiocb.ki_pos;
  353. return ret;
  354. }
  355. EXPORT_SYMBOL(do_sync_write);
  356. ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
  357. {
  358. ssize_t ret;
  359. if (!(file->f_mode & FMODE_WRITE))
  360. return -EBADF;
  361. if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
  362. return -EINVAL;
  363. if (unlikely(!access_ok(VERIFY_READ, buf, count)))
  364. return -EFAULT;
  365. ret = rw_verify_area(WRITE, file, pos, count);
  366. if (ret >= 0) {
  367. count = ret;
  368. if (file->f_op->write)
  369. ret = file->f_op->write(file, buf, count, pos);
  370. else
  371. ret = do_sync_write(file, buf, count, pos);
  372. if (ret > 0) {
  373. fsnotify_modify(file);
  374. add_wchar(current, ret);
  375. }
  376. inc_syscw(current);
  377. }
  378. return ret;
  379. }
  380. EXPORT_SYMBOL(vfs_write);
  381. static inline loff_t file_pos_read(struct file *file)
  382. {
  383. return file->f_pos;
  384. }
  385. static inline void file_pos_write(struct file *file, loff_t pos)
  386. {
  387. file->f_pos = pos;
  388. }
  389. SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
  390. {
  391. struct file *file;
  392. ssize_t ret = -EBADF;
  393. int fput_needed;
  394. file = fget_light(fd, &fput_needed);
  395. if (file) {
  396. loff_t pos = file_pos_read(file);
  397. ret = vfs_read(file, buf, count, &pos);
  398. file_pos_write(file, pos);
  399. fput_light(file, fput_needed);
  400. }
  401. return ret;
  402. }
  403. SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
  404. size_t, count)
  405. {
  406. struct file *file;
  407. ssize_t ret = -EBADF;
  408. int fput_needed;
  409. file = fget_light(fd, &fput_needed);
  410. if (file) {
  411. loff_t pos = file_pos_read(file);
  412. ret = vfs_write(file, buf, count, &pos);
  413. file_pos_write(file, pos);
  414. fput_light(file, fput_needed);
  415. }
  416. return ret;
  417. }
  418. SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
  419. size_t count, loff_t pos)
  420. {
  421. struct file *file;
  422. ssize_t ret = -EBADF;
  423. int fput_needed;
  424. if (pos < 0)
  425. return -EINVAL;
  426. file = fget_light(fd, &fput_needed);
  427. if (file) {
  428. ret = -ESPIPE;
  429. if (file->f_mode & FMODE_PREAD)
  430. ret = vfs_read(file, buf, count, &pos);
  431. fput_light(file, fput_needed);
  432. }
  433. return ret;
  434. }
  435. #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  436. asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
  437. {
  438. return SYSC_pread64((unsigned int) fd, (char __user *) buf,
  439. (size_t) count, pos);
  440. }
  441. SYSCALL_ALIAS(sys_pread64, SyS_pread64);
  442. #endif
  443. SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
  444. size_t count, loff_t pos)
  445. {
  446. struct file *file;
  447. ssize_t ret = -EBADF;
  448. int fput_needed;
  449. if (pos < 0)
  450. return -EINVAL;
  451. file = fget_light(fd, &fput_needed);
  452. if (file) {
  453. ret = -ESPIPE;
  454. if (file->f_mode & FMODE_PWRITE)
  455. ret = vfs_write(file, buf, count, &pos);
  456. fput_light(file, fput_needed);
  457. }
  458. return ret;
  459. }
  460. #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  461. asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
  462. {
  463. return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
  464. (size_t) count, pos);
  465. }
  466. SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
  467. #endif
  468. /*
  469. * Reduce an iovec's length in-place. Return the resulting number of segments
  470. */
  471. unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
  472. {
  473. unsigned long seg = 0;
  474. size_t len = 0;
  475. while (seg < nr_segs) {
  476. seg++;
  477. if (len + iov->iov_len >= to) {
  478. iov->iov_len = to - len;
  479. break;
  480. }
  481. len += iov->iov_len;
  482. iov++;
  483. }
  484. return seg;
  485. }
  486. EXPORT_SYMBOL(iov_shorten);
  487. ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
  488. unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
  489. {
  490. struct kiocb kiocb;
  491. ssize_t ret;
  492. init_sync_kiocb(&kiocb, filp);
  493. kiocb.ki_pos = *ppos;
  494. kiocb.ki_left = len;
  495. kiocb.ki_nbytes = len;
  496. for (;;) {
  497. ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
  498. if (ret != -EIOCBRETRY)
  499. break;
  500. wait_on_retry_sync_kiocb(&kiocb);
  501. }
  502. if (ret == -EIOCBQUEUED)
  503. ret = wait_on_sync_kiocb(&kiocb);
  504. *ppos = kiocb.ki_pos;
  505. return ret;
  506. }
  507. /* Do it by hand, with file-ops */
  508. ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
  509. unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
  510. {
  511. struct iovec *vector = iov;
  512. ssize_t ret = 0;
  513. while (nr_segs > 0) {
  514. void __user *base;
  515. size_t len;
  516. ssize_t nr;
  517. base = vector->iov_base;
  518. len = vector->iov_len;
  519. vector++;
  520. nr_segs--;
  521. nr = fn(filp, base, len, ppos);
  522. if (nr < 0) {
  523. if (!ret)
  524. ret = nr;
  525. break;
  526. }
  527. ret += nr;
  528. if (nr != len)
  529. break;
  530. }
  531. return ret;
  532. }
  533. /* A write operation does a read from user space and vice versa */
  534. #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
  535. ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
  536. unsigned long nr_segs, unsigned long fast_segs,
  537. struct iovec *fast_pointer,
  538. struct iovec **ret_pointer)
  539. {
  540. unsigned long seg;
  541. ssize_t ret;
  542. struct iovec *iov = fast_pointer;
  543. /*
  544. * SuS says "The readv() function *may* fail if the iovcnt argument
  545. * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
  546. * traditionally returned zero for zero segments, so...
  547. */
  548. if (nr_segs == 0) {
  549. ret = 0;
  550. goto out;
  551. }
  552. /*
  553. * First get the "struct iovec" from user memory and
  554. * verify all the pointers
  555. */
  556. if (nr_segs > UIO_MAXIOV) {
  557. ret = -EINVAL;
  558. goto out;
  559. }
  560. if (nr_segs > fast_segs) {
  561. iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
  562. if (iov == NULL) {
  563. ret = -ENOMEM;
  564. goto out;
  565. }
  566. }
  567. if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
  568. ret = -EFAULT;
  569. goto out;
  570. }
  571. /*
  572. * According to the Single Unix Specification we should return EINVAL
  573. * if an element length is < 0 when cast to ssize_t or if the
  574. * total length would overflow the ssize_t return value of the
  575. * system call.
  576. *
  577. * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
  578. * overflow case.
  579. */
  580. ret = 0;
  581. for (seg = 0; seg < nr_segs; seg++) {
  582. void __user *buf = iov[seg].iov_base;
  583. ssize_t len = (ssize_t)iov[seg].iov_len;
  584. /* see if we we're about to use an invalid len or if
  585. * it's about to overflow ssize_t */
  586. if (len < 0) {
  587. ret = -EINVAL;
  588. goto out;
  589. }
  590. if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
  591. ret = -EFAULT;
  592. goto out;
  593. }
  594. if (len > MAX_RW_COUNT - ret) {
  595. len = MAX_RW_COUNT - ret;
  596. iov[seg].iov_len = len;
  597. }
  598. ret += len;
  599. }
  600. out:
  601. *ret_pointer = iov;
  602. return ret;
  603. }
  604. static ssize_t do_readv_writev(int type, struct file *file,
  605. const struct iovec __user * uvector,
  606. unsigned long nr_segs, loff_t *pos)
  607. {
  608. size_t tot_len;
  609. struct iovec iovstack[UIO_FASTIOV];
  610. struct iovec *iov = iovstack;
  611. ssize_t ret;
  612. io_fn_t fn;
  613. iov_fn_t fnv;
  614. if (!file->f_op) {
  615. ret = -EINVAL;
  616. goto out;
  617. }
  618. ret = rw_copy_check_uvector(type, uvector, nr_segs,
  619. ARRAY_SIZE(iovstack), iovstack, &iov);
  620. if (ret <= 0)
  621. goto out;
  622. tot_len = ret;
  623. ret = rw_verify_area(type, file, pos, tot_len);
  624. if (ret < 0)
  625. goto out;
  626. fnv = NULL;
  627. if (type == READ) {
  628. fn = file->f_op->read;
  629. fnv = file->f_op->aio_read;
  630. } else {
  631. fn = (io_fn_t)file->f_op->write;
  632. fnv = file->f_op->aio_write;
  633. }
  634. if (fnv)
  635. ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
  636. pos, fnv);
  637. else
  638. ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
  639. out:
  640. if (iov != iovstack)
  641. kfree(iov);
  642. if ((ret + (type == READ)) > 0) {
  643. if (type == READ)
  644. fsnotify_access(file);
  645. else
  646. fsnotify_modify(file);
  647. }
  648. return ret;
  649. }
  650. ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
  651. unsigned long vlen, loff_t *pos)
  652. {
  653. if (!(file->f_mode & FMODE_READ))
  654. return -EBADF;
  655. if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
  656. return -EINVAL;
  657. return do_readv_writev(READ, file, vec, vlen, pos);
  658. }
  659. EXPORT_SYMBOL(vfs_readv);
  660. ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
  661. unsigned long vlen, loff_t *pos)
  662. {
  663. if (!(file->f_mode & FMODE_WRITE))
  664. return -EBADF;
  665. if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
  666. return -EINVAL;
  667. return do_readv_writev(WRITE, file, vec, vlen, pos);
  668. }
  669. EXPORT_SYMBOL(vfs_writev);
  670. SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
  671. unsigned long, vlen)
  672. {
  673. struct file *file;
  674. ssize_t ret = -EBADF;
  675. int fput_needed;
  676. file = fget_light(fd, &fput_needed);
  677. if (file) {
  678. loff_t pos = file_pos_read(file);
  679. ret = vfs_readv(file, vec, vlen, &pos);
  680. file_pos_write(file, pos);
  681. fput_light(file, fput_needed);
  682. }
  683. if (ret > 0)
  684. add_rchar(current, ret);
  685. inc_syscr(current);
  686. return ret;
  687. }
  688. SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
  689. unsigned long, vlen)
  690. {
  691. struct file *file;
  692. ssize_t ret = -EBADF;
  693. int fput_needed;
  694. file = fget_light(fd, &fput_needed);
  695. if (file) {
  696. loff_t pos = file_pos_read(file);
  697. ret = vfs_writev(file, vec, vlen, &pos);
  698. file_pos_write(file, pos);
  699. fput_light(file, fput_needed);
  700. }
  701. if (ret > 0)
  702. add_wchar(current, ret);
  703. inc_syscw(current);
  704. return ret;
  705. }
  706. static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
  707. {
  708. #define HALF_LONG_BITS (BITS_PER_LONG / 2)
  709. return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
  710. }
  711. SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
  712. unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  713. {
  714. loff_t pos = pos_from_hilo(pos_h, pos_l);
  715. struct file *file;
  716. ssize_t ret = -EBADF;
  717. int fput_needed;
  718. if (pos < 0)
  719. return -EINVAL;
  720. file = fget_light(fd, &fput_needed);
  721. if (file) {
  722. ret = -ESPIPE;
  723. if (file->f_mode & FMODE_PREAD)
  724. ret = vfs_readv(file, vec, vlen, &pos);
  725. fput_light(file, fput_needed);
  726. }
  727. if (ret > 0)
  728. add_rchar(current, ret);
  729. inc_syscr(current);
  730. return ret;
  731. }
  732. SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
  733. unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  734. {
  735. loff_t pos = pos_from_hilo(pos_h, pos_l);
  736. struct file *file;
  737. ssize_t ret = -EBADF;
  738. int fput_needed;
  739. if (pos < 0)
  740. return -EINVAL;
  741. file = fget_light(fd, &fput_needed);
  742. if (file) {
  743. ret = -ESPIPE;
  744. if (file->f_mode & FMODE_PWRITE)
  745. ret = vfs_writev(file, vec, vlen, &pos);
  746. fput_light(file, fput_needed);
  747. }
  748. if (ret > 0)
  749. add_wchar(current, ret);
  750. inc_syscw(current);
  751. return ret;
  752. }
  753. static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
  754. size_t count, loff_t max)
  755. {
  756. struct file * in_file, * out_file;
  757. struct inode * in_inode, * out_inode;
  758. loff_t pos;
  759. ssize_t retval;
  760. int fput_needed_in, fput_needed_out, fl;
  761. /*
  762. * Get input file, and verify that it is ok..
  763. */
  764. retval = -EBADF;
  765. in_file = fget_light(in_fd, &fput_needed_in);
  766. if (!in_file)
  767. goto out;
  768. if (!(in_file->f_mode & FMODE_READ))
  769. goto fput_in;
  770. retval = -ESPIPE;
  771. if (!ppos)
  772. ppos = &in_file->f_pos;
  773. else
  774. if (!(in_file->f_mode & FMODE_PREAD))
  775. goto fput_in;
  776. retval = rw_verify_area(READ, in_file, ppos, count);
  777. if (retval < 0)
  778. goto fput_in;
  779. count = retval;
  780. /*
  781. * Get output file, and verify that it is ok..
  782. */
  783. retval = -EBADF;
  784. out_file = fget_light(out_fd, &fput_needed_out);
  785. if (!out_file)
  786. goto fput_in;
  787. if (!(out_file->f_mode & FMODE_WRITE))
  788. goto fput_out;
  789. retval = -EINVAL;
  790. in_inode = in_file->f_path.dentry->d_inode;
  791. out_inode = out_file->f_path.dentry->d_inode;
  792. retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
  793. if (retval < 0)
  794. goto fput_out;
  795. count = retval;
  796. if (!max)
  797. max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
  798. pos = *ppos;
  799. if (unlikely(pos + count > max)) {
  800. retval = -EOVERFLOW;
  801. if (pos >= max)
  802. goto fput_out;
  803. count = max - pos;
  804. }
  805. fl = 0;
  806. #if 0
  807. /*
  808. * We need to debate whether we can enable this or not. The
  809. * man page documents EAGAIN return for the output at least,
  810. * and the application is arguably buggy if it doesn't expect
  811. * EAGAIN on a non-blocking file descriptor.
  812. */
  813. if (in_file->f_flags & O_NONBLOCK)
  814. fl = SPLICE_F_NONBLOCK;
  815. #endif
  816. retval = do_splice_direct(in_file, ppos, out_file, count, fl);
  817. if (retval > 0) {
  818. add_rchar(current, retval);
  819. add_wchar(current, retval);
  820. }
  821. inc_syscr(current);
  822. inc_syscw(current);
  823. if (*ppos > max)
  824. retval = -EOVERFLOW;
  825. fput_out:
  826. fput_light(out_file, fput_needed_out);
  827. fput_in:
  828. fput_light(in_file, fput_needed_in);
  829. out:
  830. return retval;
  831. }
  832. SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
  833. {
  834. loff_t pos;
  835. off_t off;
  836. ssize_t ret;
  837. if (offset) {
  838. if (unlikely(get_user(off, offset)))
  839. return -EFAULT;
  840. pos = off;
  841. ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
  842. if (unlikely(put_user(pos, offset)))
  843. return -EFAULT;
  844. return ret;
  845. }
  846. return do_sendfile(out_fd, in_fd, NULL, count, 0);
  847. }
  848. SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
  849. {
  850. loff_t pos;
  851. ssize_t ret;
  852. if (offset) {
  853. if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  854. return -EFAULT;
  855. ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
  856. if (unlikely(put_user(pos, offset)))
  857. return -EFAULT;
  858. return ret;
  859. }
  860. return do_sendfile(out_fd, in_fd, NULL, count, 0);
  861. }