  1. /*
  2. * linux/fs/read_write.c
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. */
  6. #include <linux/slab.h>
  7. #include <linux/stat.h>
  8. #include <linux/fcntl.h>
  9. #include <linux/file.h>
  10. #include <linux/uio.h>
  11. #include <linux/fsnotify.h>
  12. #include <linux/security.h>
  13. #include <linux/export.h>
  14. #include <linux/syscalls.h>
  15. #include <linux/pagemap.h>
  16. #include <linux/splice.h>
  17. #include <linux/compat.h>
  18. #include "read_write.h"
  19. #include <asm/uaccess.h>
  20. #include <asm/unistd.h>
/*
 * Default file_operations for read-only files served out of the page
 * cache.  No .write/.aio_write entries: vfs_write() returns -EINVAL
 * when both are absent, so writes fail at the VFS layer.
 */
const struct file_operations generic_ro_fops = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.aio_read	= generic_file_aio_read,
	.mmap		= generic_file_readonly_mmap,
	.splice_read	= generic_file_splice_read,
};
EXPORT_SYMBOL(generic_ro_fops);
/*
 * Nonzero when @file permits offsets that would be negative as a signed
 * loff_t (FMODE_UNSIGNED_OFFSET); callers use this to skip the usual
 * "offset < 0 is -EINVAL" checks.
 */
static inline int unsigned_offsets(struct file *file)
{
	return file->f_mode & FMODE_UNSIGNED_OFFSET;
}
  33. static loff_t lseek_execute(struct file *file, struct inode *inode,
  34. loff_t offset, loff_t maxsize)
  35. {
  36. if (offset < 0 && !unsigned_offsets(file))
  37. return -EINVAL;
  38. if (offset > maxsize)
  39. return -EINVAL;
  40. if (offset != file->f_pos) {
  41. file->f_pos = offset;
  42. file->f_version = 0;
  43. }
  44. return offset;
  45. }
/**
 * generic_file_llseek_size - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @maxsize:	max size of this file in file system
 * @eof:	offset used for SEEK_END position
 *
 * This is a variant of generic_file_llseek that allows passing in a custom
 * maximum file size and a custom EOF position, for e.g. hashed directories
 *
 * Synchronization:
 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
 * read/writes behave like SEEK_SET against seeks.
 */
loff_t
generic_file_llseek_size(struct file *file, loff_t offset, int whence,
		loff_t maxsize, loff_t eof)
{
	struct inode *inode = file->f_mapping->host;

	switch (whence) {
	case SEEK_END:
		offset += eof;
		break;
	case SEEK_CUR:
		/*
		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
		 * position-querying operation.  Avoid rewriting the "same"
		 * f_pos value back to the file because a concurrent read(),
		 * write() or lseek() might have altered it
		 */
		if (offset == 0)
			return file->f_pos;
		/*
		 * f_lock protects against read/modify/write race with other
		 * SEEK_CURs. Note that parallel writes and reads behave
		 * like SEEK_SET.
		 */
		spin_lock(&file->f_lock);
		offset = lseek_execute(file, inode, file->f_pos + offset,
				       maxsize);
		spin_unlock(&file->f_lock);
		return offset;
	case SEEK_DATA:
		/*
		 * In the generic case the entire file is data, so as long as
		 * offset isn't at the end of the file then the offset is data.
		 */
		if (offset >= eof)
			return -ENXIO;
		break;
	case SEEK_HOLE:
		/*
		 * There is a virtual hole at the end of the file, so as long as
		 * offset isn't i_size or larger, return i_size.
		 */
		if (offset >= eof)
			return -ENXIO;
		offset = eof;
		break;
	}

	/* SEEK_SET and the fall-through cases commit the offset here */
	return lseek_execute(file, inode, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);
/**
 * generic_file_llseek - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * This is a generic implemenation of ->llseek useable for all normal local
 * filesystems.  It just updates the file offset to the value specified by
 * @offset and @whence.
 *
 * Thin wrapper: delegates to generic_file_llseek_size() with the
 * filesystem's s_maxbytes as the limit and i_size as the EOF position.
 */
loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;

	return generic_file_llseek_size(file, offset, whence,
					inode->i_sb->s_maxbytes,
					i_size_read(inode));
}
EXPORT_SYMBOL(generic_file_llseek);
/**
 * noop_llseek - No Operation Performed llseek implementation
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * This is an implementation of ->llseek useable for the rare special case when
 * userspace expects the seek to succeed but the (device) file is actually not
 * able to perform the seek. In this case you use noop_llseek() instead of
 * falling back to the default implementation of ->llseek.
 *
 * Always "succeeds" by returning the current position unchanged.
 */
loff_t noop_llseek(struct file *file, loff_t offset, int whence)
{
	return file->f_pos;
}
EXPORT_SYMBOL(noop_llseek);
/*
 * ->llseek for files that cannot seek at all: always fails with
 * -ESPIPE.  Also the fallback used by vfs_llseek() when a file lacks
 * FMODE_LSEEK or an llseek op.
 */
loff_t no_llseek(struct file *file, loff_t offset, int whence)
{
	return -ESPIPE;
}
EXPORT_SYMBOL(no_llseek);
/*
 * Default ->llseek: like generic_file_llseek() but serialized under
 * i_mutex instead of relying on atomic f_pos updates, and with no
 * s_maxbytes upper-bound check.  Supports SEEK_SET/CUR/END and the
 * trivial SEEK_DATA/SEEK_HOLE semantics (whole file is data, virtual
 * hole at EOF).
 */
loff_t default_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	loff_t retval;

	mutex_lock(&inode->i_mutex);
	switch (whence) {
	case SEEK_END:
		offset += i_size_read(inode);
		break;
	case SEEK_CUR:
		/* lseek(fd, 0, SEEK_CUR) just reports the position */
		if (offset == 0) {
			retval = file->f_pos;
			goto out;
		}
		offset += file->f_pos;
		break;
	case SEEK_DATA:
		/*
		 * In the generic case the entire file is data, so as
		 * long as offset isn't at the end of the file then the
		 * offset is data.
		 */
		if (offset >= inode->i_size) {
			retval = -ENXIO;
			goto out;
		}
		break;
	case SEEK_HOLE:
		/*
		 * There is a virtual hole at the end of the file, so
		 * as long as offset isn't i_size or larger, return
		 * i_size.
		 */
		if (offset >= inode->i_size) {
			retval = -ENXIO;
			goto out;
		}
		offset = inode->i_size;
		break;
	}
	retval = -EINVAL;
	/* Negative offsets only valid on FMODE_UNSIGNED_OFFSET files */
	if (offset >= 0 || unsigned_offsets(file)) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
		}
		retval = offset;
	}
out:
	mutex_unlock(&inode->i_mutex);
	return retval;
}
EXPORT_SYMBOL(default_llseek);
  203. loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
  204. {
  205. loff_t (*fn)(struct file *, loff_t, int);
  206. fn = no_llseek;
  207. if (file->f_mode & FMODE_LSEEK) {
  208. if (file->f_op && file->f_op->llseek)
  209. fn = file->f_op->llseek;
  210. }
  211. return fn(file, offset, whence);
  212. }
  213. EXPORT_SYMBOL(vfs_llseek);
/*
 * lseek(2).  Validates @whence, performs the seek, and reports
 * -EOVERFLOW when the resulting loff_t does not fit in the userspace
 * off_t return type (32-bit non-LFS callers).
 */
SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
	off_t retval;
	struct fd f = fdget(fd);
	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence <= SEEK_MAX) {
		loff_t res = vfs_llseek(f.file, offset, whence);
		retval = res;
		/* round-trip through off_t detects truncation */
		if (res != (loff_t)retval)
			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
	}
	fdput(f);
	return retval;
}
#ifdef CONFIG_COMPAT
/* 32-bit compat lseek: compat_off_t widens implicitly to off_t */
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
	return sys_lseek(fd, offset, whence);
}
#endif
#ifdef __ARCH_WANT_SYS_LLSEEK
/*
 * _llseek(2) for 32-bit architectures: the 64-bit offset arrives split
 * into two unsigned longs, and the 64-bit result is written back to the
 * user-supplied @result pointer instead of the return value.
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, whence)
{
	int retval;
	struct fd f = fdget(fd);
	loff_t offset;

	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence > SEEK_MAX)
		goto out_putf;

	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
			whence);

	retval = (int)offset;
	if (offset >= 0) {
		/* success: copy the full 64-bit position to userspace */
		retval = -EFAULT;
		if (!copy_to_user(result, &offset, sizeof(offset)))
			retval = 0;
	}
out_putf:
	fdput(f);
	return retval;
}
#endif
/*
 * rw_verify_area doesn't like huge counts. We limit
 * them to something that fits in "int" so that others
 * won't have to do range checks all the time.
 *
 * Checks a prospective read (@read_write == READ) or write at *@ppos of
 * @count bytes: rejects negative counts/positions, checks for loff_t
 * wraparound, enforces mandatory locks and LSM permission.  Returns the
 * byte count clamped to MAX_RW_COUNT on success, negative errno on
 * failure.
 */
int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
{
	struct inode *inode;
	loff_t pos;
	int retval = -EINVAL;

	inode = file_inode(file);
	if (unlikely((ssize_t) count < 0))
		return retval;
	pos = *ppos;
	if (unlikely(pos < 0)) {
		/* negative pos only legal with FMODE_UNSIGNED_OFFSET */
		if (!unsigned_offsets(file))
			return retval;
		if (count >= -pos) /* both values are in 0..LLONG_MAX */
			return -EOVERFLOW;
	} else if (unlikely((loff_t) (pos + count) < 0)) {
		/* pos + count wrapped past LLONG_MAX */
		if (!unsigned_offsets(file))
			return retval;
	}

	if (unlikely(inode->i_flock && mandatory_lock(inode))) {
		retval = locks_mandatory_area(
			read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
			inode, file, pos, count);
		if (retval < 0)
			return retval;
	}
	retval = security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
	if (retval)
		return retval;
	return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
}
/*
 * Sleep until a retried sync kiocb is kicked.  If the kick already
 * arrived, just consume it; otherwise block uninterruptibly until the
 * completion path kicks us.
 */
static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
{
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (!kiocbIsKicked(iocb))
		schedule();
	else
		kiocbClearKicked(iocb);
	__set_current_state(TASK_RUNNING);
}
/*
 * Emulate a synchronous ->read using the file's ->aio_read method.
 * Retries on -EIOCBRETRY (waiting for a kick each time), waits for
 * completion on -EIOCBQUEUED, and propagates the final file position
 * back through @ppos.
 */
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;
	kiocb.ki_nbytes = len;

	for (;;) {
		ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
		if (ret != -EIOCBRETRY)
			break;
		wait_on_retry_sync_kiocb(&kiocb);
	}

	if (-EIOCBQUEUED == ret)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}
EXPORT_SYMBOL(do_sync_read);
/*
 * Core VFS read: checks mode/ops/user buffer, runs rw_verify_area()
 * (which also clamps @count to MAX_RW_COUNT), then dispatches to
 * ->read or the ->aio_read sync wrapper.  On progress, generates an
 * fsnotify access event and accounts the bytes to the task.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
		return -EINVAL;
	/* a read writes into the user buffer, hence VERIFY_WRITE */
	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret >= 0) {
		count = ret;	/* possibly clamped */
		if (file->f_op->read)
			ret = file->f_op->read(file, buf, count, pos);
		else
			ret = do_sync_read(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_access(file);
			add_rchar(current, ret);
		}
		inc_syscr(current);
	}

	return ret;
}
EXPORT_SYMBOL(vfs_read);
/*
 * Emulate a synchronous ->write using the file's ->aio_write method.
 * Mirror image of do_sync_read(): retries on -EIOCBRETRY, waits on
 * -EIOCBQUEUED, and stores the final position back through @ppos.
 */
ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	/* cast drops const to fit the shared iovec type; data is only read */
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;
	kiocb.ki_nbytes = len;

	for (;;) {
		ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
		if (ret != -EIOCBRETRY)
			break;
		wait_on_retry_sync_kiocb(&kiocb);
	}

	if (-EIOCBQUEUED == ret)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}
EXPORT_SYMBOL(do_sync_write);
/*
 * Core VFS write: mirror of vfs_read().  Checks mode/ops/user buffer,
 * verifies and clamps the range, dispatches to ->write or the
 * ->aio_write sync wrapper, then fires fsnotify and task accounting.
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
		return -EINVAL;
	/* a write reads from the user buffer, hence VERIFY_READ */
	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret >= 0) {
		count = ret;	/* possibly clamped */
		if (file->f_op->write)
			ret = file->f_op->write(file, buf, count, pos);
		else
			ret = do_sync_write(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_modify(file);
			add_wchar(current, ret);
		}
		inc_syscw(current);
	}

	return ret;
}
EXPORT_SYMBOL(vfs_write);
/* Snapshot the file position for a positional syscall helper. */
static inline loff_t file_pos_read(struct file *file)
{
	return file->f_pos;
}
/* Store the (possibly advanced) position back into the file. */
static inline void file_pos_write(struct file *file, loff_t pos)
{
	file->f_pos = pos;
}
/*
 * read(2): reads at the current file position and advances it by the
 * number of bytes transferred.
 */
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
	struct fd f = fdget(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_read(f.file, buf, count, &pos);
		file_pos_write(f.file, pos);
		fdput(f);
	}
	return ret;
}
/*
 * write(2): writes at the current file position and advances it by the
 * number of bytes transferred.
 */
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	struct fd f = fdget(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_write(f.file, buf, count, &pos);
		file_pos_write(f.file, pos);
		fdput(f);
	}

	return ret;
}
/*
 * pread64(2): positional read that does not move f_pos.  Requires
 * FMODE_PREAD (pipes/sockets lack it, hence -ESPIPE) and a
 * non-negative position.
 */
SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
			size_t, count, loff_t, pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_read(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}
/*
 * pwrite64(2): positional write that does not move f_pos.  Requires
 * FMODE_PWRITE and a non-negative position; mirror of pread64.
 */
SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
			 size_t, count, loff_t, pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_write(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}
  464. /*
  465. * Reduce an iovec's length in-place. Return the resulting number of segments
  466. */
  467. unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
  468. {
  469. unsigned long seg = 0;
  470. size_t len = 0;
  471. while (seg < nr_segs) {
  472. seg++;
  473. if (len + iov->iov_len >= to) {
  474. iov->iov_len = to - len;
  475. break;
  476. }
  477. len += iov->iov_len;
  478. iov++;
  479. }
  480. return seg;
  481. }
  482. EXPORT_SYMBOL(iov_shorten);
/*
 * Vector analogue of do_sync_read()/do_sync_write(): drive an aio
 * method @fn synchronously over @iov.  @len is the precomputed total
 * byte count for the kiocb; position is propagated back via @ppos.
 */
ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
{
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;
	kiocb.ki_nbytes = len;

	for (;;) {
		ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
		if (ret != -EIOCBRETRY)
			break;
		wait_on_retry_sync_kiocb(&kiocb);
	}

	if (ret == -EIOCBQUEUED)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}
/* Do it by hand, with file-ops */
/*
 * Fallback for files with only a non-vectored ->read/->write: issue one
 * call per iovec segment via @fn.  Accumulates the transferred total in
 * @ret; on a mid-stream error the bytes already moved are returned, and
 * the error only surfaces if nothing was transferred.  A short transfer
 * (nr != len) also terminates the loop.
 */
ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
		unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
{
	struct iovec *vector = iov;
	ssize_t ret = 0;

	while (nr_segs > 0) {
		void __user *base;
		size_t len;
		ssize_t nr;

		base = vector->iov_base;
		len = vector->iov_len;
		vector++;
		nr_segs--;

		nr = fn(filp, base, len, ppos);

		if (nr < 0) {
			if (!ret)
				ret = nr;	/* nothing moved yet: report the error */
			break;
		}
		ret += nr;
		if (nr != len)	/* short transfer: stop here */
			break;
	}

	return ret;
}
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

/*
 * Copy an iovec array in from userspace and validate it.  On success
 * returns the total byte length (each segment checked for negative
 * length and access_ok, with the tail clipped so the sum never exceeds
 * MAX_RW_COUNT).  *@ret_pointer is always set — to @fast_pointer, or to
 * a kmalloc'd array when nr_segs > @fast_segs, which the CALLER must
 * kfree.  A negative @type skips the access_ok checks.
 */
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
			      unsigned long nr_segs, unsigned long fast_segs,
			      struct iovec *fast_pointer,
			      struct iovec **ret_pointer)
{
	unsigned long seg;
	ssize_t ret;
	struct iovec *iov = fast_pointer;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument
	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0) {
		ret = 0;
		goto out;
	}

	/*
	 * First get the "struct iovec" from user memory and
	 * verify all the pointers
	 */
	if (nr_segs > UIO_MAXIOV) {
		ret = -EINVAL;
		goto out;
	}
	if (nr_segs > fast_segs) {
		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
		if (iov == NULL) {
			ret = -ENOMEM;
			goto out;
		}
	}
	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
		ret = -EFAULT;
		goto out;
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL
	 * if an element length is < 0 when cast to ssize_t or if the
	 * total length would overflow the ssize_t return value of the
	 * system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	ret = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		void __user *buf = iov[seg].iov_base;
		ssize_t len = (ssize_t)iov[seg].iov_len;

		/* see if we we're about to use an invalid len or if
		 * it's about to overflow ssize_t */
		if (len < 0) {
			ret = -EINVAL;
			goto out;
		}
		if (type >= 0
		    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
			ret = -EFAULT;
			goto out;
		}
		if (len > MAX_RW_COUNT - ret) {
			/* clip the segment so the running total caps out */
			len = MAX_RW_COUNT - ret;
			iov[seg].iov_len = len;
		}
		ret += len;
	}
out:
	*ret_pointer = iov;
	return ret;
}
/*
 * Common implementation behind vfs_readv()/vfs_writev().  Copies and
 * validates the user iovec, verifies the range, then uses the aio
 * method via do_sync_readv_writev() when available, else falls back to
 * per-segment do_loop_readv_writev().  Fires fsnotify on success
 * (for reads, even a zero-byte result counts — note the
 * "ret + (type == READ)" trick below).
 */
static ssize_t do_readv_writev(int type, struct file *file,
			       const struct iovec __user * uvector,
			       unsigned long nr_segs, loff_t *pos)
{
	size_t tot_len;
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	ssize_t ret;
	io_fn_t fn;
	iov_fn_t fnv;

	if (!file->f_op) {
		ret = -EINVAL;
		goto out;
	}

	ret = rw_copy_check_uvector(type, uvector, nr_segs,
				    ARRAY_SIZE(iovstack), iovstack, &iov);
	if (ret <= 0)
		goto out;

	tot_len = ret;
	ret = rw_verify_area(type, file, pos, tot_len);
	if (ret < 0)
		goto out;

	fnv = NULL;
	if (type == READ) {
		fn = file->f_op->read;
		fnv = file->f_op->aio_read;
	} else {
		fn = (io_fn_t)file->f_op->write;
		fnv = file->f_op->aio_write;
	}

	if (fnv)
		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
						pos, fnv);
	else
		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

out:
	/* free the kmalloc'd iovec if rw_copy_check_uvector spilled */
	if (iov != iovstack)
		kfree(iov);
	/* READ: notify when ret >= 0; WRITE: only when ret > 0 */
	if ((ret + (type == READ)) > 0) {
		if (type == READ)
			fsnotify_access(file);
		else
			fsnotify_modify(file);
	}
	return ret;
}
/*
 * Vectored VFS read entry point: permission/op checks, then delegate to
 * do_readv_writev() with type READ.
 */
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
		  unsigned long vlen, loff_t *pos)
{
	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
		return -EINVAL;

	return do_readv_writev(READ, file, vec, vlen, pos);
}
EXPORT_SYMBOL(vfs_readv);
/*
 * Vectored VFS write entry point: permission/op checks, then delegate
 * to do_readv_writev() with type WRITE.
 */
ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
		   unsigned long vlen, loff_t *pos)
{
	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
		return -EINVAL;

	return do_readv_writev(WRITE, file, vec, vlen, pos);
}
EXPORT_SYMBOL(vfs_writev);
/*
 * readv(2): vectored read at the current position; advances f_pos and
 * accounts the transferred bytes to the task.
 */
SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	struct fd f = fdget(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_readv(f.file, vec, vlen, &pos);
		file_pos_write(f.file, pos);
		fdput(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}
/*
 * writev(2): vectored write at the current position; advances f_pos and
 * accounts the transferred bytes to the task.
 */
SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	struct fd f = fdget(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_writev(f.file, vec, vlen, &pos);
		file_pos_write(f.file, pos);
		fdput(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}
/*
 * Combine the split preadv/pwritev position words into a loff_t.  The
 * shift is done in two HALF_LONG_BITS steps so that on 64-bit (where a
 * single shift by BITS_PER_LONG == 64 would be undefined behavior) the
 * high word is cleanly shifted out, leaving pos_l as the full offset.
 */
static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
{
#define HALF_LONG_BITS (BITS_PER_LONG / 2)
	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
}
/*
 * preadv(2): positional vectored read.  Position arrives split into
 * low/high words; requires FMODE_PREAD and a non-negative position.
 * Does not move f_pos.
 */
SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_readv(f.file, vec, vlen, &pos);
		fdput(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}
/*
 * pwritev(2): positional vectored write; mirror of preadv.  Requires
 * FMODE_PWRITE and a non-negative position.  Does not move f_pos.
 */
SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_writev(f.file, vec, vlen, &pos);
		fdput(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}
/*
 * Common implementation for sendfile(2)/sendfile64(2): copy up to
 * @count bytes from @in_fd to @out_fd via do_splice_direct().
 *
 * @ppos:  input position; NULL means use (and advance) in_fd's f_pos.
 * @max:   upper offset bound; 0 means the smaller of the two
 *         filesystems' s_maxbytes.
 *
 * Both files are range-verified (which also clamps @count), the count
 * is clipped against @max, and on success rchar/wchar accounting and
 * fsnotify events are generated for both ends.  Resources are released
 * through the usual goto-cleanup chain.
 */
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
			   size_t count, loff_t max)
{
	struct fd in, out;
	struct inode *in_inode, *out_inode;
	loff_t pos;
	ssize_t retval;
	int fl;

	/*
	 * Get input file, and verify that it is ok..
	 */
	retval = -EBADF;
	in = fdget(in_fd);
	if (!in.file)
		goto out;
	if (!(in.file->f_mode & FMODE_READ))
		goto fput_in;
	retval = -ESPIPE;
	if (!ppos)
		ppos = &in.file->f_pos;
	else
		/* explicit position requires pread-capable input */
		if (!(in.file->f_mode & FMODE_PREAD))
			goto fput_in;
	retval = rw_verify_area(READ, in.file, ppos, count);
	if (retval < 0)
		goto fput_in;
	count = retval;

	/*
	 * Get output file, and verify that it is ok..
	 */
	retval = -EBADF;
	out = fdget(out_fd);
	if (!out.file)
		goto fput_in;
	if (!(out.file->f_mode & FMODE_WRITE))
		goto fput_out;
	retval = -EINVAL;
	in_inode = file_inode(in.file);
	out_inode = file_inode(out.file);
	retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
	if (retval < 0)
		goto fput_out;
	count = retval;

	if (!max)
		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);

	pos = *ppos;
	if (unlikely(pos + count > max)) {
		retval = -EOVERFLOW;
		if (pos >= max)
			goto fput_out;
		count = max - pos;	/* clip transfer to the limit */
	}

	fl = 0;
#if 0
	/*
	 * We need to debate whether we can enable this or not. The
	 * man page documents EAGAIN return for the output at least,
	 * and the application is arguably buggy if it doesn't expect
	 * EAGAIN on a non-blocking file descriptor.
	 */
	if (in.file->f_flags & O_NONBLOCK)
		fl = SPLICE_F_NONBLOCK;
#endif
	retval = do_splice_direct(in.file, ppos, out.file, count, fl);

	if (retval > 0) {
		add_rchar(current, retval);
		add_wchar(current, retval);
		fsnotify_access(in.file);
		fsnotify_modify(out.file);
	}

	inc_syscr(current);
	inc_syscw(current);
	/* the splice may have pushed the position past the limit */
	if (*ppos > max)
		retval = -EOVERFLOW;

fput_out:
	fdput(out);
fput_in:
	fdput(in);
out:
	return retval;
}
/*
 * sendfile(2), 32-bit off_t flavor: the offset is copied in, the
 * transfer capped at MAX_NON_LFS, and the updated position copied back.
 * A NULL @offset means stream from (and advance) in_fd's f_pos.
 */
SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
/*
 * sendfile64(2): full loff_t offset, no MAX_NON_LFS cap (max of 0 lets
 * do_sendfile() use the filesystems' s_maxbytes).  NULL @offset streams
 * from f_pos, like sendfile(2).
 */
SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
  855. #ifdef CONFIG_COMPAT
/*
 * 32-bit compat sendfile: same shape as the native sendfile(2), with
 * compat_off_t/compat_size_t userspace types and the MAX_NON_LFS cap.
 */
COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
		compat_off_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
/*
 * 32-bit compat sendfile64: full 64-bit offset (compat_loff_t matches
 * loff_t in width), no MAX_NON_LFS cap; mirror of native sendfile64(2).
 */
COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
		compat_loff_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
  888. #endif