/*
 *  linux/fs/read_write.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
#include "read_write.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>

const struct file_operations generic_ro_fops = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.aio_read	= generic_file_aio_read,
	.mmap		= generic_file_readonly_mmap,
	.splice_read	= generic_file_splice_read,
};

EXPORT_SYMBOL(generic_ro_fops);
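
/*
 * Illustrative sketch (editor addition, not part of the original file): a
 * read-only filesystem would typically point its regular-file inodes at
 * generic_ro_fops from its inode-setup path, so reads go through the
 * generic paths defined below. The filesystem and the example_iget()
 * helper are hypothetical.
 */
#if 0
static struct inode *example_iget(struct super_block *sb, unsigned long ino)
{
	struct inode *inode = iget_locked(sb, ino);

	if (inode && (inode->i_state & I_NEW)) {
		inode->i_mode = S_IFREG | 0444;		/* read-only regular file */
		inode->i_fop = &generic_ro_fops;	/* llseek/read/mmap via the generic ops */
		unlock_new_inode(inode);
	}
	return inode;
}
#endif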
static inline int unsigned_offsets(struct file *file)
{
	return file->f_mode & FMODE_UNSIGNED_OFFSET;
}

static loff_t lseek_execute(struct file *file, struct inode *inode,
		loff_t offset, loff_t maxsize)
{
	if (offset < 0 && !unsigned_offsets(file))
		return -EINVAL;
	if (offset > maxsize)
		return -EINVAL;

	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
	}
	return offset;
}

/**
 * generic_file_llseek_size - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @origin:	type of seek
 * @maxsize:	max size of this file in file system
 * @eof:	offset used for SEEK_END position
 *
 * This is a variant of generic_file_llseek that allows passing in a custom
 * maximum file size and a custom EOF position, for e.g. hashed directories
 *
 * Synchronization:
 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
 * read/writes behave like SEEK_SET against seeks.
 */
loff_t
generic_file_llseek_size(struct file *file, loff_t offset, int origin,
		loff_t maxsize, loff_t eof)
{
	struct inode *inode = file->f_mapping->host;

	switch (origin) {
	case SEEK_END:
		offset += eof;
		break;
	case SEEK_CUR:
		/*
		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
		 * position-querying operation.  Avoid rewriting the "same"
		 * f_pos value back to the file because a concurrent read(),
		 * write() or lseek() might have altered it
		 */
		if (offset == 0)
			return file->f_pos;
		/*
		 * f_lock protects against read/modify/write race with other
		 * SEEK_CURs. Note that parallel writes and reads behave
		 * like SEEK_SET.
		 */
		spin_lock(&file->f_lock);
		offset = lseek_execute(file, inode, file->f_pos + offset,
				maxsize);
		spin_unlock(&file->f_lock);
		return offset;
	case SEEK_DATA:
		/*
		 * In the generic case the entire file is data, so as long as
		 * offset isn't at the end of the file then the offset is data.
		 */
		if (offset >= eof)
			return -ENXIO;
		break;
	case SEEK_HOLE:
		/*
		 * There is a virtual hole at the end of the file, so as long as
		 * offset isn't i_size or larger, return i_size.
		 */
		if (offset >= eof)
			return -ENXIO;
		offset = eof;
		break;
	}

	return lseek_execute(file, inode, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);
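
/*
 * Illustrative sketch (editor addition, not part of the original file): a
 * filesystem whose maximum file size depends on the inode can implement
 * ->llseek by delegating to generic_file_llseek_size() with its own limit
 * instead of s_maxbytes. example_llseek() and example_max_bytes() are
 * hypothetical names.
 */
#if 0
static loff_t example_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = example_max_bytes(inode);	/* fs-specific per-inode limit */

	return generic_file_llseek_size(file, offset, origin,
					maxbytes, i_size_read(inode));
}
#endif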
/**
 * generic_file_llseek - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @origin:	type of seek
 *
 * This is a generic implementation of ->llseek usable for all normal local
 * filesystems.  It just updates the file offset to the value specified by
 * @offset and @origin under i_mutex.
 */
loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *inode = file->f_mapping->host;

	return generic_file_llseek_size(file, offset, origin,
					inode->i_sb->s_maxbytes,
					i_size_read(inode));
}
EXPORT_SYMBOL(generic_file_llseek);

/**
 * noop_llseek - No Operation Performed llseek implementation
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @origin:	type of seek
 *
 * This is an implementation of ->llseek usable for the rare special case when
 * userspace expects the seek to succeed but the (device) file is actually not
 * able to perform the seek. In this case you use noop_llseek() instead of
 * falling back to the default implementation of ->llseek.
 */
loff_t noop_llseek(struct file *file, loff_t offset, int origin)
{
	return file->f_pos;
}
EXPORT_SYMBOL(noop_llseek);
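
/*
 * Illustrative sketch (editor addition, not part of the original file): a
 * device driver whose file is not really seekable, but whose userspace
 * expects lseek() to "succeed", would plug noop_llseek() into its
 * file_operations; a driver that wants lseek() to fail outright would use
 * no_llseek() (defined just below) instead. The example_dev_* names are
 * hypothetical.
 */
#if 0
static const struct file_operations example_dev_fops = {
	.owner	= THIS_MODULE,
	.llseek	= noop_llseek,		/* pretend to seek, leave f_pos unchanged */
	.read	= example_dev_read,	/* hypothetical driver read method */
	.write	= example_dev_write,	/* hypothetical driver write method */
};
#endif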
loff_t no_llseek(struct file *file, loff_t offset, int origin)
{
	return -ESPIPE;
}
EXPORT_SYMBOL(no_llseek);

loff_t default_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	loff_t retval;

	mutex_lock(&inode->i_mutex);
	switch (origin) {
	case SEEK_END:
		offset += i_size_read(inode);
		break;
	case SEEK_CUR:
		if (offset == 0) {
			retval = file->f_pos;
			goto out;
		}
		offset += file->f_pos;
		break;
	case SEEK_DATA:
		/*
		 * In the generic case the entire file is data, so as
		 * long as offset isn't at the end of the file then the
		 * offset is data.
		 */
		if (offset >= inode->i_size) {
			retval = -ENXIO;
			goto out;
		}
		break;
	case SEEK_HOLE:
		/*
		 * There is a virtual hole at the end of the file, so
		 * as long as offset isn't i_size or larger, return
		 * i_size.
		 */
		if (offset >= inode->i_size) {
			retval = -ENXIO;
			goto out;
		}
		offset = inode->i_size;
		break;
	}
	retval = -EINVAL;
	if (offset >= 0 || unsigned_offsets(file)) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
		}
		retval = offset;
	}
out:
	mutex_unlock(&inode->i_mutex);
	return retval;
}
EXPORT_SYMBOL(default_llseek);

loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t (*fn)(struct file *, loff_t, int);

	fn = no_llseek;
	if (file->f_mode & FMODE_LSEEK) {
		if (file->f_op && file->f_op->llseek)
			fn = file->f_op->llseek;
	}
	return fn(file, offset, origin);
}
EXPORT_SYMBOL(vfs_llseek);

SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
{
	off_t retval;
	struct fd f = fdget(fd);

	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (origin <= SEEK_MAX) {
		loff_t res = vfs_llseek(f.file, offset, origin);
		retval = res;
		if (res != (loff_t)retval)
			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
	}
	fdput(f);
	return retval;
}

#ifdef __ARCH_WANT_SYS_LLSEEK
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, origin)
{
	int retval;
	struct fd f = fdget(fd);
	loff_t offset;

	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (origin > SEEK_MAX)
		goto out_putf;

	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
			origin);

	retval = (int)offset;
	if (offset >= 0) {
		retval = -EFAULT;
		if (!copy_to_user(result, &offset, sizeof(offset)))
			retval = 0;
	}
out_putf:
	fdput(f);
	return retval;
}
#endif

/*
 * rw_verify_area doesn't like huge counts. We limit
 * them to something that fits in "int" so that others
 * won't have to do range checks all the time.
 */
int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
{
	struct inode *inode;
	loff_t pos;
	int retval = -EINVAL;

	inode = file->f_path.dentry->d_inode;
	if (unlikely((ssize_t) count < 0))
		return retval;
	pos = *ppos;
	if (unlikely(pos < 0)) {
		if (!unsigned_offsets(file))
			return retval;
		if (count >= -pos) /* both values are in 0..LLONG_MAX */
			return -EOVERFLOW;
	} else if (unlikely((loff_t) (pos + count) < 0)) {
		if (!unsigned_offsets(file))
			return retval;
	}

	if (unlikely(inode->i_flock && mandatory_lock(inode))) {
		retval = locks_mandatory_area(
			read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
			inode, file, pos, count);
		if (retval < 0)
			return retval;
	}
	retval = security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
	if (retval)
		return retval;
	return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
}

static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
{
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (!kiocbIsKicked(iocb))
		schedule();
	else
		kiocbClearKicked(iocb);
	__set_current_state(TASK_RUNNING);
}

ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;
	kiocb.ki_nbytes = len;

	for (;;) {
		ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
		if (ret != -EIOCBRETRY)
			break;
		wait_on_retry_sync_kiocb(&kiocb);
	}

	if (-EIOCBQUEUED == ret)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}
EXPORT_SYMBOL(do_sync_read);

ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret >= 0) {
		count = ret;
		if (file->f_op->read)
			ret = file->f_op->read(file, buf, count, pos);
		else
			ret = do_sync_read(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_access(file);
			add_rchar(current, ret);
		}
		inc_syscr(current);
	}

	return ret;
}
EXPORT_SYMBOL(vfs_read);
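
/*
 * Illustrative sketch (editor addition, not part of the original file): in
 * this kernel generation, code that needs to read a file from kernel
 * context typically opens it with filp_open() and calls vfs_read() with
 * the address limit temporarily widened, since vfs_read() expects a user
 * pointer. The helper name and the minimal error handling are assumptions.
 */
#if 0
static ssize_t example_kernel_read(const char *path, void *buf, size_t len)
{
	struct file *file;
	mm_segment_t old_fs;
	loff_t pos = 0;
	ssize_t ret;

	file = filp_open(path, O_RDONLY, 0);
	if (IS_ERR(file))
		return PTR_ERR(file);

	old_fs = get_fs();
	set_fs(KERNEL_DS);		/* let access_ok() accept a kernel buffer */
	ret = vfs_read(file, (char __user *)buf, len, &pos);
	set_fs(old_fs);

	filp_close(file, NULL);
	return ret;
}
#endif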
ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;
	kiocb.ki_nbytes = len;

	for (;;) {
		ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
		if (ret != -EIOCBRETRY)
			break;
		wait_on_retry_sync_kiocb(&kiocb);
	}

	if (-EIOCBQUEUED == ret)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}
EXPORT_SYMBOL(do_sync_write);

ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret >= 0) {
		count = ret;
		if (file->f_op->write)
			ret = file->f_op->write(file, buf, count, pos);
		else
			ret = do_sync_write(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_modify(file);
			add_wchar(current, ret);
		}
		inc_syscw(current);
	}

	return ret;
}
EXPORT_SYMBOL(vfs_write);

static inline loff_t file_pos_read(struct file *file)
{
	return file->f_pos;
}

static inline void file_pos_write(struct file *file, loff_t pos)
{
	file->f_pos = pos;
}

SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
	struct fd f = fdget(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_read(f.file, buf, count, &pos);
		file_pos_write(f.file, pos);
		fdput(f);
	}
	return ret;
}

SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	struct fd f = fdget(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_write(f.file, buf, count, &pos);
		file_pos_write(f.file, pos);
		fdput(f);
	}

	return ret;
}

SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
			size_t count, loff_t pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_read(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
{
	return SYSC_pread64((unsigned int) fd, (char __user *) buf,
			    (size_t) count, pos);
}
SYSCALL_ALIAS(sys_pread64, SyS_pread64);
#endif

SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
			 size_t count, loff_t pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_write(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
{
	return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
			     (size_t) count, pos);
}
SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
#endif

/*
 * Reduce an iovec's length in-place.  Return the resulting number of segments
 */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
{
	unsigned long seg = 0;
	size_t len = 0;

	while (seg < nr_segs) {
		seg++;
		if (len + iov->iov_len >= to) {
			iov->iov_len = to - len;
			break;
		}
		len += iov->iov_len;
		iov++;
	}
	return seg;
}
EXPORT_SYMBOL(iov_shorten);
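
/*
 * Illustrative sketch (editor addition, not part of the original file):
 * iov_shorten() truncates an iovec array in place so that it covers at
 * most 'to' bytes and returns how many segments remain in use. A caller
 * whose byte count was clamped (e.g. by rw_verify_area()) could trim its
 * vector like this; the helper name is hypothetical.
 */
#if 0
static void example_clamp_iov(struct iovec *iov, unsigned long *nr_segs,
			      size_t allowed)
{
	/* After this, the first *nr_segs entries describe at most 'allowed' bytes. */
	*nr_segs = iov_shorten(iov, *nr_segs, allowed);
}
#endif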
ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
{
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;
	kiocb.ki_nbytes = len;

	for (;;) {
		ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
		if (ret != -EIOCBRETRY)
			break;
		wait_on_retry_sync_kiocb(&kiocb);
	}

	if (ret == -EIOCBQUEUED)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}

/* Do it by hand, with file-ops */
ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
		unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
{
	struct iovec *vector = iov;
	ssize_t ret = 0;

	while (nr_segs > 0) {
		void __user *base;
		size_t len;
		ssize_t nr;

		base = vector->iov_base;
		len = vector->iov_len;
		vector++;
		nr_segs--;

		nr = fn(filp, base, len, ppos);

		if (nr < 0) {
			if (!ret)
				ret = nr;
			break;
		}
		ret += nr;
		if (nr != len)
			break;
	}

	return ret;
}

/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
			      unsigned long nr_segs, unsigned long fast_segs,
			      struct iovec *fast_pointer,
			      struct iovec **ret_pointer)
{
	unsigned long seg;
	ssize_t ret;
	struct iovec *iov = fast_pointer;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument
	 * was less than or equal to 0, or greater than {IOV_MAX}."  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0) {
		ret = 0;
		goto out;
	}

	/*
	 * First get the "struct iovec" from user memory and
	 * verify all the pointers
	 */
	if (nr_segs > UIO_MAXIOV) {
		ret = -EINVAL;
		goto out;
	}
	if (nr_segs > fast_segs) {
		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
		if (iov == NULL) {
			ret = -ENOMEM;
			goto out;
		}
	}
	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
		ret = -EFAULT;
		goto out;
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL
	 * if an element length is < 0 when cast to ssize_t or if the
	 * total length would overflow the ssize_t return value of the
	 * system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	ret = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		void __user *buf = iov[seg].iov_base;
		ssize_t len = (ssize_t)iov[seg].iov_len;

		/* see if we're about to use an invalid len or if
		 * it's about to overflow ssize_t */
		if (len < 0) {
			ret = -EINVAL;
			goto out;
		}
		if (type >= 0
		    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
			ret = -EFAULT;
			goto out;
		}
		if (len > MAX_RW_COUNT - ret) {
			len = MAX_RW_COUNT - ret;
			iov[seg].iov_len = len;
		}
		ret += len;
	}
out:
	*ret_pointer = iov;
	return ret;
}
static ssize_t do_readv_writev(int type, struct file *file,
			       const struct iovec __user * uvector,
			       unsigned long nr_segs, loff_t *pos)
{
	size_t tot_len;
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	ssize_t ret;
	io_fn_t fn;
	iov_fn_t fnv;

	if (!file->f_op) {
		ret = -EINVAL;
		goto out;
	}

	ret = rw_copy_check_uvector(type, uvector, nr_segs,
				    ARRAY_SIZE(iovstack), iovstack, &iov);
	if (ret <= 0)
		goto out;

	tot_len = ret;
	ret = rw_verify_area(type, file, pos, tot_len);
	if (ret < 0)
		goto out;

	fnv = NULL;
	if (type == READ) {
		fn = file->f_op->read;
		fnv = file->f_op->aio_read;
	} else {
		fn = (io_fn_t)file->f_op->write;
		fnv = file->f_op->aio_write;
	}

	if (fnv)
		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
					   pos, fnv);
	else
		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

out:
	if (iov != iovstack)
		kfree(iov);
	if ((ret + (type == READ)) > 0) {
		if (type == READ)
			fsnotify_access(file);
		else
			fsnotify_modify(file);
	}
	return ret;
}

ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
		  unsigned long vlen, loff_t *pos)
{
	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
		return -EINVAL;

	return do_readv_writev(READ, file, vec, vlen, pos);
}
EXPORT_SYMBOL(vfs_readv);

ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
		   unsigned long vlen, loff_t *pos)
{
	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
		return -EINVAL;

	return do_readv_writev(WRITE, file, vec, vlen, pos);
}
EXPORT_SYMBOL(vfs_writev);

SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	struct fd f = fdget(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_readv(f.file, vec, vlen, &pos);
		file_pos_write(f.file, pos);
		fdput(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	struct fd f = fdget(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_writev(f.file, vec, vlen, &pos);
		file_pos_write(f.file, pos);
		fdput(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}

static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
{
#define HALF_LONG_BITS (BITS_PER_LONG / 2)
	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
}

SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_readv(f.file, vec, vlen, &pos);
		fdput(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_writev(f.file, vec, vlen, &pos);
		fdput(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}

ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
		    loff_t max)
{
	struct fd in, out;
	struct inode *in_inode, *out_inode;
	loff_t pos;
	ssize_t retval;
	int fl;

	/*
	 * Get input file, and verify that it is ok..
	 */
	retval = -EBADF;
	in = fdget(in_fd);
	if (!in.file)
		goto out;
	if (!(in.file->f_mode & FMODE_READ))
		goto fput_in;
	retval = -ESPIPE;
	if (!ppos)
		ppos = &in.file->f_pos;
	else
		if (!(in.file->f_mode & FMODE_PREAD))
			goto fput_in;
	retval = rw_verify_area(READ, in.file, ppos, count);
	if (retval < 0)
		goto fput_in;
	count = retval;

	/*
	 * Get output file, and verify that it is ok..
	 */
	retval = -EBADF;
	out = fdget(out_fd);
	if (!out.file)
		goto fput_in;
	if (!(out.file->f_mode & FMODE_WRITE))
		goto fput_out;
	retval = -EINVAL;
	in_inode = in.file->f_path.dentry->d_inode;
	out_inode = out.file->f_path.dentry->d_inode;
	retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
	if (retval < 0)
		goto fput_out;
	count = retval;

	if (!max)
		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);

	pos = *ppos;
	if (unlikely(pos + count > max)) {
		retval = -EOVERFLOW;
		if (pos >= max)
			goto fput_out;
		count = max - pos;
	}

	fl = 0;
#if 0
	/*
	 * We need to debate whether we can enable this or not. The
	 * man page documents EAGAIN return for the output at least,
	 * and the application is arguably buggy if it doesn't expect
	 * EAGAIN on a non-blocking file descriptor.
	 */
	if (in.file->f_flags & O_NONBLOCK)
		fl = SPLICE_F_NONBLOCK;
#endif
	retval = do_splice_direct(in.file, ppos, out.file, count, fl);

	if (retval > 0) {
		add_rchar(current, retval);
		add_wchar(current, retval);
	}

	inc_syscr(current);
	inc_syscw(current);
	if (*ppos > max)
		retval = -EOVERFLOW;

fput_out:
	fdput(out);
fput_in:
	fdput(in);
out:
	return retval;
}

SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
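
/*
 * Illustrative sketch (editor addition, not part of the original file): the
 * two syscalls above back the userspace sendfile(2)/sendfile64(2)
 * interface. From user code, copying a whole file into another descriptor
 * looks roughly like this (userspace C, not kernel code; error handling is
 * deliberately minimal and the helper name is hypothetical).
 */
#if 0
#include <sys/sendfile.h>
#include <sys/stat.h>

static int example_copy_fd(int out_fd, int in_fd)
{
	struct stat st;
	off_t offset = 0;

	if (fstat(in_fd, &st) < 0)
		return -1;
	while (offset < st.st_size) {
		ssize_t n = sendfile(out_fd, in_fd, &offset, st.st_size - offset);
		if (n <= 0)
			return -1;	/* 0 or error: give up in this sketch */
	}
	return 0;
}
#endif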