/*
 *  fs/eventfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/anon_inodes.h>
#include <linux/eventfd.h>
#include <linux/syscalls.h>
#include <linux/module.h>

struct eventfd_ctx {
        wait_queue_head_t wqh;
        /*
         * Every time that a write(2) is performed on an eventfd, the
         * value of the __u64 being written is added to "count" and a
         * wakeup is performed on "wqh". A read(2) will return the "count"
         * value to userspace, and will reset "count" to zero. The kernel
         * side eventfd_signal() also adds to the "count" counter and
         * issues a wakeup.
         */
        __u64 count;
        unsigned int flags;
};
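
/*
 * Illustrative userspace sketch of the read/write semantics described
 * above (not part of this file); it assumes the eventfd(2) wrapper and
 * uint64_t from <sys/eventfd.h> and <stdint.h>:
 *
 *        int efd = eventfd(0, 0);
 *        uint64_t v = 3;
 *
 *        write(efd, &v, sizeof(v));        // "count" becomes 3, waiters woken
 *        read(efd, &v, sizeof(v));         // v == 3, "count" reset to zero
 */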

/*
 * Adds "n" to the eventfd counter "count". Returns "n" in case of
 * success, or a value lower than "n" in case of counter overflow.
 * This function is supposed to be called by the kernel in paths
 * that do not allow sleeping. In this function we allow the counter
 * to reach the ULLONG_MAX value, and we signal this as overflow
 * condition by returning a POLLERR to poll(2).
 */
int eventfd_signal(struct file *file, int n)
{
        struct eventfd_ctx *ctx = file->private_data;
        unsigned long flags;

        if (n < 0)
                return -EINVAL;
        spin_lock_irqsave(&ctx->wqh.lock, flags);
        if (ULLONG_MAX - ctx->count < n)
                n = (int) (ULLONG_MAX - ctx->count);
        ctx->count += n;
        if (waitqueue_active(&ctx->wqh))
                wake_up_locked_poll(&ctx->wqh, POLLIN);
        spin_unlock_irqrestore(&ctx->wqh.lock, flags);

        return n;
}
EXPORT_SYMBOL_GPL(eventfd_signal);
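
/*
 * Kernel-side sketch (illustrative, not from this file): a driver that
 * holds a reference to an eventfd file can notify userspace from a
 * non-sleeping path. "notify_file" is a hypothetical field the driver
 * stashed earlier, e.g. via eventfd_fget() below:
 *
 *        if (eventfd_signal(notify_file, 1) < 1)
 *                pr_warn("eventfd: counter overflow, POLLERR pending\n");
 */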

static int eventfd_release(struct inode *inode, struct file *file)
{
        kfree(file->private_data);
        return 0;
}

static unsigned int eventfd_poll(struct file *file, poll_table *wait)
{
        struct eventfd_ctx *ctx = file->private_data;
        unsigned int events = 0;
        unsigned long flags;

        poll_wait(file, &ctx->wqh, wait);

        spin_lock_irqsave(&ctx->wqh.lock, flags);
        if (ctx->count > 0)
                events |= POLLIN;
        if (ctx->count == ULLONG_MAX)
                events |= POLLERR;
        if (ULLONG_MAX - 1 > ctx->count)
                events |= POLLOUT;
        spin_unlock_irqrestore(&ctx->wqh.lock, flags);

        return events;
}

static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
                            loff_t *ppos)
{
        struct eventfd_ctx *ctx = file->private_data;
        ssize_t res;
        __u64 ucnt = 0;
        DECLARE_WAITQUEUE(wait, current);

        if (count < sizeof(ucnt))
                return -EINVAL;
        spin_lock_irq(&ctx->wqh.lock);
        res = -EAGAIN;
        if (ctx->count > 0)
                res = sizeof(ucnt);
        else if (!(file->f_flags & O_NONBLOCK)) {
                __add_wait_queue(&ctx->wqh, &wait);
                for (res = 0;;) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        if (ctx->count > 0) {
                                res = sizeof(ucnt);
                                break;
                        }
                        if (signal_pending(current)) {
                                res = -ERESTARTSYS;
                                break;
                        }
                        spin_unlock_irq(&ctx->wqh.lock);
                        schedule();
                        spin_lock_irq(&ctx->wqh.lock);
                }
                __remove_wait_queue(&ctx->wqh, &wait);
                __set_current_state(TASK_RUNNING);
        }
        if (likely(res > 0)) {
                ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
                ctx->count -= ucnt;
                if (waitqueue_active(&ctx->wqh))
                        wake_up_locked_poll(&ctx->wqh, POLLOUT);
        }
        spin_unlock_irq(&ctx->wqh.lock);
        if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
                return -EFAULT;

        return res;
}
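
/*
 * EFD_SEMAPHORE sketch (illustrative): with the flag set at creation
 * time, each read(2) above decrements "count" by one and returns 1,
 * instead of draining the whole counter:
 *
 *        int efd = eventfd(3, EFD_SEMAPHORE);
 *        uint64_t v;
 *
 *        read(efd, &v, sizeof(v));        // v == 1, count drops from 3 to 2
 *        read(efd, &v, sizeof(v));        // v == 1, count drops from 2 to 1
 */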

static ssize_t eventfd_write(struct file *file, const char __user *buf,
                             size_t count, loff_t *ppos)
{
        struct eventfd_ctx *ctx = file->private_data;
        ssize_t res;
        __u64 ucnt;
        DECLARE_WAITQUEUE(wait, current);

        if (count < sizeof(ucnt))
                return -EINVAL;
        if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
                return -EFAULT;
        if (ucnt == ULLONG_MAX)
                return -EINVAL;
        spin_lock_irq(&ctx->wqh.lock);
        res = -EAGAIN;
        if (ULLONG_MAX - ctx->count > ucnt)
                res = sizeof(ucnt);
        else if (!(file->f_flags & O_NONBLOCK)) {
                __add_wait_queue(&ctx->wqh, &wait);
                for (res = 0;;) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        if (ULLONG_MAX - ctx->count > ucnt) {
                                res = sizeof(ucnt);
                                break;
                        }
                        if (signal_pending(current)) {
                                res = -ERESTARTSYS;
                                break;
                        }
                        spin_unlock_irq(&ctx->wqh.lock);
                        schedule();
                        spin_lock_irq(&ctx->wqh.lock);
                }
                __remove_wait_queue(&ctx->wqh, &wait);
                __set_current_state(TASK_RUNNING);
        }
        if (likely(res > 0)) {
                ctx->count += ucnt;
                if (waitqueue_active(&ctx->wqh))
                        wake_up_locked_poll(&ctx->wqh, POLLIN);
        }
        spin_unlock_irq(&ctx->wqh.lock);

        return res;
}

static const struct file_operations eventfd_fops = {
        .release        = eventfd_release,
        .poll           = eventfd_poll,
        .read           = eventfd_read,
        .write          = eventfd_write,
};

struct file *eventfd_fget(int fd)
{
        struct file *file;

        file = fget(fd);
        if (!file)
                return ERR_PTR(-EBADF);
        if (file->f_op != &eventfd_fops) {
                fput(file);
                return ERR_PTR(-EINVAL);
        }

        return file;
}
EXPORT_SYMBOL_GPL(eventfd_fget);
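
/*
 * Illustrative use of eventfd_fget() (not from this file): a kernel
 * consumer validating an eventfd descriptor received from userspace,
 * where "ufd" is a hypothetical ioctl argument:
 *
 *        struct file *file = eventfd_fget(ufd);
 *
 *        if (IS_ERR(file))
 *                return PTR_ERR(file);
 *        // ... stash "file", signal it later, fput(file) on teardown
 */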

SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
        int fd;
        struct eventfd_ctx *ctx;

        /* Check the EFD_* constants for consistency. */
        BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
        BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);

        if (flags & ~EFD_FLAGS_SET)
                return -EINVAL;

        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;

        init_waitqueue_head(&ctx->wqh);
        ctx->count = count;
        ctx->flags = flags;

        /*
         * When we call this, the initialization must be complete, since
         * anon_inode_getfd() will install the fd.
         */
        fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
                              flags & EFD_SHARED_FCNTL_FLAGS);
        if (fd < 0)
                kfree(ctx);

        return fd;
}

SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
        return sys_eventfd2(count, 0);
}
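
/*
 * Flags sketch (illustrative): eventfd2 accepts the EFD_* flags checked
 * above, e.g. creating a close-on-exec, non-blocking descriptor whose
 * reads fail with EAGAIN instead of sleeping when "count" is zero:
 *
 *        int efd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 *        uint64_t v;
 *
 *        if (read(efd, &v, sizeof(v)) < 0 && errno == EAGAIN)
 *                ;        // counter was zero; the read did not block
 */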