/*
 *  fs/eventfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/anon_inodes.h>
#include <linux/eventfd.h>
#include <linux/syscalls.h>

struct eventfd_ctx {
	wait_queue_head_t wqh;
	/*
	 * Every time that a write(2) is performed on an eventfd, the
	 * value of the __u64 being written is added to "count" and a
	 * wakeup is performed on "wqh". A read(2) will return the "count"
	 * value to userspace, and will reset "count" to zero. The kernel
	 * side eventfd_signal() also adds to the "count" counter and
	 * issues a wakeup.
	 */
	__u64 count;
	unsigned int flags;
};
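
/*
 * Illustration only (not part of this file): a minimal userspace sketch
 * of the read/write semantics described above, assuming a Linux system
 * with the <sys/eventfd.h> wrapper available.
 *
 *	#include <sys/eventfd.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int efd = eventfd(0, 0);	// counter starts at 0
 *		uint64_t v = 3;
 *
 *		write(efd, &v, sizeof(v));	// count += 3
 *		v = 4;
 *		write(efd, &v, sizeof(v));	// count += 4
 *		read(efd, &v, sizeof(v));	// v == 7, count reset to 0
 *		printf("%llu\n", (unsigned long long) v);
 *		close(efd);
 *		return 0;
 *	}
 */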

/*
 * Adds "n" to the eventfd counter "count". Returns "n" in case of
 * success, or a value lower than "n" in case of counter overflow.
 * This function is supposed to be called by the kernel in paths
 * that do not allow sleeping. In this function we allow the counter
 * to reach the ULLONG_MAX value, and we signal this as overflow
 * condition by returning a POLLERR to poll(2).
 */
int eventfd_signal(struct file *file, int n)
{
	struct eventfd_ctx *ctx = file->private_data;
	unsigned long flags;

	if (n < 0)
		return -EINVAL;
	spin_lock_irqsave(&ctx->wqh.lock, flags);
	if (ULLONG_MAX - ctx->count < n)
		n = (int) (ULLONG_MAX - ctx->count);
	ctx->count += n;
	if (waitqueue_active(&ctx->wqh))
		wake_up_locked_poll(&ctx->wqh, POLLIN);
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return n;
}
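
/*
 * Illustration only: how a hypothetical driver might call
 * eventfd_signal() from a non-sleeping context such as an interrupt
 * handler, assuming it earlier stashed an eventfd's struct file in its
 * private state (my_dev and its "trigger" field are made-up names, not
 * part of this file).
 *
 *	static irqreturn_t my_dev_irq(int irq, void *data)
 *	{
 *		struct my_dev *dev = data;
 *
 *		if (dev->trigger)	// struct file * of an eventfd
 *			eventfd_signal(dev->trigger, 1);
 *		return IRQ_HANDLED;
 *	}
 */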

static int eventfd_release(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
}

static unsigned int eventfd_poll(struct file *file, poll_table *wait)
{
	struct eventfd_ctx *ctx = file->private_data;
	unsigned int events = 0;
	unsigned long flags;

	poll_wait(file, &ctx->wqh, wait);

	spin_lock_irqsave(&ctx->wqh.lock, flags);
	if (ctx->count > 0)
		events |= POLLIN;
	if (ctx->count == ULLONG_MAX)
		events |= POLLERR;
	if (ULLONG_MAX - 1 > ctx->count)
		events |= POLLOUT;
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return events;
}
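
/*
 * Illustration only: the readiness rules above as seen from userspace.
 * POLLIN means a read(2) would find a non-zero count, POLLOUT means at
 * least a write(2) of 1 would not block (wait_readable is a made-up
 * helper for this sketch).
 *
 *	#include <sys/eventfd.h>
 *	#include <poll.h>
 *
 *	int wait_readable(int efd, int timeout_ms)
 *	{
 *		struct pollfd pfd = { .fd = efd, .events = POLLIN };
 *
 *		return poll(&pfd, 1, timeout_ms);	// > 0 once count > 0
 *	}
 */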

static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt = 0;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	if (ctx->count > 0)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ctx->count > 0) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (likely(res > 0)) {
		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
		ctx->count -= ucnt;
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, POLLOUT);
	}
	spin_unlock_irq(&ctx->wqh.lock);
	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
		return -EFAULT;

	return res;
}
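
/*
 * Illustration only: the EFD_SEMAPHORE branch above changes what a
 * read(2) returns. Without the flag a read drains the whole counter;
 * with it each read consumes exactly 1, semaphore style.
 *
 *	#include <sys/eventfd.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	void demo(void)
 *	{
 *		uint64_t v = 3;
 *		int efd = eventfd(0, EFD_SEMAPHORE);
 *
 *		write(efd, &v, sizeof(v));	// count = 3
 *		read(efd, &v, sizeof(v));	// v == 1, count = 2
 *		read(efd, &v, sizeof(v));	// v == 1, count = 1
 *		close(efd);
 *	}
 */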

static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;
	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
		return -EFAULT;
	if (ucnt == ULLONG_MAX)
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	if (ULLONG_MAX - ctx->count > ucnt)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ULLONG_MAX - ctx->count > ucnt) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (likely(res > 0)) {
		ctx->count += ucnt;
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, POLLIN);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}
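
/*
 * Illustration only: consequences of the checks above for userspace.
 * Writing ULLONG_MAX fails outright, and a write that would push the
 * counter to ULLONG_MAX blocks (or fails with EAGAIN under O_NONBLOCK)
 * until a reader drains the counter.
 *
 *	#include <sys/eventfd.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	void demo(int efd)			// efd: fresh, count == 0
 *	{
 *		uint64_t v = UINT64_MAX;
 *
 *		write(efd, &v, sizeof(v));	// fails, errno == EINVAL
 *		v = UINT64_MAX - 1;
 *		write(efd, &v, sizeof(v));	// ok: count may reach
 *						// ULLONG_MAX - 1
 *		write(efd, &v, sizeof(v));	// blocks until a read(2)
 *	}
 */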

static const struct file_operations eventfd_fops = {
	.release	= eventfd_release,
	.poll		= eventfd_poll,
	.read		= eventfd_read,
	.write		= eventfd_write,
};

struct file *eventfd_fget(int fd)
{
	struct file *file;

	file = fget(fd);
	if (!file)
		return ERR_PTR(-EBADF);
	if (file->f_op != &eventfd_fops) {
		fput(file);
		return ERR_PTR(-EINVAL);
	}

	return file;
}
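
/*
 * Illustration only: a hypothetical ioctl handler that accepts an
 * eventfd file descriptor from userspace and pins the struct file for
 * later eventfd_signal() calls (my_dev and my_dev_set_trigger are
 * made-up names; teardown is abbreviated).
 *
 *	static long my_dev_set_trigger(struct my_dev *dev, int fd)
 *	{
 *		struct file *file = eventfd_fget(fd);
 *
 *		if (IS_ERR(file))
 *			return PTR_ERR(file);	// -EBADF or -EINVAL
 *		dev->trigger = file;		// fput() on device teardown
 *		return 0;
 *	}
 */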

SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
	int fd;
	struct eventfd_ctx *ctx;

	/* Check the EFD_* constants for consistency.  */
	BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);

	if (flags & ~EFD_FLAGS_SET)
		return -EINVAL;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	init_waitqueue_head(&ctx->wqh);
	ctx->count = count;
	ctx->flags = flags;

	/*
	 * When we call this, the initialization must be complete, since
	 * anon_inode_getfd() will install the fd.
	 */
	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
			      flags & EFD_SHARED_FCNTL_FLAGS);
	if (fd < 0)
		kfree(ctx);

	return fd;
}

SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
	return sys_eventfd2(count, 0);
}
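
/*
 * Illustration only: creating eventfds from userspace. The flag-less
 * eventfd(2) path above is what old binaries hit; the glibc wrapper
 * uses eventfd2 when flags are given (make_efd is a made-up helper).
 *
 *	#include <sys/eventfd.h>
 *
 *	int make_efd(void)
 *	{
 *		// counter starts at 0; fd is close-on-exec and
 *		// non-blocking (the EFD_* flags mirror the O_* values,
 *		// as the BUILD_BUG_ON checks above assert)
 *		return eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 *	}
 */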