/*
 *  fs/eventfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/anon_inodes.h>
#include <linux/eventfd.h>

struct eventfd_ctx {
	wait_queue_head_t wqh;
	/*
	 * Every time that a write(2) is performed on an eventfd, the
	 * value of the __u64 being written is added to "count" and a
	 * wakeup is performed on "wqh". A read(2) will return the "count"
	 * value to userspace, and will reset "count" to zero. The kernel
	 * side eventfd_signal() also adds to the "count" counter and
	 * issues a wakeup.
	 */
	__u64 count;
};
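
/*
 * Illustrative userspace sketch of the semantics described above. This is
 * not part of the original file; it assumes a kernel with the eventfd
 * syscall wired up, and invokes it through syscall(2) since a libc
 * wrapper may not be available:
 *
 *	__u64 cnt;
 *	int efd = syscall(__NR_eventfd, 0);	// counter starts at 0
 *
 *	cnt = 3;
 *	write(efd, &cnt, sizeof(cnt));		// count becomes 3
 *	cnt = 4;
 *	write(efd, &cnt, sizeof(cnt));		// count becomes 7
 *	read(efd, &cnt, sizeof(cnt));		// cnt == 7, count resets to 0
 */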

/*
 * Adds "n" to the eventfd counter "count". Returns "n" in case of
 * success, or a value lower than "n" in case of counter overflow.
 * This function is supposed to be called by the kernel in paths
 * that do not allow sleeping. In this function we allow the counter
 * to reach the ULLONG_MAX value, and we signal this as an overflow
 * condition by returning a POLLERR to poll(2).
 */
int eventfd_signal(struct file *file, int n)
{
	struct eventfd_ctx *ctx = file->private_data;
	unsigned long flags;

	if (n < 0)
		return -EINVAL;
	spin_lock_irqsave(&ctx->wqh.lock, flags);
	if (ULLONG_MAX - ctx->count < n)
		n = (int) (ULLONG_MAX - ctx->count);
	ctx->count += n;
	if (waitqueue_active(&ctx->wqh))
		wake_up_locked(&ctx->wqh);
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return n;
}
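
/*
 * Kernel-side usage sketch (illustrative, not part of this file): a driver
 * holding an eventfd descriptor received from userspace can kick the
 * counter from a completion path that must not sleep. "my_efd_fd" is a
 * hypothetical variable used only for this example:
 *
 *	struct file *efile = eventfd_fget(my_efd_fd);
 *	if (!IS_ERR(efile)) {
 *		eventfd_signal(efile, 1);	// add 1, wake any waiter
 *		fput(efile);
 *	}
 */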

static int eventfd_release(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
}

static unsigned int eventfd_poll(struct file *file, poll_table *wait)
{
	struct eventfd_ctx *ctx = file->private_data;
	unsigned int events = 0;
	unsigned long flags;

	poll_wait(file, &ctx->wqh, wait);

	spin_lock_irqsave(&ctx->wqh.lock, flags);
	if (ctx->count > 0)
		events |= POLLIN;
	if (ctx->count == ULLONG_MAX)
		events |= POLLERR;
	if (ULLONG_MAX - 1 > ctx->count)
		events |= POLLOUT;
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return events;
}
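
/*
 * What this mask means to a userspace poller (illustrative sketch): POLLIN
 * while a read(2) would return a non-zero count, POLLOUT while a write of
 * at least 1 can complete without blocking, POLLERR once the counter has
 * saturated at ULLONG_MAX through eventfd_signal():
 *
 *	struct pollfd pfd = { .fd = efd, .events = POLLIN };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		read(efd, &cnt, sizeof(cnt));	// will not block now
 */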

static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	ucnt = ctx->count;
	if (ucnt > 0)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ctx->count > 0) {
				ucnt = ctx->count;
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (res > 0) {
		ctx->count = 0;
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked(&ctx->wqh);
	}
	spin_unlock_irq(&ctx->wqh.lock);
	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
		return -EFAULT;

	return res;
}
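
/*
 * Blocking behaviour sketch (illustrative): a read of a zero counter sleeps
 * until a write(2) or eventfd_signal() raises it; with O_NONBLOCK, set here
 * via fcntl(2) since sys_eventfd() takes no flags, it fails with EAGAIN:
 *
 *	fcntl(efd, F_SETFL, fcntl(efd, F_GETFL) | O_NONBLOCK);
 *	if (read(efd, &cnt, sizeof(cnt)) < 0 && errno == EAGAIN)
 *		;	// counter was zero, nothing to consume
 */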

static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;
	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
		return -EFAULT;
	if (ucnt == ULLONG_MAX)
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	if (ULLONG_MAX - ctx->count > ucnt)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ULLONG_MAX - ctx->count > ucnt) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (res > 0) {
		ctx->count += ucnt;
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked(&ctx->wqh);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}
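
/*
 * Overflow behaviour sketch (illustrative): a write of ULLONG_MAX is
 * rejected with EINVAL above, and a write that would lift "count" to
 * ULLONG_MAX or beyond blocks (or fails with EAGAIN under O_NONBLOCK)
 * until a read(2) drains the counter:
 *
 *	__u64 big = ULLONG_MAX - 1, one = 1;
 *	write(efd, &big, sizeof(big));	// fills the counter
 *	write(efd, &one, sizeof(one));	// blocks until someone reads
 */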

static const struct file_operations eventfd_fops = {
	.release	= eventfd_release,
	.poll		= eventfd_poll,
	.read		= eventfd_read,
	.write		= eventfd_write,
};

struct file *eventfd_fget(int fd)
{
	struct file *file;

	file = fget(fd);
	if (!file)
		return ERR_PTR(-EBADF);
	if (file->f_op != &eventfd_fops) {
		fput(file);
		return ERR_PTR(-EINVAL);
	}

	return file;
}

asmlinkage long sys_eventfd(unsigned int count)
{
	int error, fd;
	struct eventfd_ctx *ctx;
	struct file *file;
	struct inode *inode;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	init_waitqueue_head(&ctx->wqh);
	ctx->count = count;

	/*
	 * When we call this, the initialization must be complete, since
	 * anon_inode_getfd() will install the fd.
	 */
	error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]",
				 &eventfd_fops, ctx);
	if (!error)
		return fd;

	kfree(ctx);
	return error;
}
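
/*
 * Creation sketch from userspace (illustrative; the syscall number is
 * architecture-specific and a libc wrapper may not exist yet):
 *
 *	int efd = syscall(__NR_eventfd, 5);	// counter starts at 5
 *	if (efd < 0)
 *		perror("eventfd");
 */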