percpu-rwsem.c

#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/errno.h>
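
/*
 * For reference while reading this file: the fields this code expects the
 * semaphore to provide. This is only a sketch reconstructed from the
 * accesses below, not the authoritative definition, which lives in
 * <linux/percpu-rwsem.h>:
 *
 *	struct percpu_rw_semaphore {
 *		unsigned int __percpu	*fast_read_ctr;
 *		struct mutex		writer_mutex;
 *		struct rw_semaphore	rw_sem;
 *		atomic_t		slow_read_ctr;
 *		wait_queue_head_t	write_waitq;
 *	};
 */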

int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
{
	brw->fast_read_ctr = alloc_percpu(int);
	if (unlikely(!brw->fast_read_ctr))
		return -ENOMEM;

	mutex_init(&brw->writer_mutex);
	init_rwsem(&brw->rw_sem);
	atomic_set(&brw->slow_read_ctr, 0);
	init_waitqueue_head(&brw->write_waitq);
	return 0;
}

void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
{
	free_percpu(brw->fast_read_ctr);
	brw->fast_read_ctr = NULL; /* catch use after free bugs */
}

/*
 * This is the fast-path for down_read/up_read; it only needs to ensure
 * there is no pending writer (the !mutex_is_locked() check) and inc/dec the
 * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
 * serialize with the preempt-disabled section below.
 *
 * The nontrivial part is that we should guarantee acquire/release semantics
 * in two cases:
 *
 *	R_W: down_write() comes after up_read(); the writer should see all
 *	     changes done by the reader
 * or
 *	W_R: down_read() comes after up_write(); the reader should see all
 *	     changes done by the writer
 *
 * If this helper fails, the callers rely on the normal rw_semaphore and
 * atomic_dec_and_test(), so in that case we have the necessary barriers.
 *
 * But if it succeeds we do not have any barriers; mutex_is_locked() or
 * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
 * reader inside the critical section. See the comments in down_write and
 * up_write below.
 */
static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
{
	bool success = false;

	preempt_disable();
	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
		__this_cpu_add(*brw->fast_read_ctr, val);
		success = true;
	}
	preempt_enable();

	return success;
}

/*
 * Like the normal down_read() this is not recursive; a writer can
 * come after the first percpu_down_read() and create a deadlock.
 */
void percpu_down_read(struct percpu_rw_semaphore *brw)
{
	if (likely(update_fast_ctr(brw, +1)))
		return;

	down_read(&brw->rw_sem);
	atomic_inc(&brw->slow_read_ctr);
	up_read(&brw->rw_sem);
}

void percpu_up_read(struct percpu_rw_semaphore *brw)
{
	if (likely(update_fast_ctr(brw, -1)))
		return;

	/* false-positive is possible but harmless */
	if (atomic_dec_and_test(&brw->slow_read_ctr))
		wake_up_all(&brw->write_waitq);
}
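
/*
 * A minimal reader-side usage sketch (illustrative, not part of the
 * original file): "struct example_dev", its "sem" and "count" fields and
 * example_read_count() are made up for this example. It shows the intended
 * pairing of percpu_down_read()/percpu_up_read() around a read-side
 * critical section; note that, per the comment above percpu_down_read(),
 * the lock must not be taken recursively.
 */
struct example_dev {
	struct percpu_rw_semaphore sem;
	unsigned long count;
};

static unsigned long __maybe_unused example_read_count(struct example_dev *dev)
{
	unsigned long ret;

	percpu_down_read(&dev->sem);	/* usually just a per-cpu increment */
	ret = dev->count;		/* protected against concurrent writers */
	percpu_up_read(&dev->sem);

	return ret;
}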

static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
{
	unsigned int sum = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		sum += per_cpu(*brw->fast_read_ctr, cpu);
		per_cpu(*brw->fast_read_ctr, cpu) = 0;
	}

	return sum;
}

/*
 * A writer takes ->writer_mutex to exclude other writers and to force the
 * readers to switch to the slow mode; note the mutex_is_locked() check in
 * update_fast_ctr().
 *
 * After that the readers can only inc/dec the slow ->slow_read_ctr counter;
 * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
 * counter, it represents the number of active readers.
 *
 * Finally the writer takes ->rw_sem for writing and blocks the new readers,
 * then waits until the slow counter becomes zero.
 */
void percpu_down_write(struct percpu_rw_semaphore *brw)
{
	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
	mutex_lock(&brw->writer_mutex);

	/*
	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
	 *    so that update_fast_ctr() can't succeed.
	 *
	 * 2. Ensures we see the result of every previous this_cpu_add() in
	 *    update_fast_ctr().
	 *
	 * 3. Ensures that if any reader has exited its critical section via
	 *    the fast-path, it executes a full memory barrier before we
	 *    return. See the R_W case in the comment above update_fast_ctr().
	 */
	synchronize_sched_expedited();

	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);

	/* block the new readers completely */
	down_write(&brw->rw_sem);

	/* wait for all readers to complete their percpu_up_read() */
	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
}

void percpu_up_write(struct percpu_rw_semaphore *brw)
{
	/* allow the new readers, but only the slow-path */
	up_write(&brw->rw_sem);

	/*
	 * Insert the barrier before the next fast-path in down_read,
	 * see W_R case in the comment above update_fast_ctr().
	 */
	synchronize_sched_expedited();

	mutex_unlock(&brw->writer_mutex);
}
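
/*
 * The matching writer-side sketch (illustrative, not part of the original
 * file), reusing the hypothetical struct example_dev from the reader
 * sketch above. Once percpu_down_write() returns, all readers have left
 * their critical sections and their stores are visible (the R_W case);
 * readers that start after percpu_up_write() will see the update (the W_R
 * case). See the comment above update_fast_ctr().
 */
static void __maybe_unused example_reset_count(struct example_dev *dev)
{
	percpu_down_write(&dev->sem);	/* may sleep; excludes readers and writers */
	dev->count = 0;
	percpu_up_write(&dev->sem);
}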