proportions.c 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. /*
  2. * Floating proportions
  3. *
  4. * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  5. *
  6. * Description:
  7. *
  8. * The floating proportion is a time derivative with an exponentially decaying
  9. * history:
  10. *
  11. * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
  12. *
  13. * Where j is an element from {prop_local}, x_{j} is j's number of events,
  14. * and i the time period over which the differential is taken. So d/dt_{-i} is
  15. * the differential over the i-th last period.
  16. *
  17. * The decaying history gives smooth transitions. The time differential carries
  18. * the notion of speed.
  19. *
  20. * The denominator is 2^(1+i) because we want the series to be normalised, ie.
  21. *
  22. * \Sum_{i=0} 1/2^(1+i) = 1
  23. *
  24. * Furthermore, if we measure time (t) in the same events as x, so that:
  25. *
  26. * t = \Sum_{j} x_{j}
  27. *
  28. * we get that:
  29. *
  30. * \Sum_{j} p_{j} = 1
  31. *
  32. * Writing this in an iterative fashion we get (dropping the 'd's):
  33. *
  34. *   if (++x_{j}, ++t > period)
  35. *     t /= 2;
  36. *     for_each (j)
  37. *       x_{j} /= 2;
  38. *
  39. * so that:
  40. *
  41. * p_{j} = x_{j} / t;
  42. *
  43. * We optimize away the '/= 2' for the global time delta by noting that:
  44. *
  45. * if (++t > period) t /= 2:
  46. *
  47. * Can be approximated by:
  48. *
  49. * period/2 + (++t % period/2)
  50. *
  51. * [ Furthermore, when we choose period to be 2^n it can be written in terms of
  52. * binary operations and wraparound artefacts disappear. ]
  53. *
  54. * Also note that this yields a natural counter of the elapsed periods:
  55. *
  56. * c = t / (period/2)
  57. *
  58. * [ Its monotonic increasing property can be applied to mitigate the wrap-
  59. * around issue. ]
  60. *
  61. * This allows us to do away with the loop over all prop_locals on each period
  62. * expiration. By remembering the period count under which it was last accessed
  63. * as c_{j}, we can obtain the number of 'missed' cycles from:
  64. *
  65. * c - c_{j}
  66. *
  67. * We can then lazily catch up to the global period count every time we are
  68. * going to use x_{j}, by doing:
  69. *
  70. * x_{j} /= 2^(c - c_{j}), c_{j} = c
  71. */
  72. #include <linux/proportions.h>
  73. #include <linux/rcupdate.h>
  74. /*
  75. * Limit the time part in order to ensure there are some bits left for the
  76. * cycle counter.
  77. */
  78. #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
  79. int prop_descriptor_init(struct prop_descriptor *pd, int shift)
  80. {
  81. int err;
  82. if (shift > PROP_MAX_SHIFT)
  83. shift = PROP_MAX_SHIFT;
  84. pd->index = 0;
  85. pd->pg[0].shift = shift;
  86. mutex_init(&pd->mutex);
  87. err = percpu_counter_init_irq(&pd->pg[0].events, 0);
  88. if (err)
  89. goto out;
  90. err = percpu_counter_init_irq(&pd->pg[1].events, 0);
  91. if (err)
  92. percpu_counter_destroy(&pd->pg[0].events);
  93. out:
  94. return err;
  95. }
  96. /*
  97. * We have two copies, and flip between them to make it seem like an atomic
  98. * update. The update is not really atomic wrt the events counter, but
  99. * it is internally consistent with the bit layout depending on shift.
  100. *
  101. * We copy the events count, move the bits around and flip the index.
  102. */
  103. void prop_change_shift(struct prop_descriptor *pd, int shift)
  104. {
  105. int index;
  106. int offset;
  107. u64 events;
  108. unsigned long flags;
  109. if (shift > PROP_MAX_SHIFT)
  110. shift = PROP_MAX_SHIFT;
  111. mutex_lock(&pd->mutex);
  112. index = pd->index ^ 1;
  113. offset = pd->pg[pd->index].shift - shift;
  114. if (!offset)
  115. goto out;
  116. pd->pg[index].shift = shift;
  117. local_irq_save(flags);
  118. events = percpu_counter_sum(&pd->pg[pd->index].events);
  119. if (offset < 0)
  120. events <<= -offset;
  121. else
  122. events >>= offset;
  123. percpu_counter_set(&pd->pg[index].events, events);
  124. /*
  125. * ensure the new pg is fully written before the switch
  126. */
  127. smp_wmb();
  128. pd->index = index;
  129. local_irq_restore(flags);
  130. synchronize_rcu();
  131. out:
  132. mutex_unlock(&pd->mutex);
  133. }
  134. /*
  135. * wrap the access to the data in an rcu_read_lock() section;
  136. * this is used to track the active references.
  137. */
  138. static struct prop_global *prop_get_global(struct prop_descriptor *pd)
  139. {
  140. int index;
  141. rcu_read_lock();
  142. index = pd->index;
  143. /*
  144. * match the wmb from vcd_flip()
  145. */
  146. smp_rmb();
  147. return &pd->pg[index];
  148. }
  149. static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
  150. {
  151. rcu_read_unlock();
  152. }
  153. static void
  154. prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
  155. {
  156. int offset = *pl_shift - new_shift;
  157. if (!offset)
  158. return;
  159. if (offset < 0)
  160. *pl_period <<= -offset;
  161. else
  162. *pl_period >>= offset;
  163. *pl_shift = new_shift;
  164. }
  165. /*
  166. * PERCPU
  167. */
  168. int prop_local_init_percpu(struct prop_local_percpu *pl)
  169. {
  170. spin_lock_init(&pl->lock);
  171. pl->shift = 0;
  172. pl->period = 0;
  173. return percpu_counter_init_irq(&pl->events, 0);
  174. }
  175. void prop_local_destroy_percpu(struct prop_local_percpu *pl)
  176. {
  177. percpu_counter_destroy(&pl->events);
  178. }
  179. /*
  180. * Catch up with missed period expirations.
  181. *
  182. * until (c_{j} == c)
  183. * x_{j} -= x_{j}/2;
  184. * c_{j}++;
  185. */
  186. static
  187. void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
  188. {
  189. unsigned long period = 1UL << (pg->shift - 1);
  190. unsigned long period_mask = ~(period - 1);
  191. unsigned long global_period;
  192. unsigned long flags;
  193. global_period = percpu_counter_read(&pg->events);
  194. global_period &= period_mask;
  195. /*
  196. * Fast path - check if the local and global period count still match
  197. * outside of the lock.
  198. */
  199. if (pl->period == global_period)
  200. return;
  201. spin_lock_irqsave(&pl->lock, flags);
  202. prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
  203. /*
  204. * For each missed period, we half the local counter.
  205. * basically:
  206. * pl->events >> (global_period - pl->period);
  207. *
  208. * but since the distributed nature of percpu counters make division
  209. * rather hard, use a regular subtraction loop. This is safe, because
  210. * the events will only every be incremented, hence the subtraction
  211. * can never result in a negative number.
  212. */
  213. while (pl->period != global_period) {
  214. unsigned long val = percpu_counter_read(&pl->events);
  215. unsigned long half = (val + 1) >> 1;
  216. /*
  217. * Half of zero won't be much less, break out.
  218. * This limits the loop to shift iterations, even
  219. * if we missed a million.
  220. */
  221. if (!val)
  222. break;
  223. percpu_counter_add(&pl->events, -half);
  224. pl->period += period;
  225. }
  226. pl->period = global_period;
  227. spin_unlock_irqrestore(&pl->lock, flags);
  228. }
  229. /*
  230. * ++x_{j}, ++t
  231. */
  232. void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
  233. {
  234. struct prop_global *pg = prop_get_global(pd);
  235. prop_norm_percpu(pg, pl);
  236. percpu_counter_add(&pl->events, 1);
  237. percpu_counter_add(&pg->events, 1);
  238. prop_put_global(pd, pg);
  239. }
  240. /*
  241. * Obtain a fraction of this proportion
  242. *
  243. * p_{j} = x_{j} / (period/2 + t % period/2)
  244. */
  245. void prop_fraction_percpu(struct prop_descriptor *pd,
  246. struct prop_local_percpu *pl,
  247. long *numerator, long *denominator)
  248. {
  249. struct prop_global *pg = prop_get_global(pd);
  250. unsigned long period_2 = 1UL << (pg->shift - 1);
  251. unsigned long counter_mask = period_2 - 1;
  252. unsigned long global_count;
  253. prop_norm_percpu(pg, pl);
  254. *numerator = percpu_counter_read_positive(&pl->events);
  255. global_count = percpu_counter_read(&pg->events);
  256. *denominator = period_2 + (global_count & counter_mask);
  257. prop_put_global(pd, pg);
  258. }
  259. /*
  260. * SINGLE
  261. */
  262. int prop_local_init_single(struct prop_local_single *pl)
  263. {
  264. spin_lock_init(&pl->lock);
  265. pl->shift = 0;
  266. pl->period = 0;
  267. pl->events = 0;
  268. return 0;
  269. }
  270. void prop_local_destroy_single(struct prop_local_single *pl)
  271. {
  272. }
  273. /*
  274. * Catch up with missed period expirations.
  275. */
  276. static
  277. void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
  278. {
  279. unsigned long period = 1UL << (pg->shift - 1);
  280. unsigned long period_mask = ~(period - 1);
  281. unsigned long global_period;
  282. unsigned long flags;
  283. global_period = percpu_counter_read(&pg->events);
  284. global_period &= period_mask;
  285. /*
  286. * Fast path - check if the local and global period count still match
  287. * outside of the lock.
  288. */
  289. if (pl->period == global_period)
  290. return;
  291. spin_lock_irqsave(&pl->lock, flags);
  292. prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
  293. /*
  294. * For each missed period, we half the local counter.
  295. */
  296. period = (global_period - pl->period) >> (pg->shift - 1);
  297. if (likely(period < BITS_PER_LONG))
  298. pl->events >>= period;
  299. else
  300. pl->events = 0;
  301. pl->period = global_period;
  302. spin_unlock_irqrestore(&pl->lock, flags);
  303. }
  304. /*
  305. * ++x_{j}, ++t
  306. */
  307. void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
  308. {
  309. struct prop_global *pg = prop_get_global(pd);
  310. prop_norm_single(pg, pl);
  311. pl->events++;
  312. percpu_counter_add(&pg->events, 1);
  313. prop_put_global(pd, pg);
  314. }
  315. /*
  316. * Obtain a fraction of this proportion
  317. *
  318. * p_{j} = x_{j} / (period/2 + t % period/2)
  319. */
  320. void prop_fraction_single(struct prop_descriptor *pd,
  321. struct prop_local_single *pl,
  322. long *numerator, long *denominator)
  323. {
  324. struct prop_global *pg = prop_get_global(pd);
  325. unsigned long period_2 = 1UL << (pg->shift - 1);
  326. unsigned long counter_mask = period_2 - 1;
  327. unsigned long global_count;
  328. prop_norm_single(pg, pl);
  329. *numerator = pl->events;
  330. global_count = percpu_counter_read(&pg->events);
  331. *denominator = period_2 + (global_count & counter_mask);
  332. prop_put_global(pd, pg);
  333. }