/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x; so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2:
 *
 * Can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonic increasing property can be applied to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
  72. #include <linux/proportions.h>
  73. #include <linux/rcupdate.h>
  74. int prop_descriptor_init(struct prop_descriptor *pd, int shift)
  75. {
  76. int err;
  77. if (shift > PROP_MAX_SHIFT)
  78. shift = PROP_MAX_SHIFT;
  79. pd->index = 0;
  80. pd->pg[0].shift = shift;
  81. mutex_init(&pd->mutex);
  82. err = percpu_counter_init_irq(&pd->pg[0].events, 0);
  83. if (err)
  84. goto out;
  85. err = percpu_counter_init_irq(&pd->pg[1].events, 0);
  86. if (err)
  87. percpu_counter_destroy(&pd->pg[0].events);
  88. out:
  89. return err;
  90. }
  91. /*
  92. * We have two copies, and flip between them to make it seem like an atomic
  93. * update. The update is not really atomic wrt the events counter, but
  94. * it is internally consistent with the bit layout depending on shift.
  95. *
  96. * We copy the events count, move the bits around and flip the index.
  97. */
  98. void prop_change_shift(struct prop_descriptor *pd, int shift)
  99. {
  100. int index;
  101. int offset;
  102. u64 events;
  103. unsigned long flags;
  104. if (shift > PROP_MAX_SHIFT)
  105. shift = PROP_MAX_SHIFT;
  106. mutex_lock(&pd->mutex);
  107. index = pd->index ^ 1;
  108. offset = pd->pg[pd->index].shift - shift;
  109. if (!offset)
  110. goto out;
  111. pd->pg[index].shift = shift;
  112. local_irq_save(flags);
  113. events = percpu_counter_sum(&pd->pg[pd->index].events);
  114. if (offset < 0)
  115. events <<= -offset;
  116. else
  117. events >>= offset;
  118. percpu_counter_set(&pd->pg[index].events, events);
  119. /*
  120. * ensure the new pg is fully written before the switch
  121. */
  122. smp_wmb();
  123. pd->index = index;
  124. local_irq_restore(flags);
  125. synchronize_rcu();
  126. out:
  127. mutex_unlock(&pd->mutex);
  128. }
  129. /*
  130. * wrap the access to the data in an rcu_read_lock() section;
  131. * this is used to track the active references.
  132. */
  133. static struct prop_global *prop_get_global(struct prop_descriptor *pd)
  134. {
  135. int index;
  136. rcu_read_lock();
  137. index = pd->index;
  138. /*
  139. * match the wmb from vcd_flip()
  140. */
  141. smp_rmb();
  142. return &pd->pg[index];
  143. }
  144. static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
  145. {
  146. rcu_read_unlock();
  147. }
  148. static void
  149. prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
  150. {
  151. int offset = *pl_shift - new_shift;
  152. if (!offset)
  153. return;
  154. if (offset < 0)
  155. *pl_period <<= -offset;
  156. else
  157. *pl_period >>= offset;
  158. *pl_shift = new_shift;
  159. }
  160. /*
  161. * PERCPU
  162. */
  163. #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
  164. int prop_local_init_percpu(struct prop_local_percpu *pl)
  165. {
  166. spin_lock_init(&pl->lock);
  167. pl->shift = 0;
  168. pl->period = 0;
  169. return percpu_counter_init_irq(&pl->events, 0);
  170. }
  171. void prop_local_destroy_percpu(struct prop_local_percpu *pl)
  172. {
  173. percpu_counter_destroy(&pl->events);
  174. }
  175. /*
  176. * Catch up with missed period expirations.
  177. *
  178. * until (c_{j} == c)
  179. * x_{j} -= x_{j}/2;
  180. * c_{j}++;
  181. */
  182. static
  183. void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
  184. {
  185. unsigned long period = 1UL << (pg->shift - 1);
  186. unsigned long period_mask = ~(period - 1);
  187. unsigned long global_period;
  188. unsigned long flags;
  189. global_period = percpu_counter_read(&pg->events);
  190. global_period &= period_mask;
  191. /*
  192. * Fast path - check if the local and global period count still match
  193. * outside of the lock.
  194. */
  195. if (pl->period == global_period)
  196. return;
  197. spin_lock_irqsave(&pl->lock, flags);
  198. prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
  199. /*
  200. * For each missed period, we half the local counter.
  201. * basically:
  202. * pl->events >> (global_period - pl->period);
  203. */
  204. period = (global_period - pl->period) >> (pg->shift - 1);
  205. if (period < BITS_PER_LONG) {
  206. s64 val = percpu_counter_read(&pl->events);
  207. if (val < (nr_cpu_ids * PROP_BATCH))
  208. val = percpu_counter_sum(&pl->events);
  209. __percpu_counter_add(&pl->events, -val + (val >> period),
  210. PROP_BATCH);
  211. } else
  212. percpu_counter_set(&pl->events, 0);
  213. pl->period = global_period;
  214. spin_unlock_irqrestore(&pl->lock, flags);
  215. }
  216. /*
  217. * ++x_{j}, ++t
  218. */
  219. void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
  220. {
  221. struct prop_global *pg = prop_get_global(pd);
  222. prop_norm_percpu(pg, pl);
  223. __percpu_counter_add(&pl->events, 1, PROP_BATCH);
  224. percpu_counter_add(&pg->events, 1);
  225. prop_put_global(pd, pg);
  226. }
  227. /*
  228. * identical to __prop_inc_percpu, except that it limits this pl's fraction to
  229. * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
  230. */
  231. void __prop_inc_percpu_max(struct prop_descriptor *pd,
  232. struct prop_local_percpu *pl, long frac)
  233. {
  234. struct prop_global *pg = prop_get_global(pd);
  235. prop_norm_percpu(pg, pl);
  236. if (unlikely(frac != PROP_FRAC_BASE)) {
  237. unsigned long period_2 = 1UL << (pg->shift - 1);
  238. unsigned long counter_mask = period_2 - 1;
  239. unsigned long global_count;
  240. long numerator, denominator;
  241. numerator = percpu_counter_read_positive(&pl->events);
  242. global_count = percpu_counter_read(&pg->events);
  243. denominator = period_2 + (global_count & counter_mask);
  244. if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
  245. goto out_put;
  246. }
  247. percpu_counter_add(&pl->events, 1);
  248. percpu_counter_add(&pg->events, 1);
  249. out_put:
  250. prop_put_global(pd, pg);
  251. }
  252. /*
  253. * Obtain a fraction of this proportion
  254. *
  255. * p_{j} = x_{j} / (period/2 + t % period/2)
  256. */
  257. void prop_fraction_percpu(struct prop_descriptor *pd,
  258. struct prop_local_percpu *pl,
  259. long *numerator, long *denominator)
  260. {
  261. struct prop_global *pg = prop_get_global(pd);
  262. unsigned long period_2 = 1UL << (pg->shift - 1);
  263. unsigned long counter_mask = period_2 - 1;
  264. unsigned long global_count;
  265. prop_norm_percpu(pg, pl);
  266. *numerator = percpu_counter_read_positive(&pl->events);
  267. global_count = percpu_counter_read(&pg->events);
  268. *denominator = period_2 + (global_count & counter_mask);
  269. prop_put_global(pd, pg);
  270. }
  271. /*
  272. * SINGLE
  273. */
  274. int prop_local_init_single(struct prop_local_single *pl)
  275. {
  276. spin_lock_init(&pl->lock);
  277. pl->shift = 0;
  278. pl->period = 0;
  279. pl->events = 0;
  280. return 0;
  281. }
  282. void prop_local_destroy_single(struct prop_local_single *pl)
  283. {
  284. }
  285. /*
  286. * Catch up with missed period expirations.
  287. */
  288. static
  289. void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
  290. {
  291. unsigned long period = 1UL << (pg->shift - 1);
  292. unsigned long period_mask = ~(period - 1);
  293. unsigned long global_period;
  294. unsigned long flags;
  295. global_period = percpu_counter_read(&pg->events);
  296. global_period &= period_mask;
  297. /*
  298. * Fast path - check if the local and global period count still match
  299. * outside of the lock.
  300. */
  301. if (pl->period == global_period)
  302. return;
  303. spin_lock_irqsave(&pl->lock, flags);
  304. prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
  305. /*
  306. * For each missed period, we half the local counter.
  307. */
  308. period = (global_period - pl->period) >> (pg->shift - 1);
  309. if (likely(period < BITS_PER_LONG))
  310. pl->events >>= period;
  311. else
  312. pl->events = 0;
  313. pl->period = global_period;
  314. spin_unlock_irqrestore(&pl->lock, flags);
  315. }
  316. /*
  317. * ++x_{j}, ++t
  318. */
  319. void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
  320. {
  321. struct prop_global *pg = prop_get_global(pd);
  322. prop_norm_single(pg, pl);
  323. pl->events++;
  324. percpu_counter_add(&pg->events, 1);
  325. prop_put_global(pd, pg);
  326. }
  327. /*
  328. * Obtain a fraction of this proportion
  329. *
  330. * p_{j} = x_{j} / (period/2 + t % period/2)
  331. */
  332. void prop_fraction_single(struct prop_descriptor *pd,
  333. struct prop_local_single *pl,
  334. long *numerator, long *denominator)
  335. {
  336. struct prop_global *pg = prop_get_global(pd);
  337. unsigned long period_2 = 1UL << (pg->shift - 1);
  338. unsigned long counter_mask = period_2 - 1;
  339. unsigned long global_count;
  340. prop_norm_single(pg, pl);
  341. *numerator = pl->events;
  342. global_count = percpu_counter_read(&pg->events);
  343. *denominator = period_2 + (global_count & counter_mask);
  344. prop_put_global(pd, pg);
  345. }