proportions.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. /*
  2. * Floating proportions
  3. *
  4. * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  5. *
  6. * Description:
  7. *
  8. * The floating proportion is a time derivative with an exponentially decaying
  9. * history:
  10. *
  11. * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
  12. *
  13. * Where j is an element from {prop_local}, x_{j} is j's number of events,
  14. * and i the time period over which the differential is taken. So d/dt_{-i} is
  15. * the differential over the i-th last period.
  16. *
  17. * The decaying history gives smooth transitions. The time differential carries
  18. * the notion of speed.
  19. *
 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
  21. *
  22. * \Sum_{i=0} 1/2^(1+i) = 1
  23. *
 * Furthermore, if we measure time (t) in the same events as x, so that:
  25. *
  26. * t = \Sum_{j} x_{j}
  27. *
  28. * we get that:
  29. *
  30. * \Sum_{j} p_{j} = 1
  31. *
  32. * Writing this in an iterative fashion we get (dropping the 'd's):
  33. *
 *   if (++x_{j}, ++t > period) {
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *   }
  38. *
  39. * so that:
  40. *
  41. * p_{j} = x_{j} / t;
  42. *
  43. * We optimize away the '/= 2' for the global time delta by noting that:
  44. *
  45. * if (++t > period) t /= 2:
  46. *
  47. * Can be approximated by:
  48. *
 *   period/2 + (++t % (period/2))
  50. *
  51. * [ Furthermore, when we choose period to be 2^n it can be written in terms of
  52. * binary operations and wraparound artefacts disappear. ]
  53. *
  54. * Also note that this yields a natural counter of the elapsed periods:
  55. *
  56. * c = t / (period/2)
  57. *
  58. * [ Its monotonic increasing property can be applied to mitigate the wrap-
  59. * around issue. ]
  60. *
  61. * This allows us to do away with the loop over all prop_locals on each period
  62. * expiration. By remembering the period count under which it was last accessed
  63. * as c_{j}, we can obtain the number of 'missed' cycles from:
  64. *
  65. * c - c_{j}
  66. *
  67. * We can then lazily catch up to the global period count every time we are
  68. * going to use x_{j}, by doing:
  69. *
  70. * x_{j} /= 2^(c - c_{j}), c_{j} = c
  71. */
  72. #include <linux/proportions.h>
  73. #include <linux/rcupdate.h>
/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter.
 *
 * The global events counter is split by the shift: its low (shift - 1) bits
 * hold the position inside the current period and the bits above them count
 * elapsed periods. Capping the shift at three quarters of BITS_PER_LONG
 * keeps the remaining bits of an unsigned long available for that period
 * count. prop_descriptor_init() and prop_change_shift() clamp their shift
 * argument to this value.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
  79. int prop_descriptor_init(struct prop_descriptor *pd, int shift)
  80. {
  81. int err;
  82. if (shift > PROP_MAX_SHIFT)
  83. shift = PROP_MAX_SHIFT;
  84. pd->index = 0;
  85. pd->pg[0].shift = shift;
  86. mutex_init(&pd->mutex);
  87. err = percpu_counter_init_irq(&pd->pg[0].events, 0);
  88. if (err)
  89. goto out;
  90. err = percpu_counter_init_irq(&pd->pg[1].events, 0);
  91. if (err)
  92. percpu_counter_destroy(&pd->pg[0].events);
  93. out:
  94. return err;
  95. }
  96. /*
  97. * We have two copies, and flip between them to make it seem like an atomic
  98. * update. The update is not really atomic wrt the events counter, but
  99. * it is internally consistent with the bit layout depending on shift.
  100. *
  101. * We copy the events count, move the bits around and flip the index.
  102. */
  103. void prop_change_shift(struct prop_descriptor *pd, int shift)
  104. {
  105. int index;
  106. int offset;
  107. u64 events;
  108. unsigned long flags;
  109. if (shift > PROP_MAX_SHIFT)
  110. shift = PROP_MAX_SHIFT;
  111. mutex_lock(&pd->mutex);
  112. index = pd->index ^ 1;
  113. offset = pd->pg[pd->index].shift - shift;
  114. if (!offset)
  115. goto out;
  116. pd->pg[index].shift = shift;
  117. local_irq_save(flags);
  118. events = percpu_counter_sum(&pd->pg[pd->index].events);
  119. if (offset < 0)
  120. events <<= -offset;
  121. else
  122. events >>= offset;
  123. percpu_counter_set(&pd->pg[index].events, events);
  124. /*
  125. * ensure the new pg is fully written before the switch
  126. */
  127. smp_wmb();
  128. pd->index = index;
  129. local_irq_restore(flags);
  130. synchronize_rcu();
  131. out:
  132. mutex_unlock(&pd->mutex);
  133. }
/*
 * Return the currently active global state of @pd.
 *
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 *
 * The read-side section is still open when this function returns; the
 * caller closes it with prop_put_global().
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the smp_wmb() in prop_change_shift(): the index is read
	 * before any of the contents of the slot it selects.
	 */
	smp_rmb();
	return &pd->pg[index];
}
/*
 * Drop the reference obtained with prop_get_global() by leaving the RCU
 * read-side section it opened. Neither @pd nor @pg is used here.
 */
static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
	rcu_read_unlock();
}
  153. static void
  154. prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
  155. {
  156. int offset = *pl_shift - new_shift;
  157. if (!offset)
  158. return;
  159. if (offset < 0)
  160. *pl_period <<= -offset;
  161. else
  162. *pl_period >>= offset;
  163. *pl_shift = new_shift;
  164. }
/*
 * PERCPU
 */

/*
 * Batch value handed to __percpu_counter_add() for the per-cpu local event
 * counters; it grows with log2 of nr_cpu_ids. prop_norm_percpu() also uses
 * nr_cpu_ids * PROP_BATCH as the threshold below which it takes an exact
 * percpu_counter_sum() instead of the approximate read.
 */
#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
  169. int prop_local_init_percpu(struct prop_local_percpu *pl)
  170. {
  171. spin_lock_init(&pl->lock);
  172. pl->shift = 0;
  173. pl->period = 0;
  174. return percpu_counter_init_irq(&pl->events, 0);
  175. }
/*
 * Tear down a per-cpu local proportion by destroying the event counter
 * that prop_local_init_percpu() set up.
 */
void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}
  180. /*
  181. * Catch up with missed period expirations.
  182. *
  183. * until (c_{j} == c)
  184. * x_{j} -= x_{j}/2;
  185. * c_{j}++;
  186. */
  187. static
  188. void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
  189. {
  190. unsigned long period = 1UL << (pg->shift - 1);
  191. unsigned long period_mask = ~(period - 1);
  192. unsigned long global_period;
  193. unsigned long flags;
  194. global_period = percpu_counter_read(&pg->events);
  195. global_period &= period_mask;
  196. /*
  197. * Fast path - check if the local and global period count still match
  198. * outside of the lock.
  199. */
  200. if (pl->period == global_period)
  201. return;
  202. spin_lock_irqsave(&pl->lock, flags);
  203. prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
  204. /*
  205. * For each missed period, we half the local counter.
  206. * basically:
  207. * pl->events >> (global_period - pl->period);
  208. */
  209. period = (global_period - pl->period) >> (pg->shift - 1);
  210. if (period < BITS_PER_LONG) {
  211. s64 val = percpu_counter_read(&pl->events);
  212. if (val < (nr_cpu_ids * PROP_BATCH))
  213. val = percpu_counter_sum(&pl->events);
  214. __percpu_counter_add(&pl->events, -val + (val >> period),
  215. PROP_BATCH);
  216. } else
  217. percpu_counter_set(&pl->events, 0);
  218. pl->period = global_period;
  219. spin_unlock_irqrestore(&pl->lock, flags);
  220. }
  221. /*
  222. * ++x_{j}, ++t
  223. */
  224. void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
  225. {
  226. struct prop_global *pg = prop_get_global(pd);
  227. prop_norm_percpu(pg, pl);
  228. __percpu_counter_add(&pl->events, 1, PROP_BATCH);
  229. percpu_counter_add(&pg->events, 1);
  230. prop_put_global(pd, pg);
  231. }
  232. /*
  233. * Obtain a fraction of this proportion
  234. *
  235. * p_{j} = x_{j} / (period/2 + t % period/2)
  236. */
  237. void prop_fraction_percpu(struct prop_descriptor *pd,
  238. struct prop_local_percpu *pl,
  239. long *numerator, long *denominator)
  240. {
  241. struct prop_global *pg = prop_get_global(pd);
  242. unsigned long period_2 = 1UL << (pg->shift - 1);
  243. unsigned long counter_mask = period_2 - 1;
  244. unsigned long global_count;
  245. prop_norm_percpu(pg, pl);
  246. *numerator = percpu_counter_read_positive(&pl->events);
  247. global_count = percpu_counter_read(&pg->events);
  248. *denominator = period_2 + (global_count & counter_mask);
  249. prop_put_global(pd, pg);
  250. }
  251. /*
  252. * SINGLE
  253. */
  254. int prop_local_init_single(struct prop_local_single *pl)
  255. {
  256. spin_lock_init(&pl->lock);
  257. pl->shift = 0;
  258. pl->period = 0;
  259. pl->events = 0;
  260. return 0;
  261. }
/*
 * Counterpart of prop_local_init_single(). The body is intentionally empty:
 * prop_local_init_single() only assigns plain fields and initialises the
 * lock, so there is nothing to release here.
 */
void prop_local_destroy_single(struct prop_local_single *pl)
{
}
  265. /*
  266. * Catch up with missed period expirations.
  267. */
  268. static
  269. void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
  270. {
  271. unsigned long period = 1UL << (pg->shift - 1);
  272. unsigned long period_mask = ~(period - 1);
  273. unsigned long global_period;
  274. unsigned long flags;
  275. global_period = percpu_counter_read(&pg->events);
  276. global_period &= period_mask;
  277. /*
  278. * Fast path - check if the local and global period count still match
  279. * outside of the lock.
  280. */
  281. if (pl->period == global_period)
  282. return;
  283. spin_lock_irqsave(&pl->lock, flags);
  284. prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
  285. /*
  286. * For each missed period, we half the local counter.
  287. */
  288. period = (global_period - pl->period) >> (pg->shift - 1);
  289. if (likely(period < BITS_PER_LONG))
  290. pl->events >>= period;
  291. else
  292. pl->events = 0;
  293. pl->period = global_period;
  294. spin_unlock_irqrestore(&pl->lock, flags);
  295. }
  296. /*
  297. * ++x_{j}, ++t
  298. */
  299. void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
  300. {
  301. struct prop_global *pg = prop_get_global(pd);
  302. prop_norm_single(pg, pl);
  303. pl->events++;
  304. percpu_counter_add(&pg->events, 1);
  305. prop_put_global(pd, pg);
  306. }
  307. /*
  308. * Obtain a fraction of this proportion
  309. *
  310. * p_{j} = x_{j} / (period/2 + t % period/2)
  311. */
  312. void prop_fraction_single(struct prop_descriptor *pd,
  313. struct prop_local_single *pl,
  314. long *numerator, long *denominator)
  315. {
  316. struct prop_global *pg = prop_get_global(pd);
  317. unsigned long period_2 = 1UL << (pg->shift - 1);
  318. unsigned long counter_mask = period_2 - 1;
  319. unsigned long global_count;
  320. prop_norm_single(pg, pl);
  321. *numerator = pl->events;
  322. global_count = percpu_counter_read(&pg->events);
  323. *denominator = period_2 + (global_count & counter_mask);
  324. prop_put_global(pd, pg);
  325. }