cputime.c

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and can
 * race with irq/vtime_account on this CPU. We would either get the old
 * or the new value, with a side effect of accounting a slice of irq time
 * to the wrong task when an irq is in progress while we read rq->clock.
 * That is a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);

static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;
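
/*
 * sched_clock_irqtime gates the fine-grained irq time accounting above.
 * It is typically switched on from architecture clock setup code once
 * sched_clock() is known to be fast and reliable enough to be sampled on
 * every irq entry/exit, and switched off again if that stops being true.
 */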
void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}
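
/*
 * On 32-bit kernels the 64-bit per-CPU irq time counters cannot be read
 * atomically, so writers and remote readers synchronize on this seqcount
 * (via the irq_time_write_*() helpers used below and the corresponding
 * reader helpers in sched.h) to get a consistent snapshot.
 */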
#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void vtime_account(struct task_struct *curr)
{
        unsigned long flags;
        s64 delta;
        int cpu;

        if (!sched_clock_irqtime)
                return;

        local_irq_save(flags);

        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
        __this_cpu_add(irq_start_time, delta);

        irq_time_write_begin();
        /*
         * We do not account for softirq time from ksoftirqd here.
         * We want to continue accounting softirq time to the ksoftirqd
         * thread in that case, so as not to confuse the scheduler with a
         * special task that does not consume any time, but still wants to run.
         */
        if (hardirq_count())
                __this_cpu_add(cpu_hardirq_time, delta);
        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
                __this_cpu_add(cpu_softirq_time, delta);

        irq_time_write_end();
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(vtime_account);
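
/*
 * Helpers for irqtime_account_process_tick(): return 1 when the per-CPU
 * hard/soft irq time accumulated by vtime_account() above exceeds what has
 * already been folded into the CPUTIME_IRQ / CPUTIME_SOFTIRQ cpustat
 * fields, i.e. there is irq time that still needs to be accounted.
 */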
static int irqtime_account_hi_update(void)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        u64 latest_ns;
        int ret = 0;

        local_irq_save(flags);
        latest_ns = this_cpu_read(cpu_hardirq_time);
        if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
                ret = 1;
        local_irq_restore(flags);
        return ret;
}

static int irqtime_account_si_update(void)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        u64 latest_ns;
        int ret = 0;

        local_irq_save(flags);
        latest_ns = this_cpu_read(cpu_softirq_time);
        if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
                ret = 1;
        local_irq_restore(flags);
        return ret;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
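
/*
 * Charge @tmp to the cpustat field @index, both in the per-CPU (root)
 * kernel_cpustat and, when CONFIG_CGROUP_CPUACCT is enabled, in every
 * non-root cpuacct group that @p belongs to.
 */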
static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
#ifdef CONFIG_CGROUP_CPUACCT
        struct kernel_cpustat *kcpustat;
        struct cpuacct *ca;
#endif
        /*
         * Since all updates are sure to touch the root cgroup, we
         * get ourselves ahead and touch it first. If the root cgroup
         * is the only cgroup, then nothing else should be necessary.
         */
        __get_cpu_var(kernel_cpustat).cpustat[index] += tmp;

#ifdef CONFIG_CGROUP_CPUACCT
        if (unlikely(!cpuacct_subsys.active))
                return;

        rcu_read_lock();
        ca = task_ca(p);
        while (ca && (ca != &root_cpuacct)) {
                kcpustat = this_cpu_ptr(ca->cpustat);
                kcpustat->cpustat[index] += tmp;
                ca = parent_ca(ca);
        }
        rcu_read_unlock();
#endif
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
                       cputime_t cputime_scaled)
{
        int index;

        /* Add user time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);

        index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

        /* Add user time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for user time used */
        acct_update_integrals(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
                               cputime_t cputime_scaled)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        /* Add guest time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);
        p->gtime += cputime;

        /* Add guest time to cpustat. */
        if (TASK_NICE(p) > 0) {
                cpustat[CPUTIME_NICE] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
        } else {
                cpustat[CPUTIME_USER] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST] += (__force u64) cputime;
        }
}

/*
 * Account system cpu time to a process and the desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
                           cputime_t cputime_scaled, int index)
{
        /* Add system time to process. */
        p->stime += cputime;
        p->stimescaled += cputime_scaled;
        account_group_system_time(p, cputime);

        /* Add system time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for system time used */
        acct_update_integrals(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
                         cputime_t cputime, cputime_t cputime_scaled)
{
        int index;

        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                account_guest_time(p, cputime, cputime_scaled);
                return;
        }

        if (hardirq_count() - hardirq_offset)
                index = CPUTIME_IRQ;
        else if (in_serving_softirq())
                index = CPUTIME_SOFTIRQ;
        else
                index = CPUTIME_SYSTEM;

        __account_system_time(p, cputime, cputime_scaled, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        struct rq *rq = this_rq();

        if (atomic_read(&rq->nr_iowait) > 0)
                cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
        else
                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}
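
/*
 * Fold paravirt steal time into cpustat: read the hypervisor-provided
 * steal clock for this CPU, convert any new steal time into ticks,
 * account those ticks as steal time and remember how much has been
 * consumed in rq->prev_steal_time. Returns true if at least one tick
 * was stolen, in which case the callers skip normal tick accounting.
 */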
static __always_inline bool steal_account_process_tick(void)
{
#ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
                u64 steal, st = 0;

                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;

                st = steal_ticks(steal);
                this_rq()->prev_steal_time += st * TICK_NSEC;

                account_steal_time(st);
                return st;
        }
#endif
        return false;
}

#ifndef CONFIG_VIRT_CPU_ACCOUNTING

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there is
 * no timer going off while we are in a hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on irq/softirq
 * time, as those do not count in task exec_runtime any more.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         struct rq *rq)
{
        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        if (steal_account_process_tick())
                return;

        if (irqtime_account_hi_update()) {
                cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
        } else if (irqtime_account_si_update()) {
                cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
        } else if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time does not get accounted in cpu_softirq_time.
                 * So, we have to handle it separately here.
                 * Also, p->stime needs to be updated for ksoftirqd.
                 */
                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
                                      CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
        } else if (p == rq->idle) {
                account_idle_time(cputime_one_jiffy);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
        } else {
                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
                                      CPUTIME_SYSTEM);
        }
}
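
/*
 * Account @ticks idle ticks through the irq-time-aware path above, so that
 * any pending hard/soft irq time is folded in before the remainder is
 * charged as idle time.
 */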
static void irqtime_account_idle_ticks(int ticks)
{
        int i;
        struct rq *rq = this_rq();

        for (i = 0; i < ticks; i++)
                irqtime_account_process_tick(current, 0, rq);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static void irqtime_account_idle_ticks(int ticks) {}
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
        struct rq *rq = this_rq();

        if (sched_clock_irqtime) {
                irqtime_account_process_tick(p, user_tick, rq);
                return;
        }

        if (steal_account_process_tick())
                return;

        if (user_tick)
                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
                                    one_jiffy_scaled);
        else
                account_idle_time(cputime_one_jiffy);
}

/*
 * Account multiple ticks of steal time.
 * @ticks: number of stolen ticks
 */
void account_steal_ticks(unsigned long ticks)
{
        account_steal_time(jiffies_to_cputime(ticks));
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of ticks spent idle
 */
void account_idle_ticks(unsigned long ticks)
{
        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }

        account_idle_time(jiffies_to_cputime(ticks));
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        *ut = p->utime;
        *st = p->stime;
}

void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);

        *ut = cputime.utime;
        *st = cputime.stime;
}

/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * attach another meaning to idle time (s390 only includes the
 * time spent by the CPU when it is in low-power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account(struct task_struct *tsk)
{
        unsigned long flags;

        local_irq_save(flags);

        if (in_interrupt() || !is_idle_task(tsk))
                vtime_account_system(tsk);
        else
                vtime_account_idle(tsk);

        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
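
/*
 * Without precise platform statistics, utime/stime are sampled at tick
 * granularity and then scaled against the scheduler's precise
 * sum_exec_runtime below. Architectures may provide their own
 * nsecs_to_cputime(); otherwise fall back to nsecs_to_jiffies().
 */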
#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
#endif
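
/*
 * Scale utime by the ratio rtime/total using a 64-bit intermediate so the
 * multiplication cannot overflow, picking the 32-bit or 64-bit divide
 * based on the width of cputime_t.
 */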
static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
{
        u64 temp = (__force u64) rtime;

        temp *= (__force u64) utime;

        if (sizeof(cputime_t) == 4)
                temp = div_u64(temp, (__force u32) total);
        else
                temp = div64_u64(temp, (__force u64) total);

        return (__force cputime_t) temp;
}

void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        cputime_t rtime, utime = p->utime, total = utime + p->stime;

        /*
         * Use CFS's precise accounting:
         */
        rtime = nsecs_to_cputime(p->se.sum_exec_runtime);

        if (total)
                utime = scale_utime(utime, rtime, total);
        else
                utime = rtime;

        /*
         * Compare with previous values, to keep monotonicity:
         */
        p->prev_utime = max(p->prev_utime, utime);
        p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);

        *ut = p->prev_utime;
        *st = p->prev_stime;
}

/*
 * Must be called with siglock held.
 */
void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct signal_struct *sig = p->signal;
        struct task_cputime cputime;
        cputime_t rtime, utime, total;

        thread_group_cputime(p, &cputime);

        total = cputime.utime + cputime.stime;
        rtime = nsecs_to_cputime(cputime.sum_exec_runtime);

        if (total)
                utime = scale_utime(cputime.utime, rtime, total);
        else
                utime = rtime;

        sig->prev_utime = max(sig->prev_utime, utime);
        sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);

        *ut = sig->prev_utime;
        *st = sig->prev_stime;
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */