cputime.c

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering the percpu hardirq/softirq time.
 * They are only modified in irqtime_account_irq(), on the corresponding CPU
 * with interrupts disabled, so writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This means another CPU may read this CPU's irq time and race with the
 * irq time updates on this CPU.  The reader would then get either the old
 * or the new value, with the side effect of accounting a slice of irq time
 * to the wrong task when an irq is in progress while we read rq->clock.
 * That is a worthy compromise in place of having locks on each irq in
 * account_system_time().
 */
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);

static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}

#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
        unsigned long flags;
        s64 delta;
        int cpu;

        if (!sched_clock_irqtime)
                return;

        local_irq_save(flags);

        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
        __this_cpu_add(irq_start_time, delta);

        irq_time_write_begin();
        /*
         * We do not account for softirq time from ksoftirqd here.
         * We want to continue accounting softirq time to the ksoftirqd thread
         * in that case, so as not to confuse the scheduler with a special task
         * that does not consume any time but still wants to run.
         */
        if (hardirq_count())
                __this_cpu_add(cpu_hardirq_time, delta);
        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
                __this_cpu_add(cpu_softirq_time, delta);

        irq_time_write_end();
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
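
/*
 * For reference, a sketch of the read side that pairs with the
 * irq_time_write_begin()/irq_time_write_end() calls above.  The real
 * helper, irq_time_read(), lives in kernel/sched/sched.h in this tree;
 * the function below is only illustrative and its name is made up.
 * On 32-bit the two u64 counters cannot be loaded atomically, so the
 * reader retries whenever the per-CPU sequence counter changed under
 * it; on 64-bit the loads are atomic and no retry loop is needed.
 */
static inline u64 irq_time_read_sketch(int cpu)
{
#ifndef CONFIG_64BIT
        u64 irq_time;
        unsigned seq;

        do {
                seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
                irq_time = per_cpu(cpu_softirq_time, cpu) +
                           per_cpu(cpu_hardirq_time, cpu);
        } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));

        return irq_time;
#else
        return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
#endif
}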
static int irqtime_account_hi_update(void)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        u64 latest_ns;
        int ret = 0;

        local_irq_save(flags);
        latest_ns = this_cpu_read(cpu_hardirq_time);
        if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
                ret = 1;
        local_irq_restore(flags);
        return ret;
}

static int irqtime_account_si_update(void)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        u64 latest_ns;
        int ret = 0;

        local_irq_save(flags);
        latest_ns = this_cpu_read(cpu_softirq_time);
        if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
                ret = 1;
        local_irq_restore(flags);
        return ret;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
#ifdef CONFIG_CGROUP_CPUACCT
        struct kernel_cpustat *kcpustat;
        struct cpuacct *ca;
#endif
        /*
         * Since all updates are sure to touch the root cgroup, we
         * get ourselves ahead and touch it first.  If the root cgroup
         * is the only cgroup, then nothing else should be necessary.
         */
        __get_cpu_var(kernel_cpustat).cpustat[index] += tmp;

#ifdef CONFIG_CGROUP_CPUACCT
        if (unlikely(!cpuacct_subsys.active))
                return;

        rcu_read_lock();
        ca = task_ca(p);
        while (ca && (ca != &root_cpuacct)) {
                kcpustat = this_cpu_ptr(ca->cpustat);
                kcpustat->cpustat[index] += tmp;
                ca = parent_ca(ca);
        }
        rcu_read_unlock();
#endif
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
                       cputime_t cputime_scaled)
{
        int index;

        /* Add user time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);

        index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

        /* Add user time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for user time used */
        acct_update_integrals(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
                               cputime_t cputime_scaled)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        /* Add guest time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);
        p->gtime += cputime;

        /* Add guest time to cpustat. */
        if (TASK_NICE(p) > 0) {
                cpustat[CPUTIME_NICE] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
        } else {
                cpustat[CPUTIME_USER] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST] += (__force u64) cputime;
        }
}
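
/*
 * Note: account_guest_time() above charges the time twice, once to the
 * user/nice bucket and once to the guest/guest_nice bucket, so in the raw
 * cpustat (and in /proc/stat, which exports it) the user fields already
 * include guest time; readers that want the two shown separately have to
 * subtract guest from user themselves.
 */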
/*
 * Account system cpu time to a process and the desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
                           cputime_t cputime_scaled, int index)
{
        /* Add system time to process. */
        p->stime += cputime;
        p->stimescaled += cputime_scaled;
        account_group_system_time(p, cputime);

        /* Add system time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for system time used */
        acct_update_integrals(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
                         cputime_t cputime, cputime_t cputime_scaled)
{
        int index;

        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                account_guest_time(p, cputime, cputime_scaled);
                return;
        }

        if (hardirq_count() - hardirq_offset)
                index = CPUTIME_IRQ;
        else if (in_serving_softirq())
                index = CPUTIME_SOFTIRQ;
        else
                index = CPUTIME_SYSTEM;

        __account_system_time(p, cputime, cputime_scaled, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        struct rq *rq = this_rq();

        if (atomic_read(&rq->nr_iowait) > 0)
                cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
        else
                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

static __always_inline bool steal_account_process_tick(void)
{
#ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
                u64 steal, st = 0;

                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;

                st = steal_ticks(steal);
                this_rq()->prev_steal_time += st * TICK_NSEC;

                account_steal_time(st);
                return st;
        }
#endif
        return false;
}
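
/*
 * steal_ticks(), used above, is defined with the scheduler internals
 * (kernel/sched/sched.h in this tree): it converts a nanosecond delta
 * of stolen time into whole ticks.  A sketch of the idea, with an
 * illustrative name rather than the real helper; note that because
 * prev_steal_time above only advances by st * TICK_NSEC, any sub-tick
 * remainder is carried over to the next tick.
 */
static inline u64 steal_ticks_sketch(u64 steal_ns)
{
        if (unlikely(steal_ns > NSEC_PER_SEC))
                return div_u64(steal_ns, TICK_NSEC);

        return __iter_div_u64_rem(steal_ns, TICK_NSEC, &steal_ns);
}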
/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
        struct signal_struct *sig = tsk->signal;
        struct task_struct *t;

        times->utime = sig->utime;
        times->stime = sig->stime;
        times->sum_exec_runtime = sig->sum_sched_runtime;

        rcu_read_lock();
        /* make sure we can trust tsk->thread_group list */
        if (!likely(pid_alive(tsk)))
                goto out;

        t = tsk;
        do {
                times->utime += t->utime;
                times->stime += t->stime;
                times->sum_exec_runtime += task_sched_runtime(t);
        } while_each_thread(tsk, t);
out:
        rcu_read_unlock();
}

#ifndef CONFIG_VIRT_CPU_ACCOUNTING

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: whether the tick is from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done both for system and user time as there is
 * no timer going off while we are on hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on irq or
 * softirq time, as those no longer count in task exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         struct rq *rq)
{
        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        if (steal_account_process_tick())
                return;

        if (irqtime_account_hi_update()) {
                cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
        } else if (irqtime_account_si_update()) {
                cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
        } else if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time does not get accounted in cpu_softirq_time.
                 * So, we have to handle it separately here.
                 * Also, p->stime needs to be updated for ksoftirqd.
                 */
                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
                                      CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
        } else if (p == rq->idle) {
                account_idle_time(cputime_one_jiffy);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
        } else {
                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
                                      CPUTIME_SYSTEM);
        }
}

static void irqtime_account_idle_ticks(int ticks)
{
        int i;
        struct rq *rq = this_rq();

        for (i = 0; i < ticks; i++)
                irqtime_account_process_tick(current, 0, rq);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static void irqtime_account_idle_ticks(int ticks) {}
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
        struct rq *rq = this_rq();

        if (sched_clock_irqtime) {
                irqtime_account_process_tick(p, user_tick, rq);
                return;
        }

        if (steal_account_process_tick())
                return;

        if (user_tick)
                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
                                    one_jiffy_scaled);
        else
                account_idle_time(cputime_one_jiffy);
}

/*
 * Account multiple ticks of steal time.
 * @p: the process from which the cpu time has been stolen
 * @ticks: number of stolen ticks
 */
void account_steal_ticks(unsigned long ticks)
{
        account_steal_time(jiffies_to_cputime(ticks));
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of idle ticks
 */
void account_idle_ticks(unsigned long ticks)
{
        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }

        account_idle_time(jiffies_to_cputime(ticks));
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        *ut = p->utime;
        *st = p->stime;
}

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);

        *ut = cputime.utime;
        *st = cputime.stime;
}

void vtime_account_system_irqsafe(struct task_struct *tsk)
{
        unsigned long flags;

        local_irq_save(flags);
        vtime_account_system(tsk);
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);

#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_task_switch(struct task_struct *prev)
{
        if (is_idle_task(prev))
                vtime_account_idle(prev);
        else
                vtime_account_system(prev);

        vtime_account_user(prev);
        arch_vtime_task_switch(prev);
}
#endif

/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle().  Archs that
 * have another meaning of the idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account(struct task_struct *tsk)
{
        if (in_interrupt() || !is_idle_task(tsk))
                vtime_account_system(tsk);
        else
                vtime_account_idle(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

#else /* !CONFIG_VIRT_CPU_ACCOUNTING */

#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
#endif

static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
{
        u64 temp = (__force u64) rtime;

        temp *= (__force u64) utime;

        if (sizeof(cputime_t) == 4)
                temp = div_u64(temp, (__force u32) total);
        else
                temp = div64_u64(temp, (__force u64) total);

        return (__force cputime_t) temp;
}

/*
 * Adjust the tick based cputime's random precision against the
 * scheduler runtime accounting.
 */
static void cputime_adjust(struct task_cputime *curr,
                           struct cputime *prev,
                           cputime_t *ut, cputime_t *st)
{
        cputime_t rtime, utime, total;

        utime = curr->utime;
        total = utime + curr->stime;

        /*
         * Tick based cputime accounting depends on whether the random
         * scheduling timeslices of a task happen to be interrupted by the
         * timer or not.  Depending on these circumstances, the number of
         * these interrupts may be over- or under-estimated, matching the
         * real user and system cputime with a variable precision.
         *
         * Fix this by scaling these tick based values against the total
         * runtime accounted by the CFS scheduler.
         */
        rtime = nsecs_to_cputime(curr->sum_exec_runtime);

        if (total)
                utime = scale_utime(utime, rtime, total);
        else
                utime = rtime;

        /*
         * If the tick based count grows faster than the scheduler one,
         * the result of the scaling may go backward.
         * Let's enforce monotonicity.
         */
        prev->utime = max(prev->utime, utime);
        prev->stime = max(prev->stime, rtime - prev->utime);

        *ut = prev->utime;
        *st = prev->stime;
}
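
/*
 * Worked example of the adjustment above (illustrative numbers): if the
 * tick counters sampled utime = 2 and stime = 8 ticks (total = 10) while
 * CFS accounted rtime = 20 ticks of actual runtime, the scaled utime is
 * 20 * 2 / 10 = 4 ticks and stime becomes rtime - utime = 16 ticks
 * (assuming the previous snapshot was smaller), so the user/system split
 * is preserved but rescaled to the precise runtime.
 */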
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime = {
                .utime = p->utime,
                .stime = p->stime,
                .sum_exec_runtime = p->se.sum_exec_runtime,
        };

        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}

/*
 * Must be called with siglock held.
 */
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);
        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */