vmitime.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. /*
  2. * VMI paravirtual timer support routines.
  3. *
  4. * Copyright (C) 2005, VMware, Inc.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  14. * NON INFRINGEMENT. See the GNU General Public License for more
  15. * details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20. *
  21. * Send feedback to dhecht@vmware.com
  22. *
  23. */
  24. /*
  25. * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
  26. * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
  27. * See comments there for proper credits.
  28. */
  29. #include <linux/spinlock.h>
  30. #include <linux/init.h>
  31. #include <linux/errno.h>
  32. #include <linux/jiffies.h>
  33. #include <linux/interrupt.h>
  34. #include <linux/kernel_stat.h>
  35. #include <linux/rcupdate.h>
  36. #include <linux/clocksource.h>
  37. #include <asm/timer.h>
  38. #include <asm/io.h>
  39. #include <asm/apic.h>
  40. #include <asm/div64.h>
  41. #include <asm/timer.h>
  42. #include <asm/desc.h>
  43. #include <asm/vmi.h>
  44. #include <asm/vmi_time.h>
  45. #include <mach_timer.h>
  46. #include <io_ports.h>
  /* Alarm wiring used when (re)arming alarms from the no-idle-HZ paths:
   * the LVTT wiring when a local APIC is configured, IRQ0 wiring otherwise. */
  #if defined(CONFIG_X86_LOCAL_APIC)
  #define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
  #else
  #define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
  #endif
  52. /* Cached VMI operations */
  53. struct vmi_timer_ops vmi_timer_ops;
  54. #ifdef CONFIG_NO_IDLE_HZ
  55. /* /proc/sys/kernel/hz_timer state. */
  56. int sysctl_hz_timer;
  57. /* Some stats */
  58. static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
  59. static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
  60. static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
  61. #endif /* CONFIG_NO_IDLE_HZ */
  62. /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
  63. static int alarm_hz = CONFIG_VMI_ALARM_HZ;
  64. /* Cache of the value get_cycle_frequency / HZ. */
  65. static signed long long cycles_per_jiffy;
  66. /* Cache of the value get_cycle_frequency / alarm_hz. */
  67. static signed long long cycles_per_alarm;
  68. /* The number of cycles accounted for by the 'jiffies'/'xtime' count.
  69. * Protected by xtime_lock. */
  70. static unsigned long long real_cycles_accounted_system;
  71. /* The number of cycles accounted for by update_process_times(), per cpu. */
  72. static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
  73. /* The number of stolen cycles accounted, per cpu. */
  74. static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
  75. /* Clock source. */
  76. static cycle_t read_real_cycles(void)
  77. {
  78. return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
  79. }
  80. static cycle_t read_available_cycles(void)
  81. {
  82. return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
  83. }
  #if 0
  /* Compiled out: would return the current VMI_CYCLES_STOLEN counter. */
  static cycle_t read_stolen_cycles(void)
  {
      cycle_t stolen_now = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);

      return stolen_now;
  }
  #endif /* 0 */
  90. static struct clocksource clocksource_vmi = {
  91. .name = "vmi-timer",
  92. .rating = 450,
  93. .read = read_real_cycles,
  94. .mask = CLOCKSOURCE_MASK(64),
  95. .mult = 0, /* to be set */
  96. .shift = 22,
  97. .flags = CLOCK_SOURCE_IS_CONTINUOUS,
  98. };
  99. /* Timer interrupt handler. */
  100. static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
  101. static struct irqaction vmi_timer_irq = {
  102. .handler = vmi_timer_interrupt,
  103. .flags = IRQF_DISABLED,
  104. .mask = CPU_MASK_NONE,
  105. .name = "VMI-alarm",
  106. };
  107. /* Alarm rate */
  108. static int __init vmi_timer_alarm_rate_setup(char* str)
  109. {
  110. int alarm_rate;
  111. if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
  112. alarm_hz = alarm_rate;
  113. printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
  114. }
  115. return 1;
  116. }
  117. __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
  118. /* Initialization */
  119. static void vmi_get_wallclock_ts(struct timespec *ts)
  120. {
  121. unsigned long long wallclock;
  122. wallclock = vmi_timer_ops.get_wallclock(); // nsec units
  123. ts->tv_nsec = do_div(wallclock, 1000000000);
  124. ts->tv_sec = wallclock;
  125. }
  126. unsigned long vmi_get_wallclock(void)
  127. {
  128. struct timespec ts;
  129. vmi_get_wallclock_ts(&ts);
  130. return ts.tv_sec;
  131. }
  /* Wallclock setter stub: ignores 'now' and always returns -1. */
  int vmi_set_wallclock(unsigned long now)
  {
      (void)now;
      return -1;
  }
  /* Returns the current available-cycles counter (see read_available_cycles()). */
  unsigned long long vmi_get_sched_cycles(void)
  {
      unsigned long long avail = read_available_cycles();

      return avail;
  }
  140. unsigned long vmi_cpu_khz(void)
  141. {
  142. unsigned long long khz;
  143. khz = vmi_timer_ops.get_cycle_frequency();
  144. (void)do_div(khz, 1000);
  145. return khz;
  146. }
  147. void __init vmi_time_init(void)
  148. {
  149. unsigned long long cycles_per_sec, cycles_per_msec;
  150. unsigned long flags;
  151. local_irq_save(flags);
  152. setup_irq(0, &vmi_timer_irq);
  153. #ifdef CONFIG_X86_LOCAL_APIC
  154. set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
  155. #endif
  156. real_cycles_accounted_system = read_real_cycles();
  157. per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
  158. cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
  159. cycles_per_jiffy = cycles_per_sec;
  160. (void)do_div(cycles_per_jiffy, HZ);
  161. cycles_per_alarm = cycles_per_sec;
  162. (void)do_div(cycles_per_alarm, alarm_hz);
  163. cycles_per_msec = cycles_per_sec;
  164. (void)do_div(cycles_per_msec, 1000);
  165. printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
  166. "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
  167. cycles_per_alarm);
  168. clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
  169. clocksource_vmi.shift);
  170. if (clocksource_register(&clocksource_vmi))
  171. printk(KERN_WARNING "Error registering VMITIME clocksource.");
  172. /* Disable PIT. */
  173. outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
  174. /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
  175. * reduce the latency calling update_process_times. */
  176. vmi_timer_ops.set_alarm(
  177. VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
  178. per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
  179. cycles_per_alarm);
  180. local_irq_restore(flags);
  181. }
  182. #ifdef CONFIG_X86_LOCAL_APIC
  183. void __init vmi_timer_setup_boot_alarm(void)
  184. {
  185. local_irq_disable();
  186. /* Route the interrupt to the correct vector. */
  187. apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
  188. /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
  189. vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
  190. vmi_timer_ops.set_alarm(
  191. VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
  192. per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
  193. cycles_per_alarm);
  194. local_irq_enable();
  195. }
  196. /* Initialize the time accounting variables for an AP on an SMP system.
  197. * Also, set the local alarm for the AP. */
  198. void __devinit vmi_timer_setup_secondary_alarm(void)
  199. {
  200. int cpu = smp_processor_id();
  201. /* Route the interrupt to the correct vector. */
  202. apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
  203. per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
  204. vmi_timer_ops.set_alarm(
  205. VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
  206. per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
  207. cycles_per_alarm);
  208. }
  209. #endif
  210. /* Update system wide (real) time accounting (e.g. jiffies, xtime). */
  211. static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
  212. {
  213. long long cycles_not_accounted;
  214. write_seqlock(&xtime_lock);
  215. cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
  216. while (cycles_not_accounted >= cycles_per_jiffy) {
  217. /* systems wide jiffies. */
  218. do_timer(1);
  219. cycles_not_accounted -= cycles_per_jiffy;
  220. real_cycles_accounted_system += cycles_per_jiffy;
  221. }
  222. write_sequnlock(&xtime_lock);
  223. }
  224. /* Update per-cpu process times. */
  225. static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
  226. unsigned long long cur_process_times_cycles)
  227. {
  228. long long cycles_not_accounted;
  229. cycles_not_accounted = cur_process_times_cycles -
  230. per_cpu(process_times_cycles_accounted_cpu, cpu);
  231. while (cycles_not_accounted >= cycles_per_jiffy) {
  232. /* Account time to the current process. This includes
  233. * calling into the scheduler to decrement the timeslice
  234. * and possibly reschedule.*/
  235. update_process_times(user_mode(regs));
  236. /* XXX handle /proc/profile multiplier. */
  237. profile_tick(CPU_PROFILING);
  238. cycles_not_accounted -= cycles_per_jiffy;
  239. per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
  240. }
  241. }
  242. #ifdef CONFIG_NO_IDLE_HZ
  243. /* Update per-cpu idle times. Used when a no-hz halt is ended. */
  244. static void vmi_account_no_hz_idle_cycles(int cpu,
  245. unsigned long long cur_process_times_cycles)
  246. {
  247. long long cycles_not_accounted;
  248. unsigned long no_idle_hz_jiffies = 0;
  249. cycles_not_accounted = cur_process_times_cycles -
  250. per_cpu(process_times_cycles_accounted_cpu, cpu);
  251. while (cycles_not_accounted >= cycles_per_jiffy) {
  252. no_idle_hz_jiffies++;
  253. cycles_not_accounted -= cycles_per_jiffy;
  254. per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
  255. }
  256. /* Account time to the idle process. */
  257. account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
  258. }
  259. #endif
  260. /* Update per-cpu stolen time. */
  261. static void vmi_account_stolen_cycles(int cpu,
  262. unsigned long long cur_real_cycles,
  263. unsigned long long cur_avail_cycles)
  264. {
  265. long long stolen_cycles_not_accounted;
  266. unsigned long stolen_jiffies = 0;
  267. if (cur_real_cycles < cur_avail_cycles)
  268. return;
  269. stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
  270. per_cpu(stolen_cycles_accounted_cpu, cpu);
  271. while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
  272. stolen_jiffies++;
  273. stolen_cycles_not_accounted -= cycles_per_jiffy;
  274. per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
  275. }
  276. /* HACK: pass NULL to force time onto cpustat->steal. */
  277. account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
  278. }
  /*
   * Common body of the IRQ0 handler (UP without local APIC) and the
   * local-APIC LVTT handler (UP with local APIC, or SMP).  Samples both
   * cycle counters once, then runs the three accounting steps.
   */
  static void vmi_local_timer_interrupt(int cpu)
  {
      unsigned long long real_now = read_real_cycles();
      unsigned long long avail_now = read_available_cycles();

      /* System-wide (real) time state: xtime, jiffies. */
      vmi_account_real_cycles(real_now);

      /* Per-cpu process times. */
      vmi_account_process_times_cycles(get_irq_regs(), cpu, avail_now);

      /* Time stolen from this cpu by the hypervisor. */
      vmi_account_stolen_cycles(cpu, real_now, avail_now);
  }
  293. #ifdef CONFIG_NO_IDLE_HZ
  294. /* Must be called only from idle loop, with interrupts disabled. */
  295. int vmi_stop_hz_timer(void)
  296. {
  297. /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
  298. unsigned long seq, next;
  299. unsigned long long real_cycles_expiry;
  300. int cpu = smp_processor_id();
  301. BUG_ON(!irqs_disabled());
  302. if (sysctl_hz_timer != 0)
  303. return 0;
  304. cpu_set(cpu, nohz_cpu_mask);
  305. smp_mb();
  306. if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
  307. (next = next_timer_interrupt(),
  308. time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
  309. cpu_clear(cpu, nohz_cpu_mask);
  310. return 0;
  311. }
  312. /* Convert jiffies to the real cycle counter. */
  313. do {
  314. seq = read_seqbegin(&xtime_lock);
  315. real_cycles_expiry = real_cycles_accounted_system +
  316. (long)(next - jiffies) * cycles_per_jiffy;
  317. } while (read_seqretry(&xtime_lock, seq));
  318. /* This cpu is going idle. Disable the periodic alarm. */
  319. vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
  320. per_cpu(idle_start_jiffies, cpu) = jiffies;
  321. /* Set the real time alarm to expire at the next event. */
  322. vmi_timer_ops.set_alarm(
  323. VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
  324. real_cycles_expiry, 0);
  325. return 1;
  326. }
  327. static void vmi_reenable_hz_timer(int cpu)
  328. {
  329. /* For /proc/vmi/info idle_hz stat. */
  330. per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
  331. per_cpu(vmi_idle_no_hz_irqs, cpu)++;
  332. /* Don't bother explicitly cancelling the one-shot alarm -- at
  333. * worse we will receive a spurious timer interrupt. */
  334. vmi_timer_ops.set_alarm(
  335. VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
  336. per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
  337. cycles_per_alarm);
  338. /* Indicate this cpu is no longer nohz idle. */
  339. cpu_clear(cpu, nohz_cpu_mask);
  340. }
  /*
   * Called from interrupt handlers when the (local) HZ timer is disabled.
   * Accounts the time that passed while it was off (system time, idle
   * time and stolen time, in that order) and then re-enables the periodic
   * alarm.  Interrupts must be disabled.
   */
  void vmi_account_time_restart_hz_timer(void)
  {
      unsigned long long real_now, avail_now;
      int this_cpu = smp_processor_id();

      BUG_ON(!irqs_disabled());

      /* Sample both counters once for all accounting below. */
      real_now = read_real_cycles();
      avail_now = read_available_cycles();

      /* System-wide (real) time state: xtime, jiffies. */
      vmi_account_real_cycles(real_now);

      /* Per-cpu idle times. */
      vmi_account_no_hz_idle_cycles(this_cpu, avail_now);

      /* Time stolen from this cpu by the hypervisor. */
      vmi_account_stolen_cycles(this_cpu, real_now, avail_now);

      /* Back to the periodic alarm. */
      vmi_reenable_hz_timer(this_cpu);
  }
  359. #endif /* CONFIG_NO_IDLE_HZ */
  360. /* UP (and no local-APIC) VMI-timer alarm interrupt handler.
  361. * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
  362. * APIC setup and setup_boot_vmi_alarm() is called. */
  363. static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
  364. {
  365. vmi_local_timer_interrupt(smp_processor_id());
  366. return IRQ_HANDLED;
  367. }
  368. #ifdef CONFIG_X86_LOCAL_APIC
  369. /* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
  370. * Also used in UP when CONFIG_X86_LOCAL_APIC.
  371. * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
  372. void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
  373. {
  374. struct pt_regs *old_regs = set_irq_regs(regs);
  375. int cpu = smp_processor_id();
  376. /*
  377. * the NMI deadlock-detector uses this.
  378. */
  379. per_cpu(irq_stat,cpu).apic_timer_irqs++;
  380. /*
  381. * NOTE! We'd better ACK the irq immediately,
  382. * because timer handling can be slow.
  383. */
  384. ack_APIC_irq();
  385. /*
  386. * update_process_times() expects us to have done irq_enter().
  387. * Besides, if we don't timer interrupts ignore the global
  388. * interrupt lock, which is the WrongThing (tm) to do.
  389. */
  390. irq_enter();
  391. vmi_local_timer_interrupt(cpu);
  392. irq_exit();
  393. set_irq_regs(old_regs);
  394. }
  395. #endif /* CONFIG_X86_LOCAL_APIC */