timer_tsc.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. /*
  2. * This code largely moved from arch/i386/kernel/time.c.
  3. * See comments there for proper credits.
  4. *
  5. * 2004-06-25 Jesper Juhl
  6. * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
  7. * failing to inline.
  8. */
  9. #include <linux/spinlock.h>
  10. #include <linux/init.h>
  11. #include <linux/timex.h>
  12. #include <linux/errno.h>
  13. #include <linux/cpufreq.h>
  14. #include <linux/string.h>
  15. #include <linux/jiffies.h>
  16. #include <asm/timer.h>
  17. #include <asm/io.h>
/* processor.h for tsc_disable flag */
  19. #include <asm/processor.h>
  20. #include "io_ports.h"
  21. #include "mach_timer.h"
  22. #include <asm/hpet.h>
  23. #include <asm/i8253.h>
  24. #ifdef CONFIG_HPET_TIMER
  25. static unsigned long hpet_usec_quotient;
  26. static unsigned long hpet_last;
  27. static struct timer_opts timer_tsc;
  28. #endif
  29. static inline void cpufreq_delayed_get(void);
  30. int tsc_disable __devinitdata = 0;
  31. static int use_tsc;
  32. /* Number of usecs that the last interrupt was delayed */
  33. static int delay_at_last_interrupt;
  34. static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
  35. static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
  36. static unsigned long long monotonic_base;
  37. static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
  38. /* Avoid compensating for lost ticks before TSCs are synched */
  39. static int detect_lost_ticks;
  40. static int __init start_lost_tick_compensation(void)
  41. {
  42. detect_lost_ticks = 1;
  43. return 0;
  44. }
  45. late_initcall(start_lost_tick_compensation);
  46. /* convert from cycles(64bits) => nanoseconds (64bits)
  47. * basic equation:
  48. * ns = cycles / (freq / ns_per_sec)
  49. * ns = cycles * (ns_per_sec / freq)
  50. * ns = cycles * (10^9 / (cpu_khz * 10^3))
  51. * ns = cycles * (10^6 / cpu_khz)
  52. *
  53. * Then we use scaling math (suggested by george@mvista.com) to get:
  54. * ns = cycles * (10^6 * SC / cpu_khz) / SC
  55. * ns = cycles * cyc2ns_scale / SC
  56. *
  57. * And since SC is a constant power of two, we can convert the div
  58. * into a shift.
  59. *
  60. * We can use khz divisor instead of mhz to keep a better percision, since
  61. * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
  62. * (mathieu.desnoyers@polymtl.ca)
  63. *
  64. * -johnstul@us.ibm.com "math is hard, lets go shopping!"
  65. */
  66. static unsigned long cyc2ns_scale __read_mostly;
  67. #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
  68. static inline void set_cyc2ns_scale(unsigned long cpu_khz)
  69. {
  70. cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
  71. }
  72. static inline unsigned long long cycles_2_ns(unsigned long long cyc)
  73. {
  74. return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
  75. }
  76. static int count2; /* counter for mark_offset_tsc() */
  77. /* Cached *multiplier* to convert TSC counts to microseconds.
  78. * (see the equation below).
  79. * Equal to 2^32 * (1 / (clocks per usec) ).
  80. * Initialized in time_init.
  81. */
  82. static unsigned long fast_gettimeoffset_quotient;
  83. static unsigned long get_offset_tsc(void)
  84. {
  85. register unsigned long eax, edx;
  86. /* Read the Time Stamp Counter */
  87. rdtsc(eax,edx);
  88. /* .. relative to previous jiffy (32 bits is enough) */
  89. eax -= last_tsc_low; /* tsc_low delta */
  90. /*
  91. * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
  92. * = (tsc_low delta) * (usecs_per_clock)
  93. * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
  94. *
  95. * Using a mull instead of a divl saves up to 31 clock cycles
  96. * in the critical path.
  97. */
  98. __asm__("mull %2"
  99. :"=a" (eax), "=d" (edx)
  100. :"rm" (fast_gettimeoffset_quotient),
  101. "0" (eax));
  102. /* our adjusted time offset in microseconds */
  103. return delay_at_last_interrupt + edx;
  104. }
  105. static unsigned long long monotonic_clock_tsc(void)
  106. {
  107. unsigned long long last_offset, this_offset, base;
  108. unsigned seq;
  109. /* atomically read monotonic base & last_offset */
  110. do {
  111. seq = read_seqbegin(&monotonic_lock);
  112. last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
  113. base = monotonic_base;
  114. } while (read_seqretry(&monotonic_lock, seq));
  115. /* Read the Time Stamp Counter */
  116. rdtscll(this_offset);
  117. /* return the value in ns */
  118. return base + cycles_2_ns(this_offset - last_offset);
  119. }
  120. /*
  121. * Scheduler clock - returns current time in nanosec units.
  122. */
  123. unsigned long long sched_clock(void)
  124. {
  125. unsigned long long this_offset;
  126. /*
  127. * In the NUMA case we dont use the TSC as they are not
  128. * synchronized across all CPUs.
  129. */
  130. #ifndef CONFIG_NUMA
  131. if (!use_tsc)
  132. #endif
  133. /* no locking but a rare wrong value is not a big deal */
  134. return jiffies_64 * (1000000000 / HZ);
  135. /* Read the Time Stamp Counter */
  136. rdtscll(this_offset);
  137. /* return the value in ns */
  138. return cycles_2_ns(this_offset);
  139. }
  140. static void delay_tsc(unsigned long loops)
  141. {
  142. unsigned long bclock, now;
  143. rdtscl(bclock);
  144. do
  145. {
  146. rep_nop();
  147. rdtscl(now);
  148. } while ((now-bclock) < loops);
  149. }
  150. #ifdef CONFIG_HPET_TIMER
  151. static void mark_offset_tsc_hpet(void)
  152. {
  153. unsigned long long this_offset, last_offset;
  154. unsigned long offset, temp, hpet_current;
  155. write_seqlock(&monotonic_lock);
  156. last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
  157. /*
  158. * It is important that these two operations happen almost at
  159. * the same time. We do the RDTSC stuff first, since it's
  160. * faster. To avoid any inconsistencies, we need interrupts
  161. * disabled locally.
  162. */
  163. /*
  164. * Interrupts are just disabled locally since the timer irq
  165. * has the SA_INTERRUPT flag set. -arca
  166. */
  167. /* read Pentium cycle counter */
  168. hpet_current = hpet_readl(HPET_COUNTER);
  169. rdtsc(last_tsc_low, last_tsc_high);
  170. /* lost tick compensation */
  171. offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
  172. if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
  173. && detect_lost_ticks) {
  174. int lost_ticks = (offset - hpet_last) / hpet_tick;
  175. jiffies_64 += lost_ticks;
  176. }
  177. hpet_last = hpet_current;
  178. /* update the monotonic base value */
  179. this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
  180. monotonic_base += cycles_2_ns(this_offset - last_offset);
  181. write_sequnlock(&monotonic_lock);
  182. /* calculate delay_at_last_interrupt */
  183. /*
  184. * Time offset = (hpet delta) * ( usecs per HPET clock )
  185. * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
  186. * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
  187. * Where,
  188. * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
  189. */
  190. delay_at_last_interrupt = hpet_current - offset;
  191. ASM_MUL64_REG(temp, delay_at_last_interrupt,
  192. hpet_usec_quotient, delay_at_last_interrupt);
  193. }
  194. #endif
  195. #ifdef CONFIG_CPU_FREQ
  196. #include <linux/workqueue.h>
  197. static unsigned int cpufreq_delayed_issched = 0;
  198. static unsigned int cpufreq_init = 0;
  199. static struct work_struct cpufreq_delayed_get_work;
  200. static void handle_cpufreq_delayed_get(void *v)
  201. {
  202. unsigned int cpu;
  203. for_each_online_cpu(cpu) {
  204. cpufreq_get(cpu);
  205. }
  206. cpufreq_delayed_issched = 0;
  207. }
  208. /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
  209. * to verify the CPU frequency the timing core thinks the CPU is running
  210. * at is still correct.
  211. */
  212. static inline void cpufreq_delayed_get(void)
  213. {
  214. if (cpufreq_init && !cpufreq_delayed_issched) {
  215. cpufreq_delayed_issched = 1;
  216. printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
  217. schedule_work(&cpufreq_delayed_get_work);
  218. }
  219. }
  220. /* If the CPU frequency is scaled, TSC-based delays will need a different
  221. * loops_per_jiffy value to function properly.
  222. */
  223. static unsigned int ref_freq = 0;
  224. static unsigned long loops_per_jiffy_ref = 0;
  225. #ifndef CONFIG_SMP
  226. static unsigned long fast_gettimeoffset_ref = 0;
  227. static unsigned int cpu_khz_ref = 0;
  228. #endif
  229. static int
  230. time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
  231. void *data)
  232. {
  233. struct cpufreq_freqs *freq = data;
  234. if (val != CPUFREQ_RESUMECHANGE)
  235. write_seqlock_irq(&xtime_lock);
  236. if (!ref_freq) {
  237. if (!freq->old){
  238. ref_freq = freq->new;
  239. goto end;
  240. }
  241. ref_freq = freq->old;
  242. loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
  243. #ifndef CONFIG_SMP
  244. fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
  245. cpu_khz_ref = cpu_khz;
  246. #endif
  247. }
  248. if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
  249. (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
  250. (val == CPUFREQ_RESUMECHANGE)) {
  251. if (!(freq->flags & CPUFREQ_CONST_LOOPS))
  252. cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
  253. #ifndef CONFIG_SMP
  254. if (cpu_khz)
  255. cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
  256. if (use_tsc) {
  257. if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
  258. fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
  259. set_cyc2ns_scale(cpu_khz);
  260. }
  261. }
  262. #endif
  263. }
  264. end:
  265. if (val != CPUFREQ_RESUMECHANGE)
  266. write_sequnlock_irq(&xtime_lock);
  267. return 0;
  268. }
  269. static struct notifier_block time_cpufreq_notifier_block = {
  270. .notifier_call = time_cpufreq_notifier
  271. };
  272. static int __init cpufreq_tsc(void)
  273. {
  274. int ret;
  275. INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
  276. ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
  277. CPUFREQ_TRANSITION_NOTIFIER);
  278. if (!ret)
  279. cpufreq_init = 1;
  280. return ret;
  281. }
  282. core_initcall(cpufreq_tsc);
  283. #else /* CONFIG_CPU_FREQ */
  284. static inline void cpufreq_delayed_get(void) { return; }
  285. #endif
  286. int recalibrate_cpu_khz(void)
  287. {
  288. #ifndef CONFIG_SMP
  289. unsigned int cpu_khz_old = cpu_khz;
  290. if (cpu_has_tsc) {
  291. local_irq_disable();
  292. init_cpu_khz();
  293. local_irq_enable();
  294. cpu_data[0].loops_per_jiffy =
  295. cpufreq_scale(cpu_data[0].loops_per_jiffy,
  296. cpu_khz_old,
  297. cpu_khz);
  298. return 0;
  299. } else
  300. return -ENODEV;
  301. #else
  302. return -ENODEV;
  303. #endif
  304. }
  305. EXPORT_SYMBOL(recalibrate_cpu_khz);
  306. static void mark_offset_tsc(void)
  307. {
  308. unsigned long lost,delay;
  309. unsigned long delta = last_tsc_low;
  310. int count;
  311. int countmp;
  312. static int count1 = 0;
  313. unsigned long long this_offset, last_offset;
  314. static int lost_count = 0;
  315. write_seqlock(&monotonic_lock);
  316. last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
  317. /*
  318. * It is important that these two operations happen almost at
  319. * the same time. We do the RDTSC stuff first, since it's
  320. * faster. To avoid any inconsistencies, we need interrupts
  321. * disabled locally.
  322. */
  323. /*
  324. * Interrupts are just disabled locally since the timer irq
  325. * has the SA_INTERRUPT flag set. -arca
  326. */
  327. /* read Pentium cycle counter */
  328. rdtsc(last_tsc_low, last_tsc_high);
  329. spin_lock(&i8253_lock);
  330. outb_p(0x00, PIT_MODE); /* latch the count ASAP */
  331. count = inb_p(PIT_CH0); /* read the latched count */
  332. count |= inb(PIT_CH0) << 8;
  333. /*
  334. * VIA686a test code... reset the latch if count > max + 1
  335. * from timer_pit.c - cjb
  336. */
  337. if (count > LATCH) {
  338. outb_p(0x34, PIT_MODE);
  339. outb_p(LATCH & 0xff, PIT_CH0);
  340. outb(LATCH >> 8, PIT_CH0);
  341. count = LATCH - 1;
  342. }
  343. spin_unlock(&i8253_lock);
  344. if (pit_latch_buggy) {
  345. /* get center value of last 3 time lutch */
  346. if ((count2 >= count && count >= count1)
  347. || (count1 >= count && count >= count2)) {
  348. count2 = count1; count1 = count;
  349. } else if ((count1 >= count2 && count2 >= count)
  350. || (count >= count2 && count2 >= count1)) {
  351. countmp = count;count = count2;
  352. count2 = count1;count1 = countmp;
  353. } else {
  354. count2 = count1; count1 = count; count = count1;
  355. }
  356. }
  357. /* lost tick compensation */
  358. delta = last_tsc_low - delta;
  359. {
  360. register unsigned long eax, edx;
  361. eax = delta;
  362. __asm__("mull %2"
  363. :"=a" (eax), "=d" (edx)
  364. :"rm" (fast_gettimeoffset_quotient),
  365. "0" (eax));
  366. delta = edx;
  367. }
  368. delta += delay_at_last_interrupt;
  369. lost = delta/(1000000/HZ);
  370. delay = delta%(1000000/HZ);
  371. if (lost >= 2 && detect_lost_ticks) {
  372. jiffies_64 += lost-1;
  373. /* sanity check to ensure we're not always losing ticks */
  374. if (lost_count++ > 100) {
  375. printk(KERN_WARNING "Losing too many ticks!\n");
  376. printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
  377. printk(KERN_WARNING "Possible reasons for this are:\n");
  378. printk(KERN_WARNING " You're running with Speedstep,\n");
  379. printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
  380. printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
  381. printk(KERN_WARNING "Falling back to a sane timesource now.\n");
  382. clock_fallback();
  383. }
  384. /* ... but give the TSC a fair chance */
  385. if (lost_count > 25)
  386. cpufreq_delayed_get();
  387. } else
  388. lost_count = 0;
  389. /* update the monotonic base value */
  390. this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
  391. monotonic_base += cycles_2_ns(this_offset - last_offset);
  392. write_sequnlock(&monotonic_lock);
  393. /* calculate delay_at_last_interrupt */
  394. count = ((LATCH-1) - count) * TICK_SIZE;
  395. delay_at_last_interrupt = (count + LATCH/2) / LATCH;
  396. /* catch corner case where tick rollover occured
  397. * between tsc and pit reads (as noted when
  398. * usec delta is > 90% # of usecs/tick)
  399. */
  400. if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
  401. jiffies_64++;
  402. }
  403. static int __init init_tsc(char* override)
  404. {
  405. /* check clock override */
  406. if (override[0] && strncmp(override,"tsc",3)) {
  407. #ifdef CONFIG_HPET_TIMER
  408. if (is_hpet_enabled()) {
  409. printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
  410. } else
  411. #endif
  412. {
  413. return -ENODEV;
  414. }
  415. }
  416. /*
  417. * If we have APM enabled or the CPU clock speed is variable
  418. * (CPU stops clock on HLT or slows clock to save power)
  419. * then the TSC timestamps may diverge by up to 1 jiffy from
  420. * 'real time' but nothing will break.
  421. * The most frequent case is that the CPU is "woken" from a halt
  422. * state by the timer interrupt itself, so we get 0 error. In the
  423. * rare cases where a driver would "wake" the CPU and request a
  424. * timestamp, the maximum error is < 1 jiffy. But timestamps are
  425. * still perfectly ordered.
  426. * Note that the TSC counter will be reset if APM suspends
  427. * to disk; this won't break the kernel, though, 'cuz we're
  428. * smart. See arch/i386/kernel/apm.c.
  429. */
  430. /*
  431. * Firstly we have to do a CPU check for chips with
  432. * a potentially buggy TSC. At this point we haven't run
  433. * the ident/bugs checks so we must run this hook as it
  434. * may turn off the TSC flag.
  435. *
  436. * NOTE: this doesn't yet handle SMP 486 machines where only
  437. * some CPU's have a TSC. Thats never worked and nobody has
  438. * moaned if you have the only one in the world - you fix it!
  439. */
  440. count2 = LATCH; /* initialize counter for mark_offset_tsc() */
  441. if (cpu_has_tsc) {
  442. unsigned long tsc_quotient;
  443. #ifdef CONFIG_HPET_TIMER
  444. if (is_hpet_enabled() && hpet_use_timer) {
  445. unsigned long result, remain;
  446. printk("Using TSC for gettimeofday\n");
  447. tsc_quotient = calibrate_tsc_hpet(NULL);
  448. timer_tsc.mark_offset = &mark_offset_tsc_hpet;
  449. /*
  450. * Math to calculate hpet to usec multiplier
  451. * Look for the comments at get_offset_tsc_hpet()
  452. */
  453. ASM_DIV64_REG(result, remain, hpet_tick,
  454. 0, KERNEL_TICK_USEC);
  455. if (remain > (hpet_tick >> 1))
  456. result++; /* rounding the result */
  457. hpet_usec_quotient = result;
  458. } else
  459. #endif
  460. {
  461. tsc_quotient = calibrate_tsc();
  462. }
  463. if (tsc_quotient) {
  464. fast_gettimeoffset_quotient = tsc_quotient;
  465. use_tsc = 1;
  466. /*
  467. * We could be more selective here I suspect
  468. * and just enable this for the next intel chips ?
  469. */
  470. /* report CPU clock rate in Hz.
  471. * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
  472. * clock/second. Our precision is about 100 ppm.
  473. */
  474. { unsigned long eax=0, edx=1000;
  475. __asm__("divl %2"
  476. :"=a" (cpu_khz), "=d" (edx)
  477. :"r" (tsc_quotient),
  478. "0" (eax), "1" (edx));
  479. printk("Detected %u.%03u MHz processor.\n",
  480. cpu_khz / 1000, cpu_khz % 1000);
  481. }
  482. set_cyc2ns_scale(cpu_khz);
  483. return 0;
  484. }
  485. }
  486. return -ENODEV;
  487. }
  488. static int tsc_resume(void)
  489. {
  490. write_seqlock(&monotonic_lock);
  491. /* Assume this is the last mark offset time */
  492. rdtsc(last_tsc_low, last_tsc_high);
  493. #ifdef CONFIG_HPET_TIMER
  494. if (is_hpet_enabled() && hpet_use_timer)
  495. hpet_last = hpet_readl(HPET_COUNTER);
  496. #endif
  497. write_sequnlock(&monotonic_lock);
  498. return 0;
  499. }
  500. #ifndef CONFIG_X86_TSC
  501. /* disable flag for tsc. Takes effect by clearing the TSC cpu flag
  502. * in cpu/common.c */
  503. static int __init tsc_setup(char *str)
  504. {
  505. tsc_disable = 1;
  506. return 1;
  507. }
  508. #else
  509. static int __init tsc_setup(char *str)
  510. {
  511. printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
  512. "cannot disable TSC.\n");
  513. return 1;
  514. }
  515. #endif
  516. __setup("notsc", tsc_setup);
  517. /************************************************************/
  518. /* tsc timer_opts struct */
  519. static struct timer_opts timer_tsc = {
  520. .name = "tsc",
  521. .mark_offset = mark_offset_tsc,
  522. .get_offset = get_offset_tsc,
  523. .monotonic_clock = monotonic_clock_tsc,
  524. .delay = delay_tsc,
  525. .read_timer = read_timer_tsc,
  526. .resume = tsc_resume,
  527. };
  528. struct init_timer_opts __initdata timer_tsc_init = {
  529. .init = init_tsc,
  530. .opts = &timer_tsc,
  531. };