ftape-calibr.c

/*
 *      Copyright (C) 1993-1996 Bas Laarhoven.
 *
 *      This program is free software; you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation; either version 2, or (at your option)
 *      any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program; see the file COPYING.  If not, write to
 *      the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Source: /homes/cvs/ftape-stacked/ftape/lowlevel/ftape-calibr.c,v $
 * $Revision: 1.2 $
 * $Date: 1997/10/05 19:18:08 $
 *
 *      GP calibration routine for processor speed dependent
 *      functions.
 */
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <asm/system.h>
#include <asm/io.h>
#if defined(__alpha__)
# include <asm/hwrpb.h>
#elif defined(__x86_64__)
# include <asm/msr.h>
# include <asm/timex.h>
#elif defined(__i386__)
# include <linux/timex.h>
#endif
#include <linux/ftape.h>
#include "../lowlevel/ftape-tracing.h"
#include "../lowlevel/ftape-calibr.h"
#include "../lowlevel/fdc-io.h"

#undef DEBUG

#if !defined(__alpha__) && !defined(__i386__) && !defined(__x86_64__)
# error Ftape is not implemented for this architecture!
#endif

#if defined(__alpha__) || defined(__x86_64__)
static unsigned long ps_per_cycle = 0;
#endif

static spinlock_t calibr_lock;
/*
 * Note: On Intel PCs, the clock ticks at 100 Hz (HZ == 100) which is
 * too slow for certain timeouts (and that clock doesn't even tick
 * when interrupts are disabled).  For that reason, the 8254 timer is
 * used directly to implement fine-grained timeouts.  However, on
 * Alpha PCs, the 8254 is *not* used to implement the clock tick
 * (which is 1024 Hz, normally) and the 8254 timer runs at some
 * "random" frequency (it seems to run at 18 Hz, but it's not safe to
 * rely on this value).  Instead, we use the Alpha's "rpcc"
 * instruction to read cycle counts.  As this is a 32 bit counter,
 * it will overflow only about once per 21 seconds (on a 200 MHz
 * machine), which is plenty.
 */
unsigned int ftape_timestamp(void)
{
#if defined(__alpha__)
	unsigned long r;

	asm volatile ("rpcc %0" : "=r" (r));
	return r;
#elif defined(__x86_64__)
	unsigned long r;

	rdtscl(r);
	return r;
#elif defined(__i386__)
	/*
	 * Note that there is some time between counter underflowing
	 * and jiffies increasing, so the code below won't always give
	 * correct output.
	 * -Vojtech
	 */
	unsigned long flags;
	__u16 lo;
	__u16 hi;

	spin_lock_irqsave(&calibr_lock, flags);
	outb_p(0x00, 0x43);	/* latch the count ASAP */
	lo = inb_p(0x40);	/* read the latched count */
	lo |= inb(0x40) << 8;
	hi = jiffies;
	spin_unlock_irqrestore(&calibr_lock, flags);

	return ((hi + 1) * (unsigned int) LATCH) - lo;	/* downcounter! */
#endif
}
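
/*
 * Worked example for the i386 path above, assuming the stock
 * constants HZ == 100 and CLOCK_TICK_RATE == 1193182 Hz, so that
 * LATCH == (CLOCK_TICK_RATE + HZ/2) / HZ == 11932.  The 8254 counts
 * *down* from LATCH to 0 once per jiffy, so with jiffies == 5 and a
 * latched count of 4000, the ticks elapsed in the current jiffy are
 * LATCH - 4000 == 7932, and the timestamp is
 *
 *	(5 + 1) * 11932 - 4000 == 67592
 *
 * i.e. 5 full jiffies (59660 ticks) plus the 7932 ticks above.
 */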
static unsigned int short_ftape_timestamp(void)
{
#if defined(__alpha__) || defined(__x86_64__)
	return ftape_timestamp();
#elif defined(__i386__)
	unsigned int count;
	unsigned long flags;

	spin_lock_irqsave(&calibr_lock, flags);
	outb_p(0x00, 0x43);	/* latch the count ASAP */
	count = inb_p(0x40);	/* read the latched count */
	count |= inb(0x40) << 8;
	spin_unlock_irqrestore(&calibr_lock, flags);

	return (LATCH - count);	/* normal: downcounter */
#endif
}
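
/*
 * Note that the short form never consults jiffies on i386, so it
 * wraps around every LATCH ticks, i.e. every jiffy (10 ms at
 * HZ == 100).  Intervals longer than that alias modulo LATCH: for
 * instance, two samples taken 10.5 ms apart differ by only ~0.5 ms
 * worth of ticks, which is why callers must keep the measured span
 * below 1/HZ (see ftape_timediff() below).
 */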
static unsigned int diff(unsigned int t0, unsigned int t1)
{
#if defined(__alpha__) || defined(__x86_64__)
	return (t1 - t0);
#elif defined(__i386__)
	/*
	 * This is tricky: to work for both short and full
	 * ftape_timestamps we'll have to discriminate between these.
	 * If it _looks_ like short stamps with wraparound, we'll
	 * assume they are.  This will generate a small error if it
	 * really was a (very large) delta from full ftape_timestamps.
	 */
	return (t1 <= t0 && t0 <= LATCH) ? t1 + LATCH - t0 : t1 - t0;
#endif
}
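
/*
 * Worked example of the wraparound heuristic, again assuming
 * LATCH == 11932: with short stamps t0 == 11000 and t1 == 500, both
 * t1 <= t0 and t0 <= LATCH hold, so the counter is assumed to have
 * wrapped once and the result is
 *
 *	500 + 11932 - 11000 == 1432 ticks
 *
 * With full timestamps, t0 <= LATCH is almost never true, so the
 * plain t1 - t0 branch is taken instead.
 */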
static unsigned int usecs(unsigned int count)
{
#if defined(__alpha__) || defined(__x86_64__)
	return (ps_per_cycle * count) / 1000000UL;
#elif defined(__i386__)
	return (10000 * count) / ((CLOCK_TICK_RATE + 50) / 100);
#endif
}
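
/*
 * On the i386 conversion, assuming CLOCK_TICK_RATE == 1193182 Hz:
 * (CLOCK_TICK_RATE + 50) / 100 rounds to 11932, so
 *
 *	usecs(count) == 10000 * count / 11932
 *
 * i.e. each 8254 tick is worth about 0.838 usec.  The picosecond
 * path above simply divides out the 10^6 ps-per-usec factor.
 */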
unsigned int ftape_timediff(unsigned int t0, unsigned int t1)
{
	/*
	 * Calculate difference in usec for ftape_timestamp results
	 * t0 & t1.  Note that on the i386 platform with short
	 * time-stamps, the maximum allowed timespan is 1/HZ or we'll
	 * lose ticks!
	 */
	return usecs(diff(t0, t1));
}
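
/*
 * A minimal usage sketch: wait_for_drive() stands for any
 * hypothetical operation to be timed, not a function from this
 * driver.
 *
 *	unsigned int t0 = ftape_timestamp();
 *	wait_for_drive();
 *	TRACE(ft_t_info, "waited %u usec",
 *	      ftape_timediff(t0, ftape_timestamp()));
 */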
/* To get an indication of the I/O performance,
 * measure the duration of the inb() function.
 */
static void time_inb(void)
{
	int i;
	int t0, t1;
	unsigned long flags;
	int status;
	TRACE_FUN(ft_t_any);

	spin_lock_irqsave(&calibr_lock, flags);
	t0 = short_ftape_timestamp();
	for (i = 0; i < 1000; ++i) {
		status = inb(fdc.msr);
	}
	t1 = short_ftape_timestamp();
	spin_unlock_irqrestore(&calibr_lock, flags);
	/* total usec for 1000 calls == nsec per single call */
	TRACE(ft_t_info, "inb() duration: %d nsec", ftape_timediff(t0, t1));
	TRACE_EXIT;
}
static void init_clock(void)
{
	TRACE_FUN(ft_t_any);

#if defined(__x86_64__)
	ps_per_cycle = 1000000000UL / cpu_khz;
#elif defined(__alpha__)
	extern struct hwrpb_struct *hwrpb;

	ps_per_cycle = (1000*1000*1000*1000UL) / hwrpb->cycle_freq;
#endif
	TRACE_EXIT;
}
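
/*
 * Unit check on the arithmetic above: cpu_khz is in kHz, so a cycle
 * period of 10^12 / (cpu_khz * 1000) ps reduces to 10^9 / cpu_khz.
 * On a 2 GHz x86_64 box (cpu_khz == 2000000) that gives 500 ps per
 * cycle; on a 200 MHz Alpha (cycle_freq == 200000000 Hz) the hwrpb
 * path gives 10^12 / (2 * 10^8) == 5000 ps per cycle.
 */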
/*
 *      Input:  a function taking a repeat count as its parameter, and
 *              pointers to the calculated calibration variables.
 */
void ftape_calibrate(char *name,
		     void (*fun) (unsigned int),
		     unsigned int *calibr_count,
		     unsigned int *calibr_time)
{
	static int first_time = 1;
	int i;
	unsigned int tc = 0;
	unsigned int count;
	unsigned int time;
#if defined(__i386__)
	unsigned int old_tc = 0;
	unsigned int old_count = 1;
	unsigned int old_time = 1;
#endif
	TRACE_FUN(ft_t_flow);

	if (first_time) {	/* get idea of I/O performance */
		init_clock();
		time_inb();
		first_time = 0;
	}
	/* The initial count must be chosen so that on very slow
	 * systems it still yields a time of less than one jiffy,
	 * while on very fast systems it gives reasonable precision.
	 */
	count = 40;
	for (i = 0; i < 15; ++i) {
		unsigned int t0;
		unsigned int t1;
		unsigned int once;
		unsigned int multiple;
		unsigned long flags;

		*calibr_count =
		*calibr_time = count;	/* set TC to 1 */
		spin_lock_irqsave(&calibr_lock, flags);
		fun(0);		/* dummy, get code into cache */
		t0 = short_ftape_timestamp();
		fun(0);		/* overhead + one test */
		t1 = short_ftape_timestamp();
		once = diff(t0, t1);
		t0 = short_ftape_timestamp();
		fun(count);	/* overhead + count tests */
		t1 = short_ftape_timestamp();
		multiple = diff(t0, t1);
		spin_unlock_irqrestore(&calibr_lock, flags);
		time = ftape_timediff(0, multiple - once);
		/* "multiple - once" is the net cost of count - 1
		 * extra tests; tc is that cost per test in nsec.
		 */
		tc = (1000 * time) / (count - 1);
		TRACE(ft_t_any, "once:%3d us,%6d times:%6d us, TC:%5d ns",
		      usecs(once), count - 1, usecs(multiple), tc);
#if defined(__alpha__) || defined(__x86_64__)
		/*
		 * Increase the calibration count exponentially until
		 * the calibration time exceeds 100 ms.
		 */
		if (time >= 100*1000) {
			break;
		}
#elif defined(__i386__)
		/*
		 * Increase the count until the resulting time nears
		 * 2/HZ; then tc will drop sharply because we lose
		 * LATCH counts.
		 */
		if (tc <= old_tc / 2) {
			time = old_time;
			count = old_count;
			break;
		}
		old_tc = tc;
		old_count = count;
		old_time = time;
#endif
		count *= 2;
	}
	*calibr_count = count - 1;
	*calibr_time  = time;
	TRACE(ft_t_info, "TC for `%s()' = %d nsec (at %d counts)",
	      name, (1000 * *calibr_time) / *calibr_count, *calibr_count);
	TRACE_EXIT;
}
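
#if 0
/*
 * Usage sketch only, compiled out: my_udelay() below is a
 * hypothetical busy-wait loop, not part of this driver; it merely has
 * the "work proportional to count" shape that ftape_calibrate()
 * expects of its fun argument.
 */
static volatile int calibr_dummy;

static void my_udelay(unsigned int count)
{
	while (count-- > 0) {
		calibr_dummy = 0;	/* one unit of calibratable work */
	}
}

static unsigned int delay_count;	/* loops measured */
static unsigned int delay_time;		/* usec those loops took */

static void calibrate_my_udelay(void)
{
	/* After this call, each loop iteration costs about
	 * (1000 * delay_time) / delay_count nsec.
	 */
	ftape_calibrate("my_udelay", my_udelay, &delay_count, &delay_time);
}
#endif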