/* ftape-calibr.c */
/*
 *      Copyright (C) 1993-1996 Bas Laarhoven.
 *
 *      This program is free software; you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation; either version 2, or (at your option)
 *      any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program; see the file COPYING.  If not, write to
 *      the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Source: /homes/cvs/ftape-stacked/ftape/lowlevel/ftape-calibr.c,v $
 * $Revision: 1.2 $
 * $Date: 1997/10/05 19:18:08 $
 *
 *      GP calibration routine for processor speed dependent
 *      functions.
 */
  22. #include <linux/errno.h>
  23. #include <linux/jiffies.h>
  24. #include <asm/system.h>
  25. #include <asm/io.h>
  26. #if defined(__alpha__)
  27. # include <asm/hwrpb.h>
  28. #elif defined(__x86_64__)
  29. # include <asm/msr.h>
  30. # include <asm/timex.h>
  31. #elif defined(__i386__)
  32. # include <linux/timex.h>
  33. #endif
  34. #include <linux/ftape.h>
  35. #include "../lowlevel/ftape-tracing.h"
  36. #include "../lowlevel/ftape-calibr.h"
  37. #include "../lowlevel/fdc-io.h"
#undef DEBUG

/* Only the three architectures below have timestamp support here. */
#if !defined(__alpha__) && !defined(__i386__) && !defined(__x86_64__)
# error Ftape is not implemented for this architecture!
#endif

#if defined(__alpha__) || defined(__x86_64__)
/* Duration of one cycle-counter tick in picoseconds; set once by
 * init_clock() (from hwrpb->cycle_freq on alpha, cpu_khz on x86_64).
 */
static unsigned long ps_per_cycle = 0;
#endif

/* Serializes the 8254 port accesses / timestamp sequences below.
 * NOTE(review): never explicitly initialized here — appears to rely
 * on zeroed BSS being a valid unlocked state; confirm, or use the
 * static spinlock initializer of this kernel version.
 */
static spinlock_t calibr_lock;
/*
 * Note: On Intel PCs, the clock ticks at 100 Hz (HZ == 100) which is
 * too slow for certain timeouts (and that clock doesn't even tick
 * when interrupts are disabled).  For that reason, the 8254 timer is
 * used directly to implement fine-grained timeouts.  However, on
 * Alpha PCs, the 8254 is *not* used to implement the clock tick
 * (which is 1024 Hz, normally) and the 8254 timer runs at some
 * "random" frequency (it seems to run at 18 Hz, but it's not safe to
 * rely on this value).  Instead, we use the Alpha's "rpcc"
 * instruction to read cycle counts.  As this is a 32 bit counter,
 * it will overflow only once per 30 seconds (on a 200 MHz machine),
 * which is plenty.
 */
/*
 * Return a raw timestamp.  The unit is architecture specific:
 * CPU cycles on alpha (rpcc) and x86_64 (low 32 TSC bits), and a
 * jiffies-extended 8254 PIT count on i386.  Only diff() and usecs()
 * below know how to interpret the value.
 */
unsigned int ftape_timestamp(void)
{
#if defined(__alpha__)
	unsigned long r;

	asm volatile ("rpcc %0" : "=r" (r));
	return r;
#elif defined(__x86_64__)
	unsigned long r;

	rdtscl(r);
	return r;
#elif defined(__i386__)
	/*
	 * Note that there is some time between counter underflowing and jiffies
	 * increasing, so the code below won't always give correct output.
	 * -Vojtech
	 */
	unsigned long flags;
	__u16 lo;
	__u16 hi;

	spin_lock_irqsave(&calibr_lock, flags);
	outb_p(0x00, 0x43);	/* latch the count ASAP */
	lo = inb_p(0x40);	/* read the latched count */
	lo |= inb(0x40) << 8;
	hi = jiffies;
	spin_unlock_irqrestore(&calibr_lock, flags);

	/* PIT channel 0 counts DOWN from LATCH each jiffy, hence the
	 * (hi + 1) * LATCH - lo construction to make the result count up.
	 */
	return ((hi + 1) * (unsigned int) LATCH) - lo;	/* downcounter ! */
#endif
}
/*
 * Cheap variant of ftape_timestamp().  On alpha/x86_64 it is the same
 * call; on i386 only the 16-bit 8254 count is read (no jiffies), so
 * the value wraps every jiffy — diff() below undoes a single wrap.
 */
static unsigned int short_ftape_timestamp(void)
{
#if defined(__alpha__) || defined(__x86_64__)
	return ftape_timestamp();
#elif defined(__i386__)
	unsigned int count;
	unsigned long flags;

	spin_lock_irqsave(&calibr_lock, flags);
	outb_p(0x00, 0x43);	/* latch the count ASAP */
	count = inb_p(0x40);	/* read the latched count */
	count |= inb(0x40) << 8;
	spin_unlock_irqrestore(&calibr_lock, flags);

	return (LATCH - count);	/* normal: downcounter */
#endif
}
  102. static unsigned int diff(unsigned int t0, unsigned int t1)
  103. {
  104. #if defined(__alpha__) || defined(__x86_64__)
  105. return (t1 - t0);
  106. #elif defined(__i386__)
  107. /*
  108. * This is tricky: to work for both short and full ftape_timestamps
  109. * we'll have to discriminate between these.
  110. * If it _looks_ like short stamps with wrapping around we'll
  111. * asume it are. This will generate a small error if it really
  112. * was a (very large) delta from full ftape_timestamps.
  113. */
  114. return (t1 <= t0 && t0 <= LATCH) ? t1 + LATCH - t0 : t1 - t0;
  115. #endif
  116. }
  117. static unsigned int usecs(unsigned int count)
  118. {
  119. #if defined(__alpha__) || defined(__x86_64__)
  120. return (ps_per_cycle * count) / 1000000UL;
  121. #elif defined(__i386__)
  122. return (10000 * count) / ((CLOCK_TICK_RATE + 50) / 100);
  123. #endif
  124. }
  125. unsigned int ftape_timediff(unsigned int t0, unsigned int t1)
  126. {
  127. /*
  128. * Calculate difference in usec for ftape_timestamp results t0 & t1.
  129. * Note that on the i386 platform with short time-stamps, the
  130. * maximum allowed timespan is 1/HZ or we'll lose ticks!
  131. */
  132. return usecs(diff(t0, t1));
  133. }
  134. /* To get an indication of the I/O performance,
  135. * measure the duration of the inb() function.
  136. */
  137. static void time_inb(void)
  138. {
  139. int i;
  140. int t0, t1;
  141. unsigned long flags;
  142. int status;
  143. TRACE_FUN(ft_t_any);
  144. spin_lock_irqsave(&calibr_lock, flags);
  145. t0 = short_ftape_timestamp();
  146. for (i = 0; i < 1000; ++i) {
  147. status = inb(fdc.msr);
  148. }
  149. t1 = short_ftape_timestamp();
  150. spin_unlock_irqrestore(&calibr_lock, flags);
  151. TRACE(ft_t_info, "inb() duration: %d nsec", ftape_timediff(t0, t1));
  152. TRACE_EXIT;
  153. }
/*
 * One-time setup of ps_per_cycle (picoseconds per timestamp tick) for
 * the cycle-counter based architectures.  On i386 the 8254 conversion
 * in usecs() needs no setup, so this is effectively a no-op there.
 */
static void init_clock(void)
{
	TRACE_FUN(ft_t_any);
#if defined(__x86_64__)
	/* cpu_khz is the TSC frequency in kHz: 10^9 / kHz = ps/cycle */
	ps_per_cycle = 1000000000UL / cpu_khz;
#elif defined(__alpha__)
	/* cycle_freq (Hz) comes from the HW restart parameter block */
	extern struct hwrpb_struct *hwrpb;

	ps_per_cycle = (1000*1000*1000*1000UL) / hwrpb->cycle_freq;
#endif
	TRACE_EXIT;
}
/*
 * Measure the per-iteration running time ("TC") of `fun'.
 *
 * Input:  name         - name of `fun', used for tracing only.
 *         fun          - function taking an iteration count; fun(n)
 *                        is assumed to repeat its operation n times
 *                        (fun(0) = pure overhead plus one test).
 * Output: calibr_count - number of iterations that were timed.
 *         calibr_time  - total time in usec for calibr_count iterations.
 */
void ftape_calibrate(char *name,
		     void (*fun) (unsigned int),
		     unsigned int *calibr_count,
		     unsigned int *calibr_time)
{
	static int first_time = 1;	/* run init_clock()/time_inb() once */
	int i;
	unsigned int tc = 0;
	unsigned int count;
	unsigned int time;
#if defined(__i386__)
	/* previous round's results, restored when tc collapses (see below) */
	unsigned int old_tc = 0;
	unsigned int old_count = 1;
	unsigned int old_time = 1;
#endif
	TRACE_FUN(ft_t_flow);

	if (first_time) {		/* get idea of I/O performance */
		init_clock();
		time_inb();
		first_time = 0;
	}
	/* value of timeout must be set so that on very slow systems
	 * it will give a time less than one jiffy, and on
	 * very fast systems it'll give reasonable precision.
	 */
	count = 40;
	for (i = 0; i < 15; ++i) {
		unsigned int t0;
		unsigned int t1;
		unsigned int once;
		unsigned int multiple;
		unsigned long flags;

		*calibr_count =
		*calibr_time = count;	/* set TC to 1 */
		spin_lock_irqsave(&calibr_lock, flags);
		fun(0);		/* dummy, get code into cache */
		t0 = short_ftape_timestamp();
		fun(0);		/* overhead + one test */
		t1 = short_ftape_timestamp();
		once = diff(t0, t1);
		t0 = short_ftape_timestamp();
		fun(count);	/* overhead + count tests */
		t1 = short_ftape_timestamp();
		multiple = diff(t0, t1);
		spin_unlock_irqrestore(&calibr_lock, flags);
		time = ftape_timediff(0, multiple - once);
		/* `multiple' - `once' covers (count - 1) tests with the
		 * call overhead cancelled out; tc is nsec per test.
		 */
		tc = (1000 * time) / (count - 1);
		TRACE(ft_t_any, "once:%3d us,%6d times:%6d us, TC:%5d ns",
		      usecs(once), count - 1, usecs(multiple), tc);
#if defined(__alpha__) || defined(__x86_64__)
		/*
		 * Increase the calibration count exponentially until the
		 * calibration time exceeds 100 ms.
		 */
		if (time >= 100*1000) {
			break;
		}
#elif defined(__i386__)
		/*
		 * increase the count until the resulting time nears 2/HZ,
		 * then the tc will drop sharply because we lose LATCH counts.
		 */
		if (tc <= old_tc / 2) {
			/* tc collapsed: this round lost ticks, so report
			 * the previous (still trustworthy) round instead.
			 */
			time = old_time;
			count = old_count;
			break;
		}
		old_tc = tc;
		old_count = count;
		old_time = time;
#endif
		count *= 2;
	}
	*calibr_count = count - 1;
	*calibr_time = time;
	TRACE(ft_t_info, "TC for `%s()' = %d nsec (at %d counts)",
	      name, (1000 * *calibr_time) / *calibr_count, *calibr_count);
	TRACE_EXIT;
}
  247. }