/* ----------------------------------------------------------------------- *
 *
 *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 59 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6x86.h
 *
 * Definitions common to x86 and x86-64 RAID-6 code only
 */
#ifndef LINUX_RAID_RAID6X86_H
#define LINUX_RAID_RAID6X86_H

#if defined(__i386__) || defined(__x86_64__)

#ifdef __x86_64__

typedef struct {
        unsigned int fsave[27];
        unsigned long cr0;
} raid6_mmx_save_t __attribute__((aligned(16)));

/* N.B.: For SSE we only save %xmm0-%xmm7 even for x86-64, since
   the code doesn't know about the additional x86-64 registers */
typedef struct {
        unsigned int sarea[8*4+2];
        unsigned long cr0;
} raid6_sse_save_t __attribute__((aligned(16)));

/* This is for x86-64-specific code which uses all 16 XMM registers */
typedef struct {
        unsigned int sarea[16*4+2];
        unsigned long cr0;
} raid6_sse16_save_t __attribute__((aligned(16)));

/* On x86-64 the stack *SHOULD* be 16-byte aligned, but currently this
   is buggy in the kernel and it's only 8-byte aligned in places, so
   we need to do this anyway.  Sigh. */
#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15))

#else /* __i386__ */

typedef struct {
        unsigned int fsave[27];
        unsigned long cr0;
} raid6_mmx_save_t;

/* On i386, the stack is only 8-byte aligned, but SSE requires 16-byte
   alignment.  The +3 is so we have the slack space to manually align
   a properly-sized area correctly. */
typedef struct {
        unsigned int sarea[8*4+3];
        unsigned long cr0;
} raid6_sse_save_t;

/* Find the 16-byte aligned save area */
#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15))

#endif
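
/*
 * A quick illustration of the SAREA() arithmetic, using a made-up address:
 * if &(x)->sarea were 0x1004, then (0x1004 + 15) & ~15 == 0x1010, i.e. the
 * next 16-byte boundary.  The +2/+3 slack words declared in sarea[] above
 * cover the worst case that this rounding can skip, so the aligned region
 * always has room for every 16-byte XMM save slot.
 */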

#ifdef __KERNEL__ /* Real code */

/* Note: %cr0 is 32 bits on i386 and 64 bits on x86-64 */

static inline unsigned long raid6_get_fpu(void)
{
        unsigned long cr0;

        preempt_disable();
        asm volatile("mov %%cr0,%0 ; clts" : "=r" (cr0));
        return cr0;
}

static inline void raid6_put_fpu(unsigned long cr0)
{
        asm volatile("mov %0,%%cr0" : : "r" (cr0));
        preempt_enable();
}

#else /* Dummy code for user space testing */

static inline unsigned long raid6_get_fpu(void)
{
        return 0xf00ba6;
}

static inline void raid6_put_fpu(unsigned long cr0)
{
        (void)cr0;
}

#endif
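
/*
 * What the get/put pair above does: CR0.TS is the "task switched" bit
 * used for lazy FPU context switching; while it is set, FPU/MMX/SSE
 * instructions fault with a device-not-available exception.  With
 * preemption disabled, raid6_get_fpu() clears TS via clts so the vector
 * instructions below can execute, and raid6_put_fpu() writes the saved
 * %cr0 back (restoring TS to its previous value) and re-enables
 * preemption.  The save/restore helpers below preserve whatever FPU/XMM
 * contents were live before the RAID-6 work.
 */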

static inline void raid6_before_mmx(raid6_mmx_save_t *s)
{
        s->cr0 = raid6_get_fpu();
        asm volatile("fsave %0 ; fwait" : "=m" (s->fsave[0]));
}

static inline void raid6_after_mmx(raid6_mmx_save_t *s)
{
        asm volatile("frstor %0" : : "m" (s->fsave[0]));
        raid6_put_fpu(s->cr0);
}

static inline void raid6_before_sse(raid6_sse_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        s->cr0 = raid6_get_fpu();

        asm volatile("movaps %%xmm0,%0" : "=m" (rsa[0]));
        asm volatile("movaps %%xmm1,%0" : "=m" (rsa[4]));
        asm volatile("movaps %%xmm2,%0" : "=m" (rsa[8]));
        asm volatile("movaps %%xmm3,%0" : "=m" (rsa[12]));
        asm volatile("movaps %%xmm4,%0" : "=m" (rsa[16]));
        asm volatile("movaps %%xmm5,%0" : "=m" (rsa[20]));
        asm volatile("movaps %%xmm6,%0" : "=m" (rsa[24]));
        asm volatile("movaps %%xmm7,%0" : "=m" (rsa[28]));
}

static inline void raid6_after_sse(raid6_sse_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        asm volatile("movaps %0,%%xmm0" : : "m" (rsa[0]));
        asm volatile("movaps %0,%%xmm1" : : "m" (rsa[4]));
        asm volatile("movaps %0,%%xmm2" : : "m" (rsa[8]));
        asm volatile("movaps %0,%%xmm3" : : "m" (rsa[12]));
        asm volatile("movaps %0,%%xmm4" : : "m" (rsa[16]));
        asm volatile("movaps %0,%%xmm5" : : "m" (rsa[20]));
        asm volatile("movaps %0,%%xmm6" : : "m" (rsa[24]));
        asm volatile("movaps %0,%%xmm7" : : "m" (rsa[28]));

        raid6_put_fpu(s->cr0);
}
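
/*
 * movaps (an SSE instruction) and movdqa (its SSE2 counterpart) are both
 * 128-bit moves that fault on addresses that are not 16-byte aligned,
 * which is why the save area is located through SAREA() rather than used
 * directly.  The movaps-based pair above is presumably what gets used on
 * CPUs that have SSE but not SSE2; the SSE2 pair follows below.
 */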

static inline void raid6_before_sse2(raid6_sse_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        s->cr0 = raid6_get_fpu();

        asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0]));
        asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4]));
        asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8]));
        asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12]));
        asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16]));
        asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20]));
        asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24]));
        asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28]));
}

static inline void raid6_after_sse2(raid6_sse_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
        asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
        asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8]));
        asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12]));
        asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16]));
        asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20]));
        asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24]));
        asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28]));

        raid6_put_fpu(s->cr0);
}
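
/*
 * A usage sketch (the function name and loop body below are placeholders,
 * not something defined by this header): callers bracket their vector
 * code with the matching before/after pair, e.g.
 *
 *      static void example_gen_syndrome(int disks, size_t bytes, void **ptrs)
 *      {
 *              raid6_sse_save_t sa;
 *
 *              raid6_before_sse2(&sa);
 *              ... SSE2 P/Q parity computation over the data blocks ...
 *              raid6_after_sse2(&sa);
 *      }
 *
 * Each raid6_before_*() call should be paired with its matching
 * raid6_after_*() so the saved register state is fully restored.
 */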

#ifdef __x86_64__

static inline void raid6_before_sse16(raid6_sse16_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        s->cr0 = raid6_get_fpu();

        asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0]));
        asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4]));
        asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8]));
        asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12]));
        asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16]));
        asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20]));
        asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24]));
        asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28]));
        asm volatile("movdqa %%xmm8,%0" : "=m" (rsa[32]));
        asm volatile("movdqa %%xmm9,%0" : "=m" (rsa[36]));
        asm volatile("movdqa %%xmm10,%0" : "=m" (rsa[40]));
        asm volatile("movdqa %%xmm11,%0" : "=m" (rsa[44]));
        asm volatile("movdqa %%xmm12,%0" : "=m" (rsa[48]));
        asm volatile("movdqa %%xmm13,%0" : "=m" (rsa[52]));
        asm volatile("movdqa %%xmm14,%0" : "=m" (rsa[56]));
        asm volatile("movdqa %%xmm15,%0" : "=m" (rsa[60]));
}

static inline void raid6_after_sse16(raid6_sse16_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
        asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
        asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8]));
        asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12]));
        asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16]));
        asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20]));
        asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24]));
        asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28]));
        asm volatile("movdqa %0,%%xmm8" : : "m" (rsa[32]));
        asm volatile("movdqa %0,%%xmm9" : : "m" (rsa[36]));
        asm volatile("movdqa %0,%%xmm10" : : "m" (rsa[40]));
        asm volatile("movdqa %0,%%xmm11" : : "m" (rsa[44]));
        asm volatile("movdqa %0,%%xmm12" : : "m" (rsa[48]));
        asm volatile("movdqa %0,%%xmm13" : : "m" (rsa[52]));
        asm volatile("movdqa %0,%%xmm14" : : "m" (rsa[56]));
        asm volatile("movdqa %0,%%xmm15" : : "m" (rsa[60]));

        raid6_put_fpu(s->cr0);
}

#endif /* __x86_64__ */

/* User space test hack */
#ifndef __KERNEL__
static inline int cpuid_features(void)
{
        u32 eax = 1;
        u32 ebx, ecx, edx;

        asm volatile("cpuid" :
                     "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));

        return edx;
}
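
/*
 * The value returned above is the EDX feature word from CPUID leaf 1.  In
 * the user-space test harness it can be checked against the architecturally
 * defined feature bits, for example:
 *
 *      int features  = cpuid_features();
 *      int have_mmx  = features & (1 << 23);
 *      int have_sse  = features & (1 << 25);
 *      int have_sse2 = features & (1 << 26);
 *
 * This is only an illustration; how the test harness actually selects an
 * algorithm is outside the scope of this header.
 */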
#endif /* ndef __KERNEL__ */

#endif /* defined(__i386__) || defined(__x86_64__) */

#endif /* LINUX_RAID_RAID6X86_H */