udivdi3.S 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. /*
  2. * udivdi3.S - unsigned long long division
  3. *
  4. * Copyright 2003-2007 Analog Devices Inc.
  5. * Enter bugs at http://blackfin.uclinux.org/
  6. *
  7. * Licensed under the GPLv2 or later.
  8. */
  9. #include <linux/linkage.h>
  10. #define CARRY AC0
  11. #ifdef CONFIG_ARITHMETIC_OPS_L1
  12. .section .l1.text
  13. #else
  14. .text
  15. #endif
  16. ENTRY(___udivdi3)
  17. R3 = [SP + 12];
  18. [--SP] = (R7:4, P5:3);
  19. /* Attempt to use divide primitive first; these will handle
  20. ** most cases, and they're quick - avoids stalls incurred by
  21. ** testing for identities.
  22. */
  23. R4 = R2 | R3;
  24. CC = R4 == 0;
  25. IF CC JUMP .LDIV_BY_ZERO;
  26. R4.H = 0x8000;
  27. R4 >>>= 16; // R4 now 0xFFFF8000
  28. R5 = R0 | R2; // If either dividend or
  29. R4 = R5 & R4; // divisor have bits in
  30. CC = R4; // top half or low half's sign
  31. IF CC JUMP .LIDENTS; // bit, skip builtins.
  32. R4 = R1 | R3; // Also check top halves
  33. CC = R4;
  34. IF CC JUMP .LIDENTS;
  35. /* Can use the builtins. */
  36. AQ = CC; // Clear AQ (CC==0)
  37. DIVQ(R0, R2);
  38. DIVQ(R0, R2);
  39. DIVQ(R0, R2);
  40. DIVQ(R0, R2);
  41. DIVQ(R0, R2);
  42. DIVQ(R0, R2);
  43. DIVQ(R0, R2);
  44. DIVQ(R0, R2);
  45. DIVQ(R0, R2);
  46. DIVQ(R0, R2);
  47. DIVQ(R0, R2);
  48. DIVQ(R0, R2);
  49. DIVQ(R0, R2);
  50. DIVQ(R0, R2);
  51. DIVQ(R0, R2);
  52. DIVQ(R0, R2);
  53. DIVQ(R0, R2);
  54. R0 = R0.L (Z);
  55. R1 = 0;
  56. (R7:4, P5:3) = [SP++];
  57. RTS;
  58. .LIDENTS:
  59. /* Test for common identities. Value to be returned is
  60. ** placed in R6,R7.
  61. */
  62. // Check for 0/y, return 0
  63. R4 = R0 | R1;
  64. CC = R4 == 0;
  65. IF CC JUMP .LRETURN_R0;
  66. // Check for x/x, return 1
  67. R6 = R0 - R2; // If x == y, then both R6 and R7 will be zero
  68. R7 = R1 - R3;
  69. R4 = R6 | R7; // making R4 zero.
  70. R6 += 1; // which would now make R6:R7==1.
  71. CC = R4 == 0;
  72. IF CC JUMP .LRETURN_IDENT;
  73. // Check for x/1, return x
  74. R6 = R0;
  75. R7 = R1;
  76. CC = R3 == 0;
  77. IF !CC JUMP .Lnexttest;
  78. CC = R2 == 1;
  79. IF CC JUMP .LRETURN_IDENT;
  80. .Lnexttest:
  81. R4.L = ONES R2; // check for div by power of two which
  82. R5.L = ONES R3; // can be done using a shift
  83. R6 = PACK (R5.L, R4.L);
  84. CC = R6 == 1;
  85. IF CC JUMP .Lpower_of_two_upper_zero;
  86. R6 = PACK (R4.L, R5.L);
  87. CC = R6 == 1;
  88. IF CC JUMP .Lpower_of_two_lower_zero;
  89. // Check for x < y, return 0
  90. R6 = 0;
  91. R7 = R6;
  92. CC = R1 < R3 (IU);
  93. IF CC JUMP .LRETURN_IDENT;
  94. CC = R1 == R3;
  95. IF !CC JUMP .Lno_idents;
  96. CC = R0 < R2 (IU);
  97. IF CC JUMP .LRETURN_IDENT;
  98. .Lno_idents: // Idents don't match. Go for the full operation
  99. // If X, or X and Y have high bit set, it'll affect the
  100. // results, so shift right one to stop this. Note: we've already
  101. // checked that X >= Y, so Y's msb won't be set unless X's
  102. // is.
  103. R4 = 0;
  104. CC = R1 < 0;
  105. IF !CC JUMP .Lx_msb_clear;
  106. CC = !CC; // 1 -> 0;
  107. R1 = ROT R1 BY -1; // Shift X >> 1
  108. R0 = ROT R0 BY -1; // lsb -> CC
  109. BITSET(R4,31); // to record only x msb was set
  110. CC = R3 < 0;
  111. IF !CC JUMP .Ly_msb_clear;
  112. CC = !CC;
  113. R3 = ROT R3 BY -1; // Shift Y >> 1
  114. R2 = ROT R2 BY -1;
  115. BITCLR(R4,31); // clear bit to record only x msb was set
  116. .Ly_msb_clear:
  117. .Lx_msb_clear:
  118. // Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits
  119. // were lost, so we should shift result left by one.
  120. [--SP] = R4; // save for later
  121. // In the loop that follows, each iteration we add
  122. // either Y' or -Y' to the Remainder. We compute the
  123. // negated Y', and store, for convenience. Y' goes
  124. // into P0:P1, while -Y' goes into P2:P3.
  125. P0 = R2;
  126. P1 = R3;
  127. R2 = -R2;
  128. CC = CARRY;
  129. CC = !CC;
  130. R4 = CC;
  131. R3 = -R3;
  132. R3 = R3 - R4;
  133. R6 = 0; // remainder = 0
  134. R7 = R6;
  135. [--SP] = R2; P2 = SP;
  136. [--SP] = R3; P3 = SP;
  137. [--SP] = R6; P5 = SP; // AQ = 0
  138. [--SP] = P1;
  139. /* In the loop that follows, we use the following
  140. ** register assignments:
  141. ** R0,R1 X, workspace
  142. ** R2,R3 Y, workspace
  143. ** R4,R5 partial Div
  144. ** R6,R7 partial remainder
  145. ** P5 AQ
  146. ** The remainder and div form a 128-bit number, with
  147. ** the remainder in the high 64-bits.
  148. */
  149. R4 = R0; // Div = X'
  150. R5 = R1;
  151. R3 = 0;
  152. P4 = 64; // Iterate once per bit
  153. LSETUP(.LULST,.LULEND) LC0 = P4;
  154. .LULST:
  155. /* Shift Div and remainder up by one. The bit shifted
  156. ** out of the top of the quotient is shifted into the bottom
  157. ** of the remainder.
  158. */
  159. CC = R3;
  160. R4 = ROT R4 BY 1;
  161. R5 = ROT R5 BY 1 || // low q to high q
  162. R2 = [P5]; // load saved AQ
  163. R6 = ROT R6 BY 1 || // high q to low r
  164. R0 = [P2]; // load -Y'
  165. R7 = ROT R7 BY 1 || // low r to high r
  166. R1 = [P3];
  167. // Assume add -Y'
  168. CC = R2 < 0; // But if AQ is set...
  169. IF CC R0 = P0; // then add Y' instead
  170. IF CC R1 = P1;
  171. R6 = R6 + R0; // Rem += (Y' or -Y')
  172. CC = CARRY;
  173. R0 = CC;
  174. R7 = R7 + R1;
  175. R7 = R7 + R0 (NS) ||
  176. R1 = [SP];
  177. // Set the next AQ bit
  178. R1 = R7 ^ R1; // from Remainder and Y'
  179. R1 = R1 >> 31 || // Negate AQ's value, and
  180. [P5] = R1; // save next AQ
  181. BITTGL(R1, 0); // add neg AQ to the Div
  182. .LULEND: R4 = R4 + R1;
  183. R6 = [SP + 16];
  184. R0 = R4;
  185. R1 = R5;
  186. CC = BITTST(R6,30); // Just set CC=0
  187. R4 = ROT R0 BY 1; // but if we had to shift X,
  188. R5 = ROT R1 BY 1; // and didn't shift any bits out,
  189. CC = BITTST(R6,31); // then the result will be half as
  190. IF CC R0 = R4; // much as required, so shift left
  191. IF CC R1 = R5; // one space.
  192. SP += 20;
  193. (R7:4, P5:3) = [SP++];
  194. RTS;
  195. .Lpower_of_two:
  196. /* Y has a single bit set, which means it's a power of two.
  197. ** That means we can perform the division just by shifting
  198. ** X to the right the appropriate number of bits
  199. */
  200. /* signbits returns the number of sign bits, minus one.
  201. ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
  202. ** to shift right n-signbits spaces. It also means 0x80000000
  203. ** is a special case, because that *also* gives a signbits of 0
  204. */
  205. .Lpower_of_two_lower_zero:
  206. R7 = 0;
  207. R6 = R1 >> 31;
  208. CC = R3 < 0;
  209. IF CC JUMP .LRETURN_IDENT;
  210. R2.L = SIGNBITS R3;
  211. R2 = R2.L (Z);
  212. R2 += -62;
  213. (R7:4, P5:3) = [SP++];
  214. JUMP ___lshftli;
  215. .Lpower_of_two_upper_zero:
  216. CC = R2 < 0;
  217. IF CC JUMP .Lmaxint_shift;
  218. R2.L = SIGNBITS R2;
  219. R2 = R2.L (Z);
  220. R2 += -30;
  221. (R7:4, P5:3) = [SP++];
  222. JUMP ___lshftli;
  223. .Lmaxint_shift:
  224. R2 = -31;
  225. (R7:4, P5:3) = [SP++];
  226. JUMP ___lshftli;
  227. .LRETURN_IDENT:
  228. R0 = R6;
  229. R1 = R7;
  230. .LRETURN_R0:
  231. (R7:4, P5:3) = [SP++];
  232. RTS;
  233. .LDIV_BY_ZERO:
  234. R0 = ~R2;
  235. R1 = R0;
  236. (R7:4, P5:3) = [SP++];
  237. RTS;
  238. ENDPROC(___udivdi3)
  239. ENTRY(___lshftli)
  240. CC = R2 == 0;
  241. IF CC JUMP .Lfinished; // nothing to do
  242. CC = R2 < 0;
  243. IF CC JUMP .Lrshift;
  244. R3 = 64;
  245. CC = R2 < R3;
  246. IF !CC JUMP .Lretzero;
  247. // We're shifting left, and it's less than 64 bits, so
  248. // a valid result will be returned.
  249. R3 >>= 1; // R3 now 32
  250. CC = R2 < R3;
  251. IF !CC JUMP .Lzerohalf;
  252. // We're shifting left, between 1 and 31 bits, which means
  253. // some of the low half will be shifted into the high half.
  254. // Work out how much.
  255. R3 = R3 - R2;
  256. // Save that much data from the bottom half.
  257. P1 = R7;
  258. R7 = R0;
  259. R7 >>= R3;
  260. // Adjust both parts of the parameter.
  261. R0 <<= R2;
  262. R1 <<= R2;
  263. // And include the bits moved across.
  264. R1 = R1 | R7;
  265. R7 = P1;
  266. RTS;
  267. .Lzerohalf:
  268. // We're shifting left, between 32 and 63 bits, so the
  269. // bottom half will become zero, and the top half will
  270. // lose some bits. How many?
  271. R2 = R2 - R3; // N - 32
  272. R1 = LSHIFT R0 BY R2.L;
  273. R0 = R0 - R0;
  274. RTS;
  275. .Lretzero:
  276. R0 = R0 - R0;
  277. R1 = R0;
  278. .Lfinished:
  279. RTS;
  280. .Lrshift:
  281. // We're shifting right, but by how much?
  282. R2 = -R2;
  283. R3 = 64;
  284. CC = R2 < R3;
  285. IF !CC JUMP .Lretzero;
  286. // Shifting right less than 64 bits, so some result bits will
  287. // be retained.
  288. R3 >>= 1; // R3 now 32
  289. CC = R2 < R3;
  290. IF !CC JUMP .Lsignhalf;
  291. // Shifting right between 1 and 31 bits, so need to copy
  292. // data across words.
  293. P1 = R7;
  294. R3 = R3 - R2;
  295. R7 = R1;
  296. R7 <<= R3;
  297. R1 >>= R2;
  298. R0 >>= R2;
  299. R0 = R7 | R0;
  300. R7 = P1;
  301. RTS;
  302. .Lsignhalf:
  303. // Shifting right between 32 and 63 bits, so the top half
  304. // will become all zero-bits, and the bottom half is some
  305. // of the top half. But how much?
  306. R2 = R2 - R3;
  307. R0 = R1;
  308. R0 >>= R2;
  309. R1 = 0;
  310. RTS;
  311. ENDPROC(___lshftli)