/*
 * atomic.S
 *
 * Copyright 2007-2008 Analog Devices Inc.
 *              Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */
  7. #include <linux/linkage.h>
  8. #include <asm/blackfin.h>
  9. #include <asm/cache.h>
  10. #include <asm/asm-offsets.h>
  11. #include <asm/rwlock.h>
  12. #include <asm/cplb.h>
  .text

  /*
   * coreslot_loadaddr <reg>
   *
   * Build the 32bit address of _corelock in \reg by writing its low and
   * high halves one after the other.  \reg is the only register written.
   */
  .macro coreslot_loadaddr reg:req
          \reg\().l = _corelock;
          \reg\().h = _corelock;
  .endm
  /*
   * _get_core_lock - take the inter-core lock and drop the stale cache line
   *
   * In:   r0 = address of the 32bit atomic data about to be accessed
   * Out:  r0 = interrupt mask saved by "cli"
   *
   * Interrupts are left masked on return.  The atomic data is assumed
   * never to span two cache lines, so a single flushinv covers it.
   *
   * Clobbers: r2:0, p0
   */
  ENTRY(_get_core_lock)
          /* r1 = r0 masked with -L1_CACHE_BYTES, i.e. the line base. */
          r1 = -L1_CACHE_BYTES;
          r1 = r0 & r1;
          cli r0;
          coreslot_loadaddr p0;
  .Lcorelock_spin:
          testset (p0);
          if cc jump .Lcorelock_owned;
          SSYNC(r2);
          jump .Lcorelock_spin;
  .Lcorelock_owned:
          /* Lock taken: flush and invalidate the line holding the data. */
          p0 = r1;
          CSYNC(r2);
          flushinv[p0];
          SSYNC(r2);
          rts;
  ENDPROC(_get_core_lock)
  /*
   * _get_core_lock_noflush - take the inter-core lock, no cache maintenance
   *
   * In:   r0 = address of atomic data in an uncacheable memory region
   *            (not read here: "cli" overwrites r0 straight away)
   * Out:  r0 = interrupt mask saved by "cli"
   *
   * Interrupts are left masked on return.
   *
   * Clobbers: r0, p0.  r2 is additionally handed to SSYNC() as its
   * scratch argument each time the lock is found busy.
   */
  ENTRY(_get_core_lock_noflush)
          cli r0;
          coreslot_loadaddr p0;
  .Lcorelock_noflush_spin:
          testset (p0);
          if cc jump .Lcorelock_noflush_owned;
          SSYNC(r2);
          jump .Lcorelock_noflush_spin;
  .Lcorelock_noflush_owned:
          rts;
  ENDPROC(_get_core_lock_noflush)
  /*
   * _put_core_lock - release the inter-core lock and restore interrupts
   *
   * In:   r0 = interrupt mask to hand back to "sti"
   *       r1 = address of the atomic data (passed by callers; this
   *            routine overwrites r1 without reading it)
   *
   * Entered with interrupts masked, as left by _get_core_lock.
   *
   * Clobbers: r2:0, p0 (r0 itself is only read)
   */
  ENTRY(_put_core_lock)
          /* Write-through cache assumed, so no flush needed here. */
          r1 = 0;
          coreslot_loadaddr p0;
          [p0] = r1;              /* _corelock = 0: lock released */
          SSYNC(r2);
          sti r0;
          rts;
  ENDPROC(_put_core_lock)
  77. #ifdef __ARCH_SYNC_CORE_DCACHE
  /*
   * ___raw_smp_mark_barrier_asm
   *
   * Under the core lock, rewrite _barrier_mask so that every bit other
   * than the current core's is set, while the current core's own bit
   * keeps the value it had before.
   *
   * rets, r7:5, r0 and p1:0 are saved on entry and restored on exit.
   */
  ENTRY(___raw_smp_mark_barrier_asm)
          [--sp] = rets;
          [--sp] = ( r7:5 );
          [--sp] = r0;
          [--sp] = p1;
          [--sp] = p0;
          call _get_core_lock_noflush;

          /* r6 = 1 << r7, r7 being the core id supplied by GET_CPUID. */
          GET_CPUID(p1, r7);
          r6 = 1;
          r6 <<= r7;

          p1.l = _barrier_mask;
          p1.h = _barrier_mask;
          r7 = [p1];
          r5 = r7 & r6;           /* non-zero: our bit is currently set */
          r7 = ~r6;               /* every bit but ours raised */
          cc = r5 == 0;
          if cc jump .Lmark_barrier_store;
          r7 = r7 | r6;           /* our bit was set: keep it set */
  .Lmark_barrier_store:
          [p1] = r7;
          SSYNC(r2);
          call _put_core_lock;

          p0 = [sp++];
          p1 = [sp++];
          r0 = [sp++];
          ( r7:5 ) = [sp++];
          rets = [sp++];
          rts;
  ENDPROC(___raw_smp_mark_barrier_asm)
  /*
   * ___raw_smp_check_barrier_asm
   *
   * Under the core lock, test the current core's bit in _barrier_mask.
   * If it is set, clear it, release the lock and call
   * _resync_core_dcache; otherwise just release the lock.
   *
   * rets, r7:5, r0 and p1:0 are saved on entry and restored on exit.
   */
  ENTRY(___raw_smp_check_barrier_asm)
          [--sp] = rets;
          [--sp] = ( r7:5 );
          [--sp] = r0;
          [--sp] = p1;
          [--sp] = p0;
          call _get_core_lock_noflush;

          /* r6 = 1 << r7, r7 being the core id supplied by GET_CPUID. */
          GET_CPUID(p1, r7);
          r6 = 1;
          r6 <<= r7;

          p1.l = _barrier_mask;
          p1.h = _barrier_mask;
          r7 = [p1];
          r5 = r7 & r6;
          cc = r5 == 0;
          if cc jump .Lcheck_barrier_clean;

          /* Our bit is set: clear it and write the mask back. */
          r6 = ~r6;
          r7 = r7 & r6;
          [p1] = r7;
          SSYNC(r2);
          call _put_core_lock;

          /*
           * Invalidate the entire D-cache of current core.
           * NOTE(review): the 12 bytes reserved around the call are
           * presumably the callee's argument area - confirm.
           */
          sp += -12;
          call _resync_core_dcache;
          sp += 12;
          jump .Lcheck_barrier_exit;

  .Lcheck_barrier_clean:
          call _put_core_lock;
  .Lcheck_barrier_exit:
          p0 = [sp++];
          p1 = [sp++];
          r0 = [sp++];
          ( r7:5 ) = [sp++];
          rets = [sp++];
          rts;
  ENDPROC(___raw_smp_check_barrier_asm)
  /*
   * _start_lock_coherent - finish taking a lock, resyncing D-cache if needed
   *
   * In:   r0 = irqflags (restored with "sti" just before returning)
   *       r1 = address of atomic data
   *
   * Entered with the core lock held; the lock is released in here.
   *
   * Clobbers: r2:0, p1:0
   */
  _start_lock_coherent:
          [--sp] = rets;
          [--sp] = ( r7:6 );
          r7 = r0;                /* irqflags must survive the call below */
          p1 = r1;

          /*
           * r6 = fingerprint bits left in the atomic data by CPUs other
           * than this one; non-zero means another CPU owned it before us.
           */
          GET_CPUID(p0, r2);
          r1 = 1;
          r1 <<= r2;
          r2 = ~r1;
          r1 = [p1];
          r1 >>= 28;      /* CPU fingerprints are stored in the high nibble. */
          r6 = r1 & r2;

          /* Write the data back with the fingerprint nibble cleared. */
          r1 = [p1];
          r1 <<= 4;
          r1 >>= 4;
          [p1] = r1;

          /*
           * Drop the core lock at this point; IRQs stay masked until the
           * rest of the per-CPU housekeeping is done.
           */
          coreslot_loadaddr p0;
          r1 = 0;
          [p0] = r1;

          /*
           * A previous owner on another CPU means our D-cached copy of
           * the data guarded by this spin/write_lock may be out of date.
           */
          cc = r6 == 0;
          if cc jump .Ldcache_coherent;

          /* Invalidate the entire D-cache of the current core. */
          sp += -12;
          call _resync_core_dcache;
          sp += 12;
  .Ldcache_coherent:
          SSYNC(r2);
          sti r7;
          ( r7:6 ) = [sp++];
          rets = [sp++];
          rts;
  /*
   * _end_lock_coherent - stamp the releasing CPU, then drop the core lock
   *
   * In:   r0 = irqflags (passed through untouched to _put_core_lock)
   *       r1 = address of atomic data
   *
   * Sets bit (28 + core id) in the atomic data and tail-jumps to
   * _put_core_lock, which returns to our caller.
   *
   * Clobbers: r2:0, p1:0
   */
  _end_lock_coherent:
          p1 = r1;
          GET_CPUID(p0, r2);
          r2 += 28;               /* fingerprint bits sit in the high nibble */
          r1 = 1;
          r1 <<= r2;
          r2 = [p1];
          r2 = r1 | r2;
          [p1] = r2;
          r1 = p1;
          jump _put_core_lock;
  226. #endif /* __ARCH_SYNC_CORE_DCACHE */
  /*
   * ___raw_spin_is_locked_asm
   *
   * In:   r0 = &spinlock->lock
   * Out:  r0 = bit 0 of the lock word (1 if the lock bit is raised)
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_spin_is_locked_asm)
          [--sp] = rets;
          p1 = r0;
          call _get_core_lock;
          r3 = [p1];
          cc = bittst( r3, 0 );
          r3 = cc;                /* keep the answer across the unlock call */
          r1 = p1;
          call _put_core_lock;
          r0 = r3;
          rets = [sp++];
          rts;
  ENDPROC(___raw_spin_is_locked_asm)
  /*
   * ___raw_spin_lock_asm - spin until the lock bit can be raised
   *
   * In:   r0 = &spinlock->lock
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_spin_lock_asm)
          [--sp] = rets;
          p1 = r0;
  .Lspin_acquire:
          call _get_core_lock;
          r1 = p1;
          r2 = [p1];
          cc = bittst( r2, 0 );
          if cc jump .Lspin_contended;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          r3 = p1;
          bitset ( r2, 0 ); /* Raise the lock bit. */
          [p1] = r2;
          call _start_lock_coherent;
  #else
          r2 = 1;
          [p1] = r2;
          call _put_core_lock;
  #endif
          rets = [sp++];
          rts;

  .Lspin_contended:
          /*
           * The atomic area is left untouched when the lock is busy, so
           * that flush will behave like nop in _put_core_lock.
           */
          call _put_core_lock;
          SSYNC(r2);
          r0 = p1;                /* reload the argument for the retry */
          jump .Lspin_acquire;
  ENDPROC(___raw_spin_lock_asm)
  /*
   * ___raw_spin_trylock_asm - single attempt at raising the lock bit
   *
   * In:   r0 = &spinlock->lock
   * Out:  r0 = 1 if the lock was taken, 0 if it was already held
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_spin_trylock_asm)
          [--sp] = rets;
          p1 = r0;
          call _get_core_lock;
          r1 = p1;
          r3 = [p1];
          cc = bittst( r3, 0 );
          if cc jump .Lspin_try_busy;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          bitset ( r3, 0 ); /* Raise the lock bit. */
          [p1] = r3;
          call _start_lock_coherent;
  #else
          r2 = 1;
          [p1] = r2;
          call _put_core_lock;
  #endif
          r0 = 1;
          rets = [sp++];
          rts;

  .Lspin_try_busy:
          call _put_core_lock;
          r0 = 0;
          rets = [sp++];
          rts;
  ENDPROC(___raw_spin_trylock_asm)
  /*
   * ___raw_spin_unlock_asm - clear the lock bit
   *
   * In:   r0 = &spinlock->lock
   *
   * Clobbers: r2:0, p1:0
   */
  ENTRY(___raw_spin_unlock_asm)
          [--sp] = rets;
          p1 = r0;
          call _get_core_lock;
          r2 = [p1];
          bitclr ( r2, 0 );       /* only bit 0 changes; the rest is kept */
          [p1] = r2;
          r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          call _end_lock_coherent;
  #else
          call _put_core_lock;
  #endif
          rets = [sp++];
          rts;
  ENDPROC(___raw_spin_unlock_asm)
  /*
   * ___raw_read_lock_asm - take a reader reference, spinning if needed
   *
   * In:   r0 = &rwlock->lock
   *
   * The lock word is decremented; a negative result is undone and the
   * routine backs off (core lock dropped and retaken) until the word
   * reads 2 or more, then tries again.
   *
   * Clobbers: r2:0, p1:0
   */
  ENTRY(___raw_read_lock_asm)
          [--sp] = rets;
          p1 = r0;
          call _get_core_lock;
  .Lread_take:
          r1 = [p1];
          r1 += -1;
          [p1] = r1;
          cc = r1 < 0;
          if cc jump .Lread_undo;
          r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          call _start_lock_coherent;
  #else
          call _put_core_lock;
  #endif
          rets = [sp++];
          rts;

  .Lread_undo:
          /* Went negative: give the count back before waiting. */
          r1 += 1;
          [p1] = r1;
  .Lread_backoff:
          r1 = p1;
          call _put_core_lock;
          SSYNC(r2);
          r0 = p1;
          call _get_core_lock;
          r1 = [p1];
          cc = r1 < 2;
          if cc jump .Lread_backoff;
          jump .Lread_take;
  ENDPROC(___raw_read_lock_asm)
  /*
   * ___raw_read_trylock_asm - single attempt at a reader reference
   *
   * In:   r0 = &rwlock->lock
   * Out:  r0 = 1 if the lock word was positive and has been decremented,
   *            0 otherwise (word left untouched)
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_read_trylock_asm)
          [--sp] = rets;
          p1 = r0;
          call _get_core_lock;
          r1 = [p1];
          cc = r1 <= 0;
          if cc jump .Lread_try_busy;
          r1 += -1;
          [p1] = r1;
          r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          call _start_lock_coherent;
  #else
          call _put_core_lock;
  #endif
          r0 = 1;
          rets = [sp++];
          rts;

  .Lread_try_busy:
          r1 = p1;
          call _put_core_lock;
          r0 = 0;
          rets = [sp++];
          rts;
  ENDPROC(___raw_read_trylock_asm)
  /*
   * ___raw_read_unlock_asm - give back a reader reference
   *
   * In:   r0 = &rwlock->lock
   *
   * Note: work done under a reader lock should have no side effect on
   * cache issues with the other core, so the core lock is simply
   * released on the way out (no _end_lock_coherent).
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_read_unlock_asm)
          [--sp] = rets;
          p1 = r0;
          call _get_core_lock;
          r1 = [p1];
          r1 += 1;
          [p1] = r1;
          r1 = p1;
          call _put_core_lock;
          rets = [sp++];
          rts;
  ENDPROC(___raw_read_unlock_asm)
  /*
   * ___raw_write_lock_asm - take the writer lock, spinning if needed
   *
   * In:   r0 = &rwlock->lock
   *
   * r3 holds RW_LOCK_BIAS throughout.  The lock is taken by subtracting
   * the bias when that leaves zero; with __ARCH_SYNC_CORE_DCACHE the
   * high nibble is ignored in that test and in the wait loop.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_write_lock_asm)
          p1 = r0;
          [--sp] = rets;
          r3.l = lo(RW_LOCK_BIAS);
          r3.h = hi(RW_LOCK_BIAS);
          call _get_core_lock;
  .Lwrite_take:
          r1 = [p1];
          r1 = r1 - r3;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          /* Compare with the high nibble shifted out. */
          r2 = r1;
          r2 <<= 4;
          r2 >>= 4;
          cc = r2 == 0;
  #else
          cc = r1 == 0;
  #endif
          if !cc jump .Lwrite_backoff;
          [p1] = r1;
          r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          call _start_lock_coherent;
  #else
          call _put_core_lock;
  #endif
          rets = [sp++];
          rts;

  .Lwrite_backoff:
          /* Drop and retake the core lock until the word equals the bias. */
          r1 = p1;
          call _put_core_lock;
          SSYNC(r2);
          r0 = p1;
          call _get_core_lock;
          r1 = [p1];
  #ifdef __ARCH_SYNC_CORE_DCACHE
          r1 <<= 4;
          r1 >>= 4;
  #endif
          cc = r1 == r3;
          if !cc jump .Lwrite_backoff;
          jump .Lwrite_take;
  ENDPROC(___raw_write_lock_asm)
  /*
   * ___raw_write_trylock_asm - single attempt at the writer lock
   *
   * In:   r0 = &rwlock->lock
   * Out:  r0 = 1 if the lock was free and is now write-held, 0 otherwise
   *
   * The lock is free when its count equals RW_LOCK_BIAS.  With
   * __ARCH_SYNC_CORE_DCACHE the high nibble of the word holds CPU
   * fingerprints (set by _end_lock_coherent on unlock), so it has to be
   * masked out before the comparison, exactly as ___raw_write_lock_asm
   * does; comparing the raw word would make trylock fail on a free lock
   * whenever a fingerprint bit is still set.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_write_trylock_asm)
          p1 = r0;
          [--sp] = rets;
          call _get_core_lock;
          r1 = [p1];
          r2.l = lo(RW_LOCK_BIAS);
          r2.h = hi(RW_LOCK_BIAS);
  #ifdef __ARCH_SYNC_CORE_DCACHE
          /* r3 = lock word without the fingerprint nibble. */
          r3 = r1;
          r3 <<= 4;
          r3 >>= 4;
          cc = r3 == r2;
  #else
          cc = r1 == r2;
  #endif
          if !cc jump .Lfailed_trywrlock;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          /* Zero the count, keep the fingerprints for _start_lock_coherent. */
          r1 >>= 28;
          r1 <<= 28;
  #else
          r1 = 0;
  #endif
          [p1] = r1;
          r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          call _start_lock_coherent;
  #else
          call _put_core_lock;
  #endif
          rets = [sp++];
          r0 = 1;
          rts;

  .Lfailed_trywrlock:
          r1 = p1;
          call _put_core_lock;
          rets = [sp++];
          r0 = 0;
          rts;
  ENDPROC(___raw_write_trylock_asm)
  /*
   * ___raw_write_unlock_asm - release the writer lock
   *
   * In:   r0 = &rwlock->lock
   *
   * Adds RW_LOCK_BIAS back onto the lock word.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_write_unlock_asm)
          p1 = r0;
          [--sp] = rets;
          r3.l = lo(RW_LOCK_BIAS);
          r3.h = hi(RW_LOCK_BIAS);
          call _get_core_lock;
          r1 = [p1];
          r1 = r1 + r3;
          [p1] = r1;
          r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
          call _end_lock_coherent;
  #else
          call _put_core_lock;
  #endif
          rets = [sp++];
          rts;
  ENDPROC(___raw_write_unlock_asm)
  /*
   * ___raw_atomic_update_asm
   *
   * In:   r0 = ptr
   *       r1 = value
   * Out:  r0 = new contents of *ptr
   *
   * Atomically add a signed value to a 32bit word and return the result.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_update_asm)
          [--sp] = rets;
          p1 = r0;
          r3 = r1;                /* addend parked in r3 across the lock call */
          call _get_core_lock;
          r2 = [p1];
          r3 = r3 + r2;
          [p1] = r3;
          r1 = p1;
          call _put_core_lock;
          r0 = r3;
          rets = [sp++];
          rts;
  ENDPROC(___raw_atomic_update_asm)
  /*
   * ___raw_atomic_clear_asm
   *
   * In:   r0 = ptr
   *       r1 = mask
   * Out:  r0 = previous contents of *ptr
   *
   * Atomically clear the mask bits in a 32bit word and return the old
   * 32bit value.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_clear_asm)
          [--sp] = rets;
          p1 = r0;
          r3 = ~r1;               /* r3 = bits to keep */
          call _get_core_lock;
          r2 = [p1];
          r3 = r2 & r3;
          [p1] = r3;
          r3 = r2;                /* old value survives the unlock call in r3 */
          r1 = p1;
          call _put_core_lock;
          r0 = r3;
          rets = [sp++];
          rts;
  ENDPROC(___raw_atomic_clear_asm)
  /*
   * ___raw_atomic_set_asm
   *
   * In:   r0 = ptr
   *       r1 = mask
   * Out:  r0 = previous contents of *ptr
   *
   * Atomically OR the mask bits into a 32bit word and return the old
   * 32bit value.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_set_asm)
          [--sp] = rets;
          p1 = r0;
          r3 = r1;
          call _get_core_lock;
          r2 = [p1];
          r3 = r2 | r3;
          [p1] = r3;
          r3 = r2;                /* old value survives the unlock call in r3 */
          r1 = p1;
          call _put_core_lock;
          r0 = r3;
          rets = [sp++];
          rts;
  ENDPROC(___raw_atomic_set_asm)
  /*
   * ___raw_atomic_xor_asm
   *
   * In:   r0 = ptr
   *       r1 = mask
   * Out:  r0 = previous contents of *ptr
   *
   * Atomically XOR the mask bits into a 32bit word and return the old
   * 32bit value.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_xor_asm)
          [--sp] = rets;
          p1 = r0;
          r3 = r1;
          call _get_core_lock;
          r2 = [p1];
          r3 = r2 ^ r3;
          [p1] = r3;
          r3 = r2;                /* old value survives the unlock call in r3 */
          r1 = p1;
          call _put_core_lock;
          r0 = r3;
          rets = [sp++];
          rts;
  ENDPROC(___raw_atomic_xor_asm)
  /*
   * ___raw_atomic_test_asm
   *
   * In:   r0 = ptr
   *       r1 = mask
   * Out:  r0 = *ptr & mask
   *
   * AND the mask with a 32bit word and return the masked value.  The
   * cache line holding the word is flushed and invalidated first; this
   * architecture needs the local cache invalidated before testing.
   * The core lock is not taken.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_test_asm)
          r3 = r1;
          p1 = r0;
          /* p0 = ptr masked with -L1_CACHE_BYTES, i.e. the line base. */
          r1 = -L1_CACHE_BYTES;
          r1 = r0 & r1;
          p0 = r1;
          flushinv[p0];
          SSYNC(r2);
          r0 = [p1];
          r0 = r0 & r3;
          rts;
  ENDPROC(___raw_atomic_test_asm)
  /*
   * ___raw_xchg_{1,2,4}_asm
   *
   * In:   r0 = ptr
   *       r1 = value
   * Out:  r0 = previous contents of *ptr
   *
   * Atomically swap *ptr with value and return the old value.
   * __do_xchg(load_op, store_op) is the shared body: load_op is the
   * expression that reads *ptr through p1, store_op the matching store
   * destination.  The 1- and 2-byte variants load with "(z)".
   *
   * Clobbers: r3:0, p1:0
   */
  #define __do_xchg(load_op, store_op) \
          p1 = r0; \
          r3 = r1; \
          [--sp] = rets; \
          call _get_core_lock; \
          r2 = load_op; \
          store_op = r3; \
          r3 = r2; \
          r1 = p1; \
          call _put_core_lock; \
          r0 = r3; \
          rets = [sp++]; \
          rts;

  ENTRY(___raw_xchg_1_asm)
          __do_xchg(b[p1] (z), b[p1])
  ENDPROC(___raw_xchg_1_asm)

  ENTRY(___raw_xchg_2_asm)
          __do_xchg(w[p1] (z), w[p1])
  ENDPROC(___raw_xchg_2_asm)

  ENTRY(___raw_xchg_4_asm)
          __do_xchg([p1], [p1])
  ENDPROC(___raw_xchg_4_asm)
  /*
   * ___raw_cmpxchg_{1,2,4}_asm
   *
   * In:   r0 = ptr
   *       r1 = new
   *       r2 = old
   * Out:  r0 = previous contents of *ptr
   *
   * Atomically store new into *ptr if *ptr == old, and return the value
   * *ptr held beforehand either way.  __do_cmpxchg(load_op, store_op) is
   * the shared body; r4 carries "old" across the lock call and is saved
   * and restored on the stack.  The label stays numeric because the
   * body is expanded once per width.
   *
   * Clobbers: r3:0, p1:0
   */
  #define __do_cmpxchg(load_op, store_op) \
          [--sp] = rets; \
          [--sp] = r4; \
          p1 = r0; \
          r3 = r1; \
          r4 = r2; \
          call _get_core_lock; \
          r2 = load_op; \
          cc = r2 == r4; \
          if !cc jump 1f; \
          store_op = r3; \
       1: r3 = r2; \
          r1 = p1; \
          call _put_core_lock; \
          r0 = r3; \
          r4 = [sp++]; \
          rets = [sp++]; \
          rts;

  ENTRY(___raw_cmpxchg_1_asm)
          __do_cmpxchg(b[p1] (z), b[p1])
  ENDPROC(___raw_cmpxchg_1_asm)

  ENTRY(___raw_cmpxchg_2_asm)
          __do_cmpxchg(w[p1] (z), w[p1])
  ENDPROC(___raw_cmpxchg_2_asm)

  ENTRY(___raw_cmpxchg_4_asm)
          __do_cmpxchg([p1], [p1])
  ENDPROC(___raw_cmpxchg_4_asm)
  /*
   * ___raw_bit_set_asm
   *
   * In:   r0 = ptr
   *       r1 = bitnr
   * Out:  r0 = previous contents of *ptr
   *
   * Atomically set one bit in a 32bit word and return the old 32bit
   * value.  Turns bitnr into the mask 1 << bitnr and tail-jumps to
   * ___raw_atomic_set_asm.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_set_asm)
          r2 = r1;
          r1 = 1;
          r1 <<= r2;              /* r1 = 1 << bitnr */
          jump ___raw_atomic_set_asm;
  ENDPROC(___raw_bit_set_asm)
  /*
   * ___raw_bit_clear_asm
   *
   * In:   r0 = ptr
   *       r1 = bitnr
   * Out:  r0 = previous contents of *ptr
   *
   * Atomically clear one bit in a 32bit word and return the old 32bit
   * value.  Turns bitnr into the mask 1 << bitnr and tail-jumps to
   * ___raw_atomic_clear_asm.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_clear_asm)
          r2 = r1;
          r1 = 1;
          r1 <<= r2;              /* r1 = 1 << bitnr */
          jump ___raw_atomic_clear_asm;
  ENDPROC(___raw_bit_clear_asm)
  /*
   * ___raw_bit_toggle_asm
   *
   * In:   r0 = ptr
   *       r1 = bitnr
   * Out:  r0 = previous contents of *ptr
   *
   * Atomically toggle one bit in a 32bit word and return the old 32bit
   * value.  Turns bitnr into the mask 1 << bitnr and tail-jumps to
   * ___raw_atomic_xor_asm.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_toggle_asm)
          r2 = r1;
          r1 = 1;
          r1 <<= r2;              /* r1 = 1 << bitnr */
          jump ___raw_atomic_xor_asm;
  ENDPROC(___raw_bit_toggle_asm)
  /*
   * ___raw_bit_test_set_asm
   *
   * In:   r0 = ptr
   *       r1 = bitnr
   * Out:  r0 = 1 if the bit was set beforehand, 0 if it was clear
   *
   * Atomically test-and-set a bit in a 32bit word.  The update itself
   * is done by ___raw_bit_set_asm; bitnr is kept on the stack across
   * that call so the old word can be reduced to a single bit.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_test_set_asm)
          [--sp] = rets;
          [--sp] = r1;
          call ___raw_bit_set_asm;
          r1 = [sp++];
          r2 = 1;
          r2 <<= r1;
          r0 = r0 & r2;           /* isolate the bit in the old value */
          cc = r0 == 0;
          if cc jump .Lbit_test_set_out;
          r0 = 1;
  .Lbit_test_set_out:
          rets = [sp++];
          rts;
  ENDPROC(___raw_bit_test_set_asm)
  /*
   * ___raw_bit_test_clear_asm
   *
   * In:   r0 = ptr
   *       r1 = bitnr
   * Out:  r0 = 1 if the bit was set beforehand, 0 if it was clear
   *
   * Atomically test-and-clear a bit in a 32bit word.  The update itself
   * is done by ___raw_bit_clear_asm; bitnr is kept on the stack across
   * that call so the old word can be reduced to a single bit.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_test_clear_asm)
          [--sp] = rets;
          [--sp] = r1;
          call ___raw_bit_clear_asm;
          r1 = [sp++];
          r2 = 1;
          r2 <<= r1;
          r0 = r0 & r2;           /* isolate the bit in the old value */
          cc = r0 == 0;
          if cc jump .Lbit_test_clear_out;
          r0 = 1;
  .Lbit_test_clear_out:
          rets = [sp++];
          rts;
  ENDPROC(___raw_bit_test_clear_asm)
  /*
   * ___raw_bit_test_toggle_asm
   *
   * In:   r0 = ptr
   *       r1 = bitnr
   * Out:  r0 = 1 if the bit was set beforehand, 0 if it was clear
   *
   * Atomically test-and-toggle a bit in a 32bit word.  The update itself
   * is done by ___raw_bit_toggle_asm; bitnr is kept on the stack across
   * that call so the old word can be reduced to a single bit.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_test_toggle_asm)
          [--sp] = rets;
          [--sp] = r1;
          call ___raw_bit_toggle_asm;
          r1 = [sp++];
          r2 = 1;
          r2 <<= r1;
          r0 = r0 & r2;           /* isolate the bit in the old value */
          cc = r0 == 0;
          if cc jump .Lbit_test_toggle_out;
          r0 = 1;
  .Lbit_test_toggle_out:
          rets = [sp++];
          rts;
  ENDPROC(___raw_bit_test_toggle_asm)
  /*
   * ___raw_bit_test_asm
   *
   * In:   r0 = ptr
   *       r1 = bitnr
   * Out:  r0 = *ptr & (1 << bitnr), as returned by ___raw_atomic_test_asm
   *
   * Test a bit in a 32bit word.  This goes through
   * ___raw_atomic_test_asm because this architecture needs the local
   * cache invalidated before testing.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_test_asm)
          r2 = r1;
          r1 = 1;
          r1 <<= r2;              /* r1 = 1 << bitnr */
          jump ___raw_atomic_test_asm;
  ENDPROC(___raw_bit_test_asm)
  /*
   * ___raw_uncached_fetch_asm
   *
   * In:   r0 = ptr
   * Out:  r0 = 32bit value read from *ptr
   *
   * Fetch a 32bit value after flushing and invalidating the cache line
   * that holds it, so the load does not hit a stale local copy.  The
   * core lock is not taken.
   *
   * Clobbers: r2:0, p1:0
   */
  ENTRY(___raw_uncached_fetch_asm)
          p1 = r0;
          /* p0 = ptr masked with -L1_CACHE_BYTES, i.e. the line base. */
          r1 = -L1_CACHE_BYTES;
          r1 = r0 & r1;
          p0 = r1;
          flushinv[p0];
          SSYNC(r2);
          r0 = [p1];
          rts;
  ENDPROC(___raw_uncached_fetch_asm)