/*
 * Copyright 2007-2008 Analog Devices Inc.
 * Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm

.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
	cli \scratch;
	testset (\addr);
	sti \scratch;
#else
	testset (\addr);
#endif
.endm

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)
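
/*
 * A minimal C-level sketch of the core-lock protocol above (not part of
 * the original file; irq_save()/irq_restore(), core_testset() and
 * cacheline_flushinv() are hypothetical stand-ins for cli/sti, testset
 * and flushinv):
 *
 *	unsigned long get_core_lock(void *ptr)
 *	{
 *		unsigned long flags = irq_save();
 *		while (!core_testset(&corelock))
 *			;			// spin until testset wins the slot
 *		cacheline_flushinv(ptr);	// drop any stale cached copy
 *		return flags;
 *	}
 *
 *	void put_core_lock(unsigned long flags, void *ptr)
 *	{
 *		// ptr is part of the interface for the flush case; with a
 *		// write-through cache no flush is needed on release.
 *		corelock = 0;			// free the inter-core slot
 *		irq_restore(flags);
 *	}
 */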

#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set bit of other cores in barrier mask. Don't change current core bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);
	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear current core bit in barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);
	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:
	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	r1 = [p1];
	r1 >>= 28;	/* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:
	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */
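
/*
 * A minimal C-level sketch of the coherence bookkeeping above (not part
 * of the original; resync_core_dcache() aside, the helper names are
 * hypothetical stand-ins).  The top nibble of each lock word records
 * which cores have owned the lock, so a core only pays for a full
 * D-cache invalidation when another core's fingerprint is present:
 *
 *	void start_lock_coherent(unsigned long flags, unsigned long *lock)
 *	{
 *		unsigned long cpu_bit = 1UL << smp_processor_id();
 *		unsigned long others = (*lock >> 28) & ~cpu_bit;
 *
 *		*lock &= 0x0fffffffUL;		// clear the fingerprint nibble
 *		corelock = 0;			// drop the core lock early
 *		if (others)
 *			resync_core_dcache();	// stale data possible: invalidate
 *		irq_restore(flags);
 *	}
 *
 *	void end_lock_coherent(unsigned long flags, unsigned long *lock)
 *	{
 *		*lock |= 1UL << (28 + smp_processor_id());	// leave our mark
 *		put_core_lock(flags, lock);
 *	}
 */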

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 ); /* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that flush
	   will behave like nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)
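
/*
 * A minimal C-level sketch of the spinlock acquire path above (not part
 * of the original; the __ARCH_SYNC_CORE_DCACHE build ends the fast path
 * with _start_lock_coherent instead of _put_core_lock):
 *
 *	void raw_spin_lock(unsigned long *lock)
 *	{
 *		for (;;) {
 *			unsigned long flags = get_core_lock(lock);
 *			if (!(*lock & 1)) {
 *				*lock |= 1;	// take bit 0 (the coherent build
 *						// also keeps the fingerprint nibble)
 *				put_core_lock(flags, lock);
 *				return;
 *			}
 *			put_core_lock(flags, lock);	// busy: drop core lock, retry
 *		}
 *	}
 */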

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 ); /* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;

.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: code running under a reader lock should have no cache
 * side effects visible to the other core, so we just release the
 * core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)
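
/*
 * A minimal C-level sketch of the RW_LOCK_BIAS counting scheme used by
 * the rwlock routines above (not part of the original; the fingerprint
 * nibble handled by the __ARCH_SYNC_CORE_DCACHE build is ignored here).
 * An unlocked rwlock holds RW_LOCK_BIAS.  Each reader decrements the
 * count and backs off if the result goes negative (a writer holds the
 * lock).  A writer subtracts the whole bias and may proceed only if the
 * result is zero, i.e. no readers and no other writer are present:
 *
 *	void raw_write_lock(unsigned long *lock)
 *	{
 *		for (;;) {
 *			unsigned long flags = get_core_lock(lock);
 *			if (*lock == RW_LOCK_BIAS) {
 *				*lock = 0;		// writer owns the lock
 *				put_core_lock(flags, lock);
 *				return;
 *			}
 *			put_core_lock(flags, lock);	// readers/writer active
 *		}
 *	}
 */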

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_update_asm)
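
/*
 * A minimal C-level sketch of the atomic add-return above (not part of
 * the original; get_core_lock()/put_core_lock() as sketched earlier):
 *
 *	int raw_atomic_update(int *ptr, int value)
 *	{
 *		unsigned long flags = get_core_lock(ptr);
 *		int new = *ptr + value;
 *		*ptr = new;
 *		put_core_lock(flags, ptr);
 *		return new;			// the new value is returned
 *	}
 */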

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
	p1 = r0;
	r3 = ~r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 & r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 | r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 ^ r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)
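
/*
 * A minimal C-level sketch of the read-only path used by
 * ___raw_atomic_test_asm (and by ___raw_uncached_fetch_asm below): no
 * core lock is taken, only a cacheline invalidation so the load sees
 * the other core's latest store (cacheline_flushinv() is a hypothetical
 * stand-in for flushinv):
 *
 *	unsigned long raw_atomic_test(unsigned long *ptr, unsigned long mask)
 *	{
 *		cacheline_flushinv(ptr);	// drop any stale cached copy
 *		return *ptr & mask;		// masked value, not just 0/1
 *	}
 */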

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define __do_xchg(src, dst)		\
	p1 = r0;			\
	r3 = r1;			\
	[--sp] = rets;			\
	call _get_core_lock;		\
	r2 = src;			\
	dst = r3;			\
	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
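
/*
 * A minimal C-level sketch of the xchg helpers above (not part of the
 * original; shown for the 32-bit case, the 8/16-bit variants differ only
 * in the access width):
 *
 *	unsigned long raw_xchg_4(unsigned long *ptr, unsigned long value)
 *	{
 *		unsigned long flags = get_core_lock(ptr);
 *		unsigned long old = *ptr;
 *		*ptr = value;
 *		put_core_lock(flags, ptr);
 *		return old;
 *	}
 */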

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_cmpxchg(src, dst)		\
	[--sp] = rets;			\
	[--sp] = r4;			\
	p1 = r0;			\
	r3 = r1;			\
	r4 = r2;			\
	call _get_core_lock;		\
	r2 = src;			\
	cc = r2 == r4;			\
	if !cc jump 1f;			\
	dst = r3;			\
1:	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	r4 = [sp++];			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
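
/*
 * A minimal C-level sketch of the cmpxchg helpers above (not part of
 * the original; shown for the 32-bit case, with parameters in the same
 * order as the r0/r1/r2 arguments):
 *
 *	unsigned long raw_cmpxchg_4(unsigned long *ptr, unsigned long new,
 *				    unsigned long old)
 *	{
 *		unsigned long flags = get_core_lock(ptr);
 *		unsigned long prev = *ptr;
 *		if (prev == old)
 *			*ptr = new;		// only store on a match
 *		put_core_lock(flags, ptr);
 *		return prev;			// previous value either way
 *	}
 */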

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)
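
/*
 * A minimal C-level sketch of the test-and-set bit operation above (not
 * part of the original); the word-wide atomic set returns the old 32-bit
 * value, from which the old bit is extracted.  The same pattern is used
 * by the test-and-clear and test-and-toggle routines that follow, with
 * raw_atomic_set() here standing in for ___raw_atomic_set_asm:
 *
 *	int raw_bit_test_set(unsigned long *ptr, int nr)
 *	{
 *		unsigned long old = raw_atomic_set(ptr, 1UL << nr);
 *		return (old & (1UL << nr)) != 0;
 *	}
 */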

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)