/*
 * File: arch/blackfin/mach-bf561/atomic.S
 * Author: Philippe Gerum <rpm@xenomai.org>
 *
 * Copyright 2007 Analog Devices Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm
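
/*
 * The macro above simply loads the 32-bit address of the shared
 * _corelock word into a pointer register, half by half, e.g.
 * "coreslot_loadaddr p0" expands to "p0.l = _corelock; p0.h = _corelock;".
 */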

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Disable interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	testset (p0);
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)
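
/*
 * In rough C terms (illustrative sketch only, not part of this file),
 * _get_core_lock does:
 *
 *	unsigned long flags, line = addr & ~(L1_CACHE_BYTES - 1);
 *	local_irq_save(flags);			// cli r0
 *	while (!testset(&_corelock))		// spin on the cross-core lock
 *		ssync();
 *	flushinv((void *)line);			// drop our stale cached copy
 *	ssync();
 *	return flags;
 *
 * testset(), flushinv() and ssync() stand for the corresponding
 * Blackfin instructions here, not real kernel helpers.
 */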

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Disable interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	testset (p0);
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)
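
/*
 * Releasing the core lock is just a plain 32-bit store of zero to
 * _corelock followed by an SSYNC, so the write is visible to the other
 * core before interrupts are re-enabled with the mask saved by
 * _get_core_lock*().
 */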

#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set bit of other cores in barrier mask. Don't change current core bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);
	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)
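
/*
 * _barrier_mask keeps one bit per core: a set bit means "this core must
 * resync its D-cache before it next checks the barrier". Marking a
 * barrier therefore flags every core except the one doing the marking,
 * while leaving the marker's own bit in whatever state it already had.
 */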

ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear current core bit in barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);
	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)
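
/*
 * Checking a barrier is the consumer side of the scheme above: if our
 * bit is set in _barrier_mask we clear it, drop the core lock and call
 * _resync_core_dcache to invalidate this core's whole D-cache;
 * otherwise we simply release the core lock and return.
 */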

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:
	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	r1 = [p1];
	r1 >>= 28; /* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts
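
/*
 * Ownership tracking in a nutshell: the top nibble of the lock word
 * records which core(s) last went through the lock. In rough C terms
 * (illustrative sketch only):
 *
 *	last_owners = lock >> 28;
 *	stale = last_owners & ~(1 << cpuid);	// someone else held it
 *	lock &= 0x0fffffff;			// clear the fingerprints
 *	release_corelock();			// but keep IRQs off
 *	if (stale)
 *		resync_core_dcache();		// our cached data may be old
 *
 * The names above are only for illustration; the real work is done by
 * the assembly in _start_lock_coherent.
 */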

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:
	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;
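
/*
 * _end_lock_coherent is the matching unlock-side hook: it stamps the
 * current core's fingerprint bit (bit 28 + cpuid) into the lock word
 * and then tail-jumps to _put_core_lock, which drops the core lock and
 * restores the IRQ mask passed in r0.
 */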

#endif /* __ARCH_SYNC_CORE_DCACHE */

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 ); /* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that flush
	   will behave like nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)
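
/*
 * Taken together with _get_core_lock/_put_core_lock, the spinlock fast
 * path is roughly the following C (illustrative sketch only):
 *
 *	for (;;) {
 *		flags = get_core_lock(&lock->lock);	// IRQs off + corelock
 *		if (!(lock->lock & 1)) {
 *			lock->lock |= 1;		// bit 0 is the lock bit
 *			start_lock_coherent(flags, &lock->lock);
 *			return;		// or put_core_lock(flags) when D-cache syncing is off
 *		}
 *		put_core_lock(flags);			// busy: back off and retry
 *	}
 *
 * The helper names are stand-ins for the local assembly routines above.
 */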

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 ); /* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;

.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)
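
/*
 * The rwlock word follows the usual bias convention: it holds
 * RW_LOCK_BIAS when the lock is free, each reader decrements it by one
 * and a writer claims the entire bias at once. A reader whose decrement
 * would drive the count negative backs it out and spins until the count
 * recovers before retrying.
 */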

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: processing done under a reader lock should have no cache
 * side effects visible to the other core, so we just release the
 * core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)
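
/*
 * Writer acquisition in rough C terms (illustrative sketch only), with
 * the CPU-fingerprint nibble masked off when core D-cache syncing is
 * enabled:
 *
 *	for (;;) {
 *		flags = get_core_lock(&rw->lock);
 *		val = rw->lock - RW_LOCK_BIAS;
 *		if ((val & 0x0fffffff) == 0) {	// no readers, no writer
 *			rw->lock = val;		// keep only the fingerprints
 *			start_lock_coherent(flags, &rw->lock);
 *			return;
 *		}
 *		put_core_lock(flags);		// contended: wait for the
 *		wait_until_lock_equals_bias();	// count to return to BIAS
 *	}
 *
 * Helper names are stand-ins for the assembly routines in this file.
 */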

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_update_asm)
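
/*
 * All of the ___raw_atomic_*_asm helpers share the same shape: take the
 * core lock (which also flushes/invalidates the data's cacheline), do a
 * plain read-modify-write, release the core lock. In rough C terms
 * (illustrative sketch only):
 *
 *	flags = get_core_lock(ptr);
 *	old = *ptr;
 *	*ptr = op(old, arg);	// add, and-not, or, xor...
 *	put_core_lock(flags, ptr);
 *	return result;		// new value for update, old value otherwise
 *
 * where get_core_lock()/put_core_lock() stand for the assembly entry
 * points at the top of this file.
 */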

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
	p1 = r0;
	r3 = ~r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 & r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 | r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 ^ r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define __do_xchg(src, dst)		\
	p1 = r0;			\
	r3 = r1;			\
	[--sp] = rets;			\
	call _get_core_lock;		\
	r2 = src;			\
	dst = r3;			\
	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
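
/*
 * __do_xchg parameterizes only the load and store forms: b[p1] (z) and
 * w[p1] (z) are zero-extending byte/halfword accesses, [p1] the 32-bit
 * one. Behaviourally each entry point is, in rough C (illustrative
 * sketch only):
 *
 *	flags = get_core_lock(ptr);
 *	old = *ptr;
 *	*ptr = value;
 *	put_core_lock(flags, ptr);
 *	return old;
 */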

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_cmpxchg(src, dst)		\
	[--sp] = rets;			\
	[--sp] = r4;			\
	p1 = r0;			\
	r3 = r1;			\
	r4 = r2;			\
	call _get_core_lock;		\
	r2 = src;			\
	cc = r2 == r4;			\
	if !cc jump 1f;			\
	dst = r3;			\
1:	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	r4 = [sp++];			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
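
/*
 * Likewise, each cmpxchg entry point amounts to the classic
 * compare-and-swap, done under the core lock rather than with a native
 * atomic instruction (illustrative C sketch only):
 *
 *	flags = get_core_lock(ptr);
 *	prev = *ptr;
 *	if (prev == old)
 *		*ptr = new;
 *	put_core_lock(flags, ptr);
 *	return prev;
 */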

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)
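
/*
 * The bit operations are thin wrappers: they turn the bit number into a
 * single-bit mask (1 << bitnr) and tail-jump into the corresponding
 * mask-based atomic above. The ___raw_bit_test_*_asm variants that
 * follow additionally reduce the returned old word to a 0/1 result for
 * the tested bit.
 */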

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)