
/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt	$9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc	1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
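
/*
 * Background for the two macros below: SYSCALL only saves the user return
 * RIP (in %rcx) and RFLAGS (in %r11) and leaves %rsp untouched, so on the
 * fast path the SS, RSP, EFLAGS, CS and RCX slots of pt_regs are never
 * written. FIXUP_TOP_OF_STACK fills them in (user RSP from pda_oldrsp,
 * EFLAGS copied from the saved R11, RCX marked -1) before C code looks at
 * the frame, and RESTORE_TOP_OF_STACK propagates any changes back so the
 * SYSRET return path stays consistent.
 */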
/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
.endm

.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
.endm

.macro	CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
.endm

/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the
 *	stack frame and report it properly in ps. Unfortunately we don't
 *	have one.
 *
 * When the user can change the frames, always force IRET. That is because
 * IRET deals with non-canonical addresses better; SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
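
/*
 * Worked example (illustrative only; the values are made up): a user-space
 * write(1, buf, 16) issued via the SYSCALL instruction arrives here with
 *	rax = __NR_write, rdi = 1, rsi = buf, rdx = 16,
 *	rcx = user RIP following the SYSCALL insn, r11 = user RFLAGS,
 * and the user %rsp still live, which is why it is stashed in pda_oldrsp
 * immediately below.
 */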
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	sti
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  sysret_careful
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	cli
	TRACE_IRQS_OFF
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja  1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
1:	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(system_call)
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl  $~TS_COMPAT,threadinfo_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	TRACE_IRQS_OFF
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(int_ret_from_sys_call)
/*
 * Certain special system calls that need to save a complete full stack frame.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)
/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */
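
/*
 * Note on the stack switch in the macro below: pda_irqcount is kept at -1
 * while no interrupt stack is in use, so the incl sets ZF only on the
 * first (outermost) nesting level; the cmoveq therefore moves %rsp onto
 * the per-CPU interrupt stack only then, and nested interrupts keep
 * running on the stack they arrived on. call_softirq at the end of this
 * file relies on the same trick.
 */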
/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
	pushq %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
	cmoveq %gs:pda_irqstackptr,%rsp
	push	%rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
	.endm

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	cli
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful
retint_swapgs:
	/*
	 * The iretq could re-enable interrupts:
	 */
	cli
	TRACE_IRQS_IRETQ
	swapgs
	jmp restore_args

retint_restore_args:
	cli
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $11,%rdi	/* SIGSEGV */
	TRACE_IRQS_ON
	sti
	jmp do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    retint_swapgs
	TRACE_IRQS_ON
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
#endif
/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js    1f
	swapgs
	xorl  %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm

/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
	.macro paranoidexit trace=1
	/* ebx:	no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz   paranoid_userspace\trace
paranoid_swapgs\trace:
	TRACE_IRQS_IRETQ 0
	swapgs
paranoid_restore\trace:
	RESTORE_ALL 8
	iretq
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	call schedule
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
	.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	movl  threadinfo_flags(%rcx),%edx
	movl  $_TIF_WORK_MASK,%edi
	andl  %edi,%edx
	jnz  retint_careful
	/*
	 * The iret might restore flags:
	 */
	TRACE_IRQS_IRETQ
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after iret run
	   with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp error_sti
END(error_entry)
	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
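/*
 * Typical usage from C (illustrative; my_thread_fn and my_arg are
 * placeholder names):
 *	kernel_thread(my_thread_fn, my_arg, CLONE_FS | CLONE_FILES | SIGCHLD);
 * The child starts in child_rip below, which calls fn(arg) and then do_exit().
 */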
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here, so internally to
	 * the x86_64 port you can rely on kernel_thread() not to reschedule
	 * the child before returning; this avoids the need for hacks, for
	 * example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)
child_rip:
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
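/*
 * Illustrative caller (values are approximate): the kernel uses this entry
 * point when it execs a user program from kernel context, e.g. roughly
 *	execve("/sbin/init", argv_init, envp_init);
 * when starting init. On a zero return from sys_execve the new user context
 * is restored via the IRET path (int_ret_from_sys_call) below.
 */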
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(execve)

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
END(page_fault)
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
END(debug)
	.previous .text

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
END(nmi)
	.previous .text

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
END(int3)
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
	zeroentry do_reserved
END(reserved)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
END(general_protection)
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif
	/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
	push  %rbp		# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)

#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
	CFI_STARTPROC
	movq	%r15, R15(%rdi)
	movq	%r14, R14(%rdi)
	xchgq	%rsi, %rdx
	movq	%r13, R13(%rdi)
	movq	%r12, R12(%rdi)
	xorl	%eax, %eax
	movq	%rbp, RBP(%rdi)
	movq	%rbx, RBX(%rdi)
	movq	(%rsp), %rcx
	movq	%rax, R11(%rdi)
	movq	%rax, R10(%rdi)
	movq	%rax, R9(%rdi)
	movq	%rax, R8(%rdi)
	movq	%rax, RAX(%rdi)
	movq	%rax, RCX(%rdi)
	movq	%rax, RDX(%rdi)
	movq	%rax, RSI(%rdi)
	movq	%rax, RDI(%rdi)
	movq	%rax, ORIG_RAX(%rdi)
	movq	%rcx, RIP(%rdi)
	leaq	8(%rsp), %rcx
	movq	$__KERNEL_CS, CS(%rdi)
	movq	%rax, EFLAGS(%rdi)
	movq	%rcx, RSP(%rdi)
	movq	$__KERNEL_DS, SS(%rdi)
	jmpq	*%rdx
	CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
#endif