  1. /*
  2. * linux/arch/x86_64/entry.S
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
  6. * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  7. */
  8. /*
  9. * entry.S contains the system-call and fault low-level handling routines.
  10. *
  11. * NOTE: This code handles signal recognition, which happens every time
  12. * after an interrupt and after each system call.
  13. *
  14. * Normal syscalls and interrupts don't save a full stack frame; this is
  15. * only done for syscall tracing, signals, or fork/exec et al.
  16. *
  17. * A note on terminology:
  18. * - top of stack: Architecture defined interrupt frame from SS to RIP
  19. * at the top of the kernel process stack.
  20. * - partial stack frame: partially saved registers up to R11.
  21. * - full stack frame: like the partial stack frame, but with all registers saved.
  22. *
  23. * Some macro usage:
  24. * - CFI macros are used to generate dwarf2 unwind information for better
  25. * backtraces. They don't change any code.
  26. * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  27. * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
  28. * There are unfortunately lots of special cases where some registers are
  29. * not touched. The macro is a big mess that should be cleaned up.
  30. * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  31. * Gives a full stack frame.
  32. * - ENTRY/END - Define functions in the symbol table.
  33. * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  34. * frame that is otherwise undefined after a SYSCALL
  35. * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  36. * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  37. */
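/*
 * Illustrative sketch (for orientation only, not part of this file): the
 * architecture-defined interrupt frame referred to above, as the CPU pushes
 * it on the kernel stack, from higher to lower addresses:
 *
 *	SS	(pushed first, highest address)
 *	RSP
 *	RFLAGS
 *	CS
 *	RIP	(pushed last)
 *
 * For exceptions that carry one, an error code is pushed below RIP.
 */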
  38. #include <linux/linkage.h>
  39. #include <asm/segment.h>
  40. #include <asm/cache.h>
  41. #include <asm/errno.h>
  42. #include <asm/dwarf2.h>
  43. #include <asm/calling.h>
  44. #include <asm/asm-offsets.h>
  45. #include <asm/msr.h>
  46. #include <asm/unistd.h>
  47. #include <asm/thread_info.h>
  48. #include <asm/hw_irq.h>
  49. #include <asm/page.h>
  50. #include <asm/irqflags.h>
  51. .code64
  52. #ifndef CONFIG_PREEMPT
  53. #define retint_kernel retint_restore_args
  54. #endif
  55. .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
  56. #ifdef CONFIG_TRACE_IRQFLAGS
  57. bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
  58. jnc 1f
  59. TRACE_IRQS_ON
  60. 1:
  61. #endif
  62. .endm
  63. /*
  64. * C code is not supposed to know about the undefined top of stack. Every time
  65. * a C function with a pt_regs argument is called from the SYSCALL-based
  66. * fast path, FIXUP_TOP_OF_STACK is needed.
  67. * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  68. * manipulation.
  69. */
  70. /* %rsp:at FRAMEEND */
  71. .macro FIXUP_TOP_OF_STACK tmp
  72. movq %gs:pda_oldrsp,\tmp
  73. movq \tmp,RSP(%rsp)
  74. movq $__USER_DS,SS(%rsp)
  75. movq $__USER_CS,CS(%rsp)
  76. movq $-1,RCX(%rsp)
  77. movq R11(%rsp),\tmp /* get eflags */
  78. movq \tmp,EFLAGS(%rsp)
  79. .endm
  80. .macro RESTORE_TOP_OF_STACK tmp,offset=0
  81. movq RSP-\offset(%rsp),\tmp
  82. movq \tmp,%gs:pda_oldrsp
  83. movq EFLAGS-\offset(%rsp),\tmp
  84. movq \tmp,R11-\offset(%rsp)
  85. .endm
  86. .macro FAKE_STACK_FRAME child_rip
  87. /* push in order ss, rsp, eflags, cs, rip */
  88. xorl %eax, %eax
  89. pushq %rax /* ss */
  90. CFI_ADJUST_CFA_OFFSET 8
  91. /*CFI_REL_OFFSET ss,0*/
  92. pushq %rax /* rsp */
  93. CFI_ADJUST_CFA_OFFSET 8
  94. CFI_REL_OFFSET rsp,0
  95. pushq $(1<<9) /* eflags - interrupts on */
  96. CFI_ADJUST_CFA_OFFSET 8
  97. /*CFI_REL_OFFSET rflags,0*/
  98. pushq $__KERNEL_CS /* cs */
  99. CFI_ADJUST_CFA_OFFSET 8
  100. /*CFI_REL_OFFSET cs,0*/
  101. pushq \child_rip /* rip */
  102. CFI_ADJUST_CFA_OFFSET 8
  103. CFI_REL_OFFSET rip,0
  104. pushq %rax /* orig rax */
  105. CFI_ADJUST_CFA_OFFSET 8
  106. .endm
  107. .macro UNFAKE_STACK_FRAME
  108. addq $8*6, %rsp
  109. CFI_ADJUST_CFA_OFFSET -(6*8)
  110. .endm
  111. .macro CFI_DEFAULT_STACK start=1
  112. .if \start
  113. CFI_STARTPROC simple
  114. CFI_SIGNAL_FRAME
  115. CFI_DEF_CFA rsp,SS+8
  116. .else
  117. CFI_DEF_CFA_OFFSET SS+8
  118. .endif
  119. CFI_REL_OFFSET r15,R15
  120. CFI_REL_OFFSET r14,R14
  121. CFI_REL_OFFSET r13,R13
  122. CFI_REL_OFFSET r12,R12
  123. CFI_REL_OFFSET rbp,RBP
  124. CFI_REL_OFFSET rbx,RBX
  125. CFI_REL_OFFSET r11,R11
  126. CFI_REL_OFFSET r10,R10
  127. CFI_REL_OFFSET r9,R9
  128. CFI_REL_OFFSET r8,R8
  129. CFI_REL_OFFSET rax,RAX
  130. CFI_REL_OFFSET rcx,RCX
  131. CFI_REL_OFFSET rdx,RDX
  132. CFI_REL_OFFSET rsi,RSI
  133. CFI_REL_OFFSET rdi,RDI
  134. CFI_REL_OFFSET rip,RIP
  135. /*CFI_REL_OFFSET cs,CS*/
  136. /*CFI_REL_OFFSET rflags,EFLAGS*/
  137. CFI_REL_OFFSET rsp,RSP
  138. /*CFI_REL_OFFSET ss,SS*/
  139. .endm
  140. /*
  141. * A newly forked process directly context switches into this.
  142. */
  143. /* rdi: prev */
  144. ENTRY(ret_from_fork)
  145. CFI_DEFAULT_STACK
  146. push kernel_eflags(%rip)
  147. CFI_ADJUST_CFA_OFFSET 8
  148. popf # reset kernel eflags
  149. CFI_ADJUST_CFA_OFFSET -8
  150. call schedule_tail
  151. GET_THREAD_INFO(%rcx)
  152. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
  153. jnz rff_trace
  154. rff_action:
  155. RESTORE_REST
  156. testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
  157. je int_ret_from_sys_call
  158. testl $_TIF_IA32,threadinfo_flags(%rcx)
  159. jnz int_ret_from_sys_call
  160. RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
  161. jmp ret_from_sys_call
  162. rff_trace:
  163. movq %rsp,%rdi
  164. call syscall_trace_leave
  165. GET_THREAD_INFO(%rcx)
  166. jmp rff_action
  167. CFI_ENDPROC
  168. END(ret_from_fork)
  169. /*
  170. * System call entry. Up to 6 arguments in registers are supported.
  171. *
  172. * SYSCALL does not save anything on the stack and does not change the
  173. * stack pointer.
  174. */
  175. /*
  176. * Register setup:
  177. * rax system call number
  178. * rdi arg0
  179. * rcx return address for syscall/sysret, C arg3
  180. * rsi arg1
  181. * rdx arg2
  182. * r10 arg3 (--> moved to rcx for C)
  183. * r8 arg4
  184. * r9 arg5
  185. * r11 eflags for syscall/sysret, temporary for C
  186. * r12-r15,rbp,rbx saved by C code, not touched.
  187. *
  188. * Interrupts are off on entry.
  189. * Only called from user space.
  190. *
  191. * XXX if we had a free scratch register we could save RSP into the stack frame
  192. * and report it properly in ps. Unfortunately we don't have one.
  193. *
  194. * When the user can change the frame, always force IRET. That is because
  195. * IRET deals with non-canonical addresses better. SYSRET has trouble
  196. * with them due to bugs in both AMD and Intel CPUs.
  197. */
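/*
 * Illustrative sketch (user-space code, not part of this file): a minimal
 * write(2) invocation following the register convention above. The syscall
 * number 1 (__NR_write) and the msg/len symbols are assumptions made for
 * the example only.
 *
 *	movl	$1,%eax		# rax: syscall number (__NR_write)
 *	movl	$1,%edi		# rdi: arg0, fd = stdout
 *	leaq	msg(%rip),%rsi	# rsi: arg1, buffer
 *	movl	$len,%edx	# rdx: arg2, byte count
 *	syscall			# rcx/r11 are clobbered by SYSCALL/SYSRET
 *				# return value comes back in rax
 */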
  198. ENTRY(system_call)
  199. CFI_STARTPROC simple
  200. CFI_SIGNAL_FRAME
  201. CFI_DEF_CFA rsp,PDA_STACKOFFSET
  202. CFI_REGISTER rip,rcx
  203. /*CFI_REGISTER rflags,r11*/
  204. swapgs
  205. movq %rsp,%gs:pda_oldrsp
  206. movq %gs:pda_kernelstack,%rsp
  207. /*
  208. * No need to follow this irqs off/on section - it's straight
  209. * and short:
  210. */
  211. sti
  212. SAVE_ARGS 8,1
  213. movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
  214. movq %rcx,RIP-ARGOFFSET(%rsp)
  215. CFI_REL_OFFSET rip,RIP-ARGOFFSET
  216. GET_THREAD_INFO(%rcx)
  217. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
  218. jnz tracesys
  219. cmpq $__NR_syscall_max,%rax
  220. ja badsys
  221. movq %r10,%rcx
  222. call *sys_call_table(,%rax,8) # XXX: rip relative
  223. movq %rax,RAX-ARGOFFSET(%rsp)
  224. /*
  225. * Syscall return path ending with SYSRET (fast path)
  226. * Has incomplete stack frame and undefined top of stack.
  227. */
  228. ret_from_sys_call:
  229. movl $_TIF_ALLWORK_MASK,%edi
  230. /* edi: flagmask */
  231. sysret_check:
  232. GET_THREAD_INFO(%rcx)
  233. cli
  234. TRACE_IRQS_OFF
  235. movl threadinfo_flags(%rcx),%edx
  236. andl %edi,%edx
  237. jnz sysret_careful
  238. CFI_REMEMBER_STATE
  239. /*
  240. * sysretq will re-enable interrupts:
  241. */
  242. TRACE_IRQS_ON
  243. movq RIP-ARGOFFSET(%rsp),%rcx
  244. CFI_REGISTER rip,rcx
  245. RESTORE_ARGS 0,-ARG_SKIP,1
  246. /*CFI_REGISTER rflags,r11*/
  247. movq %gs:pda_oldrsp,%rsp
  248. swapgs
  249. sysretq
  250. CFI_RESTORE_STATE
  251. /* Handle reschedules */
  252. /* edx: work, edi: workmask */
  253. sysret_careful:
  254. bt $TIF_NEED_RESCHED,%edx
  255. jnc sysret_signal
  256. TRACE_IRQS_ON
  257. sti
  258. pushq %rdi
  259. CFI_ADJUST_CFA_OFFSET 8
  260. call schedule
  261. popq %rdi
  262. CFI_ADJUST_CFA_OFFSET -8
  263. jmp sysret_check
  264. /* Handle a signal */
  265. sysret_signal:
  266. TRACE_IRQS_ON
  267. sti
  268. testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
  269. jz 1f
  270. /* Really a signal */
  271. /* edx: work flags (arg3) */
  272. leaq do_notify_resume(%rip),%rax
  273. leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
  274. xorl %esi,%esi # oldset -> arg2
  275. call ptregscall_common
  276. 1: movl $_TIF_NEED_RESCHED,%edi
  277. /* Use IRET because the user could have changed the frame. This
  278. works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
  279. cli
  280. TRACE_IRQS_OFF
  281. jmp int_with_check
  282. badsys:
  283. movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
  284. jmp ret_from_sys_call
  285. /* Do syscall tracing */
  286. tracesys:
  287. SAVE_REST
  288. movq $-ENOSYS,RAX(%rsp)
  289. FIXUP_TOP_OF_STACK %rdi
  290. movq %rsp,%rdi
  291. call syscall_trace_enter
  292. LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
  293. RESTORE_REST
  294. cmpq $__NR_syscall_max,%rax
  295. movq $-ENOSYS,%rcx
  296. cmova %rcx,%rax
  297. ja 1f
  298. movq %r10,%rcx /* fixup for C */
  299. call *sys_call_table(,%rax,8)
  300. 1: movq %rax,RAX-ARGOFFSET(%rsp)
  301. /* Use IRET because the user could have changed the frame */
  302. /*
  303. * Syscall return path ending with IRET.
  304. * Has correct top of stack, but partial stack frame.
  305. */
  306. .globl int_ret_from_sys_call
  307. int_ret_from_sys_call:
  308. cli
  309. TRACE_IRQS_OFF
  310. testl $3,CS-ARGOFFSET(%rsp)
  311. je retint_restore_args
  312. movl $_TIF_ALLWORK_MASK,%edi
  313. /* edi: mask to check */
  314. int_with_check:
  315. GET_THREAD_INFO(%rcx)
  316. movl threadinfo_flags(%rcx),%edx
  317. andl %edi,%edx
  318. jnz int_careful
  319. andl $~TS_COMPAT,threadinfo_status(%rcx)
  320. jmp retint_swapgs
  321. /* Either reschedule or signal or syscall exit tracking needed. */
  322. /* First do a reschedule test. */
  323. /* edx: work, edi: workmask */
  324. int_careful:
  325. bt $TIF_NEED_RESCHED,%edx
  326. jnc int_very_careful
  327. TRACE_IRQS_ON
  328. sti
  329. pushq %rdi
  330. CFI_ADJUST_CFA_OFFSET 8
  331. call schedule
  332. popq %rdi
  333. CFI_ADJUST_CFA_OFFSET -8
  334. cli
  335. TRACE_IRQS_OFF
  336. jmp int_with_check
  337. /* handle signals and tracing -- both require a full stack frame */
  338. int_very_careful:
  339. TRACE_IRQS_ON
  340. sti
  341. SAVE_REST
  342. /* Check for syscall exit trace */
  343. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
  344. jz int_signal
  345. pushq %rdi
  346. CFI_ADJUST_CFA_OFFSET 8
  347. leaq 8(%rsp),%rdi # &ptregs -> arg1
  348. call syscall_trace_leave
  349. popq %rdi
  350. CFI_ADJUST_CFA_OFFSET -8
  351. andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
  352. jmp int_restore_rest
  353. int_signal:
  354. testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
  355. jz 1f
  356. movq %rsp,%rdi # &ptregs -> arg1
  357. xorl %esi,%esi # oldset -> arg2
  358. call do_notify_resume
  359. 1: movl $_TIF_NEED_RESCHED,%edi
  360. int_restore_rest:
  361. RESTORE_REST
  362. cli
  363. TRACE_IRQS_OFF
  364. jmp int_with_check
  365. CFI_ENDPROC
  366. END(system_call)
  367. /*
  368. * Certain special system calls need to save a complete full stack frame.
  369. */
  370. .macro PTREGSCALL label,func,arg
  371. .globl \label
  372. \label:
  373. leaq \func(%rip),%rax
  374. leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
  375. jmp ptregscall_common
  376. END(\label)
  377. .endm
  378. CFI_STARTPROC
  379. PTREGSCALL stub_clone, sys_clone, %r8
  380. PTREGSCALL stub_fork, sys_fork, %rdi
  381. PTREGSCALL stub_vfork, sys_vfork, %rdi
  382. PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
  383. PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
  384. PTREGSCALL stub_iopl, sys_iopl, %rsi
  385. ENTRY(ptregscall_common)
  386. popq %r11
  387. CFI_ADJUST_CFA_OFFSET -8
  388. CFI_REGISTER rip, r11
  389. SAVE_REST
  390. movq %r11, %r15
  391. CFI_REGISTER rip, r15
  392. FIXUP_TOP_OF_STACK %r11
  393. call *%rax
  394. RESTORE_TOP_OF_STACK %r11
  395. movq %r15, %r11
  396. CFI_REGISTER rip, r11
  397. RESTORE_REST
  398. pushq %r11
  399. CFI_ADJUST_CFA_OFFSET 8
  400. CFI_REL_OFFSET rip, 0
  401. ret
  402. CFI_ENDPROC
  403. END(ptregscall_common)
  404. ENTRY(stub_execve)
  405. CFI_STARTPROC
  406. popq %r11
  407. CFI_ADJUST_CFA_OFFSET -8
  408. CFI_REGISTER rip, r11
  409. SAVE_REST
  410. FIXUP_TOP_OF_STACK %r11
  411. call sys_execve
  412. RESTORE_TOP_OF_STACK %r11
  413. movq %rax,RAX(%rsp)
  414. RESTORE_REST
  415. jmp int_ret_from_sys_call
  416. CFI_ENDPROC
  417. END(stub_execve)
  418. /*
  419. * sigreturn is special because it needs to restore all registers on return.
  420. * This cannot be done with SYSRET, so use the IRET return path instead.
  421. */
  422. ENTRY(stub_rt_sigreturn)
  423. CFI_STARTPROC
  424. addq $8, %rsp
  425. CFI_ADJUST_CFA_OFFSET -8
  426. SAVE_REST
  427. movq %rsp,%rdi
  428. FIXUP_TOP_OF_STACK %r11
  429. call sys_rt_sigreturn
  430. movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
  431. RESTORE_REST
  432. jmp int_ret_from_sys_call
  433. CFI_ENDPROC
  434. END(stub_rt_sigreturn)
  435. /*
  436. * initial frame state for interrupts and exceptions
  437. */
  438. .macro _frame ref
  439. CFI_STARTPROC simple
  440. CFI_SIGNAL_FRAME
  441. CFI_DEF_CFA rsp,SS+8-\ref
  442. /*CFI_REL_OFFSET ss,SS-\ref*/
  443. CFI_REL_OFFSET rsp,RSP-\ref
  444. /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
  445. /*CFI_REL_OFFSET cs,CS-\ref*/
  446. CFI_REL_OFFSET rip,RIP-\ref
  447. .endm
  448. /* initial frame state for interrupts (and exceptions without error code) */
  449. #define INTR_FRAME _frame RIP
  450. /* initial frame state for exceptions with error code (and interrupts with
  451. vector already pushed) */
  452. #define XCPT_FRAME _frame ORIG_RAX
  453. /*
  454. * Interrupt entry/exit.
  455. *
  456. * Interrupt entry points save only callee-clobbered registers in the fast path.
  457. *
  458. * Entry runs with interrupts off.
  459. */
  460. /* 0(%rsp): interrupt number */
  461. .macro interrupt func
  462. cld
  463. SAVE_ARGS
  464. leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
  465. pushq %rbp
  466. CFI_ADJUST_CFA_OFFSET 8
  467. CFI_REL_OFFSET rbp, 0
  468. movq %rsp,%rbp
  469. CFI_DEF_CFA_REGISTER rbp
  470. testl $3,CS(%rdi)
  471. je 1f
  472. swapgs
  473. /* irqcount is used to check if a CPU is already on an interrupt
  474. stack or not. While this is essentially redundant with preempt_count
  475. it is a little cheaper to use a separate counter in the PDA
  476. (short of moving irq_enter into assembly, which would be too
  477. much work) */
  478. 1: incl %gs:pda_irqcount
  479. cmoveq %gs:pda_irqstackptr,%rsp
  480. push %rbp # backlink for old unwinder
  481. /*
  482. * We entered an interrupt context - irqs are off:
  483. */
  484. TRACE_IRQS_OFF
  485. call \func
  486. .endm
  487. ENTRY(common_interrupt)
  488. XCPT_FRAME
  489. interrupt do_IRQ
  490. /* 0(%rsp): oldrsp-ARGOFFSET */
  491. ret_from_intr:
  492. cli
  493. TRACE_IRQS_OFF
  494. decl %gs:pda_irqcount
  495. leaveq
  496. CFI_DEF_CFA_REGISTER rsp
  497. CFI_ADJUST_CFA_OFFSET -8
  498. exit_intr:
  499. GET_THREAD_INFO(%rcx)
  500. testl $3,CS-ARGOFFSET(%rsp)
  501. je retint_kernel
  502. /* Interrupt came from user space */
  503. /*
  504. * Has a correct top of stack, but a partial stack frame
  505. * %rcx: thread info. Interrupts off.
  506. */
  507. retint_with_reschedule:
  508. movl $_TIF_WORK_MASK,%edi
  509. retint_check:
  510. movl threadinfo_flags(%rcx),%edx
  511. andl %edi,%edx
  512. CFI_REMEMBER_STATE
  513. jnz retint_careful
  514. retint_swapgs:
  515. /*
  516. * The iretq could re-enable interrupts:
  517. */
  518. cli
  519. TRACE_IRQS_IRETQ
  520. swapgs
  521. jmp restore_args
  522. retint_restore_args:
  523. cli
  524. /*
  525. * The iretq could re-enable interrupts:
  526. */
  527. TRACE_IRQS_IRETQ
  528. restore_args:
  529. RESTORE_ARGS 0,8,0
  530. iret_label:
  531. iretq
  532. .section __ex_table,"a"
  533. .quad iret_label,bad_iret
  534. .previous
  535. .section .fixup,"ax"
  536. /* force a signal here? this matches i386 behaviour */
  537. /* running with kernel gs */
  538. bad_iret:
  539. movq $11,%rdi /* SIGSEGV */
  540. TRACE_IRQS_ON
  541. sti
  542. jmp do_exit
  543. .previous
  544. /* edi: workmask, edx: work */
  545. retint_careful:
  546. CFI_RESTORE_STATE
  547. bt $TIF_NEED_RESCHED,%edx
  548. jnc retint_signal
  549. TRACE_IRQS_ON
  550. sti
  551. pushq %rdi
  552. CFI_ADJUST_CFA_OFFSET 8
  553. call schedule
  554. popq %rdi
  555. CFI_ADJUST_CFA_OFFSET -8
  556. GET_THREAD_INFO(%rcx)
  557. cli
  558. TRACE_IRQS_OFF
  559. jmp retint_check
  560. retint_signal:
  561. testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
  562. jz retint_swapgs
  563. TRACE_IRQS_ON
  564. sti
  565. SAVE_REST
  566. movq $-1,ORIG_RAX(%rsp)
  567. xorl %esi,%esi # oldset
  568. movq %rsp,%rdi # &pt_regs
  569. call do_notify_resume
  570. RESTORE_REST
  571. cli
  572. TRACE_IRQS_OFF
  573. movl $_TIF_NEED_RESCHED,%edi
  574. GET_THREAD_INFO(%rcx)
  575. jmp retint_check
  576. #ifdef CONFIG_PREEMPT
  577. /* Returning to kernel space. Check if we need preemption */
  578. /* rcx: threadinfo. interrupts off. */
  579. ENTRY(retint_kernel)
  580. cmpl $0,threadinfo_preempt_count(%rcx)
  581. jnz retint_restore_args
  582. bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
  583. jnc retint_restore_args
  584. bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
  585. jnc retint_restore_args
  586. call preempt_schedule_irq
  587. jmp exit_intr
  588. #endif
  589. CFI_ENDPROC
  590. END(common_interrupt)
  591. /*
  592. * APIC interrupts.
  593. */
  594. .macro apicinterrupt num,func
  595. INTR_FRAME
  596. pushq $~(\num)
  597. CFI_ADJUST_CFA_OFFSET 8
  598. interrupt \func
  599. jmp ret_from_intr
  600. CFI_ENDPROC
  601. .endm
  602. ENTRY(thermal_interrupt)
  603. apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
  604. END(thermal_interrupt)
  605. ENTRY(threshold_interrupt)
  606. apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
  607. END(threshold_interrupt)
  608. #ifdef CONFIG_SMP
  609. ENTRY(reschedule_interrupt)
  610. apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
  611. END(reschedule_interrupt)
  612. .macro INVALIDATE_ENTRY num
  613. ENTRY(invalidate_interrupt\num)
  614. apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
  615. END(invalidate_interrupt\num)
  616. .endm
  617. INVALIDATE_ENTRY 0
  618. INVALIDATE_ENTRY 1
  619. INVALIDATE_ENTRY 2
  620. INVALIDATE_ENTRY 3
  621. INVALIDATE_ENTRY 4
  622. INVALIDATE_ENTRY 5
  623. INVALIDATE_ENTRY 6
  624. INVALIDATE_ENTRY 7
  625. ENTRY(call_function_interrupt)
  626. apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
  627. END(call_function_interrupt)
  628. ENTRY(irq_move_cleanup_interrupt)
  629. apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
  630. END(irq_move_cleanup_interrupt)
  631. #endif
  632. ENTRY(apic_timer_interrupt)
  633. apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
  634. END(apic_timer_interrupt)
  635. ENTRY(error_interrupt)
  636. apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
  637. END(error_interrupt)
  638. ENTRY(spurious_interrupt)
  639. apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
  640. END(spurious_interrupt)
  641. /*
  642. * Exception entry points.
  643. */
  644. .macro zeroentry sym
  645. INTR_FRAME
  646. pushq $0 /* push error code/oldrax */
  647. CFI_ADJUST_CFA_OFFSET 8
  648. pushq %rax /* push real oldrax to the rdi slot */
  649. CFI_ADJUST_CFA_OFFSET 8
  650. CFI_REL_OFFSET rax,0
  651. leaq \sym(%rip),%rax
  652. jmp error_entry
  653. CFI_ENDPROC
  654. .endm
  655. .macro errorentry sym
  656. XCPT_FRAME
  657. pushq %rax
  658. CFI_ADJUST_CFA_OFFSET 8
  659. CFI_REL_OFFSET rax,0
  660. leaq \sym(%rip),%rax
  661. jmp error_entry
  662. CFI_ENDPROC
  663. .endm
  664. /* error code is on the stack already */
  665. /* handle NMI like exceptions that can happen everywhere */
  666. .macro paranoidentry sym, ist=0, irqtrace=1
  667. SAVE_ALL
  668. cld
  669. movl $1,%ebx
  670. movl $MSR_GS_BASE,%ecx
  671. rdmsr
  672. testl %edx,%edx
  673. js 1f
  674. swapgs
  675. xorl %ebx,%ebx
  676. 1:
  677. .if \ist
  678. movq %gs:pda_data_offset, %rbp
  679. .endif
  680. movq %rsp,%rdi
  681. movq ORIG_RAX(%rsp),%rsi
  682. movq $-1,ORIG_RAX(%rsp)
  683. .if \ist
  684. subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  685. .endif
  686. call \sym
  687. .if \ist
  688. addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  689. .endif
  690. cli
  691. .if \irqtrace
  692. TRACE_IRQS_OFF
  693. .endif
  694. .endm
  695. /*
  696. * "Paranoid" exit path from exception stack.
  697. * Paranoid because this is used by NMIs and cannot take
  698. * any kernel state for granted.
  699. * We don't do kernel preemption checks here, because only the
  700. * NMI case should be common, and NMIs do not enable IRQs and
  701. * cannot get reschedule ticks.
  702. *
  703. * "trace" is 0 for the NMI handler only, because irq tracing
  704. * is fundamentally NMI-unsafe (we cannot change the soft and
  705. * hard flags at once, atomically).
  706. */
  707. .macro paranoidexit trace=1
  708. /* ebx: no swapgs flag */
  709. paranoid_exit\trace:
  710. testl %ebx,%ebx /* swapgs needed? */
  711. jnz paranoid_restore\trace
  712. testl $3,CS(%rsp)
  713. jnz paranoid_userspace\trace
  714. paranoid_swapgs\trace:
  715. .if \trace
  716. TRACE_IRQS_IRETQ 0
  717. .endif
  718. swapgs
  719. paranoid_restore\trace:
  720. RESTORE_ALL 8
  721. iretq
  722. paranoid_userspace\trace:
  723. GET_THREAD_INFO(%rcx)
  724. movl threadinfo_flags(%rcx),%ebx
  725. andl $_TIF_WORK_MASK,%ebx
  726. jz paranoid_swapgs\trace
  727. movq %rsp,%rdi /* &pt_regs */
  728. call sync_regs
  729. movq %rax,%rsp /* switch stack for scheduling */
  730. testl $_TIF_NEED_RESCHED,%ebx
  731. jnz paranoid_schedule\trace
  732. movl %ebx,%edx /* arg3: thread flags */
  733. .if \trace
  734. TRACE_IRQS_ON
  735. .endif
  736. sti
  737. xorl %esi,%esi /* arg2: oldset */
  738. movq %rsp,%rdi /* arg1: &pt_regs */
  739. call do_notify_resume
  740. cli
  741. .if \trace
  742. TRACE_IRQS_OFF
  743. .endif
  744. jmp paranoid_userspace\trace
  745. paranoid_schedule\trace:
  746. .if \trace
  747. TRACE_IRQS_ON
  748. .endif
  749. sti
  750. call schedule
  751. cli
  752. .if \trace
  753. TRACE_IRQS_OFF
  754. .endif
  755. jmp paranoid_userspace\trace
  756. CFI_ENDPROC
  757. .endm
  758. /*
  759. * Exception entry point. This expects an error code/orig_rax on the stack
  760. * and the exception handler in %rax.
  761. */
  762. KPROBE_ENTRY(error_entry)
  763. _frame RDI
  764. CFI_REL_OFFSET rax,0
  765. /* rdi slot contains rax, oldrax contains error code */
  766. cld
  767. subq $14*8,%rsp
  768. CFI_ADJUST_CFA_OFFSET (14*8)
  769. movq %rsi,13*8(%rsp)
  770. CFI_REL_OFFSET rsi,RSI
  771. movq 14*8(%rsp),%rsi /* load rax from rdi slot */
  772. CFI_REGISTER rax,rsi
  773. movq %rdx,12*8(%rsp)
  774. CFI_REL_OFFSET rdx,RDX
  775. movq %rcx,11*8(%rsp)
  776. CFI_REL_OFFSET rcx,RCX
  777. movq %rsi,10*8(%rsp) /* store rax */
  778. CFI_REL_OFFSET rax,RAX
  779. movq %r8, 9*8(%rsp)
  780. CFI_REL_OFFSET r8,R8
  781. movq %r9, 8*8(%rsp)
  782. CFI_REL_OFFSET r9,R9
  783. movq %r10,7*8(%rsp)
  784. CFI_REL_OFFSET r10,R10
  785. movq %r11,6*8(%rsp)
  786. CFI_REL_OFFSET r11,R11
  787. movq %rbx,5*8(%rsp)
  788. CFI_REL_OFFSET rbx,RBX
  789. movq %rbp,4*8(%rsp)
  790. CFI_REL_OFFSET rbp,RBP
  791. movq %r12,3*8(%rsp)
  792. CFI_REL_OFFSET r12,R12
  793. movq %r13,2*8(%rsp)
  794. CFI_REL_OFFSET r13,R13
  795. movq %r14,1*8(%rsp)
  796. CFI_REL_OFFSET r14,R14
  797. movq %r15,(%rsp)
  798. CFI_REL_OFFSET r15,R15
  799. xorl %ebx,%ebx
  800. testl $3,CS(%rsp)
  801. je error_kernelspace
  802. error_swapgs:
  803. swapgs
  804. error_sti:
  805. movq %rdi,RDI(%rsp)
  806. CFI_REL_OFFSET rdi,RDI
  807. movq %rsp,%rdi
  808. movq ORIG_RAX(%rsp),%rsi /* get error code */
  809. movq $-1,ORIG_RAX(%rsp)
  810. call *%rax
  811. /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
  812. error_exit:
  813. movl %ebx,%eax
  814. RESTORE_REST
  815. cli
  816. TRACE_IRQS_OFF
  817. GET_THREAD_INFO(%rcx)
  818. testl %eax,%eax
  819. jne retint_kernel
  820. movl threadinfo_flags(%rcx),%edx
  821. movl $_TIF_WORK_MASK,%edi
  822. andl %edi,%edx
  823. jnz retint_careful
  824. /*
  825. * The iret might restore flags:
  826. */
  827. TRACE_IRQS_IRETQ
  828. swapgs
  829. RESTORE_ARGS 0,8,0
  830. jmp iret_label
  831. CFI_ENDPROC
  832. error_kernelspace:
  833. incl %ebx
  834. /* There are two places in the kernel that can potentially fault with
  835. usergs. Handle them here. The exception handlers after
  836. iret run with kernel gs again, so don't set the user space flag.
  837. B stepping K8s sometimes report a truncated RIP for IRET
  838. exceptions returning to compat mode. Check for these here too. */
  839. leaq iret_label(%rip),%rbp
  840. cmpq %rbp,RIP(%rsp)
  841. je error_swapgs
  842. movl %ebp,%ebp /* zero extend */
  843. cmpq %rbp,RIP(%rsp)
  844. je error_swapgs
  845. cmpq $gs_change,RIP(%rsp)
  846. je error_swapgs
  847. jmp error_sti
  848. KPROBE_END(error_entry)
  849. /* Reload gs selector with exception handling */
  850. /* edi: new selector */
  851. ENTRY(load_gs_index)
  852. CFI_STARTPROC
  853. pushf
  854. CFI_ADJUST_CFA_OFFSET 8
  855. cli
  856. swapgs
  857. gs_change:
  858. movl %edi,%gs
  859. 2: mfence /* workaround */
  860. swapgs
  861. popf
  862. CFI_ADJUST_CFA_OFFSET -8
  863. ret
  864. CFI_ENDPROC
  865. ENDPROC(load_gs_index)
  866. .section __ex_table,"a"
  867. .align 8
  868. .quad gs_change,bad_gs
  869. .previous
  870. .section .fixup,"ax"
  871. /* running with kernelgs */
  872. bad_gs:
  873. swapgs /* switch back to user gs */
  874. xorl %eax,%eax
  875. movl %eax,%gs
  876. jmp 2b
  877. .previous
  878. /*
  879. * Create a kernel thread.
  880. *
  881. * C extern interface:
  882. * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  883. *
  884. * asm input arguments:
  885. * rdi: fn, rsi: arg, rdx: flags
  886. */
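/*
 * Illustrative sketch (assumption, not from this file): typical in-kernel
 * use of the C interface above. The helper my_worker, the data pointer and
 * the clone-flag choice are hypothetical.
 *
 *	static int my_worker(void *arg)
 *	{
 *		do_work(arg);			(do_work is a placeholder)
 *		return 0;
 *	}
 *
 *	pid = kernel_thread(my_worker, data, CLONE_FS | CLONE_FILES | SIGCHLD);
 */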
  887. ENTRY(kernel_thread)
  888. CFI_STARTPROC
  889. FAKE_STACK_FRAME $child_rip
  890. SAVE_ALL
  891. # rdi: flags, rsi: usp, rdx: will be &pt_regs
  892. movq %rdx,%rdi
  893. orq kernel_thread_flags(%rip),%rdi
  894. movq $-1, %rsi
  895. movq %rsp, %rdx
  896. xorl %r8d,%r8d
  897. xorl %r9d,%r9d
  898. # clone now
  899. call do_fork
  900. movq %rax,RAX(%rsp)
  901. xorl %edi,%edi
  902. /*
  903. * It isn't worth checking for a reschedule here,
  904. * so internally to the x86_64 port you can rely on kernel_thread()
  905. * not to reschedule the child before returning; this avoids the need
  906. * for hacks, for example to fork off the per-CPU idle tasks.
  907. * [Hopefully no generic code relies on the reschedule -AK]
  908. */
  909. RESTORE_ALL
  910. UNFAKE_STACK_FRAME
  911. ret
  912. CFI_ENDPROC
  913. ENDPROC(kernel_thread)
  914. child_rip:
  915. pushq $0 # fake return address
  916. CFI_STARTPROC
  917. /*
  918. * Here we are in the child and the registers are set as they were
  919. * at kernel_thread() invocation in the parent.
  920. */
  921. movq %rdi, %rax
  922. movq %rsi, %rdi
  923. call *%rax
  924. # exit
  925. xorl %edi, %edi
  926. call do_exit
  927. CFI_ENDPROC
  928. ENDPROC(child_rip)
  929. /*
  930. * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  931. *
  932. * C extern interface:
  933. * extern long execve(char *name, char **argv, char **envp)
  934. *
  935. * asm input arguments:
  936. * rdi: name, rsi: argv, rdx: envp
  937. *
  938. * We want to fall back into:
  939. * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
  940. *
  941. * do_sys_execve asm fallback arguments:
  942. * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
  943. */
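/*
 * Illustrative sketch (assumption, not from this file): how kernel code
 * might use the interface above, e.g. to exec the first user process.
 * The path and the argv_init/envp_init vectors are placeholders.
 *
 *	if (kernel_execve("/sbin/init", argv_init, envp_init) < 0)
 *		printk(KERN_ERR "Failed to exec init\n");
 */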
  944. ENTRY(kernel_execve)
  945. CFI_STARTPROC
  946. FAKE_STACK_FRAME $0
  947. SAVE_ALL
  948. call sys_execve
  949. movq %rax, RAX(%rsp)
  950. RESTORE_REST
  951. testq %rax,%rax
  952. je int_ret_from_sys_call
  953. RESTORE_ARGS
  954. UNFAKE_STACK_FRAME
  955. ret
  956. CFI_ENDPROC
  957. ENDPROC(kernel_execve)
  958. KPROBE_ENTRY(page_fault)
  959. errorentry do_page_fault
  960. KPROBE_END(page_fault)
  961. ENTRY(coprocessor_error)
  962. zeroentry do_coprocessor_error
  963. END(coprocessor_error)
  964. ENTRY(simd_coprocessor_error)
  965. zeroentry do_simd_coprocessor_error
  966. END(simd_coprocessor_error)
  967. ENTRY(device_not_available)
  968. zeroentry math_state_restore
  969. END(device_not_available)
  970. /* runs on exception stack */
  971. KPROBE_ENTRY(debug)
  972. INTR_FRAME
  973. pushq $0
  974. CFI_ADJUST_CFA_OFFSET 8
  975. paranoidentry do_debug, DEBUG_STACK
  976. paranoidexit
  977. KPROBE_END(debug)
  978. /* runs on exception stack */
  979. KPROBE_ENTRY(nmi)
  980. INTR_FRAME
  981. pushq $-1
  982. CFI_ADJUST_CFA_OFFSET 8
  983. paranoidentry do_nmi, 0, 0
  984. #ifdef CONFIG_TRACE_IRQFLAGS
  985. paranoidexit 0
  986. #else
  987. jmp paranoid_exit1
  988. CFI_ENDPROC
  989. #endif
  990. KPROBE_END(nmi)
  991. KPROBE_ENTRY(int3)
  992. INTR_FRAME
  993. pushq $0
  994. CFI_ADJUST_CFA_OFFSET 8
  995. paranoidentry do_int3, DEBUG_STACK
  996. jmp paranoid_exit1
  997. CFI_ENDPROC
  998. KPROBE_END(int3)
  999. ENTRY(overflow)
  1000. zeroentry do_overflow
  1001. END(overflow)
  1002. ENTRY(bounds)
  1003. zeroentry do_bounds
  1004. END(bounds)
  1005. ENTRY(invalid_op)
  1006. zeroentry do_invalid_op
  1007. END(invalid_op)
  1008. ENTRY(coprocessor_segment_overrun)
  1009. zeroentry do_coprocessor_segment_overrun
  1010. END(coprocessor_segment_overrun)
  1011. ENTRY(reserved)
  1012. zeroentry do_reserved
  1013. END(reserved)
  1014. /* runs on exception stack */
  1015. ENTRY(double_fault)
  1016. XCPT_FRAME
  1017. paranoidentry do_double_fault
  1018. jmp paranoid_exit1
  1019. CFI_ENDPROC
  1020. END(double_fault)
  1021. ENTRY(invalid_TSS)
  1022. errorentry do_invalid_TSS
  1023. END(invalid_TSS)
  1024. ENTRY(segment_not_present)
  1025. errorentry do_segment_not_present
  1026. END(segment_not_present)
  1027. /* runs on exception stack */
  1028. ENTRY(stack_segment)
  1029. XCPT_FRAME
  1030. paranoidentry do_stack_segment
  1031. jmp paranoid_exit1
  1032. CFI_ENDPROC
  1033. END(stack_segment)
  1034. KPROBE_ENTRY(general_protection)
  1035. errorentry do_general_protection
  1036. KPROBE_END(general_protection)
  1037. ENTRY(alignment_check)
  1038. errorentry do_alignment_check
  1039. END(alignment_check)
  1040. ENTRY(divide_error)
  1041. zeroentry do_divide_error
  1042. END(divide_error)
  1043. ENTRY(spurious_interrupt_bug)
  1044. zeroentry do_spurious_interrupt_bug
  1045. END(spurious_interrupt_bug)
  1046. #ifdef CONFIG_X86_MCE
  1047. /* runs on exception stack */
  1048. ENTRY(machine_check)
  1049. INTR_FRAME
  1050. pushq $0
  1051. CFI_ADJUST_CFA_OFFSET 8
  1052. paranoidentry do_machine_check
  1053. jmp paranoid_exit1
  1054. CFI_ENDPROC
  1055. END(machine_check)
  1056. #endif
  1057. /* Call softirq on interrupt stack. Interrupts are off. */
  1058. ENTRY(call_softirq)
  1059. CFI_STARTPROC
  1060. push %rbp
  1061. CFI_ADJUST_CFA_OFFSET 8
  1062. CFI_REL_OFFSET rbp,0
  1063. mov %rsp,%rbp
  1064. CFI_DEF_CFA_REGISTER rbp
  1065. incl %gs:pda_irqcount
  1066. cmove %gs:pda_irqstackptr,%rsp
  1067. push %rbp # backlink for old unwinder
  1068. call __do_softirq
  1069. leaveq
  1070. CFI_DEF_CFA_REGISTER rsp
  1071. CFI_ADJUST_CFA_OFFSET -8
  1072. decl %gs:pda_irqcount
  1073. ret
  1074. CFI_ENDPROC
  1075. ENDPROC(call_softirq)
  1076. KPROBE_ENTRY(ignore_sysret)
  1077. CFI_STARTPROC
  1078. mov $-ENOSYS,%eax
  1079. sysret
  1080. CFI_ENDPROC
  1081. ENDPROC(ignore_sysret)