entry.S 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157
  1. /*
  2. * linux/arch/x86_64/entry.S
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
  6. * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  7. */
  8. /*
  9. * entry.S contains the system-call and fault low-level handling routines.
  10. *
  11. * NOTE: This code handles signal-recognition, which happens every time
  12. * after an interrupt and after each system call.
  13. *
  14. * Normal syscalls and interrupts don't save a full stack frame; this is
  15. * only done for syscall tracing, signals or fork/exec et al.
  16. *
  17. * A note on terminology:
  18. * - top of stack: Architecture defined interrupt frame from SS to RIP
  19. * at the top of the kernel process stack.
  20. * - partial stack frame: partially saved registers up to R11.
  21. * - full stack frame: Like partial stack frame, but with all registers saved.
  22. *
  23. * Some macro usage:
  24. * - CFI macros are used to generate dwarf2 unwind information for better
  25. * backtraces. They don't change any code.
  26. * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  27. * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
  28. * There are unfortunately lots of special cases where some registers
  29. * are not touched. The macro is a big mess that should be cleaned up.
  30. * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  31. * Gives a full stack frame.
  32. * - ENTRY/END Define functions in the symbol table.
  33. * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  34. * frame that is otherwise undefined after a SYSCALL
  35. * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  36. * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  37. */
  38. #include <linux/linkage.h>
  39. #include <asm/segment.h>
  40. #include <asm/cache.h>
  41. #include <asm/errno.h>
  42. #include <asm/dwarf2.h>
  43. #include <asm/calling.h>
  44. #include <asm/asm-offsets.h>
  45. #include <asm/msr.h>
  46. #include <asm/unistd.h>
  47. #include <asm/thread_info.h>
  48. #include <asm/hw_irq.h>
  49. #include <asm/page.h>
  50. #include <asm/irqflags.h>
  51. .code64
  /* Without CONFIG_PREEMPT there is no retint_kernel routine in this file; the name is aliased so that jumps to it go straight to retint_restore_args. */
  52. #ifndef CONFIG_PREEMPT
  53. #define retint_kernel retint_restore_args
  54. #endif
  /*
   * TRACE_IRQS_IRETQ offset: used right before returning with IRETQ.
   * Tests bit 9 of the saved EFLAGS image at EFLAGS-\offset(%rsp) and, if it is
   * set, invokes TRACE_IRQS_ON. Expands to nothing unless CONFIG_TRACE_IRQFLAGS
   * is defined. The bt instruction overwrites the carry flag.
   */
  55. .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
  56. #ifdef CONFIG_TRACE_IRQFLAGS
  57. bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
  58. jnc 1f
  59. TRACE_IRQS_ON
  60. 1:
  61. #endif
  62. .endm
  63. /*
  64. * C code is not supposed to know about undefined top of stack. Every time
  65. * a C function with a pt_regs argument is called from the SYSCALL based
  66. * fast path FIXUP_TOP_OF_STACK is needed.
  67. * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  68. * manipulation.
  69. */
  70. /* %rsp:at FRAMEEND */
  /*
   * FIXUP_TOP_OF_STACK tmp: fill in the frame slots at %rsp that the SYSCALL
   * fast path leaves undefined. RSP comes from %gs:pda_oldrsp, SS/CS get the
   * user selectors, RCX is set to -1, and EFLAGS is copied from the saved R11
   * slot. \tmp is a scratch register and is clobbered.
   */
  71. .macro FIXUP_TOP_OF_STACK tmp
  72. movq %gs:pda_oldrsp,\tmp
  73. movq \tmp,RSP(%rsp)
  74. movq $__USER_DS,SS(%rsp)
  75. movq $__USER_CS,CS(%rsp)
  76. movq $-1,RCX(%rsp)
  77. movq R11(%rsp),\tmp /* get eflags */
  78. movq \tmp,EFLAGS(%rsp)
  79. .endm
  /*
   * RESTORE_TOP_OF_STACK tmp,offset: write the frame's RSP slot back to
   * %gs:pda_oldrsp and its EFLAGS slot back into the R11 slot.
   * \offset is subtracted from each frame offset; \tmp is clobbered.
   */
  80. .macro RESTORE_TOP_OF_STACK tmp,offset=0
  81. movq RSP-\offset(%rsp),\tmp
  82. movq \tmp,%gs:pda_oldrsp
  83. movq EFLAGS-\offset(%rsp),\tmp
  84. movq \tmp,R11-\offset(%rsp)
  85. .endm
  /*
   * FAKE_STACK_FRAME child_rip: push a hand-built frame of six quadwords:
   * ss=0, rsp=0, eflags=1<<9, cs=__KERNEL_CS, rip=\child_rip, orig rax=0.
   * Zeroes %rax as a side effect. Undone by UNFAKE_STACK_FRAME.
   */
  86. .macro FAKE_STACK_FRAME child_rip
  87. /* push in order ss, rsp, eflags, cs, rip */
  88. xorl %eax, %eax
  89. pushq %rax /* ss */
  90. CFI_ADJUST_CFA_OFFSET 8
  91. /*CFI_REL_OFFSET ss,0*/
  92. pushq %rax /* rsp */
  93. CFI_ADJUST_CFA_OFFSET 8
  94. CFI_REL_OFFSET rsp,0
  95. pushq $(1<<9) /* eflags - interrupts on */
  96. CFI_ADJUST_CFA_OFFSET 8
  97. /*CFI_REL_OFFSET rflags,0*/
  98. pushq $__KERNEL_CS /* cs */
  99. CFI_ADJUST_CFA_OFFSET 8
  100. /*CFI_REL_OFFSET cs,0*/
  101. pushq \child_rip /* rip */
  102. CFI_ADJUST_CFA_OFFSET 8
  103. CFI_REL_OFFSET rip,0
  104. pushq %rax /* orig rax */
  105. CFI_ADJUST_CFA_OFFSET 8
  106. .endm
  /* UNFAKE_STACK_FRAME: drop the six quadwords pushed by FAKE_STACK_FRAME and adjust the CFA to match. The addq modifies the arithmetic flags. */
  107. .macro UNFAKE_STACK_FRAME
  108. addq $8*6, %rsp
  109. CFI_ADJUST_CFA_OFFSET -(6*8)
  110. .endm
  /*
   * CFI_DEFAULT_STACK start: emit unwind annotations for a register frame at %rsp.
   * start=1 (default): open a new "simple" signal-frame CFI procedure and define
   * the CFA as rsp+SS+8. start=0: only reset the CFA offset to SS+8.
   * In both cases every general register plus rip and rsp is recorded at its
   * frame offset; the cs, rflags and ss annotations are commented out.
   */
  111. .macro CFI_DEFAULT_STACK start=1
  112. .if \start
  113. CFI_STARTPROC simple
  114. CFI_SIGNAL_FRAME
  115. CFI_DEF_CFA rsp,SS+8
  116. .else
  117. CFI_DEF_CFA_OFFSET SS+8
  118. .endif
  119. CFI_REL_OFFSET r15,R15
  120. CFI_REL_OFFSET r14,R14
  121. CFI_REL_OFFSET r13,R13
  122. CFI_REL_OFFSET r12,R12
  123. CFI_REL_OFFSET rbp,RBP
  124. CFI_REL_OFFSET rbx,RBX
  125. CFI_REL_OFFSET r11,R11
  126. CFI_REL_OFFSET r10,R10
  127. CFI_REL_OFFSET r9,R9
  128. CFI_REL_OFFSET r8,R8
  129. CFI_REL_OFFSET rax,RAX
  130. CFI_REL_OFFSET rcx,RCX
  131. CFI_REL_OFFSET rdx,RDX
  132. CFI_REL_OFFSET rsi,RSI
  133. CFI_REL_OFFSET rdi,RDI
  134. CFI_REL_OFFSET rip,RIP
  135. /*CFI_REL_OFFSET cs,CS*/
  136. /*CFI_REL_OFFSET rflags,EFLAGS*/
  137. CFI_REL_OFFSET rsp,RSP
  138. /*CFI_REL_OFFSET ss,SS*/
  139. .endm
  140. /*
  141. * A newly forked process directly context switches into this.
  142. */
  143. /* rdi: prev */
  /*
   * ret_from_fork: reloads EFLAGS from kernel_eflags, calls schedule_tail
   * (%rdi, "prev", is passed through untouched), optionally reports syscall
   * exit to the tracer, then leaves through one of the syscall return paths:
   *   - saved CS has CPL 0 (kernel thread)  -> int_ret_from_sys_call
   *   - _TIF_IA32 set                       -> int_ret_from_sys_call
   *   - otherwise                           -> ret_from_sys_call, after syncing
   *                                            the top of stack
   * The push/popf pair below moves %rsp by 8 bytes in 64-bit mode, so the CFA
   * adjustments are 8 / -8 (they were 4 / -4, which made the unwind info wrong
   * between the two instructions).
   */
  144. ENTRY(ret_from_fork)
  145. CFI_DEFAULT_STACK
  146. push kernel_eflags(%rip)
  147. CFI_ADJUST_CFA_OFFSET 8
  148. popf # reset kernel eflags
  149. CFI_ADJUST_CFA_OFFSET -8
  150. call schedule_tail
  151. GET_THREAD_INFO(%rcx)
  152. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
  153. jnz rff_trace
  154. rff_action:
  155. RESTORE_REST
  156. testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
  157. je int_ret_from_sys_call
  158. testl $_TIF_IA32,threadinfo_flags(%rcx)
  159. jnz int_ret_from_sys_call
  160. RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
  161. jmp ret_from_sys_call
  /* Tracing/audit active: report syscall exit with %rdi = &pt_regs, then re-fetch thread_info into %rcx for rff_action. */
  162. rff_trace:
  163. movq %rsp,%rdi
  164. call syscall_trace_leave
  165. GET_THREAD_INFO(%rcx)
  166. jmp rff_action
  167. CFI_ENDPROC
  168. END(ret_from_fork)
  169. /*
  170. * System call entry. Up to 6 arguments in registers are supported.
  171. *
  172. * SYSCALL does not save anything on the stack and does not change the
  173. * stack pointer.
  174. */
  175. /*
  176. * Register setup:
  177. * rax system call number
  178. * rdi arg0
  179. * rcx return address for syscall/sysret, C arg3
  180. * rsi arg1
  181. * rdx arg2
  182. * r10 arg3 (--> moved to rcx for C)
  183. * r8 arg4
  184. * r9 arg5
  185. * r11 eflags for syscall/sysret, temporary for C
  186. * r12-r15,rbp,rbx saved by C code, not touched.
  187. *
  188. * Interrupts are off on entry.
  189. * Only called from user space.
  190. *
  191. * XXX if we had a free scratch register we could save the RSP into the stack frame
  192. * and report it properly in ps. Unfortunately we haven't.
  193. *
  194. * When the user can change the frames, always force IRET. That is because
  195. * it deals with uncanonical addresses better. SYSRET has trouble
  196. * with them due to bugs in both AMD and Intel CPUs.
  197. */
  /*
   * system_call: entry point for the SYSCALL instruction.
   * Switches to the kernel GS base and the kernel stack (the user %rsp is
   * parked in %gs:pda_oldrsp), saves the argument registers, and dispatches
   * through sys_call_table. It exits with SYSRET on the fast path
   * (ret_from_sys_call) or through the IRET path (int_ret_from_sys_call) when
   * tracing or signal work may have modified the frame.
   */
  198. ENTRY(system_call)
  199. CFI_STARTPROC simple
  200. CFI_SIGNAL_FRAME
  201. CFI_DEF_CFA rsp,PDA_STACKOFFSET
  202. CFI_REGISTER rip,rcx
  203. /*CFI_REGISTER rflags,r11*/
  204. swapgs
  205. movq %rsp,%gs:pda_oldrsp
  206. movq %gs:pda_kernelstack,%rsp
  207. /*
  208. * No need to follow this irqs off/on section - it's straight
  209. * and short:
  210. */
  211. sti
  212. SAVE_ARGS 8,1
  213. movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
  214. movq %rcx,RIP-ARGOFFSET(%rsp)
  215. CFI_REL_OFFSET rip,RIP-ARGOFFSET
  216. GET_THREAD_INFO(%rcx)
  /* Any trace/audit/seccomp flag set: take the slow path that builds a full frame. */
  217. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
  218. jnz tracesys
  /* Unsigned range check of the syscall number before indexing the table. */
  219. cmpq $__NR_syscall_max,%rax
  220. ja badsys
  221. movq %r10,%rcx
  222. call *sys_call_table(,%rax,8) # XXX: rip relative
  223. movq %rax,RAX-ARGOFFSET(%rsp)
  224. /*
  225. * Syscall return path ending with SYSRET (fast path)
  226. * Has incomplete stack frame and undefined top of stack.
  227. */
  228. ret_from_sys_call:
  229. movl $_TIF_ALLWORK_MASK,%edi
  230. /* edi: flagmask */
  /* sysret_check: with interrupts off, mask the thread flags with %edi; any remaining bit diverts to sysret_careful. */
  231. sysret_check:
  232. GET_THREAD_INFO(%rcx)
  233. cli
  234. TRACE_IRQS_OFF
  235. movl threadinfo_flags(%rcx),%edx
  236. andl %edi,%edx
  237. jnz sysret_careful
  238. CFI_REMEMBER_STATE
  239. /*
  240. * sysretq will re-enable interrupts:
  241. */
  242. TRACE_IRQS_ON
  243. movq RIP-ARGOFFSET(%rsp),%rcx
  244. CFI_REGISTER rip,rcx
  245. RESTORE_ARGS 0,-ARG_SKIP,1
  246. /*CFI_REGISTER rflags,r11*/
  247. movq %gs:pda_oldrsp,%rsp
  248. swapgs
  249. sysretq
  250. CFI_RESTORE_STATE
  251. /* Handle reschedules */
  252. /* edx: work, edi: workmask */
  253. sysret_careful:
  254. bt $TIF_NEED_RESCHED,%edx
  255. jnc sysret_signal
  256. TRACE_IRQS_ON
  257. sti
  /* %rdi (the work mask) is preserved on the stack across the call to schedule. */
  258. pushq %rdi
  259. CFI_ADJUST_CFA_OFFSET 8
  260. call schedule
  261. popq %rdi
  262. CFI_ADJUST_CFA_OFFSET -8
  263. jmp sysret_check
  264. /* Handle a signal */
  265. sysret_signal:
  266. TRACE_IRQS_ON
  267. sti
  268. testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
  269. jz 1f
  270. /* Really a signal */
  271. /* edx: work flags (arg3) */
  272. leaq do_notify_resume(%rip),%rax
  273. leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
  274. xorl %esi,%esi # oldset -> arg2
  275. call ptregscall_common
  276. 1: movl $_TIF_NEED_RESCHED,%edi
  277. /* Use IRET because user could have changed frame. This
  278. works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
  279. cli
  280. TRACE_IRQS_OFF
  281. jmp int_with_check
  /* badsys: syscall number out of range; return -ENOSYS through the normal fast return path. */
  282. badsys:
  283. movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
  284. jmp ret_from_sys_call
  285. /* Do syscall tracing */
  /* tracesys: build a full frame with a default return value of -ENOSYS, call syscall_trace_enter(&pt_regs), then reload the arguments and re-check the syscall number. */
  286. tracesys:
  287. SAVE_REST
  288. movq $-ENOSYS,RAX(%rsp)
  289. FIXUP_TOP_OF_STACK %rdi
  290. movq %rsp,%rdi
  291. call syscall_trace_enter
  292. LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
  293. RESTORE_REST
  294. cmpq $__NR_syscall_max,%rax
  295. movq $-ENOSYS,%rcx
  /* movq leaves the flags from the cmpq intact, so both cmova and ja act on that comparison. */
  296. cmova %rcx,%rax
  297. ja 1f
  298. movq %r10,%rcx /* fixup for C */
  299. call *sys_call_table(,%rax,8)
  300. 1: movq %rax,RAX-ARGOFFSET(%rsp)
  301. /* Use IRET because user could have changed frame */
  302. /*
  303. * Syscall return path ending with IRET.
  304. * Has correct top of stack, but partial stack frame.
  305. */
  306. .globl int_ret_from_sys_call
  307. int_ret_from_sys_call:
  308. cli
  309. TRACE_IRQS_OFF
  /* Saved CS with the low two bits clear: skip the work checks and restore registers directly. */
  310. testl $3,CS-ARGOFFSET(%rsp)
  311. je retint_restore_args
  312. movl $_TIF_ALLWORK_MASK,%edi
  313. /* edi: mask to check */
  314. int_with_check:
  315. GET_THREAD_INFO(%rcx)
  316. movl threadinfo_flags(%rcx),%edx
  317. andl %edi,%edx
  318. jnz int_careful
  319. andl $~TS_COMPAT,threadinfo_status(%rcx)
  320. jmp retint_swapgs
  321. /* Either reschedule or signal or syscall exit tracking needed. */
  322. /* First do a reschedule test. */
  323. /* edx: work, edi: workmask */
  324. int_careful:
  325. bt $TIF_NEED_RESCHED,%edx
  326. jnc int_very_careful
  327. TRACE_IRQS_ON
  328. sti
  329. pushq %rdi
  330. CFI_ADJUST_CFA_OFFSET 8
  331. call schedule
  332. popq %rdi
  333. CFI_ADJUST_CFA_OFFSET -8
  334. cli
  335. TRACE_IRQS_OFF
  336. jmp int_with_check
  337. /* handle signals and tracing -- both require a full stack frame */
  338. int_very_careful:
  339. TRACE_IRQS_ON
  340. sti
  341. SAVE_REST
  342. /* Check for syscall exit trace */
  343. testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
  344. jz int_signal
  345. pushq %rdi
  346. CFI_ADJUST_CFA_OFFSET 8
  347. leaq 8(%rsp),%rdi # &ptregs -> arg1
  348. call syscall_trace_leave
  349. popq %rdi
  350. CFI_ADJUST_CFA_OFFSET -8
  /* Drop the trace bits from the work mask so the next int_with_check pass does not test them again. */
  351. andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
  352. jmp int_restore_rest
  353. int_signal:
  354. testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
  355. jz 1f
  356. movq %rsp,%rdi # &ptregs -> arg1
  357. xorl %esi,%esi # oldset -> arg2
  358. call do_notify_resume
  359. 1: movl $_TIF_NEED_RESCHED,%edi
  360. int_restore_rest:
  361. RESTORE_REST
  362. cli
  363. TRACE_IRQS_OFF
  364. jmp int_with_check
  365. CFI_ENDPROC
  366. END(system_call)
  367. /*
  368. * Certain special system calls that need to save a complete full stack frame.
  369. */
  /*
   * PTREGSCALL label,func,arg: define a global stub \label that loads the
   * address of \func into %rax, places -ARGOFFSET+8(%rsp) in register \arg
   * (the +8 skips the stub's return address), and jumps to ptregscall_common,
   * which performs the actual call.
   */
  370. .macro PTREGSCALL label,func,arg
  371. .globl \label
  372. \label:
  373. leaq \func(%rip),%rax
  374. leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
  375. jmp ptregscall_common
  376. END(\label)
  377. .endm
  /*
   * Stubs for the syscalls that need a full frame, followed by their shared tail.
   * The CFI procedure opened here is closed at the end of ptregscall_common.
   * The third PTREGSCALL argument names the register that receives the frame pointer.
   */
  378. CFI_STARTPROC
  379. PTREGSCALL stub_clone, sys_clone, %r8
  380. PTREGSCALL stub_fork, sys_fork, %rdi
  381. PTREGSCALL stub_vfork, sys_vfork, %rdi
  382. PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
  383. PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
  384. PTREGSCALL stub_iopl, sys_iopl, %rsi
  /*
   * ptregscall_common: In: %rax = function to call; return address on top of stack.
   * Pops the return address, saves the remaining registers (SAVE_REST), keeps
   * the return address in %r15 across the call, fixes up the top of stack,
   * calls *%rax, then undoes all of that and returns to the saved address.
   * %r11 is used as scratch.
   */
  385. ENTRY(ptregscall_common)
  386. popq %r11
  387. CFI_ADJUST_CFA_OFFSET -8
  388. CFI_REGISTER rip, r11
  389. SAVE_REST
  390. movq %r11, %r15
  391. CFI_REGISTER rip, r15
  392. FIXUP_TOP_OF_STACK %r11
  393. call *%rax
  394. RESTORE_TOP_OF_STACK %r11
  395. movq %r15, %r11
  396. CFI_REGISTER rip, r11
  397. RESTORE_REST
  398. pushq %r11
  399. CFI_ADJUST_CFA_OFFSET 8
  400. CFI_REL_OFFSET rip, 0
  401. ret
  402. CFI_ENDPROC
  403. END(ptregscall_common)
  /*
   * stub_execve: full-frame wrapper around sys_execve.
   * The return address is popped and not used again (%r11 is reused as the
   * scratch register); the stub leaves through int_ret_from_sys_call instead of ret.
   * The result in %rax is stored into the frame's RAX slot.
   */
  404. ENTRY(stub_execve)
  405. CFI_STARTPROC
  406. popq %r11
  407. CFI_ADJUST_CFA_OFFSET -8
  408. CFI_REGISTER rip, r11
  409. SAVE_REST
  410. FIXUP_TOP_OF_STACK %r11
  411. call sys_execve
  412. RESTORE_TOP_OF_STACK %r11
  413. movq %rax,RAX(%rsp)
  414. RESTORE_REST
  415. jmp int_ret_from_sys_call
  416. CFI_ENDPROC
  417. END(stub_execve)
  418. /*
  419. * sigreturn is special because it needs to restore all registers on return.
  420. * This cannot be done with SYSRET, so use the IRET return path instead.
  421. */
  /*
   * stub_rt_sigreturn: drops the return address (addq $8), builds a full frame,
   * passes %rdi = frame pointer to sys_rt_sigreturn, stores the result into the
   * RAX slot and leaves through int_ret_from_sys_call. %r11 is scratch.
   */
  422. ENTRY(stub_rt_sigreturn)
  423. CFI_STARTPROC
  424. addq $8, %rsp
  425. CFI_ADJUST_CFA_OFFSET -8
  426. SAVE_REST
  427. movq %rsp,%rdi
  428. FIXUP_TOP_OF_STACK %r11
  429. call sys_rt_sigreturn
  430. movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
  431. RESTORE_REST
  432. jmp int_ret_from_sys_call
  433. CFI_ENDPROC
  434. END(stub_rt_sigreturn)
  435. /*
  436. * initial frame state for interrupts and exceptions
  437. */
  /*
   * _frame ref: open a "simple" signal-frame CFI procedure for an entry point
   * whose %rsp currently points at frame slot \ref. The CFA is rsp+SS+8-\ref,
   * and the saved rsp and rip are located at their slots relative to \ref.
   * The ss, rflags and cs annotations are commented out.
   */
  438. .macro _frame ref
  439. CFI_STARTPROC simple
  440. CFI_SIGNAL_FRAME
  441. CFI_DEF_CFA rsp,SS+8-\ref
  442. /*CFI_REL_OFFSET ss,SS-\ref*/
  443. CFI_REL_OFFSET rsp,RSP-\ref
  444. /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
  445. /*CFI_REL_OFFSET cs,CS-\ref*/
  446. CFI_REL_OFFSET rip,RIP-\ref
  447. .endm
  /* Both shorthands expand to _frame; they differ only in which frame slot %rsp is assumed to point at on entry (RIP vs. ORIG_RAX). */
  448. /* initial frame state for interrupts (and exceptions without error code) */
  449. #define INTR_FRAME _frame RIP
  450. /* initial frame state for exceptions with error code (and interrupts with
  451. vector already pushed) */
  452. #define XCPT_FRAME _frame ORIG_RAX
  453. /*
  454. * Interrupt entry/exit.
  455. *
  456. * Interrupt entry points save only callee clobbered registers in fast path.
  457. *
  458. * Entry runs with interrupts off.
  459. */
  460. /* 0(%rsp): interrupt number */
  /*
   * interrupt func: common interrupt prologue followed by "call \func".
   * - saves the argument registers and passes %rdi = frame pointer to \func
   * - pushes %rbp and sets %rbp = %rsp (ret_from_intr later undoes this with leaveq)
   * - does swapgs only when the saved CS has a nonzero privilege level
   * - increments %gs:pda_irqcount; when the increment yields zero, %rsp is
   *   switched to %gs:pda_irqstackptr (cmoveq uses the ZF produced by incl)
   * Expects the interrupt number at 0(%rsp) on entry.
   */
  461. .macro interrupt func
  462. cld
  463. SAVE_ARGS
  464. leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
  465. pushq %rbp
  466. CFI_ADJUST_CFA_OFFSET 8
  467. CFI_REL_OFFSET rbp, 0
  468. movq %rsp,%rbp
  469. CFI_DEF_CFA_REGISTER rbp
  470. testl $3,CS(%rdi)
  471. je 1f
  472. swapgs
  473. /* irqcount is used to check if a CPU is already on an interrupt
  474. stack or not. While this is essentially redundant with preempt_count
  475. it is a little cheaper to use a separate counter in the PDA
  476. (short of moving irq_enter into assembly, which would be too
  477. much work) */
  478. 1: incl %gs:pda_irqcount
  479. cmoveq %gs:pda_irqstackptr,%rsp
  480. push %rbp # backlink for old unwinder
  481. /*
  482. * We entered an interrupt context - irqs are off:
  483. */
  484. TRACE_IRQS_OFF
  485. call \func
  486. .endm
  /*
   * common_interrupt: entry that runs do_IRQ through the `interrupt` macro and
   * then falls into the return-from-interrupt paths below. Those labels
   * (ret_from_intr, retint_swapgs, retint_restore_args, retint_careful,
   * iret_label, retint_kernel) are also jump targets for other entry code in
   * this file.
   */
  487. ENTRY(common_interrupt)
  488. XCPT_FRAME
  489. interrupt do_IRQ
  490. /* 0(%rsp): oldrsp-ARGOFFSET */
  491. ret_from_intr:
  492. cli
  493. TRACE_IRQS_OFF
  494. decl %gs:pda_irqcount
  /* leaveq restores %rsp from %rbp and pops %rbp, undoing the frame link set up by the interrupt macro. */
  495. leaveq
  496. CFI_DEF_CFA_REGISTER rsp
  497. CFI_ADJUST_CFA_OFFSET -8
  498. exit_intr:
  499. GET_THREAD_INFO(%rcx)
  500. testl $3,CS-ARGOFFSET(%rsp)
  501. je retint_kernel
  502. /* Interrupt came from user space */
  503. /*
  504. * Has a correct top of stack, but a partial stack frame
  505. * %rcx: thread info. Interrupts off.
  506. */
  507. retint_with_reschedule:
  508. movl $_TIF_WORK_MASK,%edi
  509. retint_check:
  510. movl threadinfo_flags(%rcx),%edx
  511. andl %edi,%edx
  512. CFI_REMEMBER_STATE
  513. jnz retint_careful
  514. retint_swapgs:
  515. /*
  516. * The iretq could re-enable interrupts:
  517. */
  518. cli
  519. TRACE_IRQS_IRETQ
  520. swapgs
  521. jmp restore_args
  522. retint_restore_args:
  523. cli
  524. /*
  525. * The iretq could re-enable interrupts:
  526. */
  527. TRACE_IRQS_IRETQ
  528. restore_args:
  529. RESTORE_ARGS 0,8,0
  530. iret_label:
  531. iretq
  /* Exception-table entry: a fault at iret_label is redirected to bad_iret. */
  532. .section __ex_table,"a"
  533. .quad iret_label,bad_iret
  534. .previous
  535. .section .fixup,"ax"
  536. /* force a signal here? this matches i386 behaviour */
  537. /* running with kernel gs */
  538. bad_iret:
  539. movq $11,%rdi /* SIGSEGV */
  540. TRACE_IRQS_ON
  541. sti
  542. jmp do_exit
  543. .previous
  544. /* edi: workmask, edx: work */
  545. retint_careful:
  546. CFI_RESTORE_STATE
  547. bt $TIF_NEED_RESCHED,%edx
  548. jnc retint_signal
  549. TRACE_IRQS_ON
  550. sti
  551. pushq %rdi
  552. CFI_ADJUST_CFA_OFFSET 8
  553. call schedule
  554. popq %rdi
  555. CFI_ADJUST_CFA_OFFSET -8
  556. GET_THREAD_INFO(%rcx)
  557. cli
  558. TRACE_IRQS_OFF
  559. jmp retint_check
  /* retint_signal: no reschedule pending; if a signal/notify/singlestep bit is set, build a full frame and call do_notify_resume(&pt_regs, 0, work flags in %edx). */
  560. retint_signal:
  561. testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
  562. jz retint_swapgs
  563. TRACE_IRQS_ON
  564. sti
  565. SAVE_REST
  566. movq $-1,ORIG_RAX(%rsp)
  567. xorl %esi,%esi # oldset
  568. movq %rsp,%rdi # &pt_regs
  569. call do_notify_resume
  570. RESTORE_REST
  571. cli
  572. TRACE_IRQS_OFF
  573. movl $_TIF_NEED_RESCHED,%edi
  574. GET_THREAD_INFO(%rcx)
  575. jmp retint_check
  576. #ifdef CONFIG_PREEMPT
  577. /* Returning to kernel space. Check if we need preemption */
  578. /* rcx: threadinfo. interrupts off. */
  /* Preempts only when preempt_count is 0, TIF_NEED_RESCHED is set and the interrupted context had bit 9 of EFLAGS set; otherwise just restores registers. */
  579. ENTRY(retint_kernel)
  580. cmpl $0,threadinfo_preempt_count(%rcx)
  581. jnz retint_restore_args
  582. bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
  583. jnc retint_restore_args
  584. bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
  585. jnc retint_restore_args
  586. call preempt_schedule_irq
  587. jmp exit_intr
  588. #endif
  589. CFI_ENDPROC
  590. END(common_interrupt)
  591. /*
  592. * APIC interrupts.
  593. */
  /*
   * apicinterrupt num,func: body of an APIC interrupt entry point.
   * Pushes ~(\num) into the slot the `interrupt` macro expects at 0(%rsp)
   * ("interrupt number"), runs \func through that macro, and finishes through
   * the shared ret_from_intr path.
   */
  594. .macro apicinterrupt num,func
  595. INTR_FRAME
  596. pushq $~(\num)
  597. CFI_ADJUST_CFA_OFFSET 8
  598. interrupt \func
  599. jmp ret_from_intr
  600. CFI_ENDPROC
  601. .endm
  /* APIC entry points built with apicinterrupt: each one binds a vector constant to its handler. */
  602. ENTRY(thermal_interrupt)
  603. apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
  604. END(thermal_interrupt)
  605. ENTRY(threshold_interrupt)
  606. apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
  607. END(threshold_interrupt)
  /* Entry points that exist only when CONFIG_SMP is defined: reschedule, eight TLB-invalidate vectors, and call-function. */
  608. #ifdef CONFIG_SMP
  609. ENTRY(reschedule_interrupt)
  610. apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
  611. END(reschedule_interrupt)
  /* INVALIDATE_ENTRY num: define invalidate_interrupt<num> for vector INVALIDATE_TLB_VECTOR_START+num; all instances share the handler smp_invalidate_interrupt. */
  612. .macro INVALIDATE_ENTRY num
  613. ENTRY(invalidate_interrupt\num)
  614. apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
  615. END(invalidate_interrupt\num)
  616. .endm
  617. INVALIDATE_ENTRY 0
  618. INVALIDATE_ENTRY 1
  619. INVALIDATE_ENTRY 2
  620. INVALIDATE_ENTRY 3
  621. INVALIDATE_ENTRY 4
  622. INVALIDATE_ENTRY 5
  623. INVALIDATE_ENTRY 6
  624. INVALIDATE_ENTRY 7
  625. ENTRY(call_function_interrupt)
  626. apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
  627. END(call_function_interrupt)
  628. #endif
  /* Remaining APIC entry points (local timer, error, spurious), each built with apicinterrupt. */
  629. ENTRY(apic_timer_interrupt)
  630. apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
  631. END(apic_timer_interrupt)
  632. ENTRY(error_interrupt)
  633. apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
  634. END(error_interrupt)
  635. ENTRY(spurious_interrupt)
  636. apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
  637. END(spurious_interrupt)
  638. /*
  639. * Exception entry points.
  640. */
  /*
   * zeroentry sym: body of an exception entry that has no error code on the stack.
   * Pushes 0 as the error code / orig rax slot, saves the interrupted %rax
   * in the rdi slot, loads the address of handler \sym into %rax and jumps to
   * error_entry.
   */
  641. .macro zeroentry sym
  642. INTR_FRAME
  643. pushq $0 /* push error code/oldrax */
  644. CFI_ADJUST_CFA_OFFSET 8
  645. pushq %rax /* push real oldrax to the rdi slot */
  646. CFI_ADJUST_CFA_OFFSET 8
  647. leaq \sym(%rip),%rax
  648. jmp error_entry
  649. CFI_ENDPROC
  650. .endm
  /*
   * errorentry sym: like zeroentry, but for exceptions where the error code is
   * already on the stack, so only the interrupted %rax is pushed before
   * jumping to error_entry with %rax = address of handler \sym.
   */
  651. .macro errorentry sym
  652. XCPT_FRAME
  653. pushq %rax
  654. CFI_ADJUST_CFA_OFFSET 8
  655. leaq \sym(%rip),%rax
  656. jmp error_entry
  657. CFI_ENDPROC
  658. .endm
  659. /* error code is on the stack already */
  660. /* handle NMI like exceptions that can happen everywhere */
  /*
   * paranoidentry sym, ist, irqtrace: entry sequence that decides for itself
   * whether swapgs is needed instead of trusting the saved CS.
   * - Saves all registers, then reads MSR_GS_BASE: if the high half (%edx) is
   *   negative, swapgs is skipped and %ebx stays 1; otherwise swapgs is
   *   executed and %ebx is cleared. paranoidexit consumes %ebx.
   * - Calls \sym with %rdi = frame pointer and %rsi = the ORIG_RAX slot
   *   contents (which is then overwritten with -1).
   * - If \ist is nonzero, the corresponding TSS IST entry is lowered by
   *   EXCEPTION_STKSZ for the duration of the call and restored afterwards
   *   (presumably so a nested exception of the same kind gets a separate stack).
   * - Interrupts are disabled after the call; TRACE_IRQS_OFF is emitted only
   *   when \irqtrace is nonzero.
   */
  661. .macro paranoidentry sym, ist=0, irqtrace=1
  662. SAVE_ALL
  663. cld
  664. movl $1,%ebx
  665. movl $MSR_GS_BASE,%ecx
  666. rdmsr
  667. testl %edx,%edx
  668. js 1f
  669. swapgs
  670. xorl %ebx,%ebx
  671. 1:
  672. .if \ist
  673. movq %gs:pda_data_offset, %rbp
  674. .endif
  675. movq %rsp,%rdi
  676. movq ORIG_RAX(%rsp),%rsi
  677. movq $-1,ORIG_RAX(%rsp)
  678. .if \ist
  679. subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  680. .endif
  681. call \sym
  682. .if \ist
  683. addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
  684. .endif
  685. cli
  686. .if \irqtrace
  687. TRACE_IRQS_OFF
  688. .endif
  689. .endm
  690. /*
  691. * "Paranoid" exit path from exception stack.
  692. * Paranoid because this is used by NMIs and cannot take
  693. * any kernel state for granted.
  694. * We don't do kernel preemption checks here, because only
  695. * NMI should be common and it does not enable IRQs and
  696. * cannot get reschedule ticks.
  697. *
  698. * "trace" is 0 for the NMI handler only, because irq-tracing
  699. * is fundamentally NMI-unsafe. (we cannot change the soft and
  700. * hard flags at once, atomically)
  701. */
  /*
   * paranoidexit trace: emit the exit path matching paranoidentry. Every label
   * carries the \trace value as a suffix (e.g. paranoid_exit1), so one copy
   * with tracing and one without can coexist.
   * In: %ebx = "no swapgs" flag set by paranoidentry.
   *   ebx != 0                 -> restore registers and iretq, no swapgs
   *   ebx == 0, saved CPL == 0 -> swapgs, restore, iretq
   *   ebx == 0, saved CPL != 0 -> paranoid_userspace: handle pending work first
   * Also closes the CFI procedure opened by the entry point (CFI_ENDPROC).
   */
  702. .macro paranoidexit trace=1
  703. /* ebx: no swapgs flag */
  704. paranoid_exit\trace:
  705. testl %ebx,%ebx /* swapgs needed? */
  706. jnz paranoid_restore\trace
  707. testl $3,CS(%rsp)
  708. jnz paranoid_userspace\trace
  709. paranoid_swapgs\trace:
  710. .if \trace
  711. TRACE_IRQS_IRETQ 0
  712. .endif
  713. swapgs
  714. paranoid_restore\trace:
  715. RESTORE_ALL 8
  716. iretq
  /* Work loop: %ebx is reused for the masked thread flags; once no _TIF_WORK_MASK bit is set the code exits via paranoid_swapgs. */
  717. paranoid_userspace\trace:
  718. GET_THREAD_INFO(%rcx)
  719. movl threadinfo_flags(%rcx),%ebx
  720. andl $_TIF_WORK_MASK,%ebx
  721. jz paranoid_swapgs\trace
  722. movq %rsp,%rdi /* &pt_regs */
  723. call sync_regs
  724. movq %rax,%rsp /* switch stack for scheduling */
  725. testl $_TIF_NEED_RESCHED,%ebx
  726. jnz paranoid_schedule\trace
  727. movl %ebx,%edx /* arg3: thread flags */
  728. .if \trace
  729. TRACE_IRQS_ON
  730. .endif
  731. sti
  732. xorl %esi,%esi /* arg2: oldset */
  733. movq %rsp,%rdi /* arg1: &pt_regs */
  734. call do_notify_resume
  735. cli
  736. .if \trace
  737. TRACE_IRQS_OFF
  738. .endif
  739. jmp paranoid_userspace\trace
  740. paranoid_schedule\trace:
  741. .if \trace
  742. TRACE_IRQS_ON
  743. .endif
  744. sti
  745. call schedule
  746. cli
  747. .if \trace
  748. TRACE_IRQS_OFF
  749. .endif
  750. jmp paranoid_userspace\trace
  751. CFI_ENDPROC
  752. .endm
  753. /*
  754. * Exception entry point. This expects an error code/orig_rax on the stack
  755. * and the exception handler in %rax.
  756. */
  /*
   * error_entry: common exception entry reached from zeroentry/errorentry.
   * In:  %rax = handler address; the frame's rdi slot holds the interrupted
   *      %rax; the ORIG_RAX slot holds the error code.
   * Builds the rest of the register frame by hand (14 further slots), sets
   * %ebx = 0 when swapgs was executed on entry or 1 when the exception came
   * from kernel mode without one, then calls the handler with
   * %rdi = frame pointer and %rsi = error code (ORIG_RAX is overwritten with -1).
   * error_exit returns through the shared retint_* / iret_label paths.
   */
  757. KPROBE_ENTRY(error_entry)
  758. _frame RDI
  759. /* rdi slot contains rax, oldrax contains error code */
  760. cld
  761. subq $14*8,%rsp
  762. CFI_ADJUST_CFA_OFFSET (14*8)
  763. movq %rsi,13*8(%rsp)
  764. CFI_REL_OFFSET rsi,RSI
  765. movq 14*8(%rsp),%rsi /* load rax from rdi slot */
  766. movq %rdx,12*8(%rsp)
  767. CFI_REL_OFFSET rdx,RDX
  768. movq %rcx,11*8(%rsp)
  769. CFI_REL_OFFSET rcx,RCX
  770. movq %rsi,10*8(%rsp) /* store rax */
  771. CFI_REL_OFFSET rax,RAX
  772. movq %r8, 9*8(%rsp)
  773. CFI_REL_OFFSET r8,R8
  774. movq %r9, 8*8(%rsp)
  775. CFI_REL_OFFSET r9,R9
  776. movq %r10,7*8(%rsp)
  777. CFI_REL_OFFSET r10,R10
  778. movq %r11,6*8(%rsp)
  779. CFI_REL_OFFSET r11,R11
  780. movq %rbx,5*8(%rsp)
  781. CFI_REL_OFFSET rbx,RBX
  782. movq %rbp,4*8(%rsp)
  783. CFI_REL_OFFSET rbp,RBP
  784. movq %r12,3*8(%rsp)
  785. CFI_REL_OFFSET r12,R12
  786. movq %r13,2*8(%rsp)
  787. CFI_REL_OFFSET r13,R13
  788. movq %r14,1*8(%rsp)
  789. CFI_REL_OFFSET r14,R14
  790. movq %r15,(%rsp)
  791. CFI_REL_OFFSET r15,R15
  792. xorl %ebx,%ebx
  793. testl $3,CS(%rsp)
  794. je error_kernelspace
  795. error_swapgs:
  796. swapgs
  797. error_sti:
  /* %rdi still holds its value from the interrupted context; only now is it stored into its frame slot. */
  798. movq %rdi,RDI(%rsp)
  799. movq %rsp,%rdi
  800. movq ORIG_RAX(%rsp),%rsi /* get error code */
  801. movq $-1,ORIG_RAX(%rsp)
  802. call *%rax
  803. /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
  804. error_exit:
  /* The flag is copied to %eax before RESTORE_REST, because %rbx is one of the registers saved in this frame and is restored there. */
  805. movl %ebx,%eax
  806. RESTORE_REST
  807. cli
  808. TRACE_IRQS_OFF
  809. GET_THREAD_INFO(%rcx)
  810. testl %eax,%eax
  811. jne retint_kernel
  812. movl threadinfo_flags(%rcx),%edx
  813. movl $_TIF_WORK_MASK,%edi
  814. andl %edi,%edx
  815. jnz retint_careful
  816. /*
  817. * The iret might restore flags:
  818. */
  819. TRACE_IRQS_IRETQ
  820. swapgs
  821. RESTORE_ARGS 0,8,0
  822. jmp iret_label
  823. CFI_ENDPROC
  824. error_kernelspace:
  825. incl %ebx
  826. /* There are two places in the kernel that can potentially fault with
  827. usergs. Handle them here. The exception handlers after
  828. iret run with kernel gs again, so don't set the user space flag.
  829. B stepping K8s sometimes report an truncated RIP for IRET
  830. exceptions returning to compat mode. Check for these here too. */
  /* The saved RIP is compared against iret_label, its zero-extended low 32 bits, and gs_change; a match goes to error_swapgs with %ebx already 1. */
  831. leaq iret_label(%rip),%rbp
  832. cmpq %rbp,RIP(%rsp)
  833. je error_swapgs
  834. movl %ebp,%ebp /* zero extend */
  835. cmpq %rbp,RIP(%rsp)
  836. je error_swapgs
  837. cmpq $gs_change,RIP(%rsp)
  838. je error_swapgs
  839. jmp error_sti
  840. KPROBE_END(error_entry)
  841. /* Reload gs selector with exception handling */
  842. /* edi: new selector */
  /*
   * load_gs_index: load the selector in %edi into %gs.
   * Saves the flags, disables interrupts, and brackets the segment load with
   * swapgs on both sides; the saved flags are restored before returning.
   * If the load at gs_change faults, the exception-table entry redirects to
   * bad_gs, which swaps gs back, loads a zero selector instead and resumes at
   * local label 2.
   */
  843. ENTRY(load_gs_index)
  844. CFI_STARTPROC
  845. pushf
  846. CFI_ADJUST_CFA_OFFSET 8
  847. cli
  848. swapgs
  849. gs_change:
  850. movl %edi,%gs
  851. 2: mfence /* workaround */
  852. swapgs
  853. popf
  854. CFI_ADJUST_CFA_OFFSET -8
  855. ret
  856. CFI_ENDPROC
  857. ENDPROC(load_gs_index)
  858. .section __ex_table,"a"
  859. .align 8
  860. .quad gs_change,bad_gs
  861. .previous
  862. .section .fixup,"ax"
  863. /* running with kernelgs */
  864. bad_gs:
  865. swapgs /* switch back to user gs */
  866. xorl %eax,%eax
  867. movl %eax,%gs
  868. jmp 2b
  869. .previous
  870. /*
  871. * Create a kernel thread.
  872. *
  873. * C extern interface:
  874. * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  875. *
  876. * asm input arguments:
  877. * rdi: fn, rsi: arg, rdx: flags
  878. */
  /*
   * kernel_thread: In: rdi = fn, rsi = arg, rdx = flags.
   * Builds a fake frame whose rip is child_rip, saves all registers into it,
   * then calls do_fork with rdi = flags | kernel_thread_flags, rsi = -1,
   * rdx = frame pointer, r8 = r9 = 0. The do_fork result is written to the
   * frame's RAX slot, so it is the value in %rax after RESTORE_ALL.
   */
  879. ENTRY(kernel_thread)
  880. CFI_STARTPROC
  881. FAKE_STACK_FRAME $child_rip
  882. SAVE_ALL
  883. # rdi: flags, rsi: usp, rdx: will be &pt_regs
  884. movq %rdx,%rdi
  885. orq kernel_thread_flags(%rip),%rdi
  886. movq $-1, %rsi
  887. movq %rsp, %rdx
  888. xorl %r8d,%r8d
  889. xorl %r9d,%r9d
  890. # clone now
  891. call do_fork
  892. movq %rax,RAX(%rsp)
  893. xorl %edi,%edi
  894. /*
  895. * It isn't worth to check for reschedule here,
  896. * so internally to the x86_64 port you can rely on kernel_thread()
  897. * not to reschedule the child before returning, this avoids the need
  898. * of hacks for example to fork off the per-CPU idle tasks.
  899. * [Hopefully no generic code relies on the reschedule -AK]
  900. */
  901. RESTORE_ALL
  902. UNFAKE_STACK_FRAME
  903. ret
  904. CFI_ENDPROC
  905. ENDPROC(kernel_thread)
  /*
   * child_rip: the rip that kernel_thread places in the fake frame.
   * Calls fn(arg) (fn arrives in %rdi, arg in %rsi) and then do_exit(0).
   * Nothing follows the call to do_exit.
   */
  906. child_rip:
  907. pushq $0 # fake return address
  908. CFI_STARTPROC
  909. /*
  910. * Here we are in the child and the registers are set as they were
  911. * at kernel_thread() invocation in the parent.
  912. */
  913. movq %rdi, %rax
  914. movq %rsi, %rdi
  915. call *%rax
  916. # exit
  917. xorl %edi, %edi
  918. call do_exit
  919. CFI_ENDPROC
  920. ENDPROC(child_rip)
  921. /*
  922. * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  923. *
  924. * C extern interface (the symbol defined below is kernel_execve):
  925. * extern long execve(char *name, char **argv, char **envp)
  926. *
  927. * asm input arguments:
  928. * rdi: name, rsi: argv, rdx: envp
  929. *
  930. * We want to fall back into:
  931. * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
  932. *
  933. * do_sys_execve asm fallback arguments:
  934. * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
   *
   * Flow of the code below: the sys_execve result is stored in the RAX slot
   * of the saved frame. A zero result leaves through int_ret_from_sys_call;
   * a non-zero result unwinds the fake frame and returns to the caller.
  935. */
  936. ENTRY(kernel_execve)
  937. CFI_STARTPROC
  938. FAKE_STACK_FRAME $0
  939. SAVE_ALL
  940. call sys_execve
  941. movq %rax, RAX(%rsp) /* record the result in the saved RAX slot */
  942. RESTORE_REST
  943. testq %rax,%rax /* zero result? */
  944. je int_ret_from_sys_call /* yes: leave via int_ret_from_sys_call (defined outside this excerpt) */
  945. RESTORE_ARGS /* no: undo the frame and return normally */
  946. UNFAKE_STACK_FRAME
  947. ret
  948. CFI_ENDPROC
  949. ENDPROC(kernel_execve)
  /*
   * Entry point page_fault, declared with KPROBE_ENTRY/KPROBE_END.
   * errorentry (defined outside this excerpt) is given do_page_fault,
   * presumably the handler it invokes.
   */
  950. KPROBE_ENTRY(page_fault)
  951. errorentry do_page_fault
  952. KPROBE_END(page_fault)
  /* Entry point coprocessor_error; zeroentry (defined outside this excerpt) is given do_coprocessor_error, presumably the handler it invokes. */
  953. ENTRY(coprocessor_error)
  954. zeroentry do_coprocessor_error
  955. END(coprocessor_error)
  /* Entry point simd_coprocessor_error; zeroentry (defined outside this excerpt) is given do_simd_coprocessor_error, presumably the handler it invokes. */
  956. ENTRY(simd_coprocessor_error)
  957. zeroentry do_simd_coprocessor_error
  958. END(simd_coprocessor_error)
  /*
   * Entry point device_not_available; zeroentry (defined outside this
   * excerpt) is given math_state_restore. Unlike its neighbours, the
   * routine named here does not follow the do_<entry> naming pattern.
   */
  959. ENTRY(device_not_available)
  960. zeroentry math_state_restore
  961. END(device_not_available)
  962. /* runs on exception stack */
  /*
   * Entry point debug, declared with KPROBE_ENTRY/KPROBE_END.
   * A zero quadword is pushed before paranoidentry is expanded with
   * do_debug and DEBUG_STACK, and the paranoidexit macro is expanded
   * in place afterwards. Both macros are defined outside this excerpt.
   */
  963. KPROBE_ENTRY(debug)
  964. INTR_FRAME
  965. pushq $0 /* presumably fills the error-code slot - confirm against paranoidentry */
  966. CFI_ADJUST_CFA_OFFSET 8 /* unwind info: account for the 8-byte push */
  967. paranoidentry do_debug, DEBUG_STACK
  968. paranoidexit
  969. KPROBE_END(debug)
  970. /* runs on exception stack */
  /*
   * Entry point nmi, declared with KPROBE_ENTRY/KPROBE_END.
   * Pushes -1 and expands paranoidentry with do_nmi and two zero
   * arguments (their meaning is set by the macro, which is defined
   * outside this excerpt).
   * With CONFIG_TRACE_IRQFLAGS the exit path is a private expansion of
   * "paranoidexit 0"; otherwise the code jumps to the shared
   * paranoid_exit1 label and closes its CFI region itself.
   */
  971. KPROBE_ENTRY(nmi)
  972. INTR_FRAME
  973. pushq $-1 /* note: -1 here, whereas debug and int3 push 0 */
  974. CFI_ADJUST_CFA_OFFSET 8 /* unwind info: account for the 8-byte push */
  975. paranoidentry do_nmi, 0, 0
  976. #ifdef CONFIG_TRACE_IRQFLAGS
  977. paranoidexit 0
  978. #else
  979. jmp paranoid_exit1
  980. CFI_ENDPROC
  981. #endif
  982. KPROBE_END(nmi)
  /*
   * Entry point int3, declared with KPROBE_ENTRY/KPROBE_END.
   * Pushes a zero quadword, expands paranoidentry with do_int3 and
   * DEBUG_STACK (the same stack argument the debug entry uses), then
   * leaves through the shared paranoid_exit1 label.
   */
  983. KPROBE_ENTRY(int3)
  984. INTR_FRAME
  985. pushq $0 /* presumably fills the error-code slot - confirm against paranoidentry */
  986. CFI_ADJUST_CFA_OFFSET 8 /* unwind info: account for the 8-byte push */
  987. paranoidentry do_int3, DEBUG_STACK
  988. jmp paranoid_exit1
  989. CFI_ENDPROC
  990. KPROBE_END(int3)
  /* Entry point overflow; zeroentry (defined outside this excerpt) is given do_overflow, presumably the handler it invokes. */
  991. ENTRY(overflow)
  992. zeroentry do_overflow
  993. END(overflow)
  /* Entry point bounds; zeroentry (defined outside this excerpt) is given do_bounds, presumably the handler it invokes. */
  994. ENTRY(bounds)
  995. zeroentry do_bounds
  996. END(bounds)
  /* Entry point invalid_op; zeroentry (defined outside this excerpt) is given do_invalid_op, presumably the handler it invokes. */
  997. ENTRY(invalid_op)
  998. zeroentry do_invalid_op
  999. END(invalid_op)
  /* Entry point coprocessor_segment_overrun; zeroentry (defined outside this excerpt) is given do_coprocessor_segment_overrun, presumably the handler it invokes. */
  1000. ENTRY(coprocessor_segment_overrun)
  1001. zeroentry do_coprocessor_segment_overrun
  1002. END(coprocessor_segment_overrun)
  /* Entry point reserved; zeroentry (defined outside this excerpt) is given do_reserved, presumably the handler it invokes. */
  1003. ENTRY(reserved)
  1004. zeroentry do_reserved
  1005. END(reserved)
  1006. /* runs on exception stack */
  /*
   * Entry point double_fault. Uses XCPT_FRAME and pushes nothing before
   * paranoidentry, unlike the INTR_FRAME-based debug, nmi and int3
   * entries in this file; presumably because the CPU supplies an error
   * code for this exception - confirm against the architecture manual.
   * Leaves through the shared paranoid_exit1 label.
   */
  1007. ENTRY(double_fault)
  1008. XCPT_FRAME
  1009. paranoidentry do_double_fault
  1010. jmp paranoid_exit1
  1011. CFI_ENDPROC
  1012. END(double_fault)
  /* Entry point invalid_TSS; errorentry (defined outside this excerpt) is given do_invalid_TSS, presumably the handler it invokes. */
  1013. ENTRY(invalid_TSS)
  1014. errorentry do_invalid_TSS
  1015. END(invalid_TSS)
  /* Entry point segment_not_present; errorentry (defined outside this excerpt) is given do_segment_not_present, presumably the handler it invokes. */
  1016. ENTRY(segment_not_present)
  1017. errorentry do_segment_not_present
  1018. END(segment_not_present)
  1019. /* runs on exception stack */
  /*
   * Entry point stack_segment. Same shape as double_fault in this file:
   * XCPT_FRAME, no extra push, paranoidentry with do_stack_segment,
   * then exit through the shared paranoid_exit1 label.
   */
  1020. ENTRY(stack_segment)
  1021. XCPT_FRAME
  1022. paranoidentry do_stack_segment
  1023. jmp paranoid_exit1
  1024. CFI_ENDPROC
  1025. END(stack_segment)
  /*
   * Entry point general_protection, declared with KPROBE_ENTRY/KPROBE_END.
   * errorentry (defined outside this excerpt) is given
   * do_general_protection, presumably the handler it invokes.
   */
  1026. KPROBE_ENTRY(general_protection)
  1027. errorentry do_general_protection
  1028. KPROBE_END(general_protection)
  /* Entry point alignment_check; errorentry (defined outside this excerpt) is given do_alignment_check, presumably the handler it invokes. */
  1029. ENTRY(alignment_check)
  1030. errorentry do_alignment_check
  1031. END(alignment_check)
  /* Entry point divide_error; zeroentry (defined outside this excerpt) is given do_divide_error, presumably the handler it invokes. */
  1032. ENTRY(divide_error)
  1033. zeroentry do_divide_error
  1034. END(divide_error)
  /* Entry point spurious_interrupt_bug; zeroentry (defined outside this excerpt) is given do_spurious_interrupt_bug, presumably the handler it invokes. */
  1035. ENTRY(spurious_interrupt_bug)
  1036. zeroentry do_spurious_interrupt_bug
  1037. END(spurious_interrupt_bug)
  1038. #ifdef CONFIG_X86_MCE
  1039. /* runs on exception stack */
  /*
   * Entry point machine_check, assembled only when CONFIG_X86_MCE is
   * defined. Pushes a zero quadword, expands paranoidentry with
   * do_machine_check and leaves through the shared paranoid_exit1 label.
   */
  1040. ENTRY(machine_check)
  1041. INTR_FRAME
  1042. pushq $0 /* presumably fills the error-code slot - confirm against paranoidentry */
  1043. CFI_ADJUST_CFA_OFFSET 8 /* unwind info: account for the 8-byte push */
  1044. paranoidentry do_machine_check
  1045. jmp paranoid_exit1
  1046. CFI_ENDPROC
  1047. END(machine_check)
  1048. #endif
  1049. /* Call softirq on interrupt stack. Interrupts are off. */
  /*
   * Saves %rbp and uses it as a frame pointer, increments the
   * %gs-relative pda_irqcount, calls __do_softirq and undoes both on the
   * way out. The stack pointer is replaced with pda_irqstackptr only
   * when the increment leaves pda_irqcount at zero.
   */
  1050. ENTRY(call_softirq)
  1051. CFI_STARTPROC
  1052. push %rbp
  1053. CFI_ADJUST_CFA_OFFSET 8
  1054. CFI_REL_OFFSET rbp,0
  1055. mov %rsp,%rbp /* rbp remembers the stack in use at entry */
  1056. CFI_DEF_CFA_REGISTER rbp
  1057. incl %gs:pda_irqcount /* sets ZF when the counter becomes 0 */
  1058. cmove %gs:pda_irqstackptr,%rsp /* switch stacks only if ZF was set by the incl above */
  1059. push %rbp # backlink for old unwinder
  1060. call __do_softirq
  1061. leaveq /* rsp = rbp, then pop rbp: back on the entry stack whether or not the switch happened */
  1062. CFI_DEF_CFA_REGISTER rsp
  1063. CFI_ADJUST_CFA_OFFSET -8
  1064. decl %gs:pda_irqcount /* undo the increment made before the call */
  1065. ret
  1066. CFI_ENDPROC
  1067. ENDPROC(call_softirq)