/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like a partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */

#define ASSEMBLY 1
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc 1f
	TRACE_IRQS_ON
1:
#endif
.endm
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq %gs:pda_oldrsp,\tmp
	movq \tmp,RSP(%rsp)
	movq $__USER_DS,SS(%rsp)
	movq $__USER_CS,CS(%rsp)
	movq $-1,RCX(%rsp)
	movq R11(%rsp),\tmp	/* get eflags */
	movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq RSP-\offset(%rsp),\tmp
	movq \tmp,%gs:pda_oldrsp
	movq EFLAGS-\offset(%rsp),\tmp
	movq \tmp,R11-\offset(%rsp)
.endm
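/*
 * FAKE_STACK_FRAME pushes a minimal interrupt frame (ss, rsp, eflags, cs,
 * rip = \child_rip, orig_rax) so that kernel_thread() and execve() below can
 * hand a pt_regs to do_fork()/sys_execve() and reuse the normal exit paths.
 * UNFAKE_STACK_FRAME drops those six words again.
 */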
.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq %rax		/* ss */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET ss,0*/
	pushq %rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rsp,0
	pushq $(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET rflags,0*/
	pushq $__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET cs,0*/
	pushq \child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip,0
	pushq %rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET 8
.endm

.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
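/*
 * CFI_DEFAULT_STACK emits the DWARF unwind annotations for a full pt_regs
 * save area on the kernel stack (optionally starting a new CFI procedure).
 */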
.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET r15,R15
	CFI_REL_OFFSET r14,R14
	CFI_REL_OFFSET r13,R13
	CFI_REL_OFFSET r12,R12
	CFI_REL_OFFSET rbp,RBP
	CFI_REL_OFFSET rbx,RBX
	CFI_REL_OFFSET r11,R11
	CFI_REL_OFFSET r10,R10
	CFI_REL_OFFSET r9,R9
	CFI_REL_OFFSET r8,R8
	CFI_REL_OFFSET rax,RAX
	CFI_REL_OFFSET rcx,RCX
	CFI_REL_OFFSET rdx,RDX
	CFI_REL_OFFSET rsi,RSI
	CFI_REL_OFFSET rdi,RDI
	CFI_REL_OFFSET rip,RIP
	/*CFI_REL_OFFSET cs,CS*/
	/*CFI_REL_OFFSET rflags,EFLAGS*/
	CFI_REL_OFFSET rsp,RSP
	/*CFI_REL_OFFSET ss,SS*/
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 *	and report it properly in ps. Unfortunately we haven't.
 *
 * When the user can change the frames, always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
ENTRY(system_call)
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,PDA_STACKOFFSET
	CFI_REGISTER rip,rcx
	/*CFI_REGISTER rflags,r11*/
	swapgs
	movq %rsp,%gs:pda_oldrsp
	movq %gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	sti
	SAVE_ARGS 8,1
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz sysret_careful
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq %gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx: work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz 1f

	/* Really a signal */
	/* edx: work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	cli
	TRACE_IRQS_OFF
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja 1f
	movq %r10,%rcx		/* fixup for C */
	call *sys_call_table(,%rax,8)
1:	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(system_call)
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	CFI_REL_OFFSET rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET rax,RAX-ARGOFFSET
	CFI_REL_OFFSET rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET r8,R8-ARGOFFSET
	CFI_REL_OFFSET r9,R9-ARGOFFSET
	CFI_REL_OFFSET r10,R10-ARGOFFSET
	CFI_REL_OFFSET r11,R11-ARGOFFSET
	cli
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	andl $~TS_COMPAT,threadinfo_status(%rcx)
	jmp retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	TRACE_IRQS_OFF
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(int_ret_from_sys_call)
/*
 * Certain special system calls that need to save a complete stack frame.
 */
.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp ptregscall_common
END(\label)
.endm

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi
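/*
 * Common tail for the PTREGSCALL stubs: pop the return address into %r11,
 * save the remaining registers, keep the return address in callee-saved %r15
 * across the call, FIXUP_TOP_OF_STACK so the handler in %rax sees a complete
 * pt_regs, then undo the fixup, rebuild the return address on the stack and
 * return to the caller.
 */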
ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)
/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
	pushq %rbp
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl %gs:pda_irqcount	# RED-PEN should check preempt count
	cmoveq %gs:pda_irqstackptr,%rsp
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
.endm
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	cli
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER rsp
	CFI_ADJUST_CFA_OFFSET -8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz retint_careful
retint_swapgs:
	/*
	 * The iretq could re-enable interrupts:
	 */
	cli
	TRACE_IRQS_IRETQ
	swapgs
	jmp restore_args

retint_restore_args:
	cli
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $11,%rdi	/* SIGSEGV */
	TRACE_IRQS_ON
	sti
	jmp do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	TRACE_IRQS_ON
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz retint_swapgs
	TRACE_IRQS_ON
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
#endif
/*
 * Exception entry points.
 */
.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm

.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	swapgs
	xorl %ebx,%ebx
1:
	.if \ist
	movq %gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
.endm
/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * the NMI case should be common, and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
.macro paranoidexit trace=1
	/* ebx: no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz paranoid_userspace\trace
paranoid_swapgs\trace:
	TRACE_IRQS_IRETQ 0
	swapgs
paranoid_restore\trace:
	RESTORE_ALL 8
	iretq
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	call schedule
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET (14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	/*
	 * The iret might restore flags:
	 */
	TRACE_IRQS_IRETQ
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti
END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs		/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
ENDPROC(child_rip)
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(execve)
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
END(page_fault)
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
END(debug)
	.previous .text

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
END(nmi)
	.previous .text

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
END(int3)
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
	zeroentry do_reserved
END(reserved)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
END(general_protection)
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif
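/*
 * Run __do_softirq() on the per-CPU interrupt stack: bump pda_irqcount and,
 * if the count became zero (we were not already on the interrupt stack),
 * switch %rsp to pda_irqstackptr; the old stack pointer is pushed on the
 * new stack and restored after __do_softirq() returns.
 */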
ENTRY(call_softirq)
	CFI_STARTPROC
	movq %gs:pda_irqstackptr,%rax
	movq %rsp,%rdx
	CFI_DEF_CFA_REGISTER rdx
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	pushq %rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call __do_softirq
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)

#ifdef CONFIG_STACK_UNWIND
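/*
 * Fill the pt_regs-style area at %rdi with the caller's state: real values
 * for the callee-saved registers, zeroes for the scratch registers, RIP
 * taken from our return address and RSP pointing just above it, then tail
 * jump to the callback that was passed in %rsi, with its argument
 * (originally in %rdx) moved to %rsi.
 */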
ENTRY(arch_unwind_init_running)
	CFI_STARTPROC
	movq %r15, R15(%rdi)
	movq %r14, R14(%rdi)
	xchgq %rsi, %rdx
	movq %r13, R13(%rdi)
	movq %r12, R12(%rdi)
	xorl %eax, %eax
	movq %rbp, RBP(%rdi)
	movq %rbx, RBX(%rdi)
	movq (%rsp), %rcx
	movq %rax, R11(%rdi)
	movq %rax, R10(%rdi)
	movq %rax, R9(%rdi)
	movq %rax, R8(%rdi)
	movq %rax, RAX(%rdi)
	movq %rax, RCX(%rdi)
	movq %rax, RDX(%rdi)
	movq %rax, RSI(%rdi)
	movq %rax, RDI(%rdi)
	movq %rax, ORIG_RAX(%rdi)
	movq %rcx, RIP(%rdi)
	leaq 8(%rsp), %rcx
	movq $__KERNEL_CS, CS(%rdi)
	movq %rax, EFLAGS(%rdi)
	movq %rcx, RSP(%rdi)
	movq $__KERNEL_DS, SS(%rdi)
	jmpq *%rdx
	CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
#endif