/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm
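
	/*
	 * Background note: SYSCALL stashes the user RIP in %rcx and the
	 * user RFLAGS in %r11, so the saved frame has no meaningful RCX
	 * value of its own. FIXUP_TOP_OF_STACK therefore fabricates a
	 * complete iret-style frame: user RSP from pda_oldrsp, the flat
	 * user segments, RCX marked unavailable with -1, and EFLAGS
	 * recovered from the saved R11 slot.
	 */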

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm
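
	/*
	 * The six quadwords pushed above mirror the hardware interrupt
	 * frame (SS, RSP, RFLAGS, CS, RIP) plus the orig_rax slot, so a
	 * kernel thread started on this fake frame can leave through the
	 * ordinary interrupt return path. UNFAKE_STACK_FRAME below pops
	 * exactly those 6*8 bytes again.
	 */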

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
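
	/*
	 * Note that the CFI_* macros (from asm/dwarf2.h) expand to pure
	 * DWARF2 unwind annotations: they describe where each register
	 * lives relative to the canonical frame address, for debuggers
	 * and unwinders, and emit no instructions at all.
	 */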

/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call	schedule_tail
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz	rff_trace
rff_action:
	RESTORE_REST
	testl	$3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je	int_ret_from_sys_call
	testl	$_TIF_IA32,threadinfo_flags(%rcx)
	jnz	int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp	ret_from_sys_call
rff_trace:
	movq	%rsp,%rdi
	call	syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp	rff_action
	CFI_ENDPROC

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack
 *	frame and report it properly in ps. Unfortunately we haven't.
 */
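
/*
 * For illustration only (not part of this file), a user-space caller
 * following the convention above might look like:
 *
 *	movq	$__NR_getpid,%rax	# system call number
 *	syscall				# rcx := RIP, r11 := RFLAGS
 *	# result now in %rax
 *
 * Because the SYSCALL instruction itself clobbers rcx and r11, the
 * kernel cannot preserve them, and arg3 travels in r10 instead of rcx.
 */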

ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,0
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz	tracesys
	cmpq	$__NR_syscall_max,%rax
	ja	badsys
	movq	%r10,%rcx
	call	*sys_call_table(,%rax,8)	# XXX: rip relative
	movq	%rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	CFI_REMEMBER_STATE
	jnz	sysret_careful
	movq	RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq
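
	/*
	 * SYSRETQ is the mirror image of the entry: it reloads the user
	 * RIP from %rcx and the user RFLAGS from %r11 (put back by the
	 * movq and RESTORE_ARGS above), while the user stack pointer is
	 * restored by hand from pda_oldrsp just before the swapgs.
	 */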

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt	$TIF_NEED_RESCHED,%edx
	jnc	sysret_signal
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	jmp	sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz	1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq	do_notify_resume(%rip),%rax
	leaq	-ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl	%esi,%esi		# oldset -> arg2
	call	ptregscall_common
1:	movl	$_TIF_NEED_RESCHED,%edi
	jmp	sysret_check

badsys:
	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp	ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq	$-ENOSYS,RAX(%rsp)
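	/*
	 * RAX is preset to -ENOSYS here so that if the tracer cancels
	 * the syscall (e.g. by setting the syscall number out of range)
	 * the traced task still sees a sane -ENOSYS return value.
	 */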
	FIXUP_TOP_OF_STACK %rdi
	movq	%rsp,%rdi
	call	syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq	$__NR_syscall_max,%rax
	ja	1f
	movq	%r10,%rcx	/* fixup for C */
	call	*sys_call_table(,%rax,8)
	movq	%rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq	%rsp,%rdi
	call	syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp	ret_from_sys_call
	CFI_ENDPROC

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_restore_args
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	jnz	int_careful
	andl	$~TS_COMPAT,threadinfo_status(%rcx)
	jmp	retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt	$TIF_NEED_RESCHED,%edx
	jnc	int_very_careful
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	cli
	jmp	int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz	int_signal
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET	8
	leaq	8(%rsp),%rdi	# &ptregs -> arg1
	call	syscall_trace_leave
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	andl	$~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp	int_restore_rest

int_signal:
	testl	$(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz	1f
	movq	%rsp,%rdi	# &ptregs -> arg1
	xorl	%esi,%esi	# oldset -> arg2
	call	do_notify_resume
1:	movl	$_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp	int_with_check
	CFI_ENDPROC

/*
 * Certain special system calls need to save a complete full stack frame.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
	.endm
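
	/*
	 * These are the syscalls that take a struct pt_regs pointer (or
	 * the user stack pointer) as an extra argument: clone/fork/vfork
	 * must copy the caller's full register state into the child, the
	 * signal-related calls need the complete frame to set up or
	 * inspect signal contexts, and iopl edits the saved EFLAGS.
	 */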

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq	%r11
	CFI_ADJUST_CFA_OFFSET	-8
	CFI_REGISTER	rip, r11
	SAVE_REST
	movq	%r11, %r15
	CFI_REGISTER	rip, r15
	FIXUP_TOP_OF_STACK %r11
	call	*%rax
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	CFI_REGISTER	rip, r11
	RESTORE_REST
	pushq	%r11
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip, 0
	ret
	CFI_ENDPROC
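
	/*
	 * The return address is popped into %r11 and parked in %r15
	 * across the call: %r15 is callee-saved, while %r11 is used as
	 * scratch by FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK and would
	 * be clobbered. It is pushed back just before the ret.
	 */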

ENTRY(stub_execve)
	CFI_STARTPROC
	popq	%r11
	CFI_ADJUST_CFA_OFFSET	-8
	CFI_REGISTER	rip, r11
	SAVE_REST
	movq	%r11, %r15
	CFI_REGISTER	rip, r15
	FIXUP_TOP_OF_STACK %r11
	call	sys_execve
	GET_THREAD_INFO(%rcx)
	bt	$TIF_IA32,threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jc	exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	CFI_REGISTER	rip, r11
	RESTORE_REST
	pushq	%r11
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip, 0
	ret

exec_32bit:
	CFI_RESTORE_STATE
	movq	%rax,RAX(%rsp)
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq	$8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq	%rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call	sys_rt_sigreturn
	movq	%rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-\ref
	/*CFI_REL_OFFSET	ss,SS-\ref*/
	CFI_REL_OFFSET	rsp,RSP-\ref
	/*CFI_REL_OFFSET	rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET	cs,CS-\ref*/
	CFI_REL_OFFSET	rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME	_frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME	_frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq	%rsp,%rdi
	/*
	 * Setup a stack frame pointer. This allows gdb to trace
	 * back to the original stack.
	 */
	movq	%rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
#else
	SAVE_ARGS
	leaq	-ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl	$3,CS(%rdi)
	je	1f
	swapgs
1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
	movq	%gs:pda_irqstackptr,%rax
	cmoveq	%rax,%rsp		/*todo This needs CFI annotation! */
	pushq	%rdi			# save old stack
	CFI_ADJUST_CFA_OFFSET	8
	call	\func
	.endm
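
	/*
	 * Stack-switch note: pda_irqcount is initialised to -1 by the
	 * per-CPU PDA setup code, so the incl above sets ZF only on the
	 * outermost interrupt entry. The cmoveq then switches %rsp to
	 * the per-CPU interrupt stack exactly once; nested interrupts
	 * keep running on the stack they arrived on. The old %rdi
	 * (pointing into the interrupted stack) is pushed so
	 * ret_from_intr can switch back.
	 */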

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	cli
	decl	%gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq	RBP(%rdi),%rbp
	CFI_DEF_CFA_REGISTER	rsp
#endif
	leaq	ARGOFFSET(%rdi),%rsp	/*todo This needs CFI annotation! */
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl	$_TIF_WORK_MASK,%edi
retint_check:
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	CFI_REMEMBER_STATE
	jnz	retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad	iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq	$-9999,%rdi	/* better code? */
	jmp	do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt	$TIF_NEED_RESCHED,%edx
	jnc	retint_signal
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	jmp	retint_check

retint_signal:
	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz	retint_swapgs
	sti
	SAVE_REST
	movq	$-1,ORIG_RAX(%rsp)
	xorl	%esi,%esi	# oldset
	movq	%rsp,%rdi	# &pt_regs
	call	do_notify_resume
	RESTORE_REST
	cli
	movl	$_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp	retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl	$0,threadinfo_preempt_count(%rcx)
	jnz	retint_restore_args
	bt	$TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc	retint_restore_args
	bt	$9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc	retint_restore_args
	call	preempt_schedule_irq
	jmp	exit_intr
#endif
	CFI_ENDPROC

/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq	$\num-256
	CFI_ADJUST_CFA_OFFSET	8
	interrupt \func
	jmp	ret_from_intr
	CFI_ENDPROC
	.endm
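
	/*
	 * The vector is pushed biased by -256 so that the saved
	 * orig_rax is always negative for interrupt frames; a
	 * non-negative orig_rax is reserved for system call numbers,
	 * which lets common code tell the two kinds of frame apart.
	 */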

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq	$0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET	8
	pushq	%rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET	8
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	pushq	%rax
	CFI_ADJUST_CFA_OFFSET	8
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
	.endm

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0
	SAVE_ALL
	cld
	movl	$1,%ebx
	movl	$MSR_GS_BASE,%ecx
	rdmsr
	testl	%edx,%edx
	js	1f
	swapgs
	xorl	%ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi
	movq	$-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call	\sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.endm
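
	/*
	 * The rdmsr of MSR_GS_BASE decides whether swapgs is needed:
	 * a kernel GS base lives in the negative (canonical kernel)
	 * half of the address space, so %edx has its sign bit set.
	 * If it is positive we interrupted user GS and must swap;
	 * %ebx records the answer (1 = already kernel GS, 0 = swapped)
	 * for the matching paranoid exit path.
	 */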

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq	$14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq	%rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq	14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq	%rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq	%rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq	%rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq	%r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq	%r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq	%r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq	%r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq	%rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq	%rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq	%r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq	%r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq	%r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq	%r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl	%ebx,%ebx
	testl	$3,CS(%rsp)
	je	error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq	%rdi,RDI(%rsp)
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi	/* get error code */
	movq	$-1,ORIG_RAX(%rsp)
	call	*%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl	%ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl	%eax,%eax
	jne	retint_kernel
	movl	threadinfo_flags(%rcx),%edx
	movl	$_TIF_WORK_MASK,%edi
	andl	%edi,%edx
	jnz	retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	jmp	iret_label
	CFI_ENDPROC

error_kernelspace:
	incl	%ebx
	/*
	 * There are two places in the kernel that can potentially fault with
	 * usergs. Handle them here. The exception handlers after iret run
	 * with kernel gs again, so don't set the user space flag.
	 * B stepping K8s sometimes report a truncated RIP for IRET
	 * exceptions returning to compat mode. Check for these here too.
	 */
	leaq	iret_label(%rip),%rbp
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	movl	%ebp,%ebp	/* zero extend */
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	cmpq	$gs_change,RIP(%rsp)
	je	error_swapgs
	jmp	error_sti

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET	8
	cli
	swapgs
gs_change:
	movl	%edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET	-8
	ret
	CFI_ENDPROC

	.section __ex_table,"a"
	.align	8
	.quad	gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl	%eax,%eax
	movl	%eax,%gs
	jmp	2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq	%rdx,%rdi
	orq	kernel_thread_flags(%rip),%rdi
	movq	$-1, %rsi
	movq	%rsp, %rdx

	xorl	%r8d,%r8d
	xorl	%r9d,%r9d

	# clone now
	call	do_fork
	movq	%rax,RAX(%rsp)
	xorl	%edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not rescheduling the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq	%rdi, %rax
	movq	%rsi, %rdi
	call	*%rax
	# exit
	xorl	%edi, %edi
	call	do_exit

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call	sys_execve
	movq	%rax, RAX(%rsp)
	RESTORE_REST
	testq	%rax,%rax
	je	int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET	8
	paranoidentry do_debug, DEBUG_STACK
	jmp	paranoid_exit
	CFI_ENDPROC
	.previous .text

	/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	pushq	$-1
	CFI_ADJUST_CFA_OFFSET	8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 */
	/* ebx:	no swapgs flag */
paranoid_exit:
	testl	%ebx,%ebx	/* swapgs needed? */
	jnz	paranoid_restore
	testl	$3,CS(%rsp)
	jnz	paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%ebx
	andl	$_TIF_WORK_MASK,%ebx
	jz	paranoid_swapgs
	movq	%rsp,%rdi	/* &pt_regs */
	call	sync_regs
	movq	%rax,%rsp	/* switch stack for scheduling */
	testl	$_TIF_NEED_RESCHED,%ebx
	jnz	paranoid_schedule
	movl	%ebx,%edx	/* arg3: thread flags */
	sti
	xorl	%esi,%esi	/* arg2: oldset */
	movq	%rsp,%rdi	/* arg1: &pt_regs */
	call	do_notify_resume
	cli
	jmp	paranoid_userspace
paranoid_schedule:
	sti
	call	schedule
	cli
	jmp	paranoid_userspace
	CFI_ENDPROC

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET	8
	paranoidentry do_int3, DEBUG_STACK
	jmp	paranoid_exit
	CFI_ENDPROC
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp	paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp	paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET	8
	paranoidentry do_machine_check
	jmp	paranoid_exit
	CFI_ENDPROC
#endif
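
/*
 * Run the softirq handler on the per-CPU interrupt stack: switch to
 * pda_irqstackptr only if we were not already on it (the same
 * pda_irqcount trick as in the interrupt macro above), call
 * __do_softirq, then pop the saved stack pointer back.
 */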
ENTRY(call_softirq)
	CFI_STARTPROC
	movq	%gs:pda_irqstackptr,%rax
	movq	%rsp,%rdx
	CFI_DEF_CFA_REGISTER	rdx
	incl	%gs:pda_irqcount
	cmove	%rax,%rsp
	pushq	%rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call	__do_softirq
	popq	%rsp
	CFI_DEF_CFA_REGISTER	rsp
	decl	%gs:pda_irqcount
	ret
	CFI_ENDPROC