/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */
/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:	at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm
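
/*
 * Background note (explanatory, not part of the original flow): the
 * SYSCALL instruction leaves the user RIP in %rcx and the user RFLAGS
 * in %r11, so the partial frame saved by SAVE_ARGS carries no
 * authoritative RIP/EFLAGS/RSP slots of its own. FIXUP_TOP_OF_STACK
 * therefore synthesizes them: it fills RSP from pda_oldrsp, sets the
 * user CS/SS selectors, copies the R11 save slot into EFLAGS and
 * poisons the RCX slot with -1. RESTORE_TOP_OF_STACK mirrors any
 * ptrace-modified EFLAGS back into R11 before SYSRET consumes it.
 */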
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
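
/*
 * Illustrative sketch of the frame FAKE_STACK_FRAME builds (stack grows
 * down; six quadwords, which is exactly what UNFAKE_STACK_FRAME's
 * addq $8*6 pops back off):
 *
 *	ss       = 0
 *	rsp      = 0
 *	eflags   = 1<<9		(IF set)
 *	cs       = __KERNEL_CS
 *	rip      = \child_rip
 *	orig_rax = 0		<- %rsp after the macro
 */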
	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the
 *	stack frame and report it properly in ps. Unfortunately we don't
 *	have one.
 */
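
/*
 * Illustration only (user-space side, not part of this file): a raw
 * 3-argument call following the convention above would look roughly
 * like this. Note arg3 travels in %r10, not %rcx, precisely because
 * SYSCALL clobbers %rcx with the return RIP:
 *
 *	movq	$__NR_write,%rax	# syscall number
 *	movq	$1,%rdi			# arg0: fd
 *	leaq	buf(%rip),%rsi		# arg1: buffer (hypothetical symbol)
 *	movq	$len,%rdx		# arg2: count (hypothetical symbol)
 *	syscall				# rcx := rip, r11 := rflags
 */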
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,0
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  sysret_careful
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq %gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	jmp sysret_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call
	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja  1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp ret_from_sys_call
	CFI_ENDPROC
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC
/*
 * Certain special system calls need to save a complete full stack frame.
 */

	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq    -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
	.endm
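
/*
 * For illustration, "PTREGSCALL stub_fork, sys_fork, %rdi" below
 * expands to:
 *
 *	.globl stub_fork
 * stub_fork:
 *	leaq	sys_fork(%rip),%rax
 *	leaq	-ARGOFFSET+8(%rsp),%rdi
 *	jmp	ptregscall_common
 *
 * i.e. the handler address goes in %rax, and the pt_regs pointer lands
 * in whichever argument register the C prototype expects it in.
 */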
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	GET_THREAD_INFO(%rcx)
	bt $TIF_IA32,threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jc exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret

exec_32bit:
	CFI_RESTORE_STATE
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in the
 * fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Set up a stack frame pointer.  This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp		/*todo This needs CFI annotation! */
	pushq %rdi			# save old stack
	CFI_ADJUST_CFA_OFFSET	8
	call \func
	.endm
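
/*
 * Note on the stack switch above (explanatory; assumes pda_irqcount is
 * initialized to -1): the incl yields zero, and hence sets ZF, only on
 * the first, non-nested interrupt, so cmoveq moves %rsp onto the
 * per-CPU interrupt stack exactly once. Nested interrupts leave ZF
 * clear and stay on the stack they arrived on. The old stack position
 * survives in %rdi and is pushed so ret_from_intr can restore it.
 */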
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	cli
	decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq RBP(%rdi),%rbp
	CFI_DEF_CFA_REGISTER	rsp
#endif
	leaq ARGOFFSET(%rdi),%rsp	/*todo This needs CFI annotation! */
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $-9999,%rdi	/* better code? */
	jmp do_exit
	.previous
	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC
/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $\num-256
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm
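
/*
 * Aside (my reading, not stated in the original): the vector is pushed
 * biased by -256 so the orig_rax slot is always negative for hardware
 * interrupts; code that inspects orig_rax (e.g. syscall-restart logic)
 * can then tell interrupts apart from system calls, whose numbers are
 * >= 0, while a handler can still recover the vector from the low
 * 8 bits of the pushed value.
 */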
ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif
/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm
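
/*
 * For illustration, "errorentry do_page_fault" expands to (CFI
 * annotations omitted):
 *
 *	pushq	%rax
 *	leaq	do_page_fault(%rip),%rax
 *	jmp	error_entry
 *
 * while zeroentry first pushes a 0 error code, so both variants reach
 * error_entry with an identical stack layout: error code/orig_rax on
 * top of the hardware frame, the live %rax parked in the rdi slot, and
 * the handler address in %rax.
 */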
	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	.macro paranoidentry sym
	SAVE_ALL
	cld
	movl $1,%ebx
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js    1f
	swapgs
	xorl  %ebx,%ebx
1:	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	call \sym
	cli
	.endm
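
/*
 * Why the rdmsr sign test works (explanatory note): kernel per-CPU GS
 * bases live in the negative half of the address space, so bit 63 of
 * MSR_GS_BASE - visible as the sign of %edx after rdmsr - says whether
 * kernel gs is already loaded. If it is, the js is taken and %ebx
 * stays 1, telling paranoid_exit/error_exit to skip the swap back;
 * otherwise we swapgs now and clear %ebx so the exit path undoes it.
 */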
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	movl  threadinfo_flags(%rcx),%edx
	movl  $_TIF_WORK_MASK,%edi
	andl  %edi,%edx
	jnz  retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	iretq
	CFI_ENDPROC
error_kernelspace:
	incl %ebx
	/*
	 * There are two places in the kernel that can potentially fault
	 * with usergs. Handle them here. The exception handlers after
	 * iret run with kernel gs again, so don't set the user space flag.
	 * B stepping K8s sometimes report a truncated RIP for IRET
	 * exceptions returning to compat mode. Check for these here too.
	 */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp  error_sti
	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous
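
/*
 * Sketch of the fixup mechanism used above (and earlier at iret_label):
 * each __ex_table entry pairs a potentially faulting instruction
 * address (gs_change) with a recovery address (bad_gs). If loading the
 * selector faults, the fault handler looks the saved RIP up in the
 * table and resumes at bad_gs, which substitutes a null selector and
 * jumps back to label 2 to finish the normal exit path.
 */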
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the
	 * need for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
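
/*
 * Hedged usage sketch (caller side, C; "worker" is a hypothetical
 * function, not part of this file): a boot-time caller might spawn a
 * kernel thread roughly like
 *
 *	static int worker(void *arg) { ... return 0; }
 *	...
 *	pid = kernel_thread(worker, NULL, CLONE_FS | CLONE_FILES);
 *
 * The child starts at child_rip above, which shuffles fn/arg into
 * place, calls fn, and then do_exit()s with status 0 regardless of
 * fn's return value.
 */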
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

	/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 */
	/* ebx:	no swapgs flag */
paranoid_exit:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz   paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC
KPROBE_ENTRY(int3)
	zeroentry do_int3
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
#endif
ENTRY(call_debug)
	zeroentry do_call_debug

ENTRY(call_softirq)
	CFI_STARTPROC
	movq %gs:pda_irqstackptr,%rax
	pushq %r15
	CFI_ADJUST_CFA_OFFSET 8
	movq %rsp,%r15
	CFI_DEF_CFA_REGISTER	r15
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	call __do_softirq
	movq %r15,%rsp
	CFI_DEF_CFA_REGISTER	rsp
	decl %gs:pda_irqcount
	popq %r15
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
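
/*
 * Note (explanatory): call_softirq performs by hand the same
 * incl/cmove switch onto the per-CPU interrupt stack that the
 * interrupt macro does, using %r15 instead of a pushed %rdi to
 * remember the old stack, so __do_softirq also runs with its irq-stack
 * depth accounted for in pda_irqcount.
 */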