/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
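
/*
 * Illustrative note (not part of the original source): the ptregs stubs
 * further down (e.g. stub_fork via ptregscall_common) apply
 * FIXUP_TOP_OF_STACK before calling the C handler and
 * RESTORE_TOP_OF_STACK afterwards, so the C code sees a complete pt_regs
 * even when entered through the SYSCALL fast path.
 */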
/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq %gs:pda_oldrsp,\tmp
	movq \tmp,RSP(%rsp)
	movq $__USER_DS,SS(%rsp)
	movq $__USER_CS,CS(%rsp)
	movq $-1,RCX(%rsp)
	movq R11(%rsp),\tmp	/* get eflags */
	movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq RSP-\offset(%rsp),\tmp
	movq \tmp,%gs:pda_oldrsp
	movq EFLAGS-\offset(%rsp),\tmp
	movq \tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq %rax		/* ss */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_OFFSET rip,0
	pushq $(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET 8
	pushq $__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET 8
	pushq \child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_OFFSET rip,0
	pushq %rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET 8
.endm

.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET -(6*8)
.endm

.macro CFI_DEFAULT_STACK
	CFI_ADJUST_CFA_OFFSET (SS)
	CFI_OFFSET r15,R15-SS
	CFI_OFFSET r14,R14-SS
	CFI_OFFSET r13,R13-SS
	CFI_OFFSET r12,R12-SS
	CFI_OFFSET rbp,RBP-SS
	CFI_OFFSET rbx,RBX-SS
	CFI_OFFSET r11,R11-SS
	CFI_OFFSET r10,R10-SS
	CFI_OFFSET r9,R9-SS
	CFI_OFFSET r8,R8-SS
	CFI_OFFSET rax,RAX-SS
	CFI_OFFSET rcx,RCX-SS
	CFI_OFFSET rdx,RDX-SS
	CFI_OFFSET rsi,RSI-SS
	CFI_OFFSET rdi,RDI-SS
	CFI_OFFSET rsp,RSP-SS
	CFI_OFFSET rip,RIP-SS
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
	CFI_STARTPROC
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the
 *	stack frame and report it properly in ps. Unfortunately we don't
 *	have one.
 */
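
/*
 * Illustrative example (not part of the original source): a 3-argument
 * call such as write(fd, buf, count) arrives here with
 *	rax = __NR_write, rdi = fd, rsi = buf, rdx = count,
 * while rcx holds the user return address and r11 the user eflags,
 * both saved by the SYSCALL instruction itself.
 */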
ENTRY(system_call)
	CFI_STARTPROC
	swapgs
	movq %rsp,%gs:pda_oldrsp
	movq %gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz sysret_careful
	movq RIP-ARGOFFSET(%rsp),%rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	movq %gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx: work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	call schedule
	popq %rdi
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz 1f
	/* Really a signal */
	/* edx: work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	jmp sysret_check

	/* Do syscall tracing */
tracesys:
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja 1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp ret_from_sys_call

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	jmp retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	sti
	pushq %rdi
	call schedule
	popq %rdi
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC
/*
 * Certain special system calls that need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp ptregscall_common
.endm

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	ret
	CFI_ENDPROC

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	GET_THREAD_INFO(%rcx)
	bt $TIF_IA32,threadinfo_flags(%rcx)
	jc exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	push %r11
	ret

exec_32bit:
	CFI_ADJUST_CFA_OFFSET REST_SKIP
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
.macro interrupt func
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,(SS-RDI)
	CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
	CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Setup a stack frame pointer.  This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl %gs:pda_irqcount		# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp
	pushq %rdi			# save old stack
	call \func
.endm
ENTRY(common_interrupt)
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq %rdi
	cli
	decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq RBP(%rdi),%rbp
#endif
	leaq ARGOFFSET(%rdi),%rsp
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $-9999,%rdi	/* better code? */
	jmp do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	sti
	pushq %rdi
	call schedule
	popq %rdi
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC
/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
	pushq $\num-256
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif
/*
 * Exception entry points.
 */
.macro zeroentry sym
	pushq $0	/* push error code/oldrax */
	pushq %rax	/* push real oldrax to the rdi slot */
	leaq \sym(%rip),%rax
	jmp error_entry
.endm

.macro errorentry sym
	pushq %rax
	leaq \sym(%rip),%rax
	jmp error_entry
.endm

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
.macro paranoidentry sym
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	swapgs
	xorl %ebx,%ebx
1:	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	call \sym
	cli
.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,(SS-RDI)
	CFI_REL_OFFSET rsp,(RSP-RDI)
	CFI_REL_OFFSET rip,(RIP-RDI)
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET (14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	iretq
	CFI_ENDPROC
error_kernelspace:
	incl %ebx
	/*
	 * There are two places in the kernel that can potentially fault with
	 * usergs. Handle them here. The exception handlers after iret run
	 * with kernel gs again, so don't set the user space flag.
	 * B stepping K8s sometimes report a truncated RIP for IRET
	 * exceptions returning to compat mode. Check for these here too.
	 */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	movl %ebp,%ebp		/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti
	/* Reload gs selector with exception handling */
	/* edi: new selector */
ENTRY(load_gs_index)
	pushf
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	ret

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
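
/*
 * Illustrative only (not part of the original source): a typical in-kernel
 * caller might start a helper thread with something like
 *	kernel_thread(my_thread_fn, my_arg, CLONE_FS | CLONE_FILES | SIGCHLD);
 * where my_thread_fn and my_arg are hypothetical names. The flags end up
 * in rdx and are ORed with kernel_thread_flags below before do_fork() runs.
 */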
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
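
/*
 * Illustrative note (not part of the original source): this is the
 * in-kernel execve used e.g. during boot, roughly
 *	execve("/sbin/init", argv_init, envp_init);
 * User processes reach sys_execve through stub_execve above instead.
 */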
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	CFI_STARTPROC
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

	/* runs on exception stack */
ENTRY(nmi)
	CFI_STARTPROC
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 */
	/* ebx: no swapgs flag */
paranoid_exit:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC

KPROBE_ENTRY(int3)
	zeroentry do_int3
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	CFI_STARTPROC
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	CFI_STARTPROC
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	CFI_STARTPROC
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
#endif

ENTRY(call_debug)
	zeroentry do_call_debug

ENTRY(call_softirq)
	movq %gs:pda_irqstackptr,%rax
	pushq %r15
	movq %rsp,%r15
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	call __do_softirq
	movq %r15,%rsp
	decl %gs:pda_irqcount
	popq %r15
	ret