/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * $Id$
 */
/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens after every
 * interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; that is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like a partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
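/*
 * For reference, a sketch of the pt_regs layout that the R15..SS and
 * ORIG_RAX offsets used below index into, assuming the
 * asm-x86_64/ptrace.h definitions of this era (lowest address first):
 *
 * struct pt_regs {
 *         unsigned long r15, r14, r13, r12, rbp, rbx, r11, r10;
 *         unsigned long r9, r8, rax, rcx, rdx, rsi, rdi;
 *         unsigned long orig_rax;                 // syscall number / error code
 *         unsigned long rip, cs, eflags, rsp, ss; // hardware frame
 * };
 *
 * SAVE_ARGS stores rdi..r11 only (a partial frame); SAVE_REST adds
 * rbx..r15 to make it a full frame.
 */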
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/offset.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>

.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL-based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
        movq %gs:pda_oldrsp,\tmp
        movq \tmp,RSP(%rsp)
        movq $__USER_DS,SS(%rsp)
        movq $__USER_CS,CS(%rsp)
        movq $-1,RCX(%rsp)
        movq R11(%rsp),\tmp /* get eflags */
        movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
        movq RSP-\offset(%rsp),\tmp
        movq \tmp,%gs:pda_oldrsp
        movq EFLAGS-\offset(%rsp),\tmp
        movq \tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
        /* push in order ss, rsp, eflags, cs, rip */
        xorq %rax, %rax
        pushq %rax /* ss */
        CFI_ADJUST_CFA_OFFSET 8
        pushq %rax /* rsp */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_OFFSET rip,0
        pushq $(1<<9) /* eflags - interrupts on */
        CFI_ADJUST_CFA_OFFSET 8
        pushq $__KERNEL_CS /* cs */
        CFI_ADJUST_CFA_OFFSET 8
        pushq \child_rip /* rip */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_OFFSET rip,0
        pushq %rax /* orig rax */
        CFI_ADJUST_CFA_OFFSET 8
.endm
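/*
 * The fake frame built above is six quadwords (from high to low
 * address: ss, rsp, eflags with IF set, cs, \child_rip, orig_rax),
 * which is exactly what UNFAKE_STACK_FRAME below pops again
 * (addq $8*6).
 */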
.macro UNFAKE_STACK_FRAME
        addq $8*6, %rsp
        CFI_ADJUST_CFA_OFFSET -(6*8)
.endm

.macro CFI_DEFAULT_STACK
        CFI_ADJUST_CFA_OFFSET (SS)
        CFI_OFFSET r15,R15-SS
        CFI_OFFSET r14,R14-SS
        CFI_OFFSET r13,R13-SS
        CFI_OFFSET r12,R12-SS
        CFI_OFFSET rbp,RBP-SS
        CFI_OFFSET rbx,RBX-SS
        CFI_OFFSET r11,R11-SS
        CFI_OFFSET r10,R10-SS
        CFI_OFFSET r9,R9-SS
        CFI_OFFSET r8,R8-SS
        CFI_OFFSET rax,RAX-SS
        CFI_OFFSET rcx,RCX-SS
        CFI_OFFSET rdx,RDX-SS
        CFI_OFFSET rsi,RSI-SS
        CFI_OFFSET rdi,RDI-SS
        CFI_OFFSET rsp,RSP-SS
        CFI_OFFSET rip,RIP-SS
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
        CFI_STARTPROC
        CFI_DEFAULT_STACK
        call schedule_tail
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
        jnz rff_trace
rff_action:
        RESTORE_REST
        testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
        je int_ret_from_sys_call
        testl $_TIF_IA32,threadinfo_flags(%rcx)
        jnz int_ret_from_sys_call
        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
        jmp ret_from_sys_call
rff_trace:
        movq %rsp,%rdi
        call syscall_trace_leave
        GET_THREAD_INFO(%rcx)
        jmp rff_action
        CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */
/*
 * Register setup:
 * rax system call number
 * rdi arg0
 * rcx return address for syscall/sysret, C arg3
 * rsi arg1
 * rdx arg2
 * r10 arg3 (--> moved to rcx for C)
 * r8 arg4
 * r9 arg5
 * r11 eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX if we had a free scratch register we could save the RSP into the stack frame
 * and report it properly in ps. Unfortunately we don't have one.
 */
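/*
 * For example, a user-space write(fd, buf, count) typically arrives
 * here with rax = __NR_write, rdi = fd, rsi = buf, rdx = count; the
 * SYSCALL instruction itself loads rcx = user rip and r11 = user
 * eflags on the way in, and the result goes back to user space in rax.
 */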
ENTRY(system_call)
        CFI_STARTPROC
        swapgs
        movq %rsp,%gs:pda_oldrsp
        movq %gs:pda_kernelstack,%rsp
        sti
        SAVE_ARGS 8,1
        movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
        movq %rcx,RIP-ARGOFFSET(%rsp)
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
        jnz tracesys
        cmpq $__NR_syscall_max,%rax
        ja badsys
        movq %r10,%rcx
        call *sys_call_table(,%rax,8) # XXX: rip relative
        movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
        .globl ret_from_sys_call
ret_from_sys_call:
        movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
sysret_check:
        GET_THREAD_INFO(%rcx)
        cli
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        jnz sysret_careful
        movq RIP-ARGOFFSET(%rsp),%rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        movq %gs:pda_oldrsp,%rsp
        swapgs
        sysretq

/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc sysret_signal
        sti
        pushq %rdi
        call schedule
        popq %rdi
        jmp sysret_check

/* Handle a signal */
/* edx: work flags (arg3) */
sysret_signal:
        sti
        leaq do_notify_resume(%rip),%rax
        leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
        xorl %esi,%esi # oldset -> arg2
        call ptregscall_common
        jmp sysret_check

/* Do syscall tracing */
tracesys:
        SAVE_REST
        movq $-ENOSYS,RAX(%rsp)
        FIXUP_TOP_OF_STACK %rdi
        movq %rsp,%rdi
        call syscall_trace_enter
        LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed them */
        RESTORE_REST
        cmpq $__NR_syscall_max,%rax
        ja 1f
        movq %r10,%rcx /* fixup for C */
        call *sys_call_table(,%rax,8)
        movq %rax,RAX-ARGOFFSET(%rsp)
1:      SAVE_REST
        movq %rsp,%rdi
        call syscall_trace_leave
        RESTORE_TOP_OF_STACK %rbx
        RESTORE_REST
        jmp ret_from_sys_call

badsys:
        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
        jmp ret_from_sys_call
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
        cli
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_restore_args
        movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
int_with_check:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        jnz int_careful
        jmp retint_swapgs

/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc int_very_careful
        sti
        pushq %rdi
        call schedule
        popq %rdi
        cli
        jmp int_with_check

/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
        sti
        SAVE_REST
        /* Check for syscall exit trace */
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
        jz int_signal
        pushq %rdi
        leaq 8(%rsp),%rdi # &ptregs -> arg1
        call syscall_trace_leave
        popq %rdi
        andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
        cli
        jmp int_restore_rest

int_signal:
        testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
        jz 1f
        movq %rsp,%rdi # &ptregs -> arg1
        xorl %esi,%esi # oldset -> arg2
        call do_notify_resume
1:      movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
        RESTORE_REST
        cli
        jmp int_with_check
        CFI_ENDPROC
/*
 * Certain special system calls need to save a full stack frame.
 */
.macro PTREGSCALL label,func,arg
        .globl \label
\label:
        leaq \func(%rip),%rax
        leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
        jmp ptregscall_common
.endm

        PTREGSCALL stub_clone, sys_clone, %r8
        PTREGSCALL stub_fork, sys_fork, %rdi
        PTREGSCALL stub_vfork, sys_vfork, %rdi
        PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
        PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
        PTREGSCALL stub_iopl, sys_iopl, %rsi
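/*
 * The \arg register receives &pt_regs because these handlers take the
 * register frame as an explicit C argument. Sketch, assuming the
 * prototypes of this era: sys_clone() takes it fifth (hence %r8),
 *
 * extern long sys_clone(unsigned long flags, unsigned long newsp,
 *                       void *parent_tid, void *child_tid,
 *                       struct pt_regs *regs)
 *
 * while sys_fork()/sys_vfork() take it first (%rdi), sys_iopl() second
 * (%rsi), and sys_rt_sigsuspend()/sys_sigaltstack() third (%rdx).
 */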
ENTRY(ptregscall_common)
        CFI_STARTPROC
        popq %r11
        CFI_ADJUST_CFA_OFFSET -8
        SAVE_REST
        movq %r11, %r15
        FIXUP_TOP_OF_STACK %r11
        call *%rax
        RESTORE_TOP_OF_STACK %r11
        movq %r15, %r11
        RESTORE_REST
        pushq %r11
        CFI_ADJUST_CFA_OFFSET 8
        ret
        CFI_ENDPROC

ENTRY(stub_execve)
        CFI_STARTPROC
        popq %r11
        CFI_ADJUST_CFA_OFFSET -8
        SAVE_REST
        movq %r11, %r15
        FIXUP_TOP_OF_STACK %r11
        call sys_execve
        GET_THREAD_INFO(%rcx)
        bt $TIF_IA32,threadinfo_flags(%rcx)
        jc exec_32bit
        RESTORE_TOP_OF_STACK %r11
        movq %r15, %r11
        RESTORE_REST
        push %r11
        ret

exec_32bit:
        CFI_ADJUST_CFA_OFFSET REST_SKIP
        movq %rax,RAX(%rsp)
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, which reloads rip and eflags from %rcx
 * and %r11 and so cannot restore those two registers; use the IRET return
 * path instead.
 */
ENTRY(stub_rt_sigreturn)
        CFI_STARTPROC
        addq $8, %rsp
        SAVE_REST
        movq %rsp,%rdi
        FIXUP_TOP_OF_STACK %r11
        call sys_rt_sigreturn
        movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */
/* 0(%rsp): interrupt number */
.macro interrupt func
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,(SS-RDI)
        CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
        CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
        cld
#ifdef CONFIG_DEBUG_INFO
        SAVE_ALL
        movq %rsp,%rdi
        /*
         * Set up a stack frame pointer. This allows gdb to trace
         * back to the original stack.
         */
        movq %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
#else
        SAVE_ARGS
        leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
#endif
        testl $3,CS(%rdi)
        je 1f
        swapgs
1:      addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
        movq %gs:pda_irqstackptr,%rax
        cmoveq %rax,%rsp
        pushq %rdi # save old stack
        call \func
.endm
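/*
 * pda_irqcount sits at -1 while no interrupt is being handled, so the
 * addl above produces zero (setting ZF) exactly for the outermost
 * interrupt; only then does the cmoveq switch %rsp to the per-CPU
 * interrupt stack. Nested interrupts keep running on the stack they
 * arrived on.
 */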
ENTRY(common_interrupt)
        interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
        popq %rdi
        cli
        subl $1,%gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
        movq RBP(%rdi),%rbp
#endif
        leaq ARGOFFSET(%rdi),%rsp
exit_intr:
        GET_THREAD_INFO(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_kernel

/* Interrupt came from user space */
/*
 * Has a correct top of stack, but a partial stack frame
 * %rcx: thread info. Interrupts off.
 */
retint_with_reschedule:
        movl $_TIF_WORK_MASK,%edi
retint_check:
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        jnz retint_careful
retint_swapgs:
        swapgs
retint_restore_args:
        cli
        RESTORE_ARGS 0,8,0
iret_label:
        iretq

        .section __ex_table,"a"
        .quad iret_label,bad_iret
        .previous
        .section .fixup,"ax"
/* force a signal here? this matches i386 behaviour */
/* running with kernel gs */
bad_iret:
        movq $-9999,%rdi /* better code? */
        jmp do_exit
        .previous

/* edi: workmask, edx: work */
retint_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc retint_signal
        sti
        pushq %rdi
        call schedule
        popq %rdi
        GET_THREAD_INFO(%rcx)
        cli
        jmp retint_check

retint_signal:
        sti
        SAVE_REST
        movq $-1,ORIG_RAX(%rsp)
        xorq %rsi,%rsi # oldset
        movq %rsp,%rdi # &pt_regs
        call do_notify_resume
        RESTORE_REST
        cli
        GET_THREAD_INFO(%rcx)
        movl $_TIF_WORK_MASK,%edi
        jmp retint_check

#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
        .p2align
retint_kernel:
        cmpl $0,threadinfo_preempt_count(%rcx)
        jnz retint_restore_args
        bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
        jnc retint_restore_args
        bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
        jnc retint_restore_args
        call preempt_schedule_irq
        jmp exit_intr
#endif
        CFI_ENDPROC

/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
        pushq $\num-256
        interrupt \func
        jmp ret_from_intr
        CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
ENTRY(invalidate_interrupt)
        apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
ENTRY(error_interrupt)
        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
ENTRY(spurious_interrupt)
        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif

/*
 * Exception entry points.
 */
.macro zeroentry sym
        pushq $0 /* push error code/oldrax */
        pushq %rax /* push real oldrax to the rdi slot */
        leaq \sym(%rip),%rax
        jmp error_entry
.endm

.macro errorentry sym
        pushq %rax
        leaq \sym(%rip),%rax
        jmp error_entry
.endm
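/*
 * zeroentry is for exceptions where the CPU pushes no error code, so a
 * zero is pushed by hand (e.g. divide_error below); errorentry is for
 * exceptions where the hardware already pushed one (e.g. page_fault).
 * Either way error_entry finds the error code and the saved rax in the
 * same stack slots.
 */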
/* error code is on the stack already */
/* handle NMI-like exceptions that can happen anywhere */
.macro paranoidentry sym
        SAVE_ALL
        cld
        movl $1,%ebx
        movl $MSR_GS_BASE,%ecx
        rdmsr
        testl %edx,%edx
        js 1f
        swapgs
        xorl %ebx,%ebx
1:      movq %rsp,%rdi
        movq ORIG_RAX(%rsp),%rsi
        movq $-1,ORIG_RAX(%rsp)
        call \sym
        cli
.endm
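/*
 * The rdmsr above leaves the high half of GS.base in %edx. A kernel GS
 * base is an address in the negative half of the address space, so a
 * set sign bit means swapgs has already happened and %ebx stays 1
 * ("don't swap on exit"); otherwise we swapgs here and clear %ebx so
 * the exit path swaps back.
 */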
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
        CFI_STARTPROC simple
        CFI_DEF_CFA rsp,(SS-RDI)
        CFI_REL_OFFSET rsp,(RSP-RDI)
        CFI_REL_OFFSET rip,(RIP-RDI)
        /* rdi slot contains rax, oldrax contains error code */
        cld
        subq $14*8,%rsp
        CFI_ADJUST_CFA_OFFSET (14*8)
        movq %rsi,13*8(%rsp)
        CFI_REL_OFFSET rsi,RSI
        movq 14*8(%rsp),%rsi /* load rax from rdi slot */
        movq %rdx,12*8(%rsp)
        CFI_REL_OFFSET rdx,RDX
        movq %rcx,11*8(%rsp)
        CFI_REL_OFFSET rcx,RCX
        movq %rsi,10*8(%rsp) /* store rax */
        CFI_REL_OFFSET rax,RAX
        movq %r8, 9*8(%rsp)
        CFI_REL_OFFSET r8,R8
        movq %r9, 8*8(%rsp)
        CFI_REL_OFFSET r9,R9
        movq %r10,7*8(%rsp)
        CFI_REL_OFFSET r10,R10
        movq %r11,6*8(%rsp)
        CFI_REL_OFFSET r11,R11
        movq %rbx,5*8(%rsp)
        CFI_REL_OFFSET rbx,RBX
        movq %rbp,4*8(%rsp)
        CFI_REL_OFFSET rbp,RBP
        movq %r12,3*8(%rsp)
        CFI_REL_OFFSET r12,R12
        movq %r13,2*8(%rsp)
        CFI_REL_OFFSET r13,R13
        movq %r14,1*8(%rsp)
        CFI_REL_OFFSET r14,R14
        movq %r15,(%rsp)
        CFI_REL_OFFSET r15,R15
        xorl %ebx,%ebx
        testl $3,CS(%rsp)
        je error_kernelspace
error_swapgs:
        swapgs
error_sti:
        movq %rdi,RDI(%rsp)
        movq %rsp,%rdi
        movq ORIG_RAX(%rsp),%rsi /* get error code */
        movq $-1,ORIG_RAX(%rsp)
        call *%rax

/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
        movl %ebx,%eax
        RESTORE_REST
        cli
        GET_THREAD_INFO(%rcx)
        testl %eax,%eax
        jne retint_kernel
        movl threadinfo_flags(%rcx),%edx
        movl $_TIF_WORK_MASK,%edi
        andl %edi,%edx
        jnz retint_careful
        swapgs
        RESTORE_ARGS 0,8,0
        iretq
        CFI_ENDPROC
error_kernelspace:
        incl %ebx
        /*
         * There are two places in the kernel that can potentially fault with
         * usergs. Handle them here. The exception handlers after iret run
         * with kernel gs again, so don't set the user space flag.
         * B-stepping K8s sometimes report a truncated RIP for IRET exceptions
         * returning to compat mode. Check for these here too.
         */
        leaq iret_label(%rip),%rbp
        cmpq %rbp,RIP(%rsp)
        je error_swapgs
        movl %ebp,%ebp /* zero extend */
        cmpq %rbp,RIP(%rsp)
        je error_swapgs
        cmpq $gs_change,RIP(%rsp)
        je error_swapgs
        jmp error_sti
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
        pushf
        cli
        swapgs
gs_change:
        movl %edi,%gs
2:      mfence /* workaround */
        swapgs
        popf
        ret

        .section __ex_table,"a"
        .align 8
        .quad gs_change,bad_gs
        .previous
        .section .fixup,"ax"
/* running with kernelgs */
bad_gs:
        swapgs /* switch back to user gs */
        xorl %eax,%eax
        movl %eax,%gs
        jmp 2b
        .previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 * extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 * rdi: fn, rsi: arg, rdx: flags
 */
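/*
 * Illustrative call, in the style of this era's boot code (names
 * assumed, not defined in this file):
 *
 * pid = kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND);
 *
 * The child starts executing at child_rip below, with fn and arg still
 * in rdi/rsi.
 */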
ENTRY(kernel_thread)
        CFI_STARTPROC
        FAKE_STACK_FRAME $child_rip
        SAVE_ALL

        # rdi: flags, rsi: usp, rdx: will be &pt_regs
        movq %rdx,%rdi
        orq kernel_thread_flags(%rip),%rdi
        movq $-1, %rsi
        movq %rsp, %rdx
        xorl %r8d,%r8d
        xorl %r9d,%r9d

        # clone now
        call do_fork
        movq %rax,RAX(%rsp)
        xorl %edi,%edi

        /*
         * It isn't worth checking for a reschedule here, so internally to
         * the x86_64 port you can rely on kernel_thread() not rescheduling
         * the child before returning; this avoids the need for hacks, for
         * example to fork off the per-CPU idle tasks.
         * [Hopefully no generic code relies on the reschedule -AK]
         */
        RESTORE_ALL
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC

child_rip:
        /*
         * Here we are in the child and the registers are set as they were
         * at kernel_thread() invocation in the parent.
         */
        movq %rdi, %rax
        movq %rsi, %rdi
        call *%rax
        # exit
        xorq %rdi, %rdi
        call do_exit
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 * extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 * rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
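/*
 * Illustrative caller (roughly what spawning init amounts to in this
 * era; names assumed, not defined here):
 *
 * execve("/sbin/init", argv_init, envp_init);
 *
 * On success sys_execve returns 0 and the new user state sits in the
 * fake frame, so we must leave through IRET instead of a plain ret.
 */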
ENTRY(execve)
        CFI_STARTPROC
        FAKE_STACK_FRAME $0
        SAVE_ALL
        call sys_execve
        movq %rax, RAX(%rsp)
        RESTORE_REST
        testq %rax,%rax
        je int_ret_from_sys_call
        RESTORE_ARGS
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC

ENTRY(page_fault)
        errorentry do_page_fault
ENTRY(coprocessor_error)
        zeroentry do_coprocessor_error
ENTRY(simd_coprocessor_error)
        zeroentry do_simd_coprocessor_error
ENTRY(device_not_available)
        zeroentry math_state_restore

/* runs on exception stack */
ENTRY(debug)
        CFI_STARTPROC
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_debug
        jmp paranoid_exit
        CFI_ENDPROC

/* runs on exception stack */
ENTRY(nmi)
        CFI_STARTPROC
        pushq $-1
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_nmi
/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * the NMI case should be common, and it does not enable IRQs and
 * cannot get reschedule ticks.
 */
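/*
 * Roughly, in C-like pseudocode of the labels below:
 *
 * if (ebx == 0) {                        // we swapped gs on entry
 *         while (from_user && (flags & _TIF_WORK_MASK))
 *                 do work;               // sync_regs, schedule/notify
 *         swapgs;
 * }
 * RESTORE_ALL 8; iretq;
 */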
/* ebx: no swapgs flag */
paranoid_exit:
        testl %ebx,%ebx /* swapgs needed? */
        jnz paranoid_restore
        testl $3,CS(%rsp)
        jnz paranoid_userspace
paranoid_swapgs:
        swapgs
paranoid_restore:
        RESTORE_ALL 8
        iretq
paranoid_userspace:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%ebx
        andl $_TIF_WORK_MASK,%ebx
        jz paranoid_swapgs
        movq %rsp,%rdi /* &pt_regs */
        call sync_regs
        movq %rax,%rsp /* switch stack for scheduling */
        testl $_TIF_NEED_RESCHED,%ebx
        jnz paranoid_schedule
        movl %ebx,%edx /* arg3: thread flags */
        sti
        xorl %esi,%esi /* arg2: oldset */
        movq %rsp,%rdi /* arg1: &pt_regs */
        call do_notify_resume
        cli
        jmp paranoid_userspace
paranoid_schedule:
        sti
        call schedule
        cli
        jmp paranoid_userspace
        CFI_ENDPROC

ENTRY(int3)
        zeroentry do_int3
ENTRY(overflow)
        zeroentry do_overflow
ENTRY(bounds)
        zeroentry do_bounds
ENTRY(invalid_op)
        zeroentry do_invalid_op
ENTRY(coprocessor_segment_overrun)
        zeroentry do_coprocessor_segment_overrun
ENTRY(reserved)
        zeroentry do_reserved

/* runs on exception stack */
ENTRY(double_fault)
        CFI_STARTPROC
        paranoidentry do_double_fault
        jmp paranoid_exit
        CFI_ENDPROC

ENTRY(invalid_TSS)
        errorentry do_invalid_TSS
ENTRY(segment_not_present)
        errorentry do_segment_not_present

/* runs on exception stack */
ENTRY(stack_segment)
        CFI_STARTPROC
        paranoidentry do_stack_segment
        jmp paranoid_exit
        CFI_ENDPROC

ENTRY(general_protection)
        errorentry do_general_protection
ENTRY(alignment_check)
        errorentry do_alignment_check
ENTRY(divide_error)
        zeroentry do_divide_error
ENTRY(spurious_interrupt_bug)
        zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
        CFI_STARTPROC
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_machine_check
        jmp paranoid_exit
        CFI_ENDPROC
#endif

ENTRY(call_debug)
        zeroentry do_call_debug